Files
alfred/agent/agent.py
T
francwa 9ca31e45e0 feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)
- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING: LLM interface changed, memory injection via get_memory()
2025-12-06 19:11:05 +01:00

279 lines
8.8 KiB
Python

"""Main agent for media library management."""
import json
import logging
from typing import Any, Protocol
from infrastructure.persistence import get_memory
from .config import settings
from .prompts import PromptBuilder
from .registry import Tool, make_tools
logger = logging.getLogger(__name__)
class LLMClient(Protocol):
"""Protocol defining the LLM client interface."""
def complete(self, messages: list[dict[str, Any]]) -> str:
"""Send messages to the LLM and get a response."""
...
class Agent:
"""
AI agent for media library management.
Orchestrates interactions between the LLM, memory, and tools
to respond to user requests.
Attributes:
llm: LLM client (DeepSeek or Ollama).
tools: Available tools for the agent.
prompt_builder: Builds system prompts with context.
max_tool_iterations: Maximum tool calls per request.
"""
def __init__(self, llm: LLMClient, max_tool_iterations: int = 5):
"""
Initialize the agent.
Args:
llm: LLM client compatible with the LLMClient protocol.
max_tool_iterations: Maximum tool iterations (default: 5).
"""
self.llm = llm
self.tools: dict[str, Tool] = make_tools()
self.prompt_builder = PromptBuilder(self.tools)
self.max_tool_iterations = max_tool_iterations
def _parse_intent(self, text: str) -> dict[str, Any] | None:
"""
Parse an LLM response to detect a tool call.
Args:
text: LLM response text.
Returns:
Dict with intent if a tool call is detected, None otherwise.
"""
text = text.strip()
# Try direct JSON parse
if text.startswith("{") and text.endswith("}"):
try:
data = json.loads(text)
if self._is_valid_intent(data):
return data
except json.JSONDecodeError:
pass
# Try to extract JSON from text
try:
start = text.find("{")
end = text.rfind("}") + 1
if start != -1 and end > start:
json_str = text[start:end]
data = json.loads(json_str)
if self._is_valid_intent(data):
return data
except json.JSONDecodeError:
pass
return None
def _is_valid_intent(self, data: Any) -> bool:
"""Check if parsed data is a valid tool intent."""
if not isinstance(data, dict) or "action" not in data:
return False
action = data.get("action")
return isinstance(action, dict) and isinstance(action.get("name"), str)
def _execute_action(self, intent: dict[str, Any]) -> dict[str, Any]:
"""
Execute a tool action requested by the LLM.
Args:
intent: Dict containing the action to execute.
Returns:
Tool execution result.
"""
action = intent["action"]
name: str = action["name"]
args: dict[str, Any] = action.get("args", {}) or {}
tool = self.tools.get(name)
if not tool:
logger.warning(f"Unknown tool requested: {name}")
return {
"error": "unknown_tool",
"tool": name,
"available_tools": list(self.tools.keys()),
}
try:
result = tool.func(**args)
# Track errors in episodic memory
if result.get("status") == "error" or result.get("error"):
memory = get_memory()
memory.episodic.add_error(
action=name,
error=result.get("error", result.get("message", "Unknown error")),
context={"args": args, "result": result},
)
return result
except TypeError as e:
error_msg = f"Bad arguments for {name}: {e}"
logger.error(error_msg)
memory = get_memory()
memory.episodic.add_error(
action=name, error=error_msg, context={"args": args}
)
return {"error": "bad_args", "message": str(e)}
except Exception as e:
error_msg = f"Error executing {name}: {e}"
logger.error(error_msg, exc_info=True)
memory = get_memory()
memory.episodic.add_error(action=name, error=str(e), context={"args": args})
return {"error": "execution_error", "message": str(e)}
def _check_unread_events(self) -> str:
"""
Check for unread background events and format them.
Returns:
Formatted string of events, or empty string if none.
"""
memory = get_memory()
events = memory.episodic.get_unread_events()
if not events:
return ""
lines = ["Recent events:"]
for event in events:
event_type = event.get("type", "unknown")
data = event.get("data", {})
if event_type == "download_complete":
lines.append(f" - Download completed: {data.get('name')}")
elif event_type == "new_files_detected":
lines.append(f" - {data.get('count')} new files detected")
else:
lines.append(f" - {event_type}: {data}")
return "\n".join(lines)
def step(self, user_input: str) -> str:
"""
Execute one agent step with iterative tool execution.
Process:
1. Check for unread events
2. Build system prompt with memory context
3. Query the LLM
4. If tool call detected, execute and loop
5. Return final text response
Args:
user_input: User message.
Returns:
Final response in natural text.
"""
logger.info("Starting agent step")
logger.debug(f"User input: {user_input}")
memory = get_memory()
# Check for background events
events_notification = self._check_unread_events()
if events_notification:
logger.info("Found unread background events")
# Build system prompt
system_prompt = self.prompt_builder.build_system_prompt()
# Initialize conversation
messages: list[dict[str, Any]] = [
{"role": "system", "content": system_prompt},
]
# Add conversation history
history = memory.stm.get_recent_history(settings.max_history_messages)
if history:
for msg in history:
messages.append({"role": msg["role"], "content": msg["content"]})
logger.debug(f"Added {len(history)} messages from history")
# Add events notification
if events_notification:
messages.append(
{"role": "system", "content": f"[NOTIFICATION]\n{events_notification}"}
)
# Add user input
messages.append({"role": "user", "content": user_input})
# Tool execution loop
iteration = 0
while iteration < self.max_tool_iterations:
logger.debug(f"Iteration {iteration + 1}/{self.max_tool_iterations}")
llm_response = self.llm.complete(messages)
logger.debug(f"LLM response: {llm_response[:200]}...")
intent = self._parse_intent(llm_response)
if not intent:
# Final text response
logger.info("No tool intent, returning response")
memory.stm.add_message("user", user_input)
memory.stm.add_message("assistant", llm_response)
memory.save()
return llm_response
# Execute tool
tool_name = intent.get("action", {}).get("name", "unknown")
logger.info(f"Executing tool: {tool_name}")
tool_result = self._execute_action(intent)
logger.debug(f"Tool result: {tool_result}")
# Add to conversation
messages.append(
{"role": "assistant", "content": json.dumps(intent, ensure_ascii=False)}
)
messages.append(
{
"role": "user",
"content": json.dumps(
{"tool_result": tool_result}, ensure_ascii=False
),
}
)
iteration += 1
# Max iterations reached
logger.warning(f"Max iterations ({self.max_tool_iterations}) reached")
messages.append(
{
"role": "user",
"content": "Please provide a final response based on the results.",
}
)
final_response = self.llm.complete(messages)
memory.stm.add_message("user", user_input)
memory.stm.add_message("assistant", final_response)
memory.save()
return final_response