"""Main agent for media library management."""

import json
import logging
from typing import Any, Dict, List, Optional

from infrastructure.persistence import get_memory

from .config import settings
from .prompts import PromptBuilder
from .registry import Tool, make_tools

logger = logging.getLogger(__name__)

# Reply used when the forced final completion carries no usable text.
_DEFAULT_FINAL_RESPONSE = "I've completed the requested actions."

# System message appended once the tool-iteration budget is exhausted.
_FORCE_FINAL_PROMPT = (
    "Please provide a final response to the user without using any more tools."
)


class Agent:
    """
    AI agent for media library management.

    Uses OpenAI-compatible tool calling API.
    """

    def __init__(self, llm, max_tool_iterations: int = 5):
        """
        Initialize the agent.

        Args:
            llm: LLM client with complete() method
            max_tool_iterations: Maximum number of tool execution iterations
        """
        self.llm = llm
        self.tools: Dict[str, Tool] = make_tools()
        self.prompt_builder = PromptBuilder(self.tools)
        self.max_tool_iterations = max_tool_iterations

    def step(self, user_input: str) -> str:
        """
        Execute one agent step with the user input.

        This method:
        1. Adds the user message to memory and saves it
        2. Builds the prompt from the system prompt, recent history and
           unread background events
        3. Calls the LLM, executing requested tools, for at most
           ``max_tool_iterations`` rounds
        4. If the budget runs out, asks the LLM (without tools) for a
           final answer
        5. Stores the final answer in memory and returns it

        Args:
            user_input: User's message

        Returns:
            Agent's final response (never ``None``; empty/null content from
            the LLM is replaced by a fallback string)
        """
        memory = get_memory()

        # Persist the user message before any LLM work happens.
        memory.stm.add_message("user", user_input)
        memory.save()

        messages = self._build_messages(memory)

        # Get tools specification for OpenAI format
        tools_spec = self.prompt_builder.build_tools_spec()

        # Tool execution loop
        for _ in range(self.max_tool_iterations):
            response_message = self._unwrap_llm_result(
                self.llm.complete(messages, tools=tools_spec)
            )

            tool_calls = response_message.get("tool_calls")
            if not tool_calls:
                # No tool calls: this is the final response. The "content"
                # key may be present with a null value, so `or` is used
                # instead of a .get() default.
                final_content = response_message.get("content") or ""
                return self._record_final_response(memory, final_content)

            # The assistant message carrying the tool calls must precede
            # the tool results in the conversation.
            messages.append(response_message)

            for tool_call in tool_calls:
                tool_name = (tool_call.get("function") or {}).get("name")
                tool_result = self._execute_tool_call(tool_call)
                messages.append({
                    "tool_call_id": tool_call.get("id"),
                    "role": "tool",
                    "name": tool_name,
                    "content": self._serialize_tool_result(
                        tool_name, tool_result
                    ),
                })

        # Max iterations reached, force final response (no tools offered).
        messages.append({"role": "system", "content": _FORCE_FINAL_PROMPT})
        final_message = self._unwrap_llm_result(self.llm.complete(messages))
        final_response = (
            final_message.get("content") or _DEFAULT_FINAL_RESPONSE
        )
        return self._record_final_response(memory, final_response)

    def _build_messages(self, memory: Any) -> List[Dict[str, Any]]:
        """Assemble system prompt, recent history and unread events."""
        system_prompt = self.prompt_builder.build_system_prompt()
        messages: List[Dict[str, Any]] = [
            {"role": "system", "content": system_prompt}
        ]

        # Add conversation history
        history = memory.stm.get_recent_history(settings.max_history_messages)
        messages.extend(history)

        # Add unread events if any
        unread_events = memory.episodic.get_unread_events()
        if unread_events:
            events_text = "\n".join(
                f"- {e['type']}: {e['data']}" for e in unread_events
            )
            messages.append({
                "role": "system",
                "content": f"Background events:\n{events_text}",
            })

        return messages

    @staticmethod
    def _unwrap_llm_result(llm_result: Any) -> Dict[str, Any]:
        """Return the message from a ``(message, usage)`` tuple or as-is."""
        if isinstance(llm_result, tuple):
            message, _usage = llm_result
            return message
        return llm_result

    @staticmethod
    def _record_final_response(memory: Any, content: str) -> str:
        """Store the assistant reply in memory, save, and return it."""
        memory.stm.add_message("assistant", content)
        memory.save()
        return content

    @staticmethod
    def _serialize_tool_result(tool_name: Optional[str], result: Any) -> str:
        """
        JSON-encode a tool result for a ``tool`` message.

        If the result cannot be encoded, an error payload is returned
        instead so the conversation can continue rather than aborting the
        whole step.
        """
        try:
            return json.dumps(result, ensure_ascii=False)
        except (TypeError, ValueError) as e:
            # TypeError: unsupported type; ValueError: circular reference.
            logger.error(
                "Tool %r returned a non-serializable result: %s", tool_name, e
            )
            return json.dumps(
                {
                    "error": "unserializable_result",
                    "message": str(e),
                    "tool": tool_name,
                },
                ensure_ascii=False,
            )

    def _execute_tool_call(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a single tool call.

        Arguments are taken from ``tool_call["function"]["arguments"]``.
        A missing, null or blank value is treated as "no arguments", a
        dict is used directly, and anything else is parsed as JSON.

        Args:
            tool_call: OpenAI-format tool call dict

        Returns:
            The tool's result, or an error dictionary with an ``"error"``
            key of ``"bad_args"``, ``"unknown_tool"`` or
            ``"execution_failed"``.
        """
        # "function" may be present but null, hence `or {}`.
        function = tool_call.get("function") or {}
        tool_name = function.get("name", "")

        raw_args = function.get("arguments")
        if raw_args is None or (
            isinstance(raw_args, str) and not raw_args.strip()
        ):
            args: Any = {}
        elif isinstance(raw_args, dict):
            # Already decoded by the client; nothing to parse.
            args = raw_args
        else:
            try:
                args = json.loads(raw_args)
            except (json.JSONDecodeError, TypeError) as e:
                # TypeError covers non-string payloads json.loads rejects.
                logger.error("Failed to parse tool arguments: %s", e)
                return {
                    "error": "bad_args",
                    "message": f"Invalid JSON arguments: {e}",
                }

        # Validate tool exists
        if tool_name not in self.tools:
            return {
                "error": "unknown_tool",
                "message": f"Tool '{tool_name}' not found",
                "available_tools": list(self.tools.keys()),
            }

        tool = self.tools[tool_name]

        # Execute tool. KeyboardInterrupt is not an Exception subclass, so
        # it propagates past both handlers below.
        try:
            return tool.func(**args)
        except TypeError as e:
            # Reported as bad arguments; this also covers `args` not being
            # a mapping (e.g. the JSON decoded to a list).
            memory = get_memory()
            memory.episodic.add_error(tool_name, f"bad_args: {e}")
            return {
                "error": "bad_args",
                "message": str(e),
                "tool": tool_name,
            }
        except Exception as e:
            # Any other failure is reported back instead of raised.
            memory = get_memory()
            memory.episodic.add_error(tool_name, str(e))
            return {
                "error": "execution_failed",
                "message": str(e),
                "tool": tool_name,
            }