# alfred/agent/agent.py

"""Main agent for media library management."""
import json
import logging
from typing import Any, Dict, List, Optional

from infrastructure.persistence import get_memory

from .config import settings
from .prompts import PromptBuilder
from .registry import Tool, make_tools

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class Agent:
    """
    AI agent for media library management.

    Uses OpenAI-compatible tool calling API: the LLM is called with a tools
    specification, any tool calls it requests are executed, and their results
    are fed back until the LLM answers without requesting tools (or the
    iteration budget is exhausted).
    """

    def __init__(self, llm, max_tool_iterations: int = 5):
        """
        Initialize the agent.

        Args:
            llm: LLM client with complete() method
            max_tool_iterations: Maximum number of tool execution iterations
        """
        self.llm = llm
        self.tools: Dict[str, Tool] = make_tools()
        self.prompt_builder = PromptBuilder(self.tools)
        self.max_tool_iterations = max_tool_iterations

    @staticmethod
    def _unpack_llm_result(llm_result: Any) -> Dict[str, Any]:
        """
        Return the message dict from an LLM result.

        The client may return either the message itself or a
        ``(message, usage)`` tuple; usage is not needed here and is dropped.
        """
        if isinstance(llm_result, tuple):
            message, _usage = llm_result
            return message
        return llm_result

    @staticmethod
    def _message_content(message: Dict[str, Any], default: str = "") -> str:
        """
        Return the message's text content, or ``default`` when absent.

        A ``"content"`` key that is present but ``None`` is treated like a
        missing key, so callers never receive ``None``.
        """
        content = message.get("content")
        return default if content is None else content

    def _build_messages(self, memory) -> List[Dict[str, Any]]:
        """Assemble system prompt, recent history and unread background events."""
        system_prompt = self.prompt_builder.build_system_prompt()
        messages: List[Dict[str, Any]] = [
            {"role": "system", "content": system_prompt}
        ]

        # Add conversation history
        messages.extend(
            memory.stm.get_recent_history(settings.max_history_messages)
        )

        # Add unread events if any
        unread_events = memory.episodic.get_unread_events()
        if unread_events:
            events_text = "\n".join(
                f"- {e['type']}: {e['data']}" for e in unread_events
            )
            messages.append({
                "role": "system",
                "content": f"Background events:\n{events_text}"
            })
        return messages

    @staticmethod
    def _remember_reply(memory, reply: str) -> str:
        """Store the assistant reply in short-term memory, save, and return it."""
        memory.stm.add_message("assistant", reply)
        memory.save()
        return reply

    def step(self, user_input: str) -> str:
        """
        Execute one agent step with the user input.

        This method:
        1. Adds user message to memory
        2. Builds prompt with history and context
        3. Calls LLM, executing tools as needed
        4. Returns final response

        Args:
            user_input: User's message

        Returns:
            Agent's final response (never ``None``; a null content from the
            LLM becomes an empty string, or a fixed fallback sentence after
            the iteration budget is exhausted)
        """
        memory = get_memory()

        # Record the user turn before building the prompt.
        memory.stm.add_message("user", user_input)
        memory.save()

        messages = self._build_messages(memory)

        # Get tools specification for OpenAI format
        tools_spec = self.prompt_builder.build_tools_spec()

        # Tool execution loop
        for _ in range(self.max_tool_iterations):
            response_message = self._unpack_llm_result(
                self.llm.complete(messages, tools=tools_spec)
            )

            tool_calls = response_message.get("tool_calls")
            if not tool_calls:
                # No tool calls, this is the final response
                return self._remember_reply(
                    memory, self._message_content(response_message)
                )

            # Add assistant message with tool calls to conversation
            messages.append(response_message)

            for tool_call in tool_calls:
                tool_result = self._execute_tool_call(tool_call)
                messages.append({
                    "tool_call_id": tool_call.get("id"),
                    "role": "tool",
                    "name": (tool_call.get("function") or {}).get("name"),
                    # default=str keeps one non-JSON value in a tool result
                    # from aborting the whole step; serializable results are
                    # encoded exactly as before.
                    "content": json.dumps(
                        tool_result, ensure_ascii=False, default=str
                    ),
                })

        # Max iterations reached, force final response
        messages.append({
            "role": "system",
            "content": "Please provide a final response to the user without using any more tools."
        })

        # No tools are offered on this last call.
        final_message = self._unpack_llm_result(self.llm.complete(messages))
        return self._remember_reply(
            memory,
            self._message_content(
                final_message, "I've completed the requested actions."
            ),
        )

    def _execute_tool_call(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a single tool call.

        Args:
            tool_call: OpenAI-format tool call dict. ``function.arguments``
                may be a JSON string, an already-decoded dict, or
                ``None``/blank (treated as "no arguments").

        Returns:
            The tool's result on success, otherwise an error dictionary with
            ``"error"`` set to ``"bad_args"``, ``"unknown_tool"`` or
            ``"execution_failed"``.
        """
        function = tool_call.get("function") or {}
        tool_name = function.get("name") or ""

        raw_args = function.get("arguments", "{}")
        if isinstance(raw_args, dict):
            # Already decoded by the client.
            args = raw_args
        elif raw_args is None or (
            isinstance(raw_args, str) and not raw_args.strip()
        ):
            # Missing/blank arguments mean a call without arguments.
            args = {}
        else:
            try:
                args = json.loads(raw_args)
            except (ValueError, TypeError) as e:
                # ValueError covers json.JSONDecodeError; TypeError covers
                # inputs json.loads cannot accept at all.
                logger.error("Failed to parse tool arguments: %s", e)
                return {
                    "error": "bad_args",
                    "message": f"Invalid JSON arguments: {e}"
                }

        # Validate tool exists
        if tool_name not in self.tools:
            return {
                "error": "unknown_tool",
                "message": f"Tool '{tool_name}' not found",
                "available_tools": list(self.tools.keys())
            }

        tool = self.tools[tool_name]

        # KeyboardInterrupt is not an Exception subclass, so it propagates
        # through the handlers below without an explicit re-raise.
        try:
            return tool.func(**args)
        except TypeError as e:
            # Reported as bad arguments. Note that a TypeError raised inside
            # the tool itself lands here too.
            get_memory().episodic.add_error(tool_name, f"bad_args: {e}")
            return {
                "error": "bad_args",
                "message": str(e),
                "tool": tool_name
            }
        except Exception as e:
            # Any other failure is returned as an error result rather than
            # raised.
            get_memory().episodic.add_error(tool_name, str(e))
            return {
                "error": "execution_failed",
                "message": str(e),
                "tool": tool_name
            }