feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)

- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING CHANGE: LLM interface changed; memory is no longer passed to Agent and is obtained via get_memory() instead
2025-12-06 19:11:05 +01:00
parent 2c8cdd3ab1
commit 9ca31e45e0
92 changed files with 7897 additions and 1786 deletions
@@ -1,147 +1,278 @@
-# agent/agent.py
-from typing import Any, Dict, List
-import json
+"""Main agent for media library management."""
+
+import json
+import logging
+from typing import Any, Protocol
+
+from infrastructure.persistence import get_memory

-from .llm import DeepSeekClient
-from infrastructure.persistence.memory import Memory
-from .registry import make_tools, Tool
-from .prompts import PromptBuilder
 from .config import settings
+from .prompts import PromptBuilder
+from .registry import Tool, make_tools
+
+logger = logging.getLogger(__name__)
+
+
+class LLMClient(Protocol):
+    """Protocol defining the LLM client interface."""
+
+    def complete(self, messages: list[dict[str, Any]]) -> str:
+        """Send messages to the LLM and get a response."""
+        ...
+

 class Agent:
-    def __init__(self, llm: DeepSeekClient, memory: Memory, max_tool_iterations: int = 5):
+    """
+    AI agent for media library management.
+
+    Orchestrates interactions between the LLM, memory, and tools
+    to respond to user requests.
+
+    Attributes:
+        llm: LLM client (DeepSeek or Ollama).
+        tools: Available tools for the agent.
+        prompt_builder: Builds system prompts with context.
+        max_tool_iterations: Maximum tool calls per request.
+    """
+
+    def __init__(self, llm: LLMClient, max_tool_iterations: int = 5):
+        """
+        Initialize the agent.
+
+        Args:
+            llm: LLM client compatible with the LLMClient protocol.
+            max_tool_iterations: Maximum tool iterations (default: 5).
+        """
        self.llm = llm
-        self.memory = memory
-        self.tools: Dict[str, Tool] = make_tools(memory)
+        self.tools: dict[str, Tool] = make_tools()
        self.prompt_builder = PromptBuilder(self.tools)
        self.max_tool_iterations = max_tool_iterations

+    def _parse_intent(self, text: str) -> dict[str, Any] | None:
+        """
+        Parse an LLM response to detect a tool call.

-    def _parse_intent(self, text: str) -> Dict[str, Any] | None:
+        Args:
+            text: LLM response text.
+
+        Returns:
+            Dict with intent if a tool call is detected, None otherwise.
+        """
+        text = text.strip()
+
+        # Try direct JSON parse
+        if text.startswith("{") and text.endswith("}"):
+            try:
+                data = json.loads(text)
+                if self._is_valid_intent(data):
+                    return data
+            except json.JSONDecodeError:
+                pass
+
+        # Try to extract JSON from text
        try:
-            data = json.loads(text)
+            start = text.find("{")
+            end = text.rfind("}") + 1
+            if start != -1 and end > start:
+                json_str = text[start:end]
+                data = json.loads(json_str)
+                if self._is_valid_intent(data):
+                    return data
        except json.JSONDecodeError:
-            return None
+            pass

-        if not isinstance(data, dict):
-            return None
+        return None

+    def _is_valid_intent(self, data: Any) -> bool:
+        """Check if parsed data is a valid tool intent."""
+        if not isinstance(data, dict) or "action" not in data:
+            return False
        action = data.get("action")
-        if not isinstance(action, dict):
-            return None
+        return isinstance(action, dict) and isinstance(action.get("name"), str)

-        name = action.get("name")
-        if not isinstance(name, str):
-            return None
+    def _execute_action(self, intent: dict[str, Any]) -> dict[str, Any]:
+        """
+        Execute a tool action requested by the LLM.

-        return data
+        Args:
+            intent: Dict containing the action to execute.

-    def _execute_action(self, intent: Dict[str, Any]) -> Dict[str, Any]:
+        Returns:
+            Tool execution result.
+        """
        action = intent["action"]
        name: str = action["name"]
-        args: Dict[str, Any] = action.get("args", {}) or {}
+        args: dict[str, Any] = action.get("args", {}) or {}

        tool = self.tools.get(name)
        if not tool:
-            return {"error": "unknown_tool", "tool": name}
+            logger.warning(f"Unknown tool requested: {name}")
+            return {
+                "error": "unknown_tool",
+                "tool": name,
+                "available_tools": list(self.tools.keys()),
+            }

        try:
            result = tool.func(**args)
+
+            # Track errors in episodic memory
+            if result.get("status") == "error" or result.get("error"):
+                memory = get_memory()
+                memory.episodic.add_error(
+                    action=name,
+                    error=result.get("error", result.get("message", "Unknown error")),
+                    context={"args": args, "result": result},
+                )
+
+            return result
+
        except TypeError as e:
-            # Mauvais arguments
+            error_msg = f"Bad arguments for {name}: {e}"
+            logger.error(error_msg)
+            memory = get_memory()
+            memory.episodic.add_error(
+                action=name, error=error_msg, context={"args": args}
+            )
            return {"error": "bad_args", "message": str(e)}

-        return result
+        except Exception as e:
+            error_msg = f"Error executing {name}: {e}"
+            logger.error(error_msg, exc_info=True)
+            memory = get_memory()
+            memory.episodic.add_error(action=name, error=str(e), context={"args": args})
+            return {"error": "execution_error", "message": str(e)}
+
+    def _check_unread_events(self) -> str:
+        """
+        Check for unread background events and format them.
+
+        Returns:
+            Formatted string of events, or empty string if none.
+        """
+        memory = get_memory()
+        events = memory.episodic.get_unread_events()
+
+        if not events:
+            return ""
+
+        lines = ["Recent events:"]
+        for event in events:
+            event_type = event.get("type", "unknown")
+            data = event.get("data", {})
+
+            if event_type == "download_complete":
+                lines.append(f"  - Download completed: {data.get('name')}")
+            elif event_type == "new_files_detected":
+                lines.append(f"  - {data.get('count')} new files detected")
+            else:
+                lines.append(f"  - {event_type}: {data}")
+
+        return "\n".join(lines)

    def step(self, user_input: str) -> str:
        """
-        Execute one agent step with iterative tool execution:
-        - Build system prompt
-        - Query LLM
-        - Loop: If JSON intent -> execute tool, add result to conversation, query LLM again
-        - Continue until LLM responds with text (no tool call) or max iterations reached
-        - Return final text response
+        Execute one agent step with iterative tool execution.
+
+        Process:
+        1. Check for unread events
+        2. Build system prompt with memory context
+        3. Query the LLM
+        4. If tool call detected, execute and loop
+        5. Return final text response
+
+        Args:
+            user_input: User message.
+
+        Returns:
+            Final response in natural text.
        """
-        print("Starting a new step...")
-        print("User input:", user_input)
+        logger.info("Starting agent step")
+        logger.debug(f"User input: {user_input}")

-        print("Current memory state:", self.memory.data)
+        memory = get_memory()

-        # Build system prompt using PromptBuilder
-        system_prompt = self.prompt_builder.build_system_prompt(self.memory.data)
+        # Check for background events
+        events_notification = self._check_unread_events()
+        if events_notification:
+            logger.info("Found unread background events")

-        # Initialize conversation with system prompt
-        messages: List[Dict[str, Any]] = [
+        # Build system prompt
+        system_prompt = self.prompt_builder.build_system_prompt()
+
+        # Initialize conversation
+        messages: list[dict[str, Any]] = [
            {"role": "system", "content": system_prompt},
        ]

-        # Add conversation history from memory (last N messages for context)
-        # Only add user/assistant messages, NOT system messages
-        history = self.memory.get("history", [])
-        max_history = settings.max_history_messages
-        if history and max_history > 0:
-            # Filter to keep only user and assistant messages
-            filtered_history = [
-                msg for msg in history
-                if msg.get("role") in ("user", "assistant")
-            ]
-            recent_history = filtered_history[-max_history:]
-            messages.extend(recent_history)
-            print(f"Added {len(recent_history)} messages from history (filtered)")
+        # Add conversation history
+        history = memory.stm.get_recent_history(settings.max_history_messages)
+        if history:
+            for msg in history:
+                messages.append({"role": msg["role"], "content": msg["content"]})
+            logger.debug(f"Added {len(history)} messages from history")

-        # Add current user input
+        # Add events notification
+        if events_notification:
+            messages.append(
+                {"role": "system", "content": f"[NOTIFICATION]\n{events_notification}"}
+            )
+
+        # Add user input
        messages.append({"role": "user", "content": user_input})

        # Tool execution loop
        iteration = 0
        while iteration < self.max_tool_iterations:
-            print(f"\n--- Iteration {iteration + 1} ---")
+            logger.debug(f"Iteration {iteration + 1}/{self.max_tool_iterations}")

-            # Get LLM response
-            print(messages)
            llm_response = self.llm.complete(messages)
-            print("LLM response:", llm_response)
+            logger.debug(f"LLM response: {llm_response[:200]}...")

-            # Try to parse as tool intent
            intent = self._parse_intent(llm_response)

            if not intent:
-                # No tool call - this is the final text response
-                print("No tool intent detected, returning final response")
-                # Save to history
-                self.memory.append_history("user", user_input)
-                self.memory.append_history("assistant", llm_response)
+                # Final text response
+                logger.info("No tool intent, returning response")
+                memory.stm.add_message("user", user_input)
+                memory.stm.add_message("assistant", llm_response)
+                memory.save()
                return llm_response

-            # Tool call detected - execute it
-            print("Intent detected:", intent)
+            # Execute tool
+            tool_name = intent.get("action", {}).get("name", "unknown")
+            logger.info(f"Executing tool: {tool_name}")
            tool_result = self._execute_action(intent)
-            print("Tool result:", tool_result)
+            logger.debug(f"Tool result: {tool_result}")

-            # Add assistant's tool call and result to conversation
-            messages.append({
-                "role": "assistant",
-                "content": json.dumps(intent, ensure_ascii=False)
-            })
-            messages.append({
-                "role": "user",
-                "content": json.dumps(
-                    {"tool_result": tool_result},
-                    ensure_ascii=False
-                )
-            })
+            # Add to conversation
+            messages.append(
+                {"role": "assistant", "content": json.dumps(intent, ensure_ascii=False)}
+            )
+            messages.append(
+                {
+                    "role": "user",
+                    "content": json.dumps(
+                        {"tool_result": tool_result}, ensure_ascii=False
+                    ),
+                }
+            )

            iteration += 1

-        # Max iterations reached - ask LLM for final response
-        print(f"\n--- Max iterations ({self.max_tool_iterations}) reached, requesting final response ---")
-        messages.append({
-            "role": "user",
-            "content": "Merci pour ces résultats. Peux-tu maintenant me donner une réponse finale en texte naturel ?"
-        })
+        # Max iterations reached
+        logger.warning(f"Max iterations ({self.max_tool_iterations}) reached")
+        messages.append(
+            {
+                "role": "user",
+                "content": "Please provide a final response based on the results.",
+            }
+        )

        final_response = self.llm.complete(messages)
-        # Save to history
-        self.memory.append_history("user", user_input)
-        self.memory.append_history("assistant", final_response)
+
+        memory.stm.add_message("user", user_input)
+        memory.stm.add_message("assistant", final_response)
+        memory.save()
+
        return final_response