feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)

- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING: LLM interface changed; memory is now injected via get_memory()
2025-12-06 19:11:05 +01:00
parent 2c8cdd3ab1
commit 9ca31e45e0
92 changed files with 7897 additions and 1786 deletions
@@ -1,15 +1,27 @@
-# agent/prompts.py
-from typing import Dict, Any
+"""Prompt builder for the agent system."""
+
 import json

-from .registry import Tool
+from infrastructure.persistence import get_memory
+
 from .parameters import format_parameters_for_prompt, get_missing_required_parameters
+from .registry import Tool


 class PromptBuilder:
-    """Handles construction of system prompts for the agent."""
+    """Builds system prompts for the agent with memory context.

-    def __init__(self, tools: Dict[str, Tool]):
+    Attributes:
+        tools: Dictionary of available tools.
+    """
+
+    def __init__(self, tools: dict[str, Tool]):
+        """
+        Initialize the prompt builder.
+
+        Args:
+            tools: Dictionary mapping tool names to Tool instances.
+        """
        self.tools = tools

    def _format_tools_description(self) -> str:
@@ -20,69 +32,139 @@ class PromptBuilder:
            for tool in self.tools.values()
        )

-    def _build_context(self, memory_data: dict) -> Dict[str, Any]:
-        """Build the context object with current state from memory."""
-        return memory_data
+    def _format_episodic_context(self) -> str:
+        """Format episodic memory context for the prompt."""
+        memory = get_memory()
+        lines = []

-    def build_system_prompt(self, memory_data: dict) -> str:
+        # Last search results
+        if memory.episodic.last_search_results:
+            search = memory.episodic.last_search_results
+            lines.append(f"LAST SEARCH: '{search.get('query')}'")
+            results = search.get("results", [])
+            if results:
+                lines.append(f"   {len(results)} results available:")
+                for r in results[:5]:
+                    name = r.get("name", r.get("title", "Unknown"))
+                    lines.append(f"   {r.get('index')}. {name}")
+                if len(results) > 5:
+                    lines.append(f"   ... and {len(results) - 5} more")
+
+        # Pending question
+        if memory.episodic.pending_question:
+            q = memory.episodic.pending_question
+            lines.append(f"\nPENDING QUESTION: {q.get('question')}")
+            for opt in q.get("options", []):
+                lines.append(f"   {opt.get('index')}. {opt.get('label')}")
+
+        # Active downloads
+        if memory.episodic.active_downloads:
+            lines.append(f"\nACTIVE DOWNLOADS: {len(memory.episodic.active_downloads)}")
+            for dl in memory.episodic.active_downloads[:3]:
+                lines.append(f"   - {dl.get('name')}: {dl.get('progress', 0)}%")
+
+        # Recent errors
+        if memory.episodic.recent_errors:
+            last_error = memory.episodic.recent_errors[-1]
+            lines.append(
+                f"\nLAST ERROR: {last_error.get('error')} "
+                f"(action: {last_error.get('action')})"
+            )
+
+        # Unread events
+        unread = [e for e in memory.episodic.background_events if not e.get("read")]
+        if unread:
+            lines.append(f"\nUNREAD EVENTS: {len(unread)}")
+            for e in unread[:3]:
+                lines.append(f"   - {e.get('type')}: {e.get('data', {})}")
+
+        return "\n".join(lines) if lines else ""
+
+    def _format_stm_context(self) -> str:
+        """Format short-term memory context for the prompt."""
+        memory = get_memory()
+        lines = []
+
+        # Current workflow
+        if memory.stm.current_workflow:
+            wf = memory.stm.current_workflow
+            lines.append(f"CURRENT WORKFLOW: {wf.get('type')}")
+            lines.append(f"   Target: {wf.get('target', {}).get('title', 'Unknown')}")
+            lines.append(f"   Stage: {wf.get('stage')}")
+
+        # Current topic
+        if memory.stm.current_topic:
+            lines.append(f"CURRENT TOPIC: {memory.stm.current_topic}")
+
+        # Extracted entities
+        if memory.stm.extracted_entities:
+            entities_json = json.dumps(
+                memory.stm.extracted_entities, ensure_ascii=False
+            )
+            lines.append(f"EXTRACTED ENTITIES: {entities_json}")
+
+        return "\n".join(lines) if lines else ""
+
+    def build_system_prompt(self) -> str:
        """
-        Build the system prompt with context provided as JSON.
-
-        Args:
-            memory_data: The full memory data dictionary
+        Build the system prompt with context from memory.

        Returns:
-            The complete system prompt string
+            The complete system prompt string.
        """
-        context = self._build_context(memory_data)
+        memory = get_memory()
        tools_desc = self._format_tools_description()
        params_desc = format_parameters_for_prompt()

        # Check for missing required parameters
-        missing_params = get_missing_required_parameters(memory_data)
+        missing_params = get_missing_required_parameters({"config": memory.ltm.config})
        missing_info = ""
        if missing_params:
-            missing_info = "\n\n⚠️ MISSING REQUIRED PARAMETERS:\n"
+            missing_info = "\n\nMISSING REQUIRED PARAMETERS:\n"
            for param in missing_params:
                missing_info += f"- {param.key}: {param.description}\n"
                missing_info += f"  Why needed: {param.why_needed}\n"

-        return (
-            "You are an AI agent helping a user manage their local media library.\n\n"
-            f"{params_desc}\n\n"
-            "CURRENT CONTEXT (JSON):\n"
-            f"{json.dumps(context, indent=2, ensure_ascii=False)}\n"
-            f"{missing_info}\n"
-            "IMPORTANT RULES:\n"
-            "1. Check the REQUIRED PARAMETERS section above to understand what information you need.\n"
-            "2. If any required parameter is missing (shown in MISSING REQUIRED PARAMETERS), "
-            "you MUST ask the user for it and explain WHY you need it based on the parameter description.\n"
-            "3. To use a tool, respond STRICTLY with this JSON format:\n"
-            '   { "thought": "explanation", "action": { "name": "tool_name", "args": { "arg": "value" } } }\n'
-            "   - No text before or after the JSON\n"
-            "   - All args must be complete and non-null\n"
-            "4. You can use MULTIPLE TOOLS IN SEQUENCE:\n"
-            "   - After executing a tool, you will receive its result\n"
-            "   - You can then decide to use another tool based on the result\n"
-            "   - Or provide a final text response to the user\n"
-            "   - Continue using tools until you have all the information needed\n"
-            "5. If you respond with text (not using a tool), respond normally in French.\n"
-            "6. When you have all the information needed, provide a final response in NATURAL TEXT (not JSON).\n"
-            "7. Extract the relevant information from the user's request and pass it as tool arguments.\n"
-            "\n"
-            "EXAMPLES:\n"
-            "   To set the download folder:\n"
-            '   { "thought": "User provided download path", "action": { "name": "set_path", "args": { "path_type": "download_folder", "path_value": "/home/user/downloads" } } }\n'
-            "\n"
-            "   To set the TV show folder:\n"
-            '   { "thought": "User provided TV show path", "action": { "name": "set_path", "args": { "path_type": "tvshow_folder", "path_value": "/home/user/media/tvshows" } } }\n'
-            "\n"
-            "   To list the download folder:\n"
-            '   { "thought": "User wants to see downloads", "action": { "name": "list_folder", "args": { "folder_type": "download", "path": "." } } }\n'
-            "\n"
-            "   To list a subfolder in TV shows:\n"
-            '   { "thought": "User wants to see a specific show", "action": { "name": "list_folder", "args": { "folder_type": "tvshow", "path": "Game.of.Thrones" } } }\n'
-            "\n"
-            "AVAILABLE TOOLS:\n"
-            f"{tools_desc}\n"
-        )
+        # Build context sections
+        episodic_context = self._format_episodic_context()
+        stm_context = self._format_stm_context()
+
+        config_json = json.dumps(memory.ltm.config, indent=2, ensure_ascii=False)
+
+        return f"""You are an AI agent helping a user manage their local media library.
+
+{params_desc}
+
+CURRENT CONFIGURATION:
+{config_json}
+{missing_info}
+
+{f"SESSION CONTEXT:{chr(10)}{stm_context}" if stm_context else ""}
+
+{f"CURRENT STATE:{chr(10)}{episodic_context}" if episodic_context else ""}
+
+IMPORTANT RULES:
+1. When the user refers to a number (e.g., "the 3rd one", "download number 2"), \
+use `add_torrent_by_index` or `get_torrent_by_index` with that number.
+2. If a torrent search was performed, results are numbered. \
+The user can reference them by number.
+3. To use a tool, respond STRICTLY with this JSON format:
+   {{ "thought": "explanation", "action": {{ "name": "tool_name", "args": {{ }} }} }}
+   - No text before or after the JSON
+4. You can use MULTIPLE TOOLS IN SEQUENCE.
+5. When you have all the information needed, respond in NATURAL TEXT (not JSON).
+6. If a required parameter is missing, ask the user for it.
+7. Respond in the same language as the user.
+
+EXAMPLES:
+- After a torrent search, if the user says "download the 3rd one":
+  {{ "thought": "User wants torrent #3", "action": {{ "name": "add_torrent_by_index", \
+"args": {{ "index": 3 }} }} }}
+
+- To search for torrents:
+  {{ "thought": "Searching torrents", "action": {{ "name": "find_torrents", \
+"args": {{ "media_title": "Inception 1080p" }} }} }}
+
+AVAILABLE TOOLS:
+{tools_desc}
+"""