Unfucked gemini's mess

2025-12-07 03:27:45 +01:00
parent 5b71233fb0
commit a923a760ef
24 changed files with 1885 additions and 1282 deletions
@@ -1,8 +1,7 @@
 """Main agent for media library management."""
-
 import json
 import logging
-from typing import Any, Protocol
+from typing import Any, Dict, List, Optional

 from infrastructure.persistence import get_memory

@@ -13,266 +12,182 @@ from .registry import Tool, make_tools
 logger = logging.getLogger(__name__)


-class LLMClient(Protocol):
-    """Protocol defining the LLM client interface."""
-
-    def complete(self, messages: list[dict[str, Any]]) -> str:
-        """Send messages to the LLM and get a response."""
-        ...
-
-
 class Agent:
    """
    AI agent for media library management.
-
-    Orchestrates interactions between the LLM, memory, and tools
-    to respond to user requests.
-
-    Attributes:
-        llm: LLM client (DeepSeek or Ollama).
-        tools: Available tools for the agent.
-        prompt_builder: Builds system prompts with context.
-        max_tool_iterations: Maximum tool calls per request.
+    
+    Uses OpenAI-compatible tool calling API.
    """

-    def __init__(self, llm: LLMClient, max_tool_iterations: int = 5):
+    def __init__(self, llm, max_tool_iterations: int = 5):
        """
        Initialize the agent.
-
+        
        Args:
-            llm: LLM client compatible with the LLMClient protocol.
-            max_tool_iterations: Maximum tool iterations (default: 5).
+            llm: LLM client with complete() method
+            max_tool_iterations: Maximum number of tool execution iterations
        """
        self.llm = llm
-        self.tools: dict[str, Tool] = make_tools()
+        self.tools: Dict[str, Tool] = make_tools()
        self.prompt_builder = PromptBuilder(self.tools)
        self.max_tool_iterations = max_tool_iterations

-    def _parse_intent(self, text: str) -> dict[str, Any] | None:
-        """
-        Parse an LLM response to detect a tool call.
-
-        Args:
-            text: LLM response text.
-
-        Returns:
-            Dict with intent if a tool call is detected, None otherwise.
-        """
-        text = text.strip()
-
-        # Try direct JSON parse
-        if text.startswith("{") and text.endswith("}"):
-            try:
-                data = json.loads(text)
-                if self._is_valid_intent(data):
-                    return data
-            except json.JSONDecodeError:
-                pass
-
-        # Try to extract JSON from text
-        try:
-            start = text.find("{")
-            end = text.rfind("}") + 1
-            if start != -1 and end > start:
-                json_str = text[start:end]
-                data = json.loads(json_str)
-                if self._is_valid_intent(data):
-                    return data
-        except json.JSONDecodeError:
-            pass
-
-        return None
-
-    def _is_valid_intent(self, data: Any) -> bool:
-        """Check if parsed data is a valid tool intent."""
-        if not isinstance(data, dict) or "action" not in data:
-            return False
-        action = data.get("action")
-        return isinstance(action, dict) and isinstance(action.get("name"), str)
-
-    def _execute_action(self, intent: dict[str, Any]) -> dict[str, Any]:
-        """
-        Execute a tool action requested by the LLM.
-
-        Args:
-            intent: Dict containing the action to execute.
-
-        Returns:
-            Tool execution result.
-        """
-        action = intent["action"]
-        name: str = action["name"]
-        args: dict[str, Any] = action.get("args", {}) or {}
-
-        tool = self.tools.get(name)
-        if not tool:
-            logger.warning(f"Unknown tool requested: {name}")
-            return {
-                "error": "unknown_tool",
-                "tool": name,
-                "available_tools": list(self.tools.keys()),
-            }
-
-        try:
-            result = tool.func(**args)
-
-            # Track errors in episodic memory
-            if result.get("status") == "error" or result.get("error"):
-                memory = get_memory()
-                memory.episodic.add_error(
-                    action=name,
-                    error=result.get("error", result.get("message", "Unknown error")),
-                    context={"args": args, "result": result},
-                )
-
-            return result
-
-        except TypeError as e:
-            error_msg = f"Bad arguments for {name}: {e}"
-            logger.error(error_msg)
-            memory = get_memory()
-            memory.episodic.add_error(
-                action=name, error=error_msg, context={"args": args}
-            )
-            return {"error": "bad_args", "message": str(e)}
-
-        except Exception as e:
-            error_msg = f"Error executing {name}: {e}"
-            logger.error(error_msg, exc_info=True)
-            memory = get_memory()
-            memory.episodic.add_error(action=name, error=str(e), context={"args": args})
-            return {"error": "execution_error", "message": str(e)}
-
-    def _check_unread_events(self) -> str:
-        """
-        Check for unread background events and format them.
-
-        Returns:
-            Formatted string of events, or empty string if none.
-        """
-        memory = get_memory()
-        events = memory.episodic.get_unread_events()
-
-        if not events:
-            return ""
-
-        lines = ["Recent events:"]
-        for event in events:
-            event_type = event.get("type", "unknown")
-            data = event.get("data", {})
-
-            if event_type == "download_complete":
-                lines.append(f"  - Download completed: {data.get('name')}")
-            elif event_type == "new_files_detected":
-                lines.append(f"  - {data.get('count')} new files detected")
-            else:
-                lines.append(f"  - {event_type}: {data}")
-
-        return "\n".join(lines)
-
    def step(self, user_input: str) -> str:
        """
-        Execute one agent step with iterative tool execution.
-
-        Process:
-        1. Check for unread events
-        2. Build system prompt with memory context
-        3. Query the LLM
-        4. If tool call detected, execute and loop
-        5. Return final text response
-
+        Execute one agent step with the user input.
+        
+        This method:
+        1. Adds user message to memory
+        2. Builds prompt with history and context
+        3. Calls LLM, executing tools as needed
+        4. Returns final response
+        
        Args:
-            user_input: User message.
-
+            user_input: User's message
+            
        Returns:
-            Final response in natural text.
+            Agent's final response
        """
-        logger.info("Starting agent step")
-        logger.debug(f"User input: {user_input}")
-
        memory = get_memory()
-
-        # Check for background events
-        events_notification = self._check_unread_events()
-        if events_notification:
-            logger.info("Found unread background events")
-
-        # Build system prompt
+        
+        # Add user message to history
+        memory.stm.add_message("user", user_input)
+        memory.save()
+        
+        # Build initial messages
        system_prompt = self.prompt_builder.build_system_prompt()
-
-        # Initialize conversation
-        messages: list[dict[str, Any]] = [
-            {"role": "system", "content": system_prompt},
+        messages: List[Dict[str, Any]] = [
+            {"role": "system", "content": system_prompt}
        ]
-
+        
        # Add conversation history
        history = memory.stm.get_recent_history(settings.max_history_messages)
-        if history:
-            for msg in history:
-                messages.append({"role": msg["role"], "content": msg["content"]})
-            logger.debug(f"Added {len(history)} messages from history")
-
-        # Add events notification
-        if events_notification:
-            messages.append(
-                {"role": "system", "content": f"[NOTIFICATION]\n{events_notification}"}
-            )
-
-        # Add user input
-        messages.append({"role": "user", "content": user_input})
-
+        messages.extend(history)
+        
+        # Add unread events if any
+        unread_events = memory.episodic.get_unread_events()
+        if unread_events:
+            events_text = "\n".join([
+                f"- {e['type']}: {e['data']}"
+                for e in unread_events
+            ])
+            messages.append({
+                "role": "system",
+                "content": f"Background events:\n{events_text}"
+            })
+        
+        # Get tools specification for OpenAI format
+        tools_spec = self.prompt_builder.build_tools_spec()
+        
        # Tool execution loop
-        iteration = 0
-        while iteration < self.max_tool_iterations:
-            logger.debug(f"Iteration {iteration + 1}/{self.max_tool_iterations}")
-
-            llm_response = self.llm.complete(messages)
-            logger.debug(f"LLM response: {llm_response[:200]}...")
-
-            intent = self._parse_intent(llm_response)
-
-            if not intent:
-                # Final text response
-                logger.info("No tool intent, returning response")
-                memory.stm.add_message("user", user_input)
-                memory.stm.add_message("assistant", llm_response)
+        for iteration in range(self.max_tool_iterations):
+            # Call LLM with tools
+            llm_result = self.llm.complete(messages, tools=tools_spec)
+            
+            # Handle both tuple (response, usage) and dict response
+            if isinstance(llm_result, tuple):
+                response_message, usage = llm_result
+            else:
+                response_message = llm_result
+            
+            # Check if there are tool calls
+            tool_calls = response_message.get("tool_calls")
+            
+            if not tool_calls:
+                # No tool calls, this is the final response
+                final_content = response_message.get("content", "")
+                memory.stm.add_message("assistant", final_content)
                memory.save()
-                return llm_response
-
-            # Execute tool
-            tool_name = intent.get("action", {}).get("name", "unknown")
-            logger.info(f"Executing tool: {tool_name}")
-            tool_result = self._execute_action(intent)
-            logger.debug(f"Tool result: {tool_result}")
-
-            # Add to conversation
-            messages.append(
-                {"role": "assistant", "content": json.dumps(intent, ensure_ascii=False)}
-            )
-            messages.append(
-                {
-                    "role": "user",
-                    "content": json.dumps(
-                        {"tool_result": tool_result}, ensure_ascii=False
-                    ),
-                }
-            )
-
-            iteration += 1
-
-        # Max iterations reached
-        logger.warning(f"Max iterations ({self.max_tool_iterations}) reached")
-        messages.append(
-            {
-                "role": "user",
-                "content": "Please provide a final response based on the results.",
-            }
-        )
-
-        final_response = self.llm.complete(messages)
-
-        memory.stm.add_message("user", user_input)
+                return final_content
+            
+            # Add assistant message with tool calls to conversation
+            messages.append(response_message)
+            
+            # Execute each tool call
+            for tool_call in tool_calls:
+                tool_result = self._execute_tool_call(tool_call)
+                
+                # Add tool result to messages
+                messages.append({
+                    "tool_call_id": tool_call.get("id"),
+                    "role": "tool",
+                    "name": tool_call.get("function", {}).get("name"),
+                    "content": json.dumps(tool_result, ensure_ascii=False),
+                })
+        
+        # Max iterations reached, force final response
+        messages.append({
+            "role": "system",
+            "content": "Please provide a final response to the user without using any more tools."
+        })
+        
+        llm_result = self.llm.complete(messages)
+        if isinstance(llm_result, tuple):
+            final_message, usage = llm_result
+        else:
+            final_message = llm_result
+        
+        final_response = final_message.get("content", "I've completed the requested actions.")
        memory.stm.add_message("assistant", final_response)
        memory.save()
-
        return final_response
+
+    def _execute_tool_call(self, tool_call: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Execute a single tool call.
+        
+        Args:
+            tool_call: OpenAI-format tool call dict
+            
+        Returns:
+            Result dictionary
+        """
+        function = tool_call.get("function", {})
+        tool_name = function.get("name", "")
+        
+        try:
+            args_str = function.get("arguments", "{}")
+            args = json.loads(args_str)
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse tool arguments: {e}")
+            return {
+                "error": "bad_args",
+                "message": f"Invalid JSON arguments: {e}"
+            }
+        
+        # Validate tool exists
+        if tool_name not in self.tools:
+            available = list(self.tools.keys())
+            return {
+                "error": "unknown_tool",
+                "message": f"Tool '{tool_name}' not found",
+                "available_tools": available
+            }
+        
+        tool = self.tools[tool_name]
+        
+        # Execute tool
+        try:
+            result = tool.func(**args)
+            return result
+        except KeyboardInterrupt:
+            # Don't catch KeyboardInterrupt - let it propagate
+            raise
+        except TypeError as e:
+            # Bad arguments
+            memory = get_memory()
+            memory.episodic.add_error(tool_name, f"bad_args: {e}")
+            return {
+                "error": "bad_args",
+                "message": str(e),
+                "tool": tool_name
+            }
+        except Exception as e:
+            # Other errors
+            memory = get_memory()
+            memory.episodic.add_error(tool_name, str(e))
+            return {
+                "error": "execution_failed",
+                "message": str(e),
+                "tool": tool_name
+            }