feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)

- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING: LLM interface changed, memory injection via get_memory()
2025-12-06 19:11:05 +01:00
parent 2c8cdd3ab1
commit 9ca31e45e0
92 changed files with 7897 additions and 1786 deletions
@@ -1,31 +1,18 @@
 """Ollama LLM client with robust error handling."""
-from typing import List, Dict, Any, Optional
+
 import logging
 import os
-import requests
+from typing import Any

-from requests.exceptions import RequestException, Timeout, HTTPError
+import requests
+from requests.exceptions import HTTPError, RequestException, Timeout

 from ..config import settings
+from .exceptions import LLMAPIError, LLMConfigurationError

 logger = logging.getLogger(__name__)


-class LLMError(Exception):
-    """Base exception for LLM-related errors."""
-    pass
-
-
-class LLMConfigurationError(LLMError):
-    """Raised when LLM is not properly configured."""
-    pass
-
-
-class LLMAPIError(LLMError):
-    """Raised when LLM API returns an error."""
-    pass
-
-
 class OllamaClient:
    """
    Client for interacting with Ollama API.
@@ -41,10 +28,10 @@ class OllamaClient:

    def __init__(
        self,
-        base_url: Optional[str] = None,
-        model: Optional[str] = None,
-        timeout: Optional[int] = None,
-        temperature: Optional[float] = None,
+        base_url: str | None = None,
+        model: str | None = None,
+        timeout: int | None = None,
+        temperature: float | None = None,
    ):
        """
        Initialize Ollama client.
@@ -58,10 +45,14 @@ class OllamaClient:
        Raises:
            LLMConfigurationError: If configuration is invalid
        """
-        self.base_url = base_url or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+        self.base_url = base_url or os.getenv(
+            "OLLAMA_BASE_URL", "http://localhost:11434"
+        )
        self.model = model or os.getenv("OLLAMA_MODEL", "llama3.2")
        self.timeout = timeout or settings.request_timeout
-        self.temperature = temperature if temperature is not None else settings.temperature
+        self.temperature = (
+            temperature if temperature is not None else settings.temperature
+        )

        if not self.base_url:
            raise LLMConfigurationError(
@@ -75,7 +66,7 @@ class OllamaClient:

        logger.info(f"Ollama client initialized with model: {self.model}")

-    def complete(self, messages: List[Dict[str, Any]]) -> str:
+    def complete(self, messages: list[dict[str, Any]]) -> str:
        """
        Generate a completion from the LLM.

@@ -97,7 +88,9 @@ class OllamaClient:
            if not isinstance(msg, dict):
                raise ValueError(f"Each message must be a dict, got {type(msg)}")
            if "role" not in msg or "content" not in msg:
-                raise ValueError(f"Each message must have 'role' and 'content' keys, got {msg.keys()}")
+                raise ValueError(
+                    f"Each message must have 'role' and 'content' keys, got {msg.keys()}"
+                )
            if msg["role"] not in ("system", "user", "assistant"):
                raise ValueError(f"Invalid role: {msg['role']}")

@@ -108,16 +101,12 @@ class OllamaClient:
            "stream": False,
            "options": {
                "temperature": self.temperature,
-            }
+            },
        }

        try:
            logger.debug(f"Sending request to {url} with {len(messages)} messages")
-            response = requests.post(
-                url,
-                json=payload,
-                timeout=self.timeout
-            )
+            response = requests.post(url, json=payload, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

@@ -156,7 +145,7 @@ class OllamaClient:
            logger.error(f"Failed to parse API response: {e}")
            raise LLMAPIError(f"Invalid API response format: {e}") from e

-    def list_models(self) -> List[str]:
+    def list_models(self) -> list[str]:
        """
        List available models in Ollama.