feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)

- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING CHANGE: the LLM interface changed, and memory is now injected via get_memory()
This commit is contained in:
2025-12-06 19:11:05 +01:00
parent 2c8cdd3ab1
commit 9ca31e45e0
92 changed files with 7897 additions and 1786 deletions
+22 -33
View File
@@ -1,31 +1,18 @@
"""Ollama LLM client with robust error handling."""
from typing import List, Dict, Any, Optional
import logging
import os
import requests
from typing import Any
from requests.exceptions import RequestException, Timeout, HTTPError
import requests
from requests.exceptions import HTTPError, RequestException, Timeout
from ..config import settings
from .exceptions import LLMAPIError, LLMConfigurationError
logger = logging.getLogger(__name__)
class LLMError(Exception):
"""Base exception for LLM-related errors."""
pass
class LLMConfigurationError(LLMError):
"""Raised when LLM is not properly configured."""
pass
class LLMAPIError(LLMError):
"""Raised when LLM API returns an error."""
pass
class OllamaClient:
"""
Client for interacting with Ollama API.
@@ -41,10 +28,10 @@ class OllamaClient:
def __init__(
self,
base_url: Optional[str] = None,
model: Optional[str] = None,
timeout: Optional[int] = None,
temperature: Optional[float] = None,
base_url: str | None = None,
model: str | None = None,
timeout: int | None = None,
temperature: float | None = None,
):
"""
Initialize Ollama client.
@@ -58,10 +45,14 @@ class OllamaClient:
Raises:
LLMConfigurationError: If configuration is invalid
"""
self.base_url = base_url or os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
self.base_url = base_url or os.getenv(
"OLLAMA_BASE_URL", "http://localhost:11434"
)
self.model = model or os.getenv("OLLAMA_MODEL", "llama3.2")
self.timeout = timeout or settings.request_timeout
self.temperature = temperature if temperature is not None else settings.temperature
self.temperature = (
temperature if temperature is not None else settings.temperature
)
if not self.base_url:
raise LLMConfigurationError(
@@ -75,7 +66,7 @@ class OllamaClient:
logger.info(f"Ollama client initialized with model: {self.model}")
def complete(self, messages: List[Dict[str, Any]]) -> str:
def complete(self, messages: list[dict[str, Any]]) -> str:
"""
Generate a completion from the LLM.
@@ -97,7 +88,9 @@ class OllamaClient:
if not isinstance(msg, dict):
raise ValueError(f"Each message must be a dict, got {type(msg)}")
if "role" not in msg or "content" not in msg:
raise ValueError(f"Each message must have 'role' and 'content' keys, got {msg.keys()}")
raise ValueError(
f"Each message must have 'role' and 'content' keys, got {msg.keys()}"
)
if msg["role"] not in ("system", "user", "assistant"):
raise ValueError(f"Invalid role: {msg['role']}")
@@ -108,16 +101,12 @@ class OllamaClient:
"stream": False,
"options": {
"temperature": self.temperature,
}
},
}
try:
logger.debug(f"Sending request to {url} with {len(messages)} messages")
response = requests.post(
url,
json=payload,
timeout=self.timeout
)
response = requests.post(url, json=payload, timeout=self.timeout)
response.raise_for_status()
data = response.json()
@@ -156,7 +145,7 @@ class OllamaClient:
logger.error(f"Failed to parse API response: {e}")
raise LLMAPIError(f"Invalid API response format: {e}") from e
def list_models(self) -> List[str]:
def list_models(self) -> list[str]:
"""
List available models in Ollama.