feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)

- Fix circular dependencies in agent/tools
- Migrate from custom JSON to the OpenAI tool-calls format
- Add async streaming (step_stream, complete_stream)
- Simplify the prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING: LLM interface changed; memory is now obtained via get_memory() instead of being passed to the Agent constructor
2025-12-06 19:11:05 +01:00
parent 2c8cdd3ab1
commit 9ca31e45e0
92 changed files with 7897 additions and 1786 deletions
@@ -1,96 +1,219 @@
-# app.py
+"""FastAPI application for the media library agent."""
+
+import json
+import logging
+import os
 import time
 import uuid
-import json
-from typing import Any, Dict
+from typing import Any

-from fastapi import FastAPI, Request
+from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel, Field, validator

-from agent.llm.deepseek import DeepSeekClient
-from agent.llm.ollama import OllamaClient
-from infrastructure.persistence.memory import Memory
 from agent.agent import Agent
-import os
+from agent.config import settings
+from agent.llm.deepseek import DeepSeekClient
+from agent.llm.exceptions import LLMAPIError, LLMConfigurationError
+from agent.llm.ollama import OllamaClient
+from infrastructure.persistence import get_memory, init_memory
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)

 app = FastAPI(
-    title="LibreChat Agent Backend",
-    version="0.1.0",
+    title="Agent Media API",
+    description="AI agent for managing a local media library",
+    version="0.2.0",
 )

-# Choose LLM based on environment variable
+# Initialize memory context at startup
+init_memory(storage_dir="memory_data")
+logger.info("Memory context initialized")
+
+# Initialize LLM based on environment variable
 llm_provider = os.getenv("LLM_PROVIDER", "deepseek").lower()

-if llm_provider == "ollama":
-    print("🦙 Using Ollama LLM")
-    llm = OllamaClient()
-else:
-    print("🤖 Using DeepSeek LLM")
-    llm = DeepSeekClient()
+try:
+    if llm_provider == "ollama":
+        logger.info("Using Ollama LLM")
+        llm = OllamaClient()
+    else:
+        logger.info("Using DeepSeek LLM")
+        llm = DeepSeekClient()
+except LLMConfigurationError as e:
+    logger.error(f"Failed to initialize LLM: {e}")
+    raise

-memory = Memory()
-agent = Agent(llm=llm, memory=memory)
+# Initialize agent
+agent = Agent(llm=llm, max_tool_iterations=settings.max_tool_iterations)
+logger.info("Agent Media API initialized")


-def extract_last_user_content(messages: list[Dict[str, Any]]) -> str:
-    last = ""
+# Pydantic models for request validation
+class ChatMessage(BaseModel):
+    """A single message in the conversation."""
+
+    role: str = Field(..., description="Role of the message sender")
+    content: str | None = Field(None, description="Content of the message")
+
+    @validator("content")
+    def content_must_not_be_empty_for_user(cls, v, values):
+        """Validate that user messages have non-empty content."""
+        if values.get("role") == "user" and not v:
+            raise ValueError("User messages must have non-empty content")
+        return v
+
+
+class ChatCompletionRequest(BaseModel):
+    """Request body for chat completions."""
+
+    model: str = Field(default="agent-media", description="Model to use")
+    messages: list[ChatMessage] = Field(..., description="List of messages")
+    stream: bool = Field(default=False, description="Whether to stream the response")
+    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
+    max_tokens: int | None = Field(default=None, gt=0)
+
+    @validator("messages")
+    def messages_must_have_user_message(cls, v):
+        """Validate that there is at least one user message."""
+        if not any(msg.role == "user" for msg in v):
+            raise ValueError("At least one user message is required")
+        return v
+
+
+def extract_last_user_content(messages: list[dict[str, Any]]) -> str:
+    """
+    Extract the last user message from the conversation.
+
+    Args:
+        messages: List of message dictionaries.
+
+    Returns:
+        Content of the last user message, or empty string.
+    """
    for m in reversed(messages):
        if m.get("role") == "user":
-            last = m.get("content") or ""
-            break
-    return last
+            return m.get("content") or ""
+    return ""
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    return {"status": "healthy", "version": "0.2.0"}
+
+
+@app.get("/v1/models")
+async def list_models():
+    """List available models (OpenAI-compatible endpoint)."""
+    return {
+        "object": "list",
+        "data": [
+            {
+                "id": "agent-media",
+                "object": "model",
+                "created": int(time.time()),
+                "owned_by": "local",
+            }
+        ],
+    }
+
+
+@app.get("/memory/state")
+async def get_memory_state():
+    """Debug endpoint to view full memory state."""
+    memory = get_memory()
+    return memory.get_full_state()
+
+
+@app.get("/memory/episodic/search-results")
+async def get_search_results():
+    """Debug endpoint to view last search results."""
+    memory = get_memory()
+    if memory.episodic.last_search_results:
+        return {
+            "status": "ok",
+            "query": memory.episodic.last_search_results.get("query"),
+            "type": memory.episodic.last_search_results.get("type"),
+            "timestamp": memory.episodic.last_search_results.get("timestamp"),
+            "result_count": len(memory.episodic.last_search_results.get("results", [])),
+            "results": memory.episodic.last_search_results.get("results", []),
+        }
+    return {"status": "empty", "message": "No search results in episodic memory"}
+
+
+@app.post("/memory/clear-session")
+async def clear_session():
+    """Clear session memories (STM + Episodic)."""
+    memory = get_memory()
+    memory.clear_session()
+    return {"status": "ok", "message": "Session memories cleared"}


@app.post("/v1/chat/completions")
-async def chat_completions(request: Request):
-    body = await request.json()
-    model = body.get("model", "local-deepseek-agent")
-    messages = body.get("messages", [])
-    stream = body.get("stream", False)
+async def chat_completions(chat_request: ChatCompletionRequest):
+    """
+    OpenAI-compatible chat completions endpoint.

-    user_input = extract_last_user_content(messages)
-    print("Received chat completion request, stream =", stream, "input:", user_input)
+    Accepts messages and returns agent response.
+    Supports both streaming and non-streaming modes.
+    """
+    # Convert Pydantic models to dicts for processing
+    messages_dict = [msg.dict() for msg in chat_request.messages]

-    # Process user input through the agent
-    answer = agent.step(user_input)
+    user_input = extract_last_user_content(messages_dict)
+
+    logger.info(
+        f"Chat request - stream={chat_request.stream}, input_length={len(user_input)}"
+    )
+
+    try:
+        answer = agent.step(user_input)
+    except LLMAPIError as e:
+        logger.error(f"LLM API error: {e}")
+        raise HTTPException(status_code=502, detail=f"LLM API error: {e}")
+    except Exception as e:
+        logger.error(f"Agent error: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal agent error")

-    # Ensuite = même logique de réponse (non-stream ou stream)
    created_ts = int(time.time())
    completion_id = f"chatcmpl-{uuid.uuid4().hex}"

-    if not stream:
-        resp = {
-            "id": completion_id,
-            "object": "chat.completion",
-            "created": created_ts,
-            "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "finish_reason": "stop",
-                    "message": {
-                        "role": "assistant",
-                        "content": answer or "",
-                    },
-                }
-            ],
-            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
-        }
-        return JSONResponse(resp)
+    if not chat_request.stream:
+        return JSONResponse(
+            {
+                "id": completion_id,
+                "object": "chat.completion",
+                "created": created_ts,
+                "model": chat_request.model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "finish_reason": "stop",
+                        "message": {"role": "assistant", "content": answer or ""},
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "total_tokens": 0,
+                },
+            }
+        )

    async def event_generator():
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_ts,
-            "model": model,
+            "model": chat_request.model,
            "choices": [
                {
                    "index": 0,
-                    "delta": {
-                        "role": "assistant",
-                        "content": answer or "",
-                    },
+                    "delta": {"role": "assistant", "content": answer or ""},
                    "finish_reason": "stop",
                }
            ],