feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)

- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING: LLM interface changed, memory injection via get_memory()
This commit is contained in:
2025-12-06 19:11:05 +01:00
parent 2c8cdd3ab1
commit 9ca31e45e0
92 changed files with 7897 additions and 1786 deletions
+181 -58
View File
@@ -1,96 +1,219 @@
# app.py
"""FastAPI application for the media library agent."""
import json
import logging
import os
import time
import uuid
import json
from typing import Any, Dict
from typing import Any
from fastapi import FastAPI, Request
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field, validator
from agent.llm.deepseek import DeepSeekClient
from agent.llm.ollama import OllamaClient
from infrastructure.persistence.memory import Memory
from agent.agent import Agent
import os
from agent.config import settings
from agent.llm.deepseek import DeepSeekClient
from agent.llm.exceptions import LLMAPIError, LLMConfigurationError
from agent.llm.ollama import OllamaClient
from infrastructure.persistence import get_memory, init_memory
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
app = FastAPI(
title="LibreChat Agent Backend",
version="0.1.0",
title="Agent Media API",
description="AI agent for managing a local media library",
version="0.2.0",
)
# Choose LLM based on environment variable
# Initialize memory context at startup
init_memory(storage_dir="memory_data")
logger.info("Memory context initialized")
# Initialize LLM based on environment variable
llm_provider = os.getenv("LLM_PROVIDER", "deepseek").lower()
if llm_provider == "ollama":
print("🦙 Using Ollama LLM")
llm = OllamaClient()
else:
print("🤖 Using DeepSeek LLM")
llm = DeepSeekClient()
try:
if llm_provider == "ollama":
logger.info("Using Ollama LLM")
llm = OllamaClient()
else:
logger.info("Using DeepSeek LLM")
llm = DeepSeekClient()
except LLMConfigurationError as e:
logger.error(f"Failed to initialize LLM: {e}")
raise
memory = Memory()
agent = Agent(llm=llm, memory=memory)
# Initialize agent
agent = Agent(llm=llm, max_tool_iterations=settings.max_tool_iterations)
logger.info("Agent Media API initialized")
def extract_last_user_content(messages: list[Dict[str, Any]]) -> str:
last = ""
# Pydantic models for request validation
class ChatMessage(BaseModel):
    """A single message in the conversation.

    ``role`` is declared before ``content`` so that it is already present in
    ``values`` when the ``content`` validator runs.
    """

    # Sender role; messages with role == "user" must carry content.
    role: str = Field(..., description="Role of the message sender")
    # Message text; None is accepted for non-user messages.
    content: str | None = Field(None, description="Content of the message")

    # always=True makes the check run even when "content" is omitted from the
    # payload; without it the None default would bypass validation and a user
    # message with no content at all would be accepted.
    @validator("content", always=True)
    def content_must_not_be_empty_for_user(cls, v, values):
        """Validate that user messages have non-empty content.

        Args:
            v: The candidate ``content`` value (may be None).
            values: Previously validated fields; ``role`` is read from here.

        Returns:
            The unchanged ``content`` value.

        Raises:
            ValueError: If the role is "user" and the content is missing
                or empty.
        """
        if values.get("role") == "user" and not v:
            raise ValueError("User messages must have non-empty content")
        return v
class ChatCompletionRequest(BaseModel):
    """Payload accepted by the chat completions endpoint."""

    model: str = Field(default="agent-media", description="Model to use")
    messages: list[ChatMessage] = Field(..., description="List of messages")
    stream: bool = Field(default=False, description="Whether to stream the response")
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    max_tokens: int | None = Field(default=None, gt=0)

    @validator("messages")
    def messages_must_have_user_message(cls, v):
        """Ensure the conversation contains a message whose role is "user".

        Returns the message list unchanged as soon as a user message is
        found; raises ValueError when none is present.
        """
        for message in v:
            if message.role == "user":
                return v
        raise ValueError("At least one user message is required")
def extract_last_user_content(messages: list[dict[str, Any]]) -> str:
    """
    Extract the last user message from the conversation.

    Args:
        messages: List of message dictionaries.

    Returns:
        Content of the last message whose "role" is "user", or an empty
        string when no such message exists or its content is missing,
        None, or empty.
    """
    # Walk backwards so the first match is the most recent user message.
    for m in reversed(messages):
        if m.get("role") == "user":
            # `or ""` normalises a missing/None content to an empty string.
            return m.get("content") or ""
    return ""
@app.get("/health")
async def health_check():
    """Report that the service is up, together with the API version."""
    payload = {"status": "healthy", "version": "0.2.0"}
    return payload
@app.get("/v1/models")
async def list_models():
    """Return the available models in the OpenAI-compatible list format."""
    # A single model entry is exposed; "created" is stamped at request time.
    agent_model = {
        "id": "agent-media",
        "object": "model",
        "created": int(time.time()),
        "owned_by": "local",
    }
    return {"object": "list", "data": [agent_model]}
@app.get("/memory/state")
async def get_memory_state():
    """Debug endpoint: return the full state reported by the memory object."""
    return get_memory().get_full_state()
@app.get("/memory/episodic/search-results")
async def get_search_results():
    """Debug endpoint exposing the last search results held in episodic memory.

    Returns an "empty" status payload when nothing is stored; otherwise the
    stored query, type, timestamp and results, plus the number of results.
    """
    last_search = get_memory().episodic.last_search_results
    if not last_search:
        return {"status": "empty", "message": "No search results in episodic memory"}

    found = last_search.get("results", [])
    return {
        "status": "ok",
        "query": last_search.get("query"),
        "type": last_search.get("type"),
        "timestamp": last_search.get("timestamp"),
        "result_count": len(found),
        "results": found,
    }
@app.post("/memory/clear-session")
async def clear_session():
    """Wipe the session memories (STM + episodic) and confirm to the caller."""
    get_memory().clear_session()
    confirmation = {"status": "ok", "message": "Session memories cleared"}
    return confirmation
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
body = await request.json()
model = body.get("model", "local-deepseek-agent")
messages = body.get("messages", [])
stream = body.get("stream", False)
async def chat_completions(chat_request: ChatCompletionRequest):
"""
OpenAI-compatible chat completions endpoint.
user_input = extract_last_user_content(messages)
print("Received chat completion request, stream =", stream, "input:", user_input)
Accepts messages and returns agent response.
Supports both streaming and non-streaming modes.
"""
# Convert Pydantic models to dicts for processing
messages_dict = [msg.dict() for msg in chat_request.messages]
# Process user input through the agent
answer = agent.step(user_input)
user_input = extract_last_user_content(messages_dict)
logger.info(
f"Chat request - stream={chat_request.stream}, input_length={len(user_input)}"
)
try:
answer = agent.step(user_input)
except LLMAPIError as e:
logger.error(f"LLM API error: {e}")
raise HTTPException(status_code=502, detail=f"LLM API error: {e}")
except Exception as e:
logger.error(f"Agent error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal agent error")
# Next: same response logic as before (non-streaming or streaming)
created_ts = int(time.time())
completion_id = f"chatcmpl-{uuid.uuid4().hex}"
if not stream:
resp = {
"id": completion_id,
"object": "chat.completion",
"created": created_ts,
"model": model,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": answer or "",
},
}
],
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
}
return JSONResponse(resp)
if not chat_request.stream:
return JSONResponse(
{
"id": completion_id,
"object": "chat.completion",
"created": created_ts,
"model": chat_request.model,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {"role": "assistant", "content": answer or ""},
}
],
"usage": {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
},
}
)
async def event_generator():
chunk = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created_ts,
"model": model,
"model": chat_request.model,
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"content": answer or "",
},
"delta": {"role": "assistant", "content": answer or ""},
"finish_reason": "stop",
}
],