2c8cdd3ab1
Working but need to check out code
102 lines
2.8 KiB
Python
102 lines
2.8 KiB
Python
# app.py
|
|
import time
|
|
import uuid
|
|
import json
|
|
from typing import Any, Dict
|
|
|
|
from fastapi import FastAPI, Request
|
|
from fastapi.responses import JSONResponse, StreamingResponse
|
|
|
|
from agent.llm.deepseek import DeepSeekClient
|
|
from agent.llm.ollama import OllamaClient
|
|
from infrastructure.persistence.memory import Memory
|
|
from agent.agent import Agent
|
|
import os
|
|
|
|
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI(title="LibreChat Agent Backend", version="0.1.0")

# The LLM backend is picked once, at import time, from the LLM_PROVIDER
# environment variable (case-insensitive). Only "ollama" selects Ollama;
# any other value, or no value at all, selects DeepSeek.
llm_provider = os.getenv("LLM_PROVIDER", "deepseek").lower()

if llm_provider != "ollama":
    print("🤖 Using DeepSeek LLM")
    llm = DeepSeekClient()
else:
    print("🦙 Using Ollama LLM")
    llm = OllamaClient()

# One Memory and one Agent instance are created here and shared by the
# request handlers defined in this module.
memory = Memory()
agent = Agent(llm=llm, memory=memory)
|
|
|
|
|
|
def extract_last_user_content(messages: list[Dict[str, Any]]) -> str:
    """Return the text of the most recent ``user`` message.

    Args:
        messages: Chat messages, oldest first. Each entry is expected to be
            a mapping with ``role`` and ``content`` keys; entries that are
            not mappings are skipped. ``None`` is treated as an empty list.

    Returns:
        The content of the last message whose role is ``"user"``:

        * string content is returned unchanged;
        * list content (OpenAI-style multi-part messages) is flattened by
          joining the ``text`` of every ``{"type": "text"}`` part with
          newlines, ignoring non-text parts such as images;
        * any other non-empty content is converted with ``str()``.

        An empty string is returned when there is no user message or when
        the last user message has missing or empty content. Earlier user
        messages are never consulted as a fallback.
    """
    for message in reversed(messages or []):
        if not isinstance(message, dict) or message.get("role") != "user":
            continue

        content = message.get("content")
        if not content:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Multi-part content: keep only the textual parts so the caller
            # always receives a plain string, as the annotation promises.
            return "\n".join(
                part["text"]
                for part in content
                if isinstance(part, dict)
                and part.get("type") == "text"
                and isinstance(part.get("text"), str)
            )
        return str(content)

    return ""
|
|
|
|
|
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Serve an OpenAI-style chat completion backed by the module-level agent.

    Reads ``model``, ``messages`` and ``stream`` from the JSON request body,
    passes the latest user message to ``agent.step`` and wraps the answer
    either as one ``chat.completion`` JSON document or as a server-sent
    event stream holding a single ``chat.completion.chunk`` followed by
    ``[DONE]``.

    Args:
        request: Incoming HTTP request whose body should be a JSON object.

    Returns:
        A ``JSONResponse`` when ``stream`` is falsy, otherwise a
        ``StreamingResponse`` with media type ``text/event-stream``.
        A 400 ``JSONResponse`` carrying an ``error`` object is returned when
        the body is not valid JSON, is not a JSON object, or when
        ``messages`` is present but is not a list.
    """

    def bad_request(message: str) -> JSONResponse:
        # Report client mistakes as a 400 instead of an unhandled 500.
        return JSONResponse(
            {"error": {"message": message, "type": "invalid_request_error"}},
            status_code=400,
        )

    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError, so this covers malformed JSON and undecodable bytes.
        return bad_request("Request body must be valid JSON.")

    if not isinstance(body, dict):
        return bad_request("Request body must be a JSON object.")

    # An explicit null model falls back to the default like a missing one.
    model = body.get("model") or "local-deepseek-agent"
    messages = body.get("messages")
    if messages is None:
        messages = []
    if not isinstance(messages, list):
        return bad_request("'messages' must be a list.")
    stream = body.get("stream", False)

    user_input = extract_last_user_content(messages)
    print("Received chat completion request, stream =", stream, "input:", user_input)

    # Process user input through the agent.
    # NOTE(review): agent.step is called synchronously inside an async
    # handler; if it blocks (e.g. on a network call to the LLM) the event
    # loop is blocked for its duration. Consider a thread pool once the
    # thread-safety of Agent/Memory has been confirmed.
    answer = agent.step(user_input)
    content = answer or ""

    # From here on the response logic is shared by both modes
    # (non-streaming and streaming).
    created_ts = int(time.time())
    completion_id = f"chatcmpl-{uuid.uuid4().hex}"

    if not stream:
        resp = {
            "id": completion_id,
            "object": "chat.completion",
            "created": created_ts,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "stop",
                    "message": {
                        "role": "assistant",
                        "content": content,
                    },
                }
            ],
            # Token accounting is not computed here; zeros are placeholders.
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
        }
        return JSONResponse(resp)

    async def event_generator():
        # The whole answer is sent as one chunk that also carries the
        # finish reason, followed by the "[DONE]" sentinel event.
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_ts,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "delta": {
                        "role": "assistant",
                        "content": content,
                    },
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(event_generator(), media_type="text/event-stream")
|