# app.py
import json
import logging
import time
import uuid
from typing import Any, Dict

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse

from agent.agent import Agent
from agent.commands import CommandRegistry
from agent.llm import DeepSeekClient
from agent.memory import Memory
app = FastAPI(
    title="LibreChat Agent Backend",
    version="0.1.0",
)

# Module-level singletons, created once at import time and shared by every
# request handled by this process.
# NOTE(review): nothing here is request-scoped — if Memory or Agent keep
# mutable conversation state, it is shared across all clients; confirm that
# is intended.
llm = DeepSeekClient()  # LLM client; presumably wraps a DeepSeek endpoint — TODO confirm
memory = Memory()  # backing store passed to both the agent and the command registry
agent = Agent(llm=llm, memory=memory)
commands_registry = CommandRegistry(memory=memory)
def extract_last_user_content(messages: list[Dict[str, Any]]) -> str:
    """Return the text of the most recent ``role == "user"`` message.

    Accepts both the plain-string ``content`` form and the OpenAI
    list-of-parts form (``[{"type": "text", "text": ...}, ...]``), in
    which case the text parts are concatenated.  Always returns a string
    (``""`` when there is no user message or its content is empty/None),
    so callers can safely call ``str`` methods on the result.

    Args:
        messages: Chat messages in OpenAI format, oldest first.

    Returns:
        The last user message's text, or ``""``.
    """
    for message in reversed(messages):
        if message.get("role") != "user":
            continue
        content = message.get("content")
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # Multimodal/list-of-parts content: keep only the text parts.
            return "".join(
                part.get("text", "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        # None or an unexpected type — normalize to empty string.
        return ""
    return ""
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible ``/v1/chat/completions`` endpoint.

    Messages whose last user turn starts with ``/`` are dispatched to the
    local command registry without involving the LLM; everything else runs
    through the agent.  The answer is returned either as a single JSON
    completion or, when ``stream`` is truthy, as an SSE stream containing
    one chunk with the full answer followed by ``[DONE]``.

    Args:
        request: Raw request; the JSON body follows the OpenAI chat schema
            (``model``, ``messages``, ``stream``).

    Returns:
        ``JSONResponse`` (non-stream) or ``StreamingResponse`` (stream).
    """
    body = await request.json()
    model = body.get("model", "local-deepseek-agent")
    messages = body.get("messages", [])
    stream = body.get("stream", False)

    user_input = extract_last_user_content(messages)
    # Defensive: some clients send list-of-parts content, so the extracted
    # value may not be a string; coerce before calling .strip()/.startswith().
    if not isinstance(user_input, str):
        user_input = "" if user_input is None else str(user_input)

    # Lazy %-style args: formatting only happens if the record is emitted.
    logging.getLogger(__name__).info(
        "Received chat completion request, stream=%s, input=%r", stream, user_input
    )

    # 1) Slash-commands are handled locally — the LLM is never invoked.
    if user_input.strip().startswith("/"):
        answer = commands_registry.handle(user_input)
    else:
        # 2) Otherwise, normal agent + LLM pipeline.
        answer = agent.step(user_input)

    # Shared by both response shapes.
    created_ts = int(time.time())
    completion_id = f"chatcmpl-{uuid.uuid4().hex}"

    if not stream:
        return JSONResponse(
            {
                "id": completion_id,
                "object": "chat.completion",
                "created": created_ts,
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "finish_reason": "stop",
                        "message": {
                            "role": "assistant",
                            "content": answer or "",
                        },
                    }
                ],
                # Token accounting is not implemented by this backend.
                "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
            }
        )

    async def event_generator():
        # The full answer goes out as one SSE chunk, then the OpenAI-style
        # terminator sentinel.
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_ts,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "delta": {
                        "role": "assistant",
                        "content": answer or "",
                    },
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(event_generator(), media_type="text/event-stream")