Files
alfred/app.py
Francwa 9ca31e45e0 feat!: migrate to OpenAI native tool calls and fix circular deps (#fuck-gemini)
- Fix circular dependencies in agent/tools
- Migrate from custom JSON to OpenAI tool calls format
- Add async streaming (step_stream, complete_stream)
- Simplify prompt system and remove token counting
- Add 5 new API endpoints (/health, /v1/models, /api/memory/*)
- Add 3 new tools (get_torrent_by_index, add_torrent_by_index, set_language)
- Fix all 500 tests and add coverage config (80% threshold)
- Add comprehensive docs (README, pytest guide)

BREAKING: LLM interface changed, memory injection via get_memory()
2025-12-06 19:11:05 +01:00

225 lines
7.0 KiB
Python

"""FastAPI application for the media library agent."""
import json
import logging
import os
import time
import uuid
from typing import Any
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field, validator
from agent.agent import Agent
from agent.config import settings
from agent.llm.deepseek import DeepSeekClient
from agent.llm.exceptions import LLMAPIError, LLMConfigurationError
from agent.llm.ollama import OllamaClient
from infrastructure.persistence import get_memory, init_memory
# Root logging configuration: timestamped records for every module logger.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# FastAPI application instance; title/version surface in the OpenAPI docs.
app = FastAPI(
    title="Agent Media API",
    description="AI agent for managing a local media library",
    version="0.2.0",
)

# Initialize memory context at startup.
# Must run before any request handler calls get_memory().
init_memory(storage_dir="memory_data")
logger.info("Memory context initialized")

# Select the LLM backend from the LLM_PROVIDER environment variable.
# Any value other than "ollama" (including unset) falls back to DeepSeek.
llm_provider = os.getenv("LLM_PROVIDER", "deepseek").lower()
try:
    if llm_provider == "ollama":
        logger.info("Using Ollama LLM")
        llm = OllamaClient()
    else:
        logger.info("Using DeepSeek LLM")
        llm = DeepSeekClient()
except LLMConfigurationError as e:
    # Misconfiguration (e.g. missing credentials) is fatal: re-raise so the
    # process fails fast at import time instead of on the first request.
    logger.error(f"Failed to initialize LLM: {e}")
    raise

# Single module-level agent shared by all requests.
# NOTE(review): the agent is shared process-wide and memory is global —
# confirm it is safe under concurrent requests.
agent = Agent(llm=llm, max_tool_iterations=settings.max_tool_iterations)
logger.info("Agent Media API initialized")
# Pydantic models for request validation
class ChatMessage(BaseModel):
    """One message of an OpenAI-style conversation.

    ``content`` may be None for non-user roles (e.g. assistant tool-call
    messages); user messages are required to carry non-empty content.
    """

    role: str = Field(..., description="Role of the message sender")
    content: str | None = Field(None, description="Content of the message")

    @validator("content")
    def content_must_not_be_empty_for_user(cls, v, values):
        """Reject empty or missing content when the role is 'user'."""
        if not v and values.get("role") == "user":
            raise ValueError("User messages must have non-empty content")
        return v
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible request body for /v1/chat/completions."""

    model: str = Field(default="agent-media", description="Model to use")
    messages: list[ChatMessage] = Field(..., description="List of messages")
    stream: bool = Field(default=False, description="Whether to stream the response")
    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
    max_tokens: int | None = Field(default=None, gt=0)

    @validator("messages")
    def messages_must_have_user_message(cls, v):
        """Require the conversation to contain at least one user message."""
        has_user = any(message.role == "user" for message in v)
        if not has_user:
            raise ValueError("At least one user message is required")
        return v
def extract_last_user_content(messages: list[dict[str, Any]]) -> str:
    """
    Extract the last user message from the conversation.

    Args:
        messages: List of message dictionaries.

    Returns:
        Content of the last user message (empty string if that content is
        None), or "" when no user message exists.
    """
    user_contents = (
        message.get("content") or ""
        for message in reversed(messages)
        if message.get("role") == "user"
    )
    return next(user_contents, "")
@app.get("/health")
async def health_check():
"""Health check endpoint."""
return {"status": "healthy", "version": "0.2.0"}
@app.get("/v1/models")
async def list_models():
"""List available models (OpenAI-compatible endpoint)."""
return {
"object": "list",
"data": [
{
"id": "agent-media",
"object": "model",
"created": int(time.time()),
"owned_by": "local",
}
],
}
@app.get("/memory/state")
async def get_memory_state():
"""Debug endpoint to view full memory state."""
memory = get_memory()
return memory.get_full_state()
@app.get("/memory/episodic/search-results")
async def get_search_results():
"""Debug endpoint to view last search results."""
memory = get_memory()
if memory.episodic.last_search_results:
return {
"status": "ok",
"query": memory.episodic.last_search_results.get("query"),
"type": memory.episodic.last_search_results.get("type"),
"timestamp": memory.episodic.last_search_results.get("timestamp"),
"result_count": len(memory.episodic.last_search_results.get("results", [])),
"results": memory.episodic.last_search_results.get("results", []),
}
return {"status": "empty", "message": "No search results in episodic memory"}
@app.post("/memory/clear-session")
async def clear_session():
"""Clear session memories (STM + Episodic)."""
memory = get_memory()
memory.clear_session()
return {"status": "ok", "message": "Session memories cleared"}
@app.post("/v1/chat/completions")
async def chat_completions(chat_request: ChatCompletionRequest):
"""
OpenAI-compatible chat completions endpoint.
Accepts messages and returns agent response.
Supports both streaming and non-streaming modes.
"""
# Convert Pydantic models to dicts for processing
messages_dict = [msg.dict() for msg in chat_request.messages]
user_input = extract_last_user_content(messages_dict)
logger.info(
f"Chat request - stream={chat_request.stream}, input_length={len(user_input)}"
)
try:
answer = agent.step(user_input)
except LLMAPIError as e:
logger.error(f"LLM API error: {e}")
raise HTTPException(status_code=502, detail=f"LLM API error: {e}")
except Exception as e:
logger.error(f"Agent error: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Internal agent error")
created_ts = int(time.time())
completion_id = f"chatcmpl-{uuid.uuid4().hex}"
if not chat_request.stream:
return JSONResponse(
{
"id": completion_id,
"object": "chat.completion",
"created": created_ts,
"model": chat_request.model,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {"role": "assistant", "content": answer or ""},
}
],
"usage": {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0,
},
}
)
async def event_generator():
chunk = {
"id": completion_id,
"object": "chat.completion.chunk",
"created": created_ts,
"model": chat_request.model,
"choices": [
{
"index": 0,
"delta": {"role": "assistant", "content": answer or ""},
"finish_reason": "stop",
}
],
}
yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
yield "data: [DONE]\n\n"
return StreamingResponse(event_generator(), media_type="text/event-stream")