"""Main agent for media library management.""" import json import logging from collections.abc import AsyncGenerator from typing import Any from infrastructure.persistence import get_memory from .config import settings from .prompts import PromptBuilder from .registry import Tool, make_tools logger = logging.getLogger(__name__) class Agent: """ AI agent for media library management. Uses OpenAI-compatible tool calling API. """ def __init__(self, llm, max_tool_iterations: int = 5): """ Initialize the agent. Args: llm: LLM client with complete() method max_tool_iterations: Maximum number of tool execution iterations """ self.llm = llm self.tools: dict[str, Tool] = make_tools() self.prompt_builder = PromptBuilder(self.tools) self.max_tool_iterations = max_tool_iterations def step(self, user_input: str) -> str: """ Execute one agent step with the user input. This method: 1. Adds user message to memory 2. Builds prompt with history and context 3. Calls LLM, executing tools as needed 4. Returns final response Args: user_input: User's message Returns: Agent's final response """ memory = get_memory() # Add user message to history memory.stm.add_message("user", user_input) memory.save() # Build initial messages system_prompt = self.prompt_builder.build_system_prompt() messages: list[dict[str, Any]] = [{"role": "system", "content": system_prompt}] # Add conversation history history = memory.stm.get_recent_history(settings.max_history_messages) messages.extend(history) # Add unread events if any unread_events = memory.episodic.get_unread_events() if unread_events: events_text = "\n".join( [f"- {e['type']}: {e['data']}" for e in unread_events] ) messages.append( {"role": "system", "content": f"Background events:\n{events_text}"} ) # Get tools specification for OpenAI format tools_spec = self.prompt_builder.build_tools_spec() # Tool execution loop for _iteration in range(self.max_tool_iterations): # Call LLM with tools llm_result = self.llm.complete(messages, tools=tools_spec) # Handle both tuple (response, usage) and dict response if isinstance(llm_result, tuple): response_message, usage = llm_result else: response_message = llm_result # Check if there are tool calls tool_calls = response_message.get("tool_calls") if not tool_calls: # No tool calls, this is the final response final_content = response_message.get("content", "") memory.stm.add_message("assistant", final_content) memory.save() return final_content # Add assistant message with tool calls to conversation messages.append(response_message) # Execute each tool call for tool_call in tool_calls: tool_result = self._execute_tool_call(tool_call) # Add tool result to messages messages.append( { "tool_call_id": tool_call.get("id"), "role": "tool", "name": tool_call.get("function", {}).get("name"), "content": json.dumps(tool_result, ensure_ascii=False), } ) # Max iterations reached, force final response messages.append( { "role": "system", "content": "Please provide a final response to the user without using any more tools.", } ) llm_result = self.llm.complete(messages) if isinstance(llm_result, tuple): final_message, usage = llm_result else: final_message = llm_result final_response = final_message.get( "content", "I've completed the requested actions." ) memory.stm.add_message("assistant", final_response) memory.save() return final_response def _execute_tool_call(self, tool_call: dict[str, Any]) -> dict[str, Any]: """ Execute a single tool call. Args: tool_call: OpenAI-format tool call dict Returns: Result dictionary """ function = tool_call.get("function", {}) tool_name = function.get("name", "") try: args_str = function.get("arguments", "{}") args = json.loads(args_str) except json.JSONDecodeError as e: logger.error(f"Failed to parse tool arguments: {e}") return {"error": "bad_args", "message": f"Invalid JSON arguments: {e}"} # Validate tool exists if tool_name not in self.tools: available = list(self.tools.keys()) return { "error": "unknown_tool", "message": f"Tool '{tool_name}' not found", "available_tools": available, } tool = self.tools[tool_name] # Execute tool try: result = tool.func(**args) return result except KeyboardInterrupt: # Don't catch KeyboardInterrupt - let it propagate raise except TypeError as e: # Bad arguments memory = get_memory() memory.episodic.add_error(tool_name, f"bad_args: {e}") return {"error": "bad_args", "message": str(e), "tool": tool_name} except Exception as e: # Other errors memory = get_memory() memory.episodic.add_error(tool_name, str(e)) return {"error": "execution_failed", "message": str(e), "tool": tool_name} async def step_streaming( self, user_input: str, completion_id: str, created_ts: int, model: str ) -> AsyncGenerator[dict[str, Any], None]: """ Execute agent step with streaming support for LibreChat. Yields SSE chunks for tool calls and final response. Args: user_input: User's message completion_id: Completion ID for the response created_ts: Timestamp for the response model: Model name Yields: SSE chunks in OpenAI format """ memory = get_memory() # Add user message to history memory.stm.add_message("user", user_input) memory.save() # Build initial messages system_prompt = self.prompt_builder.build_system_prompt() messages: list[dict[str, Any]] = [{"role": "system", "content": system_prompt}] # Add conversation history history = memory.stm.get_recent_history(settings.max_history_messages) messages.extend(history) # Add unread events if any unread_events = memory.episodic.get_unread_events() if unread_events: events_text = "\n".join( [f"- {e['type']}: {e['data']}" for e in unread_events] ) messages.append( {"role": "system", "content": f"Background events:\n{events_text}"} ) # Get tools specification for OpenAI format tools_spec = self.prompt_builder.build_tools_spec() # Tool execution loop for _iteration in range(self.max_tool_iterations): # Call LLM with tools llm_result = self.llm.complete(messages, tools=tools_spec) # Handle both tuple (response, usage) and dict response if isinstance(llm_result, tuple): response_message, usage = llm_result else: response_message = llm_result # Check if there are tool calls tool_calls = response_message.get("tool_calls") if not tool_calls: # No tool calls, this is the final response final_content = response_message.get("content", "") memory.stm.add_message("assistant", final_content) memory.save() # Stream the final response yield { "id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model, "choices": [ { "index": 0, "delta": {"role": "assistant", "content": final_content}, "finish_reason": "stop", } ], } return # Stream tool calls for tool_call in tool_calls: function = tool_call.get("function", {}) tool_name = function.get("name", "") tool_args = function.get("arguments", "{}") # Yield chunk indicating tool call yield { "id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model, "choices": [ { "index": 0, "delta": { "tool_calls": [ { "index": 0, "id": tool_call.get("id"), "type": "function", "function": { "name": tool_name, "arguments": tool_args, }, } ] }, "finish_reason": None, } ], } # Add assistant message with tool calls to conversation messages.append(response_message) # Execute each tool call and stream results for tool_call in tool_calls: tool_result = self._execute_tool_call(tool_call) function = tool_call.get("function", {}) tool_name = function.get("name", "") # Add tool result to messages messages.append( { "tool_call_id": tool_call.get("id"), "role": "tool", "name": tool_name, "content": json.dumps(tool_result, ensure_ascii=False), } ) # Stream tool result as content result_text = ( f"\n🔧 {tool_name}: {json.dumps(tool_result, ensure_ascii=False)}\n" ) yield { "id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model, "choices": [ { "index": 0, "delta": {"content": result_text}, "finish_reason": None, } ], } # Max iterations reached, force final response messages.append( { "role": "system", "content": "Please provide a final response to the user without using any more tools.", } ) llm_result = self.llm.complete(messages) if isinstance(llm_result, tuple): final_message, usage = llm_result else: final_message = llm_result final_response = final_message.get( "content", "I've completed the requested actions." ) memory.stm.add_message("assistant", final_response) memory.save() # Stream final response yield { "id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": model, "choices": [ { "index": 0, "delta": {"content": final_response}, "finish_reason": "stop", } ], }