alfred/brain/agent/agent.py

"""Main agent for media library management."""

import json
import logging
from collections.abc import AsyncGenerator
from typing import Any

from infrastructure.persistence import get_memory

from .config import settings
from .prompts import PromptBuilder
from .registry import Tool, make_tools

logger = logging.getLogger(__name__)


class Agent:
    """
    AI agent for media library management.

    Uses OpenAI-compatible tool calling API.
    """

    def __init__(self, llm, max_tool_iterations: int = 5):
        """
        Initialize the agent.

        Args:
            llm: LLM client with complete() method
            max_tool_iterations: Maximum number of tool execution iterations
        """
        self.llm = llm
        self.tools: dict[str, Tool] = make_tools()
        self.prompt_builder = PromptBuilder(self.tools)
        self.max_tool_iterations = max_tool_iterations

    def step(self, user_input: str) -> str:
        """
        Execute one agent step with the user input.

        This method:
        1. Adds user message to memory
        2. Builds prompt with history and context
        3. Calls LLM, executing tools as needed
        4. Returns final response

        Args:
            user_input: User's message

        Returns:
            Agent's final response
        """
        memory = get_memory()

        # Add user message to history
        memory.stm.add_message("user", user_input)
        memory.save()

        # Build initial messages
        system_prompt = self.prompt_builder.build_system_prompt()
        messages: list[dict[str, Any]] = [{"role": "system", "content": system_prompt}]

        # Add conversation history
        history = memory.stm.get_recent_history(settings.max_history_messages)
        messages.extend(history)

        # Add unread events if any
        unread_events = memory.episodic.get_unread_events()
        if unread_events:
            events_text = "\n".join(
                [f"- {e['type']}: {e['data']}" for e in unread_events]
            )
            messages.append(
                {"role": "system", "content": f"Background events:\n{events_text}"}
            )

        # Get tools specification for OpenAI format
        tools_spec = self.prompt_builder.build_tools_spec()

        # Tool execution loop
        for _iteration in range(self.max_tool_iterations):
            # Call LLM with tools
            llm_result = self.llm.complete(messages, tools=tools_spec)

            # Handle both tuple (response, usage) and dict response
            if isinstance(llm_result, tuple):
                response_message, usage = llm_result
            else:
                response_message = llm_result

            # Check if there are tool calls
            tool_calls = response_message.get("tool_calls")

            if not tool_calls:
                # No tool calls, this is the final response
                final_content = response_message.get("content", "")
                memory.stm.add_message("assistant", final_content)
                memory.save()
                return final_content

            # Add assistant message with tool calls to conversation
            messages.append(response_message)

            # Execute each tool call
            for tool_call in tool_calls:
                tool_result = self._execute_tool_call(tool_call)

                # Add tool result to messages
                messages.append(
                    {
                        "tool_call_id": tool_call.get("id"),
                        "role": "tool",
                        "name": tool_call.get("function", {}).get("name"),
                        "content": json.dumps(tool_result, ensure_ascii=False),
                    }
                )

        # Max iterations reached, force final response
        messages.append(
            {
                "role": "system",
                "content": "Please provide a final response to the user without using any more tools.",
            }
        )

        llm_result = self.llm.complete(messages)
        if isinstance(llm_result, tuple):
            final_message, usage = llm_result
        else:
            final_message = llm_result

        final_response = final_message.get(
            "content", "I've completed the requested actions."
        )
        memory.stm.add_message("assistant", final_response)
        memory.save()
        return final_response

    def _execute_tool_call(self, tool_call: dict[str, Any]) -> dict[str, Any]:
        """
        Execute a single tool call.

        Args:
            tool_call: OpenAI-format tool call dict

        Returns:
            Result dictionary
        """
        function = tool_call.get("function", {})
        tool_name = function.get("name", "")

        try:
            args_str = function.get("arguments", "{}")
            args = json.loads(args_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse tool arguments: {e}")
            return {"error": "bad_args", "message": f"Invalid JSON arguments: {e}"}

        # Validate tool exists
        if tool_name not in self.tools:
            available = list(self.tools.keys())
            return {
                "error": "unknown_tool",
                "message": f"Tool '{tool_name}' not found",
                "available_tools": available,
            }

        tool = self.tools[tool_name]

        # Execute tool
        try:
            result = tool.func(**args)
            return result
        except KeyboardInterrupt:
            # Don't catch KeyboardInterrupt - let it propagate
            raise
        except TypeError as e:
            # Bad arguments
            memory = get_memory()
            memory.episodic.add_error(tool_name, f"bad_args: {e}")
            return {"error": "bad_args", "message": str(e), "tool": tool_name}
        except Exception as e:
            # Other errors
            memory = get_memory()
            memory.episodic.add_error(tool_name, str(e))
            return {"error": "execution_failed", "message": str(e), "tool": tool_name}

    async def step_streaming(
        self, user_input: str, completion_id: str, created_ts: int, model: str
    ) -> AsyncGenerator[dict[str, Any], None]:
        """
        Execute agent step with streaming support for LibreChat.

        Yields SSE chunks for tool calls and final response.

        Args:
            user_input: User's message
            completion_id: Completion ID for the response
            created_ts: Timestamp for the response
            model: Model name

        Yields:
            SSE chunks in OpenAI format
        """
        memory = get_memory()

        # Add user message to history
        memory.stm.add_message("user", user_input)
        memory.save()

        # Build initial messages
        system_prompt = self.prompt_builder.build_system_prompt()
        messages: list[dict[str, Any]] = [{"role": "system", "content": system_prompt}]

        # Add conversation history
        history = memory.stm.get_recent_history(settings.max_history_messages)
        messages.extend(history)

        # Add unread events if any
        unread_events = memory.episodic.get_unread_events()
        if unread_events:
            events_text = "\n".join(
                [f"- {e['type']}: {e['data']}" for e in unread_events]
            )
            messages.append(
                {"role": "system", "content": f"Background events:\n{events_text}"}
            )

        # Get tools specification for OpenAI format
        tools_spec = self.prompt_builder.build_tools_spec()

        # Tool execution loop
        for _iteration in range(self.max_tool_iterations):
            # Call LLM with tools
            llm_result = self.llm.complete(messages, tools=tools_spec)

            # Handle both tuple (response, usage) and dict response
            if isinstance(llm_result, tuple):
                response_message, usage = llm_result
            else:
                response_message = llm_result

            # Check if there are tool calls
            tool_calls = response_message.get("tool_calls")

            if not tool_calls:
                # No tool calls, this is the final response
                final_content = response_message.get("content", "")
                memory.stm.add_message("assistant", final_content)
                memory.save()

                # Stream the final response
                yield {
                    "id": completion_id,
                    "object": "chat.completion.chunk",
                    "created": created_ts,
                    "model": model,
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"role": "assistant", "content": final_content},
                            "finish_reason": "stop",
                        }
                    ],
                }
                return

            # Stream tool calls
            for tool_call in tool_calls:
                function = tool_call.get("function", {})
                tool_name = function.get("name", "")
                tool_args = function.get("arguments", "{}")

                # Yield chunk indicating tool call
                yield {
                    "id": completion_id,
                    "object": "chat.completion.chunk",
                    "created": created_ts,
                    "model": model,
                    "choices": [
                        {
                            "index": 0,
                            "delta": {
                                "tool_calls": [
                                    {
                                        "index": 0,
                                        "id": tool_call.get("id"),
                                        "type": "function",
                                        "function": {
                                            "name": tool_name,
                                            "arguments": tool_args,
                                        },
                                    }
                                ]
                            },
                            "finish_reason": None,
                        }
                    ],
                }

            # Add assistant message with tool calls to conversation
            messages.append(response_message)

            # Execute each tool call and stream results
            for tool_call in tool_calls:
                tool_result = self._execute_tool_call(tool_call)
                function = tool_call.get("function", {})
                tool_name = function.get("name", "")

                # Add tool result to messages
                messages.append(
                    {
                        "tool_call_id": tool_call.get("id"),
                        "role": "tool",
                        "name": tool_name,
                        "content": json.dumps(tool_result, ensure_ascii=False),
                    }
                )

                # Stream tool result as content
                result_text = (
                    f"\n🔧 {tool_name}: {json.dumps(tool_result, ensure_ascii=False)}\n"
                )
                yield {
                    "id": completion_id,
                    "object": "chat.completion.chunk",
                    "created": created_ts,
                    "model": model,
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"content": result_text},
                            "finish_reason": None,
                        }
                    ],
                }

        # Max iterations reached, force final response
        messages.append(
            {
                "role": "system",
                "content": "Please provide a final response to the user without using any more tools.",
            }
        )

        llm_result = self.llm.complete(messages)
        if isinstance(llm_result, tuple):
            final_message, usage = llm_result
        else:
            final_message = llm_result

        final_response = final_message.get(
            "content", "I've completed the requested actions."
        )
        memory.stm.add_message("assistant", final_response)
        memory.save()

        # Stream final response
        yield {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created_ts,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "delta": {"content": final_response},
                    "finish_reason": "stop",
                }
            ],
        }