Clean up Gemini's mess

2025-12-07 03:27:45 +01:00
parent 5b71233fb0
commit a923a760ef
24 changed files with 1885 additions and 1282 deletions
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -32,109 +32,33 @@ class TestAgentInit:
            "set_path_for_folder",
            "list_folder",
            "find_media_imdb_id",
-            "find_torrents",
+            "find_torrent",
            "add_torrent_by_index",
            "add_torrent_to_qbittorrent",
            "get_torrent_by_index",
+            "set_language",
        ]

        for tool_name in expected_tools:
            assert tool_name in agent.tools


-class TestParseIntent:
-    """Tests for _parse_intent method."""
-
-    def test_parse_valid_json(self, memory, mock_llm):
-        """Should parse valid tool call JSON."""
-        agent = Agent(llm=mock_llm)
-
-        text = '{"thought": "test", "action": {"name": "find_torrents", "args": {"media_title": "Inception"}}}'
-        intent = agent._parse_intent(text)
-
-        assert intent is not None
-        assert intent["action"]["name"] == "find_torrents"
-        assert intent["action"]["args"]["media_title"] == "Inception"
-
-    def test_parse_json_with_surrounding_text(self, memory, mock_llm):
-        """Should extract JSON from surrounding text."""
-        agent = Agent(llm=mock_llm)
-
-        text = 'Let me search for that. {"thought": "searching", "action": {"name": "find_torrents", "args": {}}} Done.'
-        intent = agent._parse_intent(text)
-
-        assert intent is not None
-        assert intent["action"]["name"] == "find_torrents"
-
-    def test_parse_plain_text(self, memory, mock_llm):
-        """Should return None for plain text."""
-        agent = Agent(llm=mock_llm)
-
-        text = "I found 3 torrents for Inception!"
-        intent = agent._parse_intent(text)
-
-        assert intent is None
-
-    def test_parse_invalid_json(self, memory, mock_llm):
-        """Should return None for invalid JSON."""
-        agent = Agent(llm=mock_llm)
-
-        text = '{"thought": "test", "action": {invalid}}'
-        intent = agent._parse_intent(text)
-
-        assert intent is None
-
-    def test_parse_json_without_action(self, memory, mock_llm):
-        """Should return None for JSON without action."""
-        agent = Agent(llm=mock_llm)
-
-        text = '{"thought": "test", "result": "something"}'
-        intent = agent._parse_intent(text)
-
-        assert intent is None
-
-    def test_parse_json_with_invalid_action(self, memory, mock_llm):
-        """Should return None for invalid action structure."""
-        agent = Agent(llm=mock_llm)
-
-        text = '{"thought": "test", "action": "not_an_object"}'
-        intent = agent._parse_intent(text)
-
-        assert intent is None
-
-    def test_parse_json_without_action_name(self, memory, mock_llm):
-        """Should return None if action has no name."""
-        agent = Agent(llm=mock_llm)
-
-        text = '{"thought": "test", "action": {"args": {}}}'
-        intent = agent._parse_intent(text)
-
-        assert intent is None
-
-    def test_parse_whitespace(self, memory, mock_llm):
-        """Should handle whitespace around JSON."""
-        agent = Agent(llm=mock_llm)
-
-        text = (
-            '   \n  {"thought": "test", "action": {"name": "test", "args": {}}}  \n  '
-        )
-        intent = agent._parse_intent(text)
-
-        assert intent is not None
-
-
-class TestExecuteAction:
-    """Tests for _execute_action method."""
+class TestExecuteToolCall:
+    """Tests for _execute_tool_call method."""

    def test_execute_known_tool(self, memory, mock_llm, real_folder):
        """Should execute known tool."""
        agent = Agent(llm=mock_llm)
        memory.ltm.set_config("download_folder", str(real_folder["downloads"]))

-        intent = {
-            "action": {"name": "list_folder", "args": {"folder_type": "download"}}
+        tool_call = {
+            "id": "call_123",
+            "function": {
+                "name": "list_folder",
+                "arguments": '{"folder_type": "download"}'
+            }
        }
-        result = agent._execute_action(intent)
+        result = agent._execute_tool_call(tool_call)

        assert result["status"] == "ok"

@@ -142,8 +66,14 @@ class TestExecuteAction:
        """Should return error for unknown tool."""
        agent = Agent(llm=mock_llm)

-        intent = {"action": {"name": "unknown_tool", "args": {}}}
-        result = agent._execute_action(intent)
+        tool_call = {
+            "id": "call_123",
+            "function": {
+                "name": "unknown_tool",
+                "arguments": '{}'
+            }
+        }
+        result = agent._execute_tool_call(tool_call)

        assert result["error"] == "unknown_tool"
        assert "available_tools" in result
@@ -152,9 +82,14 @@ class TestExecuteAction:
        """Should return error for bad arguments."""
        agent = Agent(llm=mock_llm)

-        # Missing required argument
-        intent = {"action": {"name": "set_path_for_folder", "args": {}}}
-        result = agent._execute_action(intent)
+        tool_call = {
+            "id": "call_123",
+            "function": {
+                "name": "set_path_for_folder",
+                "arguments": '{}'
+            }
+        }
+        result = agent._execute_tool_call(tool_call)

        assert result["error"] == "bad_args"

@@ -162,24 +97,33 @@ class TestExecuteAction:
        """Should track errors in episodic memory."""
        agent = Agent(llm=mock_llm)

-        intent = {
-            "action": {"name": "list_folder", "args": {"folder_type": "download"}}
+        # Use invalid arguments to trigger a TypeError
+        tool_call = {
+            "id": "call_123",
+            "function": {
+                "name": "set_path_for_folder",
+                "arguments": '{"folder_name": 123}'  # Wrong type
+            }
        }
-        result = agent._execute_action(intent)  # Will fail - folder not configured
+        result = agent._execute_tool_call(tool_call)

        mem = get_memory()
        assert len(mem.episodic.recent_errors) > 0

-    def test_execute_with_none_args(self, memory, mock_llm, real_folder):
-        """Should handle None args."""
+    def test_execute_with_invalid_json(self, memory, mock_llm):
+        """Should handle invalid JSON arguments."""
        agent = Agent(llm=mock_llm)
-        memory.ltm.set_config("download_folder", str(real_folder["downloads"]))

-        intent = {"action": {"name": "list_folder", "args": None}}
-        result = agent._execute_action(intent)
+        tool_call = {
+            "id": "call_123",
+            "function": {
+                "name": "list_folder",
+                "arguments": '{invalid json}'
+            }
+        }
+        result = agent._execute_tool_call(tool_call)

-        # Should fail gracefully with bad_args, not crash
-        assert "error" in result
+        assert result["error"] == "bad_args"


 class TestStep:
@@ -187,16 +131,14 @@ class TestStep:

    def test_step_text_response(self, memory, mock_llm):
        """Should return text response when no tool call."""
-        mock_llm.complete.return_value = "Hello! How can I help you?"
        agent = Agent(llm=mock_llm)

        response = agent.step("Hello")

-        assert response == "Hello! How can I help you?"
+        assert response == "I found what you're looking for!"

    def test_step_saves_to_history(self, memory, mock_llm):
        """Should save conversation to STM history."""
-        mock_llm.complete.return_value = "Hello!"
        agent = Agent(llm=mock_llm)

        agent.step("Hi there")
@@ -208,72 +150,84 @@ class TestStep:
        assert history[0]["content"] == "Hi there"
        assert history[1]["role"] == "assistant"

-    def test_step_with_tool_call(self, memory, mock_llm, real_folder):
+    def test_step_with_tool_call(self, memory, mock_llm_with_tool_call, real_folder):
        """Should execute tool and continue."""
        memory.ltm.set_config("download_folder", str(real_folder["downloads"]))

-        mock_llm.complete.side_effect = [
-            '{"thought": "listing", "action": {"name": "list_folder", "args": {"folder_type": "download"}}}',
-            "I found 2 items in your download folder.",
-        ]
-        agent = Agent(llm=mock_llm)
+        agent = Agent(llm=mock_llm_with_tool_call)

        response = agent.step("List my downloads")

-        assert "2 items" in response or "found" in response.lower()
-        assert mock_llm.complete.call_count == 2
+        assert "found" in response.lower() or "torrent" in response.lower()
+        assert mock_llm_with_tool_call.complete.call_count == 2
+        
+        # CRITICAL: Verify tools were passed to LLM
+        first_call_args = mock_llm_with_tool_call.complete.call_args_list[0]
+        assert first_call_args[1]['tools'] is not None, "Tools not passed to LLM!"
+        assert len(first_call_args[1]['tools']) > 0, "Tools list is empty!"

    def test_step_max_iterations(self, memory, mock_llm):
        """Should stop after max iterations."""
-        # Always return tool call
-        mock_llm.complete.return_value = '{"thought": "loop", "action": {"name": "list_folder", "args": {"folder_type": "download"}}}'
+        call_count = [0]
+        
+        def mock_complete(messages, tools=None):
+            call_count[0] += 1
+            # CRITICAL: Verify tools are passed (except on forced final call)
+            if call_count[0] <= 3:
+                assert tools is not None, f"Tools not passed on call {call_count[0]}!"
+            
+            if call_count[0] <= 3:
+                return {
+                    "role": "assistant",
+                    "content": None,
+                    "tool_calls": [{
+                        "id": f"call_{call_count[0]}",
+                        "function": {
+                            "name": "list_folder",
+                            "arguments": '{"folder_type": "download"}'
+                        }
+                    }]
+                }
+            else:
+                return {
+                    "role": "assistant",
+                    "content": "I couldn't complete the task."
+                }
+        
+        mock_llm.complete = Mock(side_effect=mock_complete)
        agent = Agent(llm=mock_llm, max_tool_iterations=3)

-        # Mock the final response after max iterations
-        def side_effect(messages):
-            if "final response" in str(messages[-1].get("content", "")).lower():
-                return "I couldn't complete the task."
-            return '{"thought": "loop", "action": {"name": "list_folder", "args": {"folder_type": "download"}}}'
-
-        mock_llm.complete.side_effect = side_effect
-
        response = agent.step("Do something")

-        # Should have called LLM max_iterations + 1 times (for final response)
-        assert mock_llm.complete.call_count == 4
+        assert call_count[0] == 4

    def test_step_includes_history(self, memory_with_history, mock_llm):
        """Should include conversation history in prompt."""
-        mock_llm.complete.return_value = "Response"
        agent = Agent(llm=mock_llm)

        agent.step("New message")

-        # Check that history was included in the call
        call_args = mock_llm.complete.call_args[0][0]
        messages_content = [m.get("content", "") for m in call_args]
-        assert any("Hello" in c for c in messages_content)
+        assert any("Hello" in str(c) for c in messages_content)

    def test_step_includes_events(self, memory, mock_llm):
        """Should include unread events in prompt."""
        memory.episodic.add_background_event("download_complete", {"name": "Movie.mkv"})
-        mock_llm.complete.return_value = "Response"
        agent = Agent(llm=mock_llm)

        agent.step("What's new?")

        call_args = mock_llm.complete.call_args[0][0]
        messages_content = [m.get("content", "") for m in call_args]
-        assert any("download" in c.lower() for c in messages_content)
+        assert any("download" in str(c).lower() for c in messages_content)

    def test_step_saves_ltm(self, memory, mock_llm, temp_dir):
        """Should save LTM after step."""
-        mock_llm.complete.return_value = "Response"
        agent = Agent(llm=mock_llm)

        agent.step("Hello")

-        # Check that LTM file was written
        ltm_file = temp_dir / "ltm.json"
        assert ltm_file.exists()

@@ -281,49 +235,55 @@ class TestStep:
 class TestAgentIntegration:
    """Integration tests for Agent."""

-    @patch("agent.tools.api.SearchTorrentsUseCase")
-    def test_search_and_select_workflow(self, mock_use_case_class, memory, mock_llm):
-        """Should handle search and select workflow."""
-        # Mock torrent search
-        mock_response = Mock()
-        mock_response.to_dict.return_value = {
-            "status": "ok",
-            "torrents": [
-                {"name": "Inception.1080p", "seeders": 100, "magnet": "magnet:?xt=..."},
-            ],
-            "count": 1,
-        }
-        mock_use_case = Mock()
-        mock_use_case.execute.return_value = mock_response
-        mock_use_case_class.return_value = mock_use_case
-
-        # First call: tool call, second call: response
-        mock_llm.complete.side_effect = [
-            '{"thought": "searching", "action": {"name": "find_torrents", "args": {"media_title": "Inception"}}}',
-            "I found 1 torrent for Inception!",
-        ]
-
-        agent = Agent(llm=mock_llm)
-        response = agent.step("Find Inception")
-
-        assert "found" in response.lower() or "torrent" in response.lower()
-
-        # Check that results are in episodic memory
-        mem = get_memory()
-        assert mem.episodic.last_search_results is not None
-
    def test_multiple_tool_calls(self, memory, mock_llm, real_folder):
        """Should handle multiple tool calls in sequence."""
        memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
        memory.ltm.set_config("movie_folder", str(real_folder["movies"]))

-        mock_llm.complete.side_effect = [
-            '{"thought": "list downloads", "action": {"name": "list_folder", "args": {"folder_type": "download"}}}',
-            '{"thought": "list movies", "action": {"name": "list_folder", "args": {"folder_type": "movie"}}}',
-            "I listed both folders for you.",
-        ]
-
+        call_count = [0]
+        
+        def mock_complete(messages, tools=None):
+            call_count[0] += 1
+            # CRITICAL: Verify tools are passed on every call
+            assert tools is not None, f"Tools not passed on call {call_count[0]}!"
+            
+            if call_count[0] == 1:
+                return {
+                    "role": "assistant",
+                    "content": None,
+                    "tool_calls": [{
+                        "id": "call_1",
+                        "function": {
+                            "name": "list_folder",
+                            "arguments": '{"folder_type": "download"}'
+                        }
+                    }]
+                }
+            elif call_count[0] == 2:
+                # CRITICAL: Verify tool result was sent back
+                tool_messages = [m for m in messages if m.get('role') == 'tool']
+                assert len(tool_messages) > 0, "Tool result not sent back to LLM!"
+                
+                return {
+                    "role": "assistant",
+                    "content": None,
+                    "tool_calls": [{
+                        "id": "call_2",
+                        "function": {
+                            "name": "list_folder",
+                            "arguments": '{"folder_type": "movie"}'
+                        }
+                    }]
+                }
+            else:
+                return {
+                    "role": "assistant",
+                    "content": "I listed both folders for you."
+                }
+        
+        mock_llm.complete = Mock(side_effect=mock_complete)
        agent = Agent(llm=mock_llm)
+        
        response = agent.step("List my downloads and movies")

-        assert mock_llm.complete.call_count == 3
+        assert call_count[0] == 3