Clean up Gemini's mess: move agent tests from JSON intent parsing to tool calls

This commit is contained in:
2025-12-07 03:27:45 +01:00
parent 5b71233fb0
commit a923a760ef
24 changed files with 1885 additions and 1282 deletions

View File

@@ -32,109 +32,33 @@ class TestAgentInit:
"set_path_for_folder",
"list_folder",
"find_media_imdb_id",
"find_torrents",
"find_torrent",
"add_torrent_by_index",
"add_torrent_to_qbittorrent",
"get_torrent_by_index",
"set_language",
]
for tool_name in expected_tools:
assert tool_name in agent.tools
class TestParseIntent:
    """Exercise ``Agent._parse_intent`` on well-formed and malformed LLM output."""

    @staticmethod
    def _parse(llm, raw):
        """Build a fresh Agent around *llm* and return its parse of *raw*."""
        return Agent(llm=llm)._parse_intent(raw)

    def test_parse_valid_json(self, memory, mock_llm):
        """A bare tool-call JSON object is parsed, keeping its name and args."""
        raw = '{"thought": "test", "action": {"name": "find_torrents", "args": {"media_title": "Inception"}}}'

        parsed = self._parse(mock_llm, raw)

        assert parsed is not None
        action = parsed["action"]
        assert action["name"] == "find_torrents"
        assert action["args"]["media_title"] == "Inception"

    def test_parse_json_with_surrounding_text(self, memory, mock_llm):
        """JSON embedded between prose on both sides is still extracted."""
        raw = 'Let me search for that. {"thought": "searching", "action": {"name": "find_torrents", "args": {}}} Done.'

        parsed = self._parse(mock_llm, raw)

        assert parsed is not None
        assert parsed["action"]["name"] == "find_torrents"

    def test_parse_plain_text(self, memory, mock_llm):
        """Text containing no JSON yields ``None``."""
        parsed = self._parse(mock_llm, "I found 3 torrents for Inception!")

        assert parsed is None

    def test_parse_invalid_json(self, memory, mock_llm):
        """Malformed JSON yields ``None``."""
        parsed = self._parse(mock_llm, '{"thought": "test", "action": {invalid}}')

        assert parsed is None

    def test_parse_json_without_action(self, memory, mock_llm):
        """Valid JSON lacking an ``action`` key yields ``None``."""
        parsed = self._parse(mock_llm, '{"thought": "test", "result": "something"}')

        assert parsed is None

    def test_parse_json_with_invalid_action(self, memory, mock_llm):
        """An ``action`` that is a string rather than an object yields ``None``."""
        parsed = self._parse(mock_llm, '{"thought": "test", "action": "not_an_object"}')

        assert parsed is None

    def test_parse_json_without_action_name(self, memory, mock_llm):
        """An ``action`` object with no ``name`` yields ``None``."""
        parsed = self._parse(mock_llm, '{"thought": "test", "action": {"args": {}}}')

        assert parsed is None

    def test_parse_whitespace(self, memory, mock_llm):
        """Leading and trailing whitespace around the JSON does not prevent parsing."""
        raw = ' \n {"thought": "test", "action": {"name": "test", "args": {}}} \n '

        parsed = self._parse(mock_llm, raw)

        assert parsed is not None
class TestExecuteAction:
"""Tests for _execute_action method."""
class TestExecuteToolCall:
"""Tests for _execute_tool_call method."""
def test_execute_known_tool(self, memory, mock_llm, real_folder):
"""Should execute known tool."""
agent = Agent(llm=mock_llm)
memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
intent = {
"action": {"name": "list_folder", "args": {"folder_type": "download"}}
tool_call = {
"id": "call_123",
"function": {
"name": "list_folder",
"arguments": '{"folder_type": "download"}'
}
}
result = agent._execute_action(intent)
result = agent._execute_tool_call(tool_call)
assert result["status"] == "ok"
@@ -142,8 +66,14 @@ class TestExecuteAction:
"""Should return error for unknown tool."""
agent = Agent(llm=mock_llm)
intent = {"action": {"name": "unknown_tool", "args": {}}}
result = agent._execute_action(intent)
tool_call = {
"id": "call_123",
"function": {
"name": "unknown_tool",
"arguments": '{}'
}
}
result = agent._execute_tool_call(tool_call)
assert result["error"] == "unknown_tool"
assert "available_tools" in result
@@ -152,9 +82,14 @@ class TestExecuteAction:
"""Should return error for bad arguments."""
agent = Agent(llm=mock_llm)
# Missing required argument
intent = {"action": {"name": "set_path_for_folder", "args": {}}}
result = agent._execute_action(intent)
tool_call = {
"id": "call_123",
"function": {
"name": "set_path_for_folder",
"arguments": '{}'
}
}
result = agent._execute_tool_call(tool_call)
assert result["error"] == "bad_args"
@@ -162,24 +97,33 @@ class TestExecuteAction:
"""Should track errors in episodic memory."""
agent = Agent(llm=mock_llm)
intent = {
"action": {"name": "list_folder", "args": {"folder_type": "download"}}
# Use invalid arguments to trigger a TypeError
tool_call = {
"id": "call_123",
"function": {
"name": "set_path_for_folder",
"arguments": '{"folder_name": 123}' # Wrong type
}
}
result = agent._execute_action(intent) # Will fail - folder not configured
result = agent._execute_tool_call(tool_call)
mem = get_memory()
assert len(mem.episodic.recent_errors) > 0
def test_execute_with_none_args(self, memory, mock_llm, real_folder):
"""Should handle None args."""
def test_execute_with_invalid_json(self, memory, mock_llm):
"""Should handle invalid JSON arguments."""
agent = Agent(llm=mock_llm)
memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
intent = {"action": {"name": "list_folder", "args": None}}
result = agent._execute_action(intent)
tool_call = {
"id": "call_123",
"function": {
"name": "list_folder",
"arguments": '{invalid json}'
}
}
result = agent._execute_tool_call(tool_call)
# Should fail gracefully with bad_args, not crash
assert "error" in result
assert result["error"] == "bad_args"
class TestStep:
@@ -187,16 +131,14 @@ class TestStep:
def test_step_text_response(self, memory, mock_llm):
"""Should return text response when no tool call."""
# Stub the LLM so that complete() hands back a plain string.
mock_llm.complete.return_value = "Hello! How can I help you?"
agent = Agent(llm=mock_llm)
response = agent.step("Hello")
# NOTE(review): the two assertions below compare the same value against two
# different literals, so they cannot both pass. This looks like the old and
# new side of a diff rendered together - confirm which expectation is current.
assert response == "Hello! How can I help you?"
# NOTE(review): presumably this text is the default reply of the mock_llm
# fixture; the fixture is not visible here - verify before keeping.
assert response == "I found what you're looking for!"
def test_step_saves_to_history(self, memory, mock_llm):
"""Should save conversation to STM history."""
mock_llm.complete.return_value = "Hello!"
agent = Agent(llm=mock_llm)
agent.step("Hi there")
@@ -208,72 +150,84 @@ class TestStep:
assert history[0]["content"] == "Hi there"
assert history[1]["role"] == "assistant"
def test_step_with_tool_call(self, memory, mock_llm, real_folder):
def test_step_with_tool_call(self, memory, mock_llm_with_tool_call, real_folder):
"""Should execute tool and continue."""
memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
mock_llm.complete.side_effect = [
'{"thought": "listing", "action": {"name": "list_folder", "args": {"folder_type": "download"}}}',
"I found 2 items in your download folder.",
]
agent = Agent(llm=mock_llm)
agent = Agent(llm=mock_llm_with_tool_call)
response = agent.step("List my downloads")
assert "2 items" in response or "found" in response.lower()
assert mock_llm.complete.call_count == 2
assert "found" in response.lower() or "torrent" in response.lower()
assert mock_llm_with_tool_call.complete.call_count == 2
# CRITICAL: Verify tools were passed to LLM
first_call_args = mock_llm_with_tool_call.complete.call_args_list[0]
assert first_call_args[1]['tools'] is not None, "Tools not passed to LLM!"
assert len(first_call_args[1]['tools']) > 0, "Tools list is empty!"
def test_step_max_iterations(self, memory, mock_llm):
    """Should stop after max iterations.

    The fake LLM requests the ``list_folder`` tool on each of the first
    ``max_iterations`` calls and only answers with text afterwards, so the
    agent is expected to make exactly ``max_iterations + 1`` LLM calls.
    """
    max_iterations = 3
    calls_seen = 0

    def mock_complete(messages, tools=None):
        nonlocal calls_seen
        calls_seen += 1

        if calls_seen > max_iterations:
            # Forced final call: reply with plain text to end the loop.
            return {
                "role": "assistant",
                "content": "I couldn't complete the task.",
            }

        # CRITICAL: tools must be offered on every call that may use one.
        assert tools is not None, f"Tools not passed on call {calls_seen}!"
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [{
                "id": f"call_{calls_seen}",
                "function": {
                    "name": "list_folder",
                    "arguments": '{"folder_type": "download"}',
                },
            }],
        }

    # Install the tool-aware fake as the only behaviour of complete(). No other
    # return_value / side_effect is configured, so nothing can shadow it.
    mock_llm.complete = Mock(side_effect=mock_complete)
    agent = Agent(llm=mock_llm, max_tool_iterations=max_iterations)

    agent.step("Do something")

    # max_iterations tool rounds + 1 final text response.
    assert mock_llm.complete.call_count == max_iterations + 1
    assert calls_seen == max_iterations + 1
def test_step_includes_history(self, memory_with_history, mock_llm):
    """Should include conversation history in prompt."""
    mock_llm.complete.return_value = "Response"
    agent = Agent(llm=mock_llm)

    agent.step("New message")

    # The first positional argument of the last complete() call is the
    # message list that was sent to the LLM.
    sent_messages = mock_llm.complete.call_args[0][0]
    # A message may carry content=None (dict.get only falls back to the default
    # when the key is absent), so stringify before the substring test.
    contents = [str(m.get("content", "")) for m in sent_messages]
    # presumably "Hello" comes from the memory_with_history fixture - verify there
    assert any("Hello" in c for c in contents)
def test_step_includes_events(self, memory, mock_llm):
    """Should include unread events in prompt."""
    memory.episodic.add_background_event("download_complete", {"name": "Movie.mkv"})
    mock_llm.complete.return_value = "Response"
    agent = Agent(llm=mock_llm)

    agent.step("What's new?")

    # The first positional argument of the last complete() call is the
    # message list that was sent to the LLM.
    sent_messages = mock_llm.complete.call_args[0][0]
    # A message may carry content=None (dict.get only falls back to the default
    # when the key is absent), so stringify before calling .lower().
    contents = [str(m.get("content", "")).lower() for m in sent_messages]
    assert any("download" in c for c in contents)
def test_step_saves_ltm(self, memory, mock_llm, temp_dir):
    """A completed step leaves an ``ltm.json`` file inside ``temp_dir``."""
    mock_llm.complete.return_value = "Response"

    Agent(llm=mock_llm).step("Hello")

    # presumably the memory fixture persists LTM under temp_dir - confirm in the fixture
    assert (temp_dir / "ltm.json").exists()
@@ -281,49 +235,55 @@ class TestStep:
class TestAgentIntegration:
"""Integration tests for Agent."""
@patch("agent.tools.api.SearchTorrentsUseCase")
def test_search_and_select_workflow(self, mock_use_case_class, memory, mock_llm):
    """A torrent search triggered by the LLM stores its results in episodic memory."""
    # Canned result of the patched use case: exactly one torrent.
    search_payload = {
        "status": "ok",
        "torrents": [
            {"name": "Inception.1080p", "seeders": 100, "magnet": "magnet:?xt=..."},
        ],
        "count": 1,
    }
    use_case_result = Mock()
    use_case_result.to_dict.return_value = search_payload

    use_case = Mock()
    use_case.execute.return_value = use_case_result
    mock_use_case_class.return_value = use_case

    # Scripted LLM: a tool request first, then the closing text answer.
    mock_llm.complete.side_effect = [
        '{"thought": "searching", "action": {"name": "find_torrents", "args": {"media_title": "Inception"}}}',
        "I found 1 torrent for Inception!",
    ]

    reply = Agent(llm=mock_llm).step("Find Inception").lower()

    assert "found" in reply or "torrent" in reply
    # The search results must have been recorded in episodic memory.
    assert get_memory().episodic.last_search_results is not None
def test_multiple_tool_calls(self, memory, mock_llm, real_folder):
    """Should handle multiple tool calls in sequence.

    The fake LLM asks for ``list_folder`` twice (downloads, then movies)
    and answers with text on the third call.
    """
    memory.ltm.set_config("download_folder", str(real_folder["downloads"]))
    memory.ltm.set_config("movie_folder", str(real_folder["movies"]))

    def list_folder_call(call_id, folder_type):
        # Assistant message requesting one list_folder tool call.
        return {
            "role": "assistant",
            "content": None,
            "tool_calls": [{
                "id": call_id,
                "function": {
                    "name": "list_folder",
                    "arguments": f'{{"folder_type": "{folder_type}"}}',
                },
            }],
        }

    calls_seen = 0

    def mock_complete(messages, tools=None):
        nonlocal calls_seen
        calls_seen += 1

        # CRITICAL: Verify tools are passed on every call
        assert tools is not None, f"Tools not passed on call {calls_seen}!"

        if calls_seen == 1:
            return list_folder_call("call_1", "download")

        if calls_seen == 2:
            # CRITICAL: Verify tool result was sent back
            tool_messages = [m for m in messages if m.get("role") == "tool"]
            assert len(tool_messages) > 0, "Tool result not sent back to LLM!"
            return list_folder_call("call_2", "movie")

        return {
            "role": "assistant",
            "content": "I listed both folders for you.",
        }

    # This fake is the only behaviour configured on complete(); no scripted
    # side_effect list is set on the mock it replaces.
    mock_llm.complete = Mock(side_effect=mock_complete)
    agent = Agent(llm=mock_llm)

    agent.step("List my downloads and movies")

    assert mock_llm.complete.call_count == 3
    assert calls_seen == 3