From cb3a8627f6e79527f67e1f5c8b7805b132f900a0 Mon Sep 17 00:00:00 2001 From: Reason-Wang Date: Fri, 15 Aug 2025 21:31:15 +0000 Subject: [PATCH] Make tool content to be pure observation, fix tests --- agents/agents/agents/agent_base.py | 9 ++-- agents/agents/agents/chain/chain_base.py | 13 ++--- agents/agents/tools/utils/schema.py | 4 +- agents/tests/scripts/test_gpu_runs.sh | 10 ++++ agents/tests/unit/agents/test_auto_agent.py | 12 ++--- agents/tests/unit/agents/test_chain.py | 42 --------------- ..._agent_with_code.py => test_code_agent.py} | 5 +- .../tests/unit/agents/test_initialization.py | 51 ++++++++++++++----- agents/tests/unit/agents/test_react_agent.py | 45 ++++++++-------- agents/tests/unit/agents/test_vision_agent.py | 2 + agents/tests/unit/agents/test_vllm_backend.py | 42 --------------- .../tests/unit/agents/test_webshop_agent.py | 22 ++++++-- pyproject.toml | 5 ++ 13 files changed, 117 insertions(+), 145 deletions(-) create mode 100644 agents/tests/scripts/test_gpu_runs.sh rename agents/tests/unit/agents/{test_agent_with_code.py => test_code_agent.py} (95%) delete mode 100644 agents/tests/unit/agents/test_vllm_backend.py diff --git a/agents/agents/agents/agent_base.py b/agents/agents/agents/agent_base.py index 43b01f2..89bf41f 100644 --- a/agents/agents/agents/agent_base.py +++ b/agents/agents/agents/agent_base.py @@ -254,12 +254,9 @@ def extract_final_response(self, messages: List[Dict[str, Any]]) -> str: if last_message_role == "assistant": return last_message_content elif last_message_role == "tool": - try: - response = json.loads(last_message_content) - if "content" in response: - return response["content"] - except json.JSONDecodeError: - return last_message_content + return last_message_content + else: + raise ValueError(f"The last message role must be assistant or tool, but got {last_message_role}") @abstractmethod def parse(self, responses: List[str], tools: List[Any], **args) -> Tuple[dict, int, int]: diff --git a/agents/agents/agents/chain/chain_base.py b/agents/agents/agents/chain/chain_base.py index 669be97..faf8f71 100644 --- a/agents/agents/agents/chain/chain_base.py +++ b/agents/agents/agents/chain/chain_base.py @@ -312,6 +312,10 @@ async def _run_single_chain(self, ) thought_node.is_terminal = new_msg.get("status", "continue") in self.terminal_status current_node = thought_node + + # Check if the thought node is terminal - if so, break the loop + if current_node.is_terminal: + break # Handle tool calls if current_node.messages[-1].get("tool_calls"): @@ -331,16 +335,13 @@ async def _run_single_chain(self, # Process observation observation = result["observation"] - observation_json = json.dumps({ - "name": result["name"], - "content": observation, - }, indent=4) - action_input_node.observation = observation_json + action_input_node.observation = observation action_input_node.observation_code = result["status"] newest_messages.append({ "role": "tool", "tool_call_id": tool_call["id"], + "tool_name": result["name"], "content": [{"type": "text", "text": observation}], }) action_input_node.messages = deepcopy(newest_messages) @@ -535,7 +536,7 @@ def monitor_step(self) -> None: avg_turns += 1 if msg['role'] == 'tool': avg_tool_calls += 1 - tool_call_name = json.loads(msg['content'][0]['text'])['name'] + tool_call_name = msg['tool_name'] tool_calls_by_name[tool_call_name] += 1 avg_turns /= len(messages) diff --git a/agents/agents/tools/utils/schema.py b/agents/agents/tools/utils/schema.py index 28b4815..37016ce 100644 --- a/agents/agents/tools/utils/schema.py +++ b/agents/agents/tools/utils/schema.py @@ -129,7 +129,9 @@ def validate_schema(name, description, signature, docs): docs_description = docs['description'] if description and docs_description and docs_description != description: # raise ValueError(f"Description mismatch: {description} != {docs_description}") - warnings.warn(f"Description mismatch: {description} != {docs_description}, use the specified description by default.") + # warnings.warn(f"Description mismatch: {description} != {docs_description}, use the specified description by default.") + # TODO: currently we don't do anything here and prioritize the specified description by default. + pass docs_params = docs['params'] diff --git a/agents/tests/scripts/test_gpu_runs.sh b/agents/tests/scripts/test_gpu_runs.sh new file mode 100644 index 0000000..e033a51 --- /dev/null +++ b/agents/tests/scripts/test_gpu_runs.sh @@ -0,0 +1,10 @@ +#! /bin/bash + +# Test GPU runs + +python -m pytest -x tests/unit/agents/test_initialization.py || exit 1 +python -m pytest -x tests/unit/agents/test_auto_agent.py || exit 1 +python -m pytest -x tests/unit/agents/test_code_agent.py || exit 1 +python -m pytest -x tests/unit/agents/test_react_agent.py || exit 1 +python -m pytest -x tests/unit/agents/test_webshop_agent.py || exit 1 +python -m pytest -x tests/unit/agents/test_vision_agent.py || exit 1 \ No newline at end of file diff --git a/agents/tests/unit/agents/test_auto_agent.py b/agents/tests/unit/agents/test_auto_agent.py index 5b5a5c7..7159a4c 100644 --- a/agents/tests/unit/agents/test_auto_agent.py +++ b/agents/tests/unit/agents/test_auto_agent.py @@ -8,7 +8,7 @@ def test_auto_agent_from_config_react(): config = { "agent_type": "react", "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", - "template": "qwen-7b-chat", + "template": "qwen2.5", "tools": ["google_search", "answer"], "backend": "client" } @@ -17,7 +17,7 @@ def test_auto_agent_from_config_react(): assert isinstance(agent, ReactAgent) assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" - assert agent.template == "qwen-7b-chat" + assert agent.template == "qwen2.5" assert len(agent.tools) == 2 assert agent.backend == "client" @@ -26,7 +26,7 @@ def test_auto_agent_from_config_code(): config = { "agent_type": "code", "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", - "template": "qwen-7b-chat", + "template": "qwen2.5", "tools": ["code_interpreter"], "backend": "client" } @@ -43,7 +43,7 @@ def test_auto_agent_from_pretrained(): agent = AutoAgent.from_pretrained( model_name_or_path="Qwen/Qwen2.5-3B-Instruct", agent_type="react", - template="qwen-7b-chat", + template="qwen2.5", tools=["google_search", "answer"], debug=True, backend="client" @@ -55,7 +55,7 @@ def test_auto_agent_with_reward(): config = { "agent_type": "react", "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", - "template": "qwen-7b-chat", + "template": "qwen2.5", "tools": ["google_search", "answer"], "reward_name": "qa_f1_reward", "backend": "client" @@ -71,7 +71,7 @@ def test_auto_agent_invalid_type(): config = { "agent_type": "invalid_type", "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct", - "template": "qwen-7b-chat", + "template": "qwesn2.5", "tools": ["google_search", "answer"], "backend": "client" } diff --git a/agents/tests/unit/agents/test_chain.py b/agents/tests/unit/agents/test_chain.py index 4997283..5861ea4 100644 --- a/agents/tests/unit/agents/test_chain.py +++ b/agents/tests/unit/agents/test_chain.py @@ -81,45 +81,3 @@ def test_chain_to_json(): assert json_data[1]["type"] == "Action" -def test_multi_level_chain(): - chain = Chain(info={"question": "test question"}) - - # Level 0 - root = chain.add_node(type="Thought", description="Initial thought") - - # Level 1 - action = chain.add_node(type="Action", description="google_search") - - # Level 2 - action_input = chain.add_node(type="Action Input", description='{"query": "test"}') - - # Level 3 - observation = chain.add_node( - type="Observation", - description="Result", - observation="Search results here" - ) - - assert root.depth == 0 - assert action.depth == 1 - assert action_input.depth == 2 - assert observation.depth == 3 - - # Check parent-child relationships - assert root.children[0] == action - assert action.children[0] == action_input - assert action_input.children[0] == observation - - -def test_node_to_json_recursive(): - # Create a chain with multiple nodes - chain = Chain(info={"question": "test question"}) - root = chain.add_node(type="Thought", description="Initial thought") - action = chain.add_node(type="Action", description="google_search") - - # Get recursive JSON - json_data = root.to_json_recursive() - - assert json_data["type"] == "Thought" - assert len(json_data["children"]) == 1 - assert json_data["children"][0]["type"] == "Action" \ No newline at end of file diff --git a/agents/tests/unit/agents/test_agent_with_code.py b/agents/tests/unit/agents/test_code_agent.py similarity index 95% rename from agents/tests/unit/agents/test_agent_with_code.py rename to agents/tests/unit/agents/test_code_agent.py index b6b191e..662b33e 100644 --- a/agents/tests/unit/agents/test_agent_with_code.py +++ b/agents/tests/unit/agents/test_code_agent.py @@ -10,9 +10,8 @@ async def test_code_agent_end_to_end(): agent = CodeAgent( "Qwen/Qwen2.5-3B-Instruct", tools=tools, - template="qwen-7b-chat", + template="qwen2.5", backend="async_vllm", - debug=True ) question1 = "Every morning Aya goes for a $9$-kilometer-long walk and stops at a coffee shop afterwards. When she walks at a constant speed of $s$ kilometers per hour, the walk takes her 4 hours, including $t$ minutes spent in the coffee shop. When she walks $s+2$ kilometers per hour, the walk takes her 2 hours and 24 minutes, including $t$ minutes spent in the coffee shop. Suppose Aya walks at $s+\frac{1}{2}$ kilometers per hour. Find the number of minutes the walk takes her, including the $t$ minutes spent in the coffee shop." @@ -39,7 +38,7 @@ async def test_code_agent_end_to_end(): await agent.run_async( max_steps=4, start_messages=messages, - num_chains=5 + num_chains=2 ) messages = agent.get_messages() diff --git a/agents/tests/unit/agents/test_initialization.py b/agents/tests/unit/agents/test_initialization.py index c76cf9a..61055ee 100644 --- a/agents/tests/unit/agents/test_initialization.py +++ b/agents/tests/unit/agents/test_initialization.py @@ -1,10 +1,13 @@ from agents.agents.agent_base import BaseAgent from agents.agents.specialized.code_agent import CodeAgent -from agents.tools import code_interpreter +from agents.agents.react.react_agent import ReactAgent +from agents.agents.specialized.think_agent import ThinkAgent +from agents.tools import code_interpreter, google_search_serper, answer_qa import pytest -@pytest.mark.parametrize("backend", ["vllm", "client"]) +@pytest.mark.gpu +@pytest.mark.parametrize("backend", ["async_vllm", "client"]) def test_agent_initialization_backend(backend: str): # Initialize the code agent print(f"Testing {backend} backend") @@ -14,7 +17,7 @@ def test_agent_initialization_backend(backend: str): agent = CodeAgent( "Qwen/Qwen2.5-3B-Instruct", tools=tools, - template="qwen-7b-chat", + template="qwen2.5", backend=backend ) print("Agent initialized successfully") @@ -26,25 +29,49 @@ def test_agent_initialization_backend(backend: str): assert agent.backend == backend assert agent.tools == tools assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct" - assert agent.template == "qwen-7b-chat" + assert agent.template == "qwen2.5" # Test basic methods messages = agent.get_messages() assert isinstance(messages, list) - -def test_code_agent_initialization(): +@pytest.mark.gpu +@pytest.mark.parametrize("backend", ["async_vllm", "client"]) +def test_code_agent_initialization(backend: str): tools = [code_interpreter] agent = CodeAgent( "Qwen/Qwen2.5-3B-Instruct", tools=tools, - template="qwen-7b-chat", - backend="client" + template="qwen2.5", + backend=backend ) - # Check system prompt is set correctly - assert "multi-turn manner" in agent.system_prompt - assert agent.max_length == 8192 +@pytest.mark.gpu +@pytest.mark.parametrize("backend", ["async_vllm", "client"]) +def test_react_agent_initialization(backend: str): + tools = [google_search_serper, answer_qa] + task_info = "Test search task" + agent = ReactAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + task_info=task_info, + backend=backend + ) + + # Check system prompt contains task info and tools + assert task_info in agent.system_prompt + assert "google_search" in agent.system_prompt + assert "answer" in agent.system_prompt - \ No newline at end of file +@pytest.mark.gpu +@pytest.mark.parametrize("backend", ["async_vllm", "client"]) +def test_think_agent_initialization(backend: str): + tools = [code_interpreter] + agent = ThinkAgent( + "Qwen/Qwen2.5-3B-Instruct", + tools=tools, + template="qwen2.5", + backend=backend + ) diff --git a/agents/tests/unit/agents/test_react_agent.py b/agents/tests/unit/agents/test_react_agent.py index dcb2043..82a19bb 100644 --- a/agents/tests/unit/agents/test_react_agent.py +++ b/agents/tests/unit/agents/test_react_agent.py @@ -1,25 +1,7 @@ import pytest from agents.agents.react.react_agent import ReactAgent, parse_react_step from agents.tools.src.search.google_search import google_search_serper -from agents.tools.src.react.tools import answer - - -def test_react_agent_initialization(): - tools = [google_search_serper, answer] - task_info = "Test search task" - agent = ReactAgent( - "Qwen/Qwen2.5-3B-Instruct", - tools=tools, - template="qwen2.5", - task_info=task_info, - backend="client" - ) - - # Check system prompt contains task info and tools - assert task_info in agent.system_prompt - assert "google_search" in agent.system_prompt - assert "answer" in agent.system_prompt - +from agents.tools import answer_qa def test_parse_react_step(): # Test with a valid ReAct step @@ -40,13 +22,15 @@ def test_parse_react_step(): assert result_missing["input"] is None -def test_react_agent_parse(): - tools = [google_search_serper, answer] +@pytest.mark.gpu +@pytest.mark.asyncio(loop_scope="session") +async def test_react_agent_parse_run(): + tools = [google_search_serper, answer_qa] agent = ReactAgent( "Qwen/Qwen2.5-3B-Instruct", tools=tools, template="qwen2.5", - backend="client" + backend="async_vllm" ) responses = ["""Thought: I need to search for information. @@ -60,4 +44,19 @@ def test_react_agent_parse(): assert "Thought: I need to search for information." in result[0]["content"][0]["text"] assert len(result[0]["tool_calls"]) == 1 assert result[0]["tool_calls"][0]["function"]["name"] == "google_search" - assert result[0]["tool_calls"][0]["function"]["arguments"] == {"query": "test query"} \ No newline at end of file + assert result[0]["tool_calls"][0]["function"]["arguments"] == {"query": "test query"} + + messages = [ + { + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ] + } + ] + await agent.run_async(start_messages=messages, max_steps=4, num_chains=1) + messages_list = agent.get_messages() + print(messages_list[0]) + \ No newline at end of file diff --git a/agents/tests/unit/agents/test_vision_agent.py b/agents/tests/unit/agents/test_vision_agent.py index 0f67ab5..eedc521 100644 --- a/agents/tests/unit/agents/test_vision_agent.py +++ b/agents/tests/unit/agents/test_vision_agent.py @@ -2,6 +2,8 @@ from agents.tools import answer_qa import pytest + +@pytest.mark.gpu @pytest.mark.asyncio(loop_scope="session") async def test_vision_agent(): tools = [answer_qa] diff --git a/agents/tests/unit/agents/test_vllm_backend.py b/agents/tests/unit/agents/test_vllm_backend.py deleted file mode 100644 index 09ddec6..0000000 --- a/agents/tests/unit/agents/test_vllm_backend.py +++ /dev/null @@ -1,42 +0,0 @@ -from agents.agents.llm_backend import AsyncVLLMBackend, VLLMBackend -import pytest - -def test_vllm_backend(): - backend = VLLMBackend(model_name_or_path="Qwen/Qwen2.5-VL-3B-Instruct", template="qwen2.5-vl") - messages_list = [ - [ - { - "role": "user", - "content": [ - { - "type": "image", - "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", - }, - {"type": "text", "text": "Describe this image."}, - ], - }, - ] - ] - result = backend.generate(messages_list) - print(result) - - -@pytest.mark.asyncio(loop_scope="session") -async def test_async_vllm_backend(): - backend = AsyncVLLMBackend(model_name_or_path="Qwen/Qwen2.5-VL-3B-Instruct", template="qwen2.5-vl") - messages_list = [ - [ - { - "role": "user", - "content": [ - { - "type": "image", - "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", - }, - {"type": "text", "text": "Describe this image."}, - ], - }, - ] - ] - result = await backend.generate_async(messages_list) - print(result) \ No newline at end of file diff --git a/agents/tests/unit/agents/test_webshop_agent.py b/agents/tests/unit/agents/test_webshop_agent.py index 49ff192..59f5c91 100644 --- a/agents/tests/unit/agents/test_webshop_agent.py +++ b/agents/tests/unit/agents/test_webshop_agent.py @@ -2,17 +2,18 @@ from agents.agents.react.react_agent import ReactAgent from agents.tools.src.webshop.tools import webshop_browser from agents.tools.src.react.tools import answer -from rewards.webshop_reward import WebshopReward +from agents.rewards import webshop_reward +@pytest.mark.gpu @pytest.mark.asyncio async def test_webshop_agent_call(): tools = [webshop_browser, answer] agent = ReactAgent( "Qwen/Qwen2.5-3B-Instruct", tools=tools, - reward_fn=WebshopReward, - template="qwen-7b-chat", + reward_fn=webshop_reward, + template="qwen2.5", backend="async_vllm", debug=True ) @@ -24,6 +25,19 @@ async def test_webshop_agent_call(): {"role": "user", "content": f"{question}"} ], "question": f"{question}", + "asin": "B07FYPSNH8", + "category": "grocery", + "query": "beverages", + "name": "OWYN - 100% Vegan Plant-Based Protein Shakes | Cold Brew Coffee, 12 Fl Oz | Dairy-Free, Gluten-Free, Soy-Free, Tree Nut-Free, Egg-Free, Allergy-Free, Vegetarian", + "product_category": "Grocery & Gourmet Food \u203a Beverages \u203a Bottled Beverages, Water & Drink Mixes \u203a Meal Replacement & Protein Drinks \u203a Protein Drinks", + "instruction_text": "i am looking for a gluten free, 100% vegan plant based protein shake that is soy-free, and price lower than 40.00 dollars", + "attributes": [ + "gluten free" + ], + "price_upper": 40.0, + "goal_options": [], + "weight": 1, + "task_id": 0, }, ] @@ -35,4 +49,4 @@ async def test_webshop_agent_call(): ) messages = agent.get_messages() - print(messages) \ No newline at end of file + print(messages) diff --git a/pyproject.toml b/pyproject.toml index 4003daa..92d6504 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,11 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] include = ["agents", "verl"] +[tool.pytest.ini_options] +markers = [ + "gpu: marks tests as requiring GPU resources" +] + [project] name = "AgentFly" version = "0.0.1"