Agent-One-Lab · Reason-Wang · Aug 15, 2025 · Aug 15, 2025
diff --git a/agents/agents/agents/agent_base.py b/agents/agents/agents/agent_base.py
@@ -254,12 +254,9 @@ def extract_final_response(self, messages: List[Dict[str, Any]]) -> str:
         if last_message_role == "assistant":
             return last_message_content
         elif last_message_role == "tool":
-            try:
-                response = json.loads(last_message_content)
-                if "content" in response:
-                    return response["content"]
-            except json.JSONDecodeError:
-                return last_message_content
+            return last_message_content
+        else:
+            raise ValueError(f"The last message role must be assistant or tool, but got {last_message_role}")
 
     @abstractmethod
     def parse(self, responses: List[str], tools: List[Any], **args) -> Tuple[dict, int, int]:

diff --git a/agents/agents/agents/chain/chain_base.py b/agents/agents/agents/chain/chain_base.py
@@ -312,6 +312,10 @@ async def _run_single_chain(self,
                 )
                 thought_node.is_terminal = new_msg.get("status", "continue") in self.terminal_status
                 current_node = thought_node
+
+                # Check if the thought node is terminal - if so, break the loop
+                if current_node.is_terminal:
+                    break
 
             # Handle tool calls
             if current_node.messages[-1].get("tool_calls"):
@@ -331,16 +335,13 @@ async def _run_single_chain(self,
 
                     # Process observation
                     observation = result["observation"]
-                    observation_json = json.dumps({
-                        "name": result["name"],
-                        "content": observation,
-                    }, indent=4)
 
-                    action_input_node.observation = observation_json
+                    action_input_node.observation = observation
                     action_input_node.observation_code = result["status"]
                     newest_messages.append({
                         "role": "tool",
                         "tool_call_id": tool_call["id"],
+                        "tool_name": result["name"],
                         "content": [{"type": "text", "text": observation}],
                     })
                     action_input_node.messages = deepcopy(newest_messages)
@@ -535,7 +536,7 @@ def monitor_step(self) -> None:
                     avg_turns += 1
                 if msg['role'] == 'tool':
                     avg_tool_calls += 1
-                    tool_call_name = json.loads(msg['content'][0]['text'])['name']
+                    tool_call_name = msg['tool_name']
                     tool_calls_by_name[tool_call_name] += 1
 
         avg_turns /= len(messages)

diff --git a/agents/agents/tools/utils/schema.py b/agents/agents/tools/utils/schema.py
@@ -129,7 +129,9 @@ def validate_schema(name, description, signature, docs):
     docs_description = docs['description']
     if description and docs_description and docs_description != description:
         # raise ValueError(f"Description mismatch: {description} != {docs_description}")
-        warnings.warn(f"Description mismatch: {description} != {docs_description}, use the specified description by default.")
+        # warnings.warn(f"Description mismatch: {description} != {docs_description}, use the specified description by default.")
+        # TODO: decide how to report this mismatch; for now it is ignored and the explicitly specified description takes priority.
+        pass
 
 
     docs_params = docs['params']

diff --git a/agents/tests/scripts/test_gpu_runs.sh b/agents/tests/scripts/test_gpu_runs.sh
@@ -0,0 +1,10 @@
+#! /bin/bash
+
+# Run the GPU agent test files one at a time; abort on the first file that fails.
+
+python -m pytest -x tests/unit/agents/test_initialization.py || exit 1
+python -m pytest -x tests/unit/agents/test_auto_agent.py || exit 1
+python -m pytest -x tests/unit/agents/test_code_agent.py || exit 1
+python -m pytest -x tests/unit/agents/test_react_agent.py || exit 1
+python -m pytest -x tests/unit/agents/test_webshop_agent.py || exit 1
+python -m pytest -x tests/unit/agents/test_vision_agent.py || exit 1
diff --git a/agents/tests/unit/agents/test_auto_agent.py b/agents/tests/unit/agents/test_auto_agent.py
@@ -8,7 +8,7 @@ def test_auto_agent_from_config_react():
     config = {
         "agent_type": "react",
         "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
-        "template": "qwen-7b-chat",
+        "template": "qwen2.5",
         "tools": ["google_search", "answer"],
         "backend": "client"
     }
@@ -17,7 +17,7 @@ def test_auto_agent_from_config_react():
 
     assert isinstance(agent, ReactAgent)
     assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct"
-    assert agent.template == "qwen-7b-chat"
+    assert agent.template == "qwen2.5"
     assert len(agent.tools) == 2
     assert agent.backend == "client"
 
@@ -26,7 +26,7 @@ def test_auto_agent_from_config_code():
     config = {
         "agent_type": "code",
         "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
-        "template": "qwen-7b-chat",
+        "template": "qwen2.5",
         "tools": ["code_interpreter"],
         "backend": "client"
     }
@@ -43,7 +43,7 @@ def test_auto_agent_from_pretrained():
     agent = AutoAgent.from_pretrained(
         model_name_or_path="Qwen/Qwen2.5-3B-Instruct",
         agent_type="react",
-        template="qwen-7b-chat",
+        template="qwen2.5",
         tools=["google_search", "answer"],
         debug=True,
         backend="client"
@@ -55,7 +55,7 @@ def test_auto_agent_with_reward():
     config = {
         "agent_type": "react",
         "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
-        "template": "qwen-7b-chat",
+        "template": "qwen2.5",
         "tools": ["google_search", "answer"],
         "reward_name": "qa_f1_reward",
         "backend": "client"
@@ -71,7 +71,7 @@ def test_auto_agent_invalid_type():
     config = {
         "agent_type": "invalid_type",
         "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
-        "template": "qwen-7b-chat",
+        "template": "qwen2.5",
         "tools": ["google_search", "answer"],
         "backend": "client"
     }

diff --git a/agents/tests/unit/agents/test_chain.py b/agents/tests/unit/agents/test_chain.py
@@ -81,45 +81,3 @@ def test_chain_to_json():
     assert json_data[1]["type"] == "Action"
 
 
-def test_multi_level_chain():
-    chain = Chain(info={"question": "test question"})
-
-    # Level 0
-    root = chain.add_node(type="Thought", description="Initial thought")
-
-    # Level 1
-    action = chain.add_node(type="Action", description="google_search")
-
-    # Level 2
-    action_input = chain.add_node(type="Action Input", description='{"query": "test"}')
-
-    # Level 3
-    observation = chain.add_node(
-        type="Observation",
-        description="Result",
-        observation="Search results here"
-    )
-
-    assert root.depth == 0
-    assert action.depth == 1
-    assert action_input.depth == 2
-    assert observation.depth == 3
-
-    # Check parent-child relationships
-    assert root.children[0] == action
-    assert action.children[0] == action_input
-    assert action_input.children[0] == observation
-
-
-def test_node_to_json_recursive():
-    # Create a chain with multiple nodes
-    chain = Chain(info={"question": "test question"})
-    root = chain.add_node(type="Thought", description="Initial thought")
-    action = chain.add_node(type="Action", description="google_search")
-
-    # Get recursive JSON
-    json_data = root.to_json_recursive()
-
-    assert json_data["type"] == "Thought"
-    assert len(json_data["children"]) == 1
-    assert json_data["children"][0]["type"] == "Action" 
diff --git a/agents/tests/unit/agents/test_agent_with_code.py b/agents/tests/unit/agents/test_code_agent.py
@@ -10,9 +10,8 @@ async def test_code_agent_end_to_end():
     agent = CodeAgent(
         "Qwen/Qwen2.5-3B-Instruct",
         tools=tools,
-        template="qwen-7b-chat",
+        template="qwen2.5",
         backend="async_vllm",
-        debug=True
     )
 
     question1 = "Every morning Aya goes for a $9$-kilometer-long walk and stops at a coffee shop afterwards. When she walks at a constant speed of $s$ kilometers per hour, the walk takes her 4 hours, including $t$ minutes spent in the coffee shop. When she walks $s+2$ kilometers per hour, the walk takes her 2 hours and 24 minutes, including $t$ minutes spent in the coffee shop. Suppose Aya walks at $s+\frac{1}{2}$ kilometers per hour. Find the number of minutes the walk takes her, including the $t$ minutes spent in the coffee shop."
@@ -39,7 +38,7 @@ async def test_code_agent_end_to_end():
     await agent.run_async(
         max_steps=4,
         start_messages=messages,
-        num_chains=5
+        num_chains=2
     )
 
     messages = agent.get_messages()

diff --git a/agents/tests/unit/agents/test_initialization.py b/agents/tests/unit/agents/test_initialization.py
@@ -1,10 +1,13 @@
 from agents.agents.agent_base import BaseAgent
 from agents.agents.specialized.code_agent import CodeAgent
-from agents.tools import code_interpreter
+from agents.agents.react.react_agent import ReactAgent
+from agents.agents.specialized.think_agent import ThinkAgent
+from agents.tools import code_interpreter, google_search_serper, answer_qa
 import pytest
 
 
-@pytest.mark.parametrize("backend", ["vllm", "client"])
+@pytest.mark.gpu
+@pytest.mark.parametrize("backend", ["async_vllm", "client"])
 def test_agent_initialization_backend(backend: str):
     # Initialize the code agent
     print(f"Testing {backend} backend")
@@ -14,7 +17,7 @@ def test_agent_initialization_backend(backend: str):
         agent = CodeAgent(
             "Qwen/Qwen2.5-3B-Instruct",
             tools=tools,
-            template="qwen-7b-chat",
+            template="qwen2.5",
             backend=backend
         )
         print("Agent initialized successfully")
@@ -26,25 +29,49 @@ def test_agent_initialization_backend(backend: str):
     assert agent.backend == backend
     assert agent.tools == tools
     assert agent.model_name_or_path == "Qwen/Qwen2.5-3B-Instruct"
-    assert agent.template == "qwen-7b-chat"
+    assert agent.template == "qwen2.5"
 
     # Test basic methods
     messages = agent.get_messages()
     assert isinstance(messages, list)
 
-
-def test_code_agent_initialization():
+@pytest.mark.gpu
+@pytest.mark.parametrize("backend", ["async_vllm", "client"])
+def test_code_agent_initialization(backend: str):
     tools = [code_interpreter]
     agent = CodeAgent(
         "Qwen/Qwen2.5-3B-Instruct",
         tools=tools,
-        template="qwen-7b-chat",
-        backend="client"
+        template="qwen2.5",
+        backend=backend
     )
 
-    # Check system prompt is set correctly
-    assert "multi-turn manner" in agent.system_prompt
-    assert agent.max_length == 8192
 
+@pytest.mark.gpu
+@pytest.mark.parametrize("backend", ["async_vllm", "client"])
+def test_react_agent_initialization(backend: str):
+    tools = [google_search_serper, answer_qa]
+    task_info = "Test search task"
+    agent = ReactAgent(
+        "Qwen/Qwen2.5-3B-Instruct",
+        tools=tools,
+        template="qwen2.5",
+        task_info=task_info,
+        backend=backend
+    )
+
+    # Check system prompt contains task info and tools
+    assert task_info in agent.system_prompt
+    assert "google_search" in agent.system_prompt
+    assert "answer" in agent.system_prompt
 
-
+@pytest.mark.gpu
+@pytest.mark.parametrize("backend", ["async_vllm", "client"])
+def test_think_agent_initialization(backend: str):
+    tools = [code_interpreter]
+    agent = ThinkAgent(
+        "Qwen/Qwen2.5-3B-Instruct",
+        tools=tools,
+        template="qwen2.5",
+        backend=backend
+    )
diff --git a/agents/tests/unit/agents/test_react_agent.py b/agents/tests/unit/agents/test_react_agent.py
@@ -1,25 +1,7 @@
 import pytest
 from agents.agents.react.react_agent import ReactAgent, parse_react_step
 from agents.tools.src.search.google_search import google_search_serper
-from agents.tools.src.react.tools import answer
-
-
-def test_react_agent_initialization():
-    tools = [google_search_serper, answer]
-    task_info = "Test search task"
-    agent = ReactAgent(
-        "Qwen/Qwen2.5-3B-Instruct",
-        tools=tools,
-        template="qwen2.5",
-        task_info=task_info,
-        backend="client"
-    )
-
-    # Check system prompt contains task info and tools
-    assert task_info in agent.system_prompt
-    assert "google_search" in agent.system_prompt
-    assert "answer" in agent.system_prompt
-
+from agents.tools import answer_qa
 
 def test_parse_react_step():
     # Test with a valid ReAct step
@@ -40,13 +22,15 @@ def test_parse_react_step():
     assert result_missing["input"] is None
 
 
-def test_react_agent_parse():
-    tools = [google_search_serper, answer]
+@pytest.mark.gpu
+@pytest.mark.asyncio(loop_scope="session")
+async def test_react_agent_parse_run():
+    tools = [google_search_serper, answer_qa]
     agent = ReactAgent(
         "Qwen/Qwen2.5-3B-Instruct",
         tools=tools,
         template="qwen2.5",
-        backend="client"
+        backend="async_vllm"
     )
 
     responses = ["""Thought: I need to search for information.
@@ -60,4 +44,19 @@ def test_react_agent_parse():
     assert "Thought: I need to search for information." in result[0]["content"][0]["text"]
     assert len(result[0]["tool_calls"]) == 1
     assert result[0]["tool_calls"][0]["function"]["name"] == "google_search"
-    assert result[0]["tool_calls"][0]["function"]["arguments"] == {"query": "test query"}
+    assert result[0]["tool_calls"][0]["function"]["arguments"] == {"query": "test query"}
+
+    messages = [
+        {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": "What is the capital of France?"
+                }
+            ]
+        }
+    ]
+    await agent.run_async(start_messages=messages, max_steps=4, num_chains=1)
+    messages_list = agent.get_messages()
+    print(messages_list[0])
+
diff --git a/agents/tests/unit/agents/test_vision_agent.py b/agents/tests/unit/agents/test_vision_agent.py
@@ -2,6 +2,8 @@
 from agents.tools import answer_qa
 import pytest
 
+
+@pytest.mark.gpu
 @pytest.mark.asyncio(loop_scope="session")
 async def test_vision_agent():
     tools = [answer_qa]