diff --git a/.github/workflows/cpu_tests.yml b/.github/workflows/cpu_tests.yml
index 8fe4526..3e55748 100644
--- a/.github/workflows/cpu_tests.yml
+++ b/.github/workflows/cpu_tests.yml
@@ -9,18 +9,19 @@ on:
 jobs:
   test-envs:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 20
 
     strategy:
       matrix:
+        python-version: ["3.10"]
         test-file:
-          - tests/unit/envs/ --ignore tests/unit/envs/test_webshop_text_env.py --ignore tests/unit/envs/test_alfworld_env.py
-          - tests/unit/envs/test_alfworld_env.py
+          - agentfly/tests/unit/envs/ --ignore agentfly/tests/unit/envs/test_webshop_text_env.py --ignore agentfly/tests/unit/envs/test_alfworld_env.py
+          - agentfly/tests/unit/envs/test_alfworld_env.py
           # - tests/unit/envs/test_webshop_text_env.py # TODO: add minimal variant of the webshop docker image
-          - tests/unit/rewards/ --ignore tests/unit/rewards/test_env_id.py --ignore tests/unit/rewards/test_webshop_reward.py
-          - tests/unit/tools/ --ignore tests/unit/tools/test_webshop_tool.py --ignore tests/unit/tools/test_scienceworld_tool.py --ignore tests/unit/tools/test_code_tool.py
-          - tests/unit/tools/test_scienceworld_tool.py
-          - tests/unit/tools/test_code_tool.py
+          - agentfly/tests/unit/rewards/ --ignore agentfly/tests/unit/rewards/test_env_id.py --ignore agentfly/tests/unit/rewards/test_webshop_reward.py
+          - agentfly/tests/unit/tools/ --ignore agentfly/tests/unit/tools/test_webshop_tool.py --ignore agentfly/tests/unit/tools/test_scienceworld_tool.py --ignore agentfly/tests/unit/tools/test_code_tool.py
+          - agentfly/tests/unit/tools/test_scienceworld_tool.py
+          - agentfly/tests/unit/tools/test_code_tool.py
           # - test/unit/agents/ # TODO: recheck this
 
     steps:
@@ -34,6 +35,13 @@ jobs:
         with:
           python-version: '3.10'
 
+      - name: Verify Python
+        run: |
+          which python
+          python --version
+          which pip
+          python -m pip --version
+
       - name: Free up disk space
         run: |
           echo "Before cleanup:"
@@ -50,8 +58,9 @@ jobs:
 
       - name: Install dependencies (main repo)
         run: |
-          pip install -r agents/requirements.txt
+          pip install -e .
           pip install datasets
+          pip install -e '.[dev]' --no-build-isolation
 
       - name: Cache AgentFly cache
         uses: actions/cache@v4
@@ -75,5 +84,4 @@ jobs:
 
       - name: Run unit test (${{ matrix.test-file }})
         run: |
-          cd agents
-          python -m pytest ${{ matrix.test-file }}
+          pytest ${{ matrix.test-file }}
diff --git a/.gitignore b/.gitignore
index d2646d5..b173538 100644
--- a/.gitignore
+++ b/.gitignore
@@ -122,7 +122,7 @@ tests/e2e/toy_examples/deepspeed/synchronous/output.txt
 
 # data
 *.parquet
-agents/agents/data/*
+agentfly/agents/data/*
 
 # local logs
 logs
@@ -131,10 +131,12 @@ outputs
 *.out
 
 # Notebooks
-agents/tests/*.ipynb
-agents/tests/*.jpg
-agents/tests/*.jpeg
-agents/tests/*.png
-agents/agents/*.ipynb
-agents/temp/
+agentfly/tests/*.ipynb
+agentfly/tests/*.jpg
+agentfly/tests/*.jpeg
+agentfly/tests/*.png
+agentfly/agents/*.ipynb
+agentfly/temp/
+agentfly/data/
+*.ipynb
 
diff --git a/README.md b/README.md
index 3c88df4..20628e9 100644
--- a/README.md
+++ b/README.md
@@ -48,30 +48,28 @@ bash install.sh # Assume conda with python3.10.x
 ```
 **Option 2**: Customized Installation
 
-Clone and initialize the project:
-```bash
-git clone https://github.com/Agent-One-Lab/AgentFly
-cd AgentFly
-git submodule init
-git submodule update
-```
-Basic python packages installation:
-```bash
-pip install -e .
-pip install -e '.[verl]' --no-build-isolation
-```
-Optionally, some tools actually require some additional dependencies:
-
-Some of our tools & environments are managed by *enroot* backend. To use them, please install [enroot](https://github.com/NVIDIA/enroot/blob/master/doc/installation.md) (sudo required). Such tools include code_interpreter, retrieval, webshop, alfworld, sciencworld.
-
-Search requires redis to cache results, an optional way to install with conda:
-```bash
-conda install conda-forge::redis-server==7.4.0
-```
+Please refer to [installation.md](docs/start/installation.md) for customized installation.
 
 ## Quick Start
-```
+```python
+# Minimal example: build an agent and run it. agent.run is a coroutine, so run this in an async context (e.g. a notebook).
+from agentfly.agents import HFAgent
+from agentfly.tools import calculate
+messages = [{"role": "user", "content": "What is the result of 1 + 1?"}]
+agent = HFAgent(
+    model_name_or_path="Qwen/Qwen2.5-3B-Instruct",
+    tools=[calculate],
+    template="qwen2.5",
+    backend="async_vllm",
+)
+await agent.run(
+    messages=messages,
+    max_turns=3,
+    num_chains=1
+)
 
+trajectories = agent.trajectories
+print(trajectories)
 ```
 
 ## Features
diff --git a/agents/agents/__init__.py b/agentfly/__init__.py
similarity index 100%
rename from agents/agents/__init__.py
rename to agentfly/__init__.py
diff --git a/agents/agents/agents/__init__.py b/agentfly/agents/__init__.py
similarity index 56%
rename from agents/agents/agents/__init__.py
rename to agentfly/agents/__init__.py
index 6152c72..b3d2564 100644
--- a/agents/agents/agents/__init__.py
+++ b/agentfly/agents/__init__.py
@@ -2,5 +2,5 @@
 from .specialized.code_agent import CodeAgent
 from .specialized.think_agent import ThinkAgent
 from .specialized.gui_agent import GUIAgent
-
-__all__ = ["ReactAgent", "CodeAgent", "ThinkAgent", "GUIAgent"]
\ No newline at end of file
+from .specialized.hf_agent import HFAgent
+from .templates.utils import process_vision_info, tokenize_conversation, tokenize_conversations
\ No newline at end of file
diff --git a/agents/agents/agents/agent_base.py b/agentfly/agents/agent_base.py
similarity index 89%
rename from agents/agents/agents/agent_base.py
rename to agentfly/agents/agent_base.py
index a58092e..82505e9 100644
--- a/agents/agents/agents/agent_base.py
+++ b/agentfly/agents/agent_base.py
@@ -1,7 +1,7 @@
 from abc import ABC, abstractmethod
 from collections import defaultdict
 import json
-
+from .utils.messages import MessagesList
 from .templates.templates import get_template
 from ..__init__ import AGENT_DATA_DIR
 from .llm_backend import AsyncVLLMBackend, AsyncVerlBackend, ClientBackend, TransformersBackend, VLLMBackend
@@ -11,7 +11,7 @@
 import torch
 from .templates.utils import tokenize_conversations
 from .templates.vision_processor import is_vision_template
-from .chain.chain_base import ChainGeneration
+from .chain.chain_base import ChainRollout
 import os
 import transformers
 import warnings
@@ -27,7 +27,7 @@
 
 Logger = logging.getLogger(__name__)
 
-class BaseAgent(ChainGeneration, ABC):
+class BaseAgent(ChainRollout, ABC):
     """
     Base class for all agents. All agent should subclass this class. A customized agent can implement the following methods:
     
@@ -43,7 +43,7 @@ def __init__(
         system_prompt: str = None,
         tools: List = None,
         max_length: int=8192,
-        backend: str = "transformers",
+        backend: str = "async_vllm",
         backend_config: Any = None,
         reward_fn: Callable = None,
         log_file: str = "agent",
@@ -156,6 +156,47 @@ def _init_llm_engine(self, model_name_or_path: str, backend: str):
 
         return llm_engine
 
+    def _preprocess_messages(self, messages: List[Dict]):
+        """
+        Wrap the input in a MessagesList and hand the system prompt to each entry.
+        A falsy ``system_prompt`` (None or "") leaves the entries untouched.
+        Args:
+            messages: Raw input, parsed by ``MessagesList.from_data``.
+        Returns:
+            The ``to_list()`` form of the processed MessagesList.
+        """
+        messages_list = MessagesList.from_data(messages)
+        for conversation in messages_list:
+            if self.system_prompt:
+                conversation.set_system_prompt(self.system_prompt, enforce=False)
+
+        return messages_list.to_list()
+
+    async def run(self,
+        messages: Union[List[dict], np.ndarray, Dict],
+        max_turns: int,
+        generation_config: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ):
+        """
+        Main entry point for running the agent: preprocesses ``messages`` with
+        ``_preprocess_messages`` and awaits the chain-based ``run_async`` rollout.
+        Args:
+            messages: Input messages to generate responses for.
+            max_turns: The maximum number of turns to generate.
+            generation_config: The generation configuration.
+            **kwargs: Forwarded unchanged to ``run_async`` (e.g. ``num_chains``).
+        Returns: whatever ``run_async`` returns.
+        """
+        processed_messages = self._preprocess_messages(messages)
+
+        return await self.run_async(
+            processed_messages,
+            max_turns=max_turns,
+            generation_config=generation_config,
+            **kwargs,
+        )
+
     def set_llm_engine(self, llm_engine: Any, tokenizer: Any, processor: Any):
         assert self.backend == "async_verl", "Only async verl backend is supported for now"
 
diff --git a/agents/agents/agents/auto.py b/agentfly/agents/auto.py
similarity index 98%
rename from agents/agents/agents/auto.py
rename to agentfly/agents/auto.py
index c62e8f7..82d72e3 100644
--- a/agents/agents/agents/auto.py
+++ b/agentfly/agents/auto.py
@@ -7,6 +7,7 @@
 from .react.react_agent import ReactAgent
 from .specialized.code_agent import CodeAgent
 from .specialized.gui_agent import GUIAgent
+from .specialized.hf_agent import HFAgent
 from ..rewards.reward_base import get_reward_from_name
 
 
@@ -167,4 +168,5 @@ def from_pretrained(
 AutoAgent.register_agent("code", CodeAgent)
 AutoAgent.register_agent("openai", OpenAIAgent)
 AutoAgent.register_agent("think", ThinkAgent)
-AutoAgent.register_agent("gui", GUIAgent)
\ No newline at end of file
+AutoAgent.register_agent("gui", GUIAgent)
+AutoAgent.register_agent("hf", HFAgent)
\ No newline at end of file
diff --git a/agents/agents/agents/backend_config.py b/agentfly/agents/backend_config.py
similarity index 100%
rename from agents/agents/agents/backend_config.py
rename to agentfly/agents/backend_config.py
diff --git a/agents/agents/agents/chain/__init__.py b/agentfly/agents/chain/__init__.py
similarity index 100%
rename from agents/agents/agents/chain/__init__.py
rename to agentfly/agents/chain/__init__.py
diff --git a/agents/agents/agents/chain/chain_base.py b/agentfly/agents/chain/chain_base.py
similarity index 87%
rename from agents/agents/agents/chain/chain_base.py
rename to agentfly/agents/chain/chain_base.py
index 3fcf12d..c610c23 100644
--- a/agents/agents/agents/chain/chain_base.py
+++ b/agentfly/agents/chain/chain_base.py
@@ -3,6 +3,7 @@
 from dataclasses import dataclass, field
 import json
 import time
+from ..utils.messages import MessagesList, Messages
 from ...utils.timing import Timer
 from typing import Any, Dict, List, Optional, Tuple, Union, Callable
 import uuid
@@ -18,6 +19,7 @@
 
 @dataclass
 class Node:
+    messages: Messages
     is_terminal: bool = False
     is_pruned: bool = False
     type: Optional[str] = None
@@ -26,7 +28,6 @@ class Node:
     observation_code: Optional[str] = None
     parent: Optional["Node"] = None
     children: List["Node"] = field(default_factory=list)
-    messages: List[Any] = field(default_factory=list)
 
     @property
     def depth(self) -> int:
@@ -92,7 +93,7 @@ def add_node(
         observation_code: Optional[str] = None,
         messages: Optional[List[Any]] = None
     ) -> Node:
-        messages = messages if messages is not None else []
+        messages = Messages.from_turns(messages)
         new_node = Node(
             is_terminal=is_terminal,
             is_pruned=is_pruned,
@@ -124,7 +125,7 @@ def to_json(self) -> List[dict]:
         return chain_json
 
 
-class ChainGeneration:
+class ChainRollout:
     """
     Basic class for chain-based rollout. It starts multiple chains and runs them asynchronously.
     """
@@ -157,18 +158,18 @@ def to_json(self) -> dict:
             "chains": [chain.to_json() for chain in self.chains]
         }
 
-    def initialise_chains(self, msgs_list: List[List[Dict]], info_list: List[Dict], num_chains: int) -> Tuple[Dict[str, Chain], Dict[str, Node]]:
+    def initialize_chains(self, messages_list: MessagesList, num_chains: int) -> Tuple[Dict[str, Chain], Dict[str, Node]]:
         chains = {}
         start_nodes = {}
-        group_ids = [str(uuid.uuid4()) for _ in range(len(msgs_list))]
+        group_ids = [str(uuid.uuid4()) for _ in range(len(messages_list))]
 
-        for group_idx, (prompt_msgs, info) in enumerate(zip(msgs_list, info_list)):
+        for group_idx, messages in enumerate(messages_list):
             group_id = group_ids[group_idx]
             for j in range(num_chains):
-                ch = Chain(info | {"group_id": group_id})
+                ch = Chain(messages.meta | {"group_id": group_id})
                 root = ch.add_node(
                     type="Action Input",
-                    messages=deepcopy(prompt_msgs)
+                    messages=deepcopy(messages.messages)
                 )
 
                 cid = str(uuid.uuid4())
@@ -182,40 +183,13 @@ def get_messages(self) -> List[Any]:
         for id, node in self.current_nodes.items():
             info = self.chains[id].info
             message_item = {}
-            message_item["messages"] = node.messages
+            message_item["messages"] = node.messages.messages
             message_item.update(info)
             messages.append(message_item)
         return messages
 
-    def prepare_chain_messages(self, start_messages: Union[List[dict], np.ndarray]):
-        if isinstance(start_messages, np.ndarray):
-            start_messages_list = start_messages.tolist()
-        else:
-            start_messages_list = start_messages
-
-        if self.system_prompt is not None and self.system_prompt != "":
-            for message in start_messages_list:
-                if message["messages"][0]["role"] != "system":
-                    message["messages"].insert(0, {"role": "system", "content": self.system_prompt})
-        
-        example_message = start_messages_list[0]
-        if isinstance(example_message, dict):
-            assert "messages" in example_message
-        
-        messages_list = []
-        other_info_list = []
-        for message in start_messages_list:
-            messages_list.append(message["messages"])
-            info = {}
-            for key, value in message.items():
-                if key != "messages":
-                    info[key] = value
-            other_info_list.append(info)
-        
-        return messages_list, other_info_list
-
-    def validate_run_args(self, max_steps: int, num_chains: int, enable_streaming: bool):
-        assert max_steps >= 1, "max_steps must be at least 1."
+    def validate_run_args(self, max_turns: int, num_chains: int, enable_streaming: bool):
+        assert max_turns >= 1, "max_turns must be at least 1."
         assert num_chains >= 1, "num_chains must be at least 1."
         for observer in self.streaming_manager.observers:
             if isinstance(observer, ConsoleStreamObserver) and enable_streaming:
@@ -223,8 +197,8 @@ def validate_run_args(self, max_steps: int, num_chains: int, enable_streaming: b
         
     
     async def run_async(self,
-        max_steps: int,
-        start_messages: Union[List[dict], np.ndarray],
+        messages: List[Dict],
+        max_turns: int,
         num_chains: int,
         generation_config: Optional[Dict[str, Any]] = None,
         enable_streaming: bool = False,
@@ -241,13 +215,13 @@ async def run_async(self,
             enable_streaming: Whether to enable streaming mode.
             streaming_callback: Optional callback for streaming events.
         """
-        self.validate_run_args(max_steps, num_chains, enable_streaming)
+        self.validate_run_args(max_turns, num_chains, enable_streaming)
         Monitor.ensure_started()
         self.reset()
-        messages_list, other_info_list = self.prepare_chain_messages(start_messages)
-        chains, first_nodes = self.initialise_chains(
+
+        messages_list = MessagesList.from_data(messages)
+        chains, first_nodes = self.initialize_chains(
             messages_list,
-            other_info_list,
             num_chains
         )
         tool_schemas = [tool.schema for tool in self.tools]
@@ -260,7 +234,7 @@ async def run_async(self,
                         node,
                         chains[cid],
                         tool_schemas,
-                        max_steps=max_steps,
+                        max_turns=max_turns,
                         done_queue=done_q,
                         enable_streaming=enable_streaming
                     )
@@ -284,7 +258,7 @@ async def _run_single_chain(self,
         first_node: Node,
         chain: Chain,
         tools: List[Dict],
-        max_steps: int,
+        max_turns: int,
         done_queue: asyncio.Queue,
         enable_streaming: bool = False
     ):
@@ -295,8 +269,8 @@ async def _run_single_chain(self,
         depth = 0
         have_set_tools = False
 
-        while not current_node.is_terminal and depth < max_steps:
-            newest_messages = deepcopy(current_node.messages)
+        while not current_node.is_terminal and depth < max_turns:
+            newest_messages = current_node.messages.copy()
             
             if not current_node.is_terminal:
                 # Generate response
@@ -307,7 +281,7 @@ async def _run_single_chain(self,
                 newest_messages.append(new_msg)
                 thought_node = chain.add_node(
                     type="Thought",
-                    messages=deepcopy(newest_messages),
+                    messages=newest_messages.copy(),
                     description=new_msg.get("content", "")
                 )
                 thought_node.is_terminal = new_msg.get("status", "continue") in self.terminal_status
@@ -329,7 +303,7 @@ async def _run_single_chain(self,
                     # Create action input node
                     action_input_node = chain.add_node(
                         type="Action Input",
-                        messages=deepcopy(newest_messages),
+                        messages=newest_messages.copy(),
                         description=result.get("arguments", "")
                     )
                     
@@ -344,7 +318,7 @@ async def _run_single_chain(self,
                         "tool_name": result["name"],
                         "content": [{"type": "text", "text": observation}],
                     })
-                    action_input_node.messages = deepcopy(newest_messages)
+                    action_input_node.messages = newest_messages.copy()
                     action_input_node.is_terminal = result["status"] in self.terminal_status
             else:
                 # No tool calls, chain is finished
@@ -389,7 +363,7 @@ async def generate_streaming_wrapper(messages_list, **kwargs):
             if has_streaming:
                 # Collect full response from streaming
                 full_response = ""
-                async for chunk in self.generate_streaming([current_node.messages], tools=tools):
+                async for chunk in self.generate_streaming([current_node.messages.messages], tools=tools):
                     await self.streaming_manager.emit_event(StreamEvent(
                         event_type=StreamEventType.LLM_GENERATION_CHUNK,
                         chain_id=chain_id,
@@ -412,12 +386,12 @@ async def generate_streaming_wrapper(messages_list, **kwargs):
                 ))
                 
                 # Parse response
-                new_msg = self.parse([full_response], self.tools)
+                new_msg = self.parse([full_response], tools=self.tools)
                 return new_msg[0]
             else:
                 # Fallback to non-streaming generation
-                responses = await self.generate_async([current_node.messages], tools=tools, num_return_sequences=1)
-                new_msg = self.parse(responses, self.tools)
+                responses = await self.generate_async([current_node.messages.messages], tools=tools, num_return_sequences=1)
+                new_msg = self.parse(responses, tools=self.tools)
                 
                 # Emit a single chunk event for the full response
                 full_response = new_msg[0].get("content", "")
@@ -452,8 +426,8 @@ async def generate_streaming_wrapper(messages_list, **kwargs):
                 return new_msg[0]
         else:
             # Non-streaming generation
-            responses = await self.generate_async([current_node.messages], tools=tools, num_return_sequences=1)
-            new_msg = self.parse(responses, self.tools)
+            responses = await self.generate_async([current_node.messages.messages], tools=tools, num_return_sequences=1)
+            new_msg = self.parse(responses, tools=self.tools)
             return new_msg[0]
 
     async def _execute_tool_call(self, tool_call, newest_messages, chain, chain_id, depth, have_set_tools, enable_streaming):
@@ -464,7 +438,7 @@ async def _execute_tool_call(self, tool_call, newest_messages, chain, chain_id,
         # Create action node
         action_node = chain.add_node(
             type="Action",
-            messages=deepcopy(newest_messages),
+            messages=newest_messages.copy(),
             description=tool_name
         )
         
@@ -502,7 +476,7 @@ async def _execute_tool_call(self, tool_call, newest_messages, chain, chain_id,
     async def _finalize_chain(self, chain_id, chain, current_node, depth):
         """Finalize the chain with reward calculation and cleanup."""
         if self._reward_fn is not None:
-            trajectory = current_node.messages
+            trajectory = current_node.messages.messages
             final_response = self.extract_final_response(trajectory)
             other_args = {k: v for k, v in chain.info.items() if k not in ['prediction', 'trajectory', 'id']}
             chain.info["reward"] = await self._reward_fn(prediction=final_response, **other_args, trajectory=trajectory, id=chain_id)
diff --git a/agents/agents/agents/chain/streaming_observer.py b/agentfly/agents/chain/streaming_observer.py
similarity index 100%
rename from agents/agents/agents/chain/streaming_observer.py
rename to agentfly/agents/chain/streaming_observer.py
diff --git a/agents/agents/agents/chain/websocket_streaming.py b/agentfly/agents/chain/websocket_streaming.py
similarity index 100%
rename from agents/agents/agents/chain/websocket_streaming.py
rename to agentfly/agents/chain/websocket_streaming.py
diff --git a/agents/agents/agents/llm_backend.py b/agentfly/agents/llm_backend.py
similarity index 100%
rename from agents/agents/agents/llm_backend.py
rename to agentfly/agents/llm_backend.py
diff --git a/agentfly/agents/parsers/__init__.py b/agentfly/agents/parsers/__init__.py
new file mode 100644
index 0000000..7fbeb4f
--- /dev/null
+++ b/agentfly/agents/parsers/__init__.py
@@ -0,0 +1 @@
+from .utils import extract_tool_calls
\ No newline at end of file
diff --git a/agentfly/agents/parsers/utils.py b/agentfly/agents/parsers/utils.py
new file mode 100644
index 0000000..53ced5f
--- /dev/null
+++ b/agentfly/agents/parsers/utils.py
@@ -0,0 +1,31 @@
+import re
+from typing import List, Dict
+import json
+from ..utils.json import jsonish
+
+def extract_tool_calls(action_input: str) -> List[Dict]:
+    """
+    Extract at most one tool call from ``action_input``.
+
+    Tries the first ``<tool_call>...</tool_call>`` span, then the whole string;
+    each candidate is parsed with ``jsonish``. Returns ``[]`` if nothing parses.
+    """
+    if action_input is None:
+        return []
+
+    # 1. Qwen style: payload wrapped in <tool_call> tags
+    pattern = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL)
+    m = pattern.search(action_input)
+    if m:
+        try:
+            return [jsonish(m.group(1).strip())]
+        except Exception:
+            # Tagged payload did not parse; fall back to the raw input below.
+            pass
+
+    # 2. Treat the whole input as the tool call
+    try:
+        return [jsonish(action_input)]
+    except Exception:
+        # Neither form parsed: report no tool calls (best effort, never raises).
+        return []
\ No newline at end of file
diff --git a/agents/agents/agents/react/__init__.py b/agentfly/agents/react/__init__.py
similarity index 100%
rename from agents/agents/agents/react/__init__.py
rename to agentfly/agents/react/__init__.py
diff --git a/agents/agents/agents/react/react_agent.py b/agentfly/agents/react/react_agent.py
similarity index 89%
rename from agents/agents/agents/react/react_agent.py
rename to agentfly/agents/react/react_agent.py
index 3c52228..4853274 100644
--- a/agents/agents/agents/react/react_agent.py
+++ b/agentfly/agents/react/react_agent.py
@@ -4,6 +4,7 @@
 from typing import Any, Dict, List, Optional
 from ..utils.json import jsonish
 from ...tools.tool_base import Tool
+from ..parsers import extract_tool_calls
 try:
     from verl.protocol import DataProto
 except ImportError:
@@ -12,6 +13,10 @@
 import torch
 import numpy as np
 import re
+import logging
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 def parse_react_step(text: str) -> Dict[str, Optional[str]]:
     """
@@ -46,33 +51,6 @@ def parse_react_step(text: str) -> Dict[str, Optional[str]]:
     
     return result
 
-def extract_tool_calls(action_input: str) -> List[Dict]:
-    if action_input is None:
-        return []
-    
-    tool_call_str = ""
-    # Extract the tool call from the action input
-    # 1. Extract with qwen style
-    pattern = re.compile(r"<tool_call>\s*(.*?)\s*</tool_call>", re.DOTALL)
-    m = pattern.search(action_input)
-    # If we find a tool call, extract it
-    if m:
-        tool_call_str = m.group(1).strip()
-        try:
-            tool_call = jsonish(tool_call_str)
-            return [tool_call]
-        except:
-            pass
-    
-    # 2. Extract directly
-    try:
-        tool_call = jsonish(action_input)
-        return [tool_call]
-    except:
-        pass
-    
-    return []
-
 
 ReactSystemPromptTemplate = """You are a ReAct-style agent. When you receive a user query, in each step, you must:
 
@@ -156,6 +134,8 @@ def parse(self, responses: List[str], tools: List[Any]) -> List[Dict]:
             else:
                 tool_calls = extract_tool_calls(action_input)
             
+            logger.debug(f"[ReactAgent] extracted tool_calls: {tool_calls}")
+
             formatted_tool_calls = []
             # We only support one tool call for now
             if len(tool_calls) == 1:
diff --git a/agents/agents/agents/specialized/__init__.py b/agentfly/agents/specialized/__init__.py
similarity index 100%
rename from agents/agents/agents/specialized/__init__.py
rename to agentfly/agents/specialized/__init__.py
diff --git a/agents/agents/agents/specialized/code_agent.py b/agentfly/agents/specialized/code_agent.py
similarity index 100%
rename from agents/agents/agents/specialized/code_agent.py
rename to agentfly/agents/specialized/code_agent.py
diff --git a/agents/agents/agents/specialized/gui_agent.py b/agentfly/agents/specialized/gui_agent.py
similarity index 99%
rename from agents/agents/agents/specialized/gui_agent.py
rename to agentfly/agents/specialized/gui_agent.py
index 9436819..654a4f8 100644
--- a/agents/agents/agents/specialized/gui_agent.py
+++ b/agentfly/agents/specialized/gui_agent.py
@@ -5,7 +5,7 @@
 import logging
 from typing import List, Any, Tuple, Dict, Optional
 from ..agent_base import BaseAgent
-from agents.utils.ui_action_parser import parse_action_to_structure_output, IMAGE_FACTOR
+from ...utils.ui_action_parser import parse_action_to_structure_output, IMAGE_FACTOR
 
 logger = logging.getLogger(__name__)
 
diff --git a/agentfly/agents/specialized/hf_agent.py b/agentfly/agents/specialized/hf_agent.py
new file mode 100644
index 0000000..df75ba5
--- /dev/null
+++ b/agentfly/agents/specialized/hf_agent.py
@@ -0,0 +1,47 @@
+
+import json
+from typing import Dict, List
+from ..agent_base import BaseAgent
+from ..parsers import extract_tool_calls
+
+
+class HFAgent(BaseAgent):
+    def __init__(self, model_name_or_path: str, **kwargs):
+        super().__init__(model_name_or_path, **kwargs)
+
+    def parse(self, responses: List[str], **kwargs) -> List[Dict]:
+        """Build one assistant message per response (``kwargs`` is unused).
+
+        A function-style tool call is attached only when exactly one call is
+        extracted and it carries both "name" and "arguments".
+        """
+        new_messages_list = []
+        for response in responses:
+            formatted_tool_calls = []
+            tool_calls = extract_tool_calls(response)
+            if len(tool_calls) == 1:
+                try:
+                    tool_call = tool_calls[0]
+                    if isinstance(tool_call, str):
+                        tool_call = json.loads(tool_call)
+                    # Append inside the check so stale values are never reused.
+                    if "name" in tool_call and "arguments" in tool_call:
+                        formatted_tool_calls.append({
+                            "id": None,
+                            "type": "function",
+                            "function": {
+                                "name": tool_call["name"],
+                                "arguments": tool_call["arguments"],
+                            }
+                        })
+                except Exception:
+                    # Best effort: a malformed call yields a plain text message.
+                    pass
+            message = {
+                "role": "assistant",
+                "content": [{"type": "text", "text": response}],
+                "tool_calls": formatted_tool_calls,
+                "loss": True
+            }
+            new_messages_list.append(message)
+        return new_messages_list
\ No newline at end of file
diff --git a/agents/agents/agents/specialized/openai_agent.py b/agentfly/agents/specialized/openai_agent.py
similarity index 100%
rename from agents/agents/agents/specialized/openai_agent.py
rename to agentfly/agents/specialized/openai_agent.py
diff --git a/agents/agents/agents/specialized/think_agent.py b/agentfly/agents/specialized/think_agent.py
similarity index 100%
rename from agents/agents/agents/specialized/think_agent.py
rename to agentfly/agents/specialized/think_agent.py
diff --git a/agents/agents/agents/templates/__init__.py b/agentfly/agents/templates/__init__.py
similarity index 100%
rename from agents/agents/agents/templates/__init__.py
rename to agentfly/agents/templates/__init__.py
diff --git a/agents/agents/agents/templates/constants.py b/agentfly/agents/templates/constants.py
similarity index 100%
rename from agents/agents/agents/templates/constants.py
rename to agentfly/agents/templates/constants.py
diff --git a/agents/agents/agents/templates/preprocess.py b/agentfly/agents/templates/preprocess.py
similarity index 100%
rename from agents/agents/agents/templates/preprocess.py
rename to agentfly/agents/templates/preprocess.py
diff --git a/agents/agents/agents/templates/system_policy.py b/agentfly/agents/templates/system_policy.py
similarity index 100%
rename from agents/agents/agents/templates/system_policy.py
rename to agentfly/agents/templates/system_policy.py
diff --git a/agents/agents/agents/templates/templates.py b/agentfly/agents/templates/templates.py
similarity index 100%
rename from agents/agents/agents/templates/templates.py
rename to agentfly/agents/templates/templates.py
diff --git a/agents/agents/agents/templates/tool_policy.py b/agentfly/agents/templates/tool_policy.py
similarity index 100%
rename from agents/agents/agents/templates/tool_policy.py
rename to agentfly/agents/templates/tool_policy.py
diff --git a/agents/agents/agents/templates/utils.py b/agentfly/agents/templates/utils.py
similarity index 100%
rename from agents/agents/agents/templates/utils.py
rename to agentfly/agents/templates/utils.py
diff --git a/agents/agents/agents/templates/vision_processor.py b/agentfly/agents/templates/vision_processor.py
similarity index 100%
rename from agents/agents/agents/templates/vision_processor.py
rename to agentfly/agents/templates/vision_processor.py
diff --git a/agents/agents/agents/utils/__init__.py b/agentfly/agents/utils/__init__.py
similarity index 100%
rename from agents/agents/agents/utils/__init__.py
rename to agentfly/agents/utils/__init__.py
diff --git a/agents/agents/agents/utils/json.py b/agentfly/agents/utils/json.py
similarity index 100%
rename from agents/agents/agents/utils/json.py
rename to agentfly/agents/utils/json.py
diff --git a/agentfly/agents/utils/messages.py b/agentfly/agents/utils/messages.py
new file mode 100644
index 0000000..a388319
--- /dev/null
+++ b/agentfly/agents/utils/messages.py
@@ -0,0 +1,356 @@
+from __future__ import annotations
+from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Sequence, Union
+from copy import deepcopy
+import numpy as np
+
+Part = Dict[str, Any]          # one content part, e.g., {"type": "text", "text": "..."} or {"type": "image", "image": "..."}
+Turn = Dict[str, Any]          # one turn: {"role": "...", "content": [Part, ...], ...}
+MessageDict = Dict[str, Any]   # one item: {"messages": [Turn, ...], <other meta keys>...}
+
+
+class MessagesValidationError(ValueError):
+    """Raised by Messages / MessagesList when input does not have the expected structure."""
+
+
+class Messages:
+    """
+    One message item (a dict with key 'messages'). Validates that each turn has:
+      - 'role': str
+      - 'content': List[Part] (a bare str or a single part dict is wrapped
+        into a list when allow_legacy_content=True; otherwise only lists pass)
+    Each Part must have a 'type'. 'text' and 'image' parts get built-in checks;
+    any other type is passed through. `strict` is stored and reported by
+    __repr__/copy() but is not consulted by the validators in this class.
+    """
+
+    __slots__ = ("_data", "_strict", "_allow_legacy_content")
+
+    def __init__(
+        self,
+        data: Mapping[str, Any],
+        *,
+        strict: bool = True,
+        allow_legacy_content: bool = True,
+    ) -> None:
+        """Validate `data` and keep a shallow copy of it with normalized turns."""
+        if not isinstance(data, Mapping):
+            raise MessagesValidationError(f"Message must be a dict, got {type(data).__name__}")
+        if "messages" not in data:
+            raise MessagesValidationError("Message dict must contain a 'messages' key.")
+        self._strict = strict
+        self._allow_legacy_content = allow_legacy_content
+        # The flags are set first because _validate_turn reads _allow_legacy_content.
+        self._data: MessageDict = dict(data)
+        self._data["messages"] = self._validate_many_turns(data["messages"])
+
+    def _validate_part(self, p: Mapping[str, Any], idx: int | None = None) -> Part:
+        """
+        Check one content part and return a shallow copy (`idx` only feeds error text).
+        Raises MessagesValidationError for a non-dict, a missing 'type', a 'text' part
+        without a str 'text', or an 'image' part with neither 'image' nor 'image_url'.
+        """
+        where = "" if idx is None else f" at index {idx}"
+        if not isinstance(p, Mapping):
+            raise MessagesValidationError(f"Message content part{where} must be a dict, got {type(p).__name__}")
+        if "type" not in p:
+            raise MessagesValidationError(f"Message content part{where} missing 'type': {p}")
+
+        ptype = p["type"]
+        if ptype == "text":
+            if "text" not in p:
+                raise MessagesValidationError("Text part missing 'text' field.")
+            if not isinstance(p["text"], str):
+                raise MessagesValidationError("Text part 'text' must be a string.")
+        elif ptype == "image":
+            # accept either 'image' (e.g., base64) or 'image_url'
+            if "image" not in p and "image_url" not in p:
+                raise MessagesValidationError("Image part must include 'image' or 'image_url'.")
+        # Unknown part types only need 'type' and are passed through.
+        return dict(p)
+
+    def _coerce_to_parts(self, content: Any) -> List[Part]:
+        """Coerce legacy content (a list, a str, or a single part dict) into validated parts."""
+        if isinstance(content, list):
+            return [self._validate_part(p, i) for i, p in enumerate(content)]
+        # A plain string is shorthand for a single text part.
+        if isinstance(content, str):
+            content = {"type": "text", "text": content}
+        # A single part dict is wrapped in a list, as the class and add() docstrings promise.
+        if isinstance(content, Mapping):
+            return [self._validate_part(content, 0)]
+        # Anything else is invalid
+        raise MessagesValidationError(
+            f"Turn 'content' must be a list of a string or a dictionary with following format {{type: text, text: str}}, but got {type(content).__name__}."
+        )
+
+    # ---------- Turn validation ----------
+    def _validate_turn(self, t: Mapping[str, Any], idx: int | None = None) -> Turn:
+        """
+        Check one turn and return a shallow copy whose 'content' is a list of parts.
+        A list 'content' must be non-empty; non-list 'content' goes through
+        _coerce_to_parts when allow_legacy_content is set and is rejected otherwise.
+        `idx` only feeds error text. Raises MessagesValidationError on any violation.
+        """
+        where = "" if idx is None else f" at index {idx}"
+        if not isinstance(t, Mapping):
+            raise MessagesValidationError(f"Turn{where} must be a dict, got {type(t).__name__}")
+        if "role" not in t:
+            raise MessagesValidationError(f"Turn{where} missing 'role'.")
+        role = t["role"]
+        if not isinstance(role, str):
+            raise MessagesValidationError("Turn 'role' must be a string.")
+        if "content" not in t:
+            raise MessagesValidationError("Turn missing 'content'.")
+
+        content = t["content"]
+        if isinstance(content, list):
+            content_parts = [self._validate_part(p, i) for i, p in enumerate(content)]
+            if not content_parts:
+                raise MessagesValidationError(f"Turn{where} 'content' must contain at least one part.")
+        elif self._allow_legacy_content:
+            content_parts = self._coerce_to_parts(content)
+        else:
+            # Plain (non-f) string, so the braces are single; doubled ones were shown verbatim.
+            raise MessagesValidationError("Turn 'content' must be a string or a list of dictionary with following format {type: text, text: str}.")
+        # Shallow copy with the normalized content swapped in; key order is kept.
+        return {**t, "role": role, "content": content_parts}
+
+    def _validate_many_turns(self, items: Iterable[Mapping[str, Any]]) -> List[Turn]:
+        """Validate every turn in `items`, passing its position along for error messages."""
+        return [self._validate_turn(m, i) for i, m in enumerate(items)]
+
+    # ---------- Turn mutation API ----------
+    def add(self, role: str, content: Any, **extra: Any) -> None:
+        """
+        Add a single turn. 'content' can be a list of parts or, if
+        allow_legacy_content=True, a string (text) or a single part dict.
+        `extra` keyword arguments become additional keys on the turn.
+        """
+        self.append({"role": role, "content": content, **extra})
+
+    def append(self, turn: Mapping[str, Any]) -> None:
+        """Validate `turn` and add it after the existing turns."""
+        self._data["messages"].append(self._validate_turn(turn))
+
+    def extend(self, turns: Iterable[Mapping[str, Any]]) -> None:
+        """Validate all of `turns`, then add them; none are added if one is invalid."""
+        self._data["messages"].extend(self._validate_many_turns(turns))
+
+    # ---------- Metadata helpers ----------
+    def set_meta(self, key: str, value: Any) -> None:
+        """Set one top-level meta key; 'messages' is reserved and raises MessagesValidationError."""
+        if key == "messages":
+            raise MessagesValidationError("Use turn methods to modify 'messages'.")
+        self._data[key] = value
+
+    def update_meta(self, **kwargs: Any) -> None:
+        """Set several top-level meta keys at once; 'messages' is reserved as in set_meta."""
+        if "messages" in kwargs:
+            raise MessagesValidationError("Use turn methods to modify 'messages'.")
+        self._data.update(kwargs)
+
+    # ---------- Accessors ----------
+    @property
+    def messages(self) -> List[Turn]:
+        """A new list of the turns; the turn dicts themselves are shared, not copied."""
+        return list(self._data["messages"])
+
+    @property
+    def meta(self) -> Dict[str, Any]:
+        """A new dict holding every top-level key except 'messages'."""
+        return {k: v for k, v in self._data.items() if k != "messages"}
+
+    def to_dict(self) -> MessageDict:
+        """Return a shallow copy of the whole item with a fresh 'messages' list."""
+        return {**self._data, "messages": list(self._data["messages"])}
+
+    def set_system_prompt(self, system_prompt: str, enforce: bool = True) -> None:
+        """Make `system_prompt` the leading system turn; if one exists, overwrite it when `enforce`, else raise."""
+        assert isinstance(system_prompt, str), "System prompt must be a string."
+        # Store the prompt as a list of parts, the same normalized shape validated turns have.
+        parts: List[Part] = [{"type": "text", "text": system_prompt}]
+        turns = self._data["messages"]
+        if not turns or turns[0]["role"] != "system":  # an empty conversation has no turn 0 to inspect
+            turns.insert(0, {"role": "system", "content": parts})
+        elif enforce:
+            turns[0]["content"] = parts
+        else:
+            raise MessagesValidationError("System prompt already exists.")
+
+    def __len__(self) -> int:
+        return len(self._data["messages"])
+
+    def __iter__(self):
+        return iter(self._data["messages"])
+
+    def __getitem__(self, idx: int) -> Turn:
+        return self._data["messages"][idx]
+
+    def __repr__(self) -> str:
+        # Use the real class name; the hard-coded "Message" did not match this class.
+        return f"{type(self).__name__}(turns={len(self)}, meta_keys={list(self.meta)}, strict={self._strict})"
+
+    def copy(self) -> "Messages":
+        """Return a deep copy, re-validated with the same strict/allow_legacy_content flags."""
+        return Messages(deepcopy(self._data), strict=self._strict, allow_legacy_content=self._allow_legacy_content)
+
+    # ---------- Constructors ----------
+    @classmethod
+    def from_turns(
+        cls,
+        turns: Iterable[Mapping[str, Any]],
+        *,
+        strict: bool = True,
+        allow_legacy_content: bool = True,
+        **meta: Any,
+    ) -> "Messages":
+        """Build an instance from bare turns; `meta` keyword arguments become top-level meta keys."""
+        if "messages" in meta:  # it would silently replace `turns`; reserved as in set_meta
+            raise MessagesValidationError("'messages' cannot be passed as a meta key; pass turns instead.")
+        return cls({"messages": list(turns), **meta}, strict=strict, allow_legacy_content=allow_legacy_content)
+
+
+class MessagesList:
+    """
+    A collection of `Messages` items (each wraps a dict with a 'messages' key).
+
+    `from_data(...)` normalizes any of the supported input shapes:
+      1) List[Dict] with 'messages'
+      2) List[List[Turn]]
+      3) Dict with 'messages'
+      4) List[Turn]
+
+    `default_meta` is merged only when wrapping raw turn lists (cases 2 & 4).
+    """
+
+    __slots__ = ("_items", "_strict", "_allow_legacy_content")
+
+    def __init__(
+        self,
+        items: Iterable[Union[Messages, Mapping[str, Any]]] | None = None,
+        *,
+        strict: bool = True,
+        allow_legacy_content: bool = True,
+    ):
+        """Store the validation flags, then add `items` (if truthy) through extend()."""
+        self._strict = strict
+        self._allow_legacy_content = allow_legacy_content
+        self._items: List[Messages] = []
+        if items:
+            self.extend(items)
+
+    @staticmethod
+    def _is_sequence(obj: Any) -> bool:
+        """True for Sequence instances other than str and bytes."""
+        return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes))
+
+    @classmethod
+    def from_data(
+        cls,
+        data: Union[Mapping[str, Any], Sequence[Any], np.ndarray],
+        *,
+        default_meta: Mapping[str, Any] | None = None,
+        strict: bool = True,
+        allow_legacy_content: bool = True,
+    ) -> "MessagesList":
+        """
+        Build a MessagesList from any of the four input shapes listed on the class.
+
+        A numpy array is converted with .tolist() first. For list input the shape
+        is chosen by inspecting the first element only. `default_meta` is copied
+        and applied to cases 2 and 4; `strict` and `allow_legacy_content` are
+        forwarded to every Messages that is built.
+
+        Raises MessagesValidationError for an unsupported type, an empty list, a
+        dict without 'messages', or a list that matches none of the shapes.
+        """
+        if isinstance(data, np.ndarray):
+            data = data.tolist()
+
+        flags = {"strict": strict, "allow_legacy_content": allow_legacy_content}
+        default_meta = dict(default_meta or {})
+        ms = cls(**flags)
+
+        # 3) Dict with 'messages'
+        if isinstance(data, Mapping):
+            if "messages" not in data:
+                raise MessagesValidationError("Dict input must contain a 'messages' key.")
+            ms.append(Messages(data, **flags))
+            return ms
+
+        # str/bytes are Sequences too, but never a valid container of items.
+        if not cls._is_sequence(data):
+            raise MessagesValidationError(f"Unsupported input type: {type(data).__name__}")
+
+        seq = list(data)
+        if not seq:
+            raise MessagesValidationError("Input is an empty list.")
+
+        first = seq[0]
+
+        # 1) List[Dict] each with 'messages'
+        if isinstance(first, Mapping) and "messages" in first:
+            for i, item in enumerate(seq):
+                if not isinstance(item, Mapping) or "messages" not in item:
+                    raise MessagesValidationError(
+                        f"List appears to be message items but index {i} is not a dict with 'messages'."
+                    )
+                ms.append(Messages(item, **flags))
+            return ms
+
+        # 2) List[List[Turn]]  (each inner list is a list of turns)
+        if cls._is_sequence(first) and (len(first) == 0 or (isinstance(first[0], Mapping) and "role" in first[0])):
+            for inner in seq:
+                ms.append(Messages.from_turns(inner, **flags, **default_meta))
+            return ms
+
+        # 4) List[Turn] (single item)
+        if isinstance(first, Mapping) and "role" in first:
+            ms.append(Messages.from_turns(seq, **flags, **default_meta))
+            return ms
+
+        raise MessagesValidationError(
+            "Input does not match any accepted format. "
+            "Supported: list of dicts with 'messages', list of turn-lists, "
+            "single dict with 'messages', or a single list of turn dicts."
+        )
+
+    # ---- collection API ----
+    def append(self, item: Union[Messages, Mapping[str, Any]]) -> None:
+        """Add one item; a Messages is stored as-is, a mapping is validated with this list's flags."""
+        if not isinstance(item, Messages):
+            item = Messages(item, strict=self._strict, allow_legacy_content=self._allow_legacy_content)
+        self._items.append(item)
+
+    def append_turns(self, turns: Iterable[Mapping[str, Any]], **meta: Any) -> None:
+        """Wrap bare `turns` (plus optional meta keys) in a new Messages and add it."""
+        self._items.append(
+            Messages.from_turns(turns, strict=self._strict, allow_legacy_content=self._allow_legacy_content, **meta)
+        )
+
+    def extend(self, items: Iterable[Union[Messages, Mapping[str, Any]]]) -> None:
+        """Add each element of `items` in order via append()."""
+        for it in items:
+            self.append(it)
+
+    # ---- accessors ----
+    def to_list(self) -> List[MessageDict]:
+        """Return the to_dict() form of every item, in order."""
+        return [m.to_dict() for m in self._items]
+
+    def __len__(self) -> int:
+        return len(self._items)
+
+    def __iter__(self):
+        return iter(self._items)
+
+    def __getitem__(self, idx: int) -> Messages:
+        return self._items[idx]
+
+    def __repr__(self) -> str:
+        # Use the real class name; the hard-coded "Messages" named the item class instead.
+        return f"{type(self).__name__}(n_items={len(self)}, strict={self._strict})"
+
+
+    
+
diff --git a/agents/agents/agents/utils/tokenizer.py b/agentfly/agents/utils/tokenizer.py
similarity index 100%
rename from agents/agents/agents/utils/tokenizer.py
rename to agentfly/agents/utils/tokenizer.py
diff --git a/agents/agents/configs/code.yaml b/agentfly/configs/code.yaml
similarity index 100%
rename from agents/agents/configs/code.yaml
rename to agentfly/configs/code.yaml
diff --git a/agents/agents/configs/redis/redis.conf b/agentfly/configs/redis/redis.conf
similarity index 100%
rename from agents/agents/configs/redis/redis.conf
rename to agentfly/configs/redis/redis.conf
diff --git a/agents/agents/configs/search.yaml b/agentfly/configs/search.yaml
similarity index 100%
rename from agents/agents/configs/search.yaml
rename to agentfly/configs/search.yaml
diff --git a/agents/agents/configs/vllm.yaml b/agentfly/configs/vllm.yaml
similarity index 100%
rename from agents/agents/configs/vllm.yaml
rename to agentfly/configs/vllm.yaml
diff --git a/agents/agents/envs/manager/__init__.py b/agentfly/dockers/__init__.py
similarity index 100%
rename from agents/agents/envs/manager/__init__.py
rename to agentfly/dockers/__init__.py
diff --git a/agents/agents/dockers/alfworld_env/Dockerfile b/agentfly/dockers/alfworld_env/Dockerfile
similarity index 100%
rename from agents/agents/dockers/alfworld_env/Dockerfile
rename to agentfly/dockers/alfworld_env/Dockerfile
diff --git a/agents/agents/dockers/alfworld_env/alfworld_http_server.py b/agentfly/dockers/alfworld_env/alfworld_http_server.py
similarity index 100%
rename from agents/agents/dockers/alfworld_env/alfworld_http_server.py
rename to agentfly/dockers/alfworld_env/alfworld_http_server.py
diff --git a/agents/agents/dockers/alfworld_env/base_config.yaml b/agentfly/dockers/alfworld_env/base_config.yaml
similarity index 100%
rename from agents/agents/dockers/alfworld_env/base_config.yaml
rename to agentfly/dockers/alfworld_env/base_config.yaml
diff --git a/agents/agents/dockers/alfworld_env/build.sh b/agentfly/dockers/alfworld_env/build.sh
similarity index 100%
rename from agents/agents/dockers/alfworld_env/build.sh
rename to agentfly/dockers/alfworld_env/build.sh
diff --git a/agents/agents/dockers/alfworld_env/build_image.sh b/agentfly/dockers/alfworld_env/build_image.sh
similarity index 100%
rename from agents/agents/dockers/alfworld_env/build_image.sh
rename to agentfly/dockers/alfworld_env/build_image.sh
diff --git a/agents/agents/dockers/alfworld_env/requirements.txt b/agentfly/dockers/alfworld_env/requirements.txt
similarity index 100%
rename from agents/agents/dockers/alfworld_env/requirements.txt
rename to agentfly/dockers/alfworld_env/requirements.txt
diff --git a/agents/agents/dockers/alfworld_env/start.sh b/agentfly/dockers/alfworld_env/start.sh
similarity index 100%
rename from agents/agents/dockers/alfworld_env/start.sh
rename to agentfly/dockers/alfworld_env/start.sh
diff --git a/agents/agents/dockers/build_alfworld.sh b/agentfly/dockers/build_alfworld.sh
similarity index 100%
rename from agents/agents/dockers/build_alfworld.sh
rename to agentfly/dockers/build_alfworld.sh
diff --git a/agents/agents/dockers/build_image.sh b/agentfly/dockers/build_image.sh
similarity index 100%
rename from agents/agents/dockers/build_image.sh
rename to agentfly/dockers/build_image.sh
diff --git a/agents/agents/dockers/python_env/Dockerfile b/agentfly/dockers/python_env/Dockerfile
similarity index 100%
rename from agents/agents/dockers/python_env/Dockerfile
rename to agentfly/dockers/python_env/Dockerfile
diff --git a/agents/agents/dockers/python_env/python_http_server.py b/agentfly/dockers/python_env/python_http_server.py
similarity index 100%
rename from agents/agents/dockers/python_env/python_http_server.py
rename to agentfly/dockers/python_env/python_http_server.py
diff --git a/agents/agents/dockers/python_env/requirements.txt b/agentfly/dockers/python_env/requirements.txt
similarity index 100%
rename from agents/agents/dockers/python_env/requirements.txt
rename to agentfly/dockers/python_env/requirements.txt
diff --git a/agents/agents/dockers/python_env/snippet_runner.py b/agentfly/dockers/python_env/snippet_runner.py
similarity index 100%
rename from agents/agents/dockers/python_env/snippet_runner.py
rename to agentfly/dockers/python_env/snippet_runner.py
diff --git a/agents/agents/dockers/python_env/start.sh b/agentfly/dockers/python_env/start.sh
similarity index 100%
rename from agents/agents/dockers/python_env/start.sh
rename to agentfly/dockers/python_env/start.sh
diff --git a/agents/agents/dockers/run_container.sh b/agentfly/dockers/run_container.sh
similarity index 100%
rename from agents/agents/dockers/run_container.sh
rename to agentfly/dockers/run_container.sh
diff --git a/agents/agents/dockers/scienceworld_env/Dockerfile b/agentfly/dockers/scienceworld_env/Dockerfile
similarity index 100%
rename from agents/agents/dockers/scienceworld_env/Dockerfile
rename to agentfly/dockers/scienceworld_env/Dockerfile
diff --git a/agents/agents/dockers/scienceworld_env/build_image.sh b/agentfly/dockers/scienceworld_env/build_image.sh
similarity index 100%
rename from agents/agents/dockers/scienceworld_env/build_image.sh
rename to agentfly/dockers/scienceworld_env/build_image.sh
diff --git a/agents/agents/dockers/scienceworld_env/requirements.txt b/agentfly/dockers/scienceworld_env/requirements.txt
similarity index 100%
rename from agents/agents/dockers/scienceworld_env/requirements.txt
rename to agentfly/dockers/scienceworld_env/requirements.txt
diff --git a/agents/agents/dockers/scienceworld_env/run_container.sh b/agentfly/dockers/scienceworld_env/run_container.sh
similarity index 100%
rename from agents/agents/dockers/scienceworld_env/run_container.sh
rename to agentfly/dockers/scienceworld_env/run_container.sh
diff --git a/agents/agents/dockers/scienceworld_env/scienceworld_server.py b/agentfly/dockers/scienceworld_env/scienceworld_server.py
similarity index 100%
rename from agents/agents/dockers/scienceworld_env/scienceworld_server.py
rename to agentfly/dockers/scienceworld_env/scienceworld_server.py
diff --git a/agents/agents/dockers/scienceworld_env/start.sh b/agentfly/dockers/scienceworld_env/start.sh
similarity index 100%
rename from agents/agents/dockers/scienceworld_env/start.sh
rename to agentfly/dockers/scienceworld_env/start.sh
diff --git a/agents/agents/dockers/webshop_env/Dockerfile b/agentfly/dockers/webshop_env/Dockerfile
similarity index 100%
rename from agents/agents/dockers/webshop_env/Dockerfile
rename to agentfly/dockers/webshop_env/Dockerfile
diff --git a/agents/agents/dockers/webshop_env/build_image.sh b/agentfly/dockers/webshop_env/build_image.sh
similarity index 100%
rename from agents/agents/dockers/webshop_env/build_image.sh
rename to agentfly/dockers/webshop_env/build_image.sh
diff --git a/agents/agents/dockers/webshop_env/engine.py b/agentfly/dockers/webshop_env/engine.py
similarity index 100%
rename from agents/agents/dockers/webshop_env/engine.py
rename to agentfly/dockers/webshop_env/engine.py
diff --git a/agents/agents/dockers/webshop_env/goal.py b/agentfly/dockers/webshop_env/goal.py
similarity index 100%
rename from agents/agents/dockers/webshop_env/goal.py
rename to agentfly/dockers/webshop_env/goal.py
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.fdm b/agentfly/dockers/webshop_env/indexes/_1.fdm
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.fdm
rename to agentfly/dockers/webshop_env/indexes/_1.fdm
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.fdt b/agentfly/dockers/webshop_env/indexes/_1.fdt
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.fdt
rename to agentfly/dockers/webshop_env/indexes/_1.fdt
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.fdx b/agentfly/dockers/webshop_env/indexes/_1.fdx
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.fdx
rename to agentfly/dockers/webshop_env/indexes/_1.fdx
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.fnm b/agentfly/dockers/webshop_env/indexes/_1.fnm
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.fnm
rename to agentfly/dockers/webshop_env/indexes/_1.fnm
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.nvd b/agentfly/dockers/webshop_env/indexes/_1.nvd
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.nvd
rename to agentfly/dockers/webshop_env/indexes/_1.nvd
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.nvm b/agentfly/dockers/webshop_env/indexes/_1.nvm
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.nvm
rename to agentfly/dockers/webshop_env/indexes/_1.nvm
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.si b/agentfly/dockers/webshop_env/indexes/_1.si
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.si
rename to agentfly/dockers/webshop_env/indexes/_1.si
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.tvd b/agentfly/dockers/webshop_env/indexes/_1.tvd
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.tvd
rename to agentfly/dockers/webshop_env/indexes/_1.tvd
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.tvm b/agentfly/dockers/webshop_env/indexes/_1.tvm
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.tvm
rename to agentfly/dockers/webshop_env/indexes/_1.tvm
diff --git a/agents/agents/dockers/webshop_env/indexes/_1.tvx b/agentfly/dockers/webshop_env/indexes/_1.tvx
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1.tvx
rename to agentfly/dockers/webshop_env/indexes/_1.tvx
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.doc b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.doc
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.doc
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.doc
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.dvd b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.dvd
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.dvd
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.dvd
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.dvm b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.dvm
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.dvm
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.dvm
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.pos b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.pos
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.pos
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.pos
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.tim b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.tim
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.tim
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.tim
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.tip b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.tip
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.tip
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.tip
diff --git a/agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.tmd b/agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.tmd
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/_1_Lucene90_0.tmd
rename to agentfly/dockers/webshop_env/indexes/_1_Lucene90_0.tmd
diff --git a/agents/agents/dockers/webshop_env/indexes/segments_2 b/agentfly/dockers/webshop_env/indexes/segments_2
similarity index 100%
rename from agents/agents/dockers/webshop_env/indexes/segments_2
rename to agentfly/dockers/webshop_env/indexes/segments_2
diff --git a/agents/agents/dockers/webshop_env/normalize.py b/agentfly/dockers/webshop_env/normalize.py
similarity index 100%
rename from agents/agents/dockers/webshop_env/normalize.py
rename to agentfly/dockers/webshop_env/normalize.py
diff --git a/agents/agents/dockers/webshop_env/requirements.txt b/agentfly/dockers/webshop_env/requirements.txt
similarity index 100%
rename from agents/agents/dockers/webshop_env/requirements.txt
rename to agentfly/dockers/webshop_env/requirements.txt
diff --git a/agents/agents/dockers/webshop_env/run_container.sh b/agentfly/dockers/webshop_env/run_container.sh
similarity index 100%
rename from agents/agents/dockers/webshop_env/run_container.sh
rename to agentfly/dockers/webshop_env/run_container.sh
diff --git a/agents/agents/dockers/webshop_env/start.sh b/agentfly/dockers/webshop_env/start.sh
similarity index 100%
rename from agents/agents/dockers/webshop_env/start.sh
rename to agentfly/dockers/webshop_env/start.sh
diff --git a/agents/agents/dockers/webshop_env/static/images/no-image-available.png b/agentfly/dockers/webshop_env/static/images/no-image-available.png
similarity index 100%
rename from agents/agents/dockers/webshop_env/static/images/no-image-available.png
rename to agentfly/dockers/webshop_env/static/images/no-image-available.png
diff --git a/agents/agents/dockers/webshop_env/static/style.css b/agentfly/dockers/webshop_env/static/style.css
similarity index 100%
rename from agents/agents/dockers/webshop_env/static/style.css
rename to agentfly/dockers/webshop_env/static/style.css
diff --git a/agents/agents/dockers/webshop_env/templates/attributes_page.html b/agentfly/dockers/webshop_env/templates/attributes_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/attributes_page.html
rename to agentfly/dockers/webshop_env/templates/attributes_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/description_page.html b/agentfly/dockers/webshop_env/templates/description_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/description_page.html
rename to agentfly/dockers/webshop_env/templates/description_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/done_page.html b/agentfly/dockers/webshop_env/templates/done_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/done_page.html
rename to agentfly/dockers/webshop_env/templates/done_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/features_page.html b/agentfly/dockers/webshop_env/templates/features_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/features_page.html
rename to agentfly/dockers/webshop_env/templates/features_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/item_page.html b/agentfly/dockers/webshop_env/templates/item_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/item_page.html
rename to agentfly/dockers/webshop_env/templates/item_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/results_page.html b/agentfly/dockers/webshop_env/templates/results_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/results_page.html
rename to agentfly/dockers/webshop_env/templates/results_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/review_page.html b/agentfly/dockers/webshop_env/templates/review_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/review_page.html
rename to agentfly/dockers/webshop_env/templates/review_page.html
diff --git a/agents/agents/dockers/webshop_env/templates/search_page.html b/agentfly/dockers/webshop_env/templates/search_page.html
similarity index 100%
rename from agents/agents/dockers/webshop_env/templates/search_page.html
rename to agentfly/dockers/webshop_env/templates/search_page.html
diff --git a/agents/agents/dockers/webshop_env/utils.py b/agentfly/dockers/webshop_env/utils.py
similarity index 100%
rename from agents/agents/dockers/webshop_env/utils.py
rename to agentfly/dockers/webshop_env/utils.py
diff --git a/agents/agents/dockers/webshop_env/webshop_simulator_server.py b/agentfly/dockers/webshop_env/webshop_simulator_server.py
similarity index 100%
rename from agents/agents/dockers/webshop_env/webshop_simulator_server.py
rename to agentfly/dockers/webshop_env/webshop_simulator_server.py
diff --git a/agents/agents/dockers/workload_test.py b/agentfly/dockers/workload_test.py
similarity index 100%
rename from agents/agents/dockers/workload_test.py
rename to agentfly/dockers/workload_test.py
diff --git a/agents/agents/envs/__init__.py b/agentfly/envs/__init__.py
similarity index 62%
rename from agents/agents/envs/__init__.py
rename to agentfly/envs/__init__.py
index 3ab2108..5e7bbeb 100644
--- a/agents/agents/envs/__init__.py
+++ b/agentfly/envs/__init__.py
@@ -3,11 +3,3 @@
 from .webshop_text_env import WebAgentTextEnv
 from .scienceworld_env import ScienceWorldEnv
 from .manager.enroot import clear_enroot_containers
-
-__all__ = [
-    "PythonSandboxEnv",
-    "ALFWorldEnv",
-    "WebAgentTextEnv",
-    "ScienceWorldEnv",
-    "clear_enroot_containers",
-]
\ No newline at end of file
diff --git a/agents/agents/envs/alfworld_env.py b/agentfly/envs/alfworld_env.py
similarity index 100%
rename from agents/agents/envs/alfworld_env.py
rename to agentfly/envs/alfworld_env.py
diff --git a/agents/agents/envs/env_base.py b/agentfly/envs/env_base.py
similarity index 100%
rename from agents/agents/envs/env_base.py
rename to agentfly/envs/env_base.py
diff --git a/agents/agents/rewards/llm_as_judge/__init__.py b/agentfly/envs/manager/__init__.py
similarity index 100%
rename from agents/agents/rewards/llm_as_judge/__init__.py
rename to agentfly/envs/manager/__init__.py
diff --git a/agents/agents/envs/manager/enroot.py b/agentfly/envs/manager/enroot.py
similarity index 100%
rename from agents/agents/envs/manager/enroot.py
rename to agentfly/envs/manager/enroot.py
diff --git a/agents/agents/envs/manager/env_manager.py b/agentfly/envs/manager/env_manager.py
similarity index 100%
rename from agents/agents/envs/manager/env_manager.py
rename to agentfly/envs/manager/env_manager.py
diff --git a/agents/agents/envs/manager/resource.py b/agentfly/envs/manager/resource.py
similarity index 100%
rename from agents/agents/envs/manager/resource.py
rename to agentfly/envs/manager/resource.py
diff --git a/agents/agents/envs/manager/udocker.py b/agentfly/envs/manager/udocker.py
similarity index 100%
rename from agents/agents/envs/manager/udocker.py
rename to agentfly/envs/manager/udocker.py
diff --git a/agents/agents/envs/manager/warm_pool.py b/agentfly/envs/manager/warm_pool.py
similarity index 100%
rename from agents/agents/envs/manager/warm_pool.py
rename to agentfly/envs/manager/warm_pool.py
diff --git a/agents/agents/envs/osshell_env.py b/agentfly/envs/osshell_env.py
similarity index 100%
rename from agents/agents/envs/osshell_env.py
rename to agentfly/envs/osshell_env.py
diff --git a/agents/agents/envs/python_env.py b/agentfly/envs/python_env.py
similarity index 100%
rename from agents/agents/envs/python_env.py
rename to agentfly/envs/python_env.py
diff --git a/agents/agents/envs/redis_env.py b/agentfly/envs/redis_env.py
similarity index 100%
rename from agents/agents/envs/redis_env.py
rename to agentfly/envs/redis_env.py
diff --git a/agents/agents/envs/scienceworld_env.py b/agentfly/envs/scienceworld_env.py
similarity index 100%
rename from agents/agents/envs/scienceworld_env.py
rename to agentfly/envs/scienceworld_env.py
diff --git a/agents/agents/envs/webshop_text_env.py b/agentfly/envs/webshop_text_env.py
similarity index 99%
rename from agents/agents/envs/webshop_text_env.py
rename to agentfly/envs/webshop_text_env.py
index e481f68..1648f46 100644
--- a/agents/agents/envs/webshop_text_env.py
+++ b/agentfly/envs/webshop_text_env.py
@@ -357,11 +357,13 @@ async def aclose(self) -> None:
             await self._client.aclose()
             self._client = None
 
-    async def close(self) -> None:
+    def close(self) -> None:
         """
-        Release everything allocated by the environment (alias for aclose).
+        Kill the environment's container, if any. Does not close self._client; see aclose().
         """
-        await self.aclose()
+        if self._container:
+            self._container.kill()
+            self._container = None
 
     async def _connect(self):
         """
diff --git a/agents/pytest.ini b/agentfly/pytest.ini
similarity index 100%
rename from agents/pytest.ini
rename to agentfly/pytest.ini
diff --git a/agents/requirements.txt b/agentfly/requirements.txt
similarity index 96%
rename from agents/requirements.txt
rename to agentfly/requirements.txt
index 8a857b0..0040f90 100644
--- a/agents/requirements.txt
+++ b/agentfly/requirements.txt
@@ -16,3 +16,4 @@ bs4
 qwen_vl_utils
 onnxruntime
 mpmath
+wandb
diff --git a/agentfly/rewards/__init__.py b/agentfly/rewards/__init__.py
new file mode 100644
index 0000000..09ba592
--- /dev/null
+++ b/agentfly/rewards/__init__.py
@@ -0,0 +1,20 @@
+from .reward_base import (
+    RewardFunction,
+    get_reward_from_name,
+    get_rewards_from_names,
+    list_available_rewards,
+    register_reward,
+    reward,
+)
+from .qa_reward import qa_f1_reward
+from .math_reward import (
+    math_reward,
+    math_reward_tool,
+    math_reward_think,
+    math_reward_string_equal,
+)
+from .webshop_reward import webshop_reward
+from .alfworld_reward import alfworld_episode_reward
+from .scienceworld_reward import scienceworld_reward
+from .gui_reward import gui_reward
+
diff --git a/agents/agents/rewards/alfworld_reward.py b/agentfly/rewards/alfworld_reward.py
similarity index 100%
rename from agents/agents/rewards/alfworld_reward.py
rename to agentfly/rewards/alfworld_reward.py
diff --git a/agents/agents/rewards/code_reward.py b/agentfly/rewards/code_reward.py
similarity index 100%
rename from agents/agents/rewards/code_reward.py
rename to agentfly/rewards/code_reward.py
diff --git a/agents/agents/rewards/gui_reward.py b/agentfly/rewards/gui_reward.py
similarity index 99%
rename from agents/agents/rewards/gui_reward.py
rename to agentfly/rewards/gui_reward.py
index 8a118c3..a441f15 100644
--- a/agents/agents/rewards/gui_reward.py
+++ b/agentfly/rewards/gui_reward.py
@@ -8,7 +8,7 @@
 from typing import Dict, Any, List, Tuple, Optional
 
 from .reward_base import reward
-from agents.utils.ui_action_parser import parse_action_to_structure_output, IMAGE_FACTOR
+from ..utils.ui_action_parser import parse_action_to_structure_output, IMAGE_FACTOR
 
 logger = logging.getLogger(__name__)
 
diff --git a/agents/agents/tools/src/__init__.py b/agentfly/rewards/llm_as_judge/__init__.py
similarity index 100%
rename from agents/agents/tools/src/__init__.py
rename to agentfly/rewards/llm_as_judge/__init__.py
diff --git a/agents/agents/rewards/llm_as_judge/llm_as_judge_client.py b/agentfly/rewards/llm_as_judge/llm_as_judge_client.py
similarity index 100%
rename from agents/agents/rewards/llm_as_judge/llm_as_judge_client.py
rename to agentfly/rewards/llm_as_judge/llm_as_judge_client.py
diff --git a/agents/agents/rewards/llm_as_judge/llm_as_judge_reward.py b/agentfly/rewards/llm_as_judge/llm_as_judge_reward.py
similarity index 100%
rename from agents/agents/rewards/llm_as_judge/llm_as_judge_reward.py
rename to agentfly/rewards/llm_as_judge/llm_as_judge_reward.py
diff --git a/agents/agents/rewards/math_reward.py b/agentfly/rewards/math_reward.py
similarity index 92%
rename from agents/agents/rewards/math_reward.py
rename to agentfly/rewards/math_reward.py
index 0b63515..ec1738f 100644
--- a/agents/agents/rewards/math_reward.py
+++ b/agentfly/rewards/math_reward.py
@@ -530,47 +530,6 @@ def math_reward_thought_with_tool(prediction: str, answer: str, trajectory: List
     }
 
 
-@reward(name="math_reward_thought_with_tool")
-def math_reward_thought_with_tool(prediction: str, answer: str, trajectory: List[Dict]) -> float:
-    has_called_tool = False
-    for msg in trajectory:
-        if msg["role"] == "tool":
-            has_called_tool = True
-            break
-
-    all_have_thought = True
-    for msg in trajectory:
-        if msg["role"] == "assistant":
-            if isinstance(msg["content"], str):
-                content = msg["content"]
-            elif isinstance(msg["content"], list):
-                content = msg["content"][-1]["text"]
-            else:
-                raise ValueError(f"Invalid content type: {type(msg['content'])}")
-            if not content.strip().lower().startswith("thought"):
-                all_have_thought = False
-                break
-    
-    reward = 0.0
-    answer_correct = symbolic_math_equal(prediction, answer)
-    if not has_called_tool:
-        reward = 0.0
-    elif has_called_tool and not all_have_thought and not answer_correct:
-        reward = 0.0
-    elif has_called_tool and all_have_thought and not answer_correct:
-        reward = 0.1
-    elif has_called_tool and not all_have_thought and answer_correct:
-        reward = 0.0
-    elif has_called_tool and all_have_thought and answer_correct:
-        reward = 1.0
-    else:
-        raise ValueError(f"Invalid prediction or trajectory for math reward with format: Trajectory: {trajectory}")
-    return {
-        "reward": reward,
-        "acc": 1.0 if answer_correct else 0.0,
-    }
-
-
 def parse_thinking_response(response: str):
     try:
         # First try to match complete <think>...</think> pattern
@@ -648,6 +607,30 @@ def math_reward_think(prediction: str, answer: str, trajectory: List[Dict]) -> f
             "acc": 0.0,
         }
 
+
+@reward(name="math_reward_string_equal")
+def math_reward_string_equal(prediction: str, answer: str, trajectory: List[Dict]) -> float:
+    """Score a prediction by exact string match on its last integer.
+
+    Returns 0.0 when the trajectory contains no "tool" message, 1.0 when the
+    last run of digits in ``prediction`` equals ``answer``, and 0.1 otherwise.
+    """
+
+    def extract_last_number(s: str):
+        # Unsigned digit runs only: "72.5" yields "5" and "-3" yields "3".
+        matches = re.findall(r'\d+', s)
+        return matches[-1] if matches else None
+
+    # No tool call anywhere in the trajectory means no reward at all.
+    if not any(msg["role"] == "tool" for msg in trajectory):
+        return 0.0
+
+    # Normalise the reference so an int or whitespace-padded answer still matches.
+    if extract_last_number(prediction) == str(answer).strip():
+        return 1.0
+    return 0.1
+
+
 if __name__ == "__main__":
     result = symbolic_math_equal("I got answer is \\boxed{2/3}", "May be it's \\boxed{\\frac{2}{3}}")
     print(result)
\ No newline at end of file
diff --git a/agents/agents/rewards/qa_reward.py b/agentfly/rewards/qa_reward.py
similarity index 100%
rename from agents/agents/rewards/qa_reward.py
rename to agentfly/rewards/qa_reward.py
diff --git a/agents/agents/rewards/reward_base.py b/agentfly/rewards/reward_base.py
similarity index 100%
rename from agents/agents/rewards/reward_base.py
rename to agentfly/rewards/reward_base.py
diff --git a/agents/agents/rewards/scienceworld_reward.py b/agentfly/rewards/scienceworld_reward.py
similarity index 100%
rename from agents/agents/rewards/scienceworld_reward.py
rename to agentfly/rewards/scienceworld_reward.py
diff --git a/agents/agents/rewards/webshop_reward.py b/agentfly/rewards/webshop_reward.py
similarity index 100%
rename from agents/agents/rewards/webshop_reward.py
rename to agentfly/rewards/webshop_reward.py
diff --git a/agents/agents/tools/src/code/__init__.py b/agentfly/tests/__init__.py
similarity index 100%
rename from agents/agents/tools/src/code/__init__.py
rename to agentfly/tests/__init__.py
diff --git a/agentfly/tests/docs/start/quick_example.py b/agentfly/tests/docs/start/quick_example.py
new file mode 100644
index 0000000..b55f620
--- /dev/null
+++ b/agentfly/tests/docs/start/quick_example.py
@@ -0,0 +1,40 @@
+from agentfly.agents import HFAgent
+from agentfly.tools import calculate, answer_math
+import pytest
+from agentfly.rewards import math_reward_string_equal
+
+@pytest.mark.asyncio
+async def test_quick_example():
+    # Smoke test: run HFAgent with the `calculate` tool on one math word
+    # problem and print the resulting trajectories and rewards. Nothing is
+    # asserted, so the test only fails if construction or `run` raises.
+    #
+    # Commented-out alternative `messages` inputs, kept for reference:
+    #   a list holding one dict:
+    #     [{"messages": [{"role": "user",
+    #                     "content": "What is the result of 1 + 1?"}]}]
+    #   a bare list of turns:
+    #     [{"role": "user", "content": "What is the result of 1 + 1?"}]
+    # The dict form below also carries "answer" (presumably read by reward_fn).
+    messages = {
+        "messages": [
+            {"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?"}
+        ],
+        "answer": "72"
+    }
+    agent = HFAgent(
+        model_name_or_path="Qwen/Qwen2.5-3B-Instruct",
+        tools=[calculate],
+        reward_fn=math_reward_string_equal,
+        template="qwen2.5",
+        backend="async_vllm",
+    )
+    await agent.run(
+        messages=messages,
+        max_turns=3,
+        num_chains=5
+    )
+
+    trajectories = agent.trajectories
+    print(trajectories)
+    print(agent.rewards)
\ No newline at end of file
diff --git a/agents/tests/scripts/test_gpu_runs.sh b/agentfly/tests/scripts/test_gpu_runs.sh
similarity index 100%
rename from agents/tests/scripts/test_gpu_runs.sh
rename to agentfly/tests/scripts/test_gpu_runs.sh
diff --git a/agents/agents/tools/src/react/__init__.py b/agentfly/tests/unit/__init__.py
similarity index 100%
rename from agents/agents/tools/src/react/__init__.py
rename to agentfly/tests/unit/__init__.py
diff --git a/agents/agents/tools/src/search/__init__.py b/agentfly/tests/unit/agents/__init__.py
similarity index 100%
rename from agents/agents/tools/src/search/__init__.py
rename to agentfly/tests/unit/agents/__init__.py
diff --git a/agents/agents/utils/__init__.py b/agentfly/tests/unit/agents/messages/__init__.py
similarity index 100%
rename from agents/agents/utils/__init__.py
rename to agentfly/tests/unit/agents/messages/__init__.py
diff --git a/agentfly/tests/unit/agents/messages/test_messages.py b/agentfly/tests/unit/agents/messages/test_messages.py
new file mode 100644
index 0000000..e8deecb
--- /dev/null
+++ b/agentfly/tests/unit/agents/messages/test_messages.py
@@ -0,0 +1,73 @@
+from .....agents.utils.messages import Messages, MessagesList, MessagesValidationError
+import pytest
+
+def test_messages_init():
+    # 1) List of dicts, each with a "messages" list plus extra metadata keys
+    data1 = [
+        {
+            "messages": [
+                {"role": "user", "content": [{"type": "text", "text": "hi"}]},
+            ],
+            "run_id": 1,
+        },
+        {
+            "messages": [
+                {"role": "user", "content": [{"type": "text", "text": "hi"}]},
+            ],
+            "run_id": 2,
+            "other_key": "x",
+            "other_key2": "y",
+        }
+    ]
+    print(MessagesList.from_data(data1).to_list())
+
+    # 2) List of lists (each inner list is a turn list), with default_meta supplied
+    data2 = [
+        [
+            {"role": "user", "content": [{"type": "text", "text": "a"}]},
+        ],
+        [
+            {"role": "user", "content": [{"type": "image", "image": "BASE64..."}, {"type": "text", "text": "Describe"}]},
+            {"role": "assistant", "content": [{"type": "text", "text": "Looks like..."}]},
+        ],
+    ]
+    print(MessagesList.from_data(data2, default_meta={"source": "batch-42"}).to_list())
+
+    # 3) Single dict with "messages" and one extra key
+    data3 = {
+        "messages": [
+            {"role": "user", "content": [{"type": "text", "text": "ping"}]},
+        ],
+        "other_key": "x",
+    }
+    print(MessagesList.from_data(data3).to_list())
+
+    # 4) Flat list of turn dicts (presumably treated as a single item), with default_meta
+    data4 = [
+        {"role": "user", "content": [{"type": "image", "image_url": "https://example.com/cat.png"}]},
+        {"role": "user", "content": [{"type": "text", "text": "Describe the image"}]},
+    ]
+    print(MessagesList.from_data(data4, default_meta={"dataset": "demo"}).to_list())
+
+    # 5) Build a Messages object programmatically, then append it to a strict MessagesList
+    m = Messages.from_turns([], tag="manual")
+    m.add("user", [{"type": "text", "text": "Tell me a joke."}])
+    ms = MessagesList(strict=True)
+    ms.append(m)
+    print(ms.to_list())
+
+
+def test_messages_error_detection():
+    # List holding a single empty dict (no "messages" key): must be rejected
+    data = [
+        {}
+    ]
+    with pytest.raises(MessagesValidationError):
+        MessagesList.from_data(data)
+
+    # List of dicts with a "messages" key whose turn has a "role" but no "content": must be rejected
+    data = [
+        {"messages": [{"role": "user"}]},
+    ]
+    with pytest.raises(MessagesValidationError):
+        MessagesList.from_data(data)
diff --git a/agents/tests/unit/agents/mock_tests/__init__.py b/agentfly/tests/unit/agents/mock_tests/__init__.py
similarity index 100%
rename from agents/tests/unit/agents/mock_tests/__init__.py
rename to agentfly/tests/unit/agents/mock_tests/__init__.py
diff --git a/agents/tests/unit/agents/mock_tests/conftest.py b/agentfly/tests/unit/agents/mock_tests/conftest.py
similarity index 100%
rename from agents/tests/unit/agents/mock_tests/conftest.py
rename to agentfly/tests/unit/agents/mock_tests/conftest.py
diff --git a/agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py b/agentfly/tests/unit/agents/mock_tests/test_mock_agent_integration.py
similarity index 98%
rename from agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py
rename to agentfly/tests/unit/agents/mock_tests/test_mock_agent_integration.py
index e61a9d1..d2986aa 100644
--- a/agents/tests/unit/agents/mock_tests/test_mock_agent_integration.py
+++ b/agentfly/tests/unit/agents/mock_tests/test_mock_agent_integration.py
@@ -1,8 +1,8 @@
 import pytest
 from unittest.mock import Mock, patch, AsyncMock
-from agents.agents.auto import AutoAgent
-from agents.agents.react.react_agent import ReactAgent
-from agents.agents.specialized.code_agent import CodeAgent
+from .....agents.auto import AutoAgent
+from .....agents.react.react_agent import ReactAgent
+from .....agents.specialized.code_agent import CodeAgent
 
 
 class TestMockAgentIntegration:
@@ -230,7 +230,7 @@ def test_agent_template_compatibility(self, mock_tools, mock_chain_generation):
     def test_agent_async_operations_integration(self, mock_tools, mock_llm_engine):
         """Test async operations across different agent types"""
         # Mock LLM engine for both agents
-        with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
+        with patch('agentfly.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
             mock_setup.return_value = None
             
             # Test ReactAgent async operations
diff --git a/agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py b/agentfly/tests/unit/agents/mock_tests/test_mock_auto_agent.py
similarity index 97%
rename from agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py
rename to agentfly/tests/unit/agents/mock_tests/test_mock_auto_agent.py
index 365f795..eb79086 100644
--- a/agents/tests/unit/agents/mock_tests/test_mock_auto_agent.py
+++ b/agentfly/tests/unit/agents/mock_tests/test_mock_auto_agent.py
@@ -1,9 +1,9 @@
 import pytest
 from unittest.mock import Mock, patch, AsyncMock
-from agents.agents.auto import AutoAgent
-from agents.agents.react.react_agent import ReactAgent
-from agents.agents.specialized.code_agent import CodeAgent
-from agents.rewards import qa_f1_reward
+from .....agents.auto import AutoAgent
+from .....agents.react.react_agent import ReactAgent
+from .....agents.specialized.code_agent import CodeAgent
+from .....rewards import qa_f1_reward
 
 def test_auto_agent_registration():
     """Test agent registration functionality"""
diff --git a/agents/tests/unit/agents/mock_tests/test_mock_code_agent.py b/agentfly/tests/unit/agents/mock_tests/test_mock_code_agent.py
similarity index 95%
rename from agents/tests/unit/agents/mock_tests/test_mock_code_agent.py
rename to agentfly/tests/unit/agents/mock_tests/test_mock_code_agent.py
index 69bf824..63bfeab 100644
--- a/agents/tests/unit/agents/mock_tests/test_mock_code_agent.py
+++ b/agentfly/tests/unit/agents/mock_tests/test_mock_code_agent.py
@@ -1,6 +1,6 @@
 import pytest
 from unittest.mock import Mock, patch, AsyncMock
-from agents.agents.specialized.code_agent import CodeAgent, extract_python_code_markdown, CodeAgentSystemPrompt
+from agentfly.agents.specialized.code_agent import CodeAgent, extract_python_code_markdown, CodeAgentSystemPrompt
 
 def test_code_agent_initialization():
     """Test CodeAgent initialization without GPU dependencies"""
@@ -150,7 +150,7 @@ def test_code_agent_with_mock_llm_engine(mock_llm_engine):
     """Test CodeAgent with mocked LLM engine"""
     tools = ["code_interpreter"]
     
-    with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
+    with patch('agentfly.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
         mock_setup.return_value = None
         
         agent = CodeAgent(
@@ -208,7 +208,7 @@ def test_code_agent_chain_generation_integration(mock_chain_generation):
     """Test CodeAgent integration with chain generation methods"""
     tools = ["code_interpreter"]
     
-    with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
+    with patch('agentfly.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
         mock_setup.return_value = None
         
         agent = CodeAgent(
@@ -228,7 +228,7 @@ async def test_code_agent_async_operations(mock_llm_engine):
     """Test CodeAgent async operations with mocked dependencies"""
     tools = ["code_interpreter"]
     
-    with patch('agents.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
+    with patch('agentfly.agents.agent_base.BaseAgent._setup_backend') as mock_setup:
         mock_setup.return_value = None
         
         agent = CodeAgent(
diff --git a/agents/tests/unit/agents/mock_tests/test_mock_react_agent.py b/agentfly/tests/unit/agents/mock_tests/test_mock_react_agent.py
similarity index 99%
rename from agents/tests/unit/agents/mock_tests/test_mock_react_agent.py
rename to agentfly/tests/unit/agents/mock_tests/test_mock_react_agent.py
index 84baf3a..7721559 100644
--- a/agents/tests/unit/agents/mock_tests/test_mock_react_agent.py
+++ b/agentfly/tests/unit/agents/mock_tests/test_mock_react_agent.py
@@ -1,6 +1,6 @@
 import pytest
 from unittest.mock import Mock, patch, AsyncMock
-from agents.agents.react.react_agent import ReactAgent, parse_react_step, extract_tool_calls, ReactSystemPromptTemplate
+from agentfly.agents.react.react_agent import ReactAgent, parse_react_step, extract_tool_calls, ReactSystemPromptTemplate
 
 
 class TestMockReactAgent:
diff --git a/agents/tests/unit/agents/templates/test_qwen3_prompt.py b/agentfly/tests/unit/agents/templates/__init__.py
similarity index 100%
rename from agents/tests/unit/agents/templates/test_qwen3_prompt.py
rename to agentfly/tests/unit/agents/templates/__init__.py
diff --git a/agentfly/tests/unit/agents/templates/test_qwen3_prompt.py b/agentfly/tests/unit/agents/templates/test_qwen3_prompt.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/tests/unit/agents/templates/test_template_utilities.py b/agentfly/tests/unit/agents/templates/test_template_utilities.py
similarity index 83%
rename from agents/tests/unit/agents/templates/test_template_utilities.py
rename to agentfly/tests/unit/agents/templates/test_template_utilities.py
index 47cba54..2cdbf4b 100644
--- a/agents/tests/unit/agents/templates/test_template_utilities.py
+++ b/agentfly/tests/unit/agents/templates/test_template_utilities.py
@@ -1,5 +1,5 @@
-from agents.agents.templates.templates import get_template, register_template, Template
-from agents.agents.templates.vision_processor import get_processor
+from .....agents.templates.templates import get_template, register_template, Template
+from .....agents.templates.vision_processor import get_processor
 
 def test_template_registration():
     register_template(
diff --git a/agents/tests/unit/agents/templates/test_text_templates_full_align.py b/agentfly/tests/unit/agents/templates/test_text_templates_full_align.py
similarity index 97%
rename from agents/tests/unit/agents/templates/test_text_templates_full_align.py
rename to agentfly/tests/unit/agents/templates/test_text_templates_full_align.py
index 006ee4e..74cf1f3 100644
--- a/agents/tests/unit/agents/templates/test_text_templates_full_align.py
+++ b/agentfly/tests/unit/agents/templates/test_text_templates_full_align.py
@@ -8,13 +8,13 @@
 """
 
 
-from agents.agents.templates.utils import compare_hf_template
+from .....agents.templates.utils import compare_hf_template
 from transformers import AutoTokenizer
 import pytest
 
 @pytest.mark.parametrize("model_name_or_path", [
-    # "Qwen/Qwen2.5-3B-Instruct",
-    "mistralai/Mistral-7B-Instruct-v0.3",
+    "Qwen/Qwen2.5-3B-Instruct",
+    # "mistralai/Mistral-7B-Instruct-v0.3",
 ])
 @pytest.mark.parametrize("messages", [
     [
diff --git a/agents/tests/unit/agents/templates/test_text_templates_partial_align.py b/agentfly/tests/unit/agents/templates/test_text_templates_partial_align.py
similarity index 96%
rename from agents/tests/unit/agents/templates/test_text_templates_partial_align.py
rename to agentfly/tests/unit/agents/templates/test_text_templates_partial_align.py
index 78b7f87..ea26532 100644
--- a/agents/tests/unit/agents/templates/test_text_templates_partial_align.py
+++ b/agentfly/tests/unit/agents/templates/test_text_templates_partial_align.py
@@ -1,7 +1,7 @@
 import pytest
 from transformers import AutoTokenizer
-from agents.agents.templates.templates import get_template
-from agents.agents.templates.utils import compare_hf_template
+from agentfly.agents.templates.templates import get_template
+from agentfly.agents.templates.utils import compare_hf_template
 
 # nemotron, phi-4, glm-4
 @pytest.mark.parametrize("template_name", ["qwen2.5-think", "qwen2.5-no-system-tool",])
diff --git a/agents/tests/unit/agents/templates/test_text_templates_tokenize.py b/agentfly/tests/unit/agents/templates/test_text_templates_tokenize.py
similarity index 96%
rename from agents/tests/unit/agents/templates/test_text_templates_tokenize.py
rename to agentfly/tests/unit/agents/templates/test_text_templates_tokenize.py
index a89e0b8..079537b 100644
--- a/agents/tests/unit/agents/templates/test_text_templates_tokenize.py
+++ b/agentfly/tests/unit/agents/templates/test_text_templates_tokenize.py
@@ -7,11 +7,11 @@
 Since the align for textual prompt is already tested in other files, we only need to test the tokenization of the templates.
 """
 
-from agents.agents.templates.utils import tokenize_conversation
+from .....agents.templates.utils import tokenize_conversation
 import pytest
 from transformers import AutoTokenizer
 import torch
-from agents.agents.templates.templates import Chat
+from .....agents.templates.templates import Chat
 
 @pytest.mark.parametrize("template", ["llama-3.2", "qwen2.5"])
 @pytest.mark.parametrize("messages", [
diff --git a/agents/tests/unit/agents/templates/test_vision_templates_full_align.py b/agentfly/tests/unit/agents/templates/test_vision_templates_full_align.py
similarity index 98%
rename from agents/tests/unit/agents/templates/test_vision_templates_full_align.py
rename to agentfly/tests/unit/agents/templates/test_vision_templates_full_align.py
index a0b8d8e..0d98620 100644
--- a/agents/tests/unit/agents/templates/test_vision_templates_full_align.py
+++ b/agentfly/tests/unit/agents/templates/test_vision_templates_full_align.py
@@ -9,7 +9,7 @@
 """
 
 
-from agents.agents.templates.utils import compare_hf_template
+from .....agents.templates.utils import compare_hf_template
 from transformers import AutoTokenizer
 import pytest
 # "qwen2.5-think", "qwen2.5", "qwen2.5-no-tool",
diff --git a/agents/tests/unit/agents/templates/test_vision_templates_tokenize.py b/agentfly/tests/unit/agents/templates/test_vision_templates_tokenize.py
similarity index 92%
rename from agents/tests/unit/agents/templates/test_vision_templates_tokenize.py
rename to agentfly/tests/unit/agents/templates/test_vision_templates_tokenize.py
index 6e17ef8..4117a5a 100644
--- a/agents/tests/unit/agents/templates/test_vision_templates_tokenize.py
+++ b/agentfly/tests/unit/agents/templates/test_vision_templates_tokenize.py
@@ -8,8 +8,8 @@
 """
 
 
-from agents.agents.templates.templates import Chat
-from agents.agents.templates.utils import compare_hf_template, tokenize_conversation
+from .....agents.templates.templates import Chat
+from .....agents.templates.utils import compare_hf_template, tokenize_conversation
 from transformers import AutoTokenizer
 import pytest
 import torch
@@ -118,9 +118,9 @@ def test_chat_template_equal(template, messages, tools, add_generation_prompt):
     assert torch.equal(official_inputs["input_ids"], implemented_inputs["input_ids"]), f"""Offical 
     prompt:\n{official_prompt}\nImplemented prompt:\n{implemented_prompt}"""
     
-    assert torch.equal(official_inputs["pixel_values"], implemented_inputs["pixel_values"])
+    assert torch.equal(official_inputs["pixel_values"], implemented_inputs["pixel_values"]), f"""Official pixel values: {official_inputs["pixel_values"].shape}\nImplemented pixel values: {implemented_inputs["pixel_values"].shape}"""
 
-    assert torch.equal(official_inputs["image_grid_thw"], implemented_inputs["image_grid_thw"])
+    assert torch.equal(official_inputs["image_grid_thw"], implemented_inputs["image_grid_thw"]), f"""Official image grid thw: {official_inputs["image_grid_thw"]}\nImplemented image grid thw: {implemented_inputs["image_grid_thw"]}"""
 
     assert implemented_inputs["input_ids"].shape == implemented_inputs["action_mask"].shape, f"""Official action mask shape: {official_inputs["action_mask"].shape}\nImplemented action mask shape: {implemented_inputs["action_mask"].shape}"""
 
diff --git a/agents/tests/unit/agents/test_auto_agent.py b/agentfly/tests/unit/agents/test_auto_agent.py
similarity index 94%
rename from agents/tests/unit/agents/test_auto_agent.py
rename to agentfly/tests/unit/agents/test_auto_agent.py
index 7159a4c..2cf1a53 100644
--- a/agents/tests/unit/agents/test_auto_agent.py
+++ b/agentfly/tests/unit/agents/test_auto_agent.py
@@ -1,7 +1,7 @@
 import pytest
-from agents.agents.auto import AutoAgent
-from agents.agents.react.react_agent import ReactAgent
-from agents.agents.specialized.code_agent import CodeAgent
+from ....agents.auto import AutoAgent
+from ....agents.react.react_agent import ReactAgent
+from ....agents.specialized.code_agent import CodeAgent
 
 
 def test_auto_agent_from_config_react():
diff --git a/agents/tests/unit/agents/test_chain.py b/agentfly/tests/unit/agents/test_chain.py
similarity index 96%
rename from agents/tests/unit/agents/test_chain.py
rename to agentfly/tests/unit/agents/test_chain.py
index 5861ea4..08a488f 100644
--- a/agents/tests/unit/agents/test_chain.py
+++ b/agentfly/tests/unit/agents/test_chain.py
@@ -1,5 +1,5 @@
 import pytest
-from agents.agents.agents.chain.chain_base import Chain, Node, ChainGeneration
+from ....agents.chain.chain_base import Chain, Node, ChainRollout
 
 
 def test_node_creation():
diff --git a/agents/tests/unit/agents/test_code_agent.py b/agentfly/tests/unit/agents/test_code_agent.py
similarity index 89%
rename from agents/tests/unit/agents/test_code_agent.py
rename to agentfly/tests/unit/agents/test_code_agent.py
index 662b33e..a4c9efc 100644
--- a/agents/tests/unit/agents/test_code_agent.py
+++ b/agentfly/tests/unit/agents/test_code_agent.py
@@ -1,6 +1,6 @@
 import pytest
-from agents.agents.specialized.code_agent import CodeAgent
-from agents.tools import code_interpreter
+from ....agents.specialized.code_agent import CodeAgent
+from ....tools import code_interpreter
 
 
 @pytest.mark.asyncio
@@ -35,9 +35,9 @@ async def test_code_agent_end_to_end():
         }
     ]
 
-    await agent.run_async(
-        max_steps=4,
-        start_messages=messages,
+    await agent.run(
+        max_turns=4,
+        messages=messages,
         num_chains=2
     )
 
diff --git a/agents/tests/unit/agents/test_gui_agent.py b/agentfly/tests/unit/agents/test_gui_agent.py
similarity index 93%
rename from agents/tests/unit/agents/test_gui_agent.py
rename to agentfly/tests/unit/agents/test_gui_agent.py
index bd1eca7..3c1cafb 100644
--- a/agents/tests/unit/agents/test_gui_agent.py
+++ b/agentfly/tests/unit/agents/test_gui_agent.py
@@ -9,10 +9,10 @@
 # Add the agents module to path
 sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 
-from agents.agents.specialized.gui_agent import GUIAgent
-from agents.rewards.gui_reward import gui_reward
-from agents.utils.ui_action_parser import parse_action_to_structure_output, IMAGE_FACTOR
-
+from ....agents.specialized.gui_agent import GUIAgent
+from ....rewards.gui_reward import gui_reward
+from ....utils.ui_action_parser import parse_action_to_structure_output, IMAGE_FACTOR
+from ....tools import pyautogui_code_generator
 
 class TestGUIAgent:
     """Test suite for GUI Agent implementation."""
@@ -23,7 +23,8 @@ def test_gui_agent_initialization(self):
         agent = GUIAgent(
             model_name_or_path="ByteDance-Seed/UI-TARS-1.5-7B",
             template="qwen2.5-vl",
-            tools=["pyautogui_code_generator"]
+            tools=[pyautogui_code_generator],
+            backend="async_vllm"
         )
         assert agent is not None
         assert agent.system_prompt is not None
@@ -35,7 +36,8 @@ def test_gui_agent_parse_valid_response(self):
         agent = GUIAgent(
             model_name_or_path="ByteDance-Seed/UI-TARS-1.5-7B",
             template="qwen2.5-vl",
-            tools=[]
+            tools=[],
+            backend="async_vllm"
         )
         
         responses = [
@@ -56,7 +58,8 @@ def test_gui_agent_parse_terminal_action(self):
         agent = GUIAgent(
             model_name_or_path="ByteDance-Seed/UI-TARS-1.5-7B",
             template="qwen2.5-vl",
-            tools=[]
+            tools=[],
+            backend="async_vllm"
         )
         
         responses = [
@@ -74,7 +77,8 @@ def test_gui_agent_parse_empty_response(self):
         agent = GUIAgent(
             model_name_or_path="ByteDance-Seed/UI-TARS-1.5-7B",
             template="qwen2.5-vl",
-            tools=[]
+            tools=[],
+            backend="async_vllm"
         )
         
         responses = [""]
diff --git a/agents/tests/unit/agents/test_initialization.py b/agentfly/tests/unit/agents/test_initialization.py
similarity index 87%
rename from agents/tests/unit/agents/test_initialization.py
rename to agentfly/tests/unit/agents/test_initialization.py
index 61055ee..4416124 100644
--- a/agents/tests/unit/agents/test_initialization.py
+++ b/agentfly/tests/unit/agents/test_initialization.py
@@ -1,8 +1,8 @@
-from agents.agents.agent_base import BaseAgent
-from agents.agents.specialized.code_agent import CodeAgent
-from agents.agents.react.react_agent import ReactAgent
-from agents.agents.specialized.think_agent import ThinkAgent
-from agents.tools import code_interpreter, google_search_serper, answer_qa
+from ....agents.agent_base import BaseAgent
+from ....agents.specialized.code_agent import CodeAgent
+from ....agents.react.react_agent import ReactAgent
+from ....agents.specialized.think_agent import ThinkAgent
+from ....tools import code_interpreter, google_search_serper, answer_qa
 import pytest
 
 
diff --git a/agents/tests/unit/agents/test_react_agent.py b/agentfly/tests/unit/agents/test_react_agent.py
similarity index 87%
rename from agents/tests/unit/agents/test_react_agent.py
rename to agentfly/tests/unit/agents/test_react_agent.py
index 82a19bb..d59cf7b 100644
--- a/agents/tests/unit/agents/test_react_agent.py
+++ b/agentfly/tests/unit/agents/test_react_agent.py
@@ -1,7 +1,7 @@
 import pytest
-from agents.agents.react.react_agent import ReactAgent, parse_react_step
-from agents.tools.src.search.google_search import google_search_serper
-from agents.tools import answer_qa
+from ....agents.react.react_agent import ReactAgent, parse_react_step
+from ....tools.src.search.google_search import google_search_serper
+from ....tools import answer_qa
 
 def test_parse_react_step():
     # Test with a valid ReAct step
@@ -56,7 +56,11 @@ async def test_react_agent_parse_run():
             ]
         }
     ]
-    await agent.run_async(start_messages=messages, max_steps=4, num_chains=1)
+    await agent.run(
+        max_turns=4,
+        messages=messages,
+        num_chains=1
+    )
     messages_list = agent.get_messages()
     print(messages_list[0])
     
\ No newline at end of file
diff --git a/agents/tests/unit/agents/test_vision_agent.py b/agentfly/tests/unit/agents/test_vision_agent.py
similarity index 89%
rename from agents/tests/unit/agents/test_vision_agent.py
rename to agentfly/tests/unit/agents/test_vision_agent.py
index 67bf4d6..f679d0f 100644
--- a/agents/tests/unit/agents/test_vision_agent.py
+++ b/agentfly/tests/unit/agents/test_vision_agent.py
@@ -1,6 +1,6 @@
 import torch
-from agents.agents.react.react_agent import ReactAgent
-from agents.tools import answer_qa
+from ....agents.react.react_agent import ReactAgent
+from ....tools import answer_qa
 import pytest
 
 
@@ -37,9 +37,9 @@ async def test_vision_agent():
     ]
 
 
-    await react_agent.run_async(
-        max_steps=3,
-        start_messages=messages,
+    await react_agent.run(
+        max_turns=3,
+        messages=messages,
         num_chains=10
     )
     messages_list = react_agent.get_messages()
diff --git a/agents/tests/unit/agents/test_webshop_agent.py b/agentfly/tests/unit/agents/test_webshop_agent.py
similarity index 83%
rename from agents/tests/unit/agents/test_webshop_agent.py
rename to agentfly/tests/unit/agents/test_webshop_agent.py
index 59f5c91..b1c0001 100644
--- a/agents/tests/unit/agents/test_webshop_agent.py
+++ b/agentfly/tests/unit/agents/test_webshop_agent.py
@@ -1,12 +1,12 @@
 import pytest
-from agents.agents.react.react_agent import ReactAgent
-from agents.tools.src.webshop.tools import webshop_browser
-from agents.tools.src.react.tools import answer
-from agents.rewards import webshop_reward
+from ....agents.react.react_agent import ReactAgent
+from ....tools.src.webshop.tools import webshop_browser
+from ....tools.src.react.tools import answer
+from ....rewards import webshop_reward
 
 
 @pytest.mark.gpu
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="session")
 async def test_webshop_agent_call():
     tools = [webshop_browser, answer]
     agent = ReactAgent(
@@ -42,9 +42,9 @@ async def test_webshop_agent_call():
     ]
 
 
-    await agent.run_async(
-            max_steps=8,
-            start_messages=messages,
+    await agent.run(
+            max_turns=8,
+            messages=messages,
             num_chains=4
         )
 
diff --git a/agents/tests/unit/conftest.py b/agentfly/tests/unit/conftest.py
similarity index 100%
rename from agents/tests/unit/conftest.py
rename to agentfly/tests/unit/conftest.py
diff --git a/agentfly/tests/unit/envs/__init__.py b/agentfly/tests/unit/envs/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/tests/unit/envs/test_alfworld_env.py b/agentfly/tests/unit/envs/test_alfworld_env.py
similarity index 99%
rename from agents/tests/unit/envs/test_alfworld_env.py
rename to agentfly/tests/unit/envs/test_alfworld_env.py
index f955bca..5b01fc7 100644
--- a/agents/tests/unit/envs/test_alfworld_env.py
+++ b/agentfly/tests/unit/envs/test_alfworld_env.py
@@ -1,6 +1,6 @@
 import asyncio
 import pytest
-from agents.envs.alfworld_env import ALFWorldEnv
+from ....envs.alfworld_env import ALFWorldEnv
 
 @pytest.mark.asyncio
 async def test_alfworld_env_get_info():
diff --git a/agents/tests/unit/envs/test_code_env.py b/agentfly/tests/unit/envs/test_code_env.py
similarity index 94%
rename from agents/tests/unit/envs/test_code_env.py
rename to agentfly/tests/unit/envs/test_code_env.py
index 2e60548..24a32a4 100644
--- a/agents/tests/unit/envs/test_code_env.py
+++ b/agentfly/tests/unit/envs/test_code_env.py
@@ -1,4 +1,4 @@
-from agents.envs.python_env import PythonSandboxEnv
+from ....envs.python_env import PythonSandboxEnv
 import asyncio
 import pytest
 
diff --git a/agents/tests/unit/envs/test_enroot.py b/agentfly/tests/unit/envs/test_enroot.py
similarity index 90%
rename from agents/tests/unit/envs/test_enroot.py
rename to agentfly/tests/unit/envs/test_enroot.py
index dd653c0..a1cd4b0 100644
--- a/agents/tests/unit/envs/test_enroot.py
+++ b/agentfly/tests/unit/envs/test_enroot.py
@@ -1,4 +1,4 @@
-from agents.envs.manager.enroot import from_env
+from ....envs.manager.enroot import from_env
 
 # Commented out because it's not working on github actions (status is 'exited')
 # def test_enroot_client():
diff --git a/agents/tests/unit/envs/test_env_run.py b/agentfly/tests/unit/envs/test_env_run.py
similarity index 95%
rename from agents/tests/unit/envs/test_env_run.py
rename to agentfly/tests/unit/envs/test_env_run.py
index e3cbd65..12d5a47 100644
--- a/agents/tests/unit/envs/test_env_run.py
+++ b/agentfly/tests/unit/envs/test_env_run.py
@@ -1,7 +1,7 @@
 import asyncio
 import time
-from agents.envs.manager.warm_pool import WarmPool
-from agents.envs.python_env import PythonSandboxEnv
+from ....envs.manager.warm_pool import WarmPool
+from ....envs.python_env import PythonSandboxEnv
 import pytest
 import requests
 
@@ -53,9 +53,6 @@ async def run(i: int):
     end_time = time.time()
     print(f"Time taken: {end_time - start_time} seconds")
 
-import asyncio, pytest, random
-from agents.envs.python_env import PythonSandboxEnv   # adjust to your package path
-
 # N_ENVS       = 1000     # total environments you want to exercise
 # MAX_PARALLEL = 32    # how many containers may run at the same time
 
diff --git a/agents/tests/unit/envs/test_pool.py b/agentfly/tests/unit/envs/test_pool.py
similarity index 62%
rename from agents/tests/unit/envs/test_pool.py
rename to agentfly/tests/unit/envs/test_pool.py
index fddc84c..3732fb5 100644
--- a/agents/tests/unit/envs/test_pool.py
+++ b/agentfly/tests/unit/envs/test_pool.py
@@ -1,5 +1,5 @@
-from agents.envs.manager.warm_pool import WarmPool
-from agents.envs.python_env import PythonSandboxEnv
+from ....envs.manager.warm_pool import WarmPool
+from ....envs.python_env import PythonSandboxEnv
 import pytest
 
 @pytest.mark.asyncio
diff --git a/agents/tests/unit/envs/test_redis_env.py b/agentfly/tests/unit/envs/test_redis_env.py
similarity index 97%
rename from agents/tests/unit/envs/test_redis_env.py
rename to agentfly/tests/unit/envs/test_redis_env.py
index 1fcd1dc..ce3fb2c 100644
--- a/agents/tests/unit/envs/test_redis_env.py
+++ b/agentfly/tests/unit/envs/test_redis_env.py
@@ -1,5 +1,5 @@
 import asyncio
-from agents.envs.redis_env import RedisEnv
+from ....envs.redis_env import RedisEnv
 import pytest
 
 # @pytest.mark.asyncio
diff --git a/agents/tests/unit/envs/test_scienceworld_env.py b/agentfly/tests/unit/envs/test_scienceworld_env.py
similarity index 95%
rename from agents/tests/unit/envs/test_scienceworld_env.py
rename to agentfly/tests/unit/envs/test_scienceworld_env.py
index 284d6fc..e03d047 100644
--- a/agents/tests/unit/envs/test_scienceworld_env.py
+++ b/agentfly/tests/unit/envs/test_scienceworld_env.py
@@ -1,5 +1,5 @@
 import pytest
-from agents.envs.scienceworld_env import ScienceWorldEnv
+from ....envs.scienceworld_env import ScienceWorldEnv
 from ast import literal_eval
 
     
diff --git a/agents/tests/unit/envs/test_webshop_text_env.py b/agentfly/tests/unit/envs/test_webshop_text_env.py
similarity index 98%
rename from agents/tests/unit/envs/test_webshop_text_env.py
rename to agentfly/tests/unit/envs/test_webshop_text_env.py
index 5306836..8b00a5b 100644
--- a/agents/tests/unit/envs/test_webshop_text_env.py
+++ b/agentfly/tests/unit/envs/test_webshop_text_env.py
@@ -1,5 +1,5 @@
 import pytest
-from agents.envs.webshop_text_env import WebAgentTextEnv
+from ....envs.webshop_text_env import WebAgentTextEnv
 from ast import literal_eval
 
 STANDARD_BUTTONS = [
diff --git a/agentfly/tests/unit/rewards/__init__.py b/agentfly/tests/unit/rewards/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/tests/unit/rewards/test_env_id.py b/agentfly/tests/unit/rewards/test_env_id.py
similarity index 81%
rename from agents/tests/unit/rewards/test_env_id.py
rename to agentfly/tests/unit/rewards/test_env_id.py
index 5c12efd..1bafbca 100644
--- a/agents/tests/unit/rewards/test_env_id.py
+++ b/agentfly/tests/unit/rewards/test_env_id.py
@@ -1,10 +1,10 @@
 import pytest
 
-from agents.rewards.reward_base import reward
-from agents.tools.tool_base import tool
-from agents.envs.webshop_text_env import WebAgentTextEnv
+from ....rewards.reward_base import reward
+from ....tools.tool_base import tool
+from ....envs.webshop_text_env import WebAgentTextEnv
 
-@pytest.mark.asyncio()
+@pytest.mark.asyncio(loop_scope="session")
 async def test_tool_reward_env():
     @tool(env_cls=WebAgentTextEnv, name="test_tool", pool_size=4)
     async def test_tool(prediction: str, env: WebAgentTextEnv):
diff --git a/agents/tests/unit/rewards/test_llm_as_judge_reward.py b/agentfly/tests/unit/rewards/test_llm_as_judge_reward.py
similarity index 85%
rename from agents/tests/unit/rewards/test_llm_as_judge_reward.py
rename to agentfly/tests/unit/rewards/test_llm_as_judge_reward.py
index 4bc15d7..f3a1f8d 100644
--- a/agents/tests/unit/rewards/test_llm_as_judge_reward.py
+++ b/agentfly/tests/unit/rewards/test_llm_as_judge_reward.py
@@ -1,4 +1,4 @@
-from agents.rewards.llm_as_judge.llm_as_judge_client import llm_as_judge_client_math_reward
+from ....rewards.llm_as_judge.llm_as_judge_client import llm_as_judge_client_math_reward
 import pytest
 
 # @pytest.mark.asyncio    
diff --git a/agents/tests/unit/rewards/test_reward_with_env.py b/agentfly/tests/unit/rewards/test_reward_with_env.py
similarity index 73%
rename from agents/tests/unit/rewards/test_reward_with_env.py
rename to agentfly/tests/unit/rewards/test_reward_with_env.py
index fc335da..3ce7bd8 100644
--- a/agents/tests/unit/rewards/test_reward_with_env.py
+++ b/agentfly/tests/unit/rewards/test_reward_with_env.py
@@ -1,7 +1,7 @@
-from agents.rewards.code_reward import code_reward_test
+from ....rewards.code_reward import code_reward_test
 import pytest
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="session")
 async def test_code_reward_test():
     code = "print('Hello, World!')"
     reward = await code_reward_test(code, id="test")
diff --git a/agents/tests/unit/rewards/test_scienceworld_reward.py b/agentfly/tests/unit/rewards/test_scienceworld_reward.py
similarity index 68%
rename from agents/tests/unit/rewards/test_scienceworld_reward.py
rename to agentfly/tests/unit/rewards/test_scienceworld_reward.py
index bc2f2ad..395f31d 100644
--- a/agents/tests/unit/rewards/test_scienceworld_reward.py
+++ b/agentfly/tests/unit/rewards/test_scienceworld_reward.py
@@ -1,7 +1,7 @@
-from agents.rewards.scienceworld_reward import scienceworld_reward
+from ....rewards.scienceworld_reward import scienceworld_reward
 import pytest
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="session")
 async def test_scienceworld_reward():
     prediction = "Task not completed"
     reward = await scienceworld_reward(prediction, id="test")
diff --git a/agents/tests/unit/rewards/test_tool_reward_env.py b/agentfly/tests/unit/rewards/test_tool_reward_env.py
similarity index 78%
rename from agents/tests/unit/rewards/test_tool_reward_env.py
rename to agentfly/tests/unit/rewards/test_tool_reward_env.py
index 7a31df9..62246c2 100644
--- a/agents/tests/unit/rewards/test_tool_reward_env.py
+++ b/agentfly/tests/unit/rewards/test_tool_reward_env.py
@@ -1,12 +1,12 @@
 import pytest
 
-from agents.envs.manager.env_manager import EnvironmentManager
-from agents.envs import PythonSandboxEnv
-from agents.tools import tool
-from agents.rewards import reward
+from ....envs.manager.env_manager import EnvironmentManager
+from ....envs import PythonSandboxEnv
+from ....tools import tool
+from ....rewards import reward
 
 
-@pytest.mark.asyncio()
+@pytest.mark.asyncio(loop_scope="session")
 async def test_tool_reward_env():
     @tool(env_cls=PythonSandboxEnv, name="test_tool", pool_size=4)
     async def test_tool(code: str, env: PythonSandboxEnv):
diff --git a/agents/tests/unit/rewards/test_webshop_reward.py b/agentfly/tests/unit/rewards/test_webshop_reward.py
similarity index 73%
rename from agents/tests/unit/rewards/test_webshop_reward.py
rename to agentfly/tests/unit/rewards/test_webshop_reward.py
index 9dd2067..5f4800d 100644
--- a/agents/tests/unit/rewards/test_webshop_reward.py
+++ b/agentfly/tests/unit/rewards/test_webshop_reward.py
@@ -1,7 +1,7 @@
-from agents.rewards.webshop_reward import webshop_reward
+from ....rewards.webshop_reward import webshop_reward
 import pytest
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="session")
 async def test_webshop_reward():
     prediction = "Thank you for shopping with us"
     reward = await webshop_reward(prediction, task_id=0, id="test_webshop_reward")
diff --git a/agentfly/tests/unit/tools/__init__.py b/agentfly/tests/unit/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/tests/unit/tools/test_alfworld_tool.py b/agentfly/tests/unit/tools/test_alfworld_tool.py
similarity index 97%
rename from agents/tests/unit/tools/test_alfworld_tool.py
rename to agentfly/tests/unit/tools/test_alfworld_tool.py
index 49599ce..86bb32e 100644
--- a/agents/tests/unit/tools/test_alfworld_tool.py
+++ b/agentfly/tests/unit/tools/test_alfworld_tool.py
@@ -1,6 +1,6 @@
 import asyncio
 import pytest
-from agents.tools import alfworld_step, alfworld_get_admissible_commands, alfworld_get_task_objective, alfworld_reset
+from ....tools import alfworld_step, alfworld_get_admissible_commands, alfworld_get_task_objective, alfworld_reset
 
 @pytest.mark.asyncio(loop_scope="session")
 async def test_alfworld_reset():
diff --git a/agents/tests/unit/tools/test_async_dense_retriever.py b/agentfly/tests/unit/tools/test_async_dense_retriever.py
similarity index 97%
rename from agents/tests/unit/tools/test_async_dense_retriever.py
rename to agentfly/tests/unit/tools/test_async_dense_retriever.py
index 1775008..8afc402 100644
--- a/agents/tests/unit/tools/test_async_dense_retriever.py
+++ b/agentfly/tests/unit/tools/test_async_dense_retriever.py
@@ -15,16 +15,16 @@
 
 # Fix imports to handle both versions properly
 try:
-    from agents.tools.src.search.async_dense_retriever import DenseRetriever as AsyncDenseRetriever
-    from agents.tools.src.search.async_dense_retriever import dense_retrieve as async_dense_retrieve
+    from ....tools.src.search.async_dense_retriever import DenseRetriever as AsyncDenseRetriever
+    from ....tools.src.search.async_dense_retriever import dense_retrieve as async_dense_retrieve
 except ImportError as e:
     print(f"Error importing async_dense_retriever: {e}")
     AsyncDenseRetriever = None
     async_dense_retrieve = None
 
 try:
-    from agents.tools.src.search.dense_retriever import DenseRetriever as SyncDenseRetriever
-    from agents.tools.src.search.dense_retriever import dense_retrieve as sync_dense_retrieve
+    from ....tools.src.search.dense_retriever import DenseRetriever as SyncDenseRetriever
+    from ....tools.src.search.dense_retriever import dense_retrieve as sync_dense_retrieve
 except ImportError as e:
     print(f"Error importing dense_retriever: {e}")
     SyncDenseRetriever = None
diff --git a/agents/tests/unit/tools/test_code_tool.py b/agentfly/tests/unit/tools/test_code_tool.py
similarity index 97%
rename from agents/tests/unit/tools/test_code_tool.py
rename to agentfly/tests/unit/tools/test_code_tool.py
index 388845f..4fc79e8 100644
--- a/agents/tests/unit/tools/test_code_tool.py
+++ b/agentfly/tests/unit/tools/test_code_tool.py
@@ -1,6 +1,6 @@
 import asyncio
 import time
-from agents.tools import code_interpreter
+from ....tools import code_interpreter
 import pytest
 
 
diff --git a/agents/tests/unit/tools/test_predefined_tools.py b/agentfly/tests/unit/tools/test_predefined_tools.py
similarity index 88%
rename from agents/tests/unit/tools/test_predefined_tools.py
rename to agentfly/tests/unit/tools/test_predefined_tools.py
index a524ad1..b1c9c0f 100644
--- a/agents/tests/unit/tools/test_predefined_tools.py
+++ b/agentfly/tests/unit/tools/test_predefined_tools.py
@@ -1,4 +1,4 @@
-from agents.tools import code_interpreter
+from ....tools import code_interpreter
 import pytest
 
 # @pytest.mark.asyncio
diff --git a/agents/tests/unit/tools/test_ray_tool.py b/agentfly/tests/unit/tools/test_ray_tool.py
similarity index 85%
rename from agents/tests/unit/tools/test_ray_tool.py
rename to agentfly/tests/unit/tools/test_ray_tool.py
index feea969..bc22237 100644
--- a/agents/tests/unit/tools/test_ray_tool.py
+++ b/agentfly/tests/unit/tools/test_ray_tool.py
@@ -1,6 +1,6 @@
 import pytest, ray
-from agents.tools import code_interpreter
-from agents.tools.utils.rayify import rayify
+from ....tools import code_interpreter
+from ....tools.utils.rayify import rayify
 from ray.util import inspect_serializability
 
 # def test_serializability():
diff --git a/agents/tests/unit/tools/test_scienceworld_tool.py b/agentfly/tests/unit/tools/test_scienceworld_tool.py
similarity index 93%
rename from agents/tests/unit/tools/test_scienceworld_tool.py
rename to agentfly/tests/unit/tools/test_scienceworld_tool.py
index 63e1fca..0e29188 100644
--- a/agents/tests/unit/tools/test_scienceworld_tool.py
+++ b/agentfly/tests/unit/tools/test_scienceworld_tool.py
@@ -1,4 +1,4 @@
-from agents.tools import scienceworld_explorer
+from ....tools import scienceworld_explorer
 import pytest
 import asyncio
 
diff --git a/agents/tests/unit/tools/test_search_tool.py b/agentfly/tests/unit/tools/test_search_tool.py
similarity index 90%
rename from agents/tests/unit/tools/test_search_tool.py
rename to agentfly/tests/unit/tools/test_search_tool.py
index 978b61e..b102a09 100644
--- a/agents/tests/unit/tools/test_search_tool.py
+++ b/agentfly/tests/unit/tools/test_search_tool.py
@@ -1,5 +1,5 @@
 import asyncio
-from agents.tools.src.search.google_search import google_search_serper
+from ....tools.src.search.google_search import google_search_serper
 import pytest
 
 # @pytest.mark.asyncio
diff --git a/agents/tests/unit/tools/test_tool_call_by_name_async.py b/agentfly/tests/unit/tools/test_tool_call_by_name_async.py
similarity index 92%
rename from agents/tests/unit/tools/test_tool_call_by_name_async.py
rename to agentfly/tests/unit/tools/test_tool_call_by_name_async.py
index 00c3596..81ce1d1 100644
--- a/agents/tests/unit/tools/test_tool_call_by_name_async.py
+++ b/agentfly/tests/unit/tools/test_tool_call_by_name_async.py
@@ -1,4 +1,4 @@
-from agents.tools import submit_tool_call, tool, submit_tool_calls
+from ....tools import submit_tool_call, tool, submit_tool_calls
 import pytest
 import asyncio
 
diff --git a/agents/tests/unit/tools/test_tool_call_by_name_sync.py b/agentfly/tests/unit/tools/test_tool_call_by_name_sync.py
similarity index 94%
rename from agents/tests/unit/tools/test_tool_call_by_name_sync.py
rename to agentfly/tests/unit/tools/test_tool_call_by_name_sync.py
index c65836f..1814666 100644
--- a/agents/tests/unit/tools/test_tool_call_by_name_sync.py
+++ b/agentfly/tests/unit/tools/test_tool_call_by_name_sync.py
@@ -1,5 +1,5 @@
 import pytest
-from agents.tools.tool_base import tool, submit_tool_calls
+from ....tools.tool_base import tool, submit_tool_calls
 
 # def test_tool_call_sync():
 #     # Create a custom sync tool that doesn't use the async implementation
diff --git a/agents/tests/unit/tools/test_tool_define.py b/agentfly/tests/unit/tools/test_tool_define.py
similarity index 89%
rename from agents/tests/unit/tools/test_tool_define.py
rename to agentfly/tests/unit/tools/test_tool_define.py
index 8fe94ef..5418608 100644
--- a/agents/tests/unit/tools/test_tool_define.py
+++ b/agentfly/tests/unit/tools/test_tool_define.py
@@ -1,5 +1,5 @@
-from agents.tools.tool_base import tool
-from agents.envs.python_env import PythonSandboxEnv
+from ....tools.tool_base import tool
+from ....envs.python_env import PythonSandboxEnv
 import pytest
 
 def test_base_tool():
diff --git a/agents/tests/unit/tools/test_tool_functionality.py b/agentfly/tests/unit/tools/test_tool_functionality.py
similarity index 92%
rename from agents/tests/unit/tools/test_tool_functionality.py
rename to agentfly/tests/unit/tools/test_tool_functionality.py
index 4859253..dfad10c 100644
--- a/agents/tests/unit/tools/test_tool_functionality.py
+++ b/agentfly/tests/unit/tools/test_tool_functionality.py
@@ -1,5 +1,5 @@
 import pytest
-from agents.tools import tool
+from ....tools import tool
 
 
 @pytest.mark.asyncio
diff --git a/agents/tests/unit/tools/test_tool_sync.py b/agentfly/tests/unit/tools/test_tool_sync.py
similarity index 90%
rename from agents/tests/unit/tools/test_tool_sync.py
rename to agentfly/tests/unit/tools/test_tool_sync.py
index 39b0f14..eb21369 100644
--- a/agents/tests/unit/tools/test_tool_sync.py
+++ b/agentfly/tests/unit/tools/test_tool_sync.py
@@ -1,7 +1,7 @@
 import pytest
-from agents.tools import code_interpreter
-from agents.tools.tool_base import tool, Tool
-from agents.envs.python_env import PythonSandboxEnv
+from ....tools import code_interpreter
+from ....tools.tool_base import tool, Tool
+from ....envs.python_env import PythonSandboxEnv
 
 
 # def test_stateful_tool_sync():
diff --git a/agents/tests/unit/tools/test_webshop_tool.py b/agentfly/tests/unit/tools/test_webshop_tool.py
similarity index 78%
rename from agents/tests/unit/tools/test_webshop_tool.py
rename to agentfly/tests/unit/tools/test_webshop_tool.py
index 1b579e9..a3c382f 100644
--- a/agents/tests/unit/tools/test_webshop_tool.py
+++ b/agentfly/tests/unit/tools/test_webshop_tool.py
@@ -1,4 +1,4 @@
-from agents.tools import webshop_browser
+from ....tools import webshop_browser
 import pytest
 import asyncio
 
@@ -6,20 +6,20 @@
 async def test_webshop_search():
     result = await webshop_browser(action='search', value='shoes', id='testsearch')
     assert result['status'] == 'success'
-    await webshop_browser.release_env(id='testsearch')
+    await webshop_browser.release(id='testsearch')
 
 @pytest.mark.asyncio
 async def test_webshop_search_and_next_page():
     result = await webshop_browser(action='search', value='shoes', id='testnext')
     result = await webshop_browser(action='click', value='next >', id='testnext')
     assert result['status'] == 'success'
-    await webshop_browser.release_env(id='testnext')
+    await webshop_browser.release(id='testnext')
 
 @pytest.mark.asyncio
 async def test_pool_async_calls():
     async def one_chain(i):
         await webshop_browser(action='search', value='shoes', id=f'test{i}')
-        await webshop_browser.release_env(id=f'test{i}')
+        await webshop_browser.release(id=f'test{i}')
     await asyncio.gather(*[
         one_chain(i) for i in range(webshop_browser.pool_size+5)   # over-subscribe the pool
     ])
diff --git a/agents/agents/tools/__init__.py b/agentfly/tools/__init__.py
similarity index 74%
rename from agents/agents/tools/__init__.py
rename to agentfly/tools/__init__.py
index 98d33ea..959b472 100644
--- a/agents/agents/tools/__init__.py
+++ b/agentfly/tools/__init__.py
@@ -3,9 +3,22 @@
 TOOL_FACTORY = {}
 
 from typing import List
-from .tool_base import Tool, hallucination_tool, invalid_input_tool, tool, submit_tool_call, submit_tool_calls
+from .tool_base import (
+    Tool, 
+    hallucination_tool,
+    invalid_input_tool,
+    tool,
+    submit_tool_call,
+    submit_tool_calls
+)
 from .src.code.tools import code_interpreter
-from .src.alfworld.tools import alfworld_step, alfworld_get_task_objective, alfworld_get_admissible_commands, alfworld_reset
+from .src.alfworld.tools import (
+    alfworld_step,
+    alfworld_get_task_objective,
+    alfworld_get_admissible_commands,
+    alfworld_reset
+)
+from .src.calculate.tools import calculator
 from .src.search.google_search import google_search_serper
 from .src.search.dense_retriever import dense_retrieve
 from .src.search.async_dense_retriever import asyncdense_retrieve
@@ -16,30 +29,6 @@
 from .src.scienceworld.tools import scienceworld_explorer
 from .src.ui.tools import pyautogui_code_generator
 
-# Export the tools
-__all__ = [
-    "asyncdense_retrieve",
-    "dense_retrieve"
-    "http_retrieve",
-    "code_interpreter",
-    "alfworld_step",
-    "alfworld_reset", 
-    "alfworld_get_admissible_commands",
-    "google_search_serper",
-    "answer_qa",
-    "answer_math",
-    "hallucination_tool",
-    "invalid_input_tool",
-    "submit_tool_call",
-    "submit_tool_calls",
-    "tool",
-    "webshop_browser"
-    "alfworld_get_task_objective"
-    "alfworld_reset"
-    "asyncdense_retrieve"
-    "pyautogui_code_generator"
-    # "current_env"
-]
 
 # Add explicit tools in case they weren't auto-registered
 EXPLICIT_TOOLS = {
@@ -57,7 +46,8 @@
     "hallucination_tool": hallucination_tool,
     "invalid_input_tool": invalid_input_tool,
     "dense_retrieve": dense_retrieve,
-    "pyautogui_code_generator": pyautogui_code_generator
+    "pyautogui_code_generator": pyautogui_code_generator,
+    "calculator": calculator
 }
 
 # Update the registry with explicit tools
@@ -73,6 +63,7 @@ def register_tool(tool_name, tool_func):
         tool_name: The name of the tool
         tool_func: The tool function or BaseTool instance
     """
+    global TOOL_REGISTRY
     TOOL_REGISTRY[tool_name] = tool_func
 
 def get_tool_from_name(tool_name: str) -> Tool:
diff --git a/agentfly/tools/src/__init__.py b/agentfly/tools/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/agents/tools/src/alfworld/__init__.py b/agentfly/tools/src/alfworld/__init__.py
similarity index 100%
rename from agents/agents/tools/src/alfworld/__init__.py
rename to agentfly/tools/src/alfworld/__init__.py
diff --git a/agents/agents/tools/src/alfworld/tools.py b/agentfly/tools/src/alfworld/tools.py
similarity index 100%
rename from agents/agents/tools/src/alfworld/tools.py
rename to agentfly/tools/src/alfworld/tools.py
diff --git a/agentfly/tools/src/calculate/tools.py b/agentfly/tools/src/calculate/tools.py
new file mode 100644
index 0000000..cebdcda
--- /dev/null
+++ b/agentfly/tools/src/calculate/tools.py
@@ -0,0 +1,30 @@
+from ...tool_base import tool
+from sympy import simplify, sympify, Rational
+
+# Tool contract: takes an expression string and always returns a string --
+# the simplified result, or "Error: <message>" if parsing/evaluation fails.
+# Documented with comments rather than a docstring so the function's __doc__
+# is left untouched (NOTE(review): confirm whether @tool reads it).
+@tool(name="calculator", description="Calculate the result of a mathematical expression.")
+def calculator(expression: str):
+    try:
+        # SECURITY: sympify() parses strings with eval(); `expression` may be
+        # model-generated, so treat it as untrusted and consider sandboxing.
+        expr = sympify(expression)
+        result = simplify(expr)
+
+        # Non-numeric results (e.g. free symbols remain) use SymPy's own form.
+        if not result.is_number:
+            return str(result)
+
+        # Exact rationals (integers included) keep their exact form, e.g. "1/3".
+        if isinstance(result, Rational):
+            return str(result)
+
+        # Other numeric results are shown as decimals. 15 significant digits is
+        # what a double reliably preserves; plain "{:g}" keeps only 6, which
+        # turned 1234567.89 into "1.23457e+06". "g" still drops trailing zeros.
+        return "{:.15g}".format(float(result))
+    except Exception as e:
+        # Surface the failure as text so the caller still receives a string.
+        return f"Error: {str(e)}"
\ No newline at end of file
diff --git a/agentfly/tools/src/code/__init__.py b/agentfly/tools/src/code/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/agents/tools/src/code/tools.py b/agentfly/tools/src/code/tools.py
similarity index 100%
rename from agents/agents/tools/src/code/tools.py
rename to agentfly/tools/src/code/tools.py
diff --git a/agentfly/tools/src/react/__init__.py b/agentfly/tools/src/react/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/agents/tools/src/react/tools.py b/agentfly/tools/src/react/tools.py
similarity index 100%
rename from agents/agents/tools/src/react/tools.py
rename to agentfly/tools/src/react/tools.py
diff --git a/agents/agents/tools/src/scienceworld/tools.py b/agentfly/tools/src/scienceworld/tools.py
similarity index 100%
rename from agents/agents/tools/src/scienceworld/tools.py
rename to agentfly/tools/src/scienceworld/tools.py
diff --git a/agentfly/tools/src/search/__init__.py b/agentfly/tools/src/search/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agents/agents/tools/src/search/async_dense_retriever.py b/agentfly/tools/src/search/async_dense_retriever.py
similarity index 100%
rename from agents/agents/tools/src/search/async_dense_retriever.py
rename to agentfly/tools/src/search/async_dense_retriever.py
diff --git a/agents/agents/tools/src/search/dense_retriever.py b/agentfly/tools/src/search/dense_retriever.py
similarity index 100%
rename from agents/agents/tools/src/search/dense_retriever.py
rename to agentfly/tools/src/search/dense_retriever.py
diff --git a/agents/agents/tools/src/search/faiss_indexer.py b/agentfly/tools/src/search/faiss_indexer.py
similarity index 100%
rename from agents/agents/tools/src/search/faiss_indexer.py
rename to agentfly/tools/src/search/faiss_indexer.py
diff --git a/agents/agents/tools/src/search/google_search.py b/agentfly/tools/src/search/google_search.py
similarity index 100%
rename from agents/agents/tools/src/search/google_search.py
rename to agentfly/tools/src/search/google_search.py
diff --git a/agents/agents/tools/src/ui/__init__.py b/agentfly/tools/src/ui/__init__.py
similarity index 100%
rename from agents/agents/tools/src/ui/__init__.py
rename to agentfly/tools/src/ui/__init__.py
diff --git a/agents/agents/tools/src/ui/tools.py b/agentfly/tools/src/ui/tools.py
similarity index 97%
rename from agents/agents/tools/src/ui/tools.py
rename to agentfly/tools/src/ui/tools.py
index 02769ab..7d820fd 100644
--- a/agents/agents/tools/src/ui/tools.py
+++ b/agentfly/tools/src/ui/tools.py
@@ -4,7 +4,7 @@
 import json
 from typing import Any
 from ...tool_base import tool
-from agents.utils.ui_action_parser import parsing_response_to_pyautogui_code
+from ....utils.ui_action_parser import parsing_response_to_pyautogui_code
 
 # Default image dimensions for UI interactions
 DEFAULT_IMAGE_HEIGHT = 1080
diff --git a/agents/agents/tools/src/webshop/tools.py b/agentfly/tools/src/webshop/tools.py
similarity index 100%
rename from agents/agents/tools/src/webshop/tools.py
rename to agentfly/tools/src/webshop/tools.py
diff --git a/agents/agents/tools/tool_base.py b/agentfly/tools/tool_base.py
similarity index 99%
rename from agents/agents/tools/tool_base.py
rename to agentfly/tools/tool_base.py
index e2ad350..9129ffc 100644
--- a/agents/agents/tools/tool_base.py
+++ b/agentfly/tools/tool_base.py
@@ -172,7 +172,7 @@ async def __call__(self, **kwargs):
             }
             return result_dict
         else:
-            raise ValueError(f"Got invalid result: {type(result)} when calling {self.name} with arguments {kwargs}. The result should be a string or a dict.")
+            raise ValueError(f"Got invalid result: {type(result)} when calling {self.name} with arguments {kwargs}. The result should be a string or a dict containing 'observation' as a key.")
         
 
     def call(self, **kwargs):
diff --git a/agents/agents/tools/utils/data.py b/agentfly/tools/utils/data.py
similarity index 100%
rename from agents/agents/tools/utils/data.py
rename to agentfly/tools/utils/data.py
diff --git a/agents/agents/tools/utils/rayify.py b/agentfly/tools/utils/rayify.py
similarity index 95%
rename from agents/agents/tools/utils/rayify.py
rename to agentfly/tools/utils/rayify.py
index 4d846b1..5378758 100644
--- a/agents/agents/tools/utils/rayify.py
+++ b/agentfly/tools/utils/rayify.py
@@ -1,5 +1,5 @@
 import asyncio, inspect, types, ray
-from agents.tools.tool_base import TOOL_FACTORY, Tool
+from ...tools.tool_base import TOOL_FACTORY, Tool
 
 def rayify(tool: Tool, *, export=None, **ray_opts):
     """
diff --git a/agents/agents/tools/utils/runner.py b/agentfly/tools/utils/runner.py
similarity index 100%
rename from agents/agents/tools/utils/runner.py
rename to agentfly/tools/utils/runner.py
diff --git a/agents/agents/tools/utils/schema.py b/agentfly/tools/utils/schema.py
similarity index 100%
rename from agents/agents/tools/utils/schema.py
rename to agentfly/tools/utils/schema.py
diff --git a/agentfly/utils/__init__.py b/agentfly/utils/__init__.py
new file mode 100644
index 0000000..41cc621
--- /dev/null
+++ b/agentfly/utils/__init__.py
@@ -0,0 +1,3 @@
+from .timing import Timer
+from .logging import Logger
+from .monitor import Monitor
diff --git a/agents/agents/utils/logging.py b/agentfly/utils/logging.py
similarity index 100%
rename from agents/agents/utils/logging.py
rename to agentfly/utils/logging.py
diff --git a/agents/agents/utils/monitor.py b/agentfly/utils/monitor.py
similarity index 100%
rename from agents/agents/utils/monitor.py
rename to agentfly/utils/monitor.py
diff --git a/agents/agents/utils/timing.py b/agentfly/utils/timing.py
similarity index 100%
rename from agents/agents/utils/timing.py
rename to agentfly/utils/timing.py
diff --git a/agents/agents/utils/ui_action_parser.py b/agentfly/utils/ui_action_parser.py
similarity index 100%
rename from agents/agents/utils/ui_action_parser.py
rename to agentfly/utils/ui_action_parser.py
diff --git a/agents/agents/utils/verl.py b/agentfly/utils/verl.py
similarity index 100%
rename from agents/agents/utils/verl.py
rename to agentfly/utils/verl.py
diff --git a/agents/agents/rewards/__init__.py b/agents/agents/rewards/__init__.py
deleted file mode 100644
index a9a6185..0000000
--- a/agents/agents/rewards/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .reward_base import RewardFunction, get_reward_from_name, get_rewards_from_names, list_available_rewards, register_reward, reward
-from .qa_reward import qa_f1_reward
-from .math_reward import math_reward, math_reward_tool, math_reward_think
-from .webshop_reward import webshop_reward
-from .alfworld_reward import alfworld_episode_reward
-from .scienceworld_reward import scienceworld_reward
-from .gui_reward import gui_reward
-
-
-__all__ = ["alfworld_episode_reward","qa_f1_reward", "math_reward", "math_reward_tool", "math_reward_think", "RewardFunction", "get_reward_from_name", "get_rewards_from_names", "list_available_rewards", "register_reward", "llm_as_judge_client_math_reward", "webshop_reward", "alfworld_episode_reward", "gui_reward"]
\ No newline at end of file
diff --git a/agents/agents/tools/src/calculate/tools.py b/agents/agents/tools/src/calculate/tools.py
deleted file mode 100644
index c7bc9b4..0000000
--- a/agents/agents/tools/src/calculate/tools.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from tool_base import tool
-
-
-@tool(name="calculator", description="Calculate the result of a mathematical expression.")
-def calculate(expression: str) -> float:
-    """
-    Calculate the result of a mathematical expression.
-    Args:
-        expression (str): A mathematical expression to calculate.
-    """
-    return eval(expression)
\ No newline at end of file
diff --git a/assets/images/discord.png b/assets/images/discord.png
new file mode 100644
index 0000000..249c9cf
Binary files /dev/null and b/assets/images/discord.png differ
diff --git a/docs/api_references/agents/agent.rst b/docs/api_references/agents/agent.rst
new file mode 100644
index 0000000..db85c3f
--- /dev/null
+++ b/docs/api_references/agents/agent.rst
@@ -0,0 +1,20 @@
+Agent 
+=============
+
+Base Agent Class
+----------------
+
+The foundation class for all agents in AgentFly:
+
+.. currentmodule:: agentfly.agents.agent_base
+.. automodule:: agentfly.agents.agent_base
+    :members:
+    :show-inheritance:
+
+Chain Generation
+----------------
+
+Base class for chain-based generation:
+
+.. autoclass:: agentfly.agents.chain.chain_base.ChainRollout
+    :members:
diff --git a/docs/api_references/agents/index.rst b/docs/api_references/agents/index.rst
new file mode 100644
index 0000000..93853fe
--- /dev/null
+++ b/docs/api_references/agents/index.rst
@@ -0,0 +1,159 @@
+.. _agents_index:
+
+####################
+Agents API Reference
+####################
+
+Overview
+========
+
+AgentFly provides a comprehensive agent system with a base class and specialized implementations for different use cases. All agents inherit from :py:class:`~agentfly.agents.agent_base.BaseAgent` and support tool calling, chain rollout, and various backends.
+
+Base Agent
+==========
+
+.. toctree::
+   :maxdepth: 2
+
+   agent
+
+Core Classes
+============
+
+BaseAgent
+---------
+
+The foundation class for all agents in AgentFly:
+
+.. autoclass:: agentfly.agents.agent_base.BaseAgent
+   :members:
+   :show-inheritance:
+   :special-members: __init__
+
+AutoAgent
+---------
+
+Factory class for automatic agent creation:
+
+.. autoclass:: agentfly.agents.auto.AutoAgent
+   :members:
+   :show-inheritance:
+
+Specialized Agents
+==================
+
+ReactAgent
+----------
+
+ReAct-style agent for reasoning and tool use:
+
+.. autoclass:: agentfly.agents.react.react_agent.ReactAgent
+   :members:
+   :show-inheritance:
+
+CodeAgent
+---------
+
+Specialized agent for code generation and execution:
+
+.. autoclass:: agentfly.agents.specialized.code_agent.CodeAgent
+   :members:
+   :show-inheritance:
+
+ThinkAgent
+----------
+
+Agent that uses thinking steps before taking actions:
+
+.. autoclass:: agentfly.agents.specialized.think_agent.ThinkAgent
+   :members:
+   :show-inheritance:
+
+GUIAgent
+---------
+
+Agent for GUI automation tasks:
+
+.. autoclass:: agentfly.agents.specialized.gui_agent.GUIAgent
+   :members:
+   :show-inheritance:
+
+HFAgent
+--------
+
+Hugging Face model-based agent:
+
+.. autoclass:: agentfly.agents.specialized.hf_agent.HFAgent
+   :members:
+   :show-inheritance:
+
+OpenAIAgent
+-----------
+
+OpenAI API-based agent:
+
+.. autoclass:: agentfly.agents.specialized.openai_agent.OpenAIAgent
+   :members:
+   :show-inheritance:
+
+Chain Generation
+================
+
+ChainRollout
+------------
+
+Base class for chain-based generation:
+
+.. autoclass:: agentfly.agents.chain.chain_base.ChainRollout
+   :members:
+   :show-inheritance:
+
+Usage Examples
+==============
+
+Basic Agent Creation
+--------------------
+
+.. code-block:: python
+
+   from agentfly.agents import ReactAgent
+   from agentfly.tools import get_tools_from_names
+   
+   # Create a ReactAgent with tools
+   agent = ReactAgent(
+       model_name_or_path="gpt2",
+       tools=get_tools_from_names(["calculator", "google_search"]),
+       template="react"
+   )
+
+Using AutoAgent
+---------------
+
+.. code-block:: python
+
+   from agentfly.agents import AutoAgent
+   
+   # Create agent from config
+   config = {
+       "agent_type": "react",
+       "model_name_or_path": "gpt2",
+       "template": "react",
+       "tools": ["calculator"]
+   }
+   agent = AutoAgent.from_config(config)
+
+Custom Agent
+------------
+
+.. code-block:: python
+
+   from agentfly.agents import BaseAgent
+   
+   class CustomAgent(BaseAgent):
+       def parse(self, response):
+           # Custom parsing logic
+           pass
+       
+       def generate(self, messages):
+           # Custom generation logic
+           pass
diff --git a/docs/environments/alfworld/environment.rst b/docs/api_references/environments/alfworld/environment.rst
similarity index 93%
rename from docs/environments/alfworld/environment.rst
rename to docs/api_references/environments/alfworld/environment.rst
index 0915064..cdc1193 100644
--- a/docs/environments/alfworld/environment.rst
+++ b/docs/api_references/environments/alfworld/environment.rst
@@ -8,8 +8,8 @@ The ALFWorldEnv class provides a Python interface to interact with ALFWorld envi
 Class Reference
 ---------------
 
-.. currentmodule:: agents.envs.alfworld_env
-.. autoclass:: agents.envs.alfworld_env.ALFWorldEnv
+.. currentmodule:: agentfly.envs.alfworld_env
+.. autoclass:: agentfly.envs.alfworld_env.ALFWorldEnv
     :members:
     :undoc-members:
     :show-inheritance:
@@ -23,7 +23,7 @@ Basic Usage
 
 .. code-block:: python
 
-    from agents.agents.envs.alfworld_env import ALFWorldEnv
+    from agentfly.envs.alfworld_env import ALFWorldEnv
     
     # Create environment with default settings
     env = ALFWorldEnv()
diff --git a/docs/environments/alfworld/http_server.rst b/docs/api_references/environments/alfworld/http_server.rst
similarity index 95%
rename from docs/environments/alfworld/http_server.rst
rename to docs/api_references/environments/alfworld/http_server.rst
index 34a1eb1..54d065b 100644
--- a/docs/environments/alfworld/http_server.rst
+++ b/docs/api_references/environments/alfworld/http_server.rst
@@ -8,8 +8,8 @@ The ALFWorld HTTP server provides a RESTful API for interacting with ALFWorld en
 Server Module Reference
 -----------------------
 
-.. currentmodule:: agents.dockers.alfworld_env.alfworld_http_server
-.. automodule:: agents.dockers.alfworld_env.alfworld_http_server
+.. currentmodule:: agentfly.dockers.alfworld_env.alfworld_http_server
+.. automodule:: agentfly.dockers.alfworld_env.alfworld_http_server
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/docs/environments/alfworld/index.rst b/docs/api_references/environments/alfworld/index.rst
similarity index 95%
rename from docs/environments/alfworld/index.rst
rename to docs/api_references/environments/alfworld/index.rst
index 394c60c..7f47d90 100644
--- a/docs/environments/alfworld/index.rst
+++ b/docs/api_references/environments/alfworld/index.rst
@@ -24,7 +24,6 @@ The ALFWorld environment includes the following components:
    :maxdepth: 2
 
    environment
-   http_server
    tools
    rewards
 
@@ -35,7 +34,7 @@ For most use cases, you can use the ALFWorldEnv class directly:
 
 .. code-block:: python
 
-   from agents.agents.envs.alfworld_env import ALFWorldEnv
+   from agentfly.envs.alfworld_env import ALFWorldEnv
    
    # Create and start the environment
    env = ALFWorldEnv()
diff --git a/docs/environments/alfworld/rewards.rst b/docs/api_references/environments/alfworld/rewards.rst
similarity index 95%
rename from docs/environments/alfworld/rewards.rst
rename to docs/api_references/environments/alfworld/rewards.rst
index 55b6e7b..f42e08a 100644
--- a/docs/environments/alfworld/rewards.rst
+++ b/docs/api_references/environments/alfworld/rewards.rst
@@ -41,8 +41,8 @@ This example shows how the ALFWorld reward is used with a ReactAgent:
 
 .. code-block:: python
 
-    from agents.agents.react.react_agent import ReactAgent
-    from agents.rewards import alfworld_episode_reward
+    from agentfly.agents.react.react_agent import ReactAgent
+    from agentfly.rewards import alfworld_episode_reward
 
     # Create ReactAgent with ALFWorld reward function
     react_agent = ReactAgent(
@@ -79,7 +79,7 @@ Simple Direct Usage
     print(f"Reward: {reward_result['reward']}")
     
     # Get reward by name
-    from agents.rewards import get_reward_from_name
+    from agentfly.rewards import get_reward_from_name
     reward_fn = get_reward_from_name("alfworld_episode_reward")
     result = await reward_fn("take apple", env)
     print(result)
diff --git a/docs/environments/alfworld/tools.rst b/docs/api_references/environments/alfworld/tools.rst
similarity index 93%
rename from docs/environments/alfworld/tools.rst
rename to docs/api_references/environments/alfworld/tools.rst
index 117901d..75f0cfe 100644
--- a/docs/environments/alfworld/tools.rst
+++ b/docs/api_references/environments/alfworld/tools.rst
@@ -8,12 +8,12 @@ The ALFWorld tools provide a Python interface for agents to interact with ALFWor
 Tools Reference
 ---------------
 
-.. currentmodule:: agents.tools.src.alfworld.tools
+.. currentmodule:: agentfly.tools.src.alfworld.tools
 
 alfworld_step
 ~~~~~~~~~~~~~
 
-.. autofunction:: agents.tools.src.alfworld.tools.alfworld_step
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_step
 
 **Function Signature:**
 
@@ -37,7 +37,7 @@ alfworld_step
 alfworld_reset
 ~~~~~~~~~~~~~~
 
-.. autofunction:: agents.tools.src.alfworld.tools.alfworld_reset
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_reset
 
 **Function Signature:**
 
@@ -56,7 +56,7 @@ alfworld_reset
 alfworld_get_admissible_commands
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autofunction:: agents.tools.src.alfworld.tools.alfworld_get_admissible_commands
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_get_admissible_commands
 
 **Function Signature:**
 
@@ -75,7 +75,7 @@ alfworld_get_admissible_commands
 alfworld_get_task_objective
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. autofunction:: agents.tools.src.alfworld.tools.alfworld_get_task_objective
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_get_task_objective
 
 **Function Signature:**
 
@@ -101,13 +101,13 @@ This example shows how ALFWorld tools are used with a ReactAgent and LLM (Qwen2.
 
 .. code-block:: python
 
-    from agents.agents.react.react_agent import ReactAgent
-    from agents.tools.src.alfworld.tools import (
+    from agentfly.agents.react.react_agent import ReactAgent
+    from agentfly.tools.src.alfworld.tools import (
         alfworld_step, 
         alfworld_get_admissible_commands,
         alfworld_get_task_objective
     )
-    from agents.rewards import alfworld_episode_reward
+    from agentfly.rewards import alfworld_episode_reward
 
     # Configure tools for ReactAgent
     tools = [
diff --git a/docs/environments/code/environment.rst b/docs/api_references/environments/code/environment.rst
similarity index 95%
rename from docs/environments/code/environment.rst
rename to docs/api_references/environments/code/environment.rst
index c9997cc..8575f3c 100644
--- a/docs/environments/code/environment.rst
+++ b/docs/api_references/environments/code/environment.rst
@@ -8,12 +8,12 @@ The Code Environment provides a secure Python sandbox execution environment usin
 PythonSandboxEnv Class Reference
 --------------------------------
 
-.. currentmodule:: agents.envs.python_env
+.. currentmodule:: agentfly.envs.python_env
 
 PythonSandboxEnv
 ~~~~~~~~~~~~~~~~
 
-.. autoclass:: agents.envs.python_env.PythonSandboxEnv
+.. autoclass:: agentfly.envs.python_env.PythonSandboxEnv
    :members:
    :undoc-members:
    :show-inheritance:
@@ -52,7 +52,7 @@ Basic usage with direct instantiation:
 
 .. code-block:: python
 
-   from agents.envs.python_env import PythonSandboxEnv
+   from agentfly.envs.python_env import PythonSandboxEnv
    
    # Create environment with custom settings
    env = PythonSandboxEnv(
diff --git a/docs/environments/code/http_server.rst b/docs/api_references/environments/code/http_server.rst
similarity index 92%
rename from docs/environments/code/http_server.rst
rename to docs/api_references/environments/code/http_server.rst
index 06a15f8..075aec7 100644
--- a/docs/environments/code/http_server.rst
+++ b/docs/api_references/environments/code/http_server.rst
@@ -8,8 +8,8 @@ The Code HTTP server provides a FastAPI-based execution environment for Python c
 Server Module Reference
 -----------------------
 
-.. currentmodule:: agents.dockers.python_env.python_http_server
-.. automodule:: agents.dockers.python_env.python_http_server
+.. currentmodule:: agentfly.dockers.python_env.python_http_server
+.. automodule:: agentfly.dockers.python_env.python_http_server
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/docs/environments/code/index.rst b/docs/api_references/environments/code/index.rst
similarity index 97%
rename from docs/environments/code/index.rst
rename to docs/api_references/environments/code/index.rst
index 89c6a33..15fbc6c 100644
--- a/docs/environments/code/index.rst
+++ b/docs/api_references/environments/code/index.rst
@@ -37,7 +37,7 @@ For most use cases, you can use the PythonSandboxEnv class directly:
 
 .. code-block:: python
 
-   from agents.envs.python_env import PythonSandboxEnv
+   from agentfly.envs.python_env import PythonSandboxEnv
    
    # Create and start the environment
    env = PythonSandboxEnv()
diff --git a/docs/environments/code/rewards.rst b/docs/api_references/environments/code/rewards.rst
similarity index 91%
rename from docs/environments/code/rewards.rst
rename to docs/api_references/environments/code/rewards.rst
index fc36dfc..5bdd64c 100644
--- a/docs/environments/code/rewards.rst
+++ b/docs/api_references/environments/code/rewards.rst
@@ -8,12 +8,12 @@ The Code reward system provides evaluation functions for code execution tasks. T
 Reward Functions Reference
 --------------------------
 
-.. currentmodule:: agents.rewards.code_reward
+.. currentmodule:: agentfly.rewards.code_reward
 
 code_reward_test
 ~~~~~~~~~~~~~~~~
 
-.. autofunction:: agents.rewards.code_reward.code_reward_test
+.. autofunction:: agentfly.rewards.code_reward.code_reward_test
 
 **Function Signature:**
 
@@ -64,8 +64,8 @@ Evaluate simple code snippets:
 
 .. code-block:: python
 
-   from agents.rewards.code_reward import code_reward_test
-   from agents.envs.python_env import PythonSandboxEnv
+   from agentfly.rewards.code_reward import code_reward_test
+   from agentfly.envs.python_env import PythonSandboxEnv
    
    # Create environment
    env = await PythonSandboxEnv.acquire()
@@ -92,7 +92,7 @@ Create specialized reward functions for specific tasks:
 
 .. code-block:: python
 
-   from agents.rewards.reward_base import reward
+   from agentfly.rewards.reward_base import reward
    
    @reward(name="math_code_reward", env_cls=PythonSandboxEnv, pool_size=8)
    async def math_code_reward(prediction: str, env: PythonSandboxEnv) -> dict:
diff --git a/docs/environments/code/tools.rst b/docs/api_references/environments/code/tools.rst
similarity index 93%
rename from docs/environments/code/tools.rst
rename to docs/api_references/environments/code/tools.rst
index ea8ac27..5426a01 100644
--- a/docs/environments/code/tools.rst
+++ b/docs/api_references/environments/code/tools.rst
@@ -8,12 +8,12 @@ The Code environment provides tools for executing Python code in secure, isolate
 Tools Reference
 ---------------
 
-.. currentmodule:: agents.tools.src.code.tools
+.. currentmodule:: agentfly.tools.src.code.tools
 
 code_interpreter
 ~~~~~~~~~~~~~~~~
 
-.. autofunction:: agents.tools.src.code.tools.code_interpreter
+.. autofunction:: agentfly.tools.src.code.tools.code_interpreter
 
 **Function Signature:**
 
@@ -46,8 +46,8 @@ Execute simple Python expressions and statements:
 
 .. code-block:: python
 
-   from agents.tools import code_interpreter
-   from agents.envs.python_env import PythonSandboxEnv
+   from agentfly.tools import code_interpreter
+   from agentfly.envs.python_env import PythonSandboxEnv
    
    # Create environment
    env = await PythonSandboxEnv.acquire()
@@ -183,8 +183,8 @@ Real-world usage with ReactAgent for problem-solving:
 
 .. code-block:: python
 
-   from agents.agents.react.react_agent import ReactAgent
-   from agents.rewards.code_reward import code_reward_test
+   from agentfly.agents.react.react_agent import ReactAgent
+   from agentfly.rewards.code_reward import code_reward_test
    
    # Task information for the agent
    task_info = """Execute Python code to solve computational problems. 
diff --git a/docs/api_references/environments/environment.rst b/docs/api_references/environments/environment.rst
new file mode 100644
index 0000000..fbedfe2
--- /dev/null
+++ b/docs/api_references/environments/environment.rst
@@ -0,0 +1,21 @@
+Environment
+==============
+
+Base Environment Class
+----------------------
+
+The foundation class for all environments:
+
+.. currentmodule:: agentfly.envs.env_base
+.. automodule:: agentfly.envs.env_base
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Docker Support Mixin
+--------------------
+
+Mixin for Docker-based environments:
+
+.. autoclass:: agentfly.envs.env_base.SupportsDocker
+    :members:
diff --git a/docs/api_references/environments/index.rst b/docs/api_references/environments/index.rst
new file mode 100644
index 0000000..6852d2a
--- /dev/null
+++ b/docs/api_references/environments/index.rst
@@ -0,0 +1,180 @@
+.. _environments_index:
+
+##########################
+Environments API Reference
+##########################
+
+
+Overview
+========
+
+AgentFly provides a comprehensive environment system for managing agent interactions with external systems. Environments can be stateful, support Docker containers, and provide async interfaces for high-performance execution.
+
+Base Environment
+================
+
+.. toctree::
+   :maxdepth: 2
+
+   environment
+
+Core Classes
+============
+
+BaseEnv
+-------
+
+The foundation class for all environments:
+
+.. autoclass:: agentfly.envs.env_base.BaseEnv
+   :members:
+   :show-inheritance:
+   :special-members: __init__
+
+SupportsDocker
+--------------
+
+Mixin for Docker-based environments:
+
+.. autoclass:: agentfly.envs.env_base.SupportsDocker
+   :members:
+   :show-inheritance:
+
+Pre-built Environments
+======================
+
+ALFWorld Environment
+--------------------
+
+ALFWorld text-based environment:
+
+.. autoclass:: agentfly.envs.alfworld_env.ALFWorldEnv
+   :members:
+   :show-inheritance:
+
+ScienceWorld Environment
+------------------------
+
+ScienceWorld experiment environment:
+
+.. autoclass:: agentfly.envs.scienceworld_env.ScienceWorldEnv
+   :members:
+   :show-inheritance:
+
+Python Sandbox Environment
+--------------------------
+
+Python code execution environment:
+
+.. autoclass:: agentfly.envs.python_env.PythonSandboxEnv
+   :members:
+   :show-inheritance:
+
+WebShop Text Environment
+------------------------
+
+WebShop text-based interface:
+
+.. autoclass:: agentfly.envs.webshop_text_env.WebAgentTextEnv
+   :members:
+   :show-inheritance:
+
+Environment Management
+======================
+
+Environment Manager
+-------------------
+
+Centralized environment management:
+
+.. autoclass:: agentfly.envs.manager.env_manager.EnvironmentManager
+   :members:
+   :show-inheritance:
+
+Resource Management
+-------------------
+
+Global environment resource tracking:
+
+.. autodata:: agentfly.envs.manager.resource.GLOBAL_ENVS
+
+Usage Examples
+==============
+
+Basic Environment
+-----------------
+
+.. code-block:: python
+
+   from agentfly.envs import BaseEnv
+   
+   class SimpleEnv(BaseEnv):
+       async def start(self):
+           # Initialize resources
+           pass
+       
+       async def reset(self):
+           # Reset to initial state
+           return "initial_state"
+       
+       async def step(self, action):
+           # Execute action
+           return f"result_of_{action}"
+       
+       async def aclose(self):
+           # Clean up resources
+           pass
+       
+       @staticmethod
+       async def acquire():
+           return SimpleEnv()
+
+Docker-based Environment
+------------------------
+
+.. code-block:: python
+
+   from agentfly.envs import BaseEnv, SupportsDocker
+   
+   class DockerEnv(BaseEnv, SupportsDocker):
+       def __init__(self):
+           super().__init__()
+           self.image = "my-image:latest"
+           self.container = None
+       
+       async def start(self):
+           self.container = await self.start_container(
+               image=self.image,
+               runtime="runc",
+               cpu=2,
+               mem="2g"
+           )
+       
+       async def reset(self):
+           # Reset container state
+           return "reset_state"
+       
+       async def step(self, action):
+           # Execute action in container
+           return f"container_result_{action}"
+       
+       async def aclose(self):
+           if self.container:
+               await self.stop_container(self.container)
+
+Environment with Tools
+----------------------
+
+.. code-block:: python
+
+   from agentfly.tools import tool
+   from agentfly.envs import BaseEnv
+   
+   class MyEnv(BaseEnv):
+       # Environment implementation
+       pass
+   
+   @tool(name="env_tool", env_cls=MyEnv, pool_size=4)
+   async def env_tool(action: str, env: MyEnv):
+       result = await env.step(action)
+       return result
diff --git a/docs/environments/retrieval/index.rst b/docs/api_references/environments/retrieval/index.rst
similarity index 91%
rename from docs/environments/retrieval/index.rst
rename to docs/api_references/environments/retrieval/index.rst
index eedacb6..2bbded2 100644
--- a/docs/environments/retrieval/index.rst
+++ b/docs/api_references/environments/retrieval/index.rst
@@ -26,7 +26,7 @@ Quick Start
 
 .. code-block:: python
 
-   from agents.agents.tools.src.search.async_dense_retriever import asyncdense_retrieve
+   from agentfly.tools.src.search.async_dense_retriever import asyncdense_retrieve
    
    # Retrieve relevant documents
    result = await asyncdense_retrieve("What is machine learning?")
diff --git a/docs/environments/retrieval/tools.rst b/docs/api_references/environments/retrieval/tools.rst
similarity index 98%
rename from docs/environments/retrieval/tools.rst
rename to docs/api_references/environments/retrieval/tools.rst
index ee3f411..9c340c8 100644
--- a/docs/environments/retrieval/tools.rst
+++ b/docs/api_references/environments/retrieval/tools.rst
@@ -8,7 +8,7 @@ The Retrieval tools provide semantic search capabilities for document retrieval
 Tools Reference
 ---------------
 
-.. currentmodule:: agents.tools.src.search
+.. currentmodule:: agentfly.tools.src.search
 
 asyncdense_retrieve
 ~~~~~~~~~~~~~~~~~~~
diff --git a/docs/api_references/index.rst b/docs/api_references/index.rst
new file mode 100644
index 0000000..4cd352c
--- /dev/null
+++ b/docs/api_references/index.rst
@@ -0,0 +1,11 @@
+API Reference
+=============
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   agents/index
+   tools/index
+   rewards/index
+   environments/index
\ No newline at end of file
diff --git a/docs/rewards/alfworld_reward.rst b/docs/api_references/rewards/alfworld_reward.rst
similarity index 91%
rename from docs/rewards/alfworld_reward.rst
rename to docs/api_references/rewards/alfworld_reward.rst
index bf37661..6077e5b 100644
--- a/docs/rewards/alfworld_reward.rst
+++ b/docs/api_references/rewards/alfworld_reward.rst
@@ -3,7 +3,7 @@
 ALFWorld Episode Reward
 ========================
 
-.. currentmodule:: agents.agents.rewards.alfworld_reward
+.. currentmodule:: agentfly.rewards.alfworld_reward
 
 .. autofunction:: alfworld_episode_reward
 
@@ -50,8 +50,8 @@ Technical Details
 
 .. code-block:: python
 
-    from agents.agents.rewards import get_reward_from_name
-    from agents.agents.envs import ALFWorldEnv
+    from agentfly.rewards import get_reward_from_name
+    from agentfly.envs import ALFWorldEnv
     
     # Get reward function
     reward_fn = get_reward_from_name("alfworld_episode_reward")
diff --git a/docs/rewards/code_reward.rst b/docs/api_references/rewards/code_reward.rst
similarity index 93%
rename from docs/rewards/code_reward.rst
rename to docs/api_references/rewards/code_reward.rst
index b927403..1128439 100644
--- a/docs/rewards/code_reward.rst
+++ b/docs/api_references/rewards/code_reward.rst
@@ -3,7 +3,7 @@
 Code Execution Reward
 ======================
 
-.. currentmodule:: agents.agents.rewards.code_reward
+.. currentmodule:: agentfly.rewards.code_reward
 
 .. autofunction:: code_reward_test
 
@@ -51,8 +51,8 @@ Technical Details
 
 .. code-block:: python
 
-    from agents.agents.rewards import get_reward_from_name
-    from agents.agents.envs import PythonSandboxEnv
+    from agentfly.rewards import get_reward_from_name
+    from agentfly.envs import PythonSandboxEnv
     
     # Get reward function
     reward_fn = get_reward_from_name("code_reward_test")
diff --git a/docs/rewards/index.rst b/docs/api_references/rewards/index.rst
similarity index 92%
rename from docs/rewards/index.rst
rename to docs/api_references/rewards/index.rst
index c1fe371..8e5dc7c 100644
--- a/docs/rewards/index.rst
+++ b/docs/api_references/rewards/index.rst
@@ -4,9 +4,6 @@
 Reward Functions
 ###################
 
-.. contents::
-   :local:
-   :depth: 2
 
 Overview
 ========
@@ -25,14 +22,13 @@ Available Reward Functions
    code_reward  
    math_reward
    qa_reward
-   llm_judge_reward
 
 Quick Start
 ===========
 
 .. code-block:: python
 
-   from agents.agents.rewards import get_reward_from_name
+   from agentfly.rewards import get_reward_from_name
    
    # Get a specific reward function
    math_reward = get_reward_from_name("math_reward")
@@ -51,7 +47,6 @@ Reward Function Categories
 **Task-Specific Rewards**
     - Math Problem Solving Rewards
     - Question Answering (QA) Rewards
-    - LLM-as-Judge Reward
 
 **Format-Aware Rewards**
     - Tool Usage Rewards
@@ -104,5 +99,4 @@ All reward functions use the ``@reward`` decorator which:
 **Decorator Parameters:**
     - ``name``: Unique identifier for the reward function
     - ``env_cls``: Environment class for stateful rewards (optional)
-    - ``pool_size``: Number of environment instances to pool (optional)
-    - ``llm_config``: LLM configuration for judge-based rewards (optional) 
\ No newline at end of file
+    - ``pool_size``: Number of environment instances to pool (optional) 
\ No newline at end of file
diff --git a/docs/rewards/math_reward.rst b/docs/api_references/rewards/math_reward.rst
similarity index 96%
rename from docs/rewards/math_reward.rst
rename to docs/api_references/rewards/math_reward.rst
index 043dd0f..5cb70e8 100644
--- a/docs/rewards/math_reward.rst
+++ b/docs/api_references/rewards/math_reward.rst
@@ -3,7 +3,7 @@
 Math Reward Functions
 =====================
 
-.. currentmodule:: agents.agents.rewards.math_reward
+.. currentmodule:: agentfly.rewards.math_reward
 
 Math reward functions evaluate agent performance on mathematical problem-solving tasks with various behavioral requirements.
 
@@ -57,13 +57,13 @@ math_reward_tool
 math_reward_thought
 -------------------
 
-.. autofunction:: math_reward_thought
+.. autofunction:: math_reward_think
 
 **Function Signature:**
 
 .. code-block:: python
 
-    def math_reward_thought(prediction: str, answer: str, trajectory: List[Dict]) -> dict
+    def math_reward_think(prediction: str, answer: str, trajectory: List[Dict]) -> dict
 
 **Description:** Rewards mathematical correctness with thinking process requirement.
 
diff --git a/docs/rewards/qa_reward.rst b/docs/api_references/rewards/qa_reward.rst
similarity index 98%
rename from docs/rewards/qa_reward.rst
rename to docs/api_references/rewards/qa_reward.rst
index 069b361..074965a 100644
--- a/docs/rewards/qa_reward.rst
+++ b/docs/api_references/rewards/qa_reward.rst
@@ -3,7 +3,7 @@
 Question Answering Rewards
 ===========================
 
-.. currentmodule:: agents.agents.rewards.qa_reward
+.. currentmodule:: agentfly.rewards.qa_reward
 
 QA reward functions evaluate agent performance on question answering tasks using F1 score and exact match metrics.
 
diff --git a/docs/api_references/rewards/reward.rst b/docs/api_references/rewards/reward.rst
new file mode 100644
index 0000000..3e22cb4
--- /dev/null
+++ b/docs/api_references/rewards/reward.rst
@@ -0,0 +1,12 @@
+Reward
+==============
+
+.. autoclass:: agentfly.rewards.reward_base.RewardFunction
+    :members:
+    :special-members: __call__
+
+.. autofunction:: agentfly.rewards.reward_base.reward
+    :noindex:
+
+
+
diff --git a/docs/api_references/tools/index.rst b/docs/api_references/tools/index.rst
new file mode 100644
index 0000000..5cccb25
--- /dev/null
+++ b/docs/api_references/tools/index.rst
@@ -0,0 +1,158 @@
+.. _tools_index:
+
+###################
+Tools API Reference
+###################
+
+
+Overview
+========
+
+AgentFly provides a comprehensive tool system that enables agents to interact with external systems and APIs. Tools can be stateful (with environment management) or stateless, and support both synchronous and asynchronous execution.
+
+Base Tool
+=========
+
+.. toctree::
+   :maxdepth: 2
+
+   tool
+
+Core Classes
+============
+
+Tool
+----
+
+The main tool wrapper class:
+
+.. autoclass:: agentfly.tools.tool_base.Tool
+   :members:
+   :show-inheritance:
+   :special-members: __call__
+
+Tool Decorator
+--------------
+
+The main decorator for creating tools:
+
+.. autofunction:: agentfly.tools.tool_base.tool
+
+Utility Functions
+=================
+
+Tool Registration
+-----------------
+
+.. autofunction:: agentfly.tools.register_tool
+
+Tool Retrieval
+--------------
+
+.. autofunction:: agentfly.tools.get_tools_from_names
+
+Pre-built Tools
+===============
+
+Code Tools
+----------
+
+.. autofunction:: agentfly.tools.src.code.tools.code_interpreter
+
+Search Tools
+------------
+
+.. autofunction:: agentfly.tools.src.search.google_search.google_search_serper
+
+.. autofunction:: agentfly.tools.src.search.dense_retriever.dense_retrieve
+
+.. autofunction:: agentfly.tools.src.search.async_dense_retriever.asyncdense_retrieve
+
+ALFWorld Tools
+--------------
+
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_step
+
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_reset
+
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_get_task_objective
+
+.. autofunction:: agentfly.tools.src.alfworld.tools.alfworld_get_admissible_commands
+
+WebShop Tools
+-------------
+
+.. autofunction:: agentfly.tools.src.webshop.tools.webshop_browser
+
+ScienceWorld Tools
+------------------
+
+.. autofunction:: agentfly.tools.src.scienceworld.tools.scienceworld_explorer
+
+ReAct Tools
+-----------
+
+.. autofunction:: agentfly.tools.src.react.tools.answer_qa
+
+.. autofunction:: agentfly.tools.src.react.tools.answer_math
+
+Utility Tools
+-------------
+
+.. autofunction:: agentfly.tools.src.calculate.tools.calculator
+
+.. autofunction:: agentfly.tools.src.ui.tools.pyautogui_code_generator
+
+Usage Examples
+==============
+
+Basic Tool Definition
+---------------------
+
+.. code-block:: python
+
+   from agentfly.tools import tool
+   
+   @tool(name="calculator", description="Calculate mathematical expressions")
+   def calculator(expression: str):
+       try:
+           result = eval(expression)
+           return str(result)
+       except Exception as e:
+           return f"Error: {str(e)}"
+
+Stateful Tool with Environment
+------------------------------
+
+.. code-block:: python
+
+   from agentfly.tools import tool
+   from agentfly.envs import BaseEnv
+   
+   class MyEnv(BaseEnv):
+       # Environment implementation
+       pass
+   
+   @tool(name="env_tool", env_cls=MyEnv, pool_size=4)
+   async def env_tool(action: str, env: MyEnv):
+       result = await env.step(action)
+       return result
+
+Tool with Schema
+----------------
+
+.. code-block:: python
+
+   @tool(
+       name="structured_tool",
+       schema={
+           "type": "object",
+           "properties": {
+               "query": {"type": "string"},
+               "limit": {"type": "integer"}
+           }
+       }
+   )
+   def structured_tool(query: str, limit: int = 10):
+       # Tool implementation
+       pass
diff --git a/docs/api_references/tools/tool.rst b/docs/api_references/tools/tool.rst
new file mode 100644
index 0000000..7d1580e
--- /dev/null
+++ b/docs/api_references/tools/tool.rst
@@ -0,0 +1,45 @@
+Tool
+==============
+
+Base Tool Class
+---------------
+
+The main tool wrapper class:
+
+.. autoclass:: agentfly.tools.tool_base.Tool
+    :members:
+    :special-members: __call__
+
+Tool Decorator
+--------------
+
+The main decorator for creating tools:
+
+.. autofunction:: agentfly.tools.tool_base.tool
+
+Predefined Tools
+----------------
+
+The following are predefined tool instances that can be used directly with agents.
+
+Code Interpreter
+^^^^^^^^^^^^^^^^
+
+.. autofunction:: agentfly.tools.src.code.tools.code_interpreter
+
+Google Search
+^^^^^^^^^^^^^
+
+.. autofunction:: agentfly.tools.src.search.google_search.google_search_serper
+
+Calculator
+^^^^^^^^^^
+
+.. autofunction:: agentfly.tools.src.calculate.tools.calculator
+
+Answer Tools
+^^^^^^^^^^^^
+
+.. autofunction:: agentfly.tools.src.react.tools.answer_qa
+
+.. autofunction:: agentfly.tools.src.react.tools.answer_math
diff --git a/docs/classes/agent.rst b/docs/classes/agent.rst
deleted file mode 100644
index 8a05934..0000000
--- a/docs/classes/agent.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-Agent 
-=============
-
-
-.. currentmodule:: agent_base
-.. automodule:: agents.agents.agent_base
-    :members:
-    :show-inheritance:
-
-.. autoclass:: agents.agents.chain.chain_base.ChainGeneration
-    :members:
diff --git a/docs/classes/environment.rst b/docs/classes/environment.rst
deleted file mode 100644
index c8466c6..0000000
--- a/docs/classes/environment.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-Environment
-==============
-
-.. currentmodule:: env_base
-.. automodule:: agents.envs.env_base
-    :members:
-    :undoc-members:
-    :show-inheritance:
diff --git a/docs/classes/reward.rst b/docs/classes/reward.rst
deleted file mode 100644
index ed5cfce..0000000
--- a/docs/classes/reward.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-Reward
-==============
-
-.. autoclass:: agents.rewards.reward_base.RewardFunction
-    :members:
-    :special-members: __call__
-
-.. autofunction:: agents.rewards.reward_base.reward
-    :noindex:
-
-
-
diff --git a/docs/classes/tool.rst b/docs/classes/tool.rst
deleted file mode 100644
index d0a8373..0000000
--- a/docs/classes/tool.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-Tool
-==============
-
-Base Tool Class
----------------
-
-.. autoclass:: agents.tools.tool_base.Tool
-    :members:
-    :special-members: __call__
-
-Tool Decorator
--------------
-
-.. autofunction:: agents.tools.tool_base.tool
-
-Predefined Tools
----------------
-
-The following are predefined tool instances that can be used directly with agents.
-
-Code Interpreter
-^^^^^^^^^^^^^^^
-
-.. tool-docstring:: code_interpreter
-
-Google Search
-^^^^^^^^^^^^
-
-.. tool-docstring:: google_search_serper
-
-Answer Tool
-^^^^^^^^^^
-
-.. tool-docstring:: answer
diff --git a/docs/conf.py b/docs/conf.py
index 93bd302..f5682eb 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -4,10 +4,7 @@
 import importlib
 
 # Add both project root and agents directory to the Python path
-project_root = os.path.abspath('..')
-agents_root = os.path.join(project_root, 'agents')
-sys.path.insert(0, project_root)
-sys.path.insert(0, agents_root)
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 
 project   = "AgentFly"
 author    = "AgentFly Team"
@@ -20,6 +17,7 @@
     "sphinx.ext.napoleon",    # Google/NumPy-style docstrings
     "sphinx.ext.viewcode",    # add "[source]" links
     "sphinx.ext.autosectionlabel",
+    "sphinx_design"
 ]
 
 # recognise both .md and .rst
@@ -28,9 +26,93 @@
 templates_path   = ["_templates"]
 exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "README.md"]
 
-html_theme       = "sphinx_rtd_theme"
+
+# html_theme = 'furo'
+html_theme = 'sphinx_book_theme'
+# html_theme       = "sphinx_rtd_theme"  # Commented out to use sphinx_book_theme
 html_static_path = ["_static"]
 
+
+
+# html_theme_options = {
+#     "logo": {
+#         "text": "🪽AgentFly\n",
+#         "image_light": "_static/logo-light.png",
+#         "image_dark": "_static/logo-dark.png",
+#     }
+# }
+html_theme_options = {  # sphinx_book_theme settings for the AgentFly docs site
+    "path_to_docs": "docs",  # conf.py lives in docs/, so edit/source links need this prefix
+    "repository_url": "https://github.com/Agent-One-Lab/AgentFly",
+    "repository_branch": "master",  # NOTE(review): confirm the repository's default branch
+    "launch_buttons": {
+        "binderhub_url": "https://mybinder.org",
+        "colab_url": "https://colab.research.google.com/",
+        "deepnote_url": "https://deepnote.com/",
+        "notebook_interface": "jupyterlab",
+        "thebe": True,
+        # "jupyterhub_url": "https://datahub.berkeley.edu",  # For testing
+    },
+    "use_edit_page_button": True,
+    "use_source_button": True,
+    "use_issues_button": True,
+    "use_repository_button": True,
+    "use_download_button": True,
+    "use_sidenotes": True,
+    "show_toc_level": 2,
+    "show_navbar_depth": 2,
+    "navigation_depth": 4,
+    "collapse_navigation": False,
+    "globaltoc_collapse": False,
+    "announcement": (  # NOTE(review): text looks copied from the theme's sample config; confirm it is intended
+        "⚠️The latest release refactored our HTML, "
+        "so double-check your custom CSS rules!⚠️"
+    ),
+    "logo": {
+        "image_dark": "_static/logo-wide-dark.svg",
+        "text": "🪽AgentFly Document",  # text displayed alongside the logo
+    },
+    "icon_links": [
+        {
+            "name": "Paper",
+            "url": "https://arxiv.org/pdf/2507.14897",
+            "icon": "https://cdn.simpleicons.org/arxiv",
+            "type": "url",
+        },
+        {
+            "name": "WANDB",
+            "url": "https://wandb.ai/AgentRL/Open",
+            "icon": "https://cdn.simpleicons.org/weightsandbiases",
+            "type": "url"
+        },
+        {
+            "name": "HF",
+            "url": "https://huggingface.co/collections/Agent-One/agentfly-6882061c6cf08537cb66c12b",
+            "icon": "https://cdn.simpleicons.org/huggingface/FF9A00",
+            "type": "url",
+        },
+        {
+            "name": "GitHub",
+            "url": "https://github.com/Agent-One-Lab/AgentFly",
+            "icon": "https://cdn.simpleicons.org/github",
+            "type": "url",
+        },
+    ],
+    # For testing
+    # "use_fullscreen_button": False,
+    # "home_page_in_toc": True,
+    # "extra_footer": "<a href='https://google.com'>Test</a>",  # DEPRECATED KEY
+    # "show_navbar_depth": 2,
+    # Testing layout areas
+    # "navbar_start": ["test.html"],
+    # "navbar_center": ["test.html"],
+    # "navbar_end": ["test.html"],
+    # "navbar_persistent": ["test.html"],
+    # "footer_start": ["test.html"],
+    # "footer_end": ["test.html"]
+}
+
+
 # Configure autodoc to include special methods
 autodoc_default_options = {
     'members': True,
@@ -57,13 +139,13 @@ def run(self):
             # Import the original function modules
             try:
                 if tool_name == "code_interpreter":
-                    from agents.tools.src.code.tools import code_interpreter
+                    from agentfly.tools.src.code.tools import code_interpreter
                     original_func = code_interpreter.user_func
                 elif tool_name == "google_search_serper":
-                    from agents.tools.src.search.google_search import google_search_serper
+                    from agentfly.tools.src.search.google_search import google_search_serper
                     original_func = google_search_serper.user_func
                 elif tool_name == "answer":
-                    from agents.tools.src.react.tools import answer
+                    from agentfly.tools.src.react.tools import answer
                     original_func = answer.user_func
                 else:
                     return [nodes.paragraph(text=f"Tool {tool_name} not found")]
diff --git a/docs/examples/index.rst b/docs/examples/index.rst
new file mode 100644
index 0000000..96c7728
--- /dev/null
+++ b/docs/examples/index.rst
@@ -0,0 +1,8 @@
+Examples
+========
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   predefined_training_examples
\ No newline at end of file
diff --git a/docs/chat_template/README.md b/docs/features/chat_template/README.md
similarity index 97%
rename from docs/chat_template/README.md
rename to docs/features/chat_template/README.md
index 84d449c..9639bb3 100644
--- a/docs/chat_template/README.md
+++ b/docs/features/chat_template/README.md
@@ -18,7 +18,7 @@ The Chat Template System is a comprehensive framework that provides a modular, e
 ## Quick Start
 
 ```python
-from agents.agents.agents.templates import Chat, get_template
+from agentfly.agents.templates import Chat, get_template
 
 # Get a pre-built template
 template = get_template("qwen2.5")
diff --git a/docs/chat_template/advanced_features.md b/docs/features/chat_template/advanced_features.md
similarity index 94%
rename from docs/chat_template/advanced_features.md
rename to docs/features/chat_template/advanced_features.md
index 76ccccb..d4a1bff 100644
--- a/docs/chat_template/advanced_features.md
+++ b/docs/features/chat_template/advanced_features.md
@@ -11,7 +11,7 @@ The Chat Template System provides advanced features for fine-grained control ove
 The system supports multiple strategies for where and how tools are integrated into prompts:
 
 ```python
-from agents.agents.agents.templates.constants import ToolPlacement
+from agentfly.agents.templates.constants import ToolPlacement
 
 # 1. SYSTEM placement - tools appear in system message
 system_placement = ToolPlacement.SYSTEM
@@ -31,7 +31,7 @@ Different strategies for formatting tool definitions:
 #### JSON Formatters
 
 ```python
-from agents.agents.agents.templates.tool_policy import (
+from agentfly.agents.templates.tool_policy import (
     JsonFormatter, JsonMinifiedFormatter, JsonIndentedFormatter, JsonCompactFormatter
 )
 
@@ -56,7 +56,7 @@ compact_formatter = JsonCompactFormatter(format_as_list=True)
 #### YAML Formatter
 
 ```python
-from agents.agents.agents.templates.tool_policy import YamlFormatter
+from agentfly.agents.templates.tool_policy import YamlFormatter
 
 # YAML formatting (requires PyYAML)
 yaml_formatter = YamlFormatter()
@@ -65,7 +65,7 @@ yaml_formatter = YamlFormatter()
 #### Custom Formatters
 
 ```python
-from agents.agents.agents.templates.tool_policy import ToolFormatter
+from agentfly.agents.templates.tool_policy import ToolFormatter
 
 class CustomToolFormatter(ToolFormatter):
     def format(self, tools):
@@ -101,7 +101,7 @@ custom_tool_policy = ToolPolicy(
 Process tool content before formatting:
 
 ```python
-from agents.agents.agents.templates.tool_policy import ToolContentProcessor
+from agentfly.agents.templates.tool_policy import ToolContentProcessor
 
 class ToolFilterProcessor(ToolContentProcessor):
     """Filter tools based on certain criteria"""
@@ -139,7 +139,7 @@ filtered_tool_policy = ToolPolicy(
 Fine-grained control over system message behavior:
 
 ```python
-from agents.agents.agents.templates.system_policy import SystemPolicy
+from agentfly.agents.templates.system_policy import SystemPolicy
 
 # Basic system policy
 basic_policy = SystemPolicy(
@@ -170,7 +170,7 @@ Transform system messages before rendering:
 #### Built-in Processors
 
 ```python
-from agents.agents.agents.templates.system_policy import Llama32DateProcessor
+from agentfly.agents.templates.system_policy import Llama32DateProcessor
 
 # Llama 3.2 date processor (adds current date)
 llama_date_policy = SystemPolicy(
@@ -183,7 +183,7 @@ llama_date_policy = SystemPolicy(
 #### Custom Content Processors
 
 ```python
-from agents.agents.agents.templates.system_policy import SystemContentProcessor
+from agentfly.agents.templates.system_policy import SystemContentProcessor
 
 class EnvironmentAwareProcessor(SystemContentProcessor):
     """Add environment information to system messages"""
@@ -225,7 +225,7 @@ lambda_policy = SystemPolicy(
 ### Template-Wide Settings
 
 ```python
-from agents.agents.agents.templates import GlobalPolicy
+from agentfly.agents.templates import GlobalPolicy
 
 # Add prefix to all prompts
 prefix_policy = GlobalPolicy(prefix="<|begin_of_text|>")
@@ -245,7 +245,7 @@ comprehensive_policy = GlobalPolicy(
 ### Conditional Templates
 
 ```python
-from agents.agents.agents.templates import Template
+from agentfly.agents.templates import Template
 
 # Template that changes based on context
 conditional_template = Template(
diff --git a/docs/chat_template/architecture.md b/docs/features/chat_template/architecture.md
similarity index 100%
rename from docs/chat_template/architecture.md
rename to docs/features/chat_template/architecture.md
diff --git a/docs/chat_template/basic_usage.md b/docs/features/chat_template/basic_usage.md
similarity index 97%
rename from docs/chat_template/basic_usage.md
rename to docs/features/chat_template/basic_usage.md
index 215a391..b376374 100644
--- a/docs/chat_template/basic_usage.md
+++ b/docs/features/chat_template/basic_usage.md
@@ -7,9 +7,9 @@ The Chat Template System provides a simple yet powerful interface for creating a
 ## Importing the System
 
 ```python
-from agents.agents.agents.templates import Chat, get_template, Template
-from agents.agents.agents.templates.tool_policy import ToolPolicy, JsonFormatter
-from agents.agents.agents.templates.system_policy import SystemPolicy
+from agentfly.agents.templates import Chat, get_template, Template
+from agentfly.agents.templates.tool_policy import ToolPolicy, JsonFormatter
+from agentfly.agents.templates.system_policy import SystemPolicy
 ```
 
 ## Using Pre-built Templates
diff --git a/docs/chat_template/custom_templates.md b/docs/features/chat_template/custom_templates.md
similarity index 90%
rename from docs/chat_template/custom_templates.md
rename to docs/features/chat_template/custom_templates.md
index 7fdece3..b390515 100644
--- a/docs/chat_template/custom_templates.md
+++ b/docs/features/chat_template/custom_templates.md
@@ -9,7 +9,7 @@ The Chat Template System is designed to be highly extensible, allowing you to cr
 ### Core Template Fields
 
 ```python
-from agents.agents.agents.templates import Template
+from agentfly.agents.templates import Template
 
 template = Template(
     name="my-custom-template",           # Unique identifier
@@ -122,7 +122,7 @@ vision_template = Template(
 ### System Policy
 
 ```python
-from agents.agents.agents.templates.system_policy import SystemPolicy
+from agentfly.agents.templates.system_policy import SystemPolicy
 
 # Basic system policy
 system_policy = SystemPolicy(
@@ -147,8 +147,8 @@ system_policy = SystemPolicy(
 ### Tool Policy
 
 ```python
-from agents.agents.agents.templates.tool_policy import ToolPolicy, JsonFormatter, JsonIndentedFormatter
-from agents.agents.agents.templates.constants import ToolPlacement
+from agentfly.agents.templates.tool_policy import ToolPolicy, JsonFormatter, JsonIndentedFormatter
+from agentfly.agents.templates.constants import ToolPlacement
 
 # Basic tool policy
 tool_policy = ToolPolicy(
@@ -167,7 +167,7 @@ tool_policy = ToolPolicy(
 ### Global Policy
 
 ```python
-from agents.agents.agents.templates import GlobalPolicy
+from agentfly.agents.templates import GlobalPolicy
 
 global_policy = GlobalPolicy(
     prefix="<|begin_of_text|>"               # Add prefix to all prompts
@@ -177,10 +177,10 @@ global_policy = GlobalPolicy(
 ## Complete Template Example
 
 ```python
-from agents.agents.agents.templates import Template, GlobalPolicy
-from agents.agents.agents.templates.system_policy import SystemPolicy
-from agents.agents.agents.templates.tool_policy import ToolPolicy, JsonIndentedFormatter
-from agents.agents.agents.templates.constants import ToolPlacement
+from agentfly.agents.templates import Template, GlobalPolicy
+from agentfly.agents.templates.system_policy import SystemPolicy
+from agentfly.agents.templates.tool_policy import ToolPolicy, JsonIndentedFormatter
+from agentfly.agents.templates.constants import ToolPlacement
 
 # Create a comprehensive template
 comprehensive_template = Template(
@@ -224,13 +224,13 @@ comprehensive_template = Template(
 ### Registering a Template
 
 ```python
-from agents.agents.agents.templates import register_template
+from agentfly.agents.templates import register_template
 
 # Register the template
 register_template(comprehensive_template)
 
 # Now you can use it
-from agents.agents.agents.templates import get_template
+from agentfly.agents.templates import get_template
 template = get_template("comprehensive-example")
 ```
 
@@ -244,7 +244,7 @@ register_template(comprehensive_template, override=True)
 ### Template Registry Management
 
 ```python
-from agents.agents.agents.templates import TEMPLATES
+from agentfly.agents.templates import TEMPLATES
 
 # List all registered templates
 print("Available templates:", list(TEMPLATES.keys()))
@@ -348,9 +348,9 @@ register_template(modified)
 Here's a complete example of creating a custom template for a specific use case:
 
 ```python
-from agents.agents.agents.templates import Template, register_template
-from agents.agents.agents.templates.tool_policy import ToolPolicy, JsonCompactFormatter
-from agents.agents.agents.templates.constants import ToolPlacement
+from agentfly.agents.templates import Template, register_template
+from agentfly.agents.templates.tool_policy import ToolPolicy, JsonCompactFormatter
+from agentfly.agents.templates.constants import ToolPlacement
 
 # Create a coding assistant template
 coding_template = Template(
@@ -394,7 +394,7 @@ Available Tools:
 register_template(coding_template)
 
 # Test the template
-from agents.agents.agents.templates import Chat
+from agentfly.agents.templates import Chat
 
 chat = Chat(template="coding-assistant", messages=[
     {"role": "user", "content": "Write a Python function to calculate fibonacci numbers"}
diff --git a/docs/chat_template/examples.md b/docs/features/chat_template/examples.md
similarity index 98%
rename from docs/chat_template/examples.md
rename to docs/features/chat_template/examples.md
index 3186372..dcce310 100644
--- a/docs/chat_template/examples.md
+++ b/docs/features/chat_template/examples.md
@@ -9,7 +9,7 @@ This section provides comprehensive examples of how to use the Chat Template Sys
 ### Example 1: Simple Chat Template
 
 ```python
-from agents.agents.agents.templates import Chat, get_template
+from agentfly.agents.templates import Chat, get_template
 
 # Get a pre-built template
 template = get_template("qwen2.5")
@@ -159,9 +159,9 @@ print(f"First 20 action mask: {inputs['action_mask'][0][:20]}")
 ### Example 4: Custom Template Creation
 
 ```python
-from agents.agents.agents.templates import Template, register_template
-from agents.agents.agents.templates.tool_policy import ToolPolicy, JsonIndentedFormatter
-from agents.agents.agents.templates.constants import ToolPlacement
+from agentfly.agents.templates import Template, register_template
+from agentfly.agents.templates.tool_policy import ToolPolicy, JsonIndentedFormatter
+from agentfly.agents.templates.constants import ToolPlacement
 
 # Create a custom coding assistant template
 coding_template = Template(
diff --git a/docs/chat_template/index.md b/docs/features/chat_template/index.rst
similarity index 68%
rename from docs/chat_template/index.md
rename to docs/features/chat_template/index.rst
index 175c0c5..da7aba2 100644
--- a/docs/chat_template/index.md
+++ b/docs/features/chat_template/index.rst
@@ -1,17 +1,34 @@
-# Chat Template System Documentation
+Chat Template System Documentation
+==================================
+
+.. toctree::
+    :maxdepth: 2
+    :hidden:
+
+    architecture
+    basic_usage
+    custom_templates
+    advanced_features
+    vision_templates
+    examples
 
 Welcome to the comprehensive documentation for the Chat Template System - a powerful and flexible framework for creating conversation templates inspired by building block toys.
 
-## 📚 Documentation Structure
+Documentation Structure
+------------------------
+
+:doc:`Architecture & Design <architecture>`
+^^^^^^^^^^^^^^^^^^^^
 
-### 🏗️ [Architecture & Design](./architecture.md)
 - **System Philosophy**: Building block approach to template design
 - **Architecture Overview**: High-level system design and flow
 - **Design Patterns**: Factory, Strategy, and Observer patterns
 - **Extensibility Points**: How to extend the system
 - **Key Design Decisions**: Rationale behind architectural choices
 
-### 🚀 [Basic Usage](./basic_usage.md)
+:doc:`Basic Usage <basic_usage>`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 - **Getting Started**: Quick start guide and imports
 - **Pre-built Templates**: Available templates and their usage
 - **Chat Operations**: Creating chats, generating prompts, tokenization
@@ -19,7 +36,9 @@ Welcome to the comprehensive documentation for the Chat Template System - a powe
 - **Message Formats**: Standard and multi-modal message structures
 - **Error Handling**: Common issues and validation
 
-### 🛠️ [Custom Templates](./custom_templates.md)
+:doc:`Custom Templates <custom_templates>`
+^^^^^^^^^^^^^^^^^^^
+
 - **Template Components**: Core and advanced template fields
 - **Template Creation**: Step-by-step template building
 - **Policy Configuration**: System, tool, and global policies
@@ -27,7 +46,9 @@ Welcome to the comprehensive documentation for the Chat Template System - a powe
 - **Advanced Features**: Jinja templates, inheritance, copying
 - **Best Practices**: Template design and testing guidelines
 
-### 🔧 [Advanced Features](./advanced_features.md)
+:doc:`Advanced Features <advanced_features>`
+^^^^^^^^^^^^^^^^
+
 - **Tool Policy System**: Placement strategies and formatters
 - **System Policy System**: Message control and content processors
 - **Global Policy Configuration**: Template-wide settings
@@ -35,7 +56,9 @@ Welcome to the comprehensive documentation for the Chat Template System - a powe
 - **Advanced Tool Integration**: Custom placement and validation
 - **Performance Optimization**: Caching and lazy evaluation
 
-### 👁️ [Vision Templates](./vision_templates.md)
+:doc:`Vision Templates <vision_templates>`
+^^^^^^^^^^^^^^^
+
 - **Vision Architecture**: Pipeline overview and key components
 - **Creating Vision Templates**: Basic and advanced vision templates
 - **Vision Processor Configuration**: Automatic registration and model inference
@@ -44,59 +67,71 @@ Welcome to the comprehensive documentation for the Chat Template System - a powe
 - **Token Calculation**: Image and video token computation
 - **Advanced Features**: Custom processors and configuration options
 
-### 💡 [Examples & Use Cases](./examples.md)
+:doc:`Examples & Use Cases <examples>`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 - **Basic Examples**: Simple chat, tools, tokenization
 - **Advanced Examples**: Custom templates, vision usage, dynamic generation
 - **Real-World Use Cases**: Customer support, education, content analysis
 - **Testing & Validation**: Template comparison and validation
 - **Complete Examples**: End-to-end implementation examples
 
-## 🎯 Quick Start
+Quick Start
+-----------
 
-```python
-from agents.agents.agents.templates import Chat, get_template
+.. code-block:: python
 
-# Get a pre-built template
-template = get_template("qwen2.5")
+    from agentfly.agents.templates import Chat, get_template
 
-# Create a chat instance
-chat = Chat(template="qwen2.5", messages=[
-    {"role": "user", "content": "Hello, how are you?"}
-])
+    # Get a pre-built template
+    template = get_template("qwen2.5")
 
-# Generate a prompt
-prompt = chat.prompt()
-print(prompt)
-```
+    # Create a chat instance
+    chat = Chat(template="qwen2.5", messages=[
+        {"role": "user", "content": "Hello, how are you?"}
+    ])
 
-## 🔑 Key Concepts
+    # Generate a prompt
+    prompt = chat.prompt()
+    print(prompt)
+
+Key Concepts
+------------
+
+Template Components
+^^^^^^^^^^^^^^^^^^^
 
-### Template Components
 - **System Template**: Defines system message format
 - **User Template**: How user messages are formatted
 - **Assistant Template**: How assistant responses are formatted
 - **Tool Template**: How tool responses are formatted
 
-### Policies
+Policies
+^^^^^^^^
+
 - **System Policy**: Controls system message behavior
 - **Tool Policy**: Manages tool integration strategy
 - **Global Policy**: Template-wide behavior settings
 
-### Vision Support
+Vision Support
+^^^^^^^^^^^^^^
+
 - **Image Processing**: Automatic image token expansion
 - **Video Processing**: Video frame extraction and processing
 - **Multi-Modal Alignment**: Proper tensor alignment for training
 
-## 🚀 Getting Started
+Getting Started
+---------------
 
-1. **Read the [Architecture](./architecture.md)** to understand the system design
-2. **Follow [Basic Usage](./basic_usage.md)** for quick setup
-3. **Explore [Examples](./examples.md)** to see practical implementations
-4. **Create [Custom Templates](./custom_templates.md)** for your specific needs
-5. **Leverage [Advanced Features](./advanced_features.md)** for complex use cases
-6. **Add [Vision Support](./vision_templates.md)** for multi-modal capabilities
+1. **Read the** :doc:`Architecture <architecture>` to understand the system design
+2. **Follow** :doc:`Basic Usage <basic_usage>` for quick setup
+3. **Explore** :doc:`Examples <examples>` to see practical implementations
+4. **Create** :doc:`Custom Templates <custom_templates>` for your specific needs
+5. **Leverage** :doc:`Advanced Features <advanced_features>` for complex use cases
+6. **Add** :doc:`Vision Support <vision_templates>` for multi-modal capabilities
 
-## 🎨 Design Philosophy
+Design Philosophy
+-----------------
 
 The Chat Template System is inspired by **building block toys** - where complex structures are created by combining simple, standardized components. This philosophy manifests in:
 
@@ -105,18 +140,21 @@ The Chat Template System is inspired by **building block toys** - where complex
 - **Strategy Pattern**: Different behaviors can be selected at runtime
 - **Policy-Based Configuration**: Flexible behavior control without hardcoding
 
-## 🔧 System Architecture
+System Architecture
+-------------------
 
-```
-Messages + Tools → Template Processing → Vision Processing → LLM-Ready Inputs
-```
+::
+
+    Messages + Tools → Template Processing → Vision Processing → LLM-Ready Inputs
 
 The system follows a **three-step rendering process**:
+
 1. **Tool Insertion**: Decide where and how to inject tool definitions
 2. **Turn Encoding**: Convert each conversation turn to its textual representation
 3. **Generation Prompt**: Optionally append generation prefixes
 
-## 🌟 Key Features
+Key Features
+------------
 
 - **Modular Design**: Templates built from configurable components
 - **Multi-Modal Support**: Built-in vision and video processing
@@ -125,16 +163,19 @@ The system follows a **three-step rendering process**:
 - **Jinja Template Generation**: Automatic HuggingFace-compatible templates
 - **Extensible Architecture**: Easy to add new template types and processors
 
-## 📖 Additional Resources
+Additional Resources
+-----------------------
 
-- **Source Code**: `agents/agents/agents/templates/`
+- **Source Code**: ``agentfly/agents/templates/``
 - **API Reference**: Check the source code for detailed method documentation
 - **Issues & Discussions**: Use the project's issue tracker for questions
 
-## 🤝 Contributing
+Contributing
+------------
 
-The template system is designed to be extensible. See [Custom Templates](./custom_templates.md) for guidance on adding new template types and processors.
+The template system is designed to be extensible. See :doc:`Custom Templates <custom_templates>` for guidance on adding new template types and processors.
 
 ---
 
 *This documentation covers the complete Chat Template System. Start with the architecture to understand the design, then follow the usage guides to implement your own templates.*
+
diff --git a/docs/chat_template/vision_templates.md b/docs/features/chat_template/vision_templates.md
similarity index 95%
rename from docs/chat_template/vision_templates.md
rename to docs/features/chat_template/vision_templates.md
index d75e749..1635826 100644
--- a/docs/chat_template/vision_templates.md
+++ b/docs/features/chat_template/vision_templates.md
@@ -30,7 +30,7 @@ Messages → Template Processing → Vision Processor → LLM-Ready Inputs
 ### Basic Vision Template
 
 ```python
-from agents.agents.agents.templates import Template
+from agentfly.agents.templates import Template
 
 vision_template = Template(
     name="vision-enabled",
@@ -142,7 +142,7 @@ def _infer_model_type(self) -> str:
 The default processor used by most vision models:
 
 ```python
-from agents.agents.agents.templates.vision_processor import PatchBasedProcessor
+from agentfly.agents.templates.vision_processor import PatchBasedProcessor
 
 # Automatically used for most models
 # Supports multiple image input formats
@@ -154,7 +154,7 @@ from agents.agents.agents.templates.vision_processor import PatchBasedProcessor
 Specialized processor for Qwen-VL models:
 
 ```python
-from agents.agents.agents.templates.vision_processor import QwenVLProcessor
+from agentfly.agents.templates.vision_processor import QwenVLProcessor
 
 # Qwen-VL specific image preprocessing
 # Custom token calculation using grid-based approach
@@ -166,7 +166,7 @@ from agents.agents.agents.templates.vision_processor import QwenVLProcessor
 Specialized processor for LLaVA models:
 
 ```python
-from agents.agents.agents.templates.vision_processor import LlavaProcessor
+from agentfly.agents.templates.vision_processor import LlavaProcessor
 
 # LLaVA specific token calculation
 # Optimized for LLaVA architecture
@@ -271,7 +271,7 @@ message_with_url = {
 ### Basic Vision Chat
 
 ```python
-from agents.agents.agents.templates import Chat
+from agentfly.agents.templates import Chat
 
 # Create chat with vision template
 chat = Chat(template="qwen2.5-vl", messages=[
@@ -453,7 +453,7 @@ def calculate_video_tokens(self, video_data, processor):
 ### Custom Vision Processors
 
 ```python
-from agents.agents.agents.templates.vision_processor import VisionProcessor, VisionProcessorConfig
+from agentfly.agents.templates.vision_processor import VisionProcessor, VisionProcessorConfig
 
 class CustomVisionProcessor(VisionProcessor):
     """Custom vision processor for specific needs"""
@@ -495,14 +495,14 @@ config = VisionProcessorConfig(
     vision_end="<custom_vision_end>"
 )
 
-from agents.agents.agents.templates.vision_processor import register_processor
+from agentfly.agents.templates.vision_processor import register_processor
 register_processor("custom-template", config, CustomVisionProcessor)
 ```
 
 ### Vision Configuration Options
 
 ```python
-from agents.agents.agents.templates.vision_processor import VisionProcessorConfig
+from agentfly.agents.templates.vision_processor import VisionProcessorConfig
 
 config = VisionProcessorConfig(
     model_type="qwen_vl",
@@ -558,9 +558,9 @@ config = VisionProcessorConfig(
 Here's a complete example of creating and using a vision template:
 
 ```python
-from agents.agents.agents.templates import Template, register_template, Chat
-from agents.agents.agents.templates.tool_policy import ToolPolicy, JsonFormatter
-from agents.agents.agents.templates.constants import ToolPlacement
+from agentfly.agents.templates import Template, register_template, Chat
+from agentfly.agents.templates.tool_policy import ToolPolicy, JsonFormatter
+from agentfly.agents.templates.constants import ToolPlacement
 
 # Create a comprehensive vision template
 vision_template = Template(
diff --git a/docs/features/index.rst b/docs/features/index.rst
new file mode 100644
index 0000000..efd8492
--- /dev/null
+++ b/docs/features/index.rst
@@ -0,0 +1,11 @@
+Features & Concepts
+===================
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   agent_rollout
+   tool_system
+   reward_system
+   environments
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 1f228f8..4158c90 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,48 +1,185 @@
-AgentFly
+🪽AgentFly
 =====================
-AgentFly is a scalable and extensible Agent-RL framework designed to empower LM agents with a variety of RL algorithms. The framework supports multi-turn interactions by adapting traditional RL methods with token-level masking. It features a decorator-based interface for defining tools and reward functions, enabling seamless extension and ease of use. To support high-throughput training, we implement asynchronous execution of tool calls and reward computations, and design a centralized resource management system for scalable environment coordination. We also provide a suite of prebuilt tools and environments, demonstrating the framework's effectiveness through successful agent training across multiple tasks.
 
----------------------
-.. _Contents:
+.. grid:: 12
+    :gutter: 0
+
+    .. grid-item::
+        :columns: 10
+        :class: sd-fs-4
+
+        Training scalable LLM agents with RL (multi-turn, asynchronous tools/rewards, multimodal)
+
+        .. raw:: html
+
+            <img src="https://img.shields.io/github/stars/Agent-One-Lab/AgentFly?style=for-the-badge&logo=github&color=a2d2ff" alt="GitHub" width="20%" style="display:block;" />
+
+    .. grid-item::
+        :columns: 2
+
+        .. raw:: html
+
+            <div class="shown_logo" role="img" aria-label="AgentFly wing logo">
+            <span class="glyph">🪽</span>
+            </div>
+
+            <style>
+            :root{
+                --size: 140px;
+                --gold: #D4AF37;          /* primary gold */
+            }
+
+            .shown_logo{
+                width: var(--size);
+                aspect-ratio: 1;
+                border-radius: 50%;
+                display: grid;
+                place-items: center;
+                border: 0.1px solid var(--gold);
+
+                /* soft, matte depth */
+                box-shadow:
+                0 3px 14px rgba(212,175,55,0.35);              /* outer golden glow */
+                position: relative;
+                background:
+                radial-gradient(circle at 30% 25%, rgba(255,255,255,.12), transparent 40%),
+                radial-gradient(circle at 70% 75%, rgba(255,255,255,.06), transparent 50%);
+            }
+
+            /* inner rings & subtle bevel */
+            .shown_logo::after{
+                content: "";
+                position: absolute; inset: 0;
+                border-radius: 50%;
+                box-shadow:
+                inset 0 0 0 1.5px rgba(255,255,255,0.18),        /* crisp inner ring */
+                inset 0 12px 24px rgba(255,255,255,0.06),      /* top highlight */
+                inset 0 -12px 28px rgba(0,0,0,0.28);           /* inner shadow */
+                pointer-events: none;
+            }
+
+            .glyph{
+                font-size: 100px;   /* try 108–116px if you want fuller */
+                line-height: 1;
+                /* tiny optical nudge to counter emoji side bearing */
+                transform: translateX(1px) translateY(1px);
+                filter: drop-shadow(0 2px 2px rgba(0,0,0,.25));
+                font-family: "Apple Color Emoji","Segoe UI Emoji","Noto Color Emoji",system-ui,sans-serif;
+            }
+
+            /* small hover delight (optional) */
+            .shown_logo { transition: transform .24s ease; }
+            .shown_logo:hover { transform: translateY(-1px) scale(1.02); }
+            </style>
+
+
+.. grid:: 3
+    :gutter: 2
+
+    .. grid-item-card::
+        :link: https://arxiv.org/abs/2507.14897
+        :class-header: bg-light
+
+        AgentFly Paper 📜
+        ^^^
+
+        Explore the full paper, including the design inspiration, technical details, and training curves.
+
+    .. grid-item-card::
+        :link: https://github.com/Agent-One-Lab/AgentFly
+        :class-header: bg-light
+
+        GitHub Repo 💻
+        ^^^
+
+        Code repository on GitHub.
+
+    .. grid-item-card::
+        :link: https://wandb.ai/AgentRL/Open
+        :class-header: bg-light
+
+        Weights & Biases 📈
+        ^^^
+
+        The training curves, parameters, rewards, and trajectories.
+
+.. grid:: 3
+    :gutter: 2
+
+    .. grid-item-card::
+        :link: https://huggingface.co/collections/Agent-One/agentfly-6882061c6cf08537cb66c12b
+        :class-header: bg-light
+
+        Models 🤗
+        ^^^
+
+        Check out the models on Hugging Face. Agent for code interpreter, retrieval, ScienceWorld, WebShop, etc.
+
+    .. grid-item-card::
+        :link: start/first_agent
+        :class-header: bg-light
+
+        Tutorials 📚
+        ^^^
+
+        Check out the tutorials on how to build agents, tools, rewards, and start training. 
+
+
+
+Welcome to join our community!
+--------------------------------
+
+.. grid:: 3
+    :gutter: 2
+
+    .. grid-item-card::
+        :img-top: ../assets/images/wechat.jpg
+        :class-card: sd-text-center
+
+        Scan to join WeChat group.
+
+
+    .. grid-item-card::
+        :img-top: ../assets/images/discord.png
+        :class-card: sd-text-center
+
+        `Join our Discord <https://discord.gg/ekrKVg8Y>`_
+
 
 .. toctree::
     :maxdepth: 2
+    :hidden:
     :caption: Quick Start
 
     start/installation
-    start/training_example
-    start/agent_examples
+    start/first_agent
+    start/first_tool_reward
+    start/first_training
 
 .. toctree::
     :maxdepth: 2
-    :caption: Features
+    :hidden:
+    :caption: Features & Concepts
 
     features/agent_rollout
     features/tool_system
     features/reward_system
-    chat_template/index
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Environments
-
-   environments/alfworld/index
-   environments/code/index
-   environments/retrieval/index
+    features/environments
+    features/chat_template/index
 
 .. toctree::
     :maxdepth: 2
-    :caption: Rewards
+    :hidden:
+    :caption: API Reference
 
-    rewards/index
+    api_references/agents/index
+    api_references/tools/index
+    api_references/rewards/index
+    api_references/environments/index
 
 .. toctree::
     :maxdepth: 2
-    :caption: Classes
-
-    classes/agent
-    classes/tool
-    classes/reward
-    classes/environment
+    :hidden:
+    :caption: Examples
 
----------------------
\ No newline at end of file
+    examples/predefined_training_examples
diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt
index 27e1812..98e879c 100644
--- a/docs/requirements-docs.txt
+++ b/docs/requirements-docs.txt
@@ -5,3 +5,8 @@ sphinx-markdown-tables
 # Classic Read-the-Docs look
 sphinx-rtd-theme
 # Sphinx itself is pulled transitively
+sphinx-book-theme
+
+# Grid
+sphinx-design
+
diff --git a/docs/rewards/llm_judge_reward.rst b/docs/rewards/llm_judge_reward.rst
deleted file mode 100644
index ed33e3f..0000000
--- a/docs/rewards/llm_judge_reward.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-.. _llm_judge_reward:
-
-LLM-as-Judge Reward
-===================
-
-.. currentmodule:: agents.agents.rewards.llm_as_judge.llm_as_judge_reward
-
-.. autofunction:: llm_as_judge_reward
-
-Function Signature
-------------------
-
-.. code-block:: python
-
-    def llm_as_judge_reward(prediction: str, golden_answer: str, llm_backend) -> Dict[str, float]
-
-Description
------------
-
-Uses a language model as an expert judge to evaluate the quality and correctness of agent responses against expected answers.
-
-**Parameters:**
-    - **prediction** (str): The agent's prediction or response to evaluate
-    - **golden_answer** (str): The ground truth or expected answer
-    - **llm_backend**: LLM backend instance configured for evaluation
-
-**Returns:**
-    Dict[str, float]: Dictionary containing:
-        - **reward** (float): Evaluated score from 0.0 to 1.0
-        - **raw_score** (float): Original score before clamping
-
-**Decorator Configuration:**
-    - **name**: "llm_as_judge_reward"
-    - **llm_config**: 
-        - **backend_type**: "transformers"
-        - **model_name**: "Qwen/Qwen2.5-0.5B" (configurable via LLM_JUDGE_MODEL env var)
-        - **max_tokens**: 10
-        - **temperature**: 0.1
-
-Technical Details
------------------
-
-**Evaluation Prompt:**
-    The LLM judge uses a structured prompt that includes:
-    - Role definition as expert evaluator
-    - Evaluation criteria (correctness, completeness, alignment)
-    - Expected answer and given answer
-    - Scoring scale (0.0 to 1.0)
-
-**Scoring Scale:**
-    - **1.0**: Perfect match or equivalent answer
-    - **0.7-0.9**: Mostly correct with minor issues
-    - **0.4-0.6**: Partially correct
-    - **0.0-0.3**: Incorrect or irrelevant
-
-**Score Processing:**
-    - Extracts numerical score from LLM response
-    - Clamps values to [0.0, 1.0] range
-    - Defaults to 0.0 if parsing fails
-
-**Example Usage:**
-
-.. code-block:: python
-
-    from agents.agents.rewards import get_reward_from_name
-    
-    # Get LLM judge reward function
-    reward_fn = get_reward_from_name("llm_as_judge_reward")
-    
-    # Evaluate a response
-    result = reward_fn(
-        prediction="Paris is the capital of France and a major cultural center",
-        golden_answer="Paris is the capital of France"
-    )
-    print(result)
-    # {"reward": 0.95, "raw_score": 0.95}
-
-**Configuration:**
-
-Set custom judge model via environment variable:
-
-.. code-block:: bash
-
-    export LLM_JUDGE_MODEL="meta-llama/Llama-2-7b-chat-hf"
-
-**Use Cases:**
-    - Open-ended question evaluation
-    - Creative writing assessment
-    - Complex reasoning task evaluation
-    - Multi-dimensional answer quality assessment 
\ No newline at end of file
diff --git a/docs/start/agent_examples.md b/docs/start/agent_examples.md
deleted file mode 100644
index 2f5f3b0..0000000
--- a/docs/start/agent_examples.md
+++ /dev/null
@@ -1,215 +0,0 @@
-## Build an Agent
-
-### Use a Predefined Agent
-We can specify the following arguments to use a predefined agent:
-
-- model_name: the path or name or the model, used to load weights
-- tools: tools that will be used by the agent
-- template: chat template
-- backend: what type of backend
-
-The following shows an example to use Qwen2.5-7B-Instruct as a react agent:
-
-```python
-from agents.agents.react.react_agent import ReactAgent
-from agents.tools.src.code.tools import code_interpreter
-from agents.tools.src.search.google_search import google_search_serper
-from agents.tools.src.react.tools import answer
-
-tools = [google_search_serper, answer]
-
-task_info = "Use code to get answers. Result must be printed."
-
-react_agent = ReactAgent(
-    "Qwen/Qwen2.5-7B-Instruct",
-    tools=tools,
-    template="qwen2.5-no-tool",
-    task_info=task_info,
-    backend="async_vllm"
-)
-
-question = "Solve the equation 2x + 5y = 4 such that sum of x and y is 7."
-messages = [
-    {
-        "messages": [
-            {"role": "user", "content": f"{question}"}
-        ],
-        "question": f"{question}",
-    },
-]
-
-await react_agent.run_async(
-    max_steps=4,
-    start_messages=messages,
-    num_chains=5 # for the question, the agent will generate 5 trajectories
-)
-
-```
-
-After the rollout, we can obtain the trajectories:
-
-```python
-react_agent.trajectories
-```
-
-Obtaining the rewards (if you specified reward function and give necessary parameters in input messages)
-```
-react_agent.rewards)
-```
-
-### Customize Agent
-
-You can customize your own agent by defining how the agent do generation and handle tool calls.
-
-```python
-class CustomizedAgent(BaseAgent):
-    def __init__(self,
-        **kwargs
-    )
-        super().__init__(**kwargs)
-
-    async def generate_async(self, messages_list: List[List[Dict]], **args):
-        return await self.llm_engine.generate_async(messages_list, **args)
-
-    def parse(self, responses: List(str), tools):
-        # parse responses into tool calls
-        ...
-```
-
-### Use Trained Agent
-We provide the following agent that we can try:
-
-- WebShop Agent:
-```python
-import asyncio
-from agents.agents import ReactAgent
-from agents.tools import webshop_browser
-from agents.rewards import webshop_reward
-from agents.agents.chain.streaming_observer import ConsoleStreamObserver
-
-tools = [webshop_browser]
-
-agent = ReactAgent(
-    "Agent-One/Qwen2.5-3B-Instruct-WebShop",
-    tools=tools,
-    template="qwen2.5",
-    backend="async_vllm",
-    streaming="console"
-)
-
-question = "I am looking for a gluten free, 100% vegan plant based protein shake that is soy-free, and price lower than 40.00 dollars"
-
-messages = [
-    {
-        "messages": [
-            {"role": "user", "content": f"{question}"}
-        ],
-        "question": f"{question}"
-    },
-]
-
-await agent.run_async(
-    max_steps=10,
-    start_messages=messages,
-    num_chains=1,
-    enable_streaming=True
-)
-```
-- Science World Agent
-```python
-from agents.tools import scienceworld_explorer
-from agents.rewards import scienceworld_reward
-
-
-tools = [scienceworld_explorer]
-
-agent = ReactAgent(
-    "Agent-One/Qwen2.5-7B-Instruct-ScienceWorld-REINFORCEPP",
-    tools=tools,
-    template="qwen2.5-no-system-tool",
-    backend="async_vllm",
-    reward_fn=scienceworld_reward,
-    streaming="console"
-)
-
-# question = "Your task is to boil ice cream. For compounds without a boiling point, combusting the substance is also acceptable. First, focus on the substance. Then, take actions that will cause it to change its state of matter."
-# task_name = "boil"
-# variation_idx = 14
-
-
-# question = "Your task is to find a(n) animal. First, focus on the thing. Then, move it to the green box in the kitchen."
-# task_name = "find-animal"
-# variation_idx = 150
-
-# question = "Your task is to find a(n) plant. First, focus on the thing. Then, move it to the purple box in the bathroom."
-# task_name = "find-plant"
-# variation_idx = 191
-
-
-# question = "Your task is to grow a orange. This will require growing several plants, and them being crosspollinated to produce fruit.  Seeds can be found in the kitchen. To complete the task, focus on the grown orange."
-# task_name = "grow-fruit"
-# variation_idx = 92
-
-# question = "Your task is to determine which of the two inclined planes (bronze, copper) has the most friction. After completing your experiment, focus on the inclined plane with the most friction."
-# task_name = "inclined-plane-friction-named-surfaces"
-# variation_idx = 776
-
-question = "Your task is to measure the melting point of orange juice, which is located around the kitchen. First, focus on the thermometer. Next, focus on the orange juice. If the melting point of orange juice is above 200.0 degrees celsius, focus on the yellow box. If the melting point of orange juice is below 200.0 degrees celsius, focus on the purple box. The boxes are located around the kitchen."
-task_name = "measure-melting-point-known-substance"
-variation_idx = 247
-
-messages = [
-    {
-        "messages": [
-            {"role": "user", "content": f"{question}"}
-        ],
-        "question": f"{question}",
-        "task_name": task_name,
-        "variation_idx": variation_idx
-    },
-]
-
-await agent.run_async(
-    max_steps=20,
-    start_messages=messages,
-    num_chains=1,
-    enable_streaming=True
-)
-
-print(agent.rewards)
-```
-
-- Retrieval Agent
-
-```python
-from agents.tools import dense_retrieve, asyncdense_retrieve
-
-tools = [dense_retrieve]
-
-agent = ReactAgent(
-    "Agent-One/Qwen2.5-3B-Instruct-Retrieval-GRPO",
-    tools=tools,
-    template="qwen2.5-no-system-tool",
-    backend="async_vllm",
-    streaming="console"
-)
-
-question = "Who is Geoffrey Hinton"
-
-
-messages = [
-    {
-        "messages": [
-            {"role": "user", "content": f"{question}"}
-        ],
-        "question": f"{question}",
-    },
-]
-
-await agent.run_async(
-    max_steps=6,
-    start_messages=messages,
-    num_chains=1,
-    enable_streaming=True
-)
-```
\ No newline at end of file
diff --git a/docs/start/first_agent.md b/docs/start/first_agent.md
new file mode 100644
index 0000000..9b17ac9
--- /dev/null
+++ b/docs/start/first_agent.md
@@ -0,0 +1,74 @@
+# Build an Agent
+
+The simplest agent can be built by initializing an agent instance with tools. The following is a small example that builds an agent using Qwen2.5.
+
+```python
+from agentfly.agents import HFAgent
+from agentfly.tools import calculator
+agent = HFAgent(
+    model_name_or_path="Qwen/Qwen2.5-3B-Instruct",
+    tools=[calculator],
+    template="qwen2.5",
+    backend="async_vllm",
+)
+```
+
+Then, we can use the agent to perform a task for us (a "rollout" in reinforcement learning terms). Our framework is designed to be asynchronous, so we need to use `await`.
+
+```python
+messages = [{"role": "user", "content": "What is the result of 1 + 1?"}]
+await agent.run(
+    messages=messages,
+    max_turns=3,
+    num_chains=1
+)
+```
+Here, `max_turns` specifies the maximum number of rounds in which the agent can interact with the environment. `num_chains` specifies how many chains/trajectories the agent will run for a single query. After the run, we can obtain the results from the agent's trajectories.
+
+```python
+trajectories = agent.trajectories
+print(trajectories)
+```
+
+The result is in the ShareGPT/OpenAI message format and will look something like this:
+```
+{
+    'messages': [
+        {
+            'role': 'user', 
+            'content': [{'type': 'text', 'text': 'What is the result of 1 + 1?'}]
+        }, 
+        {
+            'role': 'assistant',
+            'content': [
+                {'type': 'text', 'text': '<tool_call>\n{"name": "calculator", "arguments": {"expression": "1 + 1"}}\n</tool_call>'}
+            ], 
+            'tool_calls': [
+                    {
+                        'id': None, 'type': 'function',
+                        'function': {
+                            'name': 'calculator',
+                            'arguments': {'expression': '1 + 1'}
+                        }
+                    }
+            ],
+        },
+        {
+            'role': 'tool',
+            'tool_call_id': None,
+            'tool_name': 'calculator',
+            'content': [
+                {'type': 'text', 'text': '2'}
+            ]
+        }, 
+        {
+            'role': 'assistant', 
+            'content': [
+                {'type': 'text', 'text': 'The result of 1 + 1 is 2.'}
+            ],
+            'tool_calls': [],
+        }
+    ]
+}
+```
+Now we have built and run the agent. However, to run agent reinforcement learning, we still need several steps: define and get the tools to use, define reward functions, and finally, run the training.
\ No newline at end of file
diff --git a/docs/start/first_tool_reward.md b/docs/start/first_tool_reward.md
new file mode 100644
index 0000000..f495fd7
--- /dev/null
+++ b/docs/start/first_tool_reward.md
@@ -0,0 +1,98 @@
+# Define Tools & Reward Functions
+
+We have shown how to build an agent. To further customize the training, we need to define tools and reward functions.
+
+**Tool Definition**
+
+Defining a tool is simple in the AgentFly framework. You simply write a function and then decorate it with `@tool`. The following example shows the calculator tool we used previously.
+
+```python
+from agentfly.tools import tool
+from sympy import simplify, sympify, Rational
+
+@tool(name="calculator", description="Calculate the result of a mathematical expression.")
+def calculator(expression: str):
+    try:
+        expr = sympify(expression)
+        result = simplify(expr)
+
+        # Check if the result is a number
+        if result.is_number:
+            # If the result is a rational number, return as a fraction
+            if isinstance(result, Rational):
+                return str(result)
+            # If the result is a floating point number, format to remove redundant zeros
+            else:
+                return "{:g}".format(float(result))
+        else:
+            return str(result)
+    except Exception as e:
+        return f"Error: {str(e)}"
+```
+
+Now that we have the tool, we can define the reward function, which simply uses a `@reward` decorator. The following example shows a reward that extracts the last number from a text and compares it with the golden answer. The reward function returns either a float representing the reward or a dictionary containing "reward" as a key.
+
+```python
+from agentfly.rewards import reward
+
+@reward(name="math_reward_string_equal")
+def math_reward_string_equal(prediction: str, answer: str, trajectory: List[Dict]) -> float:
+
+    def extract_last_number(s: str):
+        matches = re.findall(r'\d+', s)  # find all sequences of digits
+        return matches[-1] if matches else None
+
+    tool_count = 0
+    for msg in trajectory:
+        if msg["role"] == "tool":
+            tool_count += 1
+    
+    if tool_count < 1:
+        return 0.0
+    else:
+        prediction = extract_last_number(prediction)
+        
+        if prediction == answer:
+            return 1.0
+        else:
+            return 0.1
+```
+Note that in this reward function, we use the trajectory to count how many tools the agent has called. If the agent called at least one, we give it the basic format reward (0.1), then if it further gets the answer correct, it gets the full reward (1.0).
+Now we can use the agent with the reward function we just defined.
+
+```python
+from agentfly.agents import HFAgent
+from agentfly.tools import calculator
+agent = HFAgent(
+    model_name_or_path="Qwen/Qwen2.5-3B-Instruct",
+    tools=[calculator],
+    template="qwen2.5",
+    reward_fn=math_reward_string_equal,
+    backend="async_vllm",
+)
+```
+
+Then we can run the agent and get rewards:
+
+```python
+messages = {
+    "messages": [
+        {"role": "user", "content": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?"}
+    ],
+    "answer": "72"
+}
+await agent.run(
+    messages=messages,
+    max_turns=3,
+    num_chains=1
+)
+```
+
+Now we can get the trajectories and rewards with the following code:
+```python
+trajectories = agent.trajectories
+rewards = agent.rewards
+print(trajectories)
+print(rewards)
+```
+
diff --git a/docs/start/first_training.md b/docs/start/first_training.md
new file mode 100644
index 0000000..245976b
--- /dev/null
+++ b/docs/start/first_training.md
@@ -0,0 +1,153 @@
+Training Example
+==============
+
+Finally, we are ready to train the agent.
+
+**1. Prepare Training Data**
+
+----------------
+
+We show an example of training on the GSM8K dataset. First, prepare your training and validation datasets in JSON format. The datasets should follow this structure:
+
+```
+
+[
+    {
+        "question": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?",
+        "answer": "72"
+    },
+    {
+        "question": "Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?",
+        "answer": "10"
+    },
+    ...
+]
+```
+The "question" field holds the task query and will be used to form the input messages. The other fields (in our case, "answer") will be given to the reward function.
+
+**2. Create Training Script**
+
+------------------------
+Create a training script (e.g., ``train_example.sh``) with the following configuration:
+
+```bash
+
+export WANDB_API_KEY="your_wandb_key"  # For logging to Weights & Biases
+export VLLM_USE_V1=1
+# Run in single node
+
+set -x
+
+export head_node=${nodes[0]}
+
+head_node_ip=$(hostname --ip-address)
+port=6379
+address_head=$head_node_ip:$port
+
+export HYDRA_FULL_ERROR=1
+# Remove existing Ray cluster
+ray stop
+rm -rf /tmp/ray/ray_current_cluster
+
+# Start Ray head node
+ray start --head --node-ip-address="$head_node_ip" --port=$port  --num-cpus 192 --num-gpus 1
+
+model=Qwen/Qwen2.5-3B-Instruct
+template=qwen2.5
+lr=5e-7
+length=256
+batch_size=32
+num_chains=8
+kl_coef=0.001
+train_dataset="./data/rlhf/math/gsm8k_train.json"
+val_dataset="./data/rlhf/math/gsm8k_test.json"
+# adv_estimator=rloo
+# adv_estimator=reinforce_plus_plus
+# adv_estimator=remax
+adv_estimator=grpo
+# adv_estimator=gae
+
+mini_batch_size=$batch_size
+
+agent_type=hf
+tools="[calculator]"
+reward_name="math_reward_string_equal"
+entropy_coeff=0.001
+kl_loss_type=mse
+max_turns=3
+agent_backend="async_verl"
+project_name="AgentRL"
+total_training_steps=200
+
+experiment_name="test_gsm8k"
+
+python3 -m verl.trainer.main_ppo \
+    algorithm.adv_estimator=$adv_estimator \
+    data.train_files=$train_dataset \
+    data.val_files=$val_dataset \
+    data.train_batch_size=$batch_size \
+    agent.agent_type=$agent_type \
+    agent.tools=$tools \
+    agent.template=$template \
+    agent.model_name_or_path=$model \
+    agent.max_turns=${max_turns} \
+    agent.backend=${agent_backend} \
+    agent.reward_name=$reward_name \
+    agent.num_chains=$num_chains \
+    agent.use_agent=True \
+    actor_rollout_ref.actor.optim.lr=$lr \
+    actor_rollout_ref.model.use_remove_padding=False \
+    actor_rollout_ref.model.path=${model} \
+    actor_rollout_ref.actor.ppo_mini_batch_size=${mini_batch_size} \
+    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
+    actor_rollout_ref.actor.use_kl_loss=True \
+    actor_rollout_ref.actor.kl_loss_coef=$kl_coef \
+    actor_rollout_ref.actor.kl_loss_type=$kl_loss_type \
+    actor_rollout_ref.actor.entropy_coeff=$entropy_coeff \
+    actor_rollout_ref.model.enable_gradient_checkpointing=False \
+    actor_rollout_ref.actor.fsdp_config.param_offload=True \
+    actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
+    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
+    actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
+    actor_rollout_ref.rollout.name=vllm \
+    actor_rollout_ref.rollout.response_length=$length \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
+    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
+    actor_rollout_ref.ref.fsdp_config.param_offload=True \
+    critic.model.path=$model \
+    critic.ppo_mini_batch_size=${mini_batch_size} \
+    critic.ppo_micro_batch_size_per_gpu=2 \
+    algorithm.kl_ctrl.kl_coef=$kl_coef \
+    trainer.critic_warmup=0 \
+    trainer.logger=['console','wandb'] \
+    trainer.project_name=$project_name \
+    trainer.experiment_name=${experiment_name} \
+    trainer.n_gpus_per_node=1 \
+    trainer.nnodes=1 \
+    trainer.save_freq=50 \
+    trainer.test_freq=10 \
+    trainer.total_training_steps=$total_training_steps \
+    trainer.val_before_train=False
+```
+
+**3. Run Training**
+
+--------------
+
+Execute the training script. This training script runs agent RL on a single node with one GPU. We have wrapped up everything, including tools, rewards, and training data. Run the following command to start training.
+
+```
+cd verl
+bash run_agents/train_example.sh
+```
+
+The training progress will be logged to Weights & Biases if configured. You can monitor metrics like reward, loss, and KL divergence during training.
+
+Key parameters to consider:
+
+- ``model``: Base model to fine-tune
+- ``batch_size``: Training batch size
+- ``lr``: Learning rate
+- ``num_chains``: Number of interaction chains per sample
+- ``max_turns``: Maximum turns per interaction chain
+- ``total_training_steps``: Total number of training steps
diff --git a/docs/start/installation.md b/docs/start/installation.md
index 1e888a1..27b1405 100644
--- a/docs/start/installation.md
+++ b/docs/start/installation.md
@@ -1,25 +1,53 @@
 # Installation
 
-To install, follow these steps:
+**Install With Script**
 
-1. Clone the repository:
-```bash
-git clone https://github.com/agentfly/agentfly.git
-cd agentfly
+To install dependencies, run the following script in a conda environment. Python 3.10 is used by default.
+```
+bash install.sh
 ```
 
-2. Initialize and install dependencies
 
+**Step-by-Step Installation**
+
+Alternatively, you can customize the installation by following these steps:
+
+1. Clone the repository and initialize submodules:
 ```bash
+git clone https://github.com/Agent-One-Lab/AgentFly
+cd AgentFly
 git submodule init
 git submodule update
+```
+
+2. Initialize and install dependencies
 
-pip install -r agents/requirements.txt
-pip install -r verl/requirements.txt
+Install the basic Python packages:
+```bash
+pip install -e .
+pip install -e '.[verl]' --no-build-isolation
+```
+
+Some of our tools and environments are managed by the *enroot* backend. To use them, please install [enroot](https://github.com/NVIDIA/enroot/blob/master/doc/installation.md) (sudo required). These tools include code_interpreter, retrieval, webshop, alfworld, and scienceworld.
+
+```bash
+# enroot install
+# Debian-based distributions
+arch=$(dpkg --print-architecture)
+curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot_3.5.0-1_${arch}.deb
+curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot+caps_3.5.0-1_${arch}.deb # optional
+sudo apt install -y ./*.deb
+
+# RHEL-based distributions
+arch=$(uname -m)
+sudo dnf install -y epel-release # required on some distributions
+sudo dnf install -y https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot-3.5.0-1.el8.${arch}.rpm
+sudo dnf install -y https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot+caps-3.5.0-1.el8.${arch}.rpm # optional
 ```
+
 3. Optional
+Search requires Redis to cache results. One optional way to install it is with conda:
 ```bash
 conda install conda-forge::redis-server==7.4.0
 ```
 
-
diff --git a/docs/start/training_example.rst b/docs/start/training_example.rst
deleted file mode 100644
index 02376ac..0000000
--- a/docs/start/training_example.rst
+++ /dev/null
@@ -1,100 +0,0 @@
-Training Example
-==============
-
-This guide walks through the steps to train an agent using AgentFly.
-
-1. Prepare Training Data
------------------------
-First, prepare your training and validation datasets in JSON format. The datasets should follow this structure:
-
-.. code-block:: json
-
-    [
-        {
-            "id": "0",
-            "question": "$P(x)$ is a polynomial of degree $3n$ such that\n\\begin{eqnarray*} P(0) = P(3) = \\cdots &=& P(3n) = 2, \\\\ P(1) = P(4) = \\cdots &=& P(3n-2) = 1, \\\\ P(2) = P(5) = \\cdots &=& P(3n-1) = 0, \\quad\\text{ and }\\\\ && P(3n+1) = 730.\\end{eqnarray*}\nDetermine $n$.",
-            "answer": "n = 4"
-        },
-        {
-            "id": "1",
-            "question": "Diameter $AB$ of a circle has length a $2$-digit integer (base ten). Reversing the digits gives the length of the perpendicular chord $CD$. The distance from their intersection point $H$ to the center $O$ is a positive rational number. Determine the length of $AB$.",
-            "answer": "65"
-        },
-        ...
-    ]
-
-Save your training data and validation data.
-
-2. Create Training Script 
-------------------------
-Create a training script (e.g., ``train.sh``) with the following configuration:
-
-.. code-block:: bash
-
-    export WANDB_API_KEY="your_wandb_key"  # For logging to Weights & Biases
-    export VLLM_USE_V1=1
-
-    # Ray cluster configuration
-    ray stop
-    ray start --head --node-ip-address="$(hostname --ip-address)" --port=6379 --num-cpus 192 --num-gpus 8
-
-    # Training parameters
-    model="Qwen/Qwen2.5-3B-Instruct"
-    template="qwen-chat"
-    lr=5e-7
-    length=512
-    batch_size=128
-    num_chains=4
-    kl_coef=0.001
-    train_dataset="train"
-    adv_estimator="grpo"
-
-    # Agent configuration
-    agent_type="code"
-    tools="[code_interpreter]"
-    reward_name="math_reward_format"
-    entropy_coeff=0.001
-    kl_loss_type="mse"
-    max_steps=4
-    agent_backend="async_verl"
-    project_name="AgentRL"
-    total_training_steps=200
-
-    python3 -m verl.trainer.main_ppo \
-        algorithm.adv_estimator=$adv_estimator \
-        data.train_files=./data/train.json \
-        data.val_files=./data/val.json \
-        data.train_batch_size=$batch_size \
-        agent.agent_type=$agent_type \
-        agent.tools=$tools \
-        agent.template=$template \
-        agent.model_name_or_path=$model \
-        agent.max_steps=${max_steps} \
-        agent.backend=${agent_backend} \
-        agent.reward_name=$reward_name \
-        agent.num_chains=$num_chains \
-        agent.use_agent=True \
-        actor_rollout_ref.actor.optim.lr=$lr \
-        actor_rollout_ref.model.path=${model} \
-        trainer.n_gpus_per_node=8 \
-        trainer.nnodes=1 \
-        trainer.total_training_steps=$total_training_steps
-
-3. Run Training
---------------
-Execute the training script:
-
-.. code-block:: bash
-
-    bash train.sh
-
-The training progress will be logged to Weights & Biases if configured. You can monitor metrics like reward, loss, and KL divergence during training.
-
-Key parameters to consider:
-
-- ``model``: Base model to fine-tune
-- ``batch_size``: Training batch size
-- ``lr``: Learning rate
-- ``num_chains``: Number of interaction chains per sample
-- ``max_steps``: Maximum steps per interaction chain
-- ``total_training_steps``: Total number of training steps
diff --git a/pyproject.toml b/pyproject.toml
index 068ddf6..129e992 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,20 +2,20 @@
 requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
-[tool.setuptools.packages.find]
-include = ["agents", "verl"]
-
 [tool.pytest.ini_options]
 markers = [
     "gpu: marks tests as requiring GPU resources"
 ]
 
+[tool.setuptools]
+packages = ["agentfly"]
+
 [project]
 name = "AgentFly"
 version = "0.0.1"
 description = "Agent reinforcement learning framework."
 readme = "README.md"
-requires-python = ">=3.10,<3.11"
+requires-python = ">=3.10,<3.13" # Relaxed the upper bound to allow Python 3.11 and 3.12
 license = { text = "Apache-2.0" }
 
 dependencies = [
@@ -33,6 +33,8 @@ dependencies = [
     "bs4",
     "qwen_vl_utils",
     "onnxruntime",
+    "mpmath",
+    "wandb"
 ]
 
 [project.optional-dependencies]
@@ -55,7 +57,6 @@ verl = [
     "tensordict<=0.6.2",
     "torchdata",
     "transformers",
-    "wandb",
     "packaging>=20.0",
     "uvicorn",
     "fastapi"