From 2a5ff4b33dd21f7e062d40f742f32d914452e611 Mon Sep 17 00:00:00 2001
From: us <rahmetsaritekin@gmail.com>
Date: Mon, 15 Jun 2026 01:52:03 +0300
Subject: [PATCH] feat: add fastCRW search tool

---
 src/tool/default_tools/search/__init__.py   |   2 +
 src/tool/default_tools/search/crw_search.py | 230 ++++++++++++++++++++
 src/tool/default_tools/web_searcher.py      |  12 +-
 3 files changed, 239 insertions(+), 5 deletions(-)
 create mode 100644 src/tool/default_tools/search/crw_search.py

diff --git a/src/tool/default_tools/search/__init__.py b/src/tool/default_tools/search/__init__.py
index 02005c55..7d9831f0 100644
--- a/src/tool/default_tools/search/__init__.py
+++ b/src/tool/default_tools/search/__init__.py
@@ -1,5 +1,6 @@
 from .types import SearchItem
 from .firecrawl_search import FirecrawlSearch
+from .crw_search import CrwSearch
 from .brave_search import BraveSearch
 from .bing_search import BingSearch
 from .google_search import GoogleSearch
@@ -9,6 +10,7 @@
 __all__ = [
     "SearchItem",
     "FirecrawlSearch",
+    "CrwSearch",
     "BraveSearch",
     "BingSearch",
     "GoogleSearch",
diff --git a/src/tool/default_tools/search/crw_search.py b/src/tool/default_tools/search/crw_search.py
new file mode 100644
index 00000000..f5f10952
--- /dev/null
+++ b/src/tool/default_tools/search/crw_search.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+from typing import Any, Optional, Dict, List, Type
+import json
+import os
+from pydantic import ConfigDict, Field
+from firecrawl import AsyncFirecrawlApp
+from dotenv import load_dotenv
+load_dotenv()
+
+from src.tool.default_tools.search.types import SearchItem, SearchToolArgs
+from src.tool.types import Tool, ToolResponse, ToolExtra
+from src.logger import logger
+from src.registry import TOOL
+
+# Base URL of the hosted fastCRW service; CrwSearch falls back to it when the
+# CRW_API_URL environment variable is unset. fastCRW is a Firecrawl-compatible
+# web scraper (single binary; self-host or cloud), so the Firecrawl client is
+# reused with this base URL. For self-hosting set CRW_API_URL (e.g. http://localhost:3000).
+DEFAULT_CRW_API_URL = "https://fastcrw.com/api"
+
+@TOOL.register_module(force=True)
+class CrwSearch(Tool):
+    """Search tool backed by fastCRW, registered through ``TOOL.register_module``.
+
+    fastCRW is a Firecrawl-compatible web data engine (single binary; self-host
+    or cloud). It exposes the same /v1/search API as Firecrawl, so the official
+    Firecrawl client (``AsyncFirecrawlApp``) is reused with the fastCRW base URL.
+    The key and base URL are read from CRW_API_KEY / CRW_API_URL in ``__init__``.
+
+    NOTE(review): ``from_search_kwargs`` stores ``search_kwargs`` as an extra
+    field (extra="allow"), but ``_search_crw`` never reads it -- confirm intent.
+
+    Example usages:
+    .. code-block:: python
+        tool = CrwSearch()                                  # basic usage
+        tool = CrwSearch.from_search_kwargs({"limit": 5})   # custom search kwargs
+    """
+    model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
+
+    name: str = "crw_search"
+    description: str = (
+        "a search engine. "
+        "useful for when you need to answer questions about current events."
+        " input should be a search query."
+    )
+    metadata: Dict[str, Any] = Field(default={}, description="The metadata of the tool")
+    api_key: Optional[str] = Field(default=None, description="fastCRW API key")
+    api_url: Optional[str] = Field(default=None, description="fastCRW base URL (override for self-host)")
+
+    def __init__(self, **kwargs):
+        """Initialize the tool; explicit api_key/api_url kwargs win over CRW_API_KEY/CRW_API_URL."""
+        super().__init__(**kwargs)
+        # Fall back to the environment only when the caller supplied no value
+        self.api_key = self.api_key or os.getenv("CRW_API_KEY")
+        self.api_url = self.api_url or os.getenv("CRW_API_URL", DEFAULT_CRW_API_URL)
+
+    @classmethod
+    def from_search_kwargs(cls, search_kwargs: dict, **kwargs: Any) -> CrwSearch:
+        """Alternate constructor that attaches ``search_kwargs`` to the new tool.
+
+        Args:
+            search_kwargs: Passed to the constructor as the ``search_kwargs`` keyword.
+            **kwargs: Remaining keyword arguments forwarded to the constructor.
+
+        Returns: An instance of ``cls`` built from the combined keyword arguments.
+        """
+        init_args = {"search_kwargs": search_kwargs, **kwargs}
+        return cls(**init_args)
+
+    async def _search_crw(self,
+                          query: str,
+                          num_results: int = 10,
+                          filter_year: Optional[int] = 2025) -> List[SearchItem]:
+        """Run one fastCRW search and convert the hits to ``SearchItem`` objects.
+
+        Args:
+            query: Search query, forwarded to the client's ``search`` call.
+            num_results: Sent to the API as ``limit``.
+            filter_year: Adds a ``tbs`` value spanning 01/01 to 12/31 of this
+                year. ``None`` is treated as 2025; a year outside 1900-2100 is
+                logged and no date filter is sent.
+
+        Returns:
+            The hits that carry a URL; empty (and logged, not raised) when the
+            API call fails or the response holds no recognizable results.
+
+        Raises:
+            ValueError: If no API key is configured.
+        """
+        if not self.api_key:
+            raise ValueError("CRW_API_KEY environment variable is required")
+
+        app = AsyncFirecrawlApp(api_key=self.api_key, api_url=self.api_url)
+        search_kwargs = {"query": query, "limit": num_results}
+
+        # A None year (explicitly passed by a caller) falls back to the default
+        if filter_year is None:
+            filter_year = 2025
+
+        if 1900 <= filter_year <= 2100:
+            search_kwargs["tbs"] = f"cdr:1,cd_min:01/01/{filter_year},cd_max:12/31/{filter_year}"
+        else:
+            logger.warning(f"Invalid filter_year: {filter_year}. Expected 1900-2100. Ignoring date filter.")
+
+        try:
+            response = await app.search(**search_kwargs)
+        except Exception as e:
+            logger.error(f"fastCRW API call failed: {e}")
+            return []
+
+        if response is None:
+            logger.warning("fastCRW search returned None response")
+            return []
+
+        logger.debug(f"fastCRW response type: {type(response)}")
+        logger.debug(f"fastCRW response attributes: {dir(response) if hasattr(response, '__dict__') else 'N/A'}")
+
+        web_results = self._extract_web_results(response)
+        if web_results is None:
+            logger.warning(
+                f"fastCRW search response has no accessible results. "
+                f"Response type: {type(response)}, Response: {str(response)[:200]}"
+            )
+            if hasattr(response, '__dict__'):
+                logger.debug(f"Response attributes: {list(response.__dict__.keys())}")
+            elif hasattr(response, '__fields__'):
+                logger.debug(f"Response fields: {list(response.__fields__.keys())}")
+            return []
+
+        return self._to_search_items(web_results)
+
+    def _extract_web_results(self, response: Any) -> Any:
+        """Return the result collection inside ``response``, or None if absent.
+
+        Looks for ``web``, ``data`` then ``results``: first as non-None
+        attributes, then as dict keys; a list response is returned as-is; any
+        other object is dumped via ``model_dump()``/``dict()`` when available.
+        """
+        def pick(mapping: Dict[str, Any]) -> Any:
+            return mapping.get('web') or mapping.get('data') or mapping.get('results')
+
+        for key in ("web", "data", "results"):
+            value = getattr(response, key, None)
+            if value is not None:
+                return value
+        if isinstance(response, dict):
+            return pick(response)
+        if isinstance(response, list):
+            return response
+        try:
+            if hasattr(response, 'model_dump'):
+                return pick(response.model_dump())
+            if hasattr(response, 'dict'):
+                return pick(response.dict())
+        except Exception as e:
+            # Best effort: an unconvertible response means no results, but say why
+            logger.debug(f"Could not convert fastCRW response to a dict: {e}")
+        return None
+
+    def _to_search_items(self, web_results: Any) -> List[SearchItem]:
+        """Convert raw result entries (dicts or objects) into ``SearchItem`` objects.
+
+        None entries and entries without a URL are skipped; ``snippet`` is used
+        when ``description`` is missing or empty.
+        """
+        results: List[SearchItem] = []
+        try:
+            for item in web_results:
+                if item is None:
+                    continue
+                if isinstance(item, dict):
+                    title = item.get('title', '') or ""
+                    url = item.get('url', '') or ""
+                    description = item.get('description', '') or item.get('snippet', '') or ""
+                else:
+                    title = getattr(item, 'title', None) or ""
+                    url = getattr(item, 'url', None) or ""
+                    description = getattr(item, 'description', None) or getattr(item, 'snippet', None) or ""
+                if url:  # Only add items with valid URLs
+                    results.append(SearchItem(title=title, url=url, description=description))
+        except (TypeError, AttributeError) as e:
+            logger.error(f"Error iterating over fastCRW search results: {e}, web_results type: {type(web_results)}")
+        return results
+
+    async def __call__(
+        self,
+        query: str,
+        num_results: Optional[int] = 5,
+        country: Optional[str] = "us",
+        lang: Optional[str] = "en",
+        filter_year: Optional[int] = 2025,
+        **kwargs
+    ) -> ToolResponse:
+        """
+        fastCRW search tool.
+
+        Args:
+            query (str): The query to search for.
+            num_results (Optional[int]): The number of search results to return.
+            country (Optional[str]): The country to search in.
+            lang (Optional[str]): The language to search in.
+            filter_year (int): The year to filter results by. Defaults to 2025.
+        """
+
+        try:
+            # NOTE(review): docstring kept verbatim in case the Tool base parses it -- confirm
+            hits = await self._search_crw(query, num_results=num_results, filter_year=filter_year)
+
+            # Serialize the hits as a JSON array of plain dicts
+            payload = []
+            for hit in hits:
+                payload.append({
+                    "title": hit.title,
+                    "url": hit.url,
+                    "description": hit.description or "",
+                })
+            results_json = json.dumps(payload, ensure_ascii=False, indent=4)
+
+            message = f"fastCRW search results for query: {query}\n\n{results_json}"
+            extra = ToolExtra(data={
+                "query": query,
+                "num_results": len(hits),
+                "search_items": hits,
+                "engine": "crw",
+            })
+            return ToolResponse(success=True, message=message, extra=extra)
+
+        except Exception as e:
+            logger.error(f"Error in fastCRW search: {e}")
+            return ToolResponse(success=False, message=f"Error in fastCRW search: {str(e)}")
diff --git a/src/tool/default_tools/web_searcher.py b/src/tool/default_tools/web_searcher.py
index 6ba714c1..233deec1 100644
--- a/src/tool/default_tools/web_searcher.py
+++ b/src/tool/default_tools/web_searcher.py
@@ -5,11 +5,12 @@
 
 from src.tool.default_tools.web_fetcher import WebFetcherTool
 from src.tool.default_tools.search import (
-    FirecrawlSearch, 
-    SearchItem, 
-    BraveSearch, 
-    BingSearch, 
-    GoogleSearch, 
+    FirecrawlSearch,
+    CrwSearch,
+    SearchItem,
+    BraveSearch,
+    BingSearch,
+    GoogleSearch,
     DDGSSearch
 )
 from src.logger import logger
@@ -68,6 +69,7 @@ def __init__(self, model_name: Optional[str] = None, require_grad: bool = False,
         # Initialize search engines and content fetcher
         self.search_tools = {
             # "firecrawl_search": FirecrawlSearch(),
+            # "crw_search": CrwSearch(),
             # "bing_search": BingSearch(),
             "ddgs_search": DDGSSearch(),
         }