diff --git a/AGENTS.md b/AGENTS.md index 48adb63..e3d4271 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -54,3 +54,29 @@ Add durable exceptions here when a proposed sync should not be repeated. - Do not implement `/responses` in this SDK unless explicitly requested. - If a capability was intentionally rejected for product/design reasons, do not propose it again until this file is updated. + +## Coding practices + +Besides the practices enforced through the CI checks (especially with ruff), we try to enforce the +following coding practices: + +- Mark non-public modules, classes, methods, functions, and attributes private with a leading + underscore when they are not part of the SDK public API. +- Prefer [Google style guide imports](https://google.github.io/styleguide/pyguide.html#22-imports) + for new code: import modules instead of importing symbols, except for `typing` symbols, public + re-exports, and cases where the existing file convention makes module imports awkward. +- Treat anything exported from `linkup.__init__` or listed in `__all__` as public API; add exports + only intentionally. +- Keep API wire-format names at the request/response boundary only. Internal and public Python APIs + should stay snake_case, and camelCase/API aliases should live in request serialization or Pydantic + aliases. +- Keep sync and async methods structurally aligned: same parameters, same validation behavior, same + returned models, and same documented errors. +- Store secrets as `pydantic.SecretStr` or an equivalent secret container once they enter client + state; do not store raw API keys on instances. +- Prefer explicit SDK error classes over leaking transport or dependency exceptions from public + methods, except where deliberately documented. +- Add `# noqa`, `# type: ignore`, and `# pyright: ignore` only with a short reason. +- Avoid adding runtime dependencies for small conveniences; keep SDK dependencies minimal. +- Write tests through the public `linkup` API unless the test is specifically covering private or + internal behavior. diff --git a/README.md b/README.md index 022c3a0..c74e57d 100644 --- a/README.md +++ b/README.md @@ -94,21 +94,19 @@ The `search` function also supports three output types, through the `output_type - with `"searchResults"`, the search will return a list of relevant documents - with `"sourcedAnswer"`, the search will return a concise answer with sources -- with `"structured"`, the search will return a structured output according to a user-defined schema +- with `"structured"`, the search will return a structured output according to a user-defined object + schema ```python -from typing import Any - import linkup client = linkup.Client() # API key can be read from the environment variable or passed as an argument -search_response: Any = client.search( +search_response: linkup.SourcedAnswer = client.search( query="What are the 3 major events in the life of Abraham Lincoln?", depth="deep", # "fast" (beta), "standard", or "deep" output_type="sourcedAnswer", # "searchResults" or "sourcedAnswer" or "structured" structured_output_schema=None, # must be filled if output_type is "structured" ) -assert isinstance(search_response, linkup.SourcedAnswer) print(search_response.model_dump()) ``` @@ -215,19 +213,17 @@ This makes possible to call the Linkup API several times concurrently for instan ```python import asyncio -from typing import Any import linkup async def main() -> None: client = linkup.Client() # API key can be read from the environment variable or passed as an argument - search_response: Any = await client.async_search( + search_response: linkup.SourcedAnswer = await client.async_search( query="What are the 3 major events in the life of Abraham Lincoln?", depth="deep", # "fast" (beta), "standard", or "deep" output_type="sourcedAnswer", # "searchResults" or "sourcedAnswer" or "structured" structured_output_schema=None, # must be filled if output_type is "structured" ) - assert isinstance(search_response, linkup.SourcedAnswer) print(search_response.model_dump()) asyncio.run(main()) diff --git a/src/linkup/__init__.py b/src/linkup/__init__.py index 2b0d902..b47680f 100644 --- a/src/linkup/__init__.py +++ b/src/linkup/__init__.py @@ -17,6 +17,7 @@ LinkupUnknownError, ) from ._types import ( + JSONObject, LinkupFetchImageExtraction, LinkupFetchResponse, LinkupFetchTask, @@ -92,6 +93,7 @@ "FetchUrlIsFileError", "InsufficientCreditError", "InvalidRequestError", + "JSONObject", "LinkupAuthenticationError", "LinkupBudgetLimitExceededError", "LinkupClient", diff --git a/src/linkup/_client.py b/src/linkup/_client.py index 15d5152..d95616d 100644 --- a/src/linkup/_client.py +++ b/src/linkup/_client.py @@ -5,10 +5,10 @@ import json import os from datetime import date # noqa: TC003 (`date` is used in test mocks) -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast, overload import httpx -from pydantic import BaseModel, SecretStr +import pydantic from ._errors import ( LinkupAuthenticationError, @@ -28,6 +28,7 @@ LinkupUnknownError, ) from ._types import ( + JSONObject, LinkupFetchResponse, LinkupFetchTask, LinkupFetchTaskInput, @@ -73,7 +74,7 @@ class LinkupClient: def __init__( self, - api_key: str | SecretStr | None = None, + api_key: str | pydantic.SecretStr | None = None, base_url: str = "https://api.linkup.so/v1", x402_signer: LinkupX402Signer | None = None, auth_header: str | None = None, @@ -84,25 +85,123 @@ def __init__( self._x402_signer: LinkupX402Signer | None = x402_signer if x402_signer is not None: - self._api_key: SecretStr | None = None + self._api_key: pydantic.SecretStr | None = None else: if api_key is None: api_key = os.getenv("LINKUP_API_KEY") if not api_key: raise ValueError("The Linkup API key was not provided") if isinstance(api_key, str): - api_key = SecretStr(api_key) + api_key = pydantic.SecretStr(api_key) self._api_key = api_key self._base_url: str = base_url self._auth_header: str | None = auth_header + @overload def search( self, query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSearchResults: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["sourcedAnswer"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSourcedAnswer: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[False] | None = None, + timeout: float | None = None, + ) -> JSONObject: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[True], + timeout: float | None = None, + ) -> LinkupSearchStructuredResponse: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults", "sourcedAnswer", "structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> ( + LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse + ): ... + + def search( + self, + query: str, + *, depth: Literal["fast", "standard", "deep"], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, include_images: bool | None = None, from_date: date | str | None = None, to_date: date | str | None = None, @@ -112,7 +211,7 @@ def search( include_inline_citations: bool | None = None, include_sources: bool | None = None, timeout: float | None = None, - ) -> Any: # noqa: ANN401 + ) -> LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse: """Perform a web search using the Linkup API `search` endpoint. All optional parameters will default to the Linkup API defaults when not provided. The @@ -130,8 +229,8 @@ def search( supporting it, and "structured" will base the output on the format provided in structured_output_schema. structured_output_schema: If output_type is "structured", specify the schema of the - output. Supported formats are a pydantic.BaseModel, a Python dictionary containing a - valid object JSON schema, or a string representing a valid object JSON schema. + output. Supported formats are a `pydantic.BaseModel`, a Python dictionary containing + a valid object JSON schema, or a string representing a valid object JSON schema. include_images: Indicate whether images should be included during the search. from_date: The date from which the search results should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, the search @@ -153,10 +252,8 @@ def search( The Linkup API search result, which can have different types based on the parameters: - LinkupSearchResults if output_type is "searchResults" - LinkupSourcedAnswer if output_type is "sourcedAnswer" - - the provided pydantic.BaseModel or an arbitrary data structure if output_type is - "structured" and include_sources is False - - LinkupSearchStructuredResponse with the provided pydantic.BaseModel or an arbitrary - data structure as data field, if output_type is "structured" and include_sources is + - a raw dictionary if output_type is "structured" and include_sources is False + - LinkupSearchStructuredResponse if output_type is "structured" and include_sources is True Raises: @@ -193,18 +290,115 @@ def search( ) return self._parse_search_response( - response=response, + response_data=response.json(), output_type=output_type, - structured_output_schema=structured_output_schema, include_sources=include_sources, ) + @overload async def async_search( self, query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSearchResults: ... + + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["sourcedAnswer"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSourcedAnswer: ... + + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[False] | None = None, + timeout: float | None = None, + ) -> JSONObject: ... + + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[True], + timeout: float | None = None, + ) -> LinkupSearchStructuredResponse: ... + + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults", "sourcedAnswer", "structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> ( + LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse + ): ... + + async def async_search( + self, + query: str, + *, depth: Literal["fast", "standard", "deep"], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, include_images: bool | None = None, from_date: date | str | None = None, to_date: date | str | None = None, @@ -214,7 +408,7 @@ async def async_search( include_inline_citations: bool | None = None, include_sources: bool | None = None, timeout: float | None = None, - ) -> Any: # noqa: ANN401 + ) -> LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse: """Asynchronously perform a web search using the Linkup API `search` endpoint. All optional parameters will default to the Linkup API defaults when not provided. The @@ -232,8 +426,8 @@ async def async_search( supporting it, and "structured" will base the output on the format provided in structured_output_schema. structured_output_schema: If output_type is "structured", specify the schema of the - output. Supported formats are a pydantic.BaseModel, a Python dictionary containing a - valid object JSON schema, or a string representing a valid object JSON schema. + output. Supported formats are a `pydantic.BaseModel`, a Python dictionary containing + a valid object JSON schema, or a string representing a valid object JSON schema. include_images: Indicate whether images should be included during the search. from_date: The date from which the search results should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, the search @@ -255,10 +449,8 @@ async def async_search( The Linkup API search result, which can have different types based on the parameters: - LinkupSearchResults if output_type is "searchResults" - LinkupSourcedAnswer if output_type is "sourcedAnswer" - - the provided pydantic.BaseModel or an arbitrary data structure if output_type is - "structured" and include_sources is False - - LinkupSearchStructuredResponse with the provided pydantic.BaseModel or an arbitrary - data structure as data field, if output_type is "structured" and include_sources is + - a raw dictionary if output_type is "structured" and include_sources is False + - LinkupSearchStructuredResponse if output_type is "structured" and include_sources is True Raises: @@ -295,9 +487,8 @@ async def async_search( ) return self._parse_search_response( - response=response, + response_data=response.json(), output_type=output_type, - structured_output_schema=structured_output_schema, include_sources=include_sources, ) @@ -307,7 +498,7 @@ def research( output_type: Literal["sourcedAnswer", "structured"], reasoning_depth: Literal["S", "M", "L", "XL"] | None = None, mode: Literal["answer", "auto", "investigate", "research"] | None = None, - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, from_date: date | str | None = None, to_date: date | str | None = None, exclude_domains: list[str] | None = None, @@ -327,7 +518,7 @@ def research( is used. mode: The research mode to use. If None, the Linkup API default is used. structured_output_schema: If output_type is "structured", specify the output schema. - Supported formats are a pydantic.BaseModel, a Python dictionary containing a valid + Supported formats are a `pydantic.BaseModel`, a Python dictionary containing a valid object JSON schema, or a string representing a valid object JSON schema. from_date: The date from which the research sources should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, sources will @@ -341,7 +532,7 @@ def research( no timeout. Returns: - The created research task. + The newly created research task, with "pending" status and no output. Raises: TypeError: If structured_output_schema is not a string, dictionary, or @@ -378,7 +569,7 @@ async def async_research( output_type: Literal["sourcedAnswer", "structured"], reasoning_depth: Literal["S", "M", "L", "XL"] | None = None, mode: Literal["answer", "auto", "investigate", "research"] | None = None, - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, from_date: date | str | None = None, to_date: date | str | None = None, exclude_domains: list[str] | None = None, @@ -398,7 +589,7 @@ async def async_research( is used. mode: The research mode to use. If None, the Linkup API default is used. structured_output_schema: If output_type is "structured", specify the output schema. - Supported formats are a pydantic.BaseModel, a Python dictionary containing a valid + Supported formats are a `pydantic.BaseModel`, a Python dictionary containing a valid object JSON schema, or a string representing a valid object JSON schema. from_date: The date from which the research sources should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, sources will @@ -412,7 +603,7 @@ async def async_research( no timeout. Returns: - The created research task. + The newly created research task, with "pending" status and no output. Raises: TypeError: If structured_output_schema is not a string, dictionary, or @@ -589,7 +780,7 @@ def create_tasks( no timeout. Returns: - The created tasks, parsed according to each task type. + The newly created tasks with "pending" status and no output. Raises: TypeError: If a task has an unsupported model type, or if a structured output schema has @@ -622,7 +813,7 @@ async def async_create_tasks( no timeout. Returns: - The created tasks, parsed according to each task type. + The newly created tasks with "pending" status and no output. Raises: TypeError: If a task has an unsupported model type, or if a structured output schema has @@ -845,7 +1036,7 @@ def fetch( timeout=timeout, ) - return self._parse_fetch_response(response=response) + return self._parse_fetch_response(response_data=response.json()) async def async_fetch( self, @@ -895,7 +1086,7 @@ async def async_fetch( timeout=timeout, ) - return self._parse_fetch_response(response=response) + return self._parse_fetch_response(response_data=response.json()) def _user_agent(self) -> str: # pragma: no cover return f"Linkup-Python/{self.__version__}" @@ -1204,7 +1395,7 @@ def _get_search_params( query: str, depth: Literal["fast", "standard", "deep"], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None, + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None, include_images: bool | None, from_date: date | str | None, to_date: date | str | None, @@ -1214,6 +1405,11 @@ def _get_search_params( include_inline_citations: bool | None, include_sources: bool | None, ) -> dict[str, str | bool | int | list[str]]: + if output_type == "structured" and structured_output_schema is None: + raise TypeError( + "structured_output_schema must be provided when output_type is 'structured'" + ) + params: dict[str, str | bool | int | list[str]] = { "q": query, "depth": depth, @@ -1225,7 +1421,7 @@ def _get_search_params( params["structuredOutputSchema"] = structured_output_schema elif isinstance(structured_output_schema, dict): params["structuredOutputSchema"] = json.dumps(structured_output_schema) - elif issubclass(structured_output_schema, BaseModel): + elif issubclass(structured_output_schema, pydantic.BaseModel): json_schema: dict[str, Any] = structured_output_schema.model_json_schema() params["structuredOutputSchema"] = json.dumps(json_schema) else: @@ -1257,7 +1453,7 @@ def _get_research_params( output_type: Literal["sourcedAnswer", "structured"], reasoning_depth: Literal["S", "M", "L", "XL"] | None, mode: Literal["answer", "auto", "investigate", "research"] | None, - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None, + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None, from_date: date | str | None, to_date: date | str | None, exclude_domains: list[str] | None, @@ -1278,7 +1474,7 @@ def _get_research_params( params["structuredOutputSchema"] = structured_output_schema elif isinstance(structured_output_schema, dict): params["structuredOutputSchema"] = json.dumps(structured_output_schema) - elif issubclass(structured_output_schema, BaseModel): + elif issubclass(structured_output_schema, pydantic.BaseModel): json_schema: dict[str, Any] = structured_output_schema.model_json_schema() params["structuredOutputSchema"] = json.dumps(json_schema) else: @@ -1422,67 +1618,39 @@ def _get_fetch_params( def _parse_search_response( self, - response: httpx.Response, + response_data: dict[str, Any], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None, include_sources: bool | None, - ) -> Any: # noqa: ANN401 - return self._parse_search_response_data( - response_data=response.json(), - output_type=output_type, - structured_output_schema=structured_output_schema, - include_sources=include_sources, - ) - - def _parse_search_response_data( - self, - response_data: Any, # noqa: ANN401 - output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None, - include_sources: bool | None, - ) -> Any: # noqa: ANN401 + ) -> LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse: if output_type == "searchResults": return LinkupSearchResults.model_validate(response_data) if output_type == "sourcedAnswer": return LinkupSourcedAnswer.model_validate(response_data) if output_type == "structured": - if structured_output_schema is None: - raise ValueError( - "structured_output_schema must be provided when output_type is 'structured'" - ) # HACK: we assume that `include_sources` will default to False, since the API output can # be arbitrary so we can't guess if it includes sources or not if include_sources: - if isinstance(structured_output_schema, type) and issubclass( - structured_output_schema, BaseModel - ): - response_data["data"] = structured_output_schema.model_validate( - response_data["data"] - ) return LinkupSearchStructuredResponse.model_validate(response_data) - if isinstance(structured_output_schema, type) and issubclass( - structured_output_schema, BaseModel - ): - return structured_output_schema.model_validate(response_data) return response_data raise ValueError(f"Unexpected output_type value: '{output_type}'") - def _parse_fetch_response(self, response: httpx.Response) -> LinkupFetchResponse: - return self._parse_fetch_response_data(response.json()) - - def _parse_fetch_response_data(self, response_data: Any) -> LinkupFetchResponse: # noqa: ANN401 + def _parse_fetch_response(self, response_data: dict[str, Any]) -> LinkupFetchResponse: return LinkupFetchResponse.model_validate(response_data) def _parse_research_task(self, task_data: dict[str, Any]) -> LinkupResearchTask: research_input = LinkupResearchTaskInput.model_validate(task_data["input"]) parsed_output = task_data.get("output") + task = LinkupResearchTask.model_validate( + {**task_data, "input": research_input, "output": None} + ) - if parsed_output is not None and research_input.output_type == "sourcedAnswer": + # Following conversion is a bit convoluted but avoids parsing as sourced answer if using a + # structured output schema that matches the sourced answer schema + if parsed_output is None: + return task + if research_input.output_type == "sourcedAnswer": parsed_output = LinkupSourcedAnswer.model_validate(parsed_output) - - return LinkupResearchTask.model_validate( - {**task_data, "input": research_input, "output": parsed_output} - ) + return task.model_copy(update={"output": parsed_output}) def _parse_task(self, task_data: dict[str, Any]) -> LinkupTask: task_type = task_data["type"] @@ -1490,22 +1658,24 @@ def _parse_task(self, task_data: dict[str, Any]) -> LinkupTask: if task_type == "search": search_input = LinkupSearchTaskInput.model_validate(task_data["input"]) parsed_output = task_data.get("output") + task = LinkupSearchTask.model_validate( + {**task_data, "input": search_input, "output": None} + ) + if parsed_output is not None: - parsed_output = self._parse_search_response_data( + parsed_output = self._parse_search_response( response_data=parsed_output, output_type=search_input.output_type, - structured_output_schema=search_input.structured_output_schema, include_sources=search_input.include_sources, ) - return LinkupSearchTask.model_validate( - {**task_data, "input": search_input, "output": parsed_output} - ) + return task.model_copy(update={"output": parsed_output}) + return task if task_type == "fetch": fetch_input = LinkupFetchTaskInput.model_validate(task_data["input"]) parsed_output = task_data.get("output") if parsed_output is not None: - parsed_output = self._parse_fetch_response_data(parsed_output) + parsed_output = self._parse_fetch_response(parsed_output) return LinkupFetchTask.model_validate( {**task_data, "input": fetch_input, "output": parsed_output} ) diff --git a/src/linkup/_types.py b/src/linkup/_types.py index b8a0880..b7cb2da 100644 --- a/src/linkup/_types.py +++ b/src/linkup/_types.py @@ -1,13 +1,18 @@ """Input and output types for Linkup functions.""" +from __future__ import annotations + from datetime import date -from typing import Any, Literal +from typing import Any, Literal, TypeAlias + +import pydantic + -from pydantic import BaseModel, ConfigDict, Field +class _LinkupBaseModel(pydantic.BaseModel): + model_config = pydantic.ConfigDict(populate_by_name=True) -class _LinkupBaseModel(BaseModel): - model_config = ConfigDict(populate_by_name=True) +JSONObject: TypeAlias = dict[str, Any] class LinkupSearchTextResult(_LinkupBaseModel): @@ -84,11 +89,11 @@ class LinkupSearchStructuredResponse(_LinkupBaseModel): """A Linkup `search` structured response, with the sources supporting it. Attributes: - data: The answer data, either as a Pydantic model or an arbitrary JSON structure. + data: The raw structured output dictionary. sources: The sources supporting the answer. """ - data: Any + data: JSONObject sources: list[LinkupSearchTextResult | LinkupSearchImageResult] @@ -114,8 +119,8 @@ class LinkupFetchResponse(_LinkupBaseModel): """ markdown: str - raw_html: str | None = Field(default=None, validation_alias="rawHtml") - images: list[LinkupFetchImageExtraction] | None = Field(default=None) + raw_html: str | None = pydantic.Field(default=None, validation_alias="rawHtml") + images: list[LinkupFetchImageExtraction] | None = pydantic.Field(default=None) class LinkupSearchTaskInput(_LinkupBaseModel): @@ -136,23 +141,27 @@ class LinkupSearchTaskInput(_LinkupBaseModel): structured_output_schema: The structured output schema, if any. """ - query: str = Field(validation_alias="q") + query: str = pydantic.Field(validation_alias="q") depth: Literal["fast", "standard", "deep"] - output_type: Literal["searchResults", "sourcedAnswer", "structured"] = Field( + output_type: Literal["searchResults", "sourcedAnswer", "structured"] = pydantic.Field( validation_alias="outputType" ) - include_images: bool | None = Field(default=None, validation_alias="includeImages") - from_date: date | str | None = Field(default=None, validation_alias="fromDate") - to_date: date | str | None = Field(default=None, validation_alias="toDate") - exclude_domains: list[str] | None = Field(default=None, validation_alias="excludeDomains") - include_domains: list[str] | None = Field(default=None, validation_alias="includeDomains") - max_results: int | None = Field(default=None, validation_alias="maxResults") - include_inline_citations: bool | None = Field( + include_images: bool | None = pydantic.Field(default=None, validation_alias="includeImages") + from_date: date | str | None = pydantic.Field(default=None, validation_alias="fromDate") + to_date: date | str | None = pydantic.Field(default=None, validation_alias="toDate") + exclude_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="excludeDomains" + ) + include_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="includeDomains" + ) + max_results: int | None = pydantic.Field(default=None, validation_alias="maxResults") + include_inline_citations: bool | None = pydantic.Field( default=None, validation_alias="includeInlineCitations" ) - include_sources: bool | None = Field(default=None, validation_alias="includeSources") - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None = Field( - default=None, validation_alias="structuredOutputSchema" + include_sources: bool | None = pydantic.Field(default=None, validation_alias="includeSources") + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None = ( + pydantic.Field(default=None, validation_alias="structuredOutputSchema") ) @@ -171,18 +180,24 @@ class LinkupResearchTaskInput(_LinkupBaseModel): structured_output_schema: The structured output schema, if any. """ - query: str = Field(validation_alias="q") - output_type: Literal["sourcedAnswer", "structured"] = Field(validation_alias="outputType") + query: str = pydantic.Field(validation_alias="q") + output_type: Literal["sourcedAnswer", "structured"] = pydantic.Field( + validation_alias="outputType" + ) mode: Literal["answer", "auto", "investigate", "research"] | None = None - reasoning_depth: Literal["S", "M", "L", "XL"] | None = Field( + reasoning_depth: Literal["S", "M", "L", "XL"] | None = pydantic.Field( default=None, validation_alias="reasoningDepth" ) - from_date: date | str | None = Field(default=None, validation_alias="fromDate") - to_date: date | str | None = Field(default=None, validation_alias="toDate") - exclude_domains: list[str] | None = Field(default=None, validation_alias="excludeDomains") - include_domains: list[str] | None = Field(default=None, validation_alias="includeDomains") - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None = Field( - default=None, validation_alias="structuredOutputSchema" + from_date: date | str | None = pydantic.Field(default=None, validation_alias="fromDate") + to_date: date | str | None = pydantic.Field(default=None, validation_alias="toDate") + exclude_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="excludeDomains" + ) + include_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="includeDomains" + ) + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None = ( + pydantic.Field(default=None, validation_alias="structuredOutputSchema") ) @@ -197,9 +212,9 @@ class LinkupFetchTaskInput(_LinkupBaseModel): """ url: str - include_raw_html: bool | None = Field(default=None, validation_alias="includeRawHtml") - render_js: bool | None = Field(default=None, validation_alias="renderJs") - extract_images: bool | None = Field(default=None, validation_alias="extractImages") + include_raw_html: bool | None = pydantic.Field(default=None, validation_alias="includeRawHtml") + render_js: bool | None = pydantic.Field(default=None, validation_alias="renderJs") + extract_images: bool | None = pydantic.Field(default=None, validation_alias="extractImages") LinkupTaskInput = LinkupSearchTaskInput | LinkupFetchTaskInput | LinkupResearchTaskInput @@ -216,9 +231,9 @@ class LinkupTaskMetadata(_LinkupBaseModel): """ page: int - page_size: int = Field(validation_alias="pageSize") + page_size: int = pydantic.Field(validation_alias="pageSize") total: int - total_pages: int = Field(validation_alias="totalPages") + total_pages: int = pydantic.Field(validation_alias="totalPages") class LinkupTaskQuota(_LinkupBaseModel): @@ -229,7 +244,7 @@ class LinkupTaskQuota(_LinkupBaseModel): limit: The maximum number of in-flight tasks allowed. """ - in_flight: int = Field(validation_alias="inFlight") + in_flight: int = pydantic.Field(validation_alias="inFlight") limit: int @@ -241,20 +256,27 @@ class LinkupSearchTask(_LinkupBaseModel): error: The task error message, if the task failed. id: The task identifier. input: The normalized search input for this task. - output: The parsed search output, if available. + output: The parsed search results, sourced answer, sourced structured response, or raw + structured output dictionary, if available. status: The current task status. type: The task type, in this case "search". updated_at: The last task update timestamp. """ - created_at: str = Field(validation_alias="createdAt") + created_at: str = pydantic.Field(validation_alias="createdAt") error: str | None = None id: str input: LinkupSearchTaskInput - output: Any = None + output: ( + LinkupSearchResults + | LinkupSourcedAnswer + | LinkupSearchStructuredResponse + | JSONObject + | None + ) = None status: Literal["pending", "processing", "completed", "failed"] type: Literal["search"] - updated_at: str = Field(validation_alias="updatedAt") + updated_at: str = pydantic.Field(validation_alias="updatedAt") class LinkupFetchTask(_LinkupBaseModel): @@ -271,14 +293,14 @@ class LinkupFetchTask(_LinkupBaseModel): updated_at: The last task update timestamp. """ - created_at: str = Field(validation_alias="createdAt") + created_at: str = pydantic.Field(validation_alias="createdAt") error: str | None = None id: str input: LinkupFetchTaskInput output: LinkupFetchResponse | None = None status: Literal["pending", "processing", "completed", "failed"] type: Literal["fetch"] - updated_at: str = Field(validation_alias="updatedAt") + updated_at: str = pydantic.Field(validation_alias="updatedAt") class LinkupResearchTask(_LinkupBaseModel): @@ -289,20 +311,20 @@ class LinkupResearchTask(_LinkupBaseModel): error: The task error message, if the task failed. id: The task identifier. input: The normalized research input for this task. - output: The parsed research output, if available. + output: The parsed sourced answer, or raw structured output dictionary, if available. status: The current task status. type: The task type, in this case "research". updated_at: The last task update timestamp. """ - created_at: str = Field(validation_alias="createdAt") + created_at: str = pydantic.Field(validation_alias="createdAt") error: str | None = None id: str input: LinkupResearchTaskInput - output: Any = None + output: LinkupSourcedAnswer | JSONObject | None = None status: Literal["pending", "processing", "completed", "failed"] type: Literal["research"] - updated_at: str = Field(validation_alias="updatedAt") + updated_at: str = pydantic.Field(validation_alias="updatedAt") LinkupTask = LinkupSearchTask | LinkupFetchTask | LinkupResearchTask diff --git a/tests/unit/client_test.py b/tests/unit/client_test.py index bd782bd..bb438b0 100644 --- a/tests/unit/client_test.py +++ b/tests/unit/client_test.py @@ -1,18 +1,18 @@ import json from datetime import date -from typing import Any +from typing import Any, cast from unittest.mock import AsyncMock, MagicMock import httpx +import pydantic import pytest from httpx import Response -from pydantic import BaseModel from pytest_mock import MockerFixture import linkup -class Company(BaseModel): +class Company(pydantic.BaseModel): name: str creation_date: str website_url: str @@ -192,12 +192,12 @@ class Company(BaseModel): "website_url": "https://www.linkup.so/" } """, - Company( - name="Linkup", - founders_names=["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], - creation_date="2024", - website_url="https://www.linkup.so/", - ), + { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/", + }, ), ( { @@ -291,12 +291,12 @@ class Company(BaseModel): } """, linkup.SearchStructuredResponse( - data=Company( - name="Linkup", - founders_names=["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], - creation_date="2024", - website_url="https://www.linkup.so/", - ), + data={ + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/", + }, sources=[ linkup.SearchTextResult( type="text", @@ -338,7 +338,7 @@ def test_search( ), ) - search_response: Any = client.search(**search_kwargs) + search_response = cast("Any", client.search(**search_kwargs)) expected_timeout = search_kwargs.get("timeout", None) request_mock.assert_called_once_with( method="POST", @@ -349,6 +349,55 @@ def test_search( assert search_response == expected_search_response +def test_search_structured_output_model_dump_preserves_data( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "data": { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/" + }, + "sources": [] + } + """, + ), + ) + + search_response = client.search( + query="query", + depth="standard", + output_type="structured", + structured_output_schema=Company, + include_sources=True, + ) + + assert search_response.model_dump()["data"] == { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/", + } + + +def test_search_structured_output_requires_schema(client: linkup.Client) -> None: + with pytest.raises( + TypeError, + match="structured_output_schema must be provided", + ): + client.search( + query="query", + depth="standard", + output_type="structured", + ) + + @pytest.mark.asyncio @pytest.mark.parametrize( ( @@ -376,7 +425,7 @@ async def test_async_search( ), ) - search_response: Any = await client.async_search(**search_kwargs) + search_response = cast("Any", await client.async_search(**search_kwargs)) expected_timeout = search_kwargs.get("timeout", None) request_mock.assert_called_once_with( method="POST", @@ -387,6 +436,19 @@ async def test_async_search( assert search_response == expected_search_response +@pytest.mark.asyncio +async def test_async_search_structured_output_requires_schema(client: linkup.Client) -> None: + with pytest.raises( + TypeError, + match="structured_output_schema must be provided", + ): + await client.async_search( + query="query", + depth="standard", + output_type="structured", + ) + + test_search_error_parameters = [ ( 402, @@ -656,6 +718,42 @@ def test_research(mocker: MockerFixture, client: linkup.Client) -> None: ) +def test_get_research_structured_output_keeps_sourced_answer_shape_raw( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "createdAt": "2026-05-18T00:00:00.000Z", + "error": null, + "id": "bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c", + "input": { + "outputType": "structured", + "q": "query" + }, + "output": { + "answer": "structured answer field", + "sources": [] + }, + "status": "completed", + "type": "research", + "updatedAt": "2026-05-18T00:00:00.000Z" + } + """, + ), + ) + + research_response = client.get_research("bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c") + + assert research_response.output == { + "answer": "structured answer field", + "sources": [], + } + + @pytest.mark.asyncio async def test_async_research(mocker: MockerFixture, client: linkup.Client) -> None: request_mock = mocker.patch( @@ -1367,6 +1465,78 @@ def test_list_tasks(mocker: MockerFixture, client: linkup.Client) -> None: assert tasks_page.quota.in_flight == 1 +def test_get_task_structured_search_output_keeps_search_results_shape_raw( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "createdAt": "2026-05-18T00:00:00.000Z", + "error": null, + "id": "bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c", + "input": { + "depth": "standard", + "outputType": "structured", + "q": "query", + "structuredOutputSchema": { + "type": "object" + } + }, + "output": { + "results": [] + }, + "status": "completed", + "type": "search", + "updatedAt": "2026-05-18T00:00:00.000Z" + } + """, + ), + ) + + task = client.get_task("bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c") + + assert isinstance(task, linkup.SearchTask) + assert task.output == {"results": []} + + +def test_get_task_structured_search_output_without_schema_raw( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "createdAt": "2026-05-18T00:00:00.000Z", + "error": null, + "id": "bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c", + "input": { + "depth": "standard", + "outputType": "structured", + "q": "query" + }, + "output": { + "summary": "done" + }, + "status": "completed", + "type": "search", + "updatedAt": "2026-05-18T00:00:00.000Z" + } + """, + ), + ) + + task = client.get_task("bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c") + + assert isinstance(task, linkup.SearchTask) + assert task.input.structured_output_schema is None + assert task.output == {"summary": "done"} + + def test_list_tasks_with_multiple_filters(mocker: MockerFixture, client: linkup.Client) -> None: request_mock = mocker.patch( "httpx.Client.request",