From 17ba3532ae0a394901c5577f6a877b6199510ed5 Mon Sep 17 00:00:00 2001 From: cjumel Date: Fri, 29 May 2026 10:57:59 +0200 Subject: [PATCH] fix!: always return structured outputs as dicts This commit removes the output conversion to a `pydantic.BaseModel` when one is passed as the structured output schema. This is done for consistency with the task and research methods (where the same conversion cannot be implemented simply). This will also simplify the code and method signatures. With the simplification of the type hints, this was also an opportunity to rework and improve the type hinting of the client's main methods, especially to enforce accurate type hinting per output type and `include_sources` parameter. Doing so required enforcing a limit on the number of positional arguments, which I set quite aggressively to prefer a long-term solution. I also enforced the [Google style guide import recommendation](https://google.github.io/styleguide/pyguide.html#22-imports), that is to only import modules, for pydantic imports in the files I changed. --- AGENTS.md | 26 +++ README.md | 12 +- src/linkup/__init__.py | 2 + src/linkup/_client.py | 340 ++++++++++++++++++++++++++++---------- src/linkup/_types.py | 112 ++++++++----- tests/unit/client_test.py | 204 +++++++++++++++++++++-- 6 files changed, 541 insertions(+), 155 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 48adb63..e3d4271 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -54,3 +54,29 @@ Add durable exceptions here when a proposed sync should not be repeated. - Do not implement `/responses` in this SDK unless explicitly requested. - If a capability was intentionally rejected for product/design reasons, do not propose it again until this file is updated. 
+ +## Coding practices + +Besides the practices enforced through the CI checks (especially with ruff), we try to enforce the +following coding practices: + +- Mark non-public modules, classes, methods, functions, and attributes private with a leading + underscore when they are not part of the SDK public API. +- Prefer [Google style guide imports](https://google.github.io/styleguide/pyguide.html#22-imports) + for new code: import modules instead of importing symbols, except for `typing` symbols, public + re-exports, and cases where the existing file convention makes module imports awkward. +- Treat anything exported from `linkup.__init__` or listed in `__all__` as public API; add exports + only intentionally. +- Keep API wire-format names at the request/response boundary only. Internal and public Python APIs + should stay snake_case, and camelCase/API aliases should live in request serialization or Pydantic + aliases. +- Keep sync and async methods structurally aligned: same parameters, same validation behavior, same + returned models, and same documented errors. +- Store secrets as `pydantic.SecretStr` or an equivalent secret container once they enter client + state; do not store raw API keys on instances. +- Prefer explicit SDK error classes over leaking transport or dependency exceptions from public + methods, except where deliberately documented. +- Add `# noqa`, `# type: ignore`, and `# pyright: ignore` only with a short reason. +- Avoid adding runtime dependencies for small conveniences; keep SDK dependencies minimal. +- Write tests through the public `linkup` API unless the test is specifically covering private or + internal behavior. 
diff --git a/README.md b/README.md index 022c3a0..c74e57d 100644 --- a/README.md +++ b/README.md @@ -94,21 +94,19 @@ The `search` function also supports three output types, through the `output_type - with `"searchResults"`, the search will return a list of relevant documents - with `"sourcedAnswer"`, the search will return a concise answer with sources -- with `"structured"`, the search will return a structured output according to a user-defined schema +- with `"structured"`, the search will return a structured output according to a user-defined object + schema ```python -from typing import Any - import linkup client = linkup.Client() # API key can be read from the environment variable or passed as an argument -search_response: Any = client.search( +search_response: linkup.SourcedAnswer = client.search( query="What are the 3 major events in the life of Abraham Lincoln?", depth="deep", # "fast" (beta), "standard", or "deep" output_type="sourcedAnswer", # "searchResults" or "sourcedAnswer" or "structured" structured_output_schema=None, # must be filled if output_type is "structured" ) -assert isinstance(search_response, linkup.SourcedAnswer) print(search_response.model_dump()) ``` @@ -215,19 +213,17 @@ This makes possible to call the Linkup API several times concurrently for instan ```python import asyncio -from typing import Any import linkup async def main() -> None: client = linkup.Client() # API key can be read from the environment variable or passed as an argument - search_response: Any = await client.async_search( + search_response: linkup.SourcedAnswer = await client.async_search( query="What are the 3 major events in the life of Abraham Lincoln?", depth="deep", # "fast" (beta), "standard", or "deep" output_type="sourcedAnswer", # "searchResults" or "sourcedAnswer" or "structured" structured_output_schema=None, # must be filled if output_type is "structured" ) - assert isinstance(search_response, linkup.SourcedAnswer) print(search_response.model_dump()) 
asyncio.run(main()) diff --git a/src/linkup/__init__.py b/src/linkup/__init__.py index 2b0d902..b47680f 100644 --- a/src/linkup/__init__.py +++ b/src/linkup/__init__.py @@ -17,6 +17,7 @@ LinkupUnknownError, ) from ._types import ( + JSONObject, LinkupFetchImageExtraction, LinkupFetchResponse, LinkupFetchTask, @@ -92,6 +93,7 @@ "FetchUrlIsFileError", "InsufficientCreditError", "InvalidRequestError", + "JSONObject", "LinkupAuthenticationError", "LinkupBudgetLimitExceededError", "LinkupClient", diff --git a/src/linkup/_client.py b/src/linkup/_client.py index 15d5152..d95616d 100644 --- a/src/linkup/_client.py +++ b/src/linkup/_client.py @@ -5,10 +5,10 @@ import json import os from datetime import date # noqa: TC003 (`date` is used in test mocks) -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, cast, overload import httpx -from pydantic import BaseModel, SecretStr +import pydantic from ._errors import ( LinkupAuthenticationError, @@ -28,6 +28,7 @@ LinkupUnknownError, ) from ._types import ( + JSONObject, LinkupFetchResponse, LinkupFetchTask, LinkupFetchTaskInput, @@ -73,7 +74,7 @@ class LinkupClient: def __init__( self, - api_key: str | SecretStr | None = None, + api_key: str | pydantic.SecretStr | None = None, base_url: str = "https://api.linkup.so/v1", x402_signer: LinkupX402Signer | None = None, auth_header: str | None = None, @@ -84,25 +85,123 @@ def __init__( self._x402_signer: LinkupX402Signer | None = x402_signer if x402_signer is not None: - self._api_key: SecretStr | None = None + self._api_key: pydantic.SecretStr | None = None else: if api_key is None: api_key = os.getenv("LINKUP_API_KEY") if not api_key: raise ValueError("The Linkup API key was not provided") if isinstance(api_key, str): - api_key = SecretStr(api_key) + api_key = pydantic.SecretStr(api_key) self._api_key = api_key self._base_url: str = base_url self._auth_header: str | None = auth_header + @overload def search( self, query: str, + *, + 
depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSearchResults: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["sourcedAnswer"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSourcedAnswer: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[False] | None = None, + timeout: float | None = None, + ) -> JSONObject: ... 
+ + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[True], + timeout: float | None = None, + ) -> LinkupSearchStructuredResponse: ... + + @overload + def search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults", "sourcedAnswer", "structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> ( + LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse + ): ... 
+ + def search( + self, + query: str, + *, depth: Literal["fast", "standard", "deep"], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, include_images: bool | None = None, from_date: date | str | None = None, to_date: date | str | None = None, @@ -112,7 +211,7 @@ def search( include_inline_citations: bool | None = None, include_sources: bool | None = None, timeout: float | None = None, - ) -> Any: # noqa: ANN401 + ) -> LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse: """Perform a web search using the Linkup API `search` endpoint. All optional parameters will default to the Linkup API defaults when not provided. The @@ -130,8 +229,8 @@ def search( supporting it, and "structured" will base the output on the format provided in structured_output_schema. structured_output_schema: If output_type is "structured", specify the schema of the - output. Supported formats are a pydantic.BaseModel, a Python dictionary containing a - valid object JSON schema, or a string representing a valid object JSON schema. + output. Supported formats are a `pydantic.BaseModel`, a Python dictionary containing + a valid object JSON schema, or a string representing a valid object JSON schema. include_images: Indicate whether images should be included during the search. from_date: The date from which the search results should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. 
If None, the search @@ -153,10 +252,8 @@ def search( The Linkup API search result, which can have different types based on the parameters: - LinkupSearchResults if output_type is "searchResults" - LinkupSourcedAnswer if output_type is "sourcedAnswer" - - the provided pydantic.BaseModel or an arbitrary data structure if output_type is - "structured" and include_sources is False - - LinkupSearchStructuredResponse with the provided pydantic.BaseModel or an arbitrary - data structure as data field, if output_type is "structured" and include_sources is + - a raw dictionary if output_type is "structured" and include_sources is False + - LinkupSearchStructuredResponse if output_type is "structured" and include_sources is True Raises: @@ -193,18 +290,115 @@ def search( ) return self._parse_search_response( - response=response, + response_data=response.json(), output_type=output_type, - structured_output_schema=structured_output_schema, include_sources=include_sources, ) + @overload async def async_search( self, query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSearchResults: ... 
+ + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["sourcedAnswer"], + structured_output_schema: None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> LinkupSourcedAnswer: ... + + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[False] | None = None, + timeout: float | None = None, + ) -> JSONObject: ... + + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: Literal[True], + timeout: float | None = None, + ) -> LinkupSearchStructuredResponse: ... 
+ + @overload + async def async_search( + self, + query: str, + *, + depth: Literal["fast", "standard", "deep"], + output_type: Literal["searchResults", "sourcedAnswer", "structured"], + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, + include_images: bool | None = None, + from_date: date | str | None = None, + to_date: date | str | None = None, + exclude_domains: list[str] | None = None, + include_domains: list[str] | None = None, + max_results: int | None = None, + include_inline_citations: bool | None = None, + include_sources: bool | None = None, + timeout: float | None = None, + ) -> ( + LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse + ): ... + + async def async_search( + self, + query: str, + *, depth: Literal["fast", "standard", "deep"], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, include_images: bool | None = None, from_date: date | str | None = None, to_date: date | str | None = None, @@ -214,7 +408,7 @@ async def async_search( include_inline_citations: bool | None = None, include_sources: bool | None = None, timeout: float | None = None, - ) -> Any: # noqa: ANN401 + ) -> LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse: """Asynchronously perform a web search using the Linkup API `search` endpoint. All optional parameters will default to the Linkup API defaults when not provided. The @@ -232,8 +426,8 @@ async def async_search( supporting it, and "structured" will base the output on the format provided in structured_output_schema. structured_output_schema: If output_type is "structured", specify the schema of the - output. 
Supported formats are a pydantic.BaseModel, a Python dictionary containing a - valid object JSON schema, or a string representing a valid object JSON schema. + output. Supported formats are a `pydantic.BaseModel`, a Python dictionary containing + a valid object JSON schema, or a string representing a valid object JSON schema. include_images: Indicate whether images should be included during the search. from_date: The date from which the search results should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, the search @@ -255,10 +449,8 @@ async def async_search( The Linkup API search result, which can have different types based on the parameters: - LinkupSearchResults if output_type is "searchResults" - LinkupSourcedAnswer if output_type is "sourcedAnswer" - - the provided pydantic.BaseModel or an arbitrary data structure if output_type is - "structured" and include_sources is False - - LinkupSearchStructuredResponse with the provided pydantic.BaseModel or an arbitrary - data structure as data field, if output_type is "structured" and include_sources is + - a raw dictionary if output_type is "structured" and include_sources is False + - LinkupSearchStructuredResponse if output_type is "structured" and include_sources is True Raises: @@ -295,9 +487,8 @@ async def async_search( ) return self._parse_search_response( - response=response, + response_data=response.json(), output_type=output_type, - structured_output_schema=structured_output_schema, include_sources=include_sources, ) @@ -307,7 +498,7 @@ def research( output_type: Literal["sourcedAnswer", "structured"], reasoning_depth: Literal["S", "M", "L", "XL"] | None = None, mode: Literal["answer", "auto", "investigate", "research"] | None = None, - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, from_date: date | str | None = None, to_date: date | str | 
None = None, exclude_domains: list[str] | None = None, @@ -327,7 +518,7 @@ def research( is used. mode: The research mode to use. If None, the Linkup API default is used. structured_output_schema: If output_type is "structured", specify the output schema. - Supported formats are a pydantic.BaseModel, a Python dictionary containing a valid + Supported formats are a `pydantic.BaseModel`, a Python dictionary containing a valid object JSON schema, or a string representing a valid object JSON schema. from_date: The date from which the research sources should be considered. Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, sources will @@ -341,7 +532,7 @@ def research( no timeout. Returns: - The created research task. + The newly created research task, with "pending" status and no output. Raises: TypeError: If structured_output_schema is not a string, dictionary, or @@ -378,7 +569,7 @@ async def async_research( output_type: Literal["sourcedAnswer", "structured"], reasoning_depth: Literal["S", "M", "L", "XL"] | None = None, mode: Literal["answer", "auto", "investigate", "research"] | None = None, - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None = None, + structured_output_schema: type[pydantic.BaseModel] | dict[str, Any] | str | None = None, from_date: date | str | None = None, to_date: date | str | None = None, exclude_domains: list[str] | None = None, @@ -398,7 +589,7 @@ async def async_research( is used. mode: The research mode to use. If None, the Linkup API default is used. structured_output_schema: If output_type is "structured", specify the output schema. - Supported formats are a pydantic.BaseModel, a Python dictionary containing a valid + Supported formats are a `pydantic.BaseModel`, a Python dictionary containing a valid object JSON schema, or a string representing a valid object JSON schema. from_date: The date from which the research sources should be considered. 
Accepts a `datetime.date`, `YYYY-MM-DD`, or full ISO datetime string. If None, sources will @@ -412,7 +603,7 @@ async def async_research( no timeout. Returns: - The created research task. + The newly created research task, with "pending" status and no output. Raises: TypeError: If structured_output_schema is not a string, dictionary, or @@ -589,7 +780,7 @@ def create_tasks( no timeout. Returns: - The created tasks, parsed according to each task type. + The newly created tasks with "pending" status and no output. Raises: TypeError: If a task has an unsupported model type, or if a structured output schema has @@ -622,7 +813,7 @@ async def async_create_tasks( no timeout. Returns: - The created tasks, parsed according to each task type. + The newly created tasks with "pending" status and no output. Raises: TypeError: If a task has an unsupported model type, or if a structured output schema has @@ -845,7 +1036,7 @@ def fetch( timeout=timeout, ) - return self._parse_fetch_response(response=response) + return self._parse_fetch_response(response_data=response.json()) async def async_fetch( self, @@ -895,7 +1086,7 @@ async def async_fetch( timeout=timeout, ) - return self._parse_fetch_response(response=response) + return self._parse_fetch_response(response_data=response.json()) def _user_agent(self) -> str: # pragma: no cover return f"Linkup-Python/{self.__version__}" @@ -1204,7 +1395,7 @@ def _get_search_params( query: str, depth: Literal["fast", "standard", "deep"], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None, + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None, include_images: bool | None, from_date: date | str | None, to_date: date | str | None, @@ -1214,6 +1405,11 @@ def _get_search_params( include_inline_citations: bool | None, include_sources: bool | None, ) -> dict[str, str | bool | int | list[str]]: + if output_type == "structured" and 
structured_output_schema is None: + raise TypeError( + "structured_output_schema must be provided when output_type is 'structured'" + ) + params: dict[str, str | bool | int | list[str]] = { "q": query, "depth": depth, @@ -1225,7 +1421,7 @@ def _get_search_params( params["structuredOutputSchema"] = structured_output_schema elif isinstance(structured_output_schema, dict): params["structuredOutputSchema"] = json.dumps(structured_output_schema) - elif issubclass(structured_output_schema, BaseModel): + elif issubclass(structured_output_schema, pydantic.BaseModel): json_schema: dict[str, Any] = structured_output_schema.model_json_schema() params["structuredOutputSchema"] = json.dumps(json_schema) else: @@ -1257,7 +1453,7 @@ def _get_research_params( output_type: Literal["sourcedAnswer", "structured"], reasoning_depth: Literal["S", "M", "L", "XL"] | None, mode: Literal["answer", "auto", "investigate", "research"] | None, - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None, + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None, from_date: date | str | None, to_date: date | str | None, exclude_domains: list[str] | None, @@ -1278,7 +1474,7 @@ def _get_research_params( params["structuredOutputSchema"] = structured_output_schema elif isinstance(structured_output_schema, dict): params["structuredOutputSchema"] = json.dumps(structured_output_schema) - elif issubclass(structured_output_schema, BaseModel): + elif issubclass(structured_output_schema, pydantic.BaseModel): json_schema: dict[str, Any] = structured_output_schema.model_json_schema() params["structuredOutputSchema"] = json.dumps(json_schema) else: @@ -1422,67 +1618,39 @@ def _get_fetch_params( def _parse_search_response( self, - response: httpx.Response, + response_data: dict[str, Any], output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None, include_sources: bool | None, - ) -> Any: 
# noqa: ANN401 - return self._parse_search_response_data( - response_data=response.json(), - output_type=output_type, - structured_output_schema=structured_output_schema, - include_sources=include_sources, - ) - - def _parse_search_response_data( - self, - response_data: Any, # noqa: ANN401 - output_type: Literal["searchResults", "sourcedAnswer", "structured"], - structured_output_schema: type[BaseModel] | dict[str, Any] | str | None, - include_sources: bool | None, - ) -> Any: # noqa: ANN401 + ) -> LinkupSearchResults | LinkupSourcedAnswer | JSONObject | LinkupSearchStructuredResponse: if output_type == "searchResults": return LinkupSearchResults.model_validate(response_data) if output_type == "sourcedAnswer": return LinkupSourcedAnswer.model_validate(response_data) if output_type == "structured": - if structured_output_schema is None: - raise ValueError( - "structured_output_schema must be provided when output_type is 'structured'" - ) # HACK: we assume that `include_sources` will default to False, since the API output can # be arbitrary so we can't guess if it includes sources or not if include_sources: - if isinstance(structured_output_schema, type) and issubclass( - structured_output_schema, BaseModel - ): - response_data["data"] = structured_output_schema.model_validate( - response_data["data"] - ) return LinkupSearchStructuredResponse.model_validate(response_data) - if isinstance(structured_output_schema, type) and issubclass( - structured_output_schema, BaseModel - ): - return structured_output_schema.model_validate(response_data) return response_data raise ValueError(f"Unexpected output_type value: '{output_type}'") - def _parse_fetch_response(self, response: httpx.Response) -> LinkupFetchResponse: - return self._parse_fetch_response_data(response.json()) - - def _parse_fetch_response_data(self, response_data: Any) -> LinkupFetchResponse: # noqa: ANN401 + def _parse_fetch_response(self, response_data: dict[str, Any]) -> LinkupFetchResponse: return 
LinkupFetchResponse.model_validate(response_data) def _parse_research_task(self, task_data: dict[str, Any]) -> LinkupResearchTask: research_input = LinkupResearchTaskInput.model_validate(task_data["input"]) parsed_output = task_data.get("output") + task = LinkupResearchTask.model_validate( + {**task_data, "input": research_input, "output": None} + ) - if parsed_output is not None and research_input.output_type == "sourcedAnswer": + # Following conversion is a bit convoluted but avoids parsing as sourced answer if using a + # structured output schema that matches the sourced answer schema + if parsed_output is None: + return task + if research_input.output_type == "sourcedAnswer": parsed_output = LinkupSourcedAnswer.model_validate(parsed_output) - - return LinkupResearchTask.model_validate( - {**task_data, "input": research_input, "output": parsed_output} - ) + return task.model_copy(update={"output": parsed_output}) def _parse_task(self, task_data: dict[str, Any]) -> LinkupTask: task_type = task_data["type"] @@ -1490,22 +1658,24 @@ def _parse_task(self, task_data: dict[str, Any]) -> LinkupTask: if task_type == "search": search_input = LinkupSearchTaskInput.model_validate(task_data["input"]) parsed_output = task_data.get("output") + task = LinkupSearchTask.model_validate( + {**task_data, "input": search_input, "output": None} + ) + if parsed_output is not None: - parsed_output = self._parse_search_response_data( + parsed_output = self._parse_search_response( response_data=parsed_output, output_type=search_input.output_type, - structured_output_schema=search_input.structured_output_schema, include_sources=search_input.include_sources, ) - return LinkupSearchTask.model_validate( - {**task_data, "input": search_input, "output": parsed_output} - ) + return task.model_copy(update={"output": parsed_output}) + return task if task_type == "fetch": fetch_input = LinkupFetchTaskInput.model_validate(task_data["input"]) parsed_output = task_data.get("output") if parsed_output 
is not None: - parsed_output = self._parse_fetch_response_data(parsed_output) + parsed_output = self._parse_fetch_response(parsed_output) return LinkupFetchTask.model_validate( {**task_data, "input": fetch_input, "output": parsed_output} ) diff --git a/src/linkup/_types.py b/src/linkup/_types.py index b8a0880..b7cb2da 100644 --- a/src/linkup/_types.py +++ b/src/linkup/_types.py @@ -1,13 +1,18 @@ """Input and output types for Linkup functions.""" +from __future__ import annotations + from datetime import date -from typing import Any, Literal +from typing import Any, Literal, TypeAlias + +import pydantic + -from pydantic import BaseModel, ConfigDict, Field +class _LinkupBaseModel(pydantic.BaseModel): + model_config = pydantic.ConfigDict(populate_by_name=True) -class _LinkupBaseModel(BaseModel): - model_config = ConfigDict(populate_by_name=True) +JSONObject: TypeAlias = dict[str, Any] class LinkupSearchTextResult(_LinkupBaseModel): @@ -84,11 +89,11 @@ class LinkupSearchStructuredResponse(_LinkupBaseModel): """A Linkup `search` structured response, with the sources supporting it. Attributes: - data: The answer data, either as a Pydantic model or an arbitrary JSON structure. + data: The raw structured output dictionary. sources: The sources supporting the answer. """ - data: Any + data: JSONObject sources: list[LinkupSearchTextResult | LinkupSearchImageResult] @@ -114,8 +119,8 @@ class LinkupFetchResponse(_LinkupBaseModel): """ markdown: str - raw_html: str | None = Field(default=None, validation_alias="rawHtml") - images: list[LinkupFetchImageExtraction] | None = Field(default=None) + raw_html: str | None = pydantic.Field(default=None, validation_alias="rawHtml") + images: list[LinkupFetchImageExtraction] | None = pydantic.Field(default=None) class LinkupSearchTaskInput(_LinkupBaseModel): @@ -136,23 +141,27 @@ class LinkupSearchTaskInput(_LinkupBaseModel): structured_output_schema: The structured output schema, if any. 
""" - query: str = Field(validation_alias="q") + query: str = pydantic.Field(validation_alias="q") depth: Literal["fast", "standard", "deep"] - output_type: Literal["searchResults", "sourcedAnswer", "structured"] = Field( + output_type: Literal["searchResults", "sourcedAnswer", "structured"] = pydantic.Field( validation_alias="outputType" ) - include_images: bool | None = Field(default=None, validation_alias="includeImages") - from_date: date | str | None = Field(default=None, validation_alias="fromDate") - to_date: date | str | None = Field(default=None, validation_alias="toDate") - exclude_domains: list[str] | None = Field(default=None, validation_alias="excludeDomains") - include_domains: list[str] | None = Field(default=None, validation_alias="includeDomains") - max_results: int | None = Field(default=None, validation_alias="maxResults") - include_inline_citations: bool | None = Field( + include_images: bool | None = pydantic.Field(default=None, validation_alias="includeImages") + from_date: date | str | None = pydantic.Field(default=None, validation_alias="fromDate") + to_date: date | str | None = pydantic.Field(default=None, validation_alias="toDate") + exclude_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="excludeDomains" + ) + include_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="includeDomains" + ) + max_results: int | None = pydantic.Field(default=None, validation_alias="maxResults") + include_inline_citations: bool | None = pydantic.Field( default=None, validation_alias="includeInlineCitations" ) - include_sources: bool | None = Field(default=None, validation_alias="includeSources") - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None = Field( - default=None, validation_alias="structuredOutputSchema" + include_sources: bool | None = pydantic.Field(default=None, validation_alias="includeSources") + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | 
None = ( + pydantic.Field(default=None, validation_alias="structuredOutputSchema") ) @@ -171,18 +180,24 @@ class LinkupResearchTaskInput(_LinkupBaseModel): structured_output_schema: The structured output schema, if any. """ - query: str = Field(validation_alias="q") - output_type: Literal["sourcedAnswer", "structured"] = Field(validation_alias="outputType") + query: str = pydantic.Field(validation_alias="q") + output_type: Literal["sourcedAnswer", "structured"] = pydantic.Field( + validation_alias="outputType" + ) mode: Literal["answer", "auto", "investigate", "research"] | None = None - reasoning_depth: Literal["S", "M", "L", "XL"] | None = Field( + reasoning_depth: Literal["S", "M", "L", "XL"] | None = pydantic.Field( default=None, validation_alias="reasoningDepth" ) - from_date: date | str | None = Field(default=None, validation_alias="fromDate") - to_date: date | str | None = Field(default=None, validation_alias="toDate") - exclude_domains: list[str] | None = Field(default=None, validation_alias="excludeDomains") - include_domains: list[str] | None = Field(default=None, validation_alias="includeDomains") - structured_output_schema: type[BaseModel] | str | dict[str, Any] | None = Field( - default=None, validation_alias="structuredOutputSchema" + from_date: date | str | None = pydantic.Field(default=None, validation_alias="fromDate") + to_date: date | str | None = pydantic.Field(default=None, validation_alias="toDate") + exclude_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="excludeDomains" + ) + include_domains: list[str] | None = pydantic.Field( + default=None, validation_alias="includeDomains" + ) + structured_output_schema: type[pydantic.BaseModel] | str | dict[str, Any] | None = ( + pydantic.Field(default=None, validation_alias="structuredOutputSchema") ) @@ -197,9 +212,9 @@ class LinkupFetchTaskInput(_LinkupBaseModel): """ url: str - include_raw_html: bool | None = Field(default=None, validation_alias="includeRawHtml") - 
render_js: bool | None = Field(default=None, validation_alias="renderJs") - extract_images: bool | None = Field(default=None, validation_alias="extractImages") + include_raw_html: bool | None = pydantic.Field(default=None, validation_alias="includeRawHtml") + render_js: bool | None = pydantic.Field(default=None, validation_alias="renderJs") + extract_images: bool | None = pydantic.Field(default=None, validation_alias="extractImages") LinkupTaskInput = LinkupSearchTaskInput | LinkupFetchTaskInput | LinkupResearchTaskInput @@ -216,9 +231,9 @@ class LinkupTaskMetadata(_LinkupBaseModel): """ page: int - page_size: int = Field(validation_alias="pageSize") + page_size: int = pydantic.Field(validation_alias="pageSize") total: int - total_pages: int = Field(validation_alias="totalPages") + total_pages: int = pydantic.Field(validation_alias="totalPages") class LinkupTaskQuota(_LinkupBaseModel): @@ -229,7 +244,7 @@ class LinkupTaskQuota(_LinkupBaseModel): limit: The maximum number of in-flight tasks allowed. """ - in_flight: int = Field(validation_alias="inFlight") + in_flight: int = pydantic.Field(validation_alias="inFlight") limit: int @@ -241,20 +256,27 @@ class LinkupSearchTask(_LinkupBaseModel): error: The task error message, if the task failed. id: The task identifier. input: The normalized search input for this task. - output: The parsed search output, if available. + output: The parsed search results, sourced answer, sourced structured response, or raw + structured output dictionary, if available. status: The current task status. type: The task type, in this case "search". updated_at: The last task update timestamp. 
""" - created_at: str = Field(validation_alias="createdAt") + created_at: str = pydantic.Field(validation_alias="createdAt") error: str | None = None id: str input: LinkupSearchTaskInput - output: Any = None + output: ( + LinkupSearchResults + | LinkupSourcedAnswer + | LinkupSearchStructuredResponse + | JSONObject + | None + ) = None status: Literal["pending", "processing", "completed", "failed"] type: Literal["search"] - updated_at: str = Field(validation_alias="updatedAt") + updated_at: str = pydantic.Field(validation_alias="updatedAt") class LinkupFetchTask(_LinkupBaseModel): @@ -271,14 +293,14 @@ class LinkupFetchTask(_LinkupBaseModel): updated_at: The last task update timestamp. """ - created_at: str = Field(validation_alias="createdAt") + created_at: str = pydantic.Field(validation_alias="createdAt") error: str | None = None id: str input: LinkupFetchTaskInput output: LinkupFetchResponse | None = None status: Literal["pending", "processing", "completed", "failed"] type: Literal["fetch"] - updated_at: str = Field(validation_alias="updatedAt") + updated_at: str = pydantic.Field(validation_alias="updatedAt") class LinkupResearchTask(_LinkupBaseModel): @@ -289,20 +311,20 @@ class LinkupResearchTask(_LinkupBaseModel): error: The task error message, if the task failed. id: The task identifier. input: The normalized research input for this task. - output: The parsed research output, if available. + output: The parsed sourced answer, or raw structured output dictionary, if available. status: The current task status. type: The task type, in this case "research". updated_at: The last task update timestamp. 
""" - created_at: str = Field(validation_alias="createdAt") + created_at: str = pydantic.Field(validation_alias="createdAt") error: str | None = None id: str input: LinkupResearchTaskInput - output: Any = None + output: LinkupSourcedAnswer | JSONObject | None = None status: Literal["pending", "processing", "completed", "failed"] type: Literal["research"] - updated_at: str = Field(validation_alias="updatedAt") + updated_at: str = pydantic.Field(validation_alias="updatedAt") LinkupTask = LinkupSearchTask | LinkupFetchTask | LinkupResearchTask diff --git a/tests/unit/client_test.py b/tests/unit/client_test.py index bd782bd..bb438b0 100644 --- a/tests/unit/client_test.py +++ b/tests/unit/client_test.py @@ -1,18 +1,18 @@ import json from datetime import date -from typing import Any +from typing import Any, cast from unittest.mock import AsyncMock, MagicMock import httpx +import pydantic import pytest from httpx import Response -from pydantic import BaseModel from pytest_mock import MockerFixture import linkup -class Company(BaseModel): +class Company(pydantic.BaseModel): name: str creation_date: str website_url: str @@ -192,12 +192,12 @@ class Company(BaseModel): "website_url": "https://www.linkup.so/" } """, - Company( - name="Linkup", - founders_names=["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], - creation_date="2024", - website_url="https://www.linkup.so/", - ), + { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/", + }, ), ( { @@ -291,12 +291,12 @@ class Company(BaseModel): } """, linkup.SearchStructuredResponse( - data=Company( - name="Linkup", - founders_names=["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], - creation_date="2024", - website_url="https://www.linkup.so/", - ), + data={ + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": 
"https://www.linkup.so/", + }, sources=[ linkup.SearchTextResult( type="text", @@ -338,7 +338,7 @@ def test_search( ), ) - search_response: Any = client.search(**search_kwargs) + search_response = cast("Any", client.search(**search_kwargs)) expected_timeout = search_kwargs.get("timeout", None) request_mock.assert_called_once_with( method="POST", @@ -349,6 +349,55 @@ def test_search( assert search_response == expected_search_response +def test_search_structured_output_model_dump_preserves_data( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "data": { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/" + }, + "sources": [] + } + """, + ), + ) + + search_response = client.search( + query="query", + depth="standard", + output_type="structured", + structured_output_schema=Company, + include_sources=True, + ) + + assert search_response.model_dump()["data"] == { + "name": "Linkup", + "founders_names": ["Philippe Mizrahi", "Denis Charrier", "Boris Toledano"], + "creation_date": "2024", + "website_url": "https://www.linkup.so/", + } + + +def test_search_structured_output_requires_schema(client: linkup.Client) -> None: + with pytest.raises( + TypeError, + match="structured_output_schema must be provided", + ): + client.search( + query="query", + depth="standard", + output_type="structured", + ) + + @pytest.mark.asyncio @pytest.mark.parametrize( ( @@ -376,7 +425,7 @@ async def test_async_search( ), ) - search_response: Any = await client.async_search(**search_kwargs) + search_response = cast("Any", await client.async_search(**search_kwargs)) expected_timeout = search_kwargs.get("timeout", None) request_mock.assert_called_once_with( method="POST", @@ -387,6 +436,19 @@ async def test_async_search( assert search_response == expected_search_response 
+@pytest.mark.asyncio +async def test_async_search_structured_output_requires_schema(client: linkup.Client) -> None: + with pytest.raises( + TypeError, + match="structured_output_schema must be provided", + ): + await client.async_search( + query="query", + depth="standard", + output_type="structured", + ) + + test_search_error_parameters = [ ( 402, @@ -656,6 +718,42 @@ def test_research(mocker: MockerFixture, client: linkup.Client) -> None: ) +def test_get_research_structured_output_keeps_sourced_answer_shape_raw( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "createdAt": "2026-05-18T00:00:00.000Z", + "error": null, + "id": "bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c", + "input": { + "outputType": "structured", + "q": "query" + }, + "output": { + "answer": "structured answer field", + "sources": [] + }, + "status": "completed", + "type": "research", + "updatedAt": "2026-05-18T00:00:00.000Z" + } + """, + ), + ) + + research_response = client.get_research("bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c") + + assert research_response.output == { + "answer": "structured answer field", + "sources": [], + } + + @pytest.mark.asyncio async def test_async_research(mocker: MockerFixture, client: linkup.Client) -> None: request_mock = mocker.patch( @@ -1367,6 +1465,78 @@ def test_list_tasks(mocker: MockerFixture, client: linkup.Client) -> None: assert tasks_page.quota.in_flight == 1 +def test_get_task_structured_search_output_keeps_search_results_shape_raw( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "createdAt": "2026-05-18T00:00:00.000Z", + "error": null, + "id": "bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c", + "input": { + "depth": "standard", + "outputType": "structured", + "q": "query", + "structuredOutputSchema": { + "type": "object" + } + }, + "output": 
{ + "results": [] + }, + "status": "completed", + "type": "search", + "updatedAt": "2026-05-18T00:00:00.000Z" + } + """, + ), + ) + + task = client.get_task("bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c") + + assert isinstance(task, linkup.SearchTask) + assert task.output == {"results": []} + + +def test_get_task_structured_search_output_without_schema_raw( + mocker: MockerFixture, client: linkup.Client +) -> None: + mocker.patch( + "httpx.Client.request", + return_value=Response( + status_code=200, + content=b""" + { + "createdAt": "2026-05-18T00:00:00.000Z", + "error": null, + "id": "bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c", + "input": { + "depth": "standard", + "outputType": "structured", + "q": "query" + }, + "output": { + "summary": "done" + }, + "status": "completed", + "type": "search", + "updatedAt": "2026-05-18T00:00:00.000Z" + } + """, + ), + ) + + task = client.get_task("bfeb26f5-f4d6-47d2-9818-7f62fbcd0b0c") + + assert isinstance(task, linkup.SearchTask) + assert task.input.structured_output_schema is None + assert task.output == {"summary": "done"} + + def test_list_tasks_with_multiple_filters(mocker: MockerFixture, client: linkup.Client) -> None: request_mock = mocker.patch( "httpx.Client.request",