diff --git a/src/ch_api/__version__.py b/src/ch_api/__version__.py index c72e379..c68196d 100644 --- a/src/ch_api/__version__.py +++ b/src/ch_api/__version__.py @@ -1 +1 @@ -__version__ = "1.1.4" +__version__ = "1.2.0" diff --git a/src/ch_api/api.py b/src/ch_api/api.py index 801fae6..9184e4c 100644 --- a/src/ch_api/api.py +++ b/src/ch_api/api.py @@ -1046,6 +1046,179 @@ async def get_filing_history_item( types.public_data.filing_history.FilingHistoryItem, ) + # ------------------------------------------------------------------ + # Document API (separate host) + # ------------------------------------------------------------------ + + @pydantic.validate_call + async def get_document_metadata( + self, + document_id: str, + ) -> types.public_data.documents.DocumentMetadata | None: + """Fetch metadata for a Companies House filed document. + + Queries the Document API (a separate host from the main API) and returns + metadata describing the document, including available content types and + their sizes. Use :meth:`get_document_url` to obtain a download URL for + a specific content type. + + Parameters + ---------- + document_id : str + The document ID (typically found in a filing history item's links). + + Returns + ------- + types.public_data.documents.DocumentMetadata | None + Document metadata, or ``None`` if the document was not found. + + Example + ------- + :: + + meta = await client.get_document_metadata("L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o") + if meta: + for mime_type, info in (meta.resources or {}).items(): + print(f"{mime_type}: {info.content_length} bytes") + """ + url = f"{self._settings.document_api_url}/document/{document_id}" + return await self._get_resource(url, types.public_data.documents.DocumentMetadata) + + @pydantic.validate_call + async def get_document_url( + self, + document_id: str, + content_type: str = "application/pdf", + ) -> str | None: + """Return a pre-signed download URL for a Companies House filed document. 
+ + Sends a request to the Document API content endpoint, which responds + with an HTTP 302 redirect. This method does not follow the redirect; it + returns the ``Location`` URL without downloading the content — callers + can fetch it with any HTTP client. + + Parameters + ---------- + document_id : str + The document ID (typically found in a filing history item's links). + content_type : str + MIME type of the desired format (default ``application/pdf``). + Available types for a document are listed in + :attr:`~types.public_data.documents.DocumentMetadata.resources`. + Common values: ``application/pdf``, ``application/json``, + ``application/xml``, ``application/xhtml+xml``, ``text/csv``. + + Returns + ------- + str | None + The pre-signed download URL, or ``None`` if the document was not found. + + Raises + ------ + httpx.HTTPStatusError + If the API returns an unexpected error status (e.g. 406 if the + requested content type is not available for this document). + + Example + ------- + :: + + url = await client.get_document_url( + "L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o", + content_type="application/pdf", + ) + if url: + print(url) + """ + url = f"{self._settings.document_api_url}/document/{document_id}/content" + request = self._api_session.build_request( + method="GET", + url=url, + headers={"Accept": content_type}, + ) + async with self._api_limiter(): + try: + # Redirects are never followed here: the Location header is the result we want + response = await self._api_session.send(request, follow_redirects=False) + except RuntimeError as err: + if self._owns_session and "has been closed" in str(err): + logger.warning("HTTP session was closed; reopening and retrying.") + self._api_session = self._new_session() + response = await self._api_session.send(request, follow_redirects=False) + else: + raise + if response.status_code == httpx.codes.NOT_FOUND: + return None + if response.status_code in (httpx.codes.FOUND, httpx.codes.MOVED_PERMANENTLY): + return response.headers.get("Location") +
response.raise_for_status() + # Unexpected non-redirect success: return Location if present, else None + return response.headers.get("Location") + + @contextlib.asynccontextmanager + async def get_document_content( + self, + document_id: str, + content_type: str = "application/pdf", + ) -> typing.AsyncIterator[httpx.Response | None]: + """Async context manager that downloads a Companies House filed document. + + Resolves the pre-signed S3 download URL (via :meth:`get_document_url`) + and fetches the document using an unauthenticated request. The whole body + is downloaded into memory before the response is yielded; the underlying + HTTP client stays open for the duration of the ``async with`` block and + is closed when the block exits. + + Parameters + ---------- + document_id : str + The document ID (typically found in a filing history item's links). + content_type : str + MIME type of the desired format (default ``application/pdf``). + Available types for a document are listed in + :attr:`~types.public_data.documents.DocumentMetadata.resources`. + Common values: ``application/pdf``, ``application/json``, + ``application/xml``, ``application/xhtml+xml``, ``text/csv``. + + Yields + ------ + httpx.Response | None + The HTTP response from S3, or ``None`` if the document was not found. + The body is already buffered: read it via :attr:`httpx.Response.content` + or iterate it in chunks with :meth:`httpx.Response.aiter_bytes`. + + Raises + ------ + httpx.HTTPStatusError + If the API or the S3 download returns an unexpected error status. 
+ + Example + ------- + Read entire document into memory:: + + async with client.get_document_content( + "L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o", + content_type="application/pdf", + ) as response: + if response is not None: + pathlib.Path("confirmation_statement.pdf").write_bytes(response.content) + + Stream the document in chunks:: + + async with client.get_document_content("DOC_ID") as response: + if response is not None: + async for chunk in response.aiter_bytes(chunk_size=65536): + process(chunk) + """ + download_url = await self.get_document_url(document_id, content_type=content_type) + if download_url is None: + yield None + return + async with httpx.AsyncClient() as download_client: + response = await download_client.get(download_url) + response.raise_for_status() + yield response + @pydantic.validate_call async def get_company_insolvency( self, diff --git a/src/ch_api/api_settings.py b/src/ch_api/api_settings.py index 33ce7cc..adaf9c2 100644 --- a/src/ch_api/api_settings.py +++ b/src/ch_api/api_settings.py @@ -93,6 +93,14 @@ class ApiSettings: - Production: ``https://identity.company-information.service.gov.uk`` - Sandbox: ``https://identity-sandbox.company-information.service.gov.uk`` + document_api_url : str + Base URL for the Document API (separate host from the main API). + - Production: ``https://document-api.company-information.service.gov.uk`` + - Sandbox: ``https://document-api-sandbox.company-information.service.gov.uk`` + + Used by :meth:`~ch_api.Client.get_document_metadata` and + :meth:`~ch_api.Client.get_document_url`. + test_data_generator_url : str, optional Base URL for the Test Data Generator API (sandbox only). 
- Production: ``None`` (not available) @@ -126,6 +134,7 @@ class ApiSettings: api_url: str identity_url: str + document_api_url: str = "https://document-api.company-information.service.gov.uk" test_data_generator_url: str | None = None @@ -161,5 +170,6 @@ class ApiSettings: TEST_API_SETTINGS = ApiSettings( api_url="https://api-sandbox.company-information.service.gov.uk", identity_url="https://identity-sandbox.company-information.service.gov.uk", + document_api_url="https://document-api-sandbox.company-information.service.gov.uk", test_data_generator_url="https://test-data-sandbox.company-information.service.gov.uk", ) diff --git a/src/ch_api/types/public_data/__init__.py b/src/ch_api/types/public_data/__init__.py index 20827ad..9f8f12e 100644 --- a/src/ch_api/types/public_data/__init__.py +++ b/src/ch_api/types/public_data/__init__.py @@ -169,6 +169,7 @@ company_profile, company_registers, disqualifications, + documents, exemptions, filing_history, insolvency, diff --git a/src/ch_api/types/public_data/charges.py b/src/ch_api/types/public_data/charges.py index 9de678b..ad37ea5 100644 --- a/src/ch_api/types/public_data/charges.py +++ b/src/ch_api/types/public_data/charges.py @@ -240,6 +240,48 @@ class PersonsEntitled(base.BaseModel): ] +class TransactionLinks(shared.LinksSection): + """Links associated with a charge transaction filing. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The fields below are + declared explicitly so IDEs and type checkers can see them. + """ + + filing: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the filing history item for this transaction.", + ), + ] + + insolvency_case: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the insolvency case related to this transaction.", + ), + ] + + +class InsolvencyCaseLinks(shared.LinksSection): + """Links associated with an insolvency case on a charge. 
+ + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``case`` field is + declared explicitly so IDEs and type checkers can see it. + """ + + case: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the insolvency case.", + ), + ] + + class Transactions(base.BaseModel): """Transaction that has been filed for the charge.""" @@ -268,7 +310,7 @@ class Transactions(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection | None, + TransactionLinks | None, pydantic.Field( description="The resources related to this filing", default=None, @@ -288,7 +330,7 @@ class InsolvencyCases(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection | None, + InsolvencyCaseLinks | None, pydantic.Field( description="The resources related to this insolvency case", default=None, diff --git a/src/ch_api/types/public_data/company_officers.py b/src/ch_api/types/public_data/company_officers.py index 1e44d6b..d03d0d6 100644 --- a/src/ch_api/types/public_data/company_officers.py +++ b/src/ch_api/types/public_data/company_officers.py @@ -79,6 +79,23 @@ from . import officer_changes +class OfficerLinks(shared.LinksSection): + """Links nested under the ``officer`` key of an officer list item. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``appointments`` field + is declared explicitly so IDEs and type checkers can see it. 
+ """ + + appointments: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the officer's appointments list.", + ), + ] + + class ItemLinkTypes(base.BaseModel): """Links to other resources associated with this officer list item.""" @@ -90,7 +107,7 @@ class ItemLinkTypes(base.BaseModel): ] officer: typing.Annotated[ - shared.LinksSection, + OfficerLinks, pydantic.Field( description="Links to other officer resources associated with this officer list item.", ), diff --git a/src/ch_api/types/public_data/company_profile.py b/src/ch_api/types/public_data/company_profile.py index d6aa781..93932e0 100644 --- a/src/ch_api/types/public_data/company_profile.py +++ b/src/ch_api/types/public_data/company_profile.py @@ -751,6 +751,87 @@ class BranchCompanyDetails(base.BaseModel): ] +class CompanyProfileLinks(shared.LinksSection): + """Links associated with a company profile response. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The fields below are + declared explicitly so IDEs and type checkers can see them. 
+ """ + + filing_history: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's filing history.", + ), + ] + + officers: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's officers list.", + ), + ] + + persons_with_significant_control: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's persons with significant control list.", + ), + ] + + persons_with_significant_control_statements: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's PSC statements list.", + ), + ] + + registers: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's registers.", + ), + ] + + insolvency: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's insolvency information.", + ), + ] + + charges: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's charges.", + ), + ] + + exemptions: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's exemptions.", + ), + ] + + overseas: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company's overseas information.", + ), + ] + + class CompanyProfile(base.BaseModel): """Company profile information from Companies House.""" @@ -1104,7 +1185,7 @@ class CompanyProfile(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection, + CompanyProfileLinks, pydantic.Field( description="A set of URLs related to the resource, including self.", ), diff --git a/src/ch_api/types/public_data/documents.py b/src/ch_api/types/public_data/documents.py new file mode 100644 index 0000000..6f5fafb --- /dev/null +++ b/src/ch_api/types/public_data/documents.py @@ -0,0 +1,222 @@ +"""Document API models for Companies House. 
+ +This module contains Pydantic models for the Companies House Document API, +which provides access to document metadata and download URLs for filed documents. + +The Document API runs on a separate host from the main Companies House API: + ``document-api.company-information.service.gov.uk`` + +API Endpoints +----- +GET /document/{document_id} - Fetch document metadata +GET /document/{document_id}/content - Fetch document (returns redirect to download URL) + +Document Resources +----- +Each document may be available in multiple formats. The ``resources`` field +maps MIME type strings to :class:`ResourceContent` objects describing each format: + +- ``application/pdf`` - PDF document +- ``application/json`` - Machine-readable JSON +- ``application/xml`` - XML format +- ``application/xhtml+xml`` - XHTML format +- ``application/zip`` - ZIP archive +- ``text/csv`` - CSV data + +Not all content types are available for all documents. Inspect ``resources`` +to determine which types are available before requesting document content. + +Fetching a document +----- +1. Call :meth:`~ch_api.Client.get_document_metadata` to retrieve metadata and + inspect ``resources`` for available content types. +2. Call :meth:`~ch_api.Client.get_document_url` with the desired MIME type to + obtain a pre-signed download URL (valid for a short time). +3. Fetch the URL directly with any HTTP client. + +Documentation +----- +https://developer-specs.company-information.service.gov.uk/document-api.company-information.service.gov.uk-specifications/swagger-2.0/spec/swagger.json +""" + +import datetime +import typing + +import pydantic + +from .. import base, shared + + +class ResourceContent(base.BaseModel): + """Metadata for a single document format / content type. + + Describes the size and optional timestamps for one available representation + of a document (e.g. the PDF version or the JSON version). 
+ """ + + content_length: typing.Annotated[ + int, + pydantic.Field( + description="The size of the document in bytes when returned as this content type.", + ), + ] + + created_at: typing.Annotated[ + datetime.datetime | None, + pydantic.Field( + default=None, + description="The date and time this content type was first created.", + ), + ] + + updated_at: typing.Annotated[ + datetime.datetime | None, + pydantic.Field( + default=None, + description="The date and time this content type was last updated.", + ), + ] + + +class DocumentLinks(shared.LinksSection): + """Links associated with a document metadata response. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``document`` field is + declared explicitly so IDEs and type checkers can see it. + """ + + document: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the document content endpoint.", + ), + ] + + +class DocumentMetadata(base.BaseModel): + """Metadata for a filed document returned by the Companies House Document API. + + Returned by :meth:`~ch_api.Client.get_document_metadata`. Use the + ``resources`` mapping to discover which content types are available, then + pass the desired MIME type to :meth:`~ch_api.Client.get_document_url`. 
+ + Example + ------- + Check which formats a document is available in:: + + meta = await client.get_document_metadata("L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o") + if meta: + print(meta.company_number, meta.pages) + for mime_type, resource in (meta.resources or {}).items(): + print(f" {mime_type}: {resource.content_length} bytes") + """ + + etag: typing.Annotated[ + str, + pydantic.Field( + description="The ETag of the resource.", + ), + ] + + id: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="The document ID (not always returned by the API).", + ), + ] + + company_number: typing.Annotated[ + str, + pydantic.Field( + description="The company number the document belongs to.", + ), + ] + + created_at: typing.Annotated[ + datetime.datetime, + pydantic.Field( + description="The date and time the document was first created.", + ), + ] + + updated_at: typing.Annotated[ + datetime.datetime | None, + pydantic.Field( + default=None, + description="The date and time the document was last updated.", + ), + ] + + pages: typing.Annotated[ + int | None, + pydantic.Field( + default=None, + description="The document page count.", + ), + ] + + barcode: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="The barcode identifier for this document.", + ), + ] + + filename: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="The filename of this document.", + ), + ] + + category: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="The filing category this document belongs to (e.g. 
'annual-returns').", + ), + ] + + significant_date: typing.Annotated[ + datetime.datetime | None, + pydantic.Field( + default=None, + description="A significant date associated with this document, if applicable.", + ), + ] + + significant_date_type: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="The type of significant date.", + ), + ] + + links: typing.Annotated[ + DocumentLinks | None, + pydantic.Field( + default=None, + description=( + "Links to other resources associated with this document. " + "Typically includes ``self`` (metadata URL) and ``document`` (content URL)." + ), + ), + ] + + resources: typing.Annotated[ + dict[str, ResourceContent] | None, + pydantic.Field( + default=None, + description=( + "Available content types for this document, keyed by MIME type. " + "Common types: application/pdf, application/json, application/xml, " + "application/xhtml+xml, application/zip, text/csv." + ), + ), + ] diff --git a/src/ch_api/types/public_data/filing_history.py b/src/ch_api/types/public_data/filing_history.py index 131fc42..4452fd9 100644 --- a/src/ch_api/types/public_data/filing_history.py +++ b/src/ch_api/types/public_data/filing_history.py @@ -218,6 +218,30 @@ class Resolution(base.BaseModel): ] +class FilingHistoryLinks(shared.LinksSection): + """Links associated with a filing history item. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``document_metadata`` + field is declared explicitly so IDEs and type checkers can see it — + its value is a URL on the Document API host + (``document-api.company-information.service.gov.uk``). + """ + + document_metadata: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description=( + "Link to the document metadata on the Document API. " + "Pass the document ID extracted from this URL to " + ":meth:`~ch_api.Client.get_document_metadata` or " + ":meth:`~ch_api.Client.get_document_url`." 
+ ), + ), + ] + + class FilingHistoryItem(base.BaseModel): """Individual filing history item.""" @@ -298,7 +322,7 @@ class FilingHistoryItem(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection | None, + FilingHistoryLinks | None, pydantic.Field( description="Links to other resources associated with this filing history item.", default=None, diff --git a/src/ch_api/types/public_data/insolvency.py b/src/ch_api/types/public_data/insolvency.py index 4a05466..b81a4a6 100644 --- a/src/ch_api/types/public_data/insolvency.py +++ b/src/ch_api/types/public_data/insolvency.py @@ -200,6 +200,23 @@ class CaseDates(base.BaseModel): ] +class InsolvencyCaseLinks(shared.LinksSection): + """Links associated with an insolvency case. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``charge`` field is + declared explicitly so IDEs and type checkers can see it. + """ + + charge: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the charge associated with this insolvency case.", + ), + ] + + class Case(base.BaseModel): """Individual insolvency case.""" @@ -250,9 +267,9 @@ class Case(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection | None, + InsolvencyCaseLinks | None, pydantic.Field( - description="The practitioners for the case.", + description="The links for this case.", default=None, ), ] diff --git a/src/ch_api/types/public_data/officer_appointments.py b/src/ch_api/types/public_data/officer_appointments.py index 0ed673a..dbb12a9 100644 --- a/src/ch_api/types/public_data/officer_appointments.py +++ b/src/ch_api/types/public_data/officer_appointments.py @@ -315,6 +315,23 @@ class ContactDetails(base.BaseModel): ] +class OfficerAppointmentItemLinks(shared.LinksSection): + """Links associated with an officer appointment item. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. 
The ``company`` field is + declared explicitly so IDEs and type checkers can see it. + """ + + company: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company profile for this appointment.", + ), + ] + + class OfficerAppointmentSummary(base.BaseModel): """Officer appointment summary.""" @@ -365,7 +382,7 @@ class OfficerAppointmentSummary(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection, + OfficerAppointmentItemLinks, pydantic.Field( description="Links to other resources associated with this officer appointment item.", ), diff --git a/src/ch_api/types/public_data/search_companies.py b/src/ch_api/types/public_data/search_companies.py index 4636194..194db7e 100644 --- a/src/ch_api/types/public_data/search_companies.py +++ b/src/ch_api/types/public_data/search_companies.py @@ -344,6 +344,23 @@ class DissolvedCompany(base.BaseModel): ] +class AlphabeticalCompanyLinks(shared.LinksSection): + """Links associated with an alphabetical company search result. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``company_profile`` + field is declared explicitly so IDEs and type checkers can see it. + """ + + company_profile: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company profile.", + ), + ] + + class AlphabeticalCompany(base.BaseModel): """Alphabetical company search result.""" @@ -376,7 +393,7 @@ class AlphabeticalCompany(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection, + AlphabeticalCompanyLinks, pydantic.Field( description="The link to the company", ), @@ -404,6 +421,23 @@ class AlphabeticalCompany(base.BaseModel): ] +class AdvancedCompanyLinks(shared.LinksSection): + """Links associated with an advanced company search result. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. 
The ``company_profile`` + field is declared explicitly so IDEs and type checkers can see it. + """ + + company_profile: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company profile.", + ), + ] + + class AdvancedCompany(base.BaseModel): """Advanced company search result.""" @@ -520,7 +554,7 @@ class AdvancedCompany(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection | None, + AdvancedCompanyLinks | None, pydantic.Field( description="The link to the company", default=None, diff --git a/src/ch_api/types/public_data/uk_establishments.py b/src/ch_api/types/public_data/uk_establishments.py index 451ce78..f1d51e7 100644 --- a/src/ch_api/types/public_data/uk_establishments.py +++ b/src/ch_api/types/public_data/uk_establishments.py @@ -69,6 +69,23 @@ from .. import base, shared +class EstablishmentLinks(shared.LinksSection): + """Links associated with a UK establishment record. + + Inherits ``self`` (and any other arbitrary links) from + :class:`~ch_api.types.shared.LinksSection`. The ``company`` field is + declared explicitly so IDEs and type checkers can see it. + """ + + company: typing.Annotated[ + str | None, + pydantic.Field( + default=None, + description="Link to the company profile for this UK establishment.", + ), + ] + + class CompanyEstablishmentDetails(base.BaseModel): """Details of a UK establishment company. 
@@ -100,7 +117,7 @@ class CompanyEstablishmentDetails(base.BaseModel): ] links: typing.Annotated[ - shared.LinksSection, + EstablishmentLinks, pydantic.Field( description="Resources related to this company.", ), diff --git a/src/ch_api/types/shared.py b/src/ch_api/types/shared.py index f2558b6..34092a5 100644 --- a/src/ch_api/types/shared.py +++ b/src/ch_api/types/shared.py @@ -163,6 +163,11 @@ def get_link(self, name: str) -> str | None: -------- self : Property for getting the 'self' link """ + # Check explicit model fields first (subclasses may declare known link names + # as typed attributes — e.g. FilingHistoryLinks.document_metadata). + if name in type(self).model_fields and (value := getattr(self, name, None)) is not None: + return value + # Fall back to pydantic extra fields for undeclared / dynamic links. if self.__pydantic_extra__ is None: return None return self.__pydantic_extra__.get(name, None) diff --git a/tests/integration/test_filing_history.py b/tests/integration/test_filing_history.py index 922b399..e7f8363 100644 --- a/tests/integration/test_filing_history.py +++ b/tests/integration/test_filing_history.py @@ -13,7 +13,7 @@ async def test_get_r5e_company_filing_history(live_env_test_client: ch_api.api.C # the company changed its name to "R5E LIMITED" to "RELEASE.ART LIMITED" in 2026 assert filing.type == "CERTNM" assert filing.date.year == 2026 - assert filing.links.get_link("document_metadata") is not None + assert filing.links.document_metadata is not None break else: # No break encountered diff --git a/tests/unit/test_api_branch_coverage.py b/tests/unit/test_api_branch_coverage.py index f14c9e1..5fd51c5 100644 --- a/tests/unit/test_api_branch_coverage.py +++ b/tests/unit/test_api_branch_coverage.py @@ -8,7 +8,6 @@ import pytest from ch_api import api, api_settings -from ch_api.types import shared from ch_api.types.pagination import types as pagination_types from ch_api.types.public_data import search_companies as sc @@ -34,7 +33,7 @@ def 
_alpha_company(cursor: str = "KEY:12345678") -> sc.AlphabeticalCompany: company_number="12345678", company_status="active", company_type="ltd", - links=shared.LinksSection(), + links=sc.AlphabeticalCompanyLinks(), ordered_alpha_key_with_id=cursor, kind="search-results#alphabetical-search", ) @@ -698,10 +697,8 @@ class TestSessionRestart: """_execute_request auto-restarts closed sessions (owns_session=True only).""" @pytest.mark.asyncio - async def test_restarts_owned_session_on_closed_error(self): + async def test_restarts_owned_session_on_closed_error(self, mocker): """Closed session is replaced and the request is retried successfully.""" - import unittest.mock as mock - auth = api_settings.AuthSettings(api_key="test-key") client = api.Client(credentials=auth, settings=api_settings.LIVE_API_SETTINGS) @@ -709,13 +706,13 @@ async def test_restarts_owned_session_on_closed_error(self): closed_session = MagicMock() closed_session.build_request = MagicMock(return_value=httpx.Request("GET", "http://x")) - closed_session.send = mock.AsyncMock( + closed_session.send = mocker.AsyncMock( side_effect=RuntimeError("Cannot send a request, as the client has been closed.") ) fresh_session = MagicMock() fresh_session.build_request = MagicMock(return_value=httpx.Request("GET", "http://x")) - fresh_session.send = mock.AsyncMock(return_value=ok_response) + fresh_session.send = mocker.AsyncMock(return_value=ok_response) client._api_session = closed_session client._new_session = MagicMock(return_value=fresh_session) @@ -728,15 +725,13 @@ async def test_restarts_owned_session_on_closed_error(self): assert client._api_session is fresh_session @pytest.mark.asyncio - async def test_non_closed_runtime_error_propagates(self): + async def test_non_closed_runtime_error_propagates(self, mocker): """RuntimeError unrelated to session state is re-raised.""" - import unittest.mock as mock - auth = api_settings.AuthSettings(api_key="test-key") client = api.Client(credentials=auth, 
settings=api_settings.LIVE_API_SETTINGS) broken_session = MagicMock() - broken_session.send = mock.AsyncMock(side_effect=RuntimeError("some other problem")) + broken_session.send = mocker.AsyncMock(side_effect=RuntimeError("some other problem")) client._api_session = broken_session request = httpx.Request("GET", "http://example.com") @@ -744,16 +739,14 @@ async def test_non_closed_runtime_error_propagates(self): await client._execute_request(request, None) @pytest.mark.asyncio - async def test_closed_error_on_external_session_propagates(self): + async def test_closed_error_on_external_session_propagates(self, mocker): """Closed-session error is NOT swallowed when the session is externally owned.""" - import unittest.mock as mock - # Pass an AsyncClient directly → _owns_session = False external_session = httpx.AsyncClient() client = api.Client(credentials=external_session, settings=api_settings.LIVE_API_SETTINGS) broken_session = MagicMock() - broken_session.send = mock.AsyncMock( + broken_session.send = mocker.AsyncMock( side_effect=RuntimeError("Cannot send a request, as the client has been closed.") ) client._api_session = broken_session @@ -763,3 +756,155 @@ async def test_closed_error_on_external_session_propagates(self): await client._execute_request(request, None) await external_session.aclose() + + +class TestDocumentApi: + """Lines 1084-1156 — get_document_metadata + get_document_url.""" + + @pytest.mark.asyncio + async def test_get_document_metadata_calls_correct_url(self): + client = _make_client() + urls_seen = [] + + async def fake_get_resource(url, result_type): + urls_seen.append(url) + return None + + client._get_resource = fake_get_resource + result = await client.get_document_metadata("DOC123") + assert result is None + assert any("document-api" in u and "DOC123" in u for u in urls_seen) + + @pytest.mark.asyncio + async def test_get_document_url_returns_location_on_302(self, mocker): + client = _make_client() + redirect_response = httpx.Response( + 
302, + headers={"Location": "https://s3.example.com/doc.pdf"}, + request=httpx.Request("GET", "http://x"), + ) + client._api_session.send = mocker.AsyncMock(return_value=redirect_response) + + url = await client.get_document_url("DOC123", content_type="application/pdf") + assert url == "https://s3.example.com/doc.pdf" + + @pytest.mark.asyncio + async def test_get_document_url_returns_location_on_301(self, mocker): + client = _make_client() + redirect_response = httpx.Response( + 301, + headers={"Location": "https://s3.example.com/doc.pdf"}, + request=httpx.Request("GET", "http://x"), + ) + client._api_session.send = mocker.AsyncMock(return_value=redirect_response) + + url = await client.get_document_url("DOC123") + assert url == "https://s3.example.com/doc.pdf" + + @pytest.mark.asyncio + async def test_get_document_url_returns_none_on_404(self, mocker): + client = _make_client() + not_found = httpx.Response(404, request=httpx.Request("GET", "http://x")) + client._api_session.send = mocker.AsyncMock(return_value=not_found) + + url = await client.get_document_url("DOC_MISSING") + assert url is None + + @pytest.mark.asyncio + async def test_get_document_url_raises_on_error_status(self, mocker): + client = _make_client() + error_response = httpx.Response(406, request=httpx.Request("GET", "http://x")) + client._api_session.send = mocker.AsyncMock(return_value=error_response) + + with pytest.raises(httpx.HTTPStatusError): + await client.get_document_url("DOC123", content_type="text/plain") + + @pytest.mark.asyncio + async def test_get_document_url_unexpected_200_returns_location(self, mocker): + """Unexpected non-redirect 200: return Location if present.""" + client = _make_client() + ok_response = httpx.Response( + 200, + headers={"Location": "https://s3.example.com/doc.pdf"}, + request=httpx.Request("GET", "http://x"), + ) + client._api_session.send = mocker.AsyncMock(return_value=ok_response) + + url = await client.get_document_url("DOC123") + assert url == 
"https://s3.example.com/doc.pdf" + + @pytest.mark.asyncio + async def test_get_document_url_session_restart(self, mocker): + """Closed session is restarted for document URL requests too.""" + client = _make_client() + redirect_response = httpx.Response( + 302, + headers={"Location": "https://s3.example.com/doc.pdf"}, + request=httpx.Request("GET", "http://x"), + ) + fresh_session = MagicMock() + fresh_session.build_request = MagicMock(return_value=httpx.Request("GET", "http://x")) + fresh_session.send = mocker.AsyncMock(return_value=redirect_response) + + client._api_session.send = mocker.AsyncMock( + side_effect=RuntimeError("Cannot send a request, as the client has been closed.") + ) + client._new_session = MagicMock(return_value=fresh_session) + + url = await client.get_document_url("DOC123") + assert url == "https://s3.example.com/doc.pdf" + client._new_session.assert_called_once() + + @pytest.mark.asyncio + async def test_get_document_url_non_closed_runtime_error_propagates(self, mocker): + """RuntimeError unrelated to session state is re-raised in get_document_url.""" + client = _make_client() + client._api_session.send = mocker.AsyncMock(side_effect=RuntimeError("some other error")) + + with pytest.raises(RuntimeError, match="some other error"): + await client.get_document_url("DOC123") + + @pytest.mark.asyncio + async def test_get_document_content_yields_response_on_success(self, mocker): + """get_document_content yields the httpx.Response within the context block.""" + client = _make_client() + client.get_document_url = mocker.AsyncMock(return_value="https://s3.example.com/doc.pdf") + + fake_response = httpx.Response(200, content=b"%PDF fake content", request=httpx.Request("GET", "http://x")) + mock_instance = mocker.AsyncMock() + mock_instance.__aenter__ = mocker.AsyncMock(return_value=mock_instance) + mock_instance.__aexit__ = mocker.AsyncMock(return_value=False) + mock_instance.get = mocker.AsyncMock(return_value=fake_response) + 
mocker.patch("httpx.AsyncClient", return_value=mock_instance) + + async with client.get_document_content("DOC123", content_type="application/pdf") as result: + assert isinstance(result, httpx.Response) + assert result.content == b"%PDF fake content" + + client.get_document_url.assert_awaited_once_with("DOC123", content_type="application/pdf") + + @pytest.mark.asyncio + async def test_get_document_content_yields_none_when_not_found(self, mocker): + """get_document_content yields None when get_document_url returns None.""" + client = _make_client() + client.get_document_url = mocker.AsyncMock(return_value=None) + + async with client.get_document_content("MISSING_DOC") as result: + assert result is None + + @pytest.mark.asyncio + async def test_get_document_content_raises_on_s3_error(self, mocker): + """get_document_content propagates S3 HTTP errors.""" + client = _make_client() + client.get_document_url = mocker.AsyncMock(return_value="https://s3.example.com/doc.pdf") + + error_response = httpx.Response(403, request=httpx.Request("GET", "https://s3.example.com/doc.pdf")) + mock_instance = mocker.AsyncMock() + mock_instance.__aenter__ = mocker.AsyncMock(return_value=mock_instance) + mock_instance.__aexit__ = mocker.AsyncMock(return_value=False) + mock_instance.get = mocker.AsyncMock(return_value=error_response) + mocker.patch("httpx.AsyncClient", return_value=mock_instance) + + with pytest.raises(httpx.HTTPStatusError): + async with client.get_document_content("DOC123"): + pass diff --git a/tests/unit/test_shared_types.py b/tests/unit/test_shared_types.py index 631a4f1..349919b 100644 --- a/tests/unit/test_shared_types.py +++ b/tests/unit/test_shared_types.py @@ -1,5 +1,9 @@ """Unit tests for shared data types.""" +import typing + +import pydantic + from ch_api.types import shared @@ -137,3 +141,20 @@ def test_links_section_get_link_with_extra_none(self): # This should return None due to the check on line 162 result = links.get_link("any_link") assert result is None + 
+ def test_links_section_get_link_explicit_field(self): + """Test get_link finds values declared as explicit model fields in subclasses.""" + + class TypedLinks(shared.LinksSection): + document_metadata: typing.Annotated[str | None, pydantic.Field(default=None)] + + data = { + "self": "/company/123/filing-history/XYZ", + "document_metadata": "https://document-api.example.com/document/XYZ", + } + + links = TypedLinks.model_validate(data) + + # Field is explicit — not in __pydantic_extra__, but get_link should still find it. + assert links.document_metadata == "https://document-api.example.com/document/XYZ" + assert links.get_link("document_metadata") == "https://document-api.example.com/document/XYZ"