Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ch_api/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.4"
__version__ = "1.2.0"
173 changes: 173 additions & 0 deletions src/ch_api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,179 @@ async def get_filing_history_item(
types.public_data.filing_history.FilingHistoryItem,
)

# ------------------------------------------------------------------
# Document API (separate host)
# ------------------------------------------------------------------

@pydantic.validate_call
async def get_document_metadata(
    self,
    document_id: str,
) -> types.public_data.documents.DocumentMetadata | None:
    """Look up the metadata record of a filed Companies House document.

    The request goes to the Document API, whose base URL is taken from
    ``self._settings.document_api_url`` (a different host from the main
    API). The metadata describes the document, including the content types
    it is available in and their sizes. To obtain a download URL for one of
    those content types, call :meth:`get_document_url`.

    Parameters
    ----------
    document_id : str
        The document ID (typically found in a filing history item's links).

    Returns
    -------
    types.public_data.documents.DocumentMetadata | None
        Document metadata, or ``None`` if the document was not found.

    Example
    -------
    ::

        meta = await client.get_document_metadata("L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o")
        if meta:
            for mime_type, info in (meta.resources or {}).items():
                print(f"{mime_type}: {info.content_length} bytes")
    """
    base_url = self._settings.document_api_url
    metadata_model = types.public_data.documents.DocumentMetadata
    endpoint = f"{base_url}/document/{document_id}"
    # Fetching and parsing into the model is delegated to the shared helper.
    return await self._get_resource(endpoint, metadata_model)

@pydantic.validate_call
async def get_document_url(
    self,
    document_id: str,
    content_type: str = "application/pdf",
) -> str | None:
    """Return a pre-signed download URL for a Companies House filed document.

    Sends a request to the Document API content endpoint, which responds
    with an HTTP 302 redirect. The redirect is deliberately *not* followed:
    this method only reads the ``Location`` header of the redirect response
    and returns it, without downloading the content — callers can fetch it
    with any HTTP client.

    Parameters
    ----------
    document_id : str
        The document ID (typically found in a filing history item's links).
    content_type : str
        MIME type of the desired format (default ``application/pdf``).
        Sent as the ``Accept`` header. Available types for a document are
        listed in
        :attr:`~types.public_data.documents.DocumentMetadata.resources`.
        Common values: ``application/pdf``, ``application/json``,
        ``application/xml``, ``application/xhtml+xml``, ``text/csv``.

    Returns
    -------
    str | None
        The pre-signed download URL, or ``None`` if the document was not
        found (HTTP 404) or the response carried no ``Location`` header.

    Raises
    ------
    httpx.HTTPStatusError
        If the API returns an unexpected error status (e.g. 406 if the
        requested content type is not available for this document).
    RuntimeError
        Re-raised from the HTTP session unless it signals a closed session
        that this client owns (in which case the session is reopened and
        the request is retried once).

    Example
    -------
    ::

        url = await client.get_document_url(
            "L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o",
            content_type="application/pdf",
        )
        if url:
            print(url)
    """
    url = f"{self._settings.document_api_url}/document/{document_id}/content"
    request = self._api_session.build_request(
        method="GET",
        url=url,
        headers={"Accept": content_type},
    )
    async with self._api_limiter():
        try:
            # Pass follow_redirects=False explicitly: if the session were
            # configured to follow redirects, the 302 would be consumed and
            # the document downloaded instead of exposing the Location URL.
            response = await self._api_session.send(request, follow_redirects=False)
        except RuntimeError as err:
            if self._owns_session and "has been closed" in str(err):
                logger.warning("HTTP session was closed; reopening and retrying.")
                self._api_session = self._new_session()
                response = await self._api_session.send(request, follow_redirects=False)
            else:
                raise
    if response.status_code == httpx.codes.NOT_FOUND:
        return None
    if response.status_code in (httpx.codes.FOUND, httpx.codes.MOVED_PERMANENTLY):
        return response.headers.get("Location")
    response.raise_for_status()
    # Unexpected non-redirect success: return Location if present, else None
    return response.headers.get("Location")

@contextlib.asynccontextmanager
async def get_document_content(
    self,
    document_id: str,
    content_type: str = "application/pdf",
) -> typing.AsyncIterator[httpx.Response | None]:
    """Async context manager yielding the downloaded document response.

    The pre-signed download URL is resolved first via
    :meth:`get_document_url`; the document is then fetched from that URL
    with a fresh, unauthenticated ``httpx.AsyncClient``. That client stays
    open until the ``async with`` block exits.

    The download is issued with ``AsyncClient.get``, so the body has
    already been read into memory when the response is yielded;
    :meth:`httpx.Response.aiter_bytes` then iterates over that buffered
    content in chunks.

    Parameters
    ----------
    document_id : str
        The document ID (typically found in a filing history item's links).
    content_type : str
        MIME type of the desired format (default ``application/pdf``).
        Available types for a document are listed in
        :attr:`~types.public_data.documents.DocumentMetadata.resources`.
        Common values: ``application/pdf``, ``application/json``,
        ``application/xml``, ``application/xhtml+xml``, ``text/csv``.

    Yields
    ------
    httpx.Response | None
        The HTTP response for the download URL, or ``None`` if
        :meth:`get_document_url` returned ``None`` (document not found).
        Use :attr:`httpx.Response.content` for the full body, or
        :meth:`httpx.Response.aiter_bytes` to process it chunk by chunk.

    Raises
    ------
    httpx.HTTPStatusError
        If the API or the download request returns an unexpected error
        status.

    Example
    -------
    Read entire document into memory::

        async with client.get_document_content(
            "L_X0y9bwYnkyEMwLe3TNQUfmBpMG0FIj0tLzr5b5s2o",
            content_type="application/pdf",
        ) as response:
            if response is not None:
                pathlib.Path("confirmation_statement.pdf").write_bytes(response.content)

    Process the document in chunks::

        async with client.get_document_content("DOC_ID") as response:
            if response is not None:
                async for chunk in response.aiter_bytes(chunk_size=65536):
                    process(chunk)
    """
    signed_url = await self.get_document_url(document_id, content_type=content_type)
    if signed_url is not None:
        # A separate client is used so no API credentials are sent to the
        # pre-signed URL's host.
        async with httpx.AsyncClient() as fetcher:
            payload = await fetcher.get(signed_url)
            payload.raise_for_status()
            yield payload
    else:
        yield None

@pydantic.validate_call
async def get_company_insolvency(
self,
Expand Down
10 changes: 10 additions & 0 deletions src/ch_api/api_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ class ApiSettings:
- Production: ``https://identity.company-information.service.gov.uk``
- Sandbox: ``https://identity-sandbox.company-information.service.gov.uk``

document_api_url : str
Base URL for the Document API (separate host from the main API).
- Production: ``https://document-api.company-information.service.gov.uk``
- Sandbox: ``https://document-api-sandbox.company-information.service.gov.uk``

Used by :meth:`~ch_api.Client.get_document_metadata` and
:meth:`~ch_api.Client.get_document_url`.

test_data_generator_url : str, optional
Base URL for the Test Data Generator API (sandbox only).
- Production: ``None`` (not available)
Expand Down Expand Up @@ -126,6 +134,7 @@ class ApiSettings:

api_url: str
identity_url: str
document_api_url: str = "https://document-api.company-information.service.gov.uk"
test_data_generator_url: str | None = None


Expand Down Expand Up @@ -161,5 +170,6 @@ class ApiSettings:
# Endpoint set for the Companies House sandbox environment: every host below
# is the "-sandbox" variant, and the Test Data Generator URL is populated.
TEST_API_SETTINGS = ApiSettings(
api_url="https://api-sandbox.company-information.service.gov.uk",
identity_url="https://identity-sandbox.company-information.service.gov.uk",
document_api_url="https://document-api-sandbox.company-information.service.gov.uk",
test_data_generator_url="https://test-data-sandbox.company-information.service.gov.uk",
)
1 change: 1 addition & 0 deletions src/ch_api/types/public_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
company_profile,
company_registers,
disqualifications,
documents,
exemptions,
filing_history,
insolvency,
Expand Down
46 changes: 44 additions & 2 deletions src/ch_api/types/public_data/charges.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,48 @@ class PersonsEntitled(base.BaseModel):
]


class TransactionLinks(shared.LinksSection):
    """Hyperlinks attached to a transaction filed against a charge.

    Built on :class:`~ch_api.types.shared.LinksSection`, which supplies
    ``self`` (and any other arbitrary links). ``filing`` and
    ``insolvency_case`` are spelled out as real fields so that IDEs and
    type checkers can see them; both are optional and default to ``None``.
    """

    filing: typing.Annotated[
        str | None,
        pydantic.Field(
            description="Link to the filing history item for this transaction.",
            default=None,
        ),
    ]

    insolvency_case: typing.Annotated[
        str | None,
        pydantic.Field(
            description="Link to the insolvency case related to this transaction.",
            default=None,
        ),
    ]


class InsolvencyCaseLinks(shared.LinksSection):
    """Hyperlinks attached to an insolvency case recorded on a charge.

    Built on :class:`~ch_api.types.shared.LinksSection`, which supplies
    ``self`` (and any other arbitrary links). ``case`` is spelled out as a
    real field so that IDEs and type checkers can see it; it is optional
    and defaults to ``None``.
    """

    case: typing.Annotated[
        str | None,
        pydantic.Field(
            description="Link to the insolvency case.",
            default=None,
        ),
    ]


class Transactions(base.BaseModel):
"""Transaction that has been filed for the charge."""

Expand Down Expand Up @@ -268,7 +310,7 @@ class Transactions(base.BaseModel):
]

links: typing.Annotated[
shared.LinksSection | None,
TransactionLinks | None,
pydantic.Field(
description="The resources related to this filing",
default=None,
Expand All @@ -288,7 +330,7 @@ class InsolvencyCases(base.BaseModel):
]

links: typing.Annotated[
shared.LinksSection | None,
InsolvencyCaseLinks | None,
pydantic.Field(
description="The resources related to this insolvency case",
default=None,
Expand Down
19 changes: 18 additions & 1 deletion src/ch_api/types/public_data/company_officers.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,23 @@
from . import officer_changes


class OfficerLinks(shared.LinksSection):
    """Hyperlinks found under the ``officer`` key of an officer list item.

    Built on :class:`~ch_api.types.shared.LinksSection`, which supplies
    ``self`` (and any other arbitrary links). ``appointments`` is spelled
    out as a real field so that IDEs and type checkers can see it; it is
    optional and defaults to ``None``.
    """

    appointments: typing.Annotated[
        str | None,
        pydantic.Field(
            description="Link to the officer's appointments list.",
            default=None,
        ),
    ]


class ItemLinkTypes(base.BaseModel):
"""Links to other resources associated with this officer list item."""

Expand All @@ -90,7 +107,7 @@ class ItemLinkTypes(base.BaseModel):
]

officer: typing.Annotated[
shared.LinksSection,
OfficerLinks,
pydantic.Field(
description="Links to other officer resources associated with this officer list item.",
),
Expand Down
Loading
Loading