diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 14e8aa64..278c9fea 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -68,9 +68,26 @@ jobs: run: | uv pip install --system --no-deps -v -e . + - name: Start fake-gcs-server + run: | + docker run -d --name fake-gcs-server -p 4443:4443 \ + fsouza/fake-gcs-server -scheme http -filesystem-root /tmp/fake-gcs-server + + for i in $(seq 1 30); do + if curl --silent --fail http://127.0.0.1:4443/storage/v1/b >/dev/null; then + exit 0 + fi + sleep 1 + done + + docker logs fake-gcs-server + exit 1 + - name: Run tests env: S3_ENDPOINT_URL: "https://google.com" + STORAGE_EMULATOR_HOST: "http://127.0.0.1:4443" + GOOGLE_CLOUD_PROJECT: "test-project" run: | pytest -r a -v -n 3 --cov=lsst.resources\ --cov=tests --cov-report=xml --cov-report=term --cov-branch \ diff --git a/.gitignore b/.gitignore index b44a4ac7..6fe30f14 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.bck *.pyc .eggs +.env *.egg-info version.py _build.* diff --git a/doc/changes/DM-52947.feature.rst b/doc/changes/DM-52947.feature.rst new file mode 100644 index 00000000..098e8d83 --- /dev/null +++ b/doc/changes/DM-52947.feature.rst @@ -0,0 +1 @@ +Added a ``ResourcePath.get_info()`` method to provide a general interface for obtaining information about a resource including the size, modification date, and any checksums available. diff --git a/pyproject.toml b/pyproject.toml index 4ef1b341..6d4a27bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,6 +174,7 @@ select = [ ] extend-select = [ "RUF100", # Warn about unused noqa + "D212", # Docstring starts without newline after quotes. 
] [tool.ruff.lint.isort] diff --git a/python/lsst/resources/__init__.py b/python/lsst/resources/__init__.py index 44de3396..aa7cb95a 100644 --- a/python/lsst/resources/__init__.py +++ b/python/lsst/resources/__init__.py @@ -14,6 +14,7 @@ __all__ = ( "ResourceHandleProtocol", + "ResourceInfo", "ResourcePath", "ResourcePathExpression", ) @@ -22,5 +23,5 @@ from ._resourceHandles import ResourceHandleProtocol # Should only expose ResourcePath and its input type alias -from ._resourcePath import ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression from .version import * diff --git a/python/lsst/resources/_resourceHandles/_davResourceHandle.py b/python/lsst/resources/_resourceHandles/_davResourceHandle.py index 95c5106e..5b378fec 100644 --- a/python/lsst/resources/_resourceHandles/_davResourceHandle.py +++ b/python/lsst/resources/_resourceHandles/_davResourceHandle.py @@ -197,8 +197,8 @@ class DavReadAheadCache: Parameters ---------- client : `lsst.resources.davutils.DavClient` - webDAV client to interact with the server to download data. - backend_url : `str` + The webDAV client to interact with the server to download data. + url : `str` URL of the resource to download data from. filesize : `int` Size in bytes of the remote file. 
diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 413bd3d2..a49048e5 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -11,11 +11,13 @@ from __future__ import annotations -__all__ = ("ResourcePath", "ResourcePathExpression") +__all__ = ("ResourceInfo", "ResourcePath", "ResourcePathExpression") import concurrent.futures import contextlib import copy +import dataclasses +import datetime import io import locale import logging @@ -131,6 +133,27 @@ def _patch_environ(new_values: dict[str, str]) -> Iterator[None]: os.environ[k] = old_values[k] +@dataclasses.dataclass(frozen=True) +class ResourceInfo: + """Information about this resource.""" + + uri: str + """URI in string form of the resource from which this information is + derived. + """ + is_file: bool + """Indicate whether the resource is a file or a directory.""" + size: int + """Size of the file in bytes. A directory or a URI that has no concept + of size returns 0.""" + last_modified: datetime.datetime | None + """Modification date of the resource, if known.""" + checksums: dict[str, Any] + """Checksums for this file. Supported checksum implementations are + backend dependent. + """ + + class ResourcePath: # numpydoc ignore=PR02 """Convenience wrapper around URI parsers. @@ -1931,6 +1954,17 @@ def _copy_extra_attributes(self, original_uri: ResourcePath) -> None: # ResourcePath constructor by passing in a ResourcePath object. pass + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this resource. + + Returns + ------- + info : `ResourceInfo` + The information about this resource that can be obtained from + the backend. Will not read the file contents. + """ + raise NotImplementedError("") + ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path """Type-annotation alias for objects that can be coerced to ResourcePath. 
diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index 41e97422..ecec1936 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -39,7 +39,7 @@ from ._resourceHandles import ResourceHandleProtocol from ._resourceHandles._davResourceHandle import DavReadResourceHandle -from ._resourcePath import ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression from .davutils import ( DavClient, DavClientPool, @@ -139,8 +139,7 @@ def __init__(self) -> None: self._reset() def _reset(self) -> None: - """ - Initialize all the globals. + """Initialize all the globals. This method is a helper for reinitializing globals in tests. """ @@ -289,11 +288,22 @@ def size(self) -> int: return 0 if self.isdir() else self._client.size(self._internal_url) - def info(self) -> dict[str, Any]: - """Return metadata details about this resource.""" - log.debug("info %s [%#x]", self, id(self)) + @override + def get_info(self) -> ResourceInfo: + """Return lightweight metadata details about this resource.""" + log.debug("get_info %s [%#x]", self, id(self)) + + info = self._client.info(self._internal_url) + if info["type"] is None: + raise FileNotFoundError(f"Resource {self} does not exist") - return self._client.info(self._internal_url, name=str(self)) + return ResourceInfo( + uri=str(self), + is_file=info["type"] == "file", + size=info["size"], + last_modified=info["last_modified"], + checksums=info["checksums"], + ) @override def read(self, size: int = -1) -> bytes: diff --git a/python/lsst/resources/davutils.py b/python/lsst/resources/davutils.py index 2f45ca58..f217ce10 100644 --- a/python/lsst/resources/davutils.py +++ b/python/lsst/resources/davutils.py @@ -25,7 +25,7 @@ import time import uuid import xml.etree.ElementTree as eTree -from datetime import datetime +from datetime import UTC, datetime from http import HTTPStatus from typing import Any, BinaryIO @@ -107,13 +107,15 @@ def 
normalize_url(url: str, preserve_scheme: bool = False, preserve_path: bool = def redact_url(url: str) -> str: - """Return a modified `url` with authorization query redacted. The - goal is that this method should be used for logging URLs to avoid + """Return a modified `url` with authorization query redacted. + + The goal is that this method should be used for logging URLs to avoid leaking authorization tokens. Parameters ---------- url : `str` + URL to redact. Returns ------- @@ -759,8 +761,7 @@ def invalidate(self, url: str) -> None: self._cache.pop(url, None) def update_size(self, url: str, size: int | None, timeout: float | None = None) -> None: - """ - Update the cache with an entry for `url` which has a size of `size` + """Update the cache with an entry for `url` which has a size of `size` bytes. This entry is considered valid for a period of `timeout` seconds from now. @@ -818,7 +819,17 @@ def get_size(self, url: str) -> int | None: def unexpected_status_error(method: str, url: str, resp: HTTPResponse) -> Exception: - """Raise an exception from `resp`.""" + """Raise an exception from `resp`. + + Parameters + ---------- + method : `str` + The method name triggering the error. + url : `str` + The URL that caused the error. + resp : `HTTPResponse` + The error response. + """ message = f"Unexpected response to HTTP request {method} {redact_url(url)}: {resp.status} {resp.reason}" body = resp.data.decode() if len(body) > 0: @@ -960,9 +971,8 @@ def _make_pool_manager(self, config: DavConfig) -> PoolManager: ) def get_server_details(self, url: str) -> dict[str, str]: - """ - Retrieve the details of the server and check it advertises compliance - to class 1 of webDAV protocol. + """Retrieve the details of the server and check it advertises + compliance to class 1 of webDAV protocol. Parameters ---------- @@ -1493,7 +1503,7 @@ def options( ---------- url : `str` Target URL. 
- headers : `dict[str, str]`, optional + headers : `dict` [`str`, `str`], optional Headers to sent with the request. Returns @@ -1526,6 +1536,8 @@ def propfind( Headers to sent with the request. body : `str`, optional Request body. + depth : `str`, optional + Value of the ``Depth`` header to send with the PROPFIND request. """ headers = {} if headers is None else dict(headers) headers.update( @@ -1562,7 +1574,7 @@ def put( Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. """ # Send a PUT request with empty body and handle redirection. This @@ -1872,7 +1884,7 @@ def move(self, source_url: str, destination_url: str, overwrite: bool = False) - Returns ------- resp : `HTTPResponse` - unmodified response received from the server. + The unmodified response received from the server. """ headers = { "Destination": destination_url, @@ -1973,6 +1985,8 @@ def read_range( Ending byte offset of the range to download. headers : `dict[str,str]`, optional Specific headers to sent with the GET request. + release_backend : `bool`, optional + Whether or not to close the connection to the backend. Returns ------- @@ -2120,7 +2134,7 @@ def write(self, url: str, data: BinaryIO | bytes) -> int | None: Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. Notes @@ -2298,9 +2312,11 @@ def rename( URL of the source file. destination_url : `str` URL of the destination file. Its parent directory must exist. - overwrite : `bool` + overwrite : `bool`, optional If True and a file exists at `destination_url` it will be overwritten. Otherwise an exception is raised. + create_parent : `bool`, optional + Whether to create the parent directory of `destination_url`. 
""" # Create the destination's parent directory first because MOVE may # fail if it does not exist, depending on the server implementation @@ -2712,7 +2728,7 @@ def put( headers: dict[str, str] | None = None, data: BinaryIO | bytes = b"", ) -> int | None: - """Inherits doc string.""" + # Docstring inherited. # Send a PUT request with empty body to the dCache frontend server to # get redirected to the backend. # @@ -2877,14 +2893,13 @@ def write(self, url: str, data: BinaryIO | bytes) -> int | None: ---------- url : `str` Target URL. - - data: `bytes` + data : `bytes` Sequence of bytes to upload. Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. Notes @@ -2944,7 +2959,7 @@ def mkcol(self, url: str) -> None: @override def info(self, url: str, name: str | None = None) -> dict[str, Any]: - """Inherits doc string.""" + # Docstring inherited. result: dict[str, Any] = { "name": name if name is not None else url, "type": None, @@ -3014,7 +3029,7 @@ def put( headers: dict[str, str] | None = None, data: BinaryIO | bytes = b"", ) -> int | None: - """Inherits doc string.""" + # Docstring inherited. # Send a PUT request with empty body to the XRootD frontend server to # get redirected to the backend. frontend_headers = {} if headers is None else dict(headers) @@ -3120,14 +3135,13 @@ def write(self, url: str, data: BinaryIO | bytes) -> int | None: ---------- url : `str` Target URL. - - data: `bytes` + data : `bytes` Sequence of bytes to upload. Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. Notes @@ -3188,7 +3202,7 @@ def mkcol(self, url: str) -> None: @override def stat(self, url: str) -> DavFileMetadata: - """Inherits doc string.""" + # Docstring inherited. 
# XRootD v5.9.1 responds "200 OK" to a HEAD request against an # existing file. When the target URL is a directory, it also responds # "200 OK". In both cases the response header "Content-Length" @@ -3461,7 +3475,7 @@ def last_modified(self) -> datetime: # Last modified timestamp is of the form: # 'Wed, 12 Mar 2025 10:11:13 GMT' - return datetime.strptime(self._getlastmodified, "%a, %d %b %Y %H:%M:%S %Z") + return datetime.strptime(self._getlastmodified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=UTC) @property def size(self) -> int: @@ -3787,6 +3801,8 @@ def dump_response(method: str, resp: HTTPResponse, dump_body: bool = False) -> N Method name to include in log output. resp : `HTTPResponse` Response to dump. + dump_body : `bool`, optional + Whether or not to issue a debug log message. """ log.debug("%s %s", method, resp.geturl()) log.debug(" %s %s", resp.status, resp.reason) diff --git a/python/lsst/resources/file.py b/python/lsst/resources/file.py index 79c84d42..f549088a 100644 --- a/python/lsst/resources/file.py +++ b/python/lsst/resources/file.py @@ -15,6 +15,7 @@ import contextlib import copy +import datetime import logging import os import os.path @@ -24,10 +25,11 @@ import stat import urllib.parse from collections.abc import Iterator +from pathlib import Path from typing import IO, TYPE_CHECKING from ._resourceHandles._fileResourceHandle import FileResourceHandle -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath from .utils import NoTransaction, ensure_directory_is_writeable, os2posix, posix2os try: @@ -38,12 +40,37 @@ AbstractFileSystem = type if TYPE_CHECKING: + from importlib.resources.abc import Traversable + from .utils import TransactionProtocol log = logging.getLogger(__name__) +def _path_to_info(uri: str, path: str | Path | Traversable) -> ResourceInfo | None: + """Given a path to a local file, return a `ResourceInfo`.""" + if isinstance(path, Path): + stat_result = path.stat() + elif isinstance(path, 
str): + stat_result = os.stat(path) + elif (stat_method := getattr(path, "stat", None)) and callable(stat_method): + # Edge case triggered by importlib.resources. + stat_result = stat_method() + if not isinstance(stat_result, os.stat_result): + raise RuntimeError(f"Unexpected stat result from {path}.stat()") + else: + return None + + return ResourceInfo( + uri=uri, + is_file=not stat.S_ISDIR(stat_result.st_mode), + size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, + last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), + checksums={}, + ) + + class FileResourcePath(ResourcePath): """Path for explicit ``file`` URI scheme.""" @@ -76,6 +103,13 @@ def size(self) -> int: sz = 0 return sz + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this file.""" + info = _path_to_info(str(self), self.ospath) + if info is None: + raise RuntimeError(f"Unexpected internal failure obtaining file info for {self}") + return info + def remove(self) -> None: """Remove the resource.""" os.remove(self.ospath) diff --git a/python/lsst/resources/gs.py b/python/lsst/resources/gs.py index bc30604b..c66e08bc 100644 --- a/python/lsst/resources/gs.py +++ b/python/lsst/resources/gs.py @@ -16,6 +16,7 @@ __all__ = ("GSResourcePath",) import contextlib +import datetime import logging import re from collections.abc import Iterator @@ -71,7 +72,7 @@ class ServiceUnavailable(ClientError): # type: ignore # noqa: N818 from lsst.utils.timer import time_this -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath if TYPE_CHECKING: from .utils import TransactionProtocol @@ -111,6 +112,24 @@ def is_retryable(exc: Exception) -> bool: """Cached client connection.""" +def _coerce_gcs_datetime(value: datetime.datetime | str | None) -> datetime.datetime | None: + """Convert GCS timestamp values to timezone-aware UTC datetimes. 
+ + Some emulators return RFC3339 timestamps with an explicit UTC offset + instead of a trailing ``Z``, which the google-cloud-storage property + accessors do not always accept. + """ + if value is None: + return None + if isinstance(value, datetime.datetime): + if value.tzinfo is None: + return value.replace(tzinfo=datetime.UTC) + return value.astimezone(datetime.UTC) + if value.endswith("Z"): + value = value[:-1] + "+00:00" + return datetime.datetime.fromisoformat(value).astimezone(datetime.UTC) + + def _get_client() -> storage.Client: global _client if storage is None: @@ -146,6 +165,10 @@ def blob(self) -> storage.Blob: def exists(self) -> bool: if self.is_root: return self.bucket.exists(retry=_RETRY_POLICY) + if self.dirLike: + # GCS does not have concrete directory objects; treat any + # directory-like path within an existing bucket as existing. + return self.bucket.exists(retry=_RETRY_POLICY) return self.blob.exists(retry=_RETRY_POLICY) def size(self) -> int: @@ -162,6 +185,58 @@ def size(self) -> int: raise FileNotFoundError(f"Resource {self} does not exist") return size + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this GCS resource.""" + if self.is_root: + if not self.bucket.exists(retry=_RETRY_POLICY): + raise FileNotFoundError(f"Resource {self} does not exist") + return ResourceInfo( + uri=str(self), + is_file=False, + size=0, + last_modified=None, + checksums={}, + ) + + if self.dirLike: + if not self.exists(): + raise FileNotFoundError(f"Resource {self} does not exist") + return ResourceInfo( + uri=str(self), + is_file=False, + size=0, + last_modified=None, + checksums={}, + ) + + try: + self.blob.reload(retry=_RETRY_POLICY) + except NotFound: + raise FileNotFoundError(f"Resource {self} does not exist") from None + + size = self.blob.size + if size is None: + raise FileNotFoundError(f"Resource {self} does not exist") + + checksums = {} + if self.blob.md5_hash: + checksums["md5"] = self.blob.md5_hash + if 
self.blob.crc32c: + checksums["crc32c"] = self.blob.crc32c + + try: + updated = _coerce_gcs_datetime(self.blob.updated) + except ValueError: + updated = _coerce_gcs_datetime(self.blob._properties.get("updated")) + + return ResourceInfo( + uri=str(self), + is_file=True, + size=size, + last_modified=updated, + checksums=checksums, + ) + def remove(self) -> None: try: self.blob.delete(retry=_RETRY_POLICY) @@ -195,12 +270,9 @@ def mkdir(self) -> None: if not self.dirLike: raise NotADirectoryError(f"Can not create a 'directory' for a file-like URI {self}") - if self.is_root: - # The root must already exist. - return - - # Should this method do anything at all? - self.blob.upload_from_string(b"", retry=_RETRY_POLICY) + # GCS does not have directory objects, so mkdir is a no-op once the + # bucket exists. + return @contextlib.contextmanager def _as_local( diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index c224e446..750c1ca9 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -14,6 +14,7 @@ __all__ = ("HttpResourcePath",) import contextlib +import datetime import enum import functools import io @@ -27,6 +28,7 @@ import ssl import stat from collections.abc import Iterator +from email.utils import parsedate_to_datetime from typing import TYPE_CHECKING, Any, BinaryIO, cast try: @@ -58,7 +60,7 @@ from ._resourceHandles import ResourceHandleProtocol from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle, parse_content_range_header -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath from .utils import _get_num_workers, get_tempdir if TYPE_CHECKING: @@ -897,8 +899,7 @@ def _clear_sessions(self) -> None: delattr(self, "_data_session") def _init_server_properties(self) -> None: - """ - Initialize instance variables '_is_webdav' and '_server' by + """Initialize instance variables '_is_webdav' and '_server' by sending a single OPTIONS request to the remote server 
and saving the results. """ @@ -996,71 +997,97 @@ def size(self) -> int: """Return the size of the remote resource in bytes.""" if self.dirLike: return 0 + info = self.get_info() + # dirLike can be None if we are unsure. Only flag if we are certain + # we have been told this is a file but webDAV reports it as a + # directory. + if not info.is_file and self.dirLike is False: + raise IsADirectoryError( + f"Resource {self} is reported by server as a directory but has a file path" + ) + return info.size + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this HTTP resource.""" if not self.is_webdav_endpoint: - # The remote is a plain HTTP server. Send a HEAD request to - # retrieve the size of the resource. resp = self._head_non_webdav_url() - if resp.status_code == requests.codes.ok: # 200 - if "Content-Length" in resp.headers: - return int(resp.headers["Content-Length"]) - else: - raise ValueError( - f"Response to HEAD request to {self} does not contain 'Content-Length' header" - ) - elif resp.status_code == requests.codes.partial_content: - # 206 Partial Content, returned from a GET request with a Range - # header (used to emulate HEAD for presigned S3 URLs). - # In this case Content-Length is the length of the Range and - # not the full length of the file, so we have to parse - # Content-Range instead. - content_range_header = resp.headers.get("Content-Range") - if content_range_header is None: - raise ValueError( - f"Response to GET request to {self} did not contain 'Content-Range' header" - ) - content_range = parse_content_range_header(content_range_header) - size = content_range.total - if size is None: - raise ValueError(f"Content-Range header for {self} did not include a total file size") - return size - elif resp.status_code == requests.codes.range_not_satisfiable: - # 416 Range Not Satisfiable, which can occur on a GET for a 0 - # byte file since we asked for 1 byte Range which is longer - # than the file. 
- # - # Servers are supposed to include a Content-Range header in - # this case, but Google's S3 implementation doesn't. Any - # non-zero file size should have been handled by the 206 and - # 200 cases above, so assume we have a zero here. - return 0 - elif resp.status_code == requests.codes.not_found: + return self._get_info_from_non_webdav_head(resp) + + resp = self._propfind() + if resp.status_code != requests.codes.multi_status: + raise FileNotFoundError( + f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" + ) + + prop = _parse_propfind_response_body(resp.text)[0] + if not prop.exists: + raise FileNotFoundError(f"Resource {self} does not exist") + + return ResourceInfo( + uri=str(self), + is_file=prop.is_file, + size=prop.size, + last_modified=prop.last_modified, + checksums=dict(prop.checksums), + ) + + def _get_info_from_non_webdav_head(self, resp: requests.Response) -> ResourceInfo: + """Build `ResourceInfo` from a non-WebDAV HEAD-like response.""" + if not self._is_successful_non_webdav_head_request(resp): + if resp.status_code == requests.codes.not_found: raise FileNotFoundError( f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" ) - else: - raise ValueError( - f"Unexpected response for HEAD request for {self}, status: {resp.status_code} " - f"{resp.reason}" - ) + raise ValueError( + f"Unexpected response for HEAD request for {self}, status: {resp.status_code} {resp.reason}" + ) - # The remote is a webDAV server: send a PROPFIND request to retrieve - # the size of the resource. Sizes are only meaningful for files. 
- resp = self._propfind() - if resp.status_code == requests.codes.multi_status: # 207 - prop = _parse_propfind_response_body(resp.text)[0] - if prop.is_file: - return prop.size - elif prop.is_directory: - raise IsADirectoryError( - f"Resource {self} is reported by server as a directory but has a file path" + if self.dirLike: + size = 0 + elif resp.status_code == requests.codes.ok: # 200 + if "Content-Length" not in resp.headers: + raise ValueError( + f"Response to HEAD request to {self} does not contain 'Content-Length' header" ) + size = int(resp.headers["Content-Length"]) + elif resp.status_code == requests.codes.partial_content: + # 206 Partial Content, returned from a GET request with a Range + # header (used to emulate HEAD for presigned S3 URLs). + content_range_header = resp.headers.get("Content-Range") + if content_range_header is None: + raise ValueError(f"Response to GET request to {self} did not contain 'Content-Range' header") + content_range = parse_content_range_header(content_range_header) + size_total = content_range.total + if size_total is None: + raise ValueError(f"Content-Range header for {self} did not include a total file size") + size = size_total + else: + # 416 Range Not Satisfiable can occur on a GET for a 0-byte file. 
+ size = 0 + + checksums = {} + digest_header = resp.headers.get("Digest") + if digest_header is not None: + for digest in digest_header.split(","): + algorithm, separator, value = digest.strip().partition("=") + if separator: + checksums[algorithm.lower()] = value + + last_modified = None + if last_modified_header := resp.headers.get("Last-Modified"): + last_modified = parsedate_to_datetime(last_modified_header) + if last_modified.tzinfo is None: + last_modified = last_modified.replace(tzinfo=datetime.UTC) else: - raise FileNotFoundError(f"Resource {self} does not exist") - else: # 404 Not Found - raise FileNotFoundError( - f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" - ) + last_modified = last_modified.astimezone(datetime.UTC) + + return ResourceInfo( + uri=str(self), + is_file=not self.dirLike, + size=size, + last_modified=last_modified, + checksums=checksums, + ) def _head_non_webdav_url(self) -> requests.Response: """Return a response from a HTTP HEAD request for a non-WebDAV HTTP @@ -2231,6 +2258,22 @@ def is_file(self) -> bool: def size(self) -> int: return self._getcontentlength + @property + def last_modified(self) -> datetime.datetime | None: + if not self._getlastmodified: + return None + + last_modified = parsedate_to_datetime(self._getlastmodified) + if last_modified.tzinfo is None: + last_modified = last_modified.replace(tzinfo=datetime.UTC) + else: + last_modified = last_modified.astimezone(datetime.UTC) + return last_modified + + @property + def checksums(self) -> dict[str, str]: + return {} + @property def name(self) -> str: return self._displayname diff --git a/python/lsst/resources/mem.py b/python/lsst/resources/mem.py index 914feef1..5a70597d 100644 --- a/python/lsst/resources/mem.py +++ b/python/lsst/resources/mem.py @@ -16,7 +16,7 @@ import contextlib from collections.abc import Iterator -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath class 
InMemoryResourcePath(ResourcePath): @@ -30,6 +30,16 @@ def exists(self) -> bool: """Test for existence and always return False.""" return True + def get_info(self) -> ResourceInfo: + """Return placeholder metadata for an in-memory resource.""" + return ResourceInfo( + uri=str(self), + is_file=True, + size=0, + last_modified=None, + checksums={}, + ) + @contextlib.contextmanager def _as_local( self, multithreaded: bool = True, tmpdir: ResourcePath | None = None diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index 33775aff..aea583f4 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -29,7 +29,8 @@ AbstractFileSystem = type from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol -from ._resourcePath import ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression +from .file import _path_to_info log = logging.getLogger(__name__) @@ -79,6 +80,25 @@ def exists(self) -> bool: return False return ref.is_file() or ref.is_dir() + def get_info(self) -> ResourceInfo: + """Return metadata about the resource without reading its contents.""" + ref = self._get_ref() + if ref is None or not (ref.is_file() or ref.is_dir()): + raise FileNotFoundError(f"Unable to locate resource {self}.") + + info = _path_to_info(str(self), ref) + + if info is None: + # Edge case such as file in Zip. 
+ return ResourceInfo( + uri=str(self), + is_file=True, + size=0, + last_modified=None, + checksums={}, + ) + return info + def read(self, size: int = -1) -> bytes: ref = self._get_ref() if not ref: diff --git a/python/lsst/resources/proxied.py b/python/lsst/resources/proxied.py index 337820ee..ad711da1 100644 --- a/python/lsst/resources/proxied.py +++ b/python/lsst/resources/proxied.py @@ -14,12 +14,13 @@ __all__ = ("ProxiedResourcePath",) import contextlib +import dataclasses import logging import re from abc import ABC, abstractmethod from collections.abc import Iterator -from ._resourcePath import ResourceHandleProtocol, ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceHandleProtocol, ResourceInfo, ResourcePath, ResourcePathExpression from .utils import TransactionProtocol try: @@ -126,6 +127,10 @@ def size(self) -> int: proxy = self._get_proxy() return proxy.size() + def get_info(self) -> ResourceInfo: + proxy = self._get_proxy() + return dataclasses.replace(proxy.get_info(), uri=str(self)) + def write(self, data: bytes, overwrite: bool = True) -> None: proxy = self._get_proxy() proxy.write(data, overwrite=overwrite) diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index 64705efe..ac5eecea 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -15,6 +15,7 @@ import concurrent.futures import contextlib +import datetime import io import logging import os @@ -33,7 +34,14 @@ from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol from ._resourceHandles._s3ResourceHandle import S3ResourceHandle -from ._resourcePath import _EXECUTOR_TYPE, MBulkResult, ResourcePath, _get_executor_class, _patch_environ +from ._resourcePath import ( + _EXECUTOR_TYPE, + MBulkResult, + ResourceInfo, + ResourcePath, + _get_executor_class, + _patch_environ, +) from .s3utils import ( _get_s3_connection_parameters, _s3_disable_bucket_validation, @@ -375,6 +383,65 @@ def size(self) -> int: raise 
@backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time)
def get_info(self) -> ResourceInfo:
    """Return lightweight metadata about this S3 resource.

    Returns
    -------
    info : `ResourceInfo`
        Size, UTC modification time and any checksums reported by a
        ``HEAD`` request on the object. A bucket root is reported as a
        zero-sized non-file with no modification time and no checksums.

    Raises
    ------
    FileNotFoundError
        Raised if this is a bucket root and the bucket does not exist, or
        if the client reports ``NoSuchKey`` or ``NoSuchBucket``.
    """
    if self.is_root:
        # A bucket root has no object to HEAD; only its existence matters.
        if bucketExists(self._bucket, self.client):
            return ResourceInfo(
                uri=str(self),
                is_file=False,
                size=0,
                last_modified=None,
                checksums={},
            )
        raise FileNotFoundError(f"Resource {self} does not exist")

    try:
        head = self.client.head_object(
            Bucket=self._bucket,
            Key=self.relativeToPathRoot,
            ChecksumMode="ENABLED",
        )
    except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err:
        raise FileNotFoundError(f"No such resource: {self}") from err
    except ClientError as err:
        # Give the shared translator the chance to raise something more
        # specific; if it returns, propagate the original error.
        translate_client_error(err, self)
        raise

    # Map the S3 response fields onto the checksum names used in the
    # result, skipping any that are absent or empty.
    checksum_fields = (
        ("ChecksumCRC32", "crc32"),
        ("ChecksumCRC32C", "crc32c"),
        ("ChecksumCRC64NVME", "crc64nvme"),
        ("ChecksumSHA1", "sha1"),
        ("ChecksumSHA256", "sha256"),
    )
    found_checksums = {
        name: digest for field, name in checksum_fields if (digest := head.get(field))
    }

    # Always report the modification time as a timezone-aware UTC value.
    modified = head.get("LastModified")
    if modified is not None:
        is_naive = getattr(modified, "tzinfo", None) is None
        modified = (
            modified.replace(tzinfo=datetime.UTC) if is_naive else modified.astimezone(datetime.UTC)
        )

    # For ResourcePath usage a dirLike object with zero size is a directory
    # but in the general case anyone can create an object with a trailing
    # `/` and treat it as a file. For self-consistency with ResourcePath
    # call it a file if it has size > 0 even if dirLike.
    nbytes = head["ContentLength"]
    treat_as_file = not (self.dirLike is True and nbytes <= 0)

    return ResourceInfo(
        uri=str(self),
        is_file=treat_as_file,
        size=nbytes,
        last_modified=modified,
        checksums=found_checksums,
    )
+ dirinfo = dir_uri.get_info() + self.assertEqual(dirinfo.uri, str(dir_uri)) + self.assertFalse(dirinfo.is_file) + self.assertEqual(dirinfo.size, 0) + + newdir = self.tmpdir.join("newdir/", forceDirectory=True) + with self.assertRaises(FileNotFoundError): + newdir.get_info() + def test_mkdir(self) -> None: newdir = self.tmpdir.join("newdir/seconddir", forceDirectory=True) newdir.mkdir() diff --git a/tests/test_dav.py b/tests/test_dav.py index 6bab8363..a0a353e5 100644 --- a/tests/test_dav.py +++ b/tests/test_dav.py @@ -10,6 +10,7 @@ # license that can be found in the LICENSE file. import concurrent +import datetime import hashlib import io import os.path @@ -23,9 +24,8 @@ import unittest import zlib from collections.abc import Callable -from datetime import datetime from threading import Thread -from typing import Any, cast +from typing import cast from zipfile import ZipFile, ZipInfo try: @@ -40,7 +40,7 @@ fsspec = None AbstractFileSystem = type -from lsst.resources import ResourcePath +from lsst.resources import ResourceInfo, ResourcePath from lsst.resources._resourceHandles._davResourceHandle import ( DavReadResourceHandle, ) @@ -685,31 +685,24 @@ def download_zip_member(uri: ResourcePath, zinfo: ZipInfo) -> tuple[int, str]: self.assertEqual(member_size, local_file_size) self.assertEqual(member_digest, local_file_digest) - def test_dav_info(self): - def check_metadata_fields(metadata: dict[str, Any]): - for field in ("name", "size", "type", "last_modified", "checksums"): - self.assertTrue(field in metadata) - - # Retrieve and check metadata details about an non-existing object + def test_dav_get_info(self): + # Missing resources now raise instead of returning a partial dict. 
subdir = self.tmpdir.join("inexistent", forceDirectory=True) - metadata = subdir.info() - check_metadata_fields(metadata) - self.assertEqual(metadata["size"], None) - self.assertEqual(metadata["type"], None) - self.assertEqual(len(metadata["checksums"]), 0) - self.assertEqual(metadata["last_modified"], datetime.min) + with self.assertRaises(FileNotFoundError): + subdir.get_info() # Retrieve and check metadata details about an existing directory subdir = self.tmpdir.join(self._get_dir_name(), forceDirectory=True) self.assertIsNone(subdir.mkdir()) self.assertTrue(subdir.exists()) - metadata = subdir.info() - check_metadata_fields(metadata) + metadata = subdir.get_info() + self.assertIsInstance(metadata, ResourceInfo) - self.assertEqual(metadata["size"], 0) - self.assertEqual(metadata["type"], "directory") - self.assertEqual(len(metadata["checksums"]), 0) - self.assertEqual(metadata["last_modified"], subdir._stat().last_modified) + self.assertFalse(metadata.is_file) + self.assertEqual(metadata.size, 0) + self.assertEqual(len(metadata.checksums), 0) + self.assertEqual(metadata.last_modified.tzinfo, datetime.UTC) + self.assertEqual(metadata.last_modified, subdir._stat().last_modified) # Retrieve and check metadata details about existing file local_file, local_file_size = self._generate_file() @@ -723,13 +716,14 @@ def check_metadata_fields(metadata: dict[str, Any]): self.assertIsNone(remote_file.write(file, overwrite=True)) self.assertEqual(os.stat(local_file).st_size, remote_file.size()) - metadata = remote_file.info() - check_metadata_fields(metadata) - self.assertEqual(metadata["size"], local_file_size) - self.assertEqual(metadata["type"], "file") - self.assertEqual(metadata["last_modified"], remote_file._stat().last_modified) + metadata = remote_file.get_info() + self.assertIsInstance(metadata, ResourceInfo) + self.assertTrue(metadata.is_file) + self.assertEqual(metadata.size, local_file_size) + self.assertEqual(metadata.last_modified.tzinfo, datetime.UTC) + 
def test_get_info(self):
    """Check ``get_info()`` for a file and for a directory resource."""
    # A regular file reports its own URI and a non-zero size.
    txt_uri = self.root_uri.join("config/test.txt")
    txt_info = txt_uri.get_info()
    self.assertIsInstance(txt_info, ResourceInfo)
    self.assertEqual(txt_info.uri, str(txt_uri))
    self.assertTrue(txt_info.is_file)
    self.assertGreater(txt_info.size, 0)

    # A directory is reported as a zero-sized non-file.
    config_dir = self.root_uri.join("config/", forceDirectory=True)
    config_info = config_dir.get_info()
    self.assertIsInstance(config_info, ResourceInfo)
    self.assertEqual(config_info.uri, str(config_dir))
    self.assertFalse(config_info.is_file)
    self.assertEqual(config_info.size, 0)
def test_get_info(self):
    """Check ``get_info()`` for a temporary local file and its parent."""
    started = datetime.datetime.now(tz=datetime.UTC)
    with ResourcePath.temporary_uri(suffix=".txt") as target:
        target.write(b"abc")

        file_info = target.get_info()
        self.assertIsInstance(file_info, ResourceInfo)
        self.assertTrue(file_info.uri.endswith(".txt"))
        self.assertTrue(file_info.is_file)
        self.assertEqual(file_info.size, 3)
        self.assertEqual(file_info.checksums, {})
        self.assertEqual(file_info.last_modified.tzinfo, datetime.UTC)
        # Allow one second of slack between the clock reading above and
        # the modification time reported for the file.
        earliest = started.timestamp() - 1.0
        self.assertGreaterEqual(file_info.last_modified.timestamp(), earliest)

        # The containing directory is a zero-sized non-file that still
        # carries a modification time.
        parent_info = target.parent().get_info()
        self.assertEqual(parent_info.uri, str(target.parent()))
        self.assertFalse(parent_info.is_file)
        self.assertEqual(parent_info.size, 0)
        self.assertGreaterEqual(parent_info.last_modified.timestamp(), 0)
        self.assertEqual(parent_info.checksums, {})
def _find_free_port() -> int:
    """Return a TCP port on 127.0.0.1 that was free when probed.

    The probe socket is closed before returning, so another process could
    claim the port before the caller binds to it.
    """
    with socket.socket() as sock:
        sock.bind(("127.0.0.1", 0))
        return sock.getsockname()[1]


@contextlib.contextmanager
def _reset_gs_client() -> Iterator[None]:
    """Clear the cached GCS clients for the duration of the context.

    ``GSResourcePath._client`` and ``lsst.resources.gs._client`` are set to
    `None` on entry and restored to their previous values on exit.
    """
    old_client = GSResourcePath._client
    old_global_client = gs_module._client
    GSResourcePath._client = None
    gs_module._client = None
    try:
        yield
    finally:
        gs_module._client = old_global_client
        GSResourcePath._client = old_client


@contextlib.contextmanager
def fake_gcs_server():
    """Start or connect to a fake GCS server.

    Yields
    ------
    client : `google.cloud.storage.Client`
        Client constructed while the emulator environment variables are
        patched into `os.environ` and the cached clients are cleared.

    Raises
    ------
    unittest.SkipTest
        Raised if ``google-cloud-storage`` is not installed, or if
        ``STORAGE_EMULATOR_HOST`` is not set and no ``fake-gcs-server``
        binary can be located.
    RuntimeError
        Raised if a locally started server exits early or does not accept
        connections within 10 seconds.
    """
    if storage is None:
        raise unittest.SkipTest("google-cloud-storage is not installed")

    emulator_host = os.environ.get("STORAGE_EMULATOR_HOST")
    if emulator_host:
        # An emulator is already running; only make sure a project is set.
        env = {"GOOGLE_CLOUD_PROJECT": os.environ.get("GOOGLE_CLOUD_PROJECT", "test-project")}
        with mock.patch.dict(os.environ, env, clear=False), _reset_gs_client():
            yield storage.Client()
        return

    binary = os.environ.get("FAKE_GCS_SERVER") or shutil.which("fake-gcs-server")
    if binary is None:
        raise unittest.SkipTest("fake-gcs-server is not installed")

    port = _find_free_port()
    # Keep only the most recent output lines for use in error messages.
    startup_output: deque[str] = deque(maxlen=50)

    # The directory is owned by a context manager so that it is removed
    # even if the server process cannot be launched at all.
    tmpdir_ctx = tempfile.TemporaryDirectory(prefix="fake-gcs-server-", ignore_cleanup_errors=True)
    with tmpdir_ctx as filesystem_root:
        proc = subprocess.Popen(
            [binary, "-scheme", "http", "-port", str(port), "-filesystem-root", filesystem_root],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )

        def _drain_output() -> None:
            # Read continuously so the server never blocks on a full pipe.
            assert proc.stdout is not None
            for line in proc.stdout:
                startup_output.append(line.rstrip())

        output_thread = Thread(target=_drain_output, daemon=True)
        output_thread.start()
        try:
            # A monotonic clock keeps the timeout immune to wall-clock
            # adjustments while waiting for the port to open.
            deadline = time.monotonic() + 10
            while True:
                try:
                    with socket.create_connection(("127.0.0.1", port), timeout=0.2):
                        break
                except OSError:
                    if proc.poll() is not None:
                        details = "\n".join(startup_output) or "no process output captured"
                        raise RuntimeError(
                            f"fake-gcs-server exited unexpectedly with code {proc.returncode}:\n{details}"
                        ) from None
                    if time.monotonic() > deadline:
                        details = "\n".join(startup_output) or "no process output captured"
                        raise RuntimeError(f"Timed out waiting for fake-gcs-server:\n{details}") from None
                    time.sleep(0.1)

            env = {
                "STORAGE_EMULATOR_HOST": f"http://127.0.0.1:{port}",
                "GOOGLE_CLOUD_PROJECT": "test-project",
            }
            with mock.patch.dict(os.environ, env, clear=False), _reset_gs_client():
                yield storage.Client()
        finally:
            # Ask politely first, then force the process down if needed.
            proc.terminate()
            with contextlib.suppress(subprocess.TimeoutExpired):
                proc.wait(timeout=5)
            if proc.poll() is None:
                proc.kill()
                proc.wait()
            output_thread.join(timeout=1)
            # Close the pipe explicitly to avoid a ResourceWarning, but only
            # once the reader thread has finished with it.
            if proc.stdout is not None and not output_thread.is_alive():
                proc.stdout.close()


class GenericGSTestCase(GenericTestCase, unittest.TestCase):
    """Generic URI property testing."""

    scheme = "gs"
    netloc = "my_bucket"


class GSReadWriteTestCase(unittest.TestCase):
    """Test GCS backend with emulated server."""

    def setUp(self) -> None:
        """Create a uniquely named bucket on the emulator for this test."""
        self.server = self.enterContext(fake_gcs_server())
        # Derive the bucket name from the test id, replacing anything
        # outside lowercase letters, digits and hyphens, and add a random
        # suffix; the result is at most 63 characters long.
        test_id = re.sub(r"[^a-z0-9-]", "-", self.id().lower()).strip("-")
        suffix = uuid.uuid4().hex[:8]
        self.bucket = f"{test_id[:54]}-{suffix}"
        self.server.create_bucket(self.bucket)
        self.root_uri = ResourcePath(f"gs://{self.bucket}/", forceDirectory=True, forceAbsolute=False)
        self.tmpdir = self.root_uri.join("TESTING/", forceDirectory=True)

    def test_file_round_trip(self) -> None:
        """Write a file and read it back."""
        uri = self.tmpdir.join("test.txt")
        content = b"abcdefghijklmnopqrstuv\n"

        self.assertFalse(uri.exists())
        uri.write(content)
        self.assertTrue(uri.exists())
        self.assertEqual(uri.read(), content)
        self.assertEqual(uri.size(), len(content))

    def test_get_info(self) -> None:
        """Check ``get_info()`` for a small file."""
        remote = self.tmpdir.join("test-info.dat")
        remote.write(b"abc")

        info = remote.get_info()
        self.assertIsInstance(info, ResourceInfo)
        self.assertTrue(info.is_file)
        self.assertEqual(info.size, 3)
        self.assertIsNotNone(info.last_modified)
        self.assertIsInstance(info.checksums, dict)

    def test_directory_semantics(self) -> None:
        """Check that a created directory is reported as a directory."""
        newdir = self.tmpdir.join("newdir/seconddir", forceDirectory=True)
        newdir.mkdir()
        self.assertTrue(newdir.exists())

        info = newdir.get_info()
        self.assertFalse(info.is_file)
        self.assertEqual(info.size, 0)
        self.assertEqual(info.checksums, {})

        newfile = newdir.join("temp.txt")
        newfile.write(b"Data")
        self.assertTrue(newfile.exists())

    def test_root_missing_bucket(self) -> None:
        """Check that a missing bucket does not exist and has no info."""
        missing = ResourcePath("gs://missing-bucket/", forceDirectory=True, forceAbsolute=False)
        self.assertFalse(missing.exists())
        with self.assertRaises(FileNotFoundError):
            missing.get_info()


if __name__ == "__main__":
    unittest.main()
@responses.activate
def test_get_info(self):
    """Check that ``get_info()`` is built from the HEAD response headers."""
    # Drop any cached server probe so the OPTIONS request is issued again.
    _get_dav_and_server_headers.cache_clear()
    url = "http://test.example/something.txt"
    head_headers = {
        "Content-Length": "123",
        "Last-Modified": "Wed, 12 Mar 2025 10:11:13 GMT",
        "Digest": "md5=rL0Y20zC+Fzt72VPzMSk2A==, sha-256=def456",
    }
    responses.add(responses.OPTIONS, "http://test.example/", status=200)
    responses.add(responses.HEAD, url, status=200, headers=head_headers)

    info = ResourcePath(url).get_info()
    self.assertIsInstance(info, ResourceInfo)
    self.assertTrue(info.is_file)
    self.assertEqual(info.size, 123)
    self.assertEqual(info.last_modified.tzinfo, UTC)
    self.assertEqual(info.last_modified.year, 2025)
    expected_checksums = {"md5": "rL0Y20zC+Fzt72VPzMSk2A==", "sha-256": "def456"}
    self.assertEqual(info.checksums, expected_checksums)
    # Exactly one OPTIONS probe and one HEAD request are expected.
    self.assertEqual(len(responses.calls), 2)
def test_get_info(self):
    """Check ``get_info()`` for a small object written to the bucket."""
    started = datetime.datetime.now(tz=datetime.UTC)
    remote = self.root_uri.join("test-info.dat")
    remote.write(b"abc")

    obj_info = remote.get_info()
    self.assertIsInstance(obj_info, ResourceInfo)
    self.assertTrue(obj_info.is_file)
    self.assertEqual(obj_info.size, 3)
    self.assertIsInstance(obj_info.checksums, dict)
    self.assertIn("crc32", obj_info.checksums)  # Only appears if ChecksumMode=ENABLED
    self.assertEqual(obj_info.last_modified.tzinfo, datetime.UTC)
    # Allow one second of slack between the clock reading above and the
    # modification time reported for the object.
    earliest = started.timestamp() - 1.0
    self.assertGreaterEqual(obj_info.last_modified.timestamp(), earliest)