From c879311e946c0797f87ef3630dc5ec47ee5a3e4b Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 30 Mar 2026 17:04:15 -0700 Subject: [PATCH 01/19] Add a ResourcePath.get_info() method This is a standardized way to get the file size, last modified time, and any checksums known to the backend. --- python/lsst/resources/__init__.py | 3 +- python/lsst/resources/_resourcePath.py | 31 ++++- python/lsst/resources/dav.py | 19 ++- python/lsst/resources/davutils.py | 4 +- python/lsst/resources/file.py | 19 ++- python/lsst/resources/gs.py | 49 +++++++- python/lsst/resources/http.py | 146 ++++++++++++++++------- python/lsst/resources/mem.py | 11 +- python/lsst/resources/packageresource.py | 48 +++++++- python/lsst/resources/proxied.py | 6 +- python/lsst/resources/s3.py | 66 +++++++++- tests/test_dav.py | 48 ++++---- tests/test_file.py | 14 ++- tests/test_http.py | 29 ++++- tests/test_s3.py | 15 ++- 15 files changed, 417 insertions(+), 91 deletions(-) diff --git a/python/lsst/resources/__init__.py b/python/lsst/resources/__init__.py index 44de3396..aa7cb95a 100644 --- a/python/lsst/resources/__init__.py +++ b/python/lsst/resources/__init__.py @@ -14,6 +14,7 @@ __all__ = ( "ResourceHandleProtocol", + "ResourceInfo", "ResourcePath", "ResourcePathExpression", ) @@ -22,5 +23,5 @@ from ._resourceHandles import ResourceHandleProtocol # Should only expose ResourcePath and its input type alias -from ._resourcePath import ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression from .version import * diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 413bd3d2..92fd8b81 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -11,11 +11,13 @@ from __future__ import annotations -__all__ = ("ResourcePath", "ResourcePathExpression") +__all__ = ("ResourceInfo", "ResourcePath", "ResourcePathExpression") import concurrent.futures import 
contextlib import copy +import dataclasses +import datetime import io import locale import logging @@ -131,6 +133,22 @@ def _patch_environ(new_values: dict[str, str]) -> Iterator[None]: os.environ[k] = old_values[k] +@dataclasses.dataclass(frozen=True) +class ResourceInfo: + """Information about this resource.""" + + size: int + """Size of the file in bytes. A directory returns 0.""" + last_modified: datetime.datetime | None + """Modification date of the resource, if known.""" + creation_time: datetime.datetime | None + """Creation date of the resource, if known.""" + checksums: dict[str, Any] + """Checksums for this file. Supported checksum implementations are + backend dependent. + """ + + class ResourcePath: # numpydoc ignore=PR02 """Convenience wrapper around URI parsers. @@ -1931,6 +1949,17 @@ def _copy_extra_attributes(self, original_uri: ResourcePath) -> None: # ResourcePath constructor by passing in a ResourcePath object. pass + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this resource. + + Returns + ------- + info : `ResourceInfo` + The information about this resource that can be obtained from + the backend. Will not read the file contents. + """ + raise NotImplementedError("") + ResourcePathExpression = str | urllib.parse.ParseResult | ResourcePath | Path """Type-annotation alias for objects that can be coerced to ResourcePath. 
diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index 41e97422..59e78b1e 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -39,7 +39,7 @@ from ._resourceHandles import ResourceHandleProtocol from ._resourceHandles._davResourceHandle import DavReadResourceHandle -from ._resourcePath import ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression from .davutils import ( DavClient, DavClientPool, @@ -289,11 +289,20 @@ def size(self) -> int: return 0 if self.isdir() else self._client.size(self._internal_url) - def info(self) -> dict[str, Any]: - """Return metadata details about this resource.""" - log.debug("info %s [%#x]", self, id(self)) + def get_info(self) -> ResourceInfo: + """Return lightweight metadata details about this resource.""" + log.debug("get_info %s [%#x]", self, id(self)) - return self._client.info(self._internal_url, name=str(self)) + stat = self._stat() + if not stat.exists: + raise FileNotFoundError(f"Resource {self} does not exist") + + return ResourceInfo( + size=stat.size, + last_modified=stat.last_modified, + creation_time=None, + checksums=dict(stat.checksums), + ) @override def read(self, size: int = -1) -> bytes: diff --git a/python/lsst/resources/davutils.py b/python/lsst/resources/davutils.py index 2f45ca58..b4426e96 100644 --- a/python/lsst/resources/davutils.py +++ b/python/lsst/resources/davutils.py @@ -25,7 +25,7 @@ import time import uuid import xml.etree.ElementTree as eTree -from datetime import datetime +from datetime import UTC, datetime from http import HTTPStatus from typing import Any, BinaryIO @@ -3461,7 +3461,7 @@ def last_modified(self) -> datetime: # Last modified timestamp is of the form: # 'Wed, 12 Mar 2025 10:11:13 GMT' - return datetime.strptime(self._getlastmodified, "%a, %d %b %Y %H:%M:%S %Z") + return datetime.strptime(self._getlastmodified, "%a, %d %b %Y %H:%M:%S %Z").replace(tzinfo=UTC) @property def 
size(self) -> int: diff --git a/python/lsst/resources/file.py b/python/lsst/resources/file.py index 79c84d42..19734a26 100644 --- a/python/lsst/resources/file.py +++ b/python/lsst/resources/file.py @@ -15,6 +15,7 @@ import contextlib import copy +import datetime import logging import os import os.path @@ -27,7 +28,7 @@ from typing import IO, TYPE_CHECKING from ._resourceHandles._fileResourceHandle import FileResourceHandle -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath from .utils import NoTransaction, ensure_directory_is_writeable, os2posix, posix2os try: @@ -76,6 +77,22 @@ def size(self) -> int: sz = 0 return sz + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this file.""" + stat_result = os.stat(self.ospath) + creation_timestamp = getattr(stat_result, "st_birthtime", None) + creation_time = ( + datetime.datetime.fromtimestamp(creation_timestamp, tz=datetime.UTC) + if creation_timestamp is not None + else None + ) + return ResourceInfo( + size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, + last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), + creation_time=creation_time, + checksums={}, + ) + def remove(self) -> None: """Remove the resource.""" os.remove(self.ospath) diff --git a/python/lsst/resources/gs.py b/python/lsst/resources/gs.py index bc30604b..4c8088cf 100644 --- a/python/lsst/resources/gs.py +++ b/python/lsst/resources/gs.py @@ -16,6 +16,7 @@ __all__ = ("GSResourcePath",) import contextlib +import datetime import logging import re from collections.abc import Iterator @@ -71,7 +72,7 @@ class ServiceUnavailable(ClientError): # type: ignore # noqa: N818 from lsst.utils.timer import time_this -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath if TYPE_CHECKING: from .utils import TransactionProtocol @@ -162,6 +163,52 @@ def size(self) -> int: raise FileNotFoundError(f"Resource {self} 
does not exist") return size + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this GCS resource.""" + if self.is_root: + if not self.bucket.exists(retry=_RETRY_POLICY): + raise FileNotFoundError(f"Resource {self} does not exist") + return ResourceInfo( + size=0, + last_modified=None, + creation_time=None, + checksums={}, + ) + + if self.dirLike: + if not self.exists(): + raise FileNotFoundError(f"Resource {self} does not exist") + return ResourceInfo( + size=0, + last_modified=None, + creation_time=None, + checksums={}, + ) + + try: + self.blob.reload(retry=_RETRY_POLICY) + except NotFound: + raise FileNotFoundError(f"Resource {self} does not exist") from None + + size = self.blob.size + if size is None: + raise FileNotFoundError(f"Resource {self} does not exist") + + checksums = {} + if self.blob.md5_hash: + checksums["md5"] = self.blob.md5_hash + if self.blob.crc32c: + checksums["crc32c"] = self.blob.crc32c + + updated = self.blob.updated + created = self.blob.time_created + return ResourceInfo( + size=size, + last_modified=updated.astimezone(datetime.UTC) if updated is not None else None, + creation_time=created.astimezone(datetime.UTC) if created is not None else None, + checksums=checksums, + ) + def remove(self) -> None: try: self.blob.delete(retry=_RETRY_POLICY) diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index c224e446..0dc43cb8 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -14,6 +14,7 @@ __all__ = ("HttpResourcePath",) import contextlib +import datetime import enum import functools import io @@ -27,6 +28,7 @@ import ssl import stat from collections.abc import Iterator +from email.utils import parsedate_to_datetime from typing import TYPE_CHECKING, Any, BinaryIO, cast try: @@ -58,7 +60,7 @@ from ._resourceHandles import ResourceHandleProtocol from ._resourceHandles._httpResourceHandle import HttpReadResourceHandle, parse_content_range_header -from ._resourcePath 
import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath from .utils import _get_num_workers, get_tempdir if TYPE_CHECKING: @@ -998,51 +1000,7 @@ def size(self) -> int: return 0 if not self.is_webdav_endpoint: - # The remote is a plain HTTP server. Send a HEAD request to - # retrieve the size of the resource. - resp = self._head_non_webdav_url() - if resp.status_code == requests.codes.ok: # 200 - if "Content-Length" in resp.headers: - return int(resp.headers["Content-Length"]) - else: - raise ValueError( - f"Response to HEAD request to {self} does not contain 'Content-Length' header" - ) - elif resp.status_code == requests.codes.partial_content: - # 206 Partial Content, returned from a GET request with a Range - # header (used to emulate HEAD for presigned S3 URLs). - # In this case Content-Length is the length of the Range and - # not the full length of the file, so we have to parse - # Content-Range instead. - content_range_header = resp.headers.get("Content-Range") - if content_range_header is None: - raise ValueError( - f"Response to GET request to {self} did not contain 'Content-Range' header" - ) - content_range = parse_content_range_header(content_range_header) - size = content_range.total - if size is None: - raise ValueError(f"Content-Range header for {self} did not include a total file size") - return size - elif resp.status_code == requests.codes.range_not_satisfiable: - # 416 Range Not Satisfiable, which can occur on a GET for a 0 - # byte file since we asked for 1 byte Range which is longer - # than the file. - # - # Servers are supposed to include a Content-Range header in - # this case, but Google's S3 implementation doesn't. Any - # non-zero file size should have been handled by the 206 and - # 200 cases above, so assume we have a zero here. 
- return 0 - elif resp.status_code == requests.codes.not_found: - raise FileNotFoundError( - f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" - ) - else: - raise ValueError( - f"Unexpected response for HEAD request for {self}, status: {resp.status_code} " - f"{resp.reason}" - ) + return self.get_info().size # The remote is a webDAV server: send a PROPFIND request to retrieve # the size of the resource. Sizes are only meaningful for files. @@ -1062,6 +1020,86 @@ def size(self) -> int: f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" ) + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this HTTP resource.""" + if not self.is_webdav_endpoint: + resp = self._head_non_webdav_url() + return self._get_info_from_non_webdav_head(resp) + + resp = self._propfind() + if resp.status_code != requests.codes.multi_status: + raise FileNotFoundError( + f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" + ) + + prop = _parse_propfind_response_body(resp.text)[0] + if not prop.exists: + raise FileNotFoundError(f"Resource {self} does not exist") + + return ResourceInfo( + size=prop.size, + last_modified=prop.last_modified, + creation_time=None, + checksums=dict(prop.checksums), + ) + + def _get_info_from_non_webdav_head(self, resp: requests.Response) -> ResourceInfo: + """Build `ResourceInfo` from a non-WebDAV HEAD-like response.""" + if not self._is_successful_non_webdav_head_request(resp): + if resp.status_code == requests.codes.not_found: + raise FileNotFoundError( + f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" + ) + raise ValueError( + f"Unexpected response for HEAD request for {self}, status: {resp.status_code} {resp.reason}" + ) + + if self.dirLike: + size = 0 + elif resp.status_code == requests.codes.ok: # 200 + if "Content-Length" not in resp.headers: + raise ValueError( + f"Response to HEAD request to {self} does not contain 'Content-Length' 
header" + ) + size = int(resp.headers["Content-Length"]) + elif resp.status_code == requests.codes.partial_content: + # 206 Partial Content, returned from a GET request with a Range + # header (used to emulate HEAD for presigned S3 URLs). + content_range_header = resp.headers.get("Content-Range") + if content_range_header is None: + raise ValueError(f"Response to GET request to {self} did not contain 'Content-Range' header") + content_range = parse_content_range_header(content_range_header) + size_total = content_range.total + if size_total is None: + raise ValueError(f"Content-Range header for {self} did not include a total file size") + size = size_total + else: + # 416 Range Not Satisfiable can occur on a GET for a 0-byte file. + size = 0 + + checksums = {} + digest_header = resp.headers.get("Digest") + if digest_header is not None: + for digest in digest_header.split(","): + algorithm, separator, value = digest.strip().partition("=") + if separator: + checksums[algorithm.lower()] = value + + last_modified = None + if last_modified_header := resp.headers.get("Last-Modified"): + last_modified = parsedate_to_datetime(last_modified_header) + if last_modified.tzinfo is None: + last_modified = last_modified.replace(tzinfo=datetime.UTC) + else: + last_modified = last_modified.astimezone(datetime.UTC) + + return ResourceInfo( + size=size, + last_modified=last_modified, + creation_time=None, + checksums=checksums, + ) + def _head_non_webdav_url(self) -> requests.Response: """Return a response from a HTTP HEAD request for a non-WebDAV HTTP URL. 
@@ -2231,6 +2269,22 @@ def is_file(self) -> bool: def size(self) -> int: return self._getcontentlength + @property + def last_modified(self) -> datetime.datetime | None: + if not self._getlastmodified: + return None + + last_modified = parsedate_to_datetime(self._getlastmodified) + if last_modified.tzinfo is None: + last_modified = last_modified.replace(tzinfo=datetime.UTC) + else: + last_modified = last_modified.astimezone(datetime.UTC) + return last_modified + + @property + def checksums(self) -> dict[str, str]: + return {} + @property def name(self) -> str: return self._displayname diff --git a/python/lsst/resources/mem.py b/python/lsst/resources/mem.py index 914feef1..659301ba 100644 --- a/python/lsst/resources/mem.py +++ b/python/lsst/resources/mem.py @@ -16,7 +16,7 @@ import contextlib from collections.abc import Iterator -from ._resourcePath import ResourcePath +from ._resourcePath import ResourceInfo, ResourcePath class InMemoryResourcePath(ResourcePath): @@ -30,6 +30,15 @@ def exists(self) -> bool: """Test for existence and always return False.""" return True + def get_info(self) -> ResourceInfo: + """Return placeholder metadata for an in-memory resource.""" + return ResourceInfo( + size=-1, + last_modified=None, + creation_time=None, + checksums={}, + ) + @contextlib.contextmanager def _as_local( self, multithreaded: bool = True, tmpdir: ResourcePath | None = None diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index 33775aff..990d63e5 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -14,10 +14,13 @@ __all__ = ("PackageResourcePath",) import contextlib +import datetime import logging +import os import re from collections.abc import Iterator from importlib import resources +from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -29,7 +32,7 @@ AbstractFileSystem = type from ._resourceHandles._baseResourceHandle import 
ResourceHandleProtocol -from ._resourcePath import ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression log = logging.getLogger(__name__) @@ -79,6 +82,49 @@ def exists(self) -> bool: return False return ref.is_file() or ref.is_dir() + def get_info(self) -> ResourceInfo: + """Return metadata about the resource without reading its contents.""" + ref = self._get_ref() + if ref is None or not (ref.is_file() or ref.is_dir()): + raise FileNotFoundError(f"Unable to locate resource {self}.") + + if ref.is_dir(): + return ResourceInfo( + size=0, + last_modified=None, + creation_time=None, + checksums={}, + ) + + stat_result: os.stat_result | None = None + if isinstance(ref, Path): + stat_result = ref.stat() + else: + stat_method = getattr(ref, "stat", None) + if callable(stat_method): + stat_result = stat_method() + + if stat_result is None: + return ResourceInfo( + size=-1, + last_modified=None, + creation_time=None, + checksums={}, + ) + + creation_timestamp = getattr(stat_result, "st_birthtime", None) + creation_time = ( + datetime.datetime.fromtimestamp(creation_timestamp, tz=datetime.UTC) + if creation_timestamp is not None + else None + ) + return ResourceInfo( + size=stat_result.st_size, + last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), + creation_time=creation_time, + checksums={}, + ) + def read(self, size: int = -1) -> bytes: ref = self._get_ref() if not ref: diff --git a/python/lsst/resources/proxied.py b/python/lsst/resources/proxied.py index 337820ee..539e2ef4 100644 --- a/python/lsst/resources/proxied.py +++ b/python/lsst/resources/proxied.py @@ -19,7 +19,7 @@ from abc import ABC, abstractmethod from collections.abc import Iterator -from ._resourcePath import ResourceHandleProtocol, ResourcePath, ResourcePathExpression +from ._resourcePath import ResourceHandleProtocol, ResourceInfo, ResourcePath, ResourcePathExpression from .utils import TransactionProtocol 
try: @@ -126,6 +126,10 @@ def size(self) -> int: proxy = self._get_proxy() return proxy.size() + def get_info(self) -> ResourceInfo: + proxy = self._get_proxy() + return proxy.get_info() + def write(self, data: bytes, overwrite: bool = True) -> None: proxy = self._get_proxy() proxy.write(data, overwrite=overwrite) diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index 64705efe..16122afd 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -15,6 +15,7 @@ import concurrent.futures import contextlib +import datetime import io import logging import os @@ -33,7 +34,14 @@ from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol from ._resourceHandles._s3ResourceHandle import S3ResourceHandle -from ._resourcePath import _EXECUTOR_TYPE, MBulkResult, ResourcePath, _get_executor_class, _patch_environ +from ._resourcePath import ( + _EXECUTOR_TYPE, + MBulkResult, + ResourceInfo, + ResourcePath, + _get_executor_class, + _patch_environ, +) from .s3utils import ( _get_s3_connection_parameters, _s3_disable_bucket_validation, @@ -375,6 +383,62 @@ def size(self) -> int: raise FileNotFoundError(f"Resource {self} does not exist") return sz + @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time) + def get_info(self) -> ResourceInfo: + """Return lightweight metadata about this S3 resource.""" + if self.is_root: + if not bucketExists(self._bucket, self.client): + raise FileNotFoundError(f"Resource {self} does not exist") + return ResourceInfo( + size=0, + last_modified=None, + creation_time=None, + checksums={}, + ) + + if self.dirLike: + if not self.exists(): + raise FileNotFoundError(f"Resource {self} does not exist") + return ResourceInfo( + size=0, + last_modified=None, + creation_time=None, + checksums={}, + ) + + try: + response = self.client.head_object(Bucket=self._bucket, Key=self.relativeToPathRoot) + except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err: 
+ raise FileNotFoundError(f"No such resource: {self}") from err + except ClientError as err: + translate_client_error(err, self) + raise + + checksums = {} + for response_key, checksum_name in ( + ("ChecksumCRC32", "crc32"), + ("ChecksumCRC32C", "crc32c"), + ("ChecksumCRC64NVME", "crc64nvme"), + ("ChecksumSHA1", "sha1"), + ("ChecksumSHA256", "sha256"), + ): + if value := response.get(response_key): + checksums[checksum_name] = value + + last_modified = response.get("LastModified") + if last_modified is not None: + if getattr(last_modified, "tzinfo", None) is None: + last_modified = last_modified.replace(tzinfo=datetime.UTC) + else: + last_modified = last_modified.astimezone(datetime.UTC) + + return ResourceInfo( + size=response["ContentLength"], + last_modified=last_modified, + creation_time=None, + checksums=checksums, + ) + @backoff.on_exception(backoff.expo, retryable_io_errors, max_time=max_retry_time) def remove(self) -> None: """Remove the resource.""" diff --git a/tests/test_dav.py b/tests/test_dav.py index 6bab8363..8ded6763 100644 --- a/tests/test_dav.py +++ b/tests/test_dav.py @@ -10,6 +10,7 @@ # license that can be found in the LICENSE file. 
import concurrent +import datetime import hashlib import io import os.path @@ -23,9 +24,8 @@ import unittest import zlib from collections.abc import Callable -from datetime import datetime from threading import Thread -from typing import Any, cast +from typing import cast from zipfile import ZipFile, ZipInfo try: @@ -40,7 +40,7 @@ fsspec = None AbstractFileSystem = type -from lsst.resources import ResourcePath +from lsst.resources import ResourceInfo, ResourcePath from lsst.resources._resourceHandles._davResourceHandle import ( DavReadResourceHandle, ) @@ -685,31 +685,24 @@ def download_zip_member(uri: ResourcePath, zinfo: ZipInfo) -> tuple[int, str]: self.assertEqual(member_size, local_file_size) self.assertEqual(member_digest, local_file_digest) - def test_dav_info(self): - def check_metadata_fields(metadata: dict[str, Any]): - for field in ("name", "size", "type", "last_modified", "checksums"): - self.assertTrue(field in metadata) - - # Retrieve and check metadata details about an non-existing object + def test_dav_get_info(self): + # Missing resources now raise instead of returning a partial dict. 
subdir = self.tmpdir.join("inexistent", forceDirectory=True) - metadata = subdir.info() - check_metadata_fields(metadata) - self.assertEqual(metadata["size"], None) - self.assertEqual(metadata["type"], None) - self.assertEqual(len(metadata["checksums"]), 0) - self.assertEqual(metadata["last_modified"], datetime.min) + with self.assertRaises(FileNotFoundError): + subdir.get_info() # Retrieve and check metadata details about an existing directory subdir = self.tmpdir.join(self._get_dir_name(), forceDirectory=True) self.assertIsNone(subdir.mkdir()) self.assertTrue(subdir.exists()) - metadata = subdir.info() - check_metadata_fields(metadata) + metadata = subdir.get_info() + self.assertIsInstance(metadata, ResourceInfo) - self.assertEqual(metadata["size"], 0) - self.assertEqual(metadata["type"], "directory") - self.assertEqual(len(metadata["checksums"]), 0) - self.assertEqual(metadata["last_modified"], subdir._stat().last_modified) + self.assertEqual(metadata.size, 0) + self.assertIsNone(metadata.creation_time) + self.assertEqual(len(metadata.checksums), 0) + self.assertEqual(metadata.last_modified.tzinfo, datetime.UTC) + self.assertEqual(metadata.last_modified, subdir._stat().last_modified) # Retrieve and check metadata details about existing file local_file, local_file_size = self._generate_file() @@ -723,13 +716,14 @@ def check_metadata_fields(metadata: dict[str, Any]): self.assertIsNone(remote_file.write(file, overwrite=True)) self.assertEqual(os.stat(local_file).st_size, remote_file.size()) - metadata = remote_file.info() - check_metadata_fields(metadata) - self.assertEqual(metadata["size"], local_file_size) - self.assertEqual(metadata["type"], "file") - self.assertEqual(metadata["last_modified"], remote_file._stat().last_modified) + metadata = remote_file.get_info() + self.assertIsInstance(metadata, ResourceInfo) + self.assertEqual(metadata.size, local_file_size) + self.assertIsNone(metadata.creation_time) + self.assertEqual(metadata.last_modified.tzinfo, 
datetime.UTC) + self.assertEqual(metadata.last_modified, remote_file._stat().last_modified) - checksums = metadata["checksums"] + checksums = metadata.checksums if "md5" in checksums: self.assertEqual(checksums["md5"], md5_checksum) if "adler32" in checksums: diff --git a/tests/test_file.py b/tests/test_file.py index 1f9f0da3..64851d16 100644 --- a/tests/test_file.py +++ b/tests/test_file.py @@ -10,13 +10,14 @@ # license that can be found in the LICENSE file. import contextlib +import datetime import os import pathlib import unittest import unittest.mock import urllib.parse -from lsst.resources import ResourcePath, ResourcePathExpression +from lsst.resources import ResourceInfo, ResourcePath, ResourcePathExpression from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -83,6 +84,17 @@ def test_schemeless_root(self): via_root = ResourcePath("b.txt", root=root) self.assertEqual(via_root.ospath, "/root/b.txt") + def test_get_info(self): + with ResourcePath.temporary_uri(suffix=".txt") as target: + target.write(b"abc") + + info = target.get_info() + self.assertIsInstance(info, ResourceInfo) + self.assertEqual(info.size, 3) + self.assertEqual(info.checksums, {}) + self.assertEqual(info.last_modified.tzinfo, datetime.UTC) + self.assertGreaterEqual(info.last_modified.timestamp(), 0) + TEST_UMASK = 0o0333 diff --git a/tests/test_http.py b/tests/test_http.py index 14013917..b3c8c6c5 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -24,6 +24,7 @@ import unittest.mock import warnings from collections.abc import Callable +from datetime import UTC from threading import Thread from typing import cast @@ -38,7 +39,7 @@ import responses.matchers import lsst.resources -from lsst.resources import ResourcePath +from lsst.resources import ResourceInfo, ResourcePath from lsst.resources._resourceHandles._httpResourceHandle import ( HttpReadResourceHandle, parse_content_range_header, @@ -48,6 +49,7 @@ 
HttpResourcePath, HttpResourcePathConfig, SessionStore, + _get_dav_and_server_headers, _is_protected, ) from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase @@ -128,6 +130,31 @@ def test_extra_headers(self): copy = ResourcePath(path, forceDirectory=True) self.assertEqual(copy._extra_headers, {"Authorization": "Bearer my-token"}) + @responses.activate + def test_get_info(self): + _get_dav_and_server_headers.cache_clear() + url = "http://test.example/something.txt" + responses.add(responses.OPTIONS, "http://test.example/", status=200) + responses.add( + responses.HEAD, + url, + status=200, + headers={ + "Content-Length": "123", + "Last-Modified": "Wed, 12 Mar 2025 10:11:13 GMT", + "Digest": "md5=abc123, sha-256=def456", + }, + ) + + info = ResourcePath(url).get_info() + self.assertIsInstance(info, ResourceInfo) + self.assertEqual(info.size, 123) + self.assertIsNone(info.creation_time) + self.assertEqual(info.last_modified.tzinfo, UTC) + self.assertEqual(info.last_modified.year, 2025) + self.assertEqual(info.checksums, {"md5": "abc123", "sha-256": "def456"}) + self.assertEqual(len(responses.calls), 2) + class HttpReadWriteWebdavTestCase(GenericReadWriteTestCase, unittest.TestCase): """Test with a real webDAV server, as opposed to mocking responses.""" diff --git a/tests/test_s3.py b/tests/test_s3.py index 8bf04bff..1450a89c 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -9,6 +9,7 @@ # Use of this source code is governed by a 3-clause BSD-style # license that can be found in the LICENSE file. 
+import datetime import os import time import unittest @@ -16,7 +17,7 @@ from unittest import mock from urllib.parse import parse_qs, urlparse -from lsst.resources import ResourcePath +from lsst.resources import ResourceInfo, ResourcePath from lsst.resources.s3 import S3ResourcePath from lsst.resources.s3utils import clean_test_environment_for_s3 from lsst.resources.tests import GenericReadWriteTestCase, GenericTestCase @@ -204,6 +205,18 @@ def test_nonexistent_presigned_url(self): with self.assertRaises(FileNotFoundError): get_path.size() + def test_get_info(self): + remote = self.root_uri.join("test-info.dat") + remote.write(b"abc") + + info = remote.get_info() + self.assertIsInstance(info, ResourceInfo) + self.assertEqual(info.size, 3) + self.assertIsNone(info.creation_time) + self.assertIsInstance(info.checksums, dict) + self.assertEqual(info.last_modified.tzinfo, datetime.UTC) + self.assertGreaterEqual(info.last_modified.timestamp(), 0) + def _check_presigned_url(self, url: str, expiration_time_seconds: int): parsed = urlparse(url) self.assertEqual(parsed.scheme, "https") From b256c3e7eb7c9b71ed8446fee69260592748336c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 09:13:33 -0700 Subject: [PATCH 02/19] Add URI string to ResourceInfo for provenance --- python/lsst/resources/_resourcePath.py | 4 ++++ python/lsst/resources/dav.py | 1 + python/lsst/resources/file.py | 1 + python/lsst/resources/gs.py | 3 +++ python/lsst/resources/http.py | 2 ++ python/lsst/resources/mem.py | 1 + python/lsst/resources/packageresource.py | 3 +++ python/lsst/resources/s3.py | 3 +++ 8 files changed, 18 insertions(+) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 92fd8b81..58844587 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -137,6 +137,10 @@ def _patch_environ(new_values: dict[str, str]) -> Iterator[None]: class ResourceInfo: """Information about this 
resource.""" + uri: str + """URI in string form of the resource from which this information is + derived. + """ size: int """Size of the file in bytes. A directory returns 0.""" last_modified: datetime.datetime | None diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index 59e78b1e..6c88e1ce 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -298,6 +298,7 @@ def get_info(self) -> ResourceInfo: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( + uri=str(self), size=stat.size, last_modified=stat.last_modified, creation_time=None, diff --git a/python/lsst/resources/file.py b/python/lsst/resources/file.py index 19734a26..1f7a0821 100644 --- a/python/lsst/resources/file.py +++ b/python/lsst/resources/file.py @@ -87,6 +87,7 @@ def get_info(self) -> ResourceInfo: else None ) return ResourceInfo( + uri=str(self), size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), creation_time=creation_time, diff --git a/python/lsst/resources/gs.py b/python/lsst/resources/gs.py index 4c8088cf..457aa95e 100644 --- a/python/lsst/resources/gs.py +++ b/python/lsst/resources/gs.py @@ -169,6 +169,7 @@ def get_info(self) -> ResourceInfo: if not self.bucket.exists(retry=_RETRY_POLICY): raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( + uri=str(self), size=0, last_modified=None, creation_time=None, @@ -179,6 +180,7 @@ def get_info(self) -> ResourceInfo: if not self.exists(): raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( + uri=str(self), size=0, last_modified=None, creation_time=None, @@ -203,6 +205,7 @@ def get_info(self) -> ResourceInfo: updated = self.blob.updated created = self.blob.time_created return ResourceInfo( + uri=str(self), size=size, last_modified=updated.astimezone(datetime.UTC) if updated is not None else None, 
creation_time=created.astimezone(datetime.UTC) if created is not None else None, diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index 0dc43cb8..c42c39d8 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -1037,6 +1037,7 @@ def get_info(self) -> ResourceInfo: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( + uri=str(self), size=prop.size, last_modified=prop.last_modified, creation_time=None, @@ -1094,6 +1095,7 @@ def _get_info_from_non_webdav_head(self, resp: requests.Response) -> ResourceInf last_modified = last_modified.astimezone(datetime.UTC) return ResourceInfo( + uri=str(self), size=size, last_modified=last_modified, creation_time=None, diff --git a/python/lsst/resources/mem.py b/python/lsst/resources/mem.py index 659301ba..6e69a590 100644 --- a/python/lsst/resources/mem.py +++ b/python/lsst/resources/mem.py @@ -33,6 +33,7 @@ def exists(self) -> bool: def get_info(self) -> ResourceInfo: """Return placeholder metadata for an in-memory resource.""" return ResourceInfo( + uri=str(self), size=-1, last_modified=None, creation_time=None, diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index 990d63e5..8f7bb6c2 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -90,6 +90,7 @@ def get_info(self) -> ResourceInfo: if ref.is_dir(): return ResourceInfo( + uri=str(self), size=0, last_modified=None, creation_time=None, @@ -106,6 +107,7 @@ def get_info(self) -> ResourceInfo: if stat_result is None: return ResourceInfo( + uri=str(self), size=-1, last_modified=None, creation_time=None, @@ -119,6 +121,7 @@ def get_info(self) -> ResourceInfo: else None ) return ResourceInfo( + uri=str(self), size=stat_result.st_size, last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), creation_time=creation_time, diff --git a/python/lsst/resources/s3.py 
b/python/lsst/resources/s3.py index 16122afd..6ce2031e 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -390,6 +390,7 @@ def get_info(self) -> ResourceInfo: if not bucketExists(self._bucket, self.client): raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( + uri=str(self), size=0, last_modified=None, creation_time=None, @@ -400,6 +401,7 @@ def get_info(self) -> ResourceInfo: if not self.exists(): raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( + uri=str(self), size=0, last_modified=None, creation_time=None, @@ -433,6 +435,7 @@ def get_info(self) -> ResourceInfo: last_modified = last_modified.astimezone(datetime.UTC) return ResourceInfo( + uri=str(self), size=response["ContentLength"], last_modified=last_modified, creation_time=None, From 358656fa10361dbe16cf0c6136b12dea9f47cc69 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 09:25:06 -0700 Subject: [PATCH 03/19] Enable check sum mode when using s3 get_info --- python/lsst/resources/s3.py | 6 +++++- tests/test_s3.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index 6ce2031e..e7925958 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -409,7 +409,11 @@ def get_info(self) -> ResourceInfo: ) try: - response = self.client.head_object(Bucket=self._bucket, Key=self.relativeToPathRoot) + response = self.client.head_object( + Bucket=self._bucket, + Key=self.relativeToPathRoot, + ChecksumMode="ENABLED", + ) except (self.client.exceptions.NoSuchKey, self.client.exceptions.NoSuchBucket) as err: raise FileNotFoundError(f"No such resource: {self}") from err except ClientError as err: diff --git a/tests/test_s3.py b/tests/test_s3.py index 1450a89c..b4b7c317 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -214,6 +214,7 @@ def test_get_info(self): self.assertEqual(info.size, 3) 
self.assertIsNone(info.creation_time) self.assertIsInstance(info.checksums, dict) + self.assertIn("crc32", info.checksums) # Only appears if ChecksumMode=ENABLED self.assertEqual(info.last_modified.tzinfo, datetime.UTC) self.assertGreaterEqual(info.last_modified.timestamp(), 0) From fd5376ed552748f1ea468158fde11894d752dc7a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 09:18:08 -0700 Subject: [PATCH 04/19] Improve test coverage --- python/lsst/resources/tests.py | 30 ++++++++++++++++++++++++++++++ tests/test_file.py | 10 +++++++++- tests/test_s3.py | 3 ++- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/python/lsst/resources/tests.py b/python/lsst/resources/tests.py index 58caed03..7393a0ab 100644 --- a/python/lsst/resources/tests.py +++ b/python/lsst/resources/tests.py @@ -12,6 +12,7 @@ __all__ = ["GenericReadWriteTestCase", "GenericTestCase"] +import datetime import logging import os import pathlib @@ -669,6 +670,35 @@ def test_file(self) -> None: self.assertEqual(uri, uri2) self.assertEqual(id(uri), id(uri2)) + def test_get_info_generic(self) -> None: + """Test generic get_info properties.""" + now = datetime.datetime.now(tz=datetime.UTC) + uri = self.tmpdir.join("test.txt") + + with self.assertRaises(FileNotFoundError): + uri.get_info() + + content = "abcdefghijklmnopqrstuv\n" + uri.write(content.encode()) + + info = uri.get_info() + self.assertEqual(info.size, len(content)) + assert info.last_modified is not None + self.assertGreaterEqual(info.last_modified.timestamp(), now.timestamp() - 1.0) + self.assertIsInstance(info.checksums, dict) # Checksums are backend dependent. + + for dir_uri in (uri.parent(), uri.root_uri()): + # File URIs can return values for modification dates for + # directories. 
+ dirinfo = dir_uri.get_info() + self.assertEqual(dirinfo.uri, str(dir_uri)) + self.assertEqual(dirinfo.size, 0) + self.assertEqual(dirinfo.checksums, {}) + + newdir = self.tmpdir.join("newdir/", forceDirectory=True) + with self.assertRaises(FileNotFoundError): + newdir.get_info() + def test_mkdir(self) -> None: newdir = self.tmpdir.join("newdir/seconddir", forceDirectory=True) newdir.mkdir() diff --git a/tests/test_file.py b/tests/test_file.py index 64851d16..7efc9073 100644 --- a/tests/test_file.py +++ b/tests/test_file.py @@ -85,15 +85,23 @@ def test_schemeless_root(self): self.assertEqual(via_root.ospath, "/root/b.txt") def test_get_info(self): + now = datetime.datetime.now(tz=datetime.UTC) with ResourcePath.temporary_uri(suffix=".txt") as target: target.write(b"abc") info = target.get_info() self.assertIsInstance(info, ResourceInfo) + self.assertTrue(info.uri.endswith(".txt")) self.assertEqual(info.size, 3) self.assertEqual(info.checksums, {}) self.assertEqual(info.last_modified.tzinfo, datetime.UTC) - self.assertGreaterEqual(info.last_modified.timestamp(), 0) + self.assertGreaterEqual(info.last_modified.timestamp(), now.timestamp() - 1.0) + + dirinfo = target.parent().get_info() + self.assertEqual(dirinfo.uri, str(target.parent())) + self.assertEqual(dirinfo.size, 0) + self.assertGreaterEqual(dirinfo.last_modified.timestamp(), 0) + self.assertEqual(dirinfo.checksums, {}) TEST_UMASK = 0o0333 diff --git a/tests/test_s3.py b/tests/test_s3.py index b4b7c317..b78b98fc 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -206,6 +206,7 @@ def test_nonexistent_presigned_url(self): get_path.size() def test_get_info(self): + now = datetime.datetime.now(tz=datetime.UTC) remote = self.root_uri.join("test-info.dat") remote.write(b"abc") @@ -216,7 +217,7 @@ def test_get_info(self): self.assertIsInstance(info.checksums, dict) self.assertIn("crc32", info.checksums) # Only appears if ChecksumMode=ENABLED self.assertEqual(info.last_modified.tzinfo, datetime.UTC) - 
self.assertGreaterEqual(info.last_modified.timestamp(), 0) + self.assertGreaterEqual(info.last_modified.timestamp(), now.timestamp() - 1.0) def _check_presigned_url(self, url: str, expiration_time_seconds: int): parsed = urlparse(url) From e7019ebe5b5c95d55d3bc999605727e43c8a8324 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 10:52:32 -0700 Subject: [PATCH 05/19] Add ResourceInfo.is_file flag and test it --- python/lsst/resources/_resourcePath.py | 2 ++ python/lsst/resources/dav.py | 1 + python/lsst/resources/file.py | 1 + python/lsst/resources/gs.py | 3 +++ python/lsst/resources/http.py | 2 ++ python/lsst/resources/mem.py | 1 + python/lsst/resources/packageresource.py | 3 +++ python/lsst/resources/proxied.py | 3 ++- python/lsst/resources/s3.py | 3 +++ python/lsst/resources/tests.py | 2 ++ tests/test_dav.py | 2 ++ tests/test_eups.py | 17 ++++++++++++++++- tests/test_file.py | 2 ++ tests/test_http.py | 1 + tests/test_s3.py | 1 + 15 files changed, 42 insertions(+), 2 deletions(-) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 58844587..2d6c8e4f 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -141,6 +141,8 @@ class ResourceInfo: """URI in string form of the resource from which this information is derived. """ + is_file: bool + """Indicate whether the resource is a file or a directory.""" size: int """Size of the file in bytes. 
A directory returns 0.""" last_modified: datetime.datetime | None diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index 6c88e1ce..03033dc4 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -299,6 +299,7 @@ def get_info(self) -> ResourceInfo: return ResourceInfo( uri=str(self), + is_file=stat.is_file, size=stat.size, last_modified=stat.last_modified, creation_time=None, diff --git a/python/lsst/resources/file.py b/python/lsst/resources/file.py index 1f7a0821..e6107cf6 100644 --- a/python/lsst/resources/file.py +++ b/python/lsst/resources/file.py @@ -88,6 +88,7 @@ def get_info(self) -> ResourceInfo: ) return ResourceInfo( uri=str(self), + is_file=not stat.S_ISDIR(stat_result.st_mode), size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), creation_time=creation_time, diff --git a/python/lsst/resources/gs.py b/python/lsst/resources/gs.py index 457aa95e..8c8daedc 100644 --- a/python/lsst/resources/gs.py +++ b/python/lsst/resources/gs.py @@ -170,6 +170,7 @@ def get_info(self) -> ResourceInfo: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( uri=str(self), + is_file=False, size=0, last_modified=None, creation_time=None, @@ -181,6 +182,7 @@ def get_info(self) -> ResourceInfo: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( uri=str(self), + is_file=False, size=0, last_modified=None, creation_time=None, @@ -206,6 +208,7 @@ def get_info(self) -> ResourceInfo: created = self.blob.time_created return ResourceInfo( uri=str(self), + is_file=True, size=size, last_modified=updated.astimezone(datetime.UTC) if updated is not None else None, creation_time=created.astimezone(datetime.UTC) if created is not None else None, diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index c42c39d8..53edd796 100644 --- a/python/lsst/resources/http.py +++ 
b/python/lsst/resources/http.py @@ -1038,6 +1038,7 @@ def get_info(self) -> ResourceInfo: return ResourceInfo( uri=str(self), + is_file=prop.is_file, size=prop.size, last_modified=prop.last_modified, creation_time=None, @@ -1096,6 +1097,7 @@ def _get_info_from_non_webdav_head(self, resp: requests.Response) -> ResourceInf return ResourceInfo( uri=str(self), + is_file=not self.dirLike, size=size, last_modified=last_modified, creation_time=None, diff --git a/python/lsst/resources/mem.py b/python/lsst/resources/mem.py index 6e69a590..7cc0bbb0 100644 --- a/python/lsst/resources/mem.py +++ b/python/lsst/resources/mem.py @@ -34,6 +34,7 @@ def get_info(self) -> ResourceInfo: """Return placeholder metadata for an in-memory resource.""" return ResourceInfo( uri=str(self), + is_file=True, size=-1, last_modified=None, creation_time=None, diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index 8f7bb6c2..2e45e4b8 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -91,6 +91,7 @@ def get_info(self) -> ResourceInfo: if ref.is_dir(): return ResourceInfo( uri=str(self), + is_file=False, size=0, last_modified=None, creation_time=None, @@ -108,6 +109,7 @@ def get_info(self) -> ResourceInfo: if stat_result is None: return ResourceInfo( uri=str(self), + is_file=True, size=-1, last_modified=None, creation_time=None, @@ -122,6 +124,7 @@ def get_info(self) -> ResourceInfo: ) return ResourceInfo( uri=str(self), + is_file=True, size=stat_result.st_size, last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), creation_time=creation_time, diff --git a/python/lsst/resources/proxied.py b/python/lsst/resources/proxied.py index 539e2ef4..ad711da1 100644 --- a/python/lsst/resources/proxied.py +++ b/python/lsst/resources/proxied.py @@ -14,6 +14,7 @@ __all__ = ("ProxiedResourcePath",) import contextlib +import dataclasses import logging import re from abc import ABC, 
abstractmethod @@ -128,7 +129,7 @@ def size(self) -> int: def get_info(self) -> ResourceInfo: proxy = self._get_proxy() - return proxy.get_info() + return dataclasses.replace(proxy.get_info(), uri=str(self)) def write(self, data: bytes, overwrite: bool = True) -> None: proxy = self._get_proxy() diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index e7925958..3c71b615 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -391,6 +391,7 @@ def get_info(self) -> ResourceInfo: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( uri=str(self), + is_file=False, size=0, last_modified=None, creation_time=None, @@ -402,6 +403,7 @@ def get_info(self) -> ResourceInfo: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( uri=str(self), + is_file=False, size=0, last_modified=None, creation_time=None, @@ -440,6 +442,7 @@ def get_info(self) -> ResourceInfo: return ResourceInfo( uri=str(self), + is_file=True, size=response["ContentLength"], last_modified=last_modified, creation_time=None, diff --git a/python/lsst/resources/tests.py b/python/lsst/resources/tests.py index 7393a0ab..46dff69c 100644 --- a/python/lsst/resources/tests.py +++ b/python/lsst/resources/tests.py @@ -682,6 +682,7 @@ def test_get_info_generic(self) -> None: uri.write(content.encode()) info = uri.get_info() + self.assertTrue(info.is_file) self.assertEqual(info.size, len(content)) assert info.last_modified is not None self.assertGreaterEqual(info.last_modified.timestamp(), now.timestamp() - 1.0) @@ -692,6 +693,7 @@ def test_get_info_generic(self) -> None: # directories. 
dirinfo = dir_uri.get_info() self.assertEqual(dirinfo.uri, str(dir_uri)) + self.assertFalse(dirinfo.is_file) self.assertEqual(dirinfo.size, 0) self.assertEqual(dirinfo.checksums, {}) diff --git a/tests/test_dav.py b/tests/test_dav.py index 8ded6763..79d15647 100644 --- a/tests/test_dav.py +++ b/tests/test_dav.py @@ -698,6 +698,7 @@ def test_dav_get_info(self): metadata = subdir.get_info() self.assertIsInstance(metadata, ResourceInfo) + self.assertFalse(metadata.is_file) self.assertEqual(metadata.size, 0) self.assertIsNone(metadata.creation_time) self.assertEqual(len(metadata.checksums), 0) @@ -718,6 +719,7 @@ def test_dav_get_info(self): metadata = remote_file.get_info() self.assertIsInstance(metadata, ResourceInfo) + self.assertTrue(metadata.is_file) self.assertEqual(metadata.size, local_file_size) self.assertIsNone(metadata.creation_time) self.assertEqual(metadata.last_modified.tzinfo, datetime.UTC) diff --git a/tests/test_eups.py b/tests/test_eups.py index e54d0e82..a06dfdff 100644 --- a/tests/test_eups.py +++ b/tests/test_eups.py @@ -15,7 +15,7 @@ import unittest import unittest.mock -from lsst.resources import ResourcePath +from lsst.resources import ResourceInfo, ResourcePath from lsst.resources.eups import EupsResourcePath from lsst.resources.tests import GenericTestCase @@ -141,6 +141,21 @@ def test_open(self): content = buffer.read() self.assertEqual(uri.read().decode(), content) + def test_get_info(self): + file_uri = self.root_uri.join("config/test.txt") + info = file_uri.get_info() + self.assertIsInstance(info, ResourceInfo) + self.assertEqual(info.uri, str(file_uri)) + self.assertTrue(info.is_file) + self.assertGreater(info.size, 0) + + dir_uri = self.root_uri.join("config/", forceDirectory=True) + dirinfo = dir_uri.get_info() + self.assertIsInstance(dirinfo, ResourceInfo) + self.assertEqual(dirinfo.uri, str(dir_uri)) + self.assertFalse(dirinfo.is_file) + self.assertEqual(dirinfo.size, 0) + def test_walk(self): """Test that we can find file resources. 
diff --git a/tests/test_file.py b/tests/test_file.py index 7efc9073..45a31e62 100644 --- a/tests/test_file.py +++ b/tests/test_file.py @@ -92,6 +92,7 @@ def test_get_info(self): info = target.get_info() self.assertIsInstance(info, ResourceInfo) self.assertTrue(info.uri.endswith(".txt")) + self.assertTrue(info.is_file) self.assertEqual(info.size, 3) self.assertEqual(info.checksums, {}) self.assertEqual(info.last_modified.tzinfo, datetime.UTC) @@ -99,6 +100,7 @@ def test_get_info(self): dirinfo = target.parent().get_info() self.assertEqual(dirinfo.uri, str(target.parent())) + self.assertFalse(dirinfo.is_file) self.assertEqual(dirinfo.size, 0) self.assertGreaterEqual(dirinfo.last_modified.timestamp(), 0) self.assertEqual(dirinfo.checksums, {}) diff --git a/tests/test_http.py b/tests/test_http.py index b3c8c6c5..02e278a4 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -148,6 +148,7 @@ def test_get_info(self): info = ResourcePath(url).get_info() self.assertIsInstance(info, ResourceInfo) + self.assertTrue(info.is_file) self.assertEqual(info.size, 123) self.assertIsNone(info.creation_time) self.assertEqual(info.last_modified.tzinfo, UTC) diff --git a/tests/test_s3.py b/tests/test_s3.py index b78b98fc..e1580ba8 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -212,6 +212,7 @@ def test_get_info(self): info = remote.get_info() self.assertIsInstance(info, ResourceInfo) + self.assertTrue(info.is_file) self.assertEqual(info.size, 3) self.assertIsNone(info.creation_time) self.assertIsInstance(info.checksums, dict) From 479306d899b9aef8a3ace19638dd1424afc5fd3c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 10:52:50 -0700 Subject: [PATCH 06/19] Ignore the .env file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b44a4ac7..6fe30f14 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.bck *.pyc .eggs +.env *.egg-info version.py _build.* From 52c59b46ea8de496248c157f67ae723e5b8952c6 Mon Sep 17 
00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 10:59:30 -0700 Subject: [PATCH 07/19] Remove some code duplication in http size() call --- python/lsst/resources/http.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index 53edd796..5d4f3a43 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -998,27 +998,13 @@ def size(self) -> int: """Return the size of the remote resource in bytes.""" if self.dirLike: return 0 - - if not self.is_webdav_endpoint: - return self.get_info().size - - # The remote is a webDAV server: send a PROPFIND request to retrieve - # the size of the resource. Sizes are only meaningful for files. - resp = self._propfind() - if resp.status_code == requests.codes.multi_status: # 207 - prop = _parse_propfind_response_body(resp.text)[0] - if prop.is_file: - return prop.size - elif prop.is_directory: - raise IsADirectoryError( - f"Resource {self} is reported by server as a directory but has a file path" - ) - else: - raise FileNotFoundError(f"Resource {self} does not exist") - else: # 404 Not Found - raise FileNotFoundError( - f"Resource {self} does not exist, status: {resp.status_code} {resp.reason}" + info = self.get_info() + print(info, self.dirLike) + if not info.is_file and self.dirLike is False: + raise IsADirectoryError( + f"Resource {self} is reported by server as a directory but has a file path" ) + return info.size def get_info(self) -> ResourceInfo: """Return lightweight metadata about this HTTP resource.""" From fa9f1df97504c5813bc4248b3946ee2f485c3308 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 11:24:46 -0700 Subject: [PATCH 08/19] Add get_info test for mem:// --- tests/test_mem.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_mem.py b/tests/test_mem.py index d146f59f..b7f5af02 100644 --- a/tests/test_mem.py +++ b/tests/test_mem.py @@ -37,6 +37,15 @@ 
def test_local(self): with self.root_uri.as_local(): pass + def test_get_info(self) -> None: + info = self.root_uri.get_info() + self.assertTrue(info.is_file) + self.assertEqual(info.uri, str(self.root_uri)) + self.assertEqual(info.size, -1) + self.assertEqual(info.checksums, {}) + self.assertIsNone(info.last_modified) + self.assertIsNone(info.creation_time) + if __name__ == "__main__": unittest.main() From 2e2fa2aba92ef599d60af3ae0ac4f8220c4c9976 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Tue, 31 Mar 2026 13:01:31 -0700 Subject: [PATCH 09/19] Add test coverage for GCS using emulated GCS server Code courtesy of OpenAI Codex. --- .github/workflows/build.yaml | 17 ++++ python/lsst/resources/gs.py | 47 +++++++-- tests/test_gs.py | 183 ++++++++++++++++++++++++++++++++++- 3 files changed, 232 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 14e8aa64..278c9fea 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -68,9 +68,26 @@ jobs: run: | uv pip install --system --no-deps -v -e . 
+ - name: Start fake-gcs-server + run: | + docker run -d --name fake-gcs-server -p 4443:4443 \ + fsouza/fake-gcs-server -scheme http -filesystem-root /tmp/fake-gcs-server + + for i in $(seq 1 30); do + if curl --silent --fail http://127.0.0.1:4443/storage/v1/b >/dev/null; then + exit 0 + fi + sleep 1 + done + + docker logs fake-gcs-server + exit 1 + - name: Run tests env: S3_ENDPOINT_URL: "https://google.com" + STORAGE_EMULATOR_HOST: "http://127.0.0.1:4443" + GOOGLE_CLOUD_PROJECT: "test-project" run: | pytest -r a -v -n 3 --cov=lsst.resources\ --cov=tests --cov-report=xml --cov-report=term --cov-branch \ diff --git a/python/lsst/resources/gs.py b/python/lsst/resources/gs.py index 8c8daedc..e8a118fc 100644 --- a/python/lsst/resources/gs.py +++ b/python/lsst/resources/gs.py @@ -112,6 +112,24 @@ def is_retryable(exc: Exception) -> bool: """Cached client connection.""" +def _coerce_gcs_datetime(value: datetime.datetime | str | None) -> datetime.datetime | None: + """Convert GCS timestamp values to timezone-aware UTC datetimes. + + Some emulators return RFC3339 timestamps with an explicit UTC offset + instead of a trailing ``Z``, which the google-cloud-storage property + accessors do not always accept. + """ + if value is None: + return None + if isinstance(value, datetime.datetime): + if value.tzinfo is None: + return value.replace(tzinfo=datetime.UTC) + return value.astimezone(datetime.UTC) + if value.endswith("Z"): + value = value[:-1] + "+00:00" + return datetime.datetime.fromisoformat(value).astimezone(datetime.UTC) + + def _get_client() -> storage.Client: global _client if storage is None: @@ -147,6 +165,10 @@ def blob(self) -> storage.Blob: def exists(self) -> bool: if self.is_root: return self.bucket.exists(retry=_RETRY_POLICY) + if self.dirLike: + # GCS does not have concrete directory objects; treat any + # directory-like path within an existing bucket as existing. 
+ return self.bucket.exists(retry=_RETRY_POLICY) return self.blob.exists(retry=_RETRY_POLICY) def size(self) -> int: @@ -204,14 +226,22 @@ def get_info(self) -> ResourceInfo: if self.blob.crc32c: checksums["crc32c"] = self.blob.crc32c - updated = self.blob.updated - created = self.blob.time_created + try: + updated = _coerce_gcs_datetime(self.blob.updated) + except ValueError: + updated = _coerce_gcs_datetime(self.blob._properties.get("updated")) + + try: + created = _coerce_gcs_datetime(self.blob.time_created) + except ValueError: + created = _coerce_gcs_datetime(self.blob._properties.get("timeCreated")) + return ResourceInfo( uri=str(self), is_file=True, size=size, - last_modified=updated.astimezone(datetime.UTC) if updated is not None else None, - creation_time=created.astimezone(datetime.UTC) if created is not None else None, + last_modified=updated, + creation_time=created, checksums=checksums, ) @@ -248,12 +278,9 @@ def mkdir(self) -> None: if not self.dirLike: raise NotADirectoryError(f"Can not create a 'directory' for a file-like URI {self}") - if self.is_root: - # The root must already exist. - return - - # Should this method do anything at all? - self.blob.upload_from_string(b"", retry=_RETRY_POLICY) + # GCS does not have directory objects, so mkdir is a no-op once the + # bucket exists. + return @contextlib.contextmanager def _as_local( diff --git a/tests/test_gs.py b/tests/test_gs.py index ad1c3194..9870ea9b 100644 --- a/tests/test_gs.py +++ b/tests/test_gs.py @@ -9,8 +9,41 @@ # Use of this source code is governed by a 3-clause BSD-style # license that can be found in the LICENSE file. +"""Tests for the ``gs://`` resource backend. + +The emulator-backed tests in this module are enabled in either of these ways: + +1. Set ``STORAGE_EMULATOR_HOST`` to an already-running GCS emulator + endpoint. This is how GitHub Actions runs these tests. +2. 
Install the ``fake-gcs-server`` binary locally and make it available on + ``PATH``, or set ``FAKE_GCS_SERVER`` to its full path. The test helper will + start and stop the emulator automatically. + +The server binary is available from: +https://github.com/fsouza/fake-gcs-server/releases + +If neither is configured, the emulator-backed tests are skipped. +""" + +from __future__ import annotations + +import contextlib +import os +import re +import shutil +import socket +import subprocess +import tempfile +import time import unittest +import uuid +from collections import deque +from threading import Thread +from unittest import mock +import lsst.resources.gs as gs_module +from lsst.resources import ResourceInfo, ResourcePath +from lsst.resources.gs import GSResourcePath from lsst.resources.tests import GenericTestCase try: @@ -19,13 +52,153 @@ storage = None -@unittest.skipIf(not storage, "Warning: google-cloud-storage not found!") -class GenericGCSTestCase(GenericTestCase, unittest.TestCase): - """Generic tests of google cloud storage URI format.""" +def _find_free_port() -> int: + with socket.socket() as sock: + sock.bind(("127.0.0.1", 0)) + return sock.getsockname()[1] + + +@contextlib.contextmanager +def _reset_gs_client() -> None: + old_client = GSResourcePath._client + old_global_client = gs_module._client + GSResourcePath._client = None + gs_module._client = None + try: + yield + finally: + GSResourcePath._client = None + gs_module._client = old_global_client + GSResourcePath._client = old_client + + +@contextlib.contextmanager +def fake_gcs_server(): + """Start or connect to a fake GCS server.""" + if storage is None: + raise unittest.SkipTest("google-cloud-storage is not installed") + + emulator_host = os.environ.get("STORAGE_EMULATOR_HOST") + if emulator_host: + env = {"GOOGLE_CLOUD_PROJECT": os.environ.get("GOOGLE_CLOUD_PROJECT", "test-project")} + with mock.patch.dict(os.environ, env, clear=False): + with _reset_gs_client(): + yield storage.Client() + 
return + + binary = os.environ.get("FAKE_GCS_SERVER") or shutil.which("fake-gcs-server") + if binary is None: + raise unittest.SkipTest("fake-gcs-server is not installed") + + port = _find_free_port() + filesystem_root = tempfile.mkdtemp(prefix="fake-gcs-server-") + startup_output: deque[str] = deque(maxlen=50) + proc = subprocess.Popen( + [binary, "-scheme", "http", "-port", str(port), "-filesystem-root", filesystem_root], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + + def _drain_output() -> None: + assert proc.stdout is not None + for line in proc.stdout: + startup_output.append(line.rstrip()) + + output_thread = Thread(target=_drain_output, daemon=True) + output_thread.start() + try: + deadline = time.time() + 10 + while True: + try: + with socket.create_connection(("127.0.0.1", port), timeout=0.2): + break + except OSError: + if proc.poll() is not None: + details = "\n".join(startup_output) or "no process output captured" + raise RuntimeError( + f"fake-gcs-server exited unexpectedly with code {proc.returncode}:\n{details}" + ) from None + if time.time() > deadline: + details = "\n".join(startup_output) or "no process output captured" + raise RuntimeError(f"Timed out waiting for fake-gcs-server:\n{details}") from None + time.sleep(0.1) + + env = { + "STORAGE_EMULATOR_HOST": f"http://127.0.0.1:{port}", + "GOOGLE_CLOUD_PROJECT": "test-project", + } + with mock.patch.dict(os.environ, env, clear=False): + with _reset_gs_client(): + yield storage.Client() + finally: + proc.terminate() + with contextlib.suppress(subprocess.TimeoutExpired): + proc.wait(timeout=5) + if proc.poll() is None: + proc.kill() + proc.wait() + output_thread.join(timeout=1) + shutil.rmtree(filesystem_root, ignore_errors=True) + + +class GenericGSTestCase(GenericTestCase, unittest.TestCase): + """Generic URI property testing.""" scheme = "gs" netloc = "my_bucket" -if __name__ == "__main__": - unittest.main() +class GSReadWriteTestCase(unittest.TestCase): + """Test GCS 
backend with emulated server.""" + + def setUp(self) -> None: + self.server = self.enterContext(fake_gcs_server()) + test_id = re.sub(r"[^a-z0-9-]", "-", self.id().lower()).strip("-") + suffix = uuid.uuid4().hex[:8] + self.bucket = f"{test_id[:54]}-{suffix}" + self.server.create_bucket(self.bucket) + self.root_uri = ResourcePath(f"gs://{self.bucket}/", forceDirectory=True, forceAbsolute=False) + self.tmpdir = self.root_uri.join("TESTING/", forceDirectory=True) + + def test_file_round_trip(self) -> None: + uri = self.tmpdir.join("test.txt") + content = b"abcdefghijklmnopqrstuv\n" + + self.assertFalse(uri.exists()) + uri.write(content) + self.assertTrue(uri.exists()) + self.assertEqual(uri.read(), content) + self.assertEqual(uri.size(), len(content)) + + def test_get_info(self) -> None: + remote = self.tmpdir.join("test-info.dat") + remote.write(b"abc") + + info = remote.get_info() + self.assertIsInstance(info, ResourceInfo) + self.assertTrue(info.is_file) + self.assertEqual(info.size, 3) + self.assertIsNotNone(info.creation_time) + self.assertIsNotNone(info.last_modified) + self.assertIsInstance(info.checksums, dict) + + def test_directory_semantics(self) -> None: + newdir = self.tmpdir.join("newdir/seconddir", forceDirectory=True) + newdir.mkdir() + self.assertTrue(newdir.exists()) + + info = newdir.get_info() + self.assertFalse(info.is_file) + self.assertEqual(info.size, 0) + self.assertEqual(info.checksums, {}) + + newfile = newdir.join("temp.txt") + newfile.write(b"Data") + self.assertTrue(newfile.exists()) + + def test_root_missing_bucket(self) -> None: + missing = ResourcePath("gs://missing-bucket/", forceDirectory=True, forceAbsolute=False) + self.assertFalse(missing.exists()) + with self.assertRaises(FileNotFoundError): + missing.get_info() From 182d42913f124841f0a246f2dd5f4598ca63be35 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 1 Apr 2026 09:55:45 -0700 Subject: [PATCH 10/19] Return size=0 for unknown rather than -1 --- 
python/lsst/resources/_resourcePath.py | 3 ++- python/lsst/resources/mem.py | 2 +- python/lsst/resources/packageresource.py | 2 +- tests/test_mem.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 2d6c8e4f..0b86c6fd 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -144,7 +144,8 @@ class ResourceInfo: is_file: bool """Indicate whether the resource is a file or a directory.""" size: int - """Size of the file in bytes. A directory returns 0.""" + """Size of the file in bytes. A directory or a URI that has no concept + of size returns 0.""" last_modified: datetime.datetime | None """Modification date of the resource, if known.""" creation_time: datetime.datetime | None diff --git a/python/lsst/resources/mem.py b/python/lsst/resources/mem.py index 7cc0bbb0..fb132a84 100644 --- a/python/lsst/resources/mem.py +++ b/python/lsst/resources/mem.py @@ -35,7 +35,7 @@ def get_info(self) -> ResourceInfo: return ResourceInfo( uri=str(self), is_file=True, - size=-1, + size=0, last_modified=None, creation_time=None, checksums={}, diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index 2e45e4b8..6c938ff1 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -110,7 +110,7 @@ def get_info(self) -> ResourceInfo: return ResourceInfo( uri=str(self), is_file=True, - size=-1, + size=0, last_modified=None, creation_time=None, checksums={}, diff --git a/tests/test_mem.py b/tests/test_mem.py index b7f5af02..64090aee 100644 --- a/tests/test_mem.py +++ b/tests/test_mem.py @@ -41,7 +41,7 @@ def test_get_info(self) -> None: info = self.root_uri.get_info() self.assertTrue(info.is_file) self.assertEqual(info.uri, str(self.root_uri)) - self.assertEqual(info.size, -1) + self.assertEqual(info.size, 0) self.assertEqual(info.checksums, {}) 
self.assertIsNone(info.last_modified) self.assertIsNone(info.creation_time) From af835b4b26c4e35b2e15ed73675cf11ed5c8ca5a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 1 Apr 2026 09:56:18 -0700 Subject: [PATCH 11/19] Fix some minor issues --- python/lsst/resources/_resourcePath.py | 2 +- python/lsst/resources/http.py | 4 +++- tests/test_gs.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 0b86c6fd..7d563746 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -138,7 +138,7 @@ class ResourceInfo: """Information about this resource.""" uri: str - """URI in string form of the resource from which this information is + """URI in string form of the resource from which this information is derived. """ is_file: bool diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index 5d4f3a43..5e616e99 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -999,7 +999,9 @@ def size(self) -> int: if self.dirLike: return 0 info = self.get_info() - print(info, self.dirLike) + # dirLike can be None if we are unsure. Only flag if we are certain + # we have been told this is a directory but webDAV reports it as a + # file. 
if not info.is_file and self.dirLike is False: raise IsADirectoryError( f"Resource {self} is reported by server as a directory but has a file path" diff --git a/tests/test_gs.py b/tests/test_gs.py index 9870ea9b..02ccce74 100644 --- a/tests/test_gs.py +++ b/tests/test_gs.py @@ -38,6 +38,7 @@ import unittest import uuid from collections import deque +from collections.abc import Iterator from threading import Thread from unittest import mock @@ -59,7 +60,7 @@ def _find_free_port() -> int: @contextlib.contextmanager -def _reset_gs_client() -> None: +def _reset_gs_client() -> Iterator[None]: old_client = GSResourcePath._client old_global_client = gs_module._client GSResourcePath._client = None From e24f1aef49918572f3690219c03700b0f0ab501d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Wed, 1 Apr 2026 10:02:35 -0700 Subject: [PATCH 12/19] Use a more believable md5 in tests (with == padding) --- tests/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_http.py b/tests/test_http.py index 02e278a4..d16cc801 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -142,7 +142,7 @@ def test_get_info(self): headers={ "Content-Length": "123", "Last-Modified": "Wed, 12 Mar 2025 10:11:13 GMT", - "Digest": "md5=abc123, sha-256=def456", + "Digest": "md5=rL0Y20zC+Fzt72VPzMSk2A==, sha-256=def456", }, ) @@ -153,7 +153,7 @@ def test_get_info(self): self.assertIsNone(info.creation_time) self.assertEqual(info.last_modified.tzinfo, UTC) self.assertEqual(info.last_modified.year, 2025) - self.assertEqual(info.checksums, {"md5": "abc123", "sha-256": "def456"}) + self.assertEqual(info.checksums, {"md5": "rL0Y20zC+Fzt72VPzMSk2A==", "sha-256": "def456"}) self.assertEqual(len(responses.calls), 2) From 33b36b63e7b3277df40007b77482a3703a51ae02 Mon Sep 17 00:00:00 2001 From: Fabio Hernandez Date: Wed, 1 Apr 2026 10:14:22 -0700 Subject: [PATCH 13/19] Edit dav get_info() to better suit the internal API --- python/lsst/resources/dav.py | 13 +++++++------ 
1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index 03033dc4..6d02724e 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -289,21 +289,22 @@ def size(self) -> int: return 0 if self.isdir() else self._client.size(self._internal_url) + @override def get_info(self) -> ResourceInfo: """Return lightweight metadata details about this resource.""" log.debug("get_info %s [%#x]", self, id(self)) - stat = self._stat() - if not stat.exists: + info = self._client.info(self._internal_url) + if info["type"] is None: raise FileNotFoundError(f"Resource {self} does not exist") return ResourceInfo( uri=str(self), - is_file=stat.is_file, - size=stat.size, - last_modified=stat.last_modified, + is_file=info["type"] == "file", + size=info["size"], + last_modified=info["last_modified"], creation_time=None, - checksums=dict(stat.checksums), + checksums=info["checksums"], ) @override From 56436317ccae0343a14325e3e421bbcad410171b Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 2 Apr 2026 15:49:50 -0700 Subject: [PATCH 14/19] Enable D212 ruff check and fix docstrings --- pyproject.toml | 1 + python/lsst/resources/dav.py | 3 +-- python/lsst/resources/davutils.py | 8 +++----- python/lsst/resources/http.py | 3 +-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4ef1b341..6d4a27bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,6 +174,7 @@ select = [ ] extend-select = [ "RUF100", # Warn about unused noqa + "D212", # Docstring starts without newline after quotes. ] [tool.ruff.lint.isort] diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index 6d02724e..cdbf7402 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -139,8 +139,7 @@ def __init__(self) -> None: self._reset() def _reset(self) -> None: - """ - Initialize all the globals. + """Initialize all the globals. 
This method is a helper for reinitializing globals in tests. """ diff --git a/python/lsst/resources/davutils.py b/python/lsst/resources/davutils.py index b4426e96..3e2363d1 100644 --- a/python/lsst/resources/davutils.py +++ b/python/lsst/resources/davutils.py @@ -759,8 +759,7 @@ def invalidate(self, url: str) -> None: self._cache.pop(url, None) def update_size(self, url: str, size: int | None, timeout: float | None = None) -> None: - """ - Update the cache with an entry for `url` which has a size of `size` + """Update the cache with an entry for `url` which has a size of `size` bytes. This entry is considered valid for a period of `timeout` seconds from now. @@ -960,9 +959,8 @@ def _make_pool_manager(self, config: DavConfig) -> PoolManager: ) def get_server_details(self, url: str) -> dict[str, str]: - """ - Retrieve the details of the server and check it advertises compliance - to class 1 of webDAV protocol. + """Retrieve the details of the server and check it advertises + compliance to class 1 of webDAV protocol. Parameters ---------- diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index 5e616e99..3d8366ff 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -899,8 +899,7 @@ def _clear_sessions(self) -> None: delattr(self, "_data_session") def _init_server_properties(self) -> None: - """ - Initialize instance variables '_is_webdav' and '_server' by + """Initialize instance variables '_is_webdav' and '_server' by sending a single OPTIONS request to the remote server and saving the results. """ From 7ddc8888964427117640566e0e3e7a852c148131 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 2 Apr 2026 16:00:14 -0700 Subject: [PATCH 15/19] Fix numpydoc errors that were unnoticed. Somehow pre-commit finds them but the numpydoc github action does not. 
--- .../_resourceHandles/_davResourceHandle.py | 4 +- python/lsst/resources/davutils.py | 54 ++++++++++++------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/python/lsst/resources/_resourceHandles/_davResourceHandle.py b/python/lsst/resources/_resourceHandles/_davResourceHandle.py index 95c5106e..5b378fec 100644 --- a/python/lsst/resources/_resourceHandles/_davResourceHandle.py +++ b/python/lsst/resources/_resourceHandles/_davResourceHandle.py @@ -197,8 +197,8 @@ class DavReadAheadCache: Parameters ---------- client : `lsst.resources.davutils.DavClient` - webDAV client to interact with the server to download data. - backend_url : `str` + The webDAV client to interact with the server to download data. + url : `str` URL of the resource to download data from. filesize : `int` Size in bytes of the remote file. diff --git a/python/lsst/resources/davutils.py b/python/lsst/resources/davutils.py index 3e2363d1..f217ce10 100644 --- a/python/lsst/resources/davutils.py +++ b/python/lsst/resources/davutils.py @@ -107,13 +107,15 @@ def normalize_url(url: str, preserve_scheme: bool = False, preserve_path: bool = def redact_url(url: str) -> str: - """Return a modified `url` with authorization query redacted. The - goal is that this method should be used for logging URLs to avoid + """Return a modified `url` with authorization query redacted. + + The goal is that this method should be used for logging URLs to avoid leaking authorization tokens. Parameters ---------- url : `str` + URL to redact. Returns ------- @@ -817,7 +819,17 @@ def get_size(self, url: str) -> int | None: def unexpected_status_error(method: str, url: str, resp: HTTPResponse) -> Exception: - """Raise an exception from `resp`.""" + """Raise an exception from `resp`. + + Parameters + ---------- + method : `str` + The method name triggering the error. + url : `str` + The URL that cause the error. + resp : `resp` + The error response. 
+ """ message = f"Unexpected response to HTTP request {method} {redact_url(url)}: {resp.status} {resp.reason}" body = resp.data.decode() if len(body) > 0: @@ -1491,7 +1503,7 @@ def options( ---------- url : `str` Target URL. - headers : `dict[str, str]`, optional + headers : `dict` [`str`, `str`], optional Headers to sent with the request. Returns @@ -1524,6 +1536,8 @@ def propfind( Headers to sent with the request. body : `str`, optional Request body. + depth : `str`, optional + ???. """ headers = {} if headers is None else dict(headers) headers.update( @@ -1560,7 +1574,7 @@ def put( Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. """ # Send a PUT request with empty body and handle redirection. This @@ -1870,7 +1884,7 @@ def move(self, source_url: str, destination_url: str, overwrite: bool = False) - Returns ------- resp : `HTTPResponse` - unmodified response received from the server. + The unmodified response received from the server. """ headers = { "Destination": destination_url, @@ -1971,6 +1985,8 @@ def read_range( Ending byte offset of the range to download. headers : `dict[str,str]`, optional Specific headers to sent with the GET request. + release_backend : `bool`, optional + Whether or not to close the connection to the backend. Returns ------- @@ -2118,7 +2134,7 @@ def write(self, url: str, data: BinaryIO | bytes) -> int | None: Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. Notes @@ -2296,9 +2312,11 @@ def rename( URL of the source file. destination_url : `str` URL of the destination file. Its parent directory must exist. - overwrite : `bool` + overwrite : `bool`, optional If True and a file exists at `destination_url` it will be overwritten. 
Otherwise an exception is raised. + create_parent : `bool`, optional + Whether to create the parent. """ # Create the destination's parent directory first because MOVE may # fail if it does not exist, depending on the server implementation @@ -2710,7 +2728,7 @@ def put( headers: dict[str, str] | None = None, data: BinaryIO | bytes = b"", ) -> int | None: - """Inherits doc string.""" + # Docstring inherited. # Send a PUT request with empty body to the dCache frontend server to # get redirected to the backend. # @@ -2875,14 +2893,13 @@ def write(self, url: str, data: BinaryIO | bytes) -> int | None: ---------- url : `str` Target URL. - - data: `bytes` + data : `bytes` Sequence of bytes to upload. Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. Notes @@ -2942,7 +2959,7 @@ def mkcol(self, url: str) -> None: @override def info(self, url: str, name: str | None = None) -> dict[str, Any]: - """Inherits doc string.""" + # Docstring inherited. result: dict[str, Any] = { "name": name if name is not None else url, "type": None, @@ -3012,7 +3029,7 @@ def put( headers: dict[str, str] | None = None, data: BinaryIO | bytes = b"", ) -> int | None: - """Inherits doc string.""" + # Docstring inherited. # Send a PUT request with empty body to the XRootD frontend server to # get redirected to the backend. frontend_headers = {} if headers is None else dict(headers) @@ -3118,14 +3135,13 @@ def write(self, url: str, data: BinaryIO | bytes) -> int | None: ---------- url : `str` Target URL. - - data: `bytes` + data : `bytes` Sequence of bytes to upload. Returns ------- size : `int | None` - size in bytes of the file uploaded. Can be `None` if the size + The size in bytes of the file uploaded. Can be `None` if the size could not be retrieved. 
Notes @@ -3186,7 +3202,7 @@ def mkcol(self, url: str) -> None: @override def stat(self, url: str) -> DavFileMetadata: - """Inherits doc string.""" + # Docstring inherited. # XRootD v5.9.1 responds "200 OK" to a HEAD request against an # existing file. When the target URL is a directory, it also responds # "200 OK". In both cases the response header "Content-Length" @@ -3785,6 +3801,8 @@ def dump_response(method: str, resp: HTTPResponse, dump_body: bool = False) -> N Method name to include in log output. resp : `HTTPResponse` Response to dump. + dump_body : `bool`, optional + Whether or not to issue a debug log message. """ log.debug("%s %s", method, resp.geturl()) log.debug(" %s %s", resp.status, resp.reason) From 3747db379514b14fa2b9cd9d2152933803644f28 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 6 Apr 2026 12:03:32 -0700 Subject: [PATCH 16/19] Remove creation_time from ResourceInfo It only exists on macOS APFS and whilst that's great for macOS users it doesn't help at all for our main systems. 
--- python/lsst/resources/_resourcePath.py | 2 -- python/lsst/resources/dav.py | 1 - python/lsst/resources/file.py | 7 ------- python/lsst/resources/gs.py | 8 -------- python/lsst/resources/http.py | 2 -- python/lsst/resources/mem.py | 1 - python/lsst/resources/packageresource.py | 9 --------- python/lsst/resources/s3.py | 3 --- tests/test_dav.py | 2 -- tests/test_gs.py | 1 - tests/test_http.py | 1 - tests/test_mem.py | 1 - tests/test_s3.py | 1 - 13 files changed, 39 deletions(-) diff --git a/python/lsst/resources/_resourcePath.py b/python/lsst/resources/_resourcePath.py index 7d563746..a49048e5 100644 --- a/python/lsst/resources/_resourcePath.py +++ b/python/lsst/resources/_resourcePath.py @@ -148,8 +148,6 @@ class ResourceInfo: of size returns 0.""" last_modified: datetime.datetime | None """Modification date of the resource, if known.""" - creation_time: datetime.datetime | None - """Creation date of the resource, if known.""" checksums: dict[str, Any] """Checksums for this file. Supported checksum implementations are backend dependent. 
diff --git a/python/lsst/resources/dav.py b/python/lsst/resources/dav.py index cdbf7402..ecec1936 100644 --- a/python/lsst/resources/dav.py +++ b/python/lsst/resources/dav.py @@ -302,7 +302,6 @@ def get_info(self) -> ResourceInfo: is_file=info["type"] == "file", size=info["size"], last_modified=info["last_modified"], - creation_time=None, checksums=info["checksums"], ) diff --git a/python/lsst/resources/file.py b/python/lsst/resources/file.py index e6107cf6..bd0044fb 100644 --- a/python/lsst/resources/file.py +++ b/python/lsst/resources/file.py @@ -80,18 +80,11 @@ def size(self) -> int: def get_info(self) -> ResourceInfo: """Return lightweight metadata about this file.""" stat_result = os.stat(self.ospath) - creation_timestamp = getattr(stat_result, "st_birthtime", None) - creation_time = ( - datetime.datetime.fromtimestamp(creation_timestamp, tz=datetime.UTC) - if creation_timestamp is not None - else None - ) return ResourceInfo( uri=str(self), is_file=not stat.S_ISDIR(stat_result.st_mode), size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), - creation_time=creation_time, checksums={}, ) diff --git a/python/lsst/resources/gs.py b/python/lsst/resources/gs.py index e8a118fc..c66e08bc 100644 --- a/python/lsst/resources/gs.py +++ b/python/lsst/resources/gs.py @@ -195,7 +195,6 @@ def get_info(self) -> ResourceInfo: is_file=False, size=0, last_modified=None, - creation_time=None, checksums={}, ) @@ -207,7 +206,6 @@ def get_info(self) -> ResourceInfo: is_file=False, size=0, last_modified=None, - creation_time=None, checksums={}, ) @@ -231,17 +229,11 @@ def get_info(self) -> ResourceInfo: except ValueError: updated = _coerce_gcs_datetime(self.blob._properties.get("updated")) - try: - created = _coerce_gcs_datetime(self.blob.time_created) - except ValueError: - created = _coerce_gcs_datetime(self.blob._properties.get("timeCreated")) - return ResourceInfo( uri=str(self), 
is_file=True, size=size, last_modified=updated, - creation_time=created, checksums=checksums, ) diff --git a/python/lsst/resources/http.py b/python/lsst/resources/http.py index 3d8366ff..750c1ca9 100644 --- a/python/lsst/resources/http.py +++ b/python/lsst/resources/http.py @@ -1028,7 +1028,6 @@ def get_info(self) -> ResourceInfo: is_file=prop.is_file, size=prop.size, last_modified=prop.last_modified, - creation_time=None, checksums=dict(prop.checksums), ) @@ -1087,7 +1086,6 @@ def _get_info_from_non_webdav_head(self, resp: requests.Response) -> ResourceInf is_file=not self.dirLike, size=size, last_modified=last_modified, - creation_time=None, checksums=checksums, ) diff --git a/python/lsst/resources/mem.py b/python/lsst/resources/mem.py index fb132a84..5a70597d 100644 --- a/python/lsst/resources/mem.py +++ b/python/lsst/resources/mem.py @@ -37,7 +37,6 @@ def get_info(self) -> ResourceInfo: is_file=True, size=0, last_modified=None, - creation_time=None, checksums={}, ) diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index 6c938ff1..ca0a4556 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -94,7 +94,6 @@ def get_info(self) -> ResourceInfo: is_file=False, size=0, last_modified=None, - creation_time=None, checksums={}, ) @@ -112,22 +111,14 @@ def get_info(self) -> ResourceInfo: is_file=True, size=0, last_modified=None, - creation_time=None, checksums={}, ) - creation_timestamp = getattr(stat_result, "st_birthtime", None) - creation_time = ( - datetime.datetime.fromtimestamp(creation_timestamp, tz=datetime.UTC) - if creation_timestamp is not None - else None - ) return ResourceInfo( uri=str(self), is_file=True, size=stat_result.st_size, last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), - creation_time=creation_time, checksums={}, ) diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index 3c71b615..0ee578e3 
100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -394,7 +394,6 @@ def get_info(self) -> ResourceInfo: is_file=False, size=0, last_modified=None, - creation_time=None, checksums={}, ) @@ -406,7 +405,6 @@ def get_info(self) -> ResourceInfo: is_file=False, size=0, last_modified=None, - creation_time=None, checksums={}, ) @@ -445,7 +443,6 @@ def get_info(self) -> ResourceInfo: is_file=True, size=response["ContentLength"], last_modified=last_modified, - creation_time=None, checksums=checksums, ) diff --git a/tests/test_dav.py b/tests/test_dav.py index 79d15647..a0a353e5 100644 --- a/tests/test_dav.py +++ b/tests/test_dav.py @@ -700,7 +700,6 @@ def test_dav_get_info(self): self.assertFalse(metadata.is_file) self.assertEqual(metadata.size, 0) - self.assertIsNone(metadata.creation_time) self.assertEqual(len(metadata.checksums), 0) self.assertEqual(metadata.last_modified.tzinfo, datetime.UTC) self.assertEqual(metadata.last_modified, subdir._stat().last_modified) @@ -721,7 +720,6 @@ def test_dav_get_info(self): self.assertIsInstance(metadata, ResourceInfo) self.assertTrue(metadata.is_file) self.assertEqual(metadata.size, local_file_size) - self.assertIsNone(metadata.creation_time) self.assertEqual(metadata.last_modified.tzinfo, datetime.UTC) self.assertEqual(metadata.last_modified, remote_file._stat().last_modified) diff --git a/tests/test_gs.py b/tests/test_gs.py index 02ccce74..d85a0105 100644 --- a/tests/test_gs.py +++ b/tests/test_gs.py @@ -180,7 +180,6 @@ def test_get_info(self) -> None: self.assertIsInstance(info, ResourceInfo) self.assertTrue(info.is_file) self.assertEqual(info.size, 3) - self.assertIsNotNone(info.creation_time) self.assertIsNotNone(info.last_modified) self.assertIsInstance(info.checksums, dict) diff --git a/tests/test_http.py b/tests/test_http.py index d16cc801..fcbda7bd 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -150,7 +150,6 @@ def test_get_info(self): self.assertIsInstance(info, ResourceInfo) 
self.assertTrue(info.is_file) self.assertEqual(info.size, 123) - self.assertIsNone(info.creation_time) self.assertEqual(info.last_modified.tzinfo, UTC) self.assertEqual(info.last_modified.year, 2025) self.assertEqual(info.checksums, {"md5": "rL0Y20zC+Fzt72VPzMSk2A==", "sha-256": "def456"}) diff --git a/tests/test_mem.py b/tests/test_mem.py index 64090aee..263665ef 100644 --- a/tests/test_mem.py +++ b/tests/test_mem.py @@ -44,7 +44,6 @@ def test_get_info(self) -> None: self.assertEqual(info.size, 0) self.assertEqual(info.checksums, {}) self.assertIsNone(info.last_modified) - self.assertIsNone(info.creation_time) if __name__ == "__main__": diff --git a/tests/test_s3.py b/tests/test_s3.py index e1580ba8..9791cf41 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -214,7 +214,6 @@ def test_get_info(self): self.assertIsInstance(info, ResourceInfo) self.assertTrue(info.is_file) self.assertEqual(info.size, 3) - self.assertIsNone(info.creation_time) self.assertIsInstance(info.checksums, dict) self.assertIn("crc32", info.checksums) # Only appears if ChecksumMode=ENABLED self.assertEqual(info.last_modified.tzinfo, datetime.UTC) From 2ef3a4fe65377328d63cc8b25dcd93bc3dea5998 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 6 Apr 2026 12:23:29 -0700 Subject: [PATCH 17/19] Allow S3 directories to return checksums In theory non-zero size S3 objects with trailing / can exist even though ResourcePath does not make them. get_info now handles that case and returns is_file=True if the file object has non-zero size. 
--- python/lsst/resources/s3.py | 22 +++++++++------------- python/lsst/resources/tests.py | 3 +-- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/python/lsst/resources/s3.py b/python/lsst/resources/s3.py index 0ee578e3..ac5eecea 100644 --- a/python/lsst/resources/s3.py +++ b/python/lsst/resources/s3.py @@ -397,17 +397,6 @@ def get_info(self) -> ResourceInfo: checksums={}, ) - if self.dirLike: - if not self.exists(): - raise FileNotFoundError(f"Resource {self} does not exist") - return ResourceInfo( - uri=str(self), - is_file=False, - size=0, - last_modified=None, - checksums={}, - ) - try: response = self.client.head_object( Bucket=self._bucket, @@ -438,10 +427,17 @@ def get_info(self) -> ResourceInfo: else: last_modified = last_modified.astimezone(datetime.UTC) + # For ResourcePath usage a dirLike object with zero size is a directory + # but in the general case anyone can create an object with a trailing + # `/` and treat it as a file. For self-consistency with ResourcePath + # call it a file if it has size > 0 even if dirLike. + size = response["ContentLength"] + is_file = (self.dirLike is not True) or (size > 0) + return ResourceInfo( uri=str(self), - is_file=True, - size=response["ContentLength"], + is_file=is_file, + size=size, last_modified=last_modified, checksums=checksums, ) diff --git a/python/lsst/resources/tests.py b/python/lsst/resources/tests.py index 46dff69c..9e8c68e5 100644 --- a/python/lsst/resources/tests.py +++ b/python/lsst/resources/tests.py @@ -690,12 +690,11 @@ def test_get_info_generic(self) -> None: for dir_uri in (uri.parent(), uri.root_uri()): # File URIs can return values for modification dates for - # directories. + # directories. S3 URIs can return checksums for directories. 
dirinfo = dir_uri.get_info() self.assertEqual(dirinfo.uri, str(dir_uri)) self.assertFalse(dirinfo.is_file) self.assertEqual(dirinfo.size, 0) - self.assertEqual(dirinfo.checksums, {}) newdir = self.tmpdir.join("newdir/", forceDirectory=True) with self.assertRaises(FileNotFoundError): From 876f9407fde2a4d3de966325268397f2d00912c1 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 6 Apr 2026 14:36:19 -0700 Subject: [PATCH 18/19] Share os.stat to ResourceInfo code for file and resources URIs --- python/lsst/resources/file.py | 38 +++++++++++++++++++----- python/lsst/resources/packageresource.py | 33 ++++---------------- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/python/lsst/resources/file.py b/python/lsst/resources/file.py index bd0044fb..f549088a 100644 --- a/python/lsst/resources/file.py +++ b/python/lsst/resources/file.py @@ -25,6 +25,7 @@ import stat import urllib.parse from collections.abc import Iterator +from pathlib import Path from typing import IO, TYPE_CHECKING from ._resourceHandles._fileResourceHandle import FileResourceHandle @@ -39,12 +40,37 @@ AbstractFileSystem = type if TYPE_CHECKING: + from importlib.resources.abc import Traversable + from .utils import TransactionProtocol log = logging.getLogger(__name__) +def _path_to_info(uri: str, path: str | Path | Traversable) -> ResourceInfo | None: + """Given a path to a local file, return a `ResourceInfo`.""" + if isinstance(path, Path): + stat_result = path.stat() + elif isinstance(path, str): + stat_result = os.stat(path) + elif (stat_method := getattr(path, "stat", None)) and callable(stat_method): + # Edge case triggered by importlib.resources. 
+ stat_result = stat_method() + if not isinstance(stat_result, os.stat_result): + raise RuntimeError(f"Unexpected stat result from {path}.stat()") + else: + return None + + return ResourceInfo( + uri=uri, + is_file=not stat.S_ISDIR(stat_result.st_mode), + size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, + last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), + checksums={}, + ) + + class FileResourcePath(ResourcePath): """Path for explicit ``file`` URI scheme.""" @@ -79,14 +105,10 @@ def size(self) -> int: def get_info(self) -> ResourceInfo: """Return lightweight metadata about this file.""" - stat_result = os.stat(self.ospath) - return ResourceInfo( - uri=str(self), - is_file=not stat.S_ISDIR(stat_result.st_mode), - size=0 if stat.S_ISDIR(stat_result.st_mode) else stat_result.st_size, - last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), - checksums={}, - ) + info = _path_to_info(str(self), self.ospath) + if info is None: + raise RuntimeError(f"Unexpected internal failure obtaining file info for {self}") + return info def remove(self) -> None: """Remove the resource.""" diff --git a/python/lsst/resources/packageresource.py b/python/lsst/resources/packageresource.py index ca0a4556..aea583f4 100644 --- a/python/lsst/resources/packageresource.py +++ b/python/lsst/resources/packageresource.py @@ -14,13 +14,10 @@ __all__ = ("PackageResourcePath",) import contextlib -import datetime import logging -import os import re from collections.abc import Iterator from importlib import resources -from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -33,6 +30,7 @@ from ._resourceHandles._baseResourceHandle import ResourceHandleProtocol from ._resourcePath import ResourceInfo, ResourcePath, ResourcePathExpression +from .file import _path_to_info log = logging.getLogger(__name__) @@ -88,24 +86,10 @@ def get_info(self) -> ResourceInfo: if ref is None or not (ref.is_file() 
or ref.is_dir()): raise FileNotFoundError(f"Unable to locate resource {self}.") - if ref.is_dir(): - return ResourceInfo( - uri=str(self), - is_file=False, - size=0, - last_modified=None, - checksums={}, - ) + info = _path_to_info(str(self), ref) - stat_result: os.stat_result | None = None - if isinstance(ref, Path): - stat_result = ref.stat() - else: - stat_method = getattr(ref, "stat", None) - if callable(stat_method): - stat_result = stat_method() - - if stat_result is None: + if info is None: + # Edge case such as file in Zip. return ResourceInfo( uri=str(self), is_file=True, @@ -113,14 +97,7 @@ def get_info(self) -> ResourceInfo: last_modified=None, checksums={}, ) - - return ResourceInfo( - uri=str(self), - is_file=True, - size=stat_result.st_size, - last_modified=datetime.datetime.fromtimestamp(stat_result.st_mtime, tz=datetime.UTC), - checksums={}, - ) + return info def read(self, size: int = -1) -> bytes: ref = self._get_ref() From 6c06ddf5e05ccb0a9f3b17f5ec45b74326c4adc8 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 6 Apr 2026 14:39:43 -0700 Subject: [PATCH 19/19] Add news fragment --- doc/changes/DM-52947.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changes/DM-52947.feature.rst diff --git a/doc/changes/DM-52947.feature.rst b/doc/changes/DM-52947.feature.rst new file mode 100644 index 00000000..098e8d83 --- /dev/null +++ b/doc/changes/DM-52947.feature.rst @@ -0,0 +1 @@ +Added a ``ResourcePath.get_info()`` method to provide a general interface for obtaining information about a resource including the size, modification date, and any checksums available.