Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,25 @@ permissions:
contents: read

jobs:
pre-commit:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
with:
python-version: '3.12'
- name: Install pre-commit
run: pip install pre-commit
- name: Run pre-commit
run: pre-commit run --all-files
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 # v8.2.0
with:
enable-cache: true
- name: Install dependencies
# The pyright pre-commit hook runs `uv run --frozen pyright`, so the
# project environment must be present.
run: uv sync --group dev --group test
- uses: j178/prek-action@bdca6f102f98e2b4c7029491a53dfd366469e33d # v2.0.4

test:
runs-on: ${{ matrix.os }}
Expand Down
17 changes: 8 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@ repos:
args: ["--fix", "--show-fixes"]
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.17.0
- repo: local
hooks:
- id: mypy
language_version: python
exclude: tests/.*
additional_dependencies:
- types-attrs
- typing-extensions>=4.15.0
- pydantic>=2.12
- id: pyright
name: pyright
language: system
entry: uv run --frozen pyright
pass_filenames: false
always_run: true
types_or: [python, pyi]
51 changes: 15 additions & 36 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dependencies = [
"rioxarray>=0.13.0",
"cf-xarray>=0.8.0",
"typing-extensions>=4.15.0",
"zarr-cm>=0.2.0",
"zarr-cm>=0.4.1",
"aiohttp>=3.14.0",
"s3fs>=2024.6.0",
"boto3>=1.34.0",
Expand All @@ -48,7 +48,7 @@ dependencies = [
dev = [
"pytest>=7.0.0",
"pytest-cov>=4.0.0",
"mypy>=1.0.0",
"pyright>=1.1.390",
"pre-commit>=3.0.0",
"bandit[toml]>=1.7.0",
]
Expand Down Expand Up @@ -149,40 +149,19 @@ ignore = [
"TRY003", # Long exception messages outside class - common pattern
]

[tool.mypy]
python_version = "3.12"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
check_untyped_defs = true
disallow_untyped_decorators = true
no_implicit_optional = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_no_return = true
warn_unreachable = true
strict_equality = true
plugins = ["pydantic.mypy"]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
warn_untyped_fields = true

[[tool.mypy.overrides]]
module = ["zarr.*", "xarray.*", "rioxarray.*", "cf_xarray.*", "dask.*"]
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = [
"eopf_geozarr.data_api.s1",
"eopf_geozarr.data_api.s2",
"eopf_geozarr.data_api.geozarr.v2",
"eopf_geozarr.data_api.geozarr.store",
]
disable_error_code = ["valid-type"]
[tool.pyright]
include = ["src", "tests"]
pythonVersion = "3.12"
typeCheckingMode = "standard"
# Several runtime deps ship no type stubs; we can't fix their types here, so
# don't report missing stubs/sources for them. (Imports still resolve because
# the packages are installed in the environment.)
reportMissingTypeStubs = false
reportMissingModuleSource = false
# Match the strictness we relied on under mypy.
reportUnnecessaryTypeIgnoreComment = true
reportReturnType = "error"
reportUnnecessaryCast = "error"

[tool.pytest.ini_options]
minversion = "7.0"
Expand Down
4 changes: 2 additions & 2 deletions src/eopf_geozarr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def format_data_vars(data_vars: dict[str, xr.DataArray]) -> str:
# Fallback to simple format if xarray HTML fails
vars_html = []
for name, var in data_vars.items():
dims_str = format_dimensions(dict(zip(var.dims, var.shape, strict=True)))
dims_str = format_dimensions(dict(zip(map(str, var.dims), var.shape, strict=True)))
dtype_str = str(var.dtype)
vars_html.append(
f"""
Expand Down Expand Up @@ -450,7 +450,7 @@ def render_node(node: xr.DataTree, path: str = "", level: int = 0) -> str:
<div class="tree-section">
<h4 class="section-title">Variables</h4>
<div class="tree-variables">
{format_data_vars(node.ds.data_vars)}
{format_data_vars({str(k): v for k, v in node.ds.data_vars.items()})}
</div>
</div>
"""
Expand Down
54 changes: 46 additions & 8 deletions src/eopf_geozarr/conversion/fs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import os
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Final, Literal, cast
from urllib.parse import urlparse

import s3fs
Expand All @@ -16,6 +16,8 @@
if TYPE_CHECKING:
import xarray as xr

# Access-mode strings that the zarr group-opening helpers in this module
# validate against before passing ``mode`` on to ``zarr.open_group``.
ZarrOpenMode = Literal["r", "r+", "a", "w", "w-"]

_MISSING = object()  # sentinel for missing optional attrs


Expand Down Expand Up @@ -52,18 +54,44 @@ def replace_json_invalid_floats(obj: object) -> object:
return obj


def _sanitize_attrs(attrs: Mapping[str, object]) -> dict[str, object]:
    """Return a sanitized ``dict`` built from an attributes mapping.

    The mapping is copied into a plain ``dict`` and passed through
    :func:`replace_json_invalid_floats`. That helper is annotated
    ``object -> object``, so the dict-in/dict-out invariant is checked with
    ``isinstance`` at runtime rather than asserted with a cast; a
    ``TypeError`` is raised if the invariant does not hold.
    """
    cleaned = replace_json_invalid_floats(dict(attrs))
    if isinstance(cleaned, dict):
        return cleaned
    # A dict went in, so anything else coming back is a broken invariant.
    raise TypeError(  # pragma: no cover - invariant guard
        f"expected a dict after sanitizing attrs, got {type(cleaned).__name__}"
    )


_ZARR_MODES: Final = ("r", "r+", "a", "w", "w-")


def _zarr_mode(mode: str) -> ZarrOpenMode:
    """Return *mode* unchanged once it is known to be a valid zarr access mode.

    Membership in ``_ZARR_MODES`` is tested at runtime, which narrows the
    plain ``str`` to the ``Literal`` type without resorting to a cast. A
    ``ValueError`` is raised for any value outside ``_ZARR_MODES``.
    """
    if mode in _ZARR_MODES:
        return mode
    raise ValueError(f"Invalid zarr access mode {mode!r}; expected one of {_ZARR_MODES}")


class NanCompatibleJSONEncoder(json.JSONEncoder):
"""
Custom JSON encoder that converts NaN, Inf, -Inf values to JSON-safe equivalents
to ensure valid JSON output.
"""

def encode(self, obj: Any) -> str:
def encode(self, o: Any) -> str:
"""
Encode object to JSON string, converting NaN values to "NaN".
"""

converted_obj = replace_json_invalid_floats(obj)
converted_obj = replace_json_invalid_floats(o)
return super().encode(converted_obj)


Expand All @@ -87,22 +115,22 @@ def sanitize_dataset_attributes(ds: "xr.Dataset") -> "xr.Dataset":
ds_clean = ds.copy()

# Sanitize dataset attributes
ds_clean.attrs = replace_json_invalid_floats(ds_clean.attrs)
ds_clean.attrs = _sanitize_attrs(ds_clean.attrs)

# Sanitize variable attributes
for var_name in ds_clean.data_vars:
var = ds_clean[var_name]
# Preserve _FillValue as-is — xarray encodes it via FillValueCoder on write;
# converting np.nan to the string "NaN" would break FillValueCoder.encode.
fill_value = var.attrs.get("_FillValue", _MISSING)
var.attrs = replace_json_invalid_floats(var.attrs)
var.attrs = _sanitize_attrs(var.attrs)
if fill_value is not _MISSING:
var.attrs["_FillValue"] = fill_value

# Sanitize coordinate attributes
for coord_name in ds_clean.coords:
coord = ds_clean[coord_name]
coord.attrs = replace_json_invalid_floats(coord.attrs)
coord.attrs = _sanitize_attrs(coord.attrs)

return ds_clean

Expand Down Expand Up @@ -405,7 +433,12 @@ def open_s3_zarr_group(s3_path: str, mode: str = "r", **s3_kwargs: Any) -> zarr.
Zarr group
"""
storage_options = get_s3_storage_options(s3_path, **s3_kwargs)
return zarr.open_group(s3_path, mode=mode, zarr_format=3, storage_options=storage_options)
return zarr.open_group(
s3_path,
mode=_zarr_mode(mode),
zarr_format=3,
storage_options=cast("dict[str, object]", storage_options),
)


def get_s3_credentials_info() -> S3Credentials:
Expand Down Expand Up @@ -589,4 +622,9 @@ def open_zarr_group(path: str, mode: str = "r", **kwargs: Any) -> zarr.Group:
Zarr group
"""
storage_options = get_storage_options(path, **kwargs)
return zarr.open_group(path, mode=mode, zarr_format=3, storage_options=storage_options)
return zarr.open_group(
path,
mode=_zarr_mode(mode),
zarr_format=3,
storage_options=cast("dict[str, object] | None", storage_options),
)
Loading