From be725a9f4fdf7e8de10c942b223b7e299e76e6b0 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 5 Jun 2026 11:06:23 +0100 Subject: [PATCH 1/4] Slightly rationalise error code: Add tests for valid but unsupported encodings. --- .../netcdf/_bytecoding_datasets.py | 27 +++++++++---------- .../netcdf/test_bytecoding_datasets.py | 26 ++++++++++++------ 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py index cab4eb9421..65c93ac47f 100644 --- a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py +++ b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py @@ -304,20 +304,19 @@ def _identify_encoding(encoding, var_name: str, writing: bool = False) -> str: except LookupError: pass - if result is not None: - if result not in SUPPORTED_ENCODINGS: - # Python "codecs" recognised it, but we don't support it. - result = None - - if encoding is not None and result is None: - # Unrecognised encoding name : handle this as just a warning - msg = ( - f"Ignoring unsupported encoding for netCDF variable {var_name!r}: " - f"_Encoding = {encoding!r}, is not recognised as one of the supported " - f"encodings, {SUPPORTED_ENCODINGS}." - ) - warntype = IrisCfSaveWarning if writing else IrisCfLoadWarning - warnings.warn(msg, category=warntype) + if result and result not in SUPPORTED_ENCODINGS: + # Python "codecs" recognised it, but we don't support it. + result = None + + if result is None: + # Unrecognised encoding name : handle this as just a warning + msg = ( + f"Ignoring unsupported encoding for netCDF variable {var_name!r}: " + f"_Encoding = {encoding!r}, is not recognised as one of the supported " + f"encodings, {SUPPORTED_ENCODINGS}." + ) + warntype = IrisCfSaveWarning if writing else IrisCfLoadWarning + warnings.warn(msg, category=warntype) if result is None: if writing: diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py index 8432a0831f..12ea80bde4 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py @@ -225,13 +225,18 @@ def test_write_encoding_failure(self, tempdir, encoding): with pytest.raises(ValueError, match=msg): v[:] = samples_3_nonascii - def test_write_badencoding_ignore(self, tempdir): - path = tempdir / "test_bytecoded_writestrings_badencoding_ignore.nc" - ds = make_encoded_dataset(path, strlen=5, encoding="unknown") + @pytest.mark.parametrize("mode", ["invalid", "unsupported"]) + def test_write_badencoding_ignore(self, tempdir, mode): + if mode == "invalid": + encoding = "" + else: + encoding = "latin1" # "latin1" is a real thing + path = tempdir / f"test_bytecoded_writestrings_badencoding_{encoding}_ignore.nc" + ds = make_encoded_dataset(path, strlen=5, encoding=encoding) v = ds.variables["vxs"] msg = ( r"Ignoring unsupported encoding for netCDF variable 'vxs': " - ".*'unknown', is not recognised as one of the supported encodings" + f".*'{encoding}', is not recognised as one of the supported encodings" ) with pytest.warns(IrisCfSaveWarning, match=msg): v[:] = samples_3_ascii # will work OK @@ -465,10 +470,15 @@ def test_read_encoding_failure(self, tempdir, readmode): assert np.all(result == test_utf8_bytes) - def test_read_badencoding_ignore(self, tempdir): - path = tempdir / f"test_bytecoded_read_badencoding_ignore.nc" + @pytest.mark.parametrize("mode", ["invalid", "unsupported"]) + def test_read_badencoding_ignore(self, tempdir, mode): + if mode == "invalid": + encoding = "" + else: + encoding = "latin1" # "latin1" is a real thing + path = tempdir / f"test_bytecoded_read_badencoding_{encoding}_ignore.nc" strlen = 10 - ds = make_encoded_dataset(path, strlen=strlen, encoding="unknown") + ds = make_encoded_dataset(path, strlen=strlen, encoding=encoding) v = ds.variables["vxs"] test_utf8_bytes = make_bytearray( samples_3_nonascii, bytewidth=strlen, encoding="utf-8" @@ -477,7 +487,7 @@ def test_read_badencoding_ignore(self, tempdir): msg = ( r"Ignoring unsupported encoding for netCDF variable 'vxs': " - ".*'unknown', is not recognised as one of the supported encodings" + f".*'{encoding}', is not recognised as one of the supported encodings" ) with pytest.warns(IrisCfLoadWarning, match=msg): # raises warning but succeeds, due to default read encoding of 'utf-8' From 35f895c719116d5f0978ae3dd239958203e9c70e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 9 Jun 2026 10:52:58 +0100 Subject: [PATCH 2/4] Exercise a check for unexpected dtype itemsize. --- lib/iris/fileformats/netcdf/saver.py | 4 ++-- .../tests/integration/netcdf/test_stringdata.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py index 4938f481c4..1972dae567 100644 --- a/lib/iris/fileformats/netcdf/saver.py +++ b/lib/iris/fileformats/netcdf/saver.py @@ -1847,8 +1847,8 @@ def _create_generic_cf_array_var( # For numpy strings, itemsize is **always** a multiple of 4 if string_dimension_depth % 4 != 0: msg = ( - "Unexpected numpy string 'itemsize' for element " - f"{cube_or_mesh.name()}: " + "Unexpected numpy string 'dtype.itemsize' for element " + f"{cube_or_mesh.name()!r}: " f"'dtype.itemsize = {string_dimension_depth}, expected " "a multiple of four (always)." ) diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py index 925da599a6..64447983a7 100644 --- a/lib/iris/tests/integration/netcdf/test_stringdata.py +++ b/lib/iris/tests/integration/netcdf/test_stringdata.py @@ -680,6 +680,21 @@ def test_write_stringobjects__fail(self, tmp_path): with pytest.raises(ValueError, match=msg): iris.save(cube, filepath) + def test_write_unexpected_dtype_itemsize(self, mocker, tmp_path): + # Test unexpected form of numpy character data. Not clear if this can actually + # happen, but we do have a runtime test for it, so this just exercises that. + mock_dtype = mocker.Mock(spec=np.dtype, kind="U", itemsize=3) + mock_data = mocker.MagicMock(spec=np.ndarray, dtype=mock_dtype) + mocker.patch("numpy.asarray", return_value=mock_data) + cube = Cube(mock_data) + filepath = tmp_path / "write_unexpected_dtype_itemsize.nc" + msg = ( + r"Unexpected numpy string 'dtype\.itemsize' for element 'unknown': " + r"'dtype\.itemsize = 3, expected a multiple of four \(always\)\." + ) + with pytest.raises(ValueError, match=msg): + iris.save(cube, filepath) + class TestSaveloadBadUnicodeAsBytes: def test_save_load_bad_unicode(self, tmp_path): From 5e321fc63dc9f0e013c380485371aa3a83225a3e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 9 Jun 2026 12:26:18 +0100 Subject: [PATCH 3/4] Rationalise handling of set_auto_chartostring in threadsafe/encoded netcdf wrappers. --- lib/iris/fileformats/cf.py | 7 ++--- .../netcdf/_bytecoding_datasets.py | 28 +++++++++---------- .../fileformats/netcdf/_thread_safe_nc.py | 11 ++++++-- .../netcdf/test_bytecoding_datasets.py | 16 +++++++++++ 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 01440450f9..a72167b7d4 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1344,12 +1344,9 @@ def __init__(self, file_source, warn=False, monotonic=False): self._with_ugrid = False # Read the variables in the dataset only once to reduce runtime. - # Turn off *any* automatic decoding in the underlying netCDF4 dataset ds = self._dataset - if isinstance(ds, _thread_safe_nc.DatasetWrapper): - ds._contained_instance.set_auto_chartostring(False) - else: - ds.set_auto_chartostring(False) + # Turn off *any* automatic decoding in the underlying netCDF4 dataset. + ds.set_auto_chartostring(False) variables = self._dataset.variables self._translate(variables) self._build_cf_groups(variables) diff --git a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py index 65c93ac47f..00c63ffe0a 100644 --- a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py +++ b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py @@ -327,7 +327,17 @@ def _identify_encoding(encoding, var_name: str, writing: bool = False) -> str: return result -class EncodedVariable(VariableWrapper): +class Mixin_Block_AutoChartostring: + # Adjusted support for "set_auto_chartostring", for all of variable/group/dataset. + def set_auto_chartostring(self, onoff: bool): + # Though the concept doesn't really apply, support the method for simplicity's + # sake, but forbid turning it *on*. + if onoff: + msg = '"auto_chartostring" is not supported by Iris EncodedDataset\'s.' + raise TypeError(msg) + + +class EncodedVariable(Mixin_Block_AutoChartostring, VariableWrapper): """A variable wrapper that translates variable data according to byte encodings.""" def __init__(self, *args, **kwargs): @@ -380,26 +390,18 @@ def __setitem__(self, keys, data): data = encoding_spec.encode_strings_as_bytearray(data) super().__setitem__(keys, data) - def set_auto_chartostring(self, onoff: bool): - msg = "auto_chartostring is not supported by Iris 'EncodedVariable' type." - raise TypeError(msg) - -class EncodedGroup(GroupWrapper): +class EncodedGroup(Mixin_Block_AutoChartostring, GroupWrapper): """A specialised GroupWrapper whose variables are EncodedVariables.""" VAR_WRAPPER_CLS = EncodedVariable GRP_WRAPPER_CLS: Any | None = None - def set_auto_chartostring(self, onoff: bool): - msg = "auto_chartostring is not supported by Iris 'EncodedGroup' type." - raise TypeError(msg) - EncodedGroup.GRP_WRAPPER_CLS = EncodedGroup -class EncodedDataset(DatasetWrapper): +class EncodedDataset(Mixin_Block_AutoChartostring, DatasetWrapper): """A specialised DatasetWrapper. Its groups are EncodedGroups and variables are EncodedVariables. @@ -408,10 +410,6 @@ class EncodedDataset(DatasetWrapper): VAR_WRAPPER_CLS = EncodedVariable GRP_WRAPPER_CLS = EncodedGroup - def set_auto_chartostring(self, onoff: bool): - msg = "auto_chartostring is not supported by Iris 'EncodedGroup' type." - raise TypeError(msg) - class EncodedNetCDFDataProxy(NetCDFDataProxy): __slots__ = NetCDFDataProxy.__slots__ + ("encoding_details",) diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py index 486ad518fc..e5d6fdd0f1 100644 --- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py +++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py @@ -106,7 +106,14 @@ class DimensionWrapper(_ThreadSafeWrapper): _DUCKTYPE_CHECK_PROPERTIES = ["isunlimited"] -class VariableWrapper(_ThreadSafeWrapper): +class ThreadSafeWrapper_With_AutoChartostring(_ThreadSafeWrapper): + # A method supported by all of variables/groups/datasets. + def set_auto_chartostring(self, onoff: bool): + with _GLOBAL_NETCDF4_LOCK: + self._contained_instance.set_auto_chartostring(onoff) + + +class VariableWrapper(ThreadSafeWrapper_With_AutoChartostring): """Accessor for a netCDF4.Variable, always acquiring _GLOBAL_NETCDF4_LOCK. All API calls should be identical to those for netCDF4.Variable. @@ -150,7 +157,7 @@ def get_dims(self, *args, **kwargs) -> typing.Tuple[DimensionWrapper]: return tuple([DimensionWrapper.from_existing(d) for d in dimensions_]) -class GroupWrapper(_ThreadSafeWrapper): +class GroupWrapper(ThreadSafeWrapper_With_AutoChartostring): """Accessor for a netCDF4.Group, always acquiring _GLOBAL_NETCDF4_LOCK. All API calls should be identical to those for netCDF4.Group. diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py index 12ea80bde4..a3137612a1 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py @@ -541,3 +541,19 @@ def test_dataset_nonencoded_types(self, samplefile_path, classtype): finally: ds.close() + + @pytest.mark.parametrize("is_on", [True, False], ids=["c2sOn", "c2sOff"]) + @pytest.mark.parametrize("component_type", ["ds", "var", "group"]) + def test_auto_chartostring(self, samplefile_path, classtype, component_type, is_on): + ds = self.dataset_class(samplefile_path) + var = ds.variables["vx"] + grp = ds.groups["grp_a"] + component = {"ds": ds, "var": var, "group": grp}[component_type] + if classtype == "encoded" and is_on: + # In this case cannot turn "on": expect error + msg = '"auto_chartostring" is not supported by Iris EncodedDataset' + with pytest.raises(TypeError, match=msg): + component.set_auto_chartostring(is_on) + else: + # Just check method exists + doesn't error. + component.set_auto_chartostring(is_on) From 2916ac99ade80432d85e9ef756ebc87c362e7651 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 9 Jun 2026 17:53:19 +0100 Subject: [PATCH 4/4] Tiny test fixes. --- lib/iris/tests/integration/netcdf/test_stringdata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py index 64447983a7..b6aa6dfe3d 100644 --- a/lib/iris/tests/integration/netcdf/test_stringdata.py +++ b/lib/iris/tests/integration/netcdf/test_stringdata.py @@ -600,7 +600,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding): # Check that we can read UTF-8 encoded data, even with no _Encoding attribute. # This is a common case in the wild, and now accepted by CF as a default. # However, other encodings will FAIL to decode. - filepath = tmp_path / "utf8_no_encoding.nc" + filepath = tmp_path / f"read_{data_encoding}_no_encoding.nc" testdata = make_testfile( testfile_path=filepath, encoding_str=data_encoding, @@ -618,7 +618,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding): cube.data def test_read_wrong_encoding__fail(self, tmp_path): - filepath = tmp_path / "missing_encoding.nc" + filepath = tmp_path / "read_wrong_encoding.nc" testdata = make_testfile( testfile_path=filepath, encoding_str="utf-16",