Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,12 +1344,9 @@ def __init__(self, file_source, warn=False, monotonic=False):
self._with_ugrid = False

# Read the variables in the dataset only once to reduce runtime.
# Turn off *any* automatic decoding in the underlying netCDF4 dataset
ds = self._dataset
if isinstance(ds, _thread_safe_nc.DatasetWrapper):
ds._contained_instance.set_auto_chartostring(False)
else:
ds.set_auto_chartostring(False)
# Turn off *any* automatic decoding in the underlying netCDF4 dataset.
ds.set_auto_chartostring(False)
variables = self._dataset.variables
self._translate(variables)
self._build_cf_groups(variables)
Expand Down
55 changes: 26 additions & 29 deletions lib/iris/fileformats/netcdf/_bytecoding_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,20 +304,19 @@ def _identify_encoding(encoding, var_name: str, writing: bool = False) -> str:
except LookupError:
pass

if result is not None:
if result not in SUPPORTED_ENCODINGS:
# Python "codecs" recognised it, but we don't support it.
result = None

if encoding is not None and result is None:
# Unrecognised encoding name : handle this as just a warning
msg = (
f"Ignoring unsupported encoding for netCDF variable {var_name!r}: "
f"_Encoding = {encoding!r}, is not recognised as one of the supported "
f"encodings, {SUPPORTED_ENCODINGS}."
)
warntype = IrisCfSaveWarning if writing else IrisCfLoadWarning
warnings.warn(msg, category=warntype)
if result and result not in SUPPORTED_ENCODINGS:
# Python "codecs" recognised it, but we don't support it.
result = None

if result is None:
# Unrecognised encoding name : handle this as just a warning
msg = (
f"Ignoring unsupported encoding for netCDF variable {var_name!r}: "
f"_Encoding = {encoding!r}, is not recognised as one of the supported "
f"encodings, {SUPPORTED_ENCODINGS}."
)
warntype = IrisCfSaveWarning if writing else IrisCfLoadWarning
warnings.warn(msg, category=warntype)

if result is None:
if writing:
Expand All @@ -328,7 +327,17 @@ def _identify_encoding(encoding, var_name: str, writing: bool = False) -> str:
return result


class EncodedVariable(VariableWrapper):
class Mixin_Block_AutoChartostring:
    """Mixin providing a restricted ``set_auto_chartostring`` method.

    Adjusted support for "set_auto_chartostring", for all of
    variable/group/dataset : the method exists, but the feature can never be
    switched on.
    """

    def set_auto_chartostring(self, onoff: bool) -> None:
        """Accept a request to turn "auto_chartostring" off; refuse to turn it on.

        Parameters
        ----------
        onoff : bool
            The requested setting.  A false value is accepted and does nothing.

        Raises
        ------
        TypeError
            If ``onoff`` is true.

        """
        # Though the concept doesn't really apply, support the method for simplicity's
        # sake, but forbid turning it *on*.
        if onoff:
            msg = '"auto_chartostring" is not supported by Iris EncodedDataset\'s.'
            raise TypeError(msg)


class EncodedVariable(Mixin_Block_AutoChartostring, VariableWrapper):
"""A variable wrapper that translates variable data according to byte encodings."""

def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -381,26 +390,18 @@ def __setitem__(self, keys, data):
data = encoding_spec.encode_strings_as_bytearray(data)
super().__setitem__(keys, data)

def set_auto_chartostring(self, onoff: bool):
msg = "auto_chartostring is not supported by Iris 'EncodedVariable' type."
raise TypeError(msg)


class EncodedGroup(GroupWrapper):
class EncodedGroup(Mixin_Block_AutoChartostring, GroupWrapper):
"""A specialised GroupWrapper whose variables are EncodedVariables."""

VAR_WRAPPER_CLS = EncodedVariable
GRP_WRAPPER_CLS: Any | None = None

def set_auto_chartostring(self, onoff: bool):
msg = "auto_chartostring is not supported by Iris 'EncodedGroup' type."
raise TypeError(msg)


EncodedGroup.GRP_WRAPPER_CLS = EncodedGroup


class EncodedDataset(DatasetWrapper):
class EncodedDataset(Mixin_Block_AutoChartostring, DatasetWrapper):
"""A specialised DatasetWrapper.

Its groups are EncodedGroups and variables are EncodedVariables.
Expand All @@ -409,10 +410,6 @@ class EncodedDataset(DatasetWrapper):
VAR_WRAPPER_CLS = EncodedVariable
GRP_WRAPPER_CLS = EncodedGroup

def set_auto_chartostring(self, onoff: bool):
msg = "auto_chartostring is not supported by Iris 'EncodedGroup' type."
raise TypeError(msg)


class EncodedNetCDFDataProxy(NetCDFDataProxy):
__slots__ = NetCDFDataProxy.__slots__ + ("encoding_details",)
Expand Down
11 changes: 9 additions & 2 deletions lib/iris/fileformats/netcdf/_thread_safe_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,14 @@ class DimensionWrapper(_ThreadSafeWrapper):
_DUCKTYPE_CHECK_PROPERTIES = ["isunlimited"]


class VariableWrapper(_ThreadSafeWrapper):
class ThreadSafeWrapper_With_AutoChartostring(_ThreadSafeWrapper):
    """A thread-safe wrapper which also provides ``set_auto_chartostring``.

    This is a method supported by all of variables/groups/datasets.
    """

    def set_auto_chartostring(self, onoff: bool) -> None:
        """Pass ``onoff`` to ``set_auto_chartostring`` of the contained instance.

        The call is made while holding ``_GLOBAL_NETCDF4_LOCK``.

        Parameters
        ----------
        onoff : bool
            The setting, forwarded unchanged to the contained instance.

        """
        with _GLOBAL_NETCDF4_LOCK:
            self._contained_instance.set_auto_chartostring(onoff)


class VariableWrapper(ThreadSafeWrapper_With_AutoChartostring):
"""Accessor for a netCDF4.Variable, always acquiring _GLOBAL_NETCDF4_LOCK.

All API calls should be identical to those for netCDF4.Variable.
Expand Down Expand Up @@ -150,7 +157,7 @@ def get_dims(self, *args, **kwargs) -> typing.Tuple[DimensionWrapper]:
return tuple([DimensionWrapper.from_existing(d) for d in dimensions_])


class GroupWrapper(_ThreadSafeWrapper):
class GroupWrapper(ThreadSafeWrapper_With_AutoChartostring):
"""Accessor for a netCDF4.Group, always acquiring _GLOBAL_NETCDF4_LOCK.

All API calls should be identical to those for netCDF4.Group.
Expand Down
4 changes: 2 additions & 2 deletions lib/iris/fileformats/netcdf/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1847,8 +1847,8 @@ def _create_generic_cf_array_var(
# For numpy strings, itemsize is **always** a multiple of 4
if string_dimension_depth % 4 != 0:
msg = (
"Unexpected numpy string 'itemsize' for element "
f"{cube_or_mesh.name()}: "
"Unexpected numpy string 'dtype.itemsize' for element "
f"{cube_or_mesh.name()!r}: "
f"'dtype.itemsize = {string_dimension_depth}, expected "
"a multiple of four (always)."
)
Expand Down
19 changes: 17 additions & 2 deletions lib/iris/tests/integration/netcdf/test_stringdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding):
# Check that we can read UTF-8 encoded data, even with no _Encoding attribute.
# This is a common case in the wild, and now accepted by CF as a default.
# However, other encodings will FAIL to decode.
filepath = tmp_path / "utf8_no_encoding.nc"
filepath = tmp_path / f"read_{data_encoding}_no_encoding.nc"
testdata = make_testfile(
testfile_path=filepath,
encoding_str=data_encoding,
Expand All @@ -618,7 +618,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding):
cube.data

def test_read_wrong_encoding__fail(self, tmp_path):
filepath = tmp_path / "missing_encoding.nc"
filepath = tmp_path / "read_wrong_encoding.nc"
testdata = make_testfile(
testfile_path=filepath,
encoding_str="utf-16",
Expand Down Expand Up @@ -680,6 +680,21 @@ def test_write_stringobjects__fail(self, tmp_path):
with pytest.raises(ValueError, match=msg):
iris.save(cube, filepath)

def test_write_unexpected_dtype_itemsize(self, mocker, tmp_path):
    # Exercise the saver's runtime check on the "itemsize" of numpy character data.
    # It is not clear that such data can really occur, so a mocked array with a
    # "U"-kind dtype and an itemsize of 3 is used to reach the check.
    fake_dtype = mocker.Mock(spec=np.dtype, kind="U", itemsize=3)
    fake_array = mocker.MagicMock(spec=np.ndarray, dtype=fake_dtype)
    # The patch must be in place before the cube is created.
    mocker.patch("numpy.asarray", return_value=fake_array)
    cube = Cube(fake_array)
    expected_msg = (
        r"Unexpected numpy string 'dtype\.itemsize' for element 'unknown': "
        r"'dtype\.itemsize = 3, expected a multiple of four \(always\)\."
    )
    target = tmp_path / "write_unexpected_dtype_itemsize.nc"
    with pytest.raises(ValueError, match=expected_msg):
        iris.save(cube, target)


class TestSaveloadBadUnicodeAsBytes:
def test_save_load_bad_unicode(self, tmp_path):
Expand Down
42 changes: 34 additions & 8 deletions lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,18 @@ def test_write_encoding_failure(self, tempdir, encoding):
with pytest.raises(ValueError, match=msg):
v[:] = samples_3_nonascii

def test_write_badencoding_ignore(self, tempdir):
path = tempdir / "test_bytecoded_writestrings_badencoding_ignore.nc"
ds = make_encoded_dataset(path, strlen=5, encoding="unknown")
@pytest.mark.parametrize("mode", ["invalid", "unsupported"])
def test_write_badencoding_ignore(self, tempdir, mode):
if mode == "invalid":
encoding = "<unknown>"
else:
encoding = "latin1" # "latin1" is a real thing
path = tempdir / f"test_bytecoded_writestrings_badencoding_{encoding}_ignore.nc"
ds = make_encoded_dataset(path, strlen=5, encoding=encoding)
v = ds.variables["vxs"]
msg = (
r"Ignoring unsupported encoding for netCDF variable 'vxs': "
".*'unknown', is not recognised as one of the supported encodings"
f".*'{encoding}', is not recognised as one of the supported encodings"
)
with pytest.warns(IrisCfSaveWarning, match=msg):
v[:] = samples_3_ascii # will work OK
Expand Down Expand Up @@ -465,10 +470,15 @@ def test_read_encoding_failure(self, tempdir, readmode):

assert np.all(result == test_utf8_bytes)

def test_read_badencoding_ignore(self, tempdir):
path = tempdir / f"test_bytecoded_read_badencoding_ignore.nc"
@pytest.mark.parametrize("mode", ["invalid", "unsupported"])
def test_read_badencoding_ignore(self, tempdir, mode):
if mode == "invalid":
encoding = "<unknown>"
else:
encoding = "latin1" # "latin1" is a real thing
path = tempdir / f"test_bytecoded_read_badencoding_{encoding}_ignore.nc"
strlen = 10
ds = make_encoded_dataset(path, strlen=strlen, encoding="unknown")
ds = make_encoded_dataset(path, strlen=strlen, encoding=encoding)
v = ds.variables["vxs"]
test_utf8_bytes = make_bytearray(
samples_3_nonascii, bytewidth=strlen, encoding="utf-8"
Expand All @@ -477,7 +487,7 @@ def test_read_badencoding_ignore(self, tempdir):

msg = (
r"Ignoring unsupported encoding for netCDF variable 'vxs': "
".*'unknown', is not recognised as one of the supported encodings"
f".*'{encoding}', is not recognised as one of the supported encodings"
)
with pytest.warns(IrisCfLoadWarning, match=msg):
# raises warning but succeeds, due to default read encoding of 'utf-8'
Expand Down Expand Up @@ -531,3 +541,19 @@ def test_dataset_nonencoded_types(self, samplefile_path, classtype):

finally:
ds.close()

@pytest.mark.parametrize("is_on", [True, False], ids=["c2sOn", "c2sOff"])
@pytest.mark.parametrize("component_type", ["ds", "var", "group"])
def test_auto_chartostring(self, samplefile_path, classtype, component_type, is_on):
    # Check "set_auto_chartostring" on each of a dataset, a variable and a group,
    # for both the "on" and "off" settings.
    ds = self.dataset_class(samplefile_path)
    try:
        var = ds.variables["vx"]
        grp = ds.groups["grp_a"]
        component = {"ds": ds, "var": var, "group": grp}[component_type]
        if classtype == "encoded" and is_on:
            # In this case cannot turn "on": expect error
            msg = '"auto_chartostring" is not supported by Iris EncodedDataset'
            with pytest.raises(TypeError, match=msg):
                component.set_auto_chartostring(is_on)
        else:
            # Just check method exists + doesn't error.
            component.set_auto_chartostring(is_on)
    finally:
        # Always close the dataset, even if one of the checks above fails.
        ds.close()
Loading