From be725a9f4fdf7e8de10c942b223b7e299e76e6b0 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 5 Jun 2026 11:06:23 +0100
Subject: [PATCH 1/4] Slightly rationalise error code: Add tests for valid but
 unsupported encodings.

---
 .../netcdf/_bytecoding_datasets.py            | 27 +++++++++----------
 .../netcdf/test_bytecoding_datasets.py        | 26 ++++++++++++------
 2 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
index cab4eb9421..65c93ac47f 100644
--- a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
+++ b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
@@ -304,20 +304,19 @@ def _identify_encoding(encoding, var_name: str, writing: bool = False) -> str:
         except LookupError:
             pass
 
-        if result is not None:
-            if result not in SUPPORTED_ENCODINGS:
-                # Python "codecs" recognised it, but we don't support it.
-                result = None
-
-    if encoding is not None and result is None:
-        # Unrecognised encoding name : handle this as just a warning
-        msg = (
-            f"Ignoring unsupported encoding for netCDF variable {var_name!r}: "
-            f"_Encoding = {encoding!r}, is not recognised as one of the supported "
-            f"encodings, {SUPPORTED_ENCODINGS}."
-        )
-        warntype = IrisCfSaveWarning if writing else IrisCfLoadWarning
-        warnings.warn(msg, category=warntype)
+        if result and result not in SUPPORTED_ENCODINGS:
+            # Python "codecs" recognised it, but we don't support it.
+            result = None
+
+        if result is None:
+            # Unrecognised or unsupported encoding name: handle this as just a warning
+            msg = (
+                f"Ignoring unsupported encoding for netCDF variable {var_name!r}: "
+                f"_Encoding = {encoding!r}, is not recognised as one of the supported "
+                f"encodings, {SUPPORTED_ENCODINGS}."
+            )
+            warntype = IrisCfSaveWarning if writing else IrisCfLoadWarning
+            warnings.warn(msg, category=warntype)
 
     if result is None:
         if writing:
diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
index 8432a0831f..12ea80bde4 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
@@ -225,13 +225,18 @@ def test_write_encoding_failure(self, tempdir, encoding):
         with pytest.raises(ValueError, match=msg):
             v[:] = samples_3_nonascii
 
-    def test_write_badencoding_ignore(self, tempdir):
-        path = tempdir / "test_bytecoded_writestrings_badencoding_ignore.nc"
-        ds = make_encoded_dataset(path, strlen=5, encoding="unknown")
+    @pytest.mark.parametrize("mode", ["invalid", "unsupported"])
+    def test_write_badencoding_ignore(self, tempdir, mode):
+        if mode == "invalid":
+            encoding = "<unknown>"
+        else:
+            encoding = "latin1"  # a valid codec name, but not a supported encoding
+        path = tempdir / f"test_bytecoded_writestrings_badencoding_{encoding}_ignore.nc"
+        ds = make_encoded_dataset(path, strlen=5, encoding=encoding)
         v = ds.variables["vxs"]
         msg = (
             r"Ignoring unsupported encoding for netCDF variable 'vxs': "
-            ".*'unknown', is not recognised as one of the supported encodings"
+            f".*'{encoding}', is not recognised as one of the supported encodings"
         )
         with pytest.warns(IrisCfSaveWarning, match=msg):
             v[:] = samples_3_ascii  # will work OK
@@ -465,10 +470,15 @@ def test_read_encoding_failure(self, tempdir, readmode):
 
             assert np.all(result == test_utf8_bytes)
 
-    def test_read_badencoding_ignore(self, tempdir):
-        path = tempdir / f"test_bytecoded_read_badencoding_ignore.nc"
+    @pytest.mark.parametrize("mode", ["invalid", "unsupported"])
+    def test_read_badencoding_ignore(self, tempdir, mode):
+        if mode == "invalid":
+            encoding = "<unknown>"
+        else:
+            encoding = "latin1"  # a valid codec name, but not a supported encoding
+        path = tempdir / f"test_bytecoded_read_badencoding_{encoding}_ignore.nc"
         strlen = 10
-        ds = make_encoded_dataset(path, strlen=strlen, encoding="unknown")
+        ds = make_encoded_dataset(path, strlen=strlen, encoding=encoding)
         v = ds.variables["vxs"]
         test_utf8_bytes = make_bytearray(
             samples_3_nonascii, bytewidth=strlen, encoding="utf-8"
@@ -477,7 +487,7 @@ def test_read_badencoding_ignore(self, tempdir):
 
         msg = (
             r"Ignoring unsupported encoding for netCDF variable 'vxs': "
-            ".*'unknown', is not recognised as one of the supported encodings"
+            f".*'{encoding}', is not recognised as one of the supported encodings"
         )
         with pytest.warns(IrisCfLoadWarning, match=msg):
             # raises warning but succeeds, due to default read encoding of 'utf-8'

From 35f895c719116d5f0978ae3dd239958203e9c70e Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Tue, 9 Jun 2026 10:52:58 +0100
Subject: [PATCH 2/4] Exercise a check for unexpected dtype itemsize.

---
 lib/iris/fileformats/netcdf/saver.py              |  4 ++--
 .../tests/integration/netcdf/test_stringdata.py   | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index 4938f481c4..1972dae567 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -1847,8 +1847,8 @@ def _create_generic_cf_array_var(
             # For numpy strings, itemsize is **always** a multiple of 4
             if string_dimension_depth % 4 != 0:
                 msg = (
-                    "Unexpected numpy string 'itemsize' for element "
-                    f"{cube_or_mesh.name()}: "
+                    "Unexpected numpy string 'dtype.itemsize' for element "
+                    f"{cube_or_mesh.name()!r}: "
                     f"'dtype.itemsize = {string_dimension_depth}, expected "
                     "a multiple of four (always)."
                 )
diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 925da599a6..64447983a7 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -680,6 +680,21 @@ def test_write_stringobjects__fail(self, tmp_path):
         with pytest.raises(ValueError, match=msg):
             iris.save(cube, filepath)
 
+    def test_write_unexpected_dtype_itemsize(self, mocker, tmp_path):
+        # Exercise the saver's check for a numpy string 'dtype.itemsize' which is not a
+        #  multiple of 4.  Not clear if this can actually happen, so it is mocked.
+        mock_dtype = mocker.Mock(spec=np.dtype, kind="U", itemsize=3)
+        mock_data = mocker.MagicMock(spec=np.ndarray, dtype=mock_dtype)
+        mocker.patch("numpy.asarray", return_value=mock_data)
+        cube = Cube(mock_data)
+        filepath = tmp_path / "write_unexpected_dtype_itemsize.nc"
+        msg = (
+            r"Unexpected numpy string 'dtype\.itemsize' for element 'unknown': "
+            r"'dtype\.itemsize = 3, expected a multiple of four \(always\)\."
+        )
+        with pytest.raises(ValueError, match=msg):
+            iris.save(cube, filepath)
+
 
 class TestSaveloadBadUnicodeAsBytes:
     def test_save_load_bad_unicode(self, tmp_path):

From 5e321fc63dc9f0e013c380485371aa3a83225a3e Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Tue, 9 Jun 2026 12:26:18 +0100
Subject: [PATCH 3/4] Rationalise handling of set_auto_chartostring in
 threadsafe/encoded netcdf wrappers.

---
 lib/iris/fileformats/cf.py                    |  7 ++---
 .../netcdf/_bytecoding_datasets.py            | 28 +++++++++----------
 .../fileformats/netcdf/_thread_safe_nc.py     | 11 ++++++--
 .../netcdf/test_bytecoding_datasets.py        | 16 +++++++++++
 4 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py
index 01440450f9..a72167b7d4 100644
--- a/lib/iris/fileformats/cf.py
+++ b/lib/iris/fileformats/cf.py
@@ -1344,12 +1344,9 @@ def __init__(self, file_source, warn=False, monotonic=False):
             self._with_ugrid = False
 
         # Read the variables in the dataset only once to reduce runtime.
-        # Turn off *any* automatic decoding in the underlying netCDF4 dataset
         ds = self._dataset
-        if isinstance(ds, _thread_safe_nc.DatasetWrapper):
-            ds._contained_instance.set_auto_chartostring(False)
-        else:
-            ds.set_auto_chartostring(False)
+        # Turn off *any* automatic decoding in the underlying netCDF4 dataset.
+        ds.set_auto_chartostring(False)
         variables = self._dataset.variables
         self._translate(variables)
         self._build_cf_groups(variables)
diff --git a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
index 65c93ac47f..00c63ffe0a 100644
--- a/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
+++ b/lib/iris/fileformats/netcdf/_bytecoding_datasets.py
@@ -327,7 +327,17 @@ def _identify_encoding(encoding, var_name: str, writing: bool = False) -> str:
     return result
 
 
-class EncodedVariable(VariableWrapper):
+class Mixin_Block_AutoChartostring:
+    # Shared "set_auto_chartostring" override for encoded variables/groups/datasets.
+    def set_auto_chartostring(self, onoff: bool):
+        # Turning it *off* is a silent no-op (TODO confirm nothing needs it forwarded to
+        #  the wrapped object) ; turning it *on* is forbidden, and raises a TypeError.
+        if onoff:
+            msg = '"auto_chartostring" is not supported by Iris EncodedDatasets.'
+            raise TypeError(msg)
+
+
+class EncodedVariable(Mixin_Block_AutoChartostring, VariableWrapper):
     """A variable wrapper that translates variable data according to byte encodings."""
 
     def __init__(self, *args, **kwargs):
@@ -380,26 +390,18 @@ def __setitem__(self, keys, data):
         data = encoding_spec.encode_strings_as_bytearray(data)
         super().__setitem__(keys, data)
 
-    def set_auto_chartostring(self, onoff: bool):
-        msg = "auto_chartostring is not supported by Iris 'EncodedVariable' type."
-        raise TypeError(msg)
-
 
-class EncodedGroup(GroupWrapper):
+class EncodedGroup(Mixin_Block_AutoChartostring, GroupWrapper):
     """A specialised GroupWrapper whose variables are EncodedVariables."""
 
     VAR_WRAPPER_CLS = EncodedVariable
     GRP_WRAPPER_CLS: Any | None = None
 
-    def set_auto_chartostring(self, onoff: bool):
-        msg = "auto_chartostring is not supported by Iris 'EncodedGroup' type."
-        raise TypeError(msg)
-
 
 EncodedGroup.GRP_WRAPPER_CLS = EncodedGroup
 
 
-class EncodedDataset(DatasetWrapper):
+class EncodedDataset(Mixin_Block_AutoChartostring, DatasetWrapper):
     """A specialised DatasetWrapper.
 
     Its groups are EncodedGroups and variables are EncodedVariables.
@@ -408,10 +410,6 @@ class EncodedDataset(DatasetWrapper):
     VAR_WRAPPER_CLS = EncodedVariable
     GRP_WRAPPER_CLS = EncodedGroup
 
-    def set_auto_chartostring(self, onoff: bool):
-        msg = "auto_chartostring is not supported by Iris 'EncodedGroup' type."
-        raise TypeError(msg)
-
 
 class EncodedNetCDFDataProxy(NetCDFDataProxy):
     __slots__ = NetCDFDataProxy.__slots__ + ("encoding_details",)
diff --git a/lib/iris/fileformats/netcdf/_thread_safe_nc.py b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
index 486ad518fc..e5d6fdd0f1 100644
--- a/lib/iris/fileformats/netcdf/_thread_safe_nc.py
+++ b/lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -106,7 +106,14 @@ class DimensionWrapper(_ThreadSafeWrapper):
     _DUCKTYPE_CHECK_PROPERTIES = ["isunlimited"]
 
 
-class VariableWrapper(_ThreadSafeWrapper):
+class ThreadSafeWrapper_With_AutoChartostring(_ThreadSafeWrapper):
+    # Adds "set_auto_chartostring", a method common to variables/groups/datasets.
+    def set_auto_chartostring(self, onoff: bool):
+        with _GLOBAL_NETCDF4_LOCK:  # forward to the wrapped object, under the lock
+            self._contained_instance.set_auto_chartostring(onoff)
+
+
+class VariableWrapper(ThreadSafeWrapper_With_AutoChartostring):
     """Accessor for a netCDF4.Variable, always acquiring _GLOBAL_NETCDF4_LOCK.
 
     All API calls should be identical to those for netCDF4.Variable.
@@ -150,7 +157,7 @@ def get_dims(self, *args, **kwargs) -> typing.Tuple[DimensionWrapper]:
         return tuple([DimensionWrapper.from_existing(d) for d in dimensions_])
 
 
-class GroupWrapper(_ThreadSafeWrapper):
+class GroupWrapper(ThreadSafeWrapper_With_AutoChartostring):
     """Accessor for a netCDF4.Group, always acquiring _GLOBAL_NETCDF4_LOCK.
 
     All API calls should be identical to those for netCDF4.Group.
diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
index 12ea80bde4..a3137612a1 100644
--- a/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
+++ b/lib/iris/tests/unit/fileformats/netcdf/test_bytecoding_datasets.py
@@ -541,3 +541,19 @@ def test_dataset_nonencoded_types(self, samplefile_path, classtype):
 
         finally:
             ds.close()
+
+    @pytest.mark.parametrize("is_on", [True, False], ids=["c2sOn", "c2sOff"])
+    @pytest.mark.parametrize("component_type", ["ds", "var", "group"])
+    def test_auto_chartostring(self, samplefile_path, classtype, component_type, is_on):
+        ds = self.dataset_class(samplefile_path)
+        try:
+            parts = {"ds": ds, "var": ds.variables["vx"], "group": ds.groups["grp_a"]}
+            component = parts[component_type]
+            if classtype == "encoded" and is_on:  # encoded types refuse "on"
+                msg = '"auto_chartostring" is not supported by Iris EncodedDataset'
+                with pytest.raises(TypeError, match=msg):
+                    component.set_auto_chartostring(is_on)
+            else:  # just check the method exists + doesn't error
+                component.set_auto_chartostring(is_on)
+        finally:  # always release the file, even if a check above failed
+            ds.close()

From 2916ac99ade80432d85e9ef756ebc87c362e7651 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Tue, 9 Jun 2026 17:53:19 +0100
Subject: [PATCH 4/4] Tiny test fixes.

---
 lib/iris/tests/integration/netcdf/test_stringdata.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/iris/tests/integration/netcdf/test_stringdata.py b/lib/iris/tests/integration/netcdf/test_stringdata.py
index 64447983a7..b6aa6dfe3d 100644
--- a/lib/iris/tests/integration/netcdf/test_stringdata.py
+++ b/lib/iris/tests/integration/netcdf/test_stringdata.py
@@ -600,7 +600,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding):
         # Check that we can read UTF-8 encoded data, even with no _Encoding attribute.
         # This is a common case in the wild, and now accepted by CF as a default.
         # However, other encodings will FAIL to decode.
-        filepath = tmp_path / "utf8_no_encoding.nc"
+        filepath = tmp_path / f"read_{data_encoding}_no_encoding.nc"
         testdata = make_testfile(
             testfile_path=filepath,
             encoding_str=data_encoding,
@@ -618,7 +618,7 @@ def test_read_no_encoding(self, tmp_path, data_encoding):
                 cube.data
 
     def test_read_wrong_encoding__fail(self, tmp_path):
-        filepath = tmp_path / "missing_encoding.nc"
+        filepath = tmp_path / "read_wrong_encoding.nc"
         testdata = make_testfile(
             testfile_path=filepath,
             encoding_str="utf-16",