Add support for NETCDF4_CLASSIC to h5netcdf engine #10686
File 1 of 2: changes to the h5netcdf backend store
@@ -7,6 +7,7 @@
from typing import TYPE_CHECKING, Any, Self

import numpy as np
from packaging.version import Version

from xarray.backends.common import (
BACKEND_ENTRYPOINTS,

@@ -27,6 +28,7 @@
PickleableFileManager,
)
from xarray.backends.locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock
from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable
from xarray.backends.netCDF4_ import (
BaseNetCDF4Array,
_build_and_get_enum,
@@ -124,6 +126,7 @@ def __init__(
manager: FileManager | h5netcdf.File | h5netcdf.Group,
group=None,
mode=None,
format="NETCDF4",
lock=HDF5_LOCK,
autoclose=False,
):

@@ -143,7 +146,7 @@ def __init__(
self._manager = manager
self._group = group
self._mode = mode
self.format = None
self.format = format or "NETCDF4"
# todo: utilizing find_root_and_group seems a bit clunky
# making filename available on h5netcdf.Group seems better
self._filename = find_root_and_group(self.ds)[0].filename

@@ -152,6 +155,9 @@ def __init__(
self.autoclose = autoclose

def get_child_store(self, group: str) -> Self:
if self.format == "NETCDF4_CLASSIC":
raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.")

if self._group is not None:
group = os.path.join(self._group, group)
return type(self)(
@@ -167,7 +173,7 @@ def open(
cls,
filename,
mode="r",
format=None,
format="NETCDF4",
group=None,
lock=None,
autoclose=False,

@@ -198,8 +204,8 @@ def open(
f"{magic_number!r} is not the signature of a valid netCDF4 file"
)

if format not in [None, "NETCDF4"]:
raise ValueError("invalid format for h5netcdf backend")
if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]:
raise ValueError(f"invalid format for h5netcdf backend: {format}")

kwargs = {
"invalid_netcdf": invalid_netcdf,

@@ -210,6 +216,8 @@ def open(
kwargs.update(driver_kwds)
if phony_dims is not None:
kwargs["phony_dims"] = phony_dims
if format is not None and Version(h5netcdf.__version__) > Version("1.6.4"):
kwargs["format"] = format

if lock is None:
if mode == "r":

@@ -223,7 +231,15 @@ def open(
else PickleableFileManager
)
manager = manager_cls(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

return cls(
manager,
group=group,
format=format,
mode=mode,
lock=lock,
autoclose=autoclose,
)

def _acquire(self, needs_lock=True):
with self._manager.acquire_context(needs_lock) as root:
@@ -319,11 +335,27 @@ def set_dimension(self, name, length, is_unlimited=False):
else:
self.ds.dimensions[name] = length

def convert_string(self, value):
"""If format is NETCDF4_CLASSIC, convert strings to fixed width char
arrays to ensure they can be read by legacy software.

CLASSIC attributes are read by third party software as fixed width char arrays
"""
if self.format == "NETCDF4_CLASSIC":
value = encode_nc3_attr_value(value)
if isinstance(value, bytes):
value = np.bytes_(value)
Comment on lines +346 to +347:

Why this special logic only for converting bytes? This seems unrelated to what we need for NETCDF4_CLASSIC.

Reply: To make sure strings are written as … This is in fact the detail that our third party software in C++ choked on. The netCDF C library has both …
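As an illustration of the conversion under discussion (a sketch, not part of the diff): a Python str attribute is first encoded to bytes, roughly what encode_nc3_attr_value does for string values, and then wrapped in a NumPy bytes scalar so the stored attribute carries a fixed-width "S" dtype rather than a variable-length string type.

```python
import numpy as np

# Sketch of the CLASSIC attribute conversion: str -> UTF-8 bytes -> fixed-width scalar.
value = "bar"
if isinstance(value, str):
    value = value.encode("utf-8")  # plain bytes
value = np.bytes_(value)           # NumPy scalar with dtype "S3"
print(value.dtype)                 # dtype('S3')
```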
return value

def set_attribute(self, key, value):
value = self.convert_string(value)
I'm not sure you need a helper function here.

Reply: The …
self.ds.attrs[key] = value

def encode_variable(self, variable, name=None):
return _encode_nc4_variable(variable, name=name)
if self.format == "NETCDF4_CLASSIC":
return encode_nc3_variable(variable, name=name)
else:
return _encode_nc4_variable(variable, name=name)

def prepare_variable(
self, name, variable, check_encoding=False, unlimited_dims=None

@@ -332,7 +364,9 @@ def prepare_variable(
_ensure_no_forward_slash_in_name(name)
attrs = variable.attrs.copy()
dtype = _get_datatype(variable, raise_on_invalid_encoding=check_encoding)
dtype = _get_datatype(
variable, nc_format=self.format, raise_on_invalid_encoding=check_encoding
)

fillvalue = attrs.pop("_FillValue", None)

@@ -394,7 +428,7 @@ def prepare_variable(
nc4_var = self.ds[name]

for k, v in attrs.items():
nc4_var.attrs[k] = v
nc4_var.attrs[k] = self.convert_string(v)
I think this is doing the variable attribute conversion twice?

Reply: This here is for variable attributes only. The …
target = H5NetCDFArrayWrapper(name, self)

@@ -484,7 +518,7 @@ def open_dataset(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
format=None,
format="NETCDF4",
group=None,
lock=None,
invalid_netcdf=None,

@@ -544,7 +578,7 @@ def open_datatree(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
format=None,
format="NETCDF4",
group: str | None = None,
lock=None,
invalid_netcdf=None,

@@ -587,7 +621,7 @@ def open_groups_as_dict(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
format=None,
format="NETCDF4",
group: str | None = None,
lock=None,
invalid_netcdf=None,
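Taken together, these changes let the h5netcdf engine write NETCDF4_CLASSIC files. A sketch of the intended usage, assuming this PR plus a new-enough h5netcdf are installed (the file name and variables below are made up for illustration):

```python
import numpy as np
import xarray as xr

# Hypothetical dataset with a global attribute and a variable attribute.
ds = xr.Dataset(
    {"t": ("x", np.arange(3, dtype="float32"), {"units": "K"})},
    attrs={"title": "classic example"},
)

# New capability added by this PR: NETCDF4_CLASSIC through the h5netcdf engine.
ds.to_netcdf("classic.nc", engine="h5netcdf", format="NETCDF4_CLASSIC")

# Reading back works with either engine.
with xr.open_dataset("classic.nc", engine="h5netcdf") as actual:
    print(actual)
```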
File 2 of 2: changes to the backend test suite
@@ -460,6 +460,7 @@ def roundtrip(
save_kwargs = {}
if open_kwargs is None:
open_kwargs = {}

with create_tmp_file(allow_cleanup_failure=allow_cleanup_failure) as path:
self.save(data, path, **save_kwargs)
with self.open(path, **open_kwargs) as ds:

@@ -4727,6 +4728,54 @@ def create_store(self):
) as store:
yield store

@requires_h5netcdf
def test_string_attributes_stored_as_char(self, tmp_path):
import h5netcdf

original = Dataset(attrs={"foo": "bar"})
store_path = tmp_path / "tmp.nc"
original.to_netcdf(store_path, engine=self.engine, format=self.file_format)
with h5netcdf.File(store_path, "r") as ds:
# Check that the attribute is stored as a char array
assert ds._h5file.attrs["foo"].dtype == np.dtype("S3")
Comment on lines +4732 to +4740:

NumPy's …

Reply: Using fixed width chars replicates the behavior of the netCDF4 backend for the CLASSIC format. Again, this has to do with the … Sticking as close as possible to netCDF4 output increases my confidence that the h5netcdf outputs will be compatible with 3rd party software expecting the CLASSIC format.
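One way to compare the two backends' CLASSIC output directly is to look at the raw HDF5 attribute types. A sketch of that check (illustrative only; the file names and the use of h5py for inspection are assumptions, and the h5netcdf write assumes this PR is installed):

```python
import h5py
import numpy as np
import xarray as xr

ds = xr.Dataset(attrs={"foo": "bar"})

# Write the same dataset with both engines in NETCDF4_CLASSIC format.
ds.to_netcdf("via_netcdf4.nc", engine="netcdf4", format="NETCDF4_CLASSIC")
ds.to_netcdf("via_h5netcdf.nc", engine="h5netcdf", format="NETCDF4_CLASSIC")

# Inspect the stored attribute; both should come back as fixed-width char data.
for path in ("via_netcdf4.nc", "via_h5netcdf.nc"):
    with h5py.File(path, "r") as f:
        print(path, f.attrs["foo"], np.asarray(f.attrs["foo"]).dtype)
```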
@requires_h5netcdf
class TestNetCDF4ClassicViaH5NetCDFData(TestNetCDF4ClassicViaNetCDF4Data):
engine: T_NetcdfEngine = "h5netcdf"
file_format: T_NetcdfTypes = "NETCDF4_CLASSIC"

@contextlib.contextmanager
def create_store(self):
with create_tmp_file() as tmp_file:
with backends.H5NetCDFStore.open(
tmp_file, mode="w", format="NETCDF4_CLASSIC"
) as store:
yield store

@requires_netCDF4
def test_cross_engine_read_write_netcdf4(self) -> None:
# Drop dim3, because its labels include strings. These appear to be
# not properly read with python-netCDF4, which converts them into
# unicode instead of leaving them as bytes.
data = create_test_data().drop_vars("dim3")
data.attrs["foo"] = "bar"
valid_engines: list[T_NetcdfEngine] = ["netcdf4", "h5netcdf"]
for write_engine in valid_engines:
with create_tmp_file() as tmp_file:
data.to_netcdf(tmp_file, engine=write_engine, format=self.file_format)
for read_engine in valid_engines:
with open_dataset(tmp_file, engine=read_engine) as actual:
assert_identical(data, actual)

def test_group_fails(self):
# Check writing group data fails with CLASSIC format
original = create_test_data()
with pytest.raises(
ValueError, match=r"Cannot create sub-groups in `NETCDF4_CLASSIC` format."
):
original.to_netcdf(group="sub", format=self.file_format, engine=self.engine)

@requires_scipy_or_netCDF4
class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase):
Has this in fact been added to h5netcdf yet? We should figure out the API h5netcdf will accept first before using it in xarray.

No change has been made in h5netcdf. This PR assumes that h5netcdf knows nothing of the format argument for now, but that the next version will. The plan is then to open a PR against h5netcdf to support the CLASSIC format without breaking xarray tests.

But indeed, I'm assuming h5netcdf will accept a PR adding the format argument to their API, and that it will be merged before the next release. This might be over-optimistic and I'm happy to follow suggestions here.

I really think we should finish the API on the h5netcdf side first. That eliminates the non-zero risk that h5netcdf releases a new version before your PR to h5netcdf lands, or that h5netcdf settles on a different API for this.

@huard I agree with @shoyer and I'm gladly supporting a PR over at h5netcdf. We should move fast, as a major version change is lurking around the corner. The integration of pyfive is almost ready. We could add the NETCDF4_CLASSIC changes before or on top of the pyfive changes and release them together as h5netcdf v2.0.0. That way we could also avoid a possible deprecation cycle.

Sounds good, see h5netcdf/h5netcdf#283.

The h5netcdf PR is now merged.