From 68d5c73446e6ea925b47e2947dd6e1995203d413 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 2 Sep 2025 14:03:01 -0400 Subject: [PATCH 01/16] Support NETCDF4_CLASSIC in the h5engine backend --- xarray/backends/h5netcdf_.py | 12 +++++++++--- xarray/tests/test_backends.py | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 01275d8db5b..339a88dc7ce 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -26,6 +26,7 @@ FileManager, ) from xarray.backends.locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock +from xarray.backends.netcdf3 import encode_nc3_attr_value, encode_nc3_variable from xarray.backends.netCDF4_ import ( BaseNetCDF4Array, _build_and_get_enum, @@ -197,7 +198,7 @@ def open( f"{magic_number!r} is not the signature of a valid netCDF4 file" ) - if format not in [None, "NETCDF4"]: + if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]: raise ValueError("invalid format for h5netcdf backend") kwargs = { @@ -318,10 +319,15 @@ def set_dimension(self, name, length, is_unlimited=False): self.ds.dimensions[name] = length def set_attribute(self, key, value): + if self.format != "NETCDF4": + value = encode_nc3_attr_value(value) self.ds.attrs[key] = value def encode_variable(self, variable, name=None): - return _encode_nc4_variable(variable, name=name) + if self.format == "NETCDF4": + return _encode_nc4_variable(variable, name=name) + + return encode_nc3_variable(variable, name=name) def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None @@ -330,7 +336,7 @@ def prepare_variable( _ensure_no_forward_slash_in_name(name) attrs = variable.attrs.copy() - dtype = _get_datatype(variable, raise_on_invalid_encoding=check_encoding) + dtype = _get_datatype(variable, self.format, raise_on_invalid_encoding=check_encoding) fillvalue = attrs.pop("_FillValue", None) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c54e3c8d135..6268c1c52ee 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4576,6 +4576,20 @@ def create_store(self): yield store +@requires_h5netcdf +class TestNetCDF4ClassicViaH5NetCDFData(NetCDF3Only, CFEncodedBase): + engine: T_NetcdfEngine = "h5netcdf" + file_format: T_NetcdfTypes = "NETCDF4_CLASSIC" + + @contextlib.contextmanager + def create_store(self): + with create_tmp_file() as tmp_file: + with backends.H5NetCDFStore.open( + tmp_file, mode="w", format="NETCDF4_CLASSIC" + ) as store: + yield store + + @requires_scipy_or_netCDF4 class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase): # verify that we can read and write netCDF3 files as long as we have scipy From 120af0732150dc996da6bec67d6fa73d43009794 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 2 Sep 2025 17:24:28 -0400 Subject: [PATCH 02/16] convert bytes attributes to numpy.bytes_ in NETCDF4_CLASSIC format with h5netcdf engine --- xarray/backends/h5netcdf_.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 339a88dc7ce..f6e3676b892 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -321,6 +321,8 @@ def set_dimension(self, name, length, is_unlimited=False): def set_attribute(self, key, value): if self.format != "NETCDF4": value = encode_nc3_attr_value(value) + if isinstance(value, bytes): + value = np.bytes_(value) self.ds.attrs[key] = value def encode_variable(self, variable, name=None): From f8f44f0f73433f1e36ef179b0975c046849927ca Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 2 Sep 2025 18:01:18 -0400 Subject: [PATCH 03/16] added test to confirm string attributes are stored as numpy char arrays with NETCDF4_CLASSIC --- xarray/tests/test_backends.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6268c1c52ee..b9837c03aac 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4575,9 +4575,20 @@ def create_store(self): ) as store: yield store + @requires_h5netcdf + def test_string_attributes_stored_as_char(self, tmp_path): + import h5netcdf + + original = Dataset(attrs={"foo": "bar"}) + store_path = tmp_path / "tmp.nc" + original.to_netcdf(store_path, engine=self.engine, format=self.file_format) + with h5netcdf.File(store_path, "r") as ds: + # Check that the attribute is stored as a char array + assert ds._h5file.attrs["foo"].dtype == np.dtype("S3") + @requires_h5netcdf -class TestNetCDF4ClassicViaH5NetCDFData(NetCDF3Only, CFEncodedBase): +class TestNetCDF4ClassicViaH5NetCDFData(TestNetCDF4ClassicViaNetCDF4Data): engine: T_NetcdfEngine = "h5netcdf" file_format: T_NetcdfTypes = "NETCDF4_CLASSIC" @@ -4590,6 +4601,8 @@ def create_store(self): yield store +# TODO: add cross-engine tests for NETCDF4_CLASSIC + @requires_scipy_or_netCDF4 class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase): # verify that we can read and write netCDF3 files as long as we have scipy From 522d37da8eea1da391a8be48a5372dae6d308cff Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 2 Sep 2025 23:28:33 -0400 Subject: [PATCH 04/16] Added change to whats-new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 29d25c0b734..47dc27d9a51 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,9 @@ New Features - ``compute=False`` is now supported by :py:meth:`DataTree.to_netcdf` and :py:meth:`DataTree.to_zarr`. By `Stephan Hoyer `_. +- The ``h5netcdf`` engine can now write ``NETCDF4_CLASSIC`` files + (:issue:`10676`, :pull:``). + By `David Huard `_. Breaking changes ~~~~~~~~~~~~~~~~ From 783c407f385efb525ee7e324d49b6b4eeb038103 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 2 Sep 2025 23:31:47 -0400 Subject: [PATCH 05/16] run pre-commit --- xarray/backends/h5netcdf_.py | 4 +++- xarray/tests/test_backends.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index f6e3676b892..8490d105fcc 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -338,7 +338,9 @@ def prepare_variable( _ensure_no_forward_slash_in_name(name) attrs = variable.attrs.copy() - dtype = _get_datatype(variable, self.format, raise_on_invalid_encoding=check_encoding) + dtype = _get_datatype( + variable, self.format, raise_on_invalid_encoding=check_encoding + ) fillvalue = attrs.pop("_FillValue", None) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b9837c03aac..da843430dc0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4603,6 +4603,7 @@ def create_store(self): # TODO: add cross-engine tests for NETCDF4_CLASSIC + @requires_scipy_or_netCDF4 class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase): # verify that we can read and write netCDF3 files as long as we have scipy From 2f1c7817bc605bff1f15851ffc395280e715d2df Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 3 Sep 2025 00:36:12 -0400 Subject: [PATCH 06/16] Added test comparing CDL representation of test data written with netcdf4 and h5netcdf --- xarray/tests/test_backends.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index da843430dc0..b6c1120c375 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4600,8 +4600,26 @@ def create_store(self): ) as store: yield store + @requires_netCDF4 + def test_cdl_representation(self, tmp_path) -> None: + import netCDF4 + + data = create_test_data() + + # Create CDL representation using netCDF4 + fn = tmp_path / "tmp.nc" + data.to_netcdf(fn, engine="netcdf4", format=self.file_format) + with netCDF4.Dataset(fn) as ds: + expected = ds.tocdl() + + # Write using h5netcdf + data.to_netcdf(fn, engine=self.engine, format=self.file_format) + + # Read using netCDF4 + with netCDF4.Dataset(fn) as ds: + actual = ds.tocdl() -# TODO: add cross-engine tests for NETCDF4_CLASSIC + assert expected == actual @requires_scipy_or_netCDF4 From b142d38c7ac8a59f96c64dea21c92a174fa689fb Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 3 Sep 2025 01:02:39 -0400 Subject: [PATCH 07/16] Added global attribute to test data. Apply CLASSIC conversion to variable attributes as well. --- xarray/backends/h5netcdf_.py | 9 +++++++-- xarray/tests/__init__.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 8490d105fcc..166f007698b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -318,11 +318,16 @@ def set_dimension(self, name, length, is_unlimited=False): else: self.ds.dimensions[name] = length - def set_attribute(self, key, value): + def convert_string(self, value): + """If format is NETCDF4_CLASSIC, convert strings to char arrays.""" if self.format != "NETCDF4": value = encode_nc3_attr_value(value) if isinstance(value, bytes): value = np.bytes_(value) + return value + + def set_attribute(self, key, value): + value = self.convert_string(value) self.ds.attrs[key] = value def encode_variable(self, variable, name=None): @@ -402,7 +407,7 @@ def prepare_variable( nc4_var = self.ds[name] for k, v in attrs.items(): - nc4_var.attrs[k] = v + nc4_var.attrs[k] = self.convert_string(v) target = H5NetCDFArrayWrapper(name, self) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 3b4e49c64d8..67e24122852 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -397,6 +397,7 @@ def create_test_data( else: numbers_values = rs.integers(0, 3, _dims["dim3"], dtype="int64") obj.coords["numbers"] = ("dim3", numbers_values) + obj.attrs = {"foo": "bar"} obj.encoding = {"foo": "bar"} assert_writeable(obj) return obj From 909a96c3c676268b20006a69d4be16089d830b5e Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 3 Sep 2025 13:35:12 -0400 Subject: [PATCH 08/16] Use h5dump to compare file content instead of CDL. Add _nc3_strict attribute. --- doc/whats-new.rst | 2 +- xarray/backends/h5netcdf_.py | 20 ++++++++++++++++++++ xarray/tests/test_backends.py | 21 ++++++++------------- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 47dc27d9a51..ceeffb38102 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,7 +24,7 @@ New Features :py:meth:`DataTree.to_zarr`. By `Stephan Hoyer `_. - The ``h5netcdf`` engine can now write ``NETCDF4_CLASSIC`` files - (:issue:`10676`, :pull:``). + (:issue:`10676`, :pull:`10686`). By `David Huard `_. Breaking changes diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 166f007698b..17d7986784b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,6 +3,7 @@ import functools import io import os +import subprocess from collections.abc import Iterable from typing import TYPE_CHECKING, Any, Self @@ -101,6 +102,19 @@ def _read_attributes(h5netcdf_var): ) +def _h5dump(fn: str): + """Call h5dump on an h5netcdf file.""" + out = subprocess.run( + ["h5dump", "-A", "-B", fn], check=False, capture_output=True + ).stdout.decode() + + # Strip first and last line + # HDF5 "file" { + # ... + # } + return "\n".join(out.splitlines()[1:-1]) + + def _h5netcdf_create_group(dataset, name): return dataset.create_group(name) @@ -231,6 +245,12 @@ def _acquire(self, needs_lock=True): ) return ds + def encode(self, variables, attributes): + """Overload encode to set _nc3_strict flag.""" + if self.format != "NETCDF4": + self.ds._h5file.attrs["_nc3_strict"] = np.int32(1) + return super().encode(variables, attributes) + @property def ds(self): return self._acquire() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b6c1120c375..b0244c4a312 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -45,7 +45,7 @@ save_mfdataset, ) from xarray.backends.common import robust_getitem -from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint +from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint, _h5dump from xarray.backends.netcdf3 import _nc3_dtype_coercions from xarray.backends.netCDF4_ import ( NetCDF4BackendEntrypoint, @@ -4601,23 +4601,18 @@ def create_store(self): yield store @requires_netCDF4 - def test_cdl_representation(self, tmp_path) -> None: - import netCDF4 - + def test_h5dump(self, tmp_path) -> None: data = create_test_data() - # Create CDL representation using netCDF4 - fn = tmp_path / "tmp.nc" + # Dump the representation of the netCDF4-generated file + fn = tmp_path / "netcdf4.nc" data.to_netcdf(fn, engine="netcdf4", format=self.file_format) - with netCDF4.Dataset(fn) as ds: - expected = ds.tocdl() + expected = _h5dump(fn) - # Write using h5netcdf + # Dump the representation of the h5netcdf-generated file + fn = tmp_path / "h5netcdf.nc" data.to_netcdf(fn, engine=self.engine, format=self.file_format) - - # Read using netCDF4 - with netCDF4.Dataset(fn) as ds: - actual = ds.tocdl() + actual = _h5dump(fn) assert expected == actual From 35b50cea7fa3a2297ab5511e6ead1a7c5d921cb9 Mon Sep 17 00:00:00 2001 From: David Huard Date: Wed, 3 Sep 2025 16:54:16 -0400 Subject: [PATCH 09/16] raise error if writing groups to CLASSIC file. --- xarray/backends/h5netcdf_.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 17d7986784b..1538b051ac3 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -151,6 +151,10 @@ def __init__( raise ValueError( "must supply a h5netcdf.File if the group argument is provided" ) + if format == "NETCDF4_CLASSIC": + raise ValueError( + "Cannot create sub-groups in `NETCDF4_CLASSIC` format." + ) root = manager manager = DummyFileManager(root) @@ -215,6 +219,9 @@ def open( if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]: raise ValueError("invalid format for h5netcdf backend") + if format == "NETCDF4_CLASSIC" and group is not None: + raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") + kwargs = { "invalid_netcdf": invalid_netcdf, "decode_vlen_strings": decode_vlen_strings, From a187c8d9ba2959a33d1006c105a18f3dcfa4594b Mon Sep 17 00:00:00 2001 From: David Huard Date: Fri, 5 Sep 2025 13:14:28 -0400 Subject: [PATCH 10/16] remove h5dump test. Remove _nc3_strict attribute (should go into h5netcdf). --- xarray/backends/h5netcdf_.py | 23 +++-------------------- xarray/tests/test_backends.py | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 35 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 1538b051ac3..a43015766bc 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -3,11 +3,11 @@ import functools import io import os -import subprocess from collections.abc import Iterable from typing import TYPE_CHECKING, Any, Self import numpy as np +from packaging.version import Version from xarray.backends.common import ( BACKEND_ENTRYPOINTS, @@ -102,19 +102,6 @@ def _read_attributes(h5netcdf_var): ) -def _h5dump(fn: str): - """Call h5dump on an h5netcdf file.""" - out = subprocess.run( - ["h5dump", "-A", "-B", fn], check=False, capture_output=True - ).stdout.decode() - - # Strip first and last line - # HDF5 "file" { - # ... - # } - return "\n".join(out.splitlines()[1:-1]) - - def _h5netcdf_create_group(dataset, name): return dataset.create_group(name) @@ -231,6 +218,8 @@ def open( kwargs.update(driver_kwds) if phony_dims is not None: kwargs["phony_dims"] = phony_dims + if format is not None and Version(h5netcdf.__version__) > Version("1.6"): + kwargs["format"] = format if lock is None: if mode == "r": @@ -252,12 +241,6 @@ def _acquire(self, needs_lock=True): ) return ds - def encode(self, variables, attributes): - """Overload encode to set _nc3_strict flag.""" - if self.format != "NETCDF4": - self.ds._h5file.attrs["_nc3_strict"] = np.int32(1) - return super().encode(variables, attributes) - @property def ds(self): return self._acquire() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 96d82bca413..82916d5662a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -45,7 +45,7 @@ save_mfdataset, ) from xarray.backends.common import robust_getitem -from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint, _h5dump +from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint from xarray.backends.netcdf3 import _nc3_dtype_coercions from xarray.backends.netCDF4_ import ( NetCDF4BackendEntrypoint, @@ -4581,6 +4581,7 @@ def test_string_attributes_stored_as_char(self, tmp_path): with h5netcdf.File(store_path, "r") as ds: # Check that the attribute is stored as a char array assert ds._h5file.attrs["foo"].dtype == np.dtype("S3") + # assert ds._h5file.attrs["_nc3_strict"] == 1 @requires_h5netcdf @@ -4597,20 +4598,19 @@ def create_store(self): yield store @requires_netCDF4 - def test_h5dump(self, tmp_path) -> None: - data = create_test_data() - - # Dump the representation of the netCDF4-generated file - fn = tmp_path / "netcdf4.nc" - data.to_netcdf(fn, engine="netcdf4", format=self.file_format) - expected = _h5dump(fn) - - # Dump the representation of the h5netcdf-generated file - fn = tmp_path / "h5netcdf.nc" - data.to_netcdf(fn, engine=self.engine, format=self.file_format) - actual = _h5dump(fn) - - assert expected == actual + def test_cross_engine_read_write_netcdf4(self) -> None: + # Drop dim3, because its labels include strings. These appear to be + # not properly read with python-netCDF4, which converts them into + # unicode instead of leaving them as bytes. + data = create_test_data().drop_vars("dim3") + data.attrs["foo"] = "bar" + valid_engines: list[T_NetcdfEngine] = ["netcdf4", "h5netcdf"] + for write_engine in valid_engines: + with create_tmp_file() as tmp_file: + data.to_netcdf(tmp_file, engine=write_engine, format=self.file_format) + for read_engine in valid_engines: + with open_dataset(tmp_file, engine=read_engine) as actual: + assert_identical(data, actual) @requires_scipy_or_netCDF4 From 0c1bc086beea37b379b544e0814ae1d438b1568c Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 8 Sep 2025 11:38:16 -0400 Subject: [PATCH 11/16] fix h5netcdf version check. --- xarray/backends/h5netcdf_.py | 19 +++++++++---------- xarray/tests/test_backends.py | 1 - 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index a43015766bc..ff0c5c0d17d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -125,6 +125,7 @@ def __init__( manager: FileManager | h5netcdf.File | h5netcdf.Group, group=None, mode=None, + format=None, lock=HDF5_LOCK, autoclose=False, ): @@ -138,17 +139,13 @@ def __init__( raise ValueError( "must supply a h5netcdf.File if the group argument is provided" ) - if format == "NETCDF4_CLASSIC": - raise ValueError( - "Cannot create sub-groups in `NETCDF4_CLASSIC` format." - ) root = manager manager = DummyFileManager(root) self._manager = manager self._group = group self._mode = mode - self.format = None + self.format = format or "NETCDF4" # todo: utilizing find_root_and_group seems a bit clunky # making filename available on h5netcdf.Group seems better self._filename = find_root_and_group(self.ds)[0].filename @@ -218,7 +215,7 @@ def open( kwargs.update(driver_kwds) if phony_dims is not None: kwargs["phony_dims"] = phony_dims - if format is not None and Version(h5netcdf.__version__) > Version("1.6"): + if format is not None and Version(h5netcdf.__version__) > Version("1.6.4"): kwargs["format"] = format if lock is None: @@ -330,7 +327,7 @@ def set_dimension(self, name, length, is_unlimited=False): def convert_string(self, value): """If format is NETCDF4_CLASSIC, convert strings to char arrays.""" - if self.format != "NETCDF4": + if self.format == "NETCDF4_CLASSIC": value = encode_nc3_attr_value(value) if isinstance(value, bytes): value = np.bytes_(value) @@ -341,10 +338,12 @@ def set_attribute(self, key, value): self.ds.attrs[key] = value def encode_variable(self, variable, name=None): - if self.format == "NETCDF4": + if self.format == "NETCDF4_CLASSIC": + return encode_nc3_variable(variable, name=name) + elif self.format == "NETCDF4": return _encode_nc4_variable(variable, name=name) - - return encode_nc3_variable(variable, name=name) + else: + raise ValueError(f"unexpected format: {self.format}") def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 82916d5662a..aa8a1f2f15c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4581,7 +4581,6 @@ def test_string_attributes_stored_as_char(self, tmp_path): with h5netcdf.File(store_path, "r") as ds: # Check that the attribute is stored as a char array assert ds._h5file.attrs["foo"].dtype == np.dtype("S3") - # assert ds._h5file.attrs["_nc3_strict"] == 1 @requires_h5netcdf From 03ea2de56738577b9e1314d870b5ea47cfb1717a Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 8 Sep 2025 16:05:42 -0400 Subject: [PATCH 12/16] try to fix tests --- xarray/backends/h5netcdf_.py | 19 ++++++++++++------- xarray/tests/test_backends.py | 3 ++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index ff0c5c0d17d..1cfa4dfc635 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -125,7 +125,7 @@ def __init__( manager: FileManager | h5netcdf.File | h5netcdf.Group, group=None, mode=None, - format=None, + format="NETCDF4", lock=HDF5_LOCK, autoclose=False, ): @@ -145,7 +145,7 @@ def __init__( self._manager = manager self._group = group self._mode = mode - self.format = format or "NETCDF4" + self.format = format # todo: utilizing find_root_and_group seems a bit clunky # making filename available on h5netcdf.Group seems better self._filename = find_root_and_group(self.ds)[0].filename @@ -229,7 +229,14 @@ def open( if isinstance(filename, str) else h5netcdf.File(filename, mode=mode, **kwargs) ) - return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose) + return cls( + manager, + group=group, + format=format, + mode=mode, + lock=lock, + autoclose=autoclose, + ) def _acquire(self, needs_lock=True): with self._manager.acquire_context(needs_lock) as root: @@ -340,10 +347,8 @@ def set_attribute(self, key, value): def encode_variable(self, variable, name=None): if self.format == "NETCDF4_CLASSIC": return encode_nc3_variable(variable, name=name) - elif self.format == "NETCDF4": - return _encode_nc4_variable(variable, name=name) - else: - raise ValueError(f"unexpected format: {self.format}") + + return _encode_nc4_variable(variable, name=name) def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8ee804ada59..1722ce3b387 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -460,6 +460,7 @@ def roundtrip( save_kwargs = {} if open_kwargs is None: open_kwargs = {} + with create_tmp_file(allow_cleanup_failure=allow_cleanup_failure) as path: self.save(data, path, **save_kwargs) with self.open(path, **open_kwargs) as ds: @@ -4799,7 +4800,7 @@ def test_encoding_unlimited_dims(self) -> None: @requires_scipy def test_roundtrip_via_bytes(self) -> None: original = create_test_data() - netcdf_bytes = original.to_netcdf() + netcdf_bytes = original.to_netcdf(engine="scipy") roundtrip = load_dataset(netcdf_bytes) assert_identical(roundtrip, original) From 592b98b027f38205f230b151aa80e560d9b2aab3 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 8 Sep 2025 16:50:49 -0400 Subject: [PATCH 13/16] Set default format to NETCDF4 instead of None, because passing None to `netCDF4_.get_datatype` skips required conversions. Remove global attribute from create_test_data because it impacts other tests in other files. --- doc/whats-new.rst | 3 +-- xarray/backends/h5netcdf_.py | 18 +++++++++--------- xarray/tests/__init__.py | 1 - xarray/tests/test_backends.py | 6 ++++++ 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a841467d4b7..b54115b92e4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -62,8 +62,7 @@ New Features - ``compute=False`` is now supported by :py:meth:`DataTree.to_netcdf` and :py:meth:`DataTree.to_zarr`. By `Stephan Hoyer `_. -- The ``h5netcdf`` engine can now write ``NETCDF4_CLASSIC`` files - (:issue:`10676`, :pull:`10686`). +- The ``h5netcdf`` engine has support for pseudo ``NETCDF4_CLASSIC`` files, meaning variables and attributes are cast to supported types. Note that the saved files won't be recognized as genuine ``NETCDF4_CLASSIC`` files until ``h5netcdf`` adds support. (:issue:`10676`, :pull:`10686`). By `David Huard `_. - ``open_dataset`` will now correctly infer a path ending in ``.zarr/`` as zarr By `Ian Hunt-Isaak `_. diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 1cfa4dfc635..1b8b40963db 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -142,10 +142,13 @@ def __init__( root = manager manager = DummyFileManager(root) + if format == "NETCDF4_CLASSIC" and group is not None: + raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") + self._manager = manager self._group = group self._mode = mode - self.format = format + self.format = format or "NETCDF4" # todo: utilizing find_root_and_group seems a bit clunky # making filename available on h5netcdf.Group seems better self._filename = find_root_and_group(self.ds)[0].filename @@ -169,7 +172,7 @@ def open( cls, filename, mode="r", - format=None, + format="NETCDF4", group=None, lock=None, autoclose=False, @@ -203,9 +206,6 @@ def open( if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]: raise ValueError("invalid format for h5netcdf backend") - if format == "NETCDF4_CLASSIC" and group is not None: - raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") - kwargs = { "invalid_netcdf": invalid_netcdf, "decode_vlen_strings": decode_vlen_strings, @@ -358,7 +358,7 @@ def prepare_variable( _ensure_no_forward_slash_in_name(name) attrs = variable.attrs.copy() dtype = _get_datatype( - variable, self.format, raise_on_invalid_encoding=check_encoding + variable, nc_format=self.format, raise_on_invalid_encoding=check_encoding ) fillvalue = attrs.pop("_FillValue", None) @@ -510,7 +510,7 @@ def open_dataset( drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, - format=None, + format="NETCDF4", group=None, lock=None, invalid_netcdf=None, @@ -570,7 +570,7 @@ def open_datatree( drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, - format=None, + format="NETCDF4", group: str | None = None, lock=None, invalid_netcdf=None, @@ -613,7 +613,7 @@ def open_groups_as_dict( drop_variables: str | Iterable[str] | None = None, use_cftime=None, decode_timedelta=None, - format=None, + format="NETCDF4", group: str | None = None, lock=None, invalid_netcdf=None, diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 67e24122852..3b4e49c64d8 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -397,7 +397,6 @@ def create_test_data( else: numbers_values = rs.integers(0, 3, _dims["dim3"], dtype="int64") obj.coords["numbers"] = ("dim3", numbers_values) - obj.attrs = {"foo": "bar"} obj.encoding = {"foo": "bar"} assert_writeable(obj) return obj diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1722ce3b387..094d0c74988 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4715,6 +4715,12 @@ def test_cross_engine_read_write_netcdf4(self) -> None: with open_dataset(tmp_file, engine=read_engine) as actual: assert_identical(data, actual) + def test_group_fails(self): + # Check writing group data fails with CLASSIC format + original = create_test_data() + with pytest.raises(ValueError): + original.to_netcdf(group="sub", format=self.file_format, engine=self.engine) + @requires_scipy_or_netCDF4 class TestGenericNetCDFData(NetCDF3Only, CFEncodedBase): From 13b60d005613d7e71b6648ed07d56a81e9e7b5f3 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 9 Sep 2025 11:21:30 -0400 Subject: [PATCH 14/16] Apply suggestions from code review Co-authored-by: Stephan Hoyer --- xarray/backends/h5netcdf_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 1b8b40963db..af527dc7b3b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -204,7 +204,7 @@ def open( ) if format not in [None, "NETCDF4", "NETCDF4_CLASSIC"]: - raise ValueError("invalid format for h5netcdf backend") + raise ValueError(f"invalid format for h5netcdf backend: {format}") kwargs = { "invalid_netcdf": invalid_netcdf, From cf8b4be409d8a7a99b54abbed0e66203e94af527 Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 9 Sep 2025 11:28:17 -0400 Subject: [PATCH 15/16] Suggestions from review. --- xarray/backends/h5netcdf_.py | 10 +++++++--- xarray/tests/test_backends.py | 6 ++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 1b8b40963db..9b3115b2c8c 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -333,7 +333,11 @@ def set_dimension(self, name, length, is_unlimited=False): self.ds.dimensions[name] = length def convert_string(self, value): - """If format is NETCDF4_CLASSIC, convert strings to char arrays.""" + """If format is NETCDF4_CLASSIC, convert strings to fixed width char + arrays to ensure they can be read by legacy software. + + CLASSIC attributes are read by third party software as fixed width char arrays + """ if self.format == "NETCDF4_CLASSIC": value = encode_nc3_attr_value(value) if isinstance(value, bytes): @@ -347,8 +351,8 @@ def set_attribute(self, key, value): def encode_variable(self, variable, name=None): if self.format == "NETCDF4_CLASSIC": return encode_nc3_variable(variable, name=name) - - return _encode_nc4_variable(variable, name=name) + else: + return _encode_nc4_variable(variable, name=name) def prepare_variable( self, name, variable, check_encoding=False, unlimited_dims=None diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 094d0c74988..d9a8b85aaac 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4718,7 +4718,9 @@ def test_cross_engine_read_write_netcdf4(self) -> None: def test_group_fails(self): # Check writing group data fails with CLASSIC format original = create_test_data() - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match=r"Cannot create sub-groups in `NETCDF4_CLASSIC` format." + ): original.to_netcdf(group="sub", format=self.file_format, engine=self.engine) @@ -4806,7 +4808,7 @@ def test_encoding_unlimited_dims(self) -> None: @requires_scipy def test_roundtrip_via_bytes(self) -> None: original = create_test_data() - netcdf_bytes = original.to_netcdf(engine="scipy") + netcdf_bytes = original.to_netcdf() roundtrip = load_dataset(netcdf_bytes) assert_identical(roundtrip, original) From b2a21d22a39829879b98006bf64e38101eba02a0 Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 22 Sep 2025 11:11:43 -0400 Subject: [PATCH 16/16] Raise an error within `get_child_store` rather that `__init__` with format=NETCDF4_CLASSIC group not None. --- xarray/backends/h5netcdf_.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 9c1bb16cba6..d3993590e56 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -143,9 +143,6 @@ def __init__( root = manager manager = DummyFileManager(root) - if format == "NETCDF4_CLASSIC" and group is not None: - raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") - self._manager = manager self._group = group self._mode = mode @@ -158,6 +155,9 @@ def __init__( self.autoclose = autoclose def get_child_store(self, group: str) -> Self: + if self.format == "NETCDF4_CLASSIC": + raise ValueError("Cannot create sub-groups in `NETCDF4_CLASSIC` format.") + if self._group is not None: group = os.path.join(self._group, group) return type(self)( @@ -231,7 +231,7 @@ def open( else PickleableFileManager ) manager = manager_cls(h5netcdf.File, filename, mode=mode, kwargs=kwargs) - + return cls( manager, group=group,