From 9d89ef5263d80fa45adbd1796f9ac149d61294c3 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Thu, 11 Apr 2024 12:45:58 +0200 Subject: [PATCH 01/14] Add step:int remapping for coords computation --- kerchunk/grib2.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index 0003cd79..2eb49d39 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -252,9 +252,11 @@ def scan_grib( z[varName].attrs["_ARRAY_DIMENSIONS"] = dims for coord in cfgrib.dataset.COORD_ATTRS: - coord2 = {"latitude": "latitudes", "longitude": "longitudes"}.get( - coord, coord - ) + coord2 = { + "latitude": "latitudes", + "longitude": "longitudes", + "step": "step:int", + }.get(coord, coord) try: x = m.get(coord2) except eccodes.WrongStepUnitError as e: From 3eac3a4693ea91a409dcbc71694f83b9805277ed Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Thu, 11 Apr 2024 17:28:22 +0200 Subject: [PATCH 02/14] Add tests with most recent eccodes version --- .github/workflows/tests.yml | 2 +- ci/environment-py310m.yml | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 ci/environment-py310m.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dd83bea5..fd480582 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [39, 310, 311] + python-version: [39, 310, 310m, 311] steps: - uses: actions/checkout@v4 diff --git a/ci/environment-py310m.yml b/ci/environment-py310m.yml new file mode 100644 index 00000000..87fc740e --- /dev/null +++ b/ci/environment-py310m.yml @@ -0,0 +1,35 @@ +name: test_env +channels: + - conda-forge + - nodefaults +dependencies: + - python=3.10 + - dask + - zarr + - xarray + - xarray-datatree + - h5netcdf + - h5py<3.9 + - pandas + - cfgrib + - eccodes=2.34 + - cftime + - astropy + - requests + - aiohttp + - pytest-cov + - fsspec + - dask + - scipy + - s3fs + - python-blosc + - flake8 + - black + - fastparquet + - pip + - pyopenssl + - tifffile + - netCDF4 + - pip: + - git+https://github.com/fsspec/filesystem_spec + - ipfsspec From 73c4b15c238e47d548d0a070728ab1fb5980565b Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:12:00 +0200 Subject: [PATCH 03/14] Back out new environment --- ci/environment-py310m.yml | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 ci/environment-py310m.yml diff --git a/ci/environment-py310m.yml b/ci/environment-py310m.yml deleted file mode 100644 index 87fc740e..00000000 --- a/ci/environment-py310m.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: test_env -channels: - - conda-forge - - nodefaults -dependencies: - - python=3.10 - - dask - - zarr - - xarray - - xarray-datatree - - h5netcdf - - h5py<3.9 - - pandas - - cfgrib - - eccodes=2.34 - - cftime - - astropy - - requests - - aiohttp - - pytest-cov - - fsspec - - dask - - scipy - - s3fs - - python-blosc - - flake8 - - black - - fastparquet - - pip - - pyopenssl - - tifffile - - netCDF4 - - pip: - - git+https://github.com/fsspec/filesystem_spec - - ipfsspec From a827575bf88177cd2723455efa92cc9db9b8b91e Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:12:15 +0200 Subject: [PATCH 04/14] Add a tiny grib file for testing --- kerchunk/tests/tinygrib.grb2 | Bin 0 -> 179 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 kerchunk/tests/tinygrib.grb2 diff --git a/kerchunk/tests/tinygrib.grb2 b/kerchunk/tests/tinygrib.grb2 new file mode 100644 index 0000000000000000000000000000000000000000..cbd29e96e897e519dd4df87418bdff3b8cdec825 GIT binary patch literal 179 zcmZ<{@^oTgU|<4b5ZDaFqKqIGBO@dG1x5%H$n{`mVBo*L03^fq9~Ce#ur2_aa1kiQ z0Fr5q^=bgI4OoE;(Za|A2IdPBK-d8&sKf%25;)B8zX3?|f;eD1z&ii`2XaMOVHPnk RH1I-Y+1Nl*tnB7M000s7Ee8Mq literal 0 HcmV?d00001 From 9a8d6db0490d15e7e62acd336a9dc44588fa2c29 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:12:44 +0200 Subject: [PATCH 05/14] Back out tests changes --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fd480582..dd83bea5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [39, 310, 310m, 311] + python-version: [39, 310, 311] steps: - uses: actions/checkout@v4 From 70a5218fdd1e55d9d80ba952efa04facf2bb51fe Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:14:43 +0200 Subject: [PATCH 06/14] Add tiny grib test --- kerchunk/tests/test_grib.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index f6835a30..b7ac5253 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -93,6 +93,12 @@ def test_subhourly(): assert len(result) == 2, "Expected two grib messages" +def test_tiny_grib(): + fpath = os.path.join(here, "tinygrib.grb2") + result = scan_grib(fpath) + assert len(result) == 1, "Expected one grib message" + + def test_grib_tree(): """ End-to-end test from grib file to zarr hierarchy From 50f58e799f634a9c0537e713046a82c6081f84c9 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:20:04 +0200 Subject: [PATCH 07/14] Wrong branch for test! --- kerchunk/tests/test_grib.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index b7ac5253..f6835a30 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -93,12 +93,6 @@ def test_subhourly(): assert len(result) == 2, "Expected two grib messages" -def test_tiny_grib(): - fpath = os.path.join(here, "tinygrib.grb2") - result = scan_grib(fpath) - assert len(result) == 1, "Expected one grib message" - - def test_grib_tree(): """ End-to-end test from grib file to zarr hierarchy From 78869f749ef334ce6ddddbbaaff4b899ea794ae5 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:20:48 +0200 Subject: [PATCH 08/14] Wrong branch! --- kerchunk/tests/tinygrib.grb2 | Bin 179 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 kerchunk/tests/tinygrib.grb2 diff --git a/kerchunk/tests/tinygrib.grb2 b/kerchunk/tests/tinygrib.grb2 deleted file mode 100644 index cbd29e96e897e519dd4df87418bdff3b8cdec825..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 179 zcmZ<{@^oTgU|<4b5ZDaFqKqIGBO@dG1x5%H$n{`mVBo*L03^fq9~Ce#ur2_aa1kiQ z0Fr5q^=bgI4OoE;(Za|A2IdPBK-d8&sKf%25;)B8zX3?|f;eD1z&ii`2XaMOVHPnk RH1I-Y+1Nl*tnB7M000s7Ee8Mq From 81d08eadb0afff241a0043f2b7f432ae9dd7ea11 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:47:29 +0200 Subject: [PATCH 09/14] Remove h5py<3.9 restriction --- ci/environment-docs.yml | 2 +- ci/environment-py310.yml | 2 +- ci/environment-py311.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/environment-docs.yml b/ci/environment-docs.yml index d06b2376..71f8ebb9 100644 --- a/ci/environment-docs.yml +++ b/ci/environment-docs.yml @@ -9,7 +9,7 @@ dependencies: - xarray - xarray-datatree - h5netcdf - - h5py<3.9 + - h5py - pandas - cfgrib - cftime diff --git a/ci/environment-py310.yml b/ci/environment-py310.yml index 021b150f..6aa0f568 100644 --- a/ci/environment-py310.yml +++ b/ci/environment-py310.yml @@ -9,7 +9,7 @@ dependencies: - xarray - xarray-datatree - h5netcdf - - h5py<3.9 + - h5py - pandas - cfgrib - cftime diff --git a/ci/environment-py311.yml b/ci/environment-py311.yml index d680ae71..13d50302 100644 --- a/ci/environment-py311.yml +++ b/ci/environment-py311.yml @@ -9,7 +9,7 @@ dependencies: - xarray - xarray-datatree - h5netcdf - - h5py<3.9 + - h5py - pandas - cfgrib - cftime From 3048bb6375001d4f94e7441f313d39437bbf6191 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 11:48:59 +0200 Subject: [PATCH 10/14] Only check dtype kind in cftime test --- kerchunk/tests/test_combine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index 94538d80..5715a135 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -730,7 +730,7 @@ def test_cftimes_to_normal(refs): engine="zarr", chunks={}, ) - assert z.time.dtype == "M8[s]" + assert z.time.dtype.kind == "M" assert ( z.time.values == np.array(["1970-02-01T00:00:00", "1970-03-01T00:00:00"], dtype="M8[s]") @@ -799,7 +799,7 @@ def test_chunk_error(refs): refs1 = refs["single1"]["refs"] refs2 = refs1.copy() refs2.pop(".zmetadata") - fs = fsspec.filesystem("reference", fo=refs2, remote_protocol="memory") + fs = fsspec.filesystem("reference", fo=refs2, remote_protocol="memory") # noqa refs2[ "data/.zarray" ] = b""" From 5cc71479968e8c7117aac5e057ffa93877f86969 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 13:22:49 +0200 Subject: [PATCH 11/14] Ignore project-specific VSCode settings --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index f1ca7c3b..28c93192 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,6 @@ dmypy.json # Pyre type checker .pyre/ + +# VSCode settings +.vscode/ From ca83fd97d97a3c0ec59414587e61424acb9efec9 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 13:23:24 +0200 Subject: [PATCH 12/14] Newest eccodes returns coordinates attr in different order --- kerchunk/tests/test_grib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index f6835a30..2b1452a9 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -107,7 +107,7 @@ def test_grib_tree(): assert isinstance(zg["vbdsf/avg/surface/vbdsf"], zarr.Array) assert ( zg["vbdsf/avg/surface"].attrs["coordinates"] - == "surface latitude longitude time valid_time step" + == "surface latitude longitude step time valid_time" ) assert ( zg["refc/instant/atmosphere"].attrs["coordinates"] From 1c39bc1a17684cb6224d997ae6bc570c732391d6 Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 13:32:17 +0200 Subject: [PATCH 13/14] Make coords attr assert a set comparison instead of string --- kerchunk/tests/test_grib.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 2b1452a9..80ff9711 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -105,13 +105,11 @@ def test_grib_tree(): zg = zarr.open_group(fs.get_mapper("")) assert isinstance(zg["refc/instant/atmosphere/refc"], zarr.Array) assert isinstance(zg["vbdsf/avg/surface/vbdsf"], zarr.Array) - assert ( - zg["vbdsf/avg/surface"].attrs["coordinates"] - == "surface latitude longitude step time valid_time" + assert set(zg["vbdsf/avg/surface"].attrs["coordinates"].split()) == set( + "surface latitude longitude step time valid_time".split() ) - assert ( - zg["refc/instant/atmosphere"].attrs["coordinates"] - == "atmosphere latitude longitude step time valid_time" + assert set(zg["refc/instant/atmosphere"].attrs["coordinates"].split()) == set( + "atmosphere latitude longitude step time valid_time".split() ) # Assert that the fill value is set correctly assert zg.refc.instant.atmosphere.step.fill_value is np.NaN From 9abdec6389c58e7834e93385e4b9ee6d320e976a Mon Sep 17 00:00:00 2001 From: Kevin Paul Date: Fri, 12 Apr 2024 14:07:01 +0200 Subject: [PATCH 14/14] skip hrrr archive test if eccodes version too new --- kerchunk/tests/test_grib.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kerchunk/tests/test_grib.py b/kerchunk/tests/test_grib.py index 80ff9711..e0117ab1 100644 --- a/kerchunk/tests/test_grib.py +++ b/kerchunk/tests/test_grib.py @@ -1,5 +1,6 @@ import os.path +import eccodes import fsspec import numpy as np import pytest @@ -15,6 +16,7 @@ correct_hrrr_subhf_step, ) +eccodes_ver = tuple(int(i) for i in eccodes.__version__.split(".")) cfgrib = pytest.importorskip("cfgrib") here = os.path.dirname(__file__) @@ -49,7 +51,10 @@ def _fetch_first(url): @pytest.mark.parametrize( "url", [ - "s3://noaa-hrrr-bdp-pds/hrrr.20140730/conus/hrrr.t23z.wrfsubhf08.grib2", + pytest.param( + "s3://noaa-hrrr-bdp-pds/hrrr.20140730/conus/hrrr.t23z.wrfsubhf08.grib2", + marks=pytest.mark.skipif(eccodes_ver >= (2, 34), reason="eccodes too new"), + ), "s3://noaa-gefs-pds/gefs.20221011/00/atmos/pgrb2ap5/gep01.t00z.pgrb2a.0p50.f570", "s3://noaa-gefs-retrospective/GEFSv12/reforecast/2000/2000010100/c00/Days:10-16/acpcp_sfc_2000010100_c00.grib2", ],