From c51df64746b196a7e4ddca3cb21b0dce1c5873b2 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:15:56 -0600 Subject: [PATCH 01/12] check meta now takes meta as input --- tests/test_rexarray.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/test_rexarray.py b/tests/test_rexarray.py index a7afd5a2a..f6802b16d 100644 --- a/tests/test_rexarray.py +++ b/tests/test_rexarray.py @@ -45,10 +45,8 @@ def check_ti(fp, ds, group=None): truth_ti[0:2].astype('int64')) -def check_meta(fp, ds, group=None): +def check_meta(truth_meta, ds): """Check that the meta of the dataset matches expectations""" - with Resource(fp, group=group) as res: - truth_meta = res.meta for col in truth_meta.columns: truth_vals = truth_meta[col].to_numpy() @@ -87,9 +85,12 @@ def check_data(fp, ds, group=None): NSRDB_2013, WAVE_2010]) def test_open_with_xr(fp): """Test basic opening and read operations on various files""" + with Resource(fp) as res: + truth_meta = res.meta + with xr.open_dataset(fp, engine="rex") as ds: check_ti(fp, ds) - check_meta(fp, ds) + check_meta(truth_meta, ds) check_shape(fp, ds) check_data(fp, ds) @@ -151,9 +152,12 @@ def test_ds_attrs(): def test_open_group(): """Test opening a group within the file""" + with Resource(WTK_2012_GRP_FP, group="group") as res: + truth_meta = res.meta + with xr.open_dataset(WTK_2012_GRP_FP, group="group", engine="rex") as ds: check_ti(WTK_2012_GRP_FP, ds, group="group") - check_meta(WTK_2012_GRP_FP, ds, group="group") + check_meta(truth_meta, ds) check_shape(WTK_2012_GRP_FP, ds, group="group") check_data(WTK_2012_GRP_FP, ds, group="group") @@ -271,9 +275,12 @@ def test_coords_dset(): NSRDB_2013, WAVE_2010]) def test_open_data_tree_no_groups(fp): """Test basic opening and read operations for a data tree""" + with Resource(fp) as res: + truth_meta = res.meta + with xr.open_datatree(fp, engine="rex") as ds: check_ti(fp, ds) - check_meta(fp, ds) + check_meta(truth_meta, ds) check_shape(fp, ds) check_data(fp, ds) @@ -284,9 +291,12 @@ def test_open_data_tree_no_groups(fp): reason="DataTrees require Python 3.10+ to run") def test_open_data_tree_with_group(): """Test opening a data tree for a file with a group""" + with Resource(WTK_2012_GRP_FP, group="group") as res: + truth_meta = res.meta + with xr.open_datatree(WTK_2012_GRP_FP, engine="rex") as ds: check_ti(WTK_2012_GRP_FP, ds["group"], group="group") - check_meta(WTK_2012_GRP_FP, ds["group"], group="group") + check_meta(truth_meta, ds["group"]) check_shape(WTK_2012_GRP_FP, ds["group"], group="group") check_data(WTK_2012_GRP_FP, ds["group"], group="group") From ce049c86f57e7af778905406712a4f73c6a4e7d6 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:19:23 -0600 Subject: [PATCH 02/12] Check ti now takes time index as input --- tests/test_rexarray.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/test_rexarray.py b/tests/test_rexarray.py index f6802b16d..b939dca6a 100644 --- a/tests/test_rexarray.py +++ b/tests/test_rexarray.py @@ -28,10 +28,8 @@ WTK_2010_200M = os.path.join(TESTDATADIR, 'wtk', 'wtk_2010_200m.h5') -def check_ti(fp, ds, group=None): +def check_ti(truth_ti, ds): """Check that the time index of the dataset matches expectations""" - with Resource(fp, group=group) as res: - truth_ti = res.time_index for t_var in ["time_index", "time"]: assert t_var in ds.coords @@ -87,9 +85,10 @@ def test_open_with_xr(fp): """Test basic opening and read operations on various files""" with Resource(fp) as res: truth_meta = res.meta + truth_ti = res.time_index with xr.open_dataset(fp, engine="rex") as ds: - check_ti(fp, ds) + check_ti(truth_ti, ds) check_meta(truth_meta, ds) check_shape(fp, ds) check_data(fp, ds) @@ -154,9 +153,10 @@ def test_open_group(): """Test opening a group within the file""" with Resource(WTK_2012_GRP_FP, group="group") as res: truth_meta = res.meta + truth_ti = res.time_index with xr.open_dataset(WTK_2012_GRP_FP, group="group", engine="rex") as ds: - check_ti(WTK_2012_GRP_FP, ds, group="group") + check_ti(truth_ti, ds) check_meta(truth_meta, ds) check_shape(WTK_2012_GRP_FP, ds, group="group") check_data(WTK_2012_GRP_FP, ds, group="group") @@ -220,6 +220,9 @@ def test_detect_var_dims(): np.ones((8760, 1)), np.float32, attrs={"units": "MW"}) + with Resource(test_file) as res: + truth_ti = res.time_index + with xr.open_dataset(test_file, engine="rex") as ds: assert set(ds.indexes) == {"time", "gid"} @@ -237,7 +240,7 @@ def test_detect_var_dims(): assert np.allclose(ds["latitude"], [41.29]) assert np.allclose(ds["longitude"], [-71.86]) - check_ti(test_file, ds) + check_ti(truth_ti, ds) check_shape(test_file, ds) check_data(test_file, ds) @@ -277,9 +280,10 @@ def test_open_data_tree_no_groups(fp): """Test basic opening and read operations for a data tree""" with Resource(fp) as res: truth_meta = res.meta + truth_ti = res.time_index with xr.open_datatree(fp, engine="rex") as ds: - check_ti(fp, ds) + check_ti(truth_ti, ds) check_meta(truth_meta, ds) check_shape(fp, ds) check_data(fp, ds) @@ -293,9 +297,10 @@ def test_open_data_tree_with_group(): """Test opening a data tree for a file with a group""" with Resource(WTK_2012_GRP_FP, group="group") as res: truth_meta = res.meta + truth_ti = res.time_index with xr.open_datatree(WTK_2012_GRP_FP, engine="rex") as ds: - check_ti(WTK_2012_GRP_FP, ds["group"], group="group") + check_ti(truth_ti, ds["group"]) check_meta(truth_meta, ds["group"]) check_shape(WTK_2012_GRP_FP, ds["group"], group="group") check_data(WTK_2012_GRP_FP, ds["group"], group="group") From 5432d64d750f56621a3d3128da2f42f33b880ea9 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:22:02 -0600 Subject: [PATCH 03/12] Check shape now takes shape as input --- tests/test_rexarray.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/test_rexarray.py b/tests/test_rexarray.py index b939dca6a..ce9f13946 100644 --- a/tests/test_rexarray.py +++ b/tests/test_rexarray.py @@ -61,11 +61,8 @@ def check_meta(truth_meta, ds): truth_vals[0:2]) -def check_shape(fp, ds, group=None): +def check_shape(truth_shape, ds): """Check that the shape of the dataset matches expectations""" - with Resource(fp, group=group) as res: - truth_shape = res.shape - assert ds.sizes == {'time': truth_shape[0], 'gid': truth_shape[1]} @@ -86,11 +83,12 @@ def test_open_with_xr(fp): with Resource(fp) as res: truth_meta = res.meta truth_ti = res.time_index + truth_shape = res.shape with xr.open_dataset(fp, engine="rex") as ds: check_ti(truth_ti, ds) check_meta(truth_meta, ds) - check_shape(fp, ds) + check_shape(truth_shape, ds) check_data(fp, ds) assert set(ds.indexes) == {"time", "gid"} @@ -154,11 +152,12 @@ def test_open_group(): with Resource(WTK_2012_GRP_FP, group="group") as res: truth_meta = res.meta truth_ti = res.time_index + truth_shape = res.shape with xr.open_dataset(WTK_2012_GRP_FP, group="group", engine="rex") as ds: check_ti(truth_ti, ds) check_meta(truth_meta, ds) - check_shape(WTK_2012_GRP_FP, ds, group="group") + check_shape(truth_shape, ds) check_data(WTK_2012_GRP_FP, ds, group="group") @@ -222,6 +221,7 @@ def test_detect_var_dims(): with Resource(test_file) as res: truth_ti = res.time_index + truth_shape = res.shape with xr.open_dataset(test_file, engine="rex") as ds: assert set(ds.indexes) == {"time", "gid"} @@ -241,7 +241,7 @@ def test_detect_var_dims(): assert np.allclose(ds["longitude"], [-71.86]) check_ti(truth_ti, ds) - check_shape(test_file, ds) + check_shape(truth_shape, ds) check_data(test_file, ds) @@ -281,11 +281,12 @@ def test_open_data_tree_no_groups(fp): with Resource(fp) as res: truth_meta = res.meta truth_ti = res.time_index + truth_shape = res.shape with xr.open_datatree(fp, engine="rex") as ds: check_ti(truth_ti, ds) check_meta(truth_meta, ds) - check_shape(fp, ds) + check_shape(truth_shape, ds) check_data(fp, ds) assert set(ds.indexes) == {"time", "gid"} @@ -298,11 +299,12 @@ def test_open_data_tree_with_group(): with Resource(WTK_2012_GRP_FP, group="group") as res: truth_meta = res.meta truth_ti = res.time_index + truth_shape = res.shape with xr.open_datatree(WTK_2012_GRP_FP, engine="rex") as ds: check_ti(truth_ti, ds["group"]) check_meta(truth_meta, ds["group"]) - check_shape(WTK_2012_GRP_FP, ds["group"], group="group") + check_shape(truth_shape, ds["group"]) check_data(WTK_2012_GRP_FP, ds["group"], group="group") From 41b6d93267232e66f737551d4acaa53fbfe932af Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:24:57 -0600 Subject: [PATCH 04/12] check data now takes datasets as input --- tests/test_rexarray.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/test_rexarray.py b/tests/test_rexarray.py index ce9f13946..5795f7d57 100644 --- a/tests/test_rexarray.py +++ b/tests/test_rexarray.py @@ -66,12 +66,10 @@ def check_shape(truth_shape, ds): assert ds.sizes == {'time': truth_shape[0], 'gid': truth_shape[1]} -def check_data(fp, ds, group=None): +def check_data(truth_datasets, ds): """Check that the values of the dataset match expectations""" - with Resource(fp, group=group) as res: - datasets = {d_name: res[d_name][:] for d_name in res.resource_datasets} - for name, values in datasets.items(): + for name, values in truth_datasets.items(): assert np.allclose(ds[name], values) @@ -84,12 +82,13 @@ def test_open_with_xr(fp): truth_meta = res.meta truth_ti = res.time_index truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} with xr.open_dataset(fp, engine="rex") as ds: check_ti(truth_ti, ds) check_meta(truth_meta, ds) check_shape(truth_shape, ds) - check_data(fp, ds) + check_data(truth_datasets, ds) assert set(ds.indexes) == {"time", "gid"} @@ -153,12 +152,13 @@ def test_open_group(): truth_meta = res.meta truth_ti = res.time_index truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} with xr.open_dataset(WTK_2012_GRP_FP, group="group", engine="rex") as ds: check_ti(truth_ti, ds) check_meta(truth_meta, ds) check_shape(truth_shape, ds) - check_data(WTK_2012_GRP_FP, ds, group="group") + check_data(truth_datasets, ds) @pytest.mark.parametrize( @@ -222,6 +222,8 @@ def test_detect_var_dims(): with Resource(test_file) as res: truth_ti = res.time_index truth_shape = res.shape + truth_datasets = {name: res[name][:] + for name in res.resource_datasets} with xr.open_dataset(test_file, engine="rex") as ds: assert set(ds.indexes) == {"time", "gid"} @@ -242,7 +244,7 @@ def test_detect_var_dims(): check_ti(truth_ti, ds) check_shape(truth_shape, ds) - check_data(test_file, ds) + check_data(truth_datasets, ds) def test_coords_dset(): @@ -282,12 +284,13 @@ def test_open_data_tree_no_groups(fp): truth_meta = res.meta truth_ti = res.time_index truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} with xr.open_datatree(fp, engine="rex") as ds: check_ti(truth_ti, ds) check_meta(truth_meta, ds) check_shape(truth_shape, ds) - check_data(fp, ds) + check_data(truth_datasets, ds) assert set(ds.indexes) == {"time", "gid"} @@ -300,12 +303,13 @@ def test_open_data_tree_with_group(): truth_meta = res.meta truth_ti = res.time_index truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} with xr.open_datatree(WTK_2012_GRP_FP, engine="rex") as ds: check_ti(truth_ti, ds["group"]) check_meta(truth_meta, ds["group"]) check_shape(truth_shape, ds["group"]) - check_data(WTK_2012_GRP_FP, ds["group"], group="group") + check_data(truth_datasets, ds["group"]) def execute_pytest(capture='all', flags='-rapP'): From 69dc1f06e588e110df30d022338ee48a4d5a3be2 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:26:02 -0600 Subject: [PATCH 05/12] MF tests now check ti and data too --- tests/test_rexarray.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/test_rexarray.py b/tests/test_rexarray.py index 5795f7d57..f3fcf265c 100644 --- a/tests/test_rexarray.py +++ b/tests/test_rexarray.py @@ -169,22 +169,32 @@ def test_open_group(): def test_open_mf_year(glob_fp): """Test opening a multi-file dataset across years""" with MultiYearResource(glob_fp) as res: + truth_meta = res.meta + truth_ti = res.time_index truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} with xr.open_mfdataset(glob_fp, engine="rex") as ds: - assert ds.sizes == {'time': truth_shape[0], 'gid': truth_shape[1]} + check_meta(truth_meta, ds) + check_ti(truth_ti, ds) + check_shape(truth_shape, ds) + check_data(truth_datasets, ds) def test_open_mf_ds(): """Test opening multi-file dataset across variables""" glob_fp = os.path.join(TESTDATADIR, 'wtk', 'wtk_2010_*m.h5') with MultiFileResource(glob_fp) as res: + truth_meta = res.meta + truth_ti = res.time_index truth_shape = res.shape - datasets = res.resource_datasets + truth_datasets = {name: res[name][:] for name in res.resource_datasets} with xr.open_mfdataset(glob_fp, engine="rex") as ds: - assert ds.sizes == {'time': truth_shape[0], 'gid': truth_shape[1]} - assert all(ds_name in ds for ds_name in datasets) + check_meta(truth_meta, ds) + check_ti(truth_ti, ds) + check_shape(truth_shape, ds) + check_data(truth_datasets, ds) def test_open_drop_var(): From ad68b5fabe137ee74d3f3a8ae5bf28bf9cb5fb39 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:27:20 -0600 Subject: [PATCH 06/12] Add tests for using compat override --- tests/test_rexarray.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_rexarray.py b/tests/test_rexarray.py index f3fcf265c..17cc8d55f 100644 --- a/tests/test_rexarray.py +++ b/tests/test_rexarray.py @@ -197,6 +197,44 @@ def test_open_mf_ds(): check_data(truth_datasets, ds) +@pytest.mark.parametrize( + 'glob_fp', + [os.path.join(TESTDATADIR, 'nsrdb', 'ri_100_nsrdb_201*.h5'), + os.path.join(TESTDATADIR, 'sza', 'nsrdb_sza_201*.h5'), + os.path.join(TESTDATADIR, 'wtk', 'ri_100_wtk_201*.h5')]) +def test_open_mf_year_override_compat(glob_fp): + """Test opening a multi-file dataset across years with no compat""" + with MultiYearResource(glob_fp) as res: + truth_meta = res.meta + truth_ti = res.time_index + truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} + + kwargs = {'engine': 'rex', 'compat': 'override', 'coords': 'minimal'} + with xr.open_mfdataset(glob_fp, **kwargs) as ds: + check_meta(truth_meta, ds) + check_ti(truth_ti, ds) + check_shape(truth_shape, ds) + check_data(truth_datasets, ds) + + +def test_open_mf_ds_override_compat(): + """Test opening multi-file dataset across variables""" + glob_fp = os.path.join(TESTDATADIR, 'wtk', 'wtk_2010_*m.h5') + with MultiFileResource(glob_fp) as res: + truth_meta = res.meta + truth_ti = res.time_index + truth_shape = res.shape + truth_datasets = {name: res[name][:] for name in res.resource_datasets} + + kwargs = {'engine': 'rex', 'compat': 'override', 'coords': 'minimal'} + with xr.open_mfdataset(glob_fp, **kwargs) as ds: + check_meta(truth_meta, ds) + check_ti(truth_ti, ds) + check_shape(truth_shape, ds) + check_data(truth_datasets, ds) + + def test_open_drop_var(): """Test dropping of variables when opening a file""" with xr.open_dataset(WTK_2012_FP, engine="rex") as ds: From 6131a1b90db2bbefc6aa13da38ab4a5ca2a58270 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:35:09 -0600 Subject: [PATCH 07/12] Add warning to `open_mfdataset_hsds` --- rex/external/rexarray.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/rex/external/rexarray.py b/rex/external/rexarray.py index 7aab371ac..f2c8ce97d 100644 --- a/rex/external/rexarray.py +++ b/rex/external/rexarray.py @@ -838,8 +838,14 @@ def open_mfdataset_hsds(paths, **kwargs): for more details on HSDS files. **kwargs Keyword-value argument pairs to pass to :func:`open_mfdataset`. - We strongly recommend specifying ``parallel=True`` and - ``chunks="auto"`` to help with data loading times. + We strongly recommend specifying the following parameters to + help with data loading times: + + - parallel=True + - chunks="auto" + - compat="override" + - coords="minimal" + Returns ------- @@ -852,6 +858,16 @@ def open_mfdataset_hsds(paths, **kwargs): kwargs["engine"] = "rex" kwargs["hsds"] = True + if kwargs.get("compat") != "override": + msg = ("Did not detect 'compat='override' parameter in arguments " + "passed to `rex.open_mfdataset_hsds`. You may see drastically " + "increased loading times since all of the coordinates are " + "loaded and validated by xarray. We strongly recommend passing " + "'compat='override' (and coords='minimal') for increased " + "read performance.") + warnings.warn(msg, UserWarning) + + if isinstance(paths, str): paths = _hsds_glob_to_list(paths) elif isinstance(paths, (list, tuple)): From 106c49e6b45cbbb8331369c2f161f9de661ad024 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:37:02 -0600 Subject: [PATCH 08/12] Update test to use new kwargs --- tests/h5pyd_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/h5pyd_tests.py b/tests/h5pyd_tests.py index 188154ba3..f2358c686 100644 --- a/tests/h5pyd_tests.py +++ b/tests/h5pyd_tests.py @@ -87,7 +87,9 @@ def test_sup3rcc(): def test_mf_hsds_xr(fps): """Test opening multiple files via HSDS with xarray""" - with open_mfdataset_hsds(fps, parallel=True, chunks="auto") as ds: + kwargs = {"parallel": True, "chunks": "auto", "compat": "override", + "coords": "minimal"} + with open_mfdataset_hsds(fps, **kwargs) as ds: assert ds.sizes == {'time': 17544, 'gid': 2488136} assert str(ds.time_index.isel(time=0).values).startswith("2008") assert str(ds.time_index.isel(time=-1).values).startswith("2009") From 96cc5c37cf96e1d44ef1497b845b07bb64f2a0cf Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 17:45:30 -0600 Subject: [PATCH 09/12] Add warnings to README and notebook --- examples/xarray/README.rst | 17 +++++++++++++---- examples/xarray/daily_agg.ipynb | 9 ++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/examples/xarray/README.rst b/examples/xarray/README.rst index e058fce4e..f117dffeb 100644 --- a/examples/xarray/README.rst +++ b/examples/xarray/README.rst @@ -217,7 +217,16 @@ For more information on using dask with xarray, see `this Date: Sun, 18 May 2025 17:50:33 -0600 Subject: [PATCH 10/12] Minor typo fix --- examples/xarray/daily_agg.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/xarray/daily_agg.ipynb b/examples/xarray/daily_agg.ipynb index 3381d4afe..58b9d76a3 100644 --- a/examples/xarray/daily_agg.ipynb +++ b/examples/xarray/daily_agg.ipynb @@ -25,7 +25,7 @@ "\n", "- Setting up the full aggregate dataset lazily and then doing one `.compute()` call tended to break things. Smaller multiple compute calls seem to work better. \n", "\n", - "- By default, ``xarray`` does not assume that the coordinate data (i.e. meta variables) match across files. As a result, it library will try to load all coordinates and compare them during the concatenation step. This process involves significant I/O and can drastically increase the runtime of ``xr.open_mfdataset`` calls. To override this behavior, we can pass ``compat=\"override\"`` and ``coords=\"minimal\"`` to the xr.open_mfdataset`` call." + "- By default, ``xarray`` does not assume that the coordinate data (i.e. meta variables) match across files. As a result, it library will try to load all coordinates and compare them during the concatenation step. This process involves significant I/O and can drastically increase the runtime of ``xr.open_mfdataset`` calls. To override this behavior, we can pass ``compat=\"override\"`` and ``coords=\"minimal\"`` to the ``xr.open_mfdataset`` call." ] }, { From 444024b2fd4a0b66bf68ae3d967f76a434c920c4 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 18:35:16 -0600 Subject: [PATCH 11/12] Minor updates --- examples/xarray/daily_agg.ipynb | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/examples/xarray/daily_agg.ipynb b/examples/xarray/daily_agg.ipynb index 58b9d76a3..bfe6763b1 100644 --- a/examples/xarray/daily_agg.ipynb +++ b/examples/xarray/daily_agg.ipynb @@ -25,20 +25,17 @@ "\n", "- Setting up the full aggregate dataset lazily and then doing one `.compute()` call tended to break things. Smaller multiple compute calls seem to work better. \n", "\n", - "- By default, ``xarray`` does not assume that the coordinate data (i.e. meta variables) match across files. As a result, it library will try to load all coordinates and compare them during the concatenation step. This process involves significant I/O and can drastically increase the runtime of ``xr.open_mfdataset`` calls. To override this behavior, we can pass ``compat=\"override\"`` and ``coords=\"minimal\"`` to the ``xr.open_mfdataset`` call." + "- If you are running on a network file system (NFS) such as on an HPC, try setting the ``local_directory`` parameter of the ``Client`` instance to point to a local scratch to avoid the overhead of network calls for each of the workers." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "171a1a77-a6e2-4280-8a6b-fc8a286b0895", "metadata": {}, "outputs": [], "source": [ - "import glob\n", "import xarray as xr\n", - "from rex import Resource\n", - "import numpy as np\n", "import pandas as pd\n", "from dask.distributed import Client" ] @@ -663,16 +660,15 @@ "fp_base = '/datasets/sup3rcc/conus_{scenario}/v0.2.2_beta/sup3rcc_conus_{scenario}_{group}_{year}.h5'\n", "fp_pr = fp_base.replace('v0.2.2_beta', 'v0.2.2_beta/daily')\n", "\n", - "kwargs = dict(engine=\"rex\", chunks={'time': 8784, 'gid': 50000},\n", - " compat=\"override\", coords=\"minimal\")\n", - "xds_trh = xr.open_mfdataset(fp_base.format(scenario=scenario, group='trh', year=year), **kwargs)\n", - "xds_wind = xr.open_mfdataset(fp_base.format(scenario=scenario, group='wind', year=year), **kwargs)\n", - "xds_pr = xr.open_mfdataset(fp_pr.format(scenario=scenario, group='pr', year=year), **kwargs)" + "kwargs = dict(engine=\"rex\", chunks={'time': 8784, 'gid': 50000})\n", + "xds_trh = xr.open_dataset(fp_base.format(scenario=scenario, group='trh', year=year), **kwargs)\n", + "xds_wind = xr.open_dataset(fp_base.format(scenario=scenario, group='wind', year=year), **kwargs)\n", + "xds_pr = xr.open_dataset(fp_pr.format(scenario=scenario, group='pr', year=year), **kwargs)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "d02dc16a-7727-47ee-b265-a6984181bf9e", "metadata": {}, "outputs": [ @@ -687,13 +683,13 @@ ], "source": [ "%%time\n", - "da = xds_trh['temperature_2m'].groupby(\"time.date\").max(\"time\")\n", + "da = xds_trh['temperature_2m'].groupby(\"time.date\").max(dim=\"time\")\n", "ds_out = da.compute().to_dataset()" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "3710d159-370f-48de-8c7b-0c3b30082504", "metadata": {}, "outputs": [ @@ -708,13 +704,13 @@ ], "source": [ "%%time\n", - "da = xds_trh['relativehumidity_2m'].groupby(\"time.date\").min(\"time\")\n", + "da = xds_trh['relativehumidity_2m'].groupby(\"time.date\").min(dim=\"time\")\n", "ds_out['relativehumidity_2m'] = da.compute()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "0436ece4-c14a-4ca7-926e-6f5dc280ede8", "metadata": {}, "outputs": [ @@ -729,7 +725,7 @@ ], "source": [ "%%time\n", - "da = xds_wind['windspeed_10m'].groupby(\"time.date\").mean(\"time\")\n", + "da = xds_wind['windspeed_10m'].groupby(\"time.date\").mean(dim=\"time\")\n", "ds_out['windspeed_10m'] = da.compute() * 3.6 # m/s to km/hr" ] }, From 1fc05e8e3555a8e42803e57284a3901c358f22e4 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sun, 18 May 2025 18:49:28 -0600 Subject: [PATCH 12/12] Linter fix --- rex/external/rexarray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rex/external/rexarray.py b/rex/external/rexarray.py index f2c8ce97d..84806e294 100644 --- a/rex/external/rexarray.py +++ b/rex/external/rexarray.py @@ -867,7 +867,6 @@ def open_mfdataset_hsds(paths, **kwargs): "read performance.") warnings.warn(msg, UserWarning) - if isinstance(paths, str): paths = _hsds_glob_to_list(paths) elif isinstance(paths, (list, tuple)):