653: `uds.ugrid.to_netcdf()` writes dataset that cannot be depth sliced afterwards (#662)

veenstrajelmer · web-flow · commit e94ef852ad62 · 2023-11-17T01:27:19.000+01:00
* added remove_nan_fillvalue_attrs

* added testcase

* updated whatsnew
diff --git a/dfm_tools/xugrid_helpers.py b/dfm_tools/xugrid_helpers.py
@@ -141,6 +141,24 @@ def decode_default_fillvals(ds):
     return ds
 
 
+def remove_nan_fillvalue_attrs(ds : (xr.Dataset, xu.UgridDataset)):
+    """
+    xarray writes {"_FillValue": np.nan} to encoding for variables without _FillValue attribute.
+    Remove these from the encoding of all variables of ds (in place, returns None) to avoid issues.
+    """
+    ds = ds.obj if isinstance(ds,xu.UgridDataset) else ds
+    
+    count = 0
+    for varn in ds.variables:
+        fillval = ds[varn].encoding.get('_FillValue')
+        # np.isnan() raises TypeError on non-numeric (e.g. string) fillvalues, so check the dtype first
+        if np.asarray(fillval).dtype.kind in 'fc' and np.isnan(fillval):
+            ds[varn].encoding.pop('_FillValue')
+            count += 1
+    if count > 0:
+        print(f"[{count} nan fillvalue attrs removed]", end="")
+
+
 def open_partitioned_dataset(file_nc, decode_fillvals=False, remove_edges=True, remove_ghost=True, **kwargs): 
     """
     using xugrid to read and merge partitions, with some additional features (remaning old layerdim, timings, set zcc/zw as data_vars)
@@ -200,6 +218,7 @@ def open_partitioned_dataset(file_nc, decode_fillvals=False, remove_edges=True,
             print('[mapformat1] ',end='')
             #for mapformat1 mapfiles: merge different face dimensions (rename nFlowElem to nNetElem) to make sure the dataset topology is correct
             ds = ds.rename({'nFlowElem':'nNetElem'})
+        remove_nan_fillvalue_attrs(ds)
         uds = xu.core.wrap.UgridDataset(ds)
         if remove_ghost: #TODO: this makes it way slower (at least for GTSM, although merging seems faster), but is necessary since values on overlapping cells are not always identical (eg in case of Venice ucmag)
             uds = remove_ghostcells(uds, file_nc_one)
diff --git a/docs/whats-new.md b/docs/whats-new.md
@@ -1,6 +1,7 @@
 ## UNRELEASED
 
 ### Feat
+- pop np.nan `_FillValue` attrs in `dfmt.open_partitioned_dataset()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#662](https://github.com/Deltares/dfm_tools/pull/662)
 - interpolation of edge/node variables to faces with `dfmt.uda_to_faces()` (deprecates `dfmt.uda_edges_to_faces()`) by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#651](https://github.com/Deltares/dfm_tools/pull/651) and [#644](https://github.com/Deltares/dfm_tools/pull/644)
 
 ### Fix
diff --git a/tests/test_xugrid_helpers.py b/tests/test_xugrid_helpers.py
@@ -30,6 +30,42 @@ def test_remove_unassociated_edges():
     assert ds2_edgedimsize == ds_edgedimsize-1
 
 
+def _count_nan_fillvalues(ds):
+    """Print and count the variables of ds that have a nan `_FillValue` in their encoding."""
+    count = 0
+    for varn in ds.variables:
+        fillval = ds[varn].encoding.get('_FillValue')
+        # variables without `_FillValue` in their encoding give None, which np.isnan() cannot handle
+        if fillval is not None and np.isnan(fillval):
+            print(varn, fillval)
+            count += 1
+    return count
+
+
+@pytest.mark.unittest
+def test_remove_nan_fillvalue_attrs(tmp_path):
+    """
+    xarray writes {"_FillValue": np.nan} to encoding for variables without _FillValue attribute.
+    This test checks if that is still the case and checks if dfmt.open_partitioned_dataset removes them.
+    """
+    file_nc = dfmt.data.fm_curvedbend_map(return_filepath=True)
+    # write to the pytest tmp_path instead of the working directory, so no file is left behind
+    file_out = str(tmp_path / "temp_fillvals_map.nc")
+    # context managers close the xarray file handles again after use
+    with xr.open_dataset(file_nc) as ds_org:
+        ds_org.to_netcdf(file_out)
+    print("nan fillvalue attrs in dataset written by xugrid/xarray")
+    with xr.open_dataset(file_out) as ds_out_xr:
+        count_xr = _count_nan_fillvalues(ds_out_xr)
+    
+    print("nan fillvalue attrs in dataset written by xugrid/xarray, read with dfm_tools")
+    ds_out_dfmt = dfmt.open_partitioned_dataset(file_out, chunks="auto")
+    count_dfmt = _count_nan_fillvalues(ds_out_dfmt)
+    
+    assert count_xr == 10
+    assert count_dfmt == 0
+
+
 @pytest.mark.unittest
 def test_get_uds_isgeographic():
     file_nc = dfmt.data.fm_grevelingen_map(return_filepath=True) #zlayer