
added chunking arguments to copernicusmarine.open_dataset() #1049

Merged: 5 commits, merged on Jan 14, 2025

Changes from all commits

dfm_tools/download.py (16 additions & 20 deletions)
```diff
@@ -226,31 +226,27 @@ def download_CMEMS(varkey,
         # TODO: revert, see https://github.com/Deltares/dfm_tools/issues/1047
         start_datetime = date_min.isoformat(),
         end_datetime = date_max.isoformat(),
+        # optimize chunking for downloading with daily frequency
+        # https://github.com/Deltares/dfm_tools/issues/1033
+        service="arco-geo-series",
+        chunk_size_limit=None,
     )
 
     Path(dir_output).mkdir(parents=True, exist_ok=True)
 
-    if freq is None:
-        date_str = f"{date_min.strftime('%Y%m%d')}_{date_max.strftime('%Y%m%d')}"
+    period_range = pd.period_range(date_min, date_max, freq=freq)
+    for date in period_range:
+        date_str = str(date)
         name_output = f'{file_prefix}{varkey}_{date_str}.nc'
-        output_filename = Path(dir_output,name_output)
+        output_filename = Path(dir_output, name_output)
         if output_filename.is_file() and not overwrite:
-            print(f'"{name_output}" found and overwrite=False, returning.')
-            return
-        print(f'xarray writing netcdf file: {name_output}')
-        dataset.to_netcdf(output_filename)
-    else:
-        period_range = pd.period_range(date_min,date_max,freq=freq)
-        for date in period_range:
-            date_str = str(date)
-            name_output = f'{file_prefix}{varkey}_{date_str}.nc'
-            output_filename = Path(dir_output,name_output)
-            if output_filename.is_file() and not overwrite:
-                print(f'"{name_output}" found and overwrite=False, continuing.')
-                continue
-            dataset_perperiod = dataset.sel(time=slice(date_str, date_str))
-            print(f'xarray writing netcdf file: {name_output}')
-            dataset_perperiod.to_netcdf(output_filename)
+            print(f'"{name_output}" found and overwrite=False, continuing.')
+            continue
+        dataset_perperiod = dataset.sel(time=slice(date_str, date_str))
+        print(f'xarray writing netcdf file: {name_output}: ',end='')
+        dtstart = pd.Timestamp.now()
+        dataset_perperiod.to_netcdf(output_filename)
+        print(f'{(pd.Timestamp.now()-dtstart).total_seconds():.2f} sec')
 
 
 def copernicusmarine_get_product(date_min, date_max, vartype):
```
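For readers who want to reuse this pattern outside `download_CMEMS`, the sketch below shows the same idea in isolation: open the remote dataset once with the chunking-related arguments added here (`service="arco-geo-series"`, `chunk_size_limit=None`) and then write one netCDF file per day via `pd.period_range`. It is a minimal sketch, assuming copernicusmarine v2 with configured credentials; the dataset id, variable, and bounding box are illustrative values, not taken from this PR.

```python
from pathlib import Path

import copernicusmarine
import pandas as pd

# illustrative inputs (not from this PR)
dataset_id = "cmems_mod_glo_phy_my_0.083deg_P1D-m"
varkey = "thetao"
date_min, date_max = pd.Timestamp("2010-01-01"), pd.Timestamp("2010-01-02")
dir_output = Path("./cmems_download")
dir_output.mkdir(parents=True, exist_ok=True)

# open the dataset lazily; the last two arguments mirror the ones added in this PR
dataset = copernicusmarine.open_dataset(
    dataset_id=dataset_id,
    variables=[varkey],
    minimum_longitude=2, maximum_longitude=3,
    minimum_latitude=51, maximum_latitude=52,
    start_datetime=date_min.isoformat(),
    end_datetime=date_max.isoformat(),
    service="arco-geo-series",
    chunk_size_limit=None,
)

# write one file per day and report how long each write took
for date in pd.period_range(date_min, date_max, freq="D"):
    date_str = str(date)
    output_filename = dir_output / f"cmems_{varkey}_{date_str}.nc"
    dataset_perperiod = dataset.sel(time=slice(date_str, date_str))
    dtstart = pd.Timestamp.now()
    dataset_perperiod.to_netcdf(output_filename)
    print(f"{output_filename.name}: {(pd.Timestamp.now() - dtstart).total_seconds():.2f} sec")
```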
```diff
@@ -356,7 +352,7 @@ def copernicusmarine_get_dataset_id(varkey, date_min, date_max):
 def copernicusmarine_get_buffer(dataset_id):
     ds = copernicusmarine.open_dataset(dataset_id=dataset_id)
     try:
-        resolution = ds.latitude.attrs["step"]
+        resolution = ds.latitude.diff(dim='latitude').to_numpy().mean()
         buffer = 2 * resolution
     except (AttributeError, KeyError, TypeError):
         print("failed to automatically derive a buffer from the dataset, using buffer=0.5")
```
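The buffer helper now estimates the grid resolution from the spacing of the latitude coordinate instead of reading a `step` attribute, and keeps the 0.5 degree fallback when that fails. A minimal self-contained sketch of the same calculation on a synthetic grid (the function name here is illustrative, not the dfm_tools API):

```python
import numpy as np
import xarray as xr

def estimate_buffer(ds, fallback=0.5):
    # resolution = mean latitude spacing; buffer = twice the resolution
    try:
        resolution = ds.latitude.diff(dim="latitude").to_numpy().mean()
        return 2 * resolution
    except (AttributeError, KeyError, TypeError):
        print(f"failed to derive a buffer from the dataset, using buffer={fallback}")
        return fallback

# synthetic dataset with a regular 1/12 degree latitude grid
ds = xr.Dataset(coords={"latitude": np.arange(50, 53, 1 / 12)})
print(estimate_buffer(ds))  # ~0.1667, i.e. twice the grid spacing
```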

docs/whats-new.md (4 additions & 0 deletions)
```diff
@@ -2,6 +2,10 @@
 
 ## UNRELEASED
 
+### Feat
+- updated to copernicusmarine v2 in [#1046](https://github.com/Deltares/dfm_tools/pull/1046)
+- optimized CMEMS download performance in [#1049](https://github.com/Deltares/dfm_tools/pull/1049)
+
 
 ## 0.31.0 (2024-10-28)
 
```

tests/test_download.py (40 additions & 4 deletions)
```diff
@@ -15,6 +15,7 @@
     cds_remove_credentials_raise,
     copernicusmarine_credentials,
     copernicusmarine_get_buffer,
+    copernicusmarine_get_dataset_id,
 )
 import dfm_tools as dfmt
 import xarray as xr
```
```diff
@@ -221,11 +222,36 @@ def test_copernicusmarine_credentials():
 
 @pytest.mark.requiressecrets
 @pytest.mark.unittest
-def test_copernicusmarine_get_buffer_notfound():
+def test_copernicusmarine_get_buffer():
+    dataset_id = 'cmems_mod_glo_phy_my_0.083deg_P1D-m'
+    buffer = copernicusmarine_get_buffer(dataset_id)
+    assert np.isclose(buffer, 0.16666666)
+
     dataset_id = 'cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D'
     buffer = copernicusmarine_get_buffer(dataset_id)
-    assert np.isclose(buffer, 0.5)
+    assert np.isclose(buffer, 0.0833333358168602)
+
+
+@pytest.mark.requiressecrets
+@pytest.mark.unittest
+def test_copernicusmarine_get_dataset_id():
+    date_min = pd.Timestamp('2010-01-01')
+    date_max = pd.Timestamp('2010-01-02')
+    date_args = dict(date_min=date_min, date_max=date_max)
+    dataset_id = copernicusmarine_get_dataset_id(varkey='bottomT', **date_args)
+    assert dataset_id == 'cmems_mod_glo_phy_my_0.083deg_P1D-m'
+    dataset_id = copernicusmarine_get_dataset_id(varkey='no3', **date_args)
+    assert dataset_id == 'cmems_mod_glo_bgc_my_0.25deg_P1D-m'
+
+    date_min = pd.Timestamp.today()
+    date_max = pd.Timestamp.today() + pd.Timedelta(days=1)
+    date_args = dict(date_min=date_min, date_max=date_max)
+
+    dataset_id = copernicusmarine_get_dataset_id(varkey='tob', **date_args)
+    assert dataset_id == 'cmems_mod_glo_phy_anfc_0.083deg_P1D-m'
+    dataset_id = copernicusmarine_get_dataset_id(varkey='no3', **date_args)
+    assert dataset_id == 'cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m'
 
 
 @pytest.mark.requiressecrets
 @pytest.mark.unittest
```
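The two helpers exercised above resolve the inputs that `download_CMEMS` otherwise derives on the fly: `copernicusmarine_get_dataset_id` maps a variable name and date range to a CMEMS dataset id, and `copernicusmarine_get_buffer` derives a spatial buffer from that dataset's grid spacing; the download tests below pass both explicitly to skip these lookups. A sketch of calling them directly, assuming they are imported from `dfm_tools.download` as in this test module (they are internal helpers, so the import path may change) and that Copernicus Marine credentials are configured:

```python
import pandas as pd

# internal helpers, imported as in tests/test_download.py
from dfm_tools.download import (
    copernicusmarine_get_buffer,
    copernicusmarine_get_dataset_id,
)

date_min = pd.Timestamp("2010-01-01")
date_max = pd.Timestamp("2010-01-02")

# resolve the reanalysis dataset id for bottom temperature
dataset_id = copernicusmarine_get_dataset_id(varkey="bottomT", date_min=date_min, date_max=date_max)
print(dataset_id)  # 'cmems_mod_glo_phy_my_0.083deg_P1D-m' according to the test above

# derive a buffer of twice the grid spacing for that dataset
buffer = copernicusmarine_get_buffer(dataset_id)
print(buffer)      # ~0.167 degrees according to the test above
```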
```diff
@@ -234,11 +260,16 @@ def test_download_cmems_my(tmp_path):
     date_max = '2010-01-02'
     longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain
     varlist_cmems = ['bottomT','no3'] # avaliable variables differ per product, examples are ['bottomT','mlotst','siconc','sithick','so','thetao','uo','vo','usi','vsi','zos','no3']. More info on https://data.marine.copernicus.eu/products
+    dataset_id_dict = {'bottomT':'cmems_mod_glo_phy_my_0.083deg_P1D-m',
+                       'no3':'cmems_mod_glo_bgc_my_0.25deg_P1D-m'}
+    file_prefix = 'cmems_'
     for varkey in varlist_cmems:
-        file_prefix = 'cmems_'
+        dataset_id = dataset_id_dict[varkey]
         dfmt.download_CMEMS(varkey=varkey,
                             longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max,
                             date_min=date_min, date_max=date_max,
+                            # speed up tests by supplying datset_id and buffer
+                            dataset_id=dataset_id, buffer=0,
                             dir_output=tmp_path, file_prefix=file_prefix, overwrite=True)
 
     # assert downloaded files
```
```diff
@@ -256,11 +287,16 @@ def test_download_cmems_forecast(tmp_path):
     date_max = pd.Timestamp.today() + pd.Timedelta(days=1)
     longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain
     varlist_cmems = ['tob','no3'] # avaliable variables differ per product, examples are ['bottomT','mlotst','siconc','sithick','so','thetao','uo','vo','usi','vsi','zos','no3']. More info on https://data.marine.copernicus.eu/products
+    dataset_id_dict = {'tob':'cmems_mod_glo_phy_anfc_0.083deg_P1D-m',
+                       'no3':'cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m'}
+    file_prefix = 'cmems_'
     for varkey in varlist_cmems:
-        file_prefix = 'cmems_'
+        dataset_id = dataset_id_dict[varkey]
         dfmt.download_CMEMS(varkey=varkey,
                             longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max,
                             date_min=date_min, date_max=date_max,
+                            # speed up tests by supplying datset_id and buffer
+                            dataset_id=dataset_id, buffer=0,
                             dir_output=tmp_path, file_prefix=file_prefix, overwrite=True)
 
     # assert downloaded files
```
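Since the new helper tests and the download tests are all marked `requiressecrets`, they only make sense with Copernicus Marine credentials available. One possible way to run just this subset locally, using standard pytest marker and keyword selection (the exact selection expression is an assumption, not something defined in this PR):

```python
import pytest

# run only the CMEMS-related tests in this module; requires configured credentials
pytest.main(["tests/test_download.py", "-m", "requiressecrets",
             "-k", "cmems or copernicusmarine", "-v"])
```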