diff --git a/ESDC/README.md b/ESDC/README.md index 0084c74..934d81e 100644 --- a/ESDC/README.md +++ b/ESDC/README.md @@ -188,8 +188,8 @@ inputs-preprocess/FLUXCOM/fluxcom-data-cube-8d-0.25deg.py # Resample to 0.25 deg # GOSIF: Preprocessing inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py # Convert .tif to .zarr inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py # Concatenate .zarr files -inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py # Resample to 0.25 degrees inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py # Add initial metadata +inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py # Resample to 0.25 degrees # GOME-2 JJ Method: Preprocessing inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py # Concatenate .nc files diff --git a/ESDC/inputs-collect/download-GOME2-SIF.py b/ESDC/inputs-collect/download-GOME2-SIF.py index 65328c5..6ff0d1c 100644 --- a/ESDC/inputs-collect/download-GOME2-SIF.py +++ b/ESDC/inputs-collect/download-GOME2-SIF.py @@ -1,5 +1,6 @@ import requests import numpy as np +import os URL = "http://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ECOCLIM/Downscaled-GOME2-SIF/v2.0/" # pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/data/" @@ -18,4 +19,5 @@ file_to_download = f"GOME_{rm}_dcSIF_005deg_8day_{year}.nc" print(f"Downloading {file_to_download}") response = requests.get(URL + file_to_download) - open(pathOut + file_to_download, "wb").write(response.content) \ No newline at end of file + file_path = os.path.join(pathOut, file_to_download) + open(file_path, "wb").write(response.content) diff --git a/ESDC/inputs-collect/download-GOSIF.py b/ESDC/inputs-collect/download-GOSIF.py index 0df6434..b629405 100644 --- a/ESDC/inputs-collect/download-GOSIF.py +++ b/ESDC/inputs-collect/download-GOSIF.py @@ -2,6 +2,7 @@ import requests from urllib.request import Request, urlopen import re +import os from os.path import exists URL = "http://data.globalecology.unh.edu/data/GOSIF_v2/8day/" @@ -24,9 +25,10 @@ link = 
link.get('href') if link.endswith(".tif.gz"): file_to_download = link.split("/")[-1] - if not exists(pathOut + file_to_download): + file_path = os.path.join(pathOut, file_to_download) + if not exists(file_path): print(f"Downloading {file_to_download}") response = requests.get(URL + file_to_download) - open(pathOut + file_to_download, "wb").write(response.content) + open(file_path, "wb").write(response.content) else: - pass \ No newline at end of file + pass diff --git a/ESDC/inputs-collect/extract-gz-gosif.py b/ESDC/inputs-collect/extract-gz-gosif.py index afe94a9..85871ff 100644 --- a/ESDC/inputs-collect/extract-gz-gosif.py +++ b/ESDC/inputs-collect/extract-gz-gosif.py @@ -1,16 +1,15 @@ import gzip import shutil import glob +import os from tqdm import tqdm pathOut = "~/data/SIF/GOSIF/source" pathOut = os.path.expanduser(pathOut) - files = glob.glob(f"{pathOut}/*") files.sort() - for file in tqdm(files): with gzip.open(file, 'rb') as f_in: with open(file.replace(".gz",""), 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) \ No newline at end of file + shutil.copyfileobj(f_in, f_out) diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py index 2c5663e..9ebb0a4 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py @@ -1,49 +1,53 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm import xarray as xr +from tqdm import tqdm +from xcube.core.store import new_data_store + +pathOut = "~/data/CCI/aerosol/preprocess" +pathOut = os.path.expanduser(pathOut) print("Reading") store = new_data_store('cciodp') dataset = store.open_data( - 
'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', + 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', variable_names=['AOD550_mean'], - time_range=['2002-05-20','2012-04-08'] + time_range=['2002-05-20', '2012-04-08'] ) -dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") +dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") last_year = 2012 first_year = 2002 -years = np.arange(first_year,last_year + 1) +years = np.arange(first_year, last_year + 1) + -def resample_weekly(ds,year): +def resample_weekly(ds, year): keep_attrs = ds.time.attrs - ds = ds.sel(time=slice(f"{year}-01-01",f"{year}-12-31")).resample(time="8D").mean() - ds['time'] = ds.time + np.timedelta64(4,"D") + ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31")).resample( + time="8D").mean() + ds['time'] = ds.time + np.timedelta64(4, "D") ds.time.attrs = keep_attrs - if year==2002: + if year == 2002: ds = ds.interp(coords=dict(time=dates_2002)) return ds + print("Resampling in time") -dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)] -dataset_8d = xr.concat(dataset_8d,dim="time") +dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)] +dataset_8d = xr.concat(dataset_8d, dim="time") new_lats = np.load("lat.npy") new_lons = np.load("lon.npy") print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=512,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=512, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr("~/data/cci-aod550-8d-0.083deg-512x128x128.zarr") +dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr") diff --git 
a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py index f860cc3..2fb3574 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py @@ -1,13 +1,9 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm import xarray as xr +from tqdm import tqdm +from xcube.core.store import new_data_store pathOut = "~/data/CCI/aerosol/preprocess" pathOut = os.path.expanduser(pathOut) @@ -19,37 +15,42 @@ store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', + 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', variable_names=['AOD550_mean'], - time_range=['2002-05-20','2012-04-08'] + time_range=['2002-05-20', '2012-04-08'] ) -dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") +dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") last_year = 2012 first_year = 2002 -years = np.arange(first_year,last_year + 1) +years = np.arange(first_year, last_year + 1) + -def resample_weekly(ds,year): +def resample_weekly(ds, year): keep_attrs = ds.time.attrs - ds = ds.sel(time=slice(f"{year}-01-01",f"{year}-12-31")).resample(time="8D").mean() - ds['time'] = ds.time + np.timedelta64(4,"D") + ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31")).resample( + time="8D").mean() + ds['time'] = ds.time + np.timedelta64(4, "D") ds.time.attrs = keep_attrs - if year==2002: + if year == 2002: ds = ds.interp(coords=dict(time=dates_2002)) return ds + print("Resampling in 
time") -dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)] -dataset_8d = xr.concat(dataset_8d,dim="time") +dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)] +dataset_8d = xr.concat(dataset_8d, dim="time") -new_lats = np.arange(-89.875,90,0.25) -new_lons = np.arange(-179.875,180,0.25) +new_lats = np.arange(-89.875, 90, 0.25) +new_lons = np.arange(-179.875, 180, 0.25) print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py index 3d20a5c..9f72486 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py @@ -1,22 +1,30 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np +import yaml + +pathIn = "~/data/CCI/aerosol/preprocess" +pathIn = os.path.expanduser(pathIn) + +pathOut = "~/data/CCI/aerosol/output" +pathOut = os.path.expanduser(pathOut) +if not os.path.exists(pathOut): + os.makedirs(pathOut) -with open("cci-aod550-metadata-0.0833deg.yaml", "r") as stream: +with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr("/home/davemlz/data/cci-aod550-8d-0.083deg-512x128x128.zarr") +datacube = 
xr.open_zarr(f"{pathIn}/cci-aod550-8d-0.083deg-512x128x128.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -44,4 +52,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr("/home/davemlz/data/metadata/cci-aod550-8d-0.083deg-512x128x128.zarr") +datacube.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py index e3b0567..2425ea8 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray -import xarray as xr -import numpy as np +import xarray as xr +import yaml pathIn = "~/data/CCI/aerosol/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("cci-aod550-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py index 6799f29..113f1bc 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py @@ -1,42 +1,49 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os 
-import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm -import xarray as xr +from xcube.core.store import new_data_store + +pathOut = "~/data/CCI/cloud/preprocess" +pathOut = os.path.expanduser(pathOut) + +if not os.path.exists(pathOut): + os.makedirs(pathOut) print("Reading") store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', - variable_names=['cot','cth','ctt'], - time_range=["2000-02-01","2014-12-31"] + 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', + variable_names=['cot', 'cth', 'ctt'], + time_range=["2000-02-01", "2014-12-31"] ) -dataset = dataset.drop([x for x in list(dataset.variables) if x not in ['time','lat','lon','cot','cth','ctt']]) +dataset = dataset.drop([x for x in list(dataset.variables) if + x not in ['time', 'lat', 'lon', 'cot', 'cth', 'ctt']]) + +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) def get_dates_8d(year): - return np.arange(np.datetime64(f"{year}-01-05"), np.datetime64(f"{year+1}-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") + return np.arange(np.datetime64(f"{year}-01-05"), + np.datetime64(f"{year + 1}-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") + -dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000,2015)]) -dates = dates[(dates >= np.datetime64("2000-02-15")) & (dates <= np.datetime64("2014-12-16"))] +dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000, 2015)]) +dates = dates[(dates >= np.datetime64("2000-02-15")) & ( + dates <= np.datetime64("2014-12-16"))] print("Resampling in time") -dataset_8d = dataset.interp(coords=dict(time=dates),method="nearest") +dataset_8d = dataset.interp(coords=dict(time=dates), method="nearest") new_lats = np.load("lat.npy") new_lons = np.load("lon.npy") print("Resampling in space") -dataset_8d = 
dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr("/net/scratch/dmontero/CCI/cci-cloud-8d-0.083deg-256x128x128.zarr") +dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py index 2fc0eae..653c23f 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py @@ -1,14 +1,7 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm -import xarray as xr -import os +from xcube.core.store import new_data_store pathOut = "~/data/CCI/cloud/preprocess" pathOut = os.path.expanduser(pathOut) @@ -20,30 +13,38 @@ store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', - variable_names=['cot','cth','ctt'], - time_range=["2000-02-01","2014-12-31"] + 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', + variable_names=['cot', 'cth', 'ctt'], + time_range=["2000-02-01", "2014-12-31"] ) -dataset = dataset.drop([x for x in list(dataset.variables) if x not in ['time','lat','lon','cot','cth','ctt']]) +dataset = dataset.drop([x for x in list(dataset.variables) if + x not in ['time', 'lat', 'lon', 'cot', 'cth', 'ctt']]) + +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) def get_dates_8d(year): - return 
np.arange(np.datetime64(f"{year}-01-05"), np.datetime64(f"{year+1}-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") + return np.arange(np.datetime64(f"{year}-01-05"), + np.datetime64(f"{year + 1}-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") + -dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000,2015)]) -dates = dates[(dates >= np.datetime64("2000-02-15")) & (dates <= np.datetime64("2014-12-16"))] +dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000, 2015)]) +dates = dates[(dates >= np.datetime64("2000-02-15")) & ( + dates <= np.datetime64("2014-12-16"))] print("Resampling in time") -dataset_8d = dataset.interp(coords=dict(time=dates),method="nearest") +dataset_8d = dataset.interp(coords=dict(time=dates), method="nearest") -new_lats = np.arange(-89.875,90,0.25) -new_lons = np.arange(-179.875,180,0.25) +new_lats = np.arange(-89.875, 90, 0.25) +new_lons = np.arange(-179.875, 180, 0.25) print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest",kwargs={"fill_value": "extrapolate"}) -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest", + kwargs={"fill_value": "extrapolate"}) +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py index 02d1fa5..fea92f3 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py @@ -1,22 +1,30 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import 
xarray as xr -import numpy as np +import yaml + +pathIn = "~/data/CCI/cloud/preprocess" +pathIn = os.path.expanduser(pathIn) + +pathOut = "~/data/CCI/cloud/output" +pathOut = os.path.expanduser(pathOut) +if not os.path.exists(pathOut): + os.makedirs(pathOut) -with open("cci-cloud-metadata-0.0833deg.yaml", "r") as stream: +with open("inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr("/net/scratch/dmontero/CCI/cci-cloud-8d-0.083deg-256x128x128.zarr") +datacube = xr.open_zarr(f"{pathIn}/cci-cloud-8d-0.083deg-256x128x128.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -44,4 +52,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr("/net/scratch/dmontero/CCI/metadata/cci-cloud-8d-0.083deg-256x128x128.zarr") +datacube.to_zarr(f"{pathOut}/cci-cloud-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py index 84437e0..63f73ef 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/CCI/cloud/preprocess" pathIn = os.path.expanduser(pathIn) @@ -14,8 +12,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - -with open("cci-cloud-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/cloud/cci-cloud-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +22,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" 
-).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py index a3b89e2..816c714 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py @@ -1,23 +1,20 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm import xarray as xr -print("Reading") -dataset_8d = xr.open_zarr("/net/scratch/dmontero/CCI/cci-sm-8d-0.25deg-256x128x128.zarr") +pathOut = "~/data/CCI/sm/preprocess" +pathOut = os.path.expanduser(pathOut) +print("Reading") +dataset_8d = xr.open_zarr(f"{pathOut}/cci-sm-8d-0.25deg-256x128x128.zarr") new_lats = np.load("lat.npy") new_lons = np.load("lon.npy") print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr("/net/scratch/dmontero/CCI/cci-sm-8d-0.083deg-256x128x128.zarr") +dataset_8d.to_zarr(f"{pathOut}/cci-sm-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py index c2c4208..7b2cc5b 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py @@ -1,7 +1,9 @@ +import glob +import os + import numpy as np -from tqdm import tqdm import xarray as xr -import glob +from tqdm import tqdm pathOut = 
"~/data/CCI/sm/preprocess" pathOut = os.path.expanduser(pathOut) @@ -17,25 +19,28 @@ files.sort() dataset = [xr.open_zarr(file) for file in tqdm(files)] -dataset = xr.concat(dataset,dim="time") +dataset = xr.concat(dataset, dim="time") dataset = dataset.chunk(dict(time=256)) last_year = 2020 first_year = 1979 -years = np.arange(first_year,last_year + 1) +years = np.arange(first_year, last_year + 1) + -def resample_weekly(ds,year): +def resample_weekly(ds, year): keep_attrs = ds.time.attrs - ds = ds.sel(time=slice(f"{year}-01-01",f"{year+1}-01-01")).resample(time="8D").mean() - ds['time'] = ds.time + np.timedelta64(4,"D") + ds = ds.sel(time=slice(f"{year}-01-01", f"{year + 1}-01-01")).resample( + time="8D").mean() + ds['time'] = ds.time + np.timedelta64(4, "D") ds.time.attrs = keep_attrs return ds + print("Resampling in time") -dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)] -dataset_8d = xr.concat(dataset_8d,dim="time") +dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)] +dataset_8d = xr.concat(dataset_8d, dim="time") dataset_8d = dataset_8d.chunk(dict(time=256)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-sm-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-sm-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py index 297594f..0b09ddc 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py @@ -1,22 +1,24 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np +import yaml +pathOut = "~/data/CCI/sm/output" +pathOut = os.path.expanduser(pathOut) -with open("cci-sm-metadata-0.0833deg.yaml", "r") as stream: +with open("inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.yaml", + "r") as stream: try: metadata 
= yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr("/net/scratch/dmontero/CCI/cci-sm-8d-0.083deg-256x128x128.zarr") +datacube = xr.open_zarr(os.path.expanduser("~/data/CCI/sm/preprocess/cci-sm-8d-0.083deg-256x128x128.zarr")) datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -44,4 +46,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr("/net/scratch/dmontero/CCI/metadata/cci-sm-8d-0.083deg-256x128x128.zarr") +datacube.to_zarr(f"{pathOut}/cci-sm-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py index 752f111..c5ef7ef 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray -import xarray as xr -import numpy as np +import xarray as xr +import yaml pathOut = "~/data/CCI/sm/output" pathOut = os.path.expanduser(pathOut) @@ -14,8 +12,7 @@ pathIn = "~/data/CCI/sm/preprocess" pathIn = os.path.expanduser(pathIn) - -with open("cci-sm-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/sm/cci-sm-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +22,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py b/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py index bb0ebbf..1999323 100644 --- a/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py +++ b/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py @@ -1,7 +1,7 @@ import xarray as xr import numpy as np import glob - +import os from 
tqdm import tqdm pathOut = "~/data/FLUXCOM/preprocess" @@ -49,4 +49,4 @@ def merge_datasets(i): fluxcom = fluxcom.chunk(dict(time=256,lat=256,lon=256)) print("Saving") -fluxcom.to_zarr(f"{pathOut}/fluxcom-8d-0.083deg-256x256x256.zarr") \ No newline at end of file +fluxcom.to_zarr(f"{pathOut}/fluxcom-8d-0.083deg-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py index e571e99..bc9f8db 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py @@ -1,9 +1,8 @@ -import xarray as xr -import xesmf as xe -import numpy as np -from tqdm import tqdm +import os from datetime import datetime +import xarray as xr + pathOut = "~/data/SIF/GOME2-SIF/output" pathOut = os.path.expanduser(pathOut) @@ -11,16 +10,18 @@ dataset = xr.open_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr") print("Interpolating") -dataset = dataset.coarsen(lat=5,lon=5).mean() -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) -dataset = dataset.interpolate_na(dim="time",fill_value="extrapolate") -dataset = dataset.chunk(dict(time=256,lat=128,lon=128)) +dataset = dataset.coarsen(lat=5, lon=5).mean() +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) +dataset = dataset.interpolate_na(dim="time", fill_value="extrapolate") +dataset = dataset.chunk(dict(time=256, lat=128, lon=128)) print("Adding attributes") dataset.attrs['date_modified'] = str(datetime.now()) dataset.attrs['time_coverage_end'] = str(dataset.time[-1].values) dataset.attrs['time_coverage_start'] = str(dataset.time[0].values) -dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + ['Downsampling to 0.25 deg with mean','Interpolating NA with linear interpolation'] +dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + [ + 'Downsampling to 0.25 deg with mean', + 'Interpolating NA with 
linear interpolation'] dataset.attrs['geospatial_lat_max'] = float(dataset.lat.max().values) dataset.attrs['geospatial_lat_min'] = float(dataset.lat.min().values) @@ -31,4 +32,4 @@ dataset.attrs['geospatial_lon_resolution'] = 0.25 print("Saving") -dataset.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py index 5faf440..e7d2f52 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py @@ -1,14 +1,7 @@ -import xarray as xr -import numpy as np import glob import os -import datetime -from tqdm import tqdm - -# pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/cubes/" -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import xarray as xr pathIn = "~/data/SIF/GOME2-SIF/source" pathIn = os.path.expanduser(pathIn) @@ -18,13 +11,14 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - + files = glob.glob(f"{pathIn}/*JJ*.nc") files.sort() -datasets = [xr.open_dataset(file,chunks = {'lat':256, 'lon':256, 'time':256}) for file in files] -datasets = xr.concat(datasets,dim = "time").drop("crs") -datasets = datasets.transpose("time","lat","lon") -datasets = datasets.chunk({'lat':256, 'lon':256, 'time':256}) +datasets = [xr.open_dataset(file, chunks={'lat': 256, 'lon': 256, 'time': 256}) + for file in files] +datasets = xr.concat(datasets, dim="time").drop("crs") +datasets = datasets.transpose("time", "lat", "lon") +datasets = datasets.chunk({'lat': 256, 'lon': 256, 'time': 256}) datasets.to_zarr(f"{pathOut}/sif-gome2-JJ-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py index fc02ebe..86920c7 100644 --- 
a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray -import xarray as xr -import numpy as np +import xarray as xr +import yaml pathIn = "~/data/SIF/GOME2-SIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-gome2-JJ-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -53,4 +52,5 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr(f"{pathOutput}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr",mode = "w") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr", + mode="w") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py index d862901..31b06a9 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py @@ -1,9 +1,8 @@ -import xarray as xr -import xesmf as xe -import numpy as np -from tqdm import tqdm +import os from datetime import datetime +import xarray as xr + pathOut = "~/data/SIF/GOME2-SIF/output" pathOut = os.path.expanduser(pathOut) @@ -11,16 +10,18 @@ dataset = xr.open_zarr(f"{pathOut}/sif-gome2-PK-8d-0.05deg-256x256x256.zarr") print("Interpolating") -dataset = dataset.coarsen(lat=5,lon=5).mean() -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) -dataset = 
dataset.interpolate_na(dim="time",fill_value="extrapolate") -dataset = dataset.chunk(dict(time=256,lat=128,lon=128)) +dataset = dataset.coarsen(lat=5, lon=5).mean() +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) +dataset = dataset.interpolate_na(dim="time", fill_value="extrapolate") +dataset = dataset.chunk(dict(time=256, lat=128, lon=128)) print("Adding attributes") dataset.attrs['date_modified'] = str(datetime.now()) dataset.attrs['time_coverage_end'] = str(dataset.time[-1].values) dataset.attrs['time_coverage_start'] = str(dataset.time[0].values) -dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + ['Downsampling to 0.25 deg with mean','Interpolating NA with linear interpolation'] +dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + [ + 'Downsampling to 0.25 deg with mean', + 'Interpolating NA with linear interpolation'] dataset.attrs['geospatial_lat_max'] = float(dataset.lat.max().values) dataset.attrs['geospatial_lat_min'] = float(dataset.lat.min().values) @@ -31,4 +32,4 @@ dataset.attrs['geospatial_lon_resolution'] = 0.25 print("Saving") -dataset.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py index 619340f..862c4c4 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py @@ -1,14 +1,7 @@ -import xarray as xr -import numpy as np import glob import os -import datetime -from tqdm import tqdm - -# pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/cubes/" -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import xarray as xr pathIn = "~/data/SIF/GOME2-SIF/source" pathIn = os.path.expanduser(pathIn) @@ -18,13 +11,14 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - + files = 
glob.glob(f"{pathIn}/*PK*.nc") files.sort() -datasets = [xr.open_dataset(file,chunks = {'lat':256, 'lon':256, 'time':256}) for file in files] -datasets = xr.concat(datasets,dim = "time").drop("crs") -datasets = datasets.transpose("time","lat","lon") -datasets = datasets.chunk({'lat':256, 'lon':256, 'time':256}) +datasets = [xr.open_dataset(file, chunks={'lat': 256, 'lon': 256, 'time': 256}) + for file in files] +datasets = xr.concat(datasets, dim="time").drop("crs") +datasets = datasets.transpose("time", "lat", "lon") +datasets = datasets.chunk({'lat': 256, 'lon': 256, 'time': 256}) datasets.to_zarr(f"{pathOut}/sif-gome2-PK-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py index 01f56b9..9135f28 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py @@ -1,9 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np +import yaml pathIn = "~/data/SIF/GOME2-SIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -14,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-gome2-PK-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -24,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -52,4 +52,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.05deg-256x256x256.zarr") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.05deg-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py 
b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py index 156c630..ca1fae5 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py @@ -1,9 +1,9 @@ -import xarray as xr -import xesmf as xe -import numpy as np -from tqdm import tqdm +import os from datetime import datetime +import numpy as np +import xarray as xr + pathOut = "~/data/SIF/GOSIF/output" pathOut = os.path.expanduser(pathOut) @@ -12,21 +12,22 @@ print("Fixing time") keep_attrs = dataset.time.attrs -dataset["time"] = dataset.time + np.timedelta64(4,"D") +dataset["time"] = dataset.time + np.timedelta64(4, "D") dataset.time.attrs = keep_attrs print("Coarsing") -dataset = dataset.coarsen(lat=5,lon=5).mean() +dataset = dataset.coarsen(lat=5, lon=5).mean() print("Chunking") -dataset = dataset.chunk(dict(time=256,lat=128,lon=128)) +dataset = dataset.chunk(dict(time=256, lat=128, lon=128)) print("Adding attrs") dataset.attrs['date_modified'] = str(datetime.now()) dataset.attrs['time_coverage_end'] = str(dataset.time[-1].values) dataset.attrs['time_coverage_start'] = str(dataset.time[0].values) dataset.attrs['reported_day'] = 5.0 -dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + ['Downsampling to 0.25 deg with mean'] +dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + [ + 'Downsampling to 0.25 deg with mean'] dataset.attrs['geospatial_lat_max'] = float(dataset.lat.max().values) dataset.attrs['geospatial_lat_min'] = float(dataset.lat.min().values) @@ -40,4 +41,4 @@ del dataset['sif'].encoding['chunks'] print("Saving") -dataset.to_zarr(f"{pathOut}/sif-gosif-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset.to_zarr(f"{pathOut}/sif-gosif-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py index a85e13c..f602c84 100644 --- 
a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py @@ -1,14 +1,10 @@ -import xarray as xr -import numpy as np +import datetime import glob import os -import datetime -from tqdm import tqdm - -# pathOut = "/net/projects/deep_esdl/data/GOSIF/cubes/" -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import numpy as np +import xarray as xr +from tqdm import tqdm pathIn = "~/data/SIF/GOSIF/source" pathIn = os.path.expanduser(pathIn) @@ -16,16 +12,22 @@ files = glob.glob(f"{pathIn}/*.tif") files.sort() + def to_xarray(file): filename = file.split("/")[-1] - date = np.datetime64(datetime.datetime.strptime(filename[6:13],'%Y%j')) - ds = xr.open_dataset(file,engine = "rasterio",chunks = {"x":1024,"y":1024}).where(lambda x: x < 32766,other = np.nan) - ds = ds.reset_coords().band_data.sel(band = 1).drop("band").rename({"y": "lat", "x":"lon"}) + date_info = filename.split("_", 1)[1].split(".")[0] + date = np.datetime64(datetime.datetime.strptime(date_info, '%Y%j')) + ds = xr.open_dataset(file, engine="rasterio", + chunks={"x": 1024, "y": 1024}).where( + lambda x: x < 32766, other=np.nan) + ds = ds.reset_coords().band_data.sel(band=1).drop("band").rename( + {"y": "lat", "x": "lon"}) ds.name = "sif" ds = ds.to_dataset() ds = ds.assign_coords({"time": date}).expand_dims("time") ds = ds * 0.0001 - ds = ds.transpose("time","lat","lon") - ds.to_zarr(f"{pathIn}/{filename.replace('.tif','.zarr')}") + ds = ds.transpose("time", "lat", "lon") + ds.to_zarr(f"{pathIn}/{filename.replace('.tif', '.zarr')}") + -[to_xarray(file) for file in tqdm(files)] \ No newline at end of file +[to_xarray(file) for file in tqdm(files)] diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py index ae7f746..a3aac9c 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py +++ 
b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py @@ -1,14 +1,8 @@ -import xarray as xr -import numpy as np import glob import os -import datetime -from tqdm import tqdm -# pathOut = "/net/projects/deep_esdl/data/GOSIF/cubes/" - -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import xarray as xr +from tqdm import tqdm pathIn = "~/data/SIF/GOSIF/source" pathIn = os.path.expanduser(pathIn) @@ -18,7 +12,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - + files = glob.glob(f"{pathIn}/*.zarr") files.sort() @@ -30,7 +24,7 @@ ds = xr.open_zarr(file) ds["lon"] = ref.lon datasets.append(ds) - -datasets = xr.concat(datasets,dim = "time") -datasets.to_zarr(f"{pathOut}/sif-gosif-1x1024x1024.zarr") \ No newline at end of file +datasets = xr.concat(datasets, dim="time") + +datasets.to_zarr(f"{pathOut}/sif-gosif-1x1024x1024.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index 9633bd8..de6f0e6 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -1,9 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np +import yaml pathIn = "~/data/SIF/GOSIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -14,7 +13,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-gosif-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -24,7 +23,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -52,4 +51,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) )
-datacube.to_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") diff --git a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py index 4640ed6..3b7579a 100644 --- a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py @@ -3,6 +3,7 @@ import numpy as np from tqdm import tqdm from datetime import datetime +import os pathIn = "~/data/SIF/RTSIF/preprocess" pathIn = os.path.expanduser(pathIn) diff --git a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py index a53a894..320dfa6 100644 --- a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py @@ -4,7 +4,7 @@ import rioxarray import xarray as xr import numpy as np - +import os pathIn = "path-to-RTSIF-cube" pathOut = "~/data/SIF/RTSIF/preprocess" @@ -13,7 +13,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-rtsif-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: