From 69fb68567ecb2b08d8251d0a2501befa1fb80e91 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 09:58:37 +0000 Subject: [PATCH 01/17] mainly including imports --- ESDC/inputs-collect/download-GOME2-SIF.py | 3 ++- ESDC/inputs-collect/download-GOSIF.py | 3 ++- ESDC/inputs-collect/extract-gz-gosif.py | 5 ++--- ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py | 4 ++-- .../SIF/GOSIF/sif-gosif-data-cube-0.25deg.py | 5 +++-- .../inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py | 5 +++-- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/ESDC/inputs-collect/download-GOME2-SIF.py b/ESDC/inputs-collect/download-GOME2-SIF.py index 65328c5..cf88e76 100644 --- a/ESDC/inputs-collect/download-GOME2-SIF.py +++ b/ESDC/inputs-collect/download-GOME2-SIF.py @@ -1,5 +1,6 @@ import requests import numpy as np +import os URL = "http://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/ECOCLIM/Downscaled-GOME2-SIF/v2.0/" # pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/data/" @@ -18,4 +19,4 @@ file_to_download = f"GOME_{rm}_dcSIF_005deg_8day_{year}.nc" print(f"Downloading {file_to_download}") response = requests.get(URL + file_to_download) - open(pathOut + file_to_download, "wb").write(response.content) \ No newline at end of file + open(pathOut + file_to_download, "wb").write(response.content) diff --git a/ESDC/inputs-collect/download-GOSIF.py b/ESDC/inputs-collect/download-GOSIF.py index 0df6434..3cafb8f 100644 --- a/ESDC/inputs-collect/download-GOSIF.py +++ b/ESDC/inputs-collect/download-GOSIF.py @@ -2,6 +2,7 @@ import requests from urllib.request import Request, urlopen import re +import os from os.path import exists URL = "http://data.globalecology.unh.edu/data/GOSIF_v2/8day/" @@ -29,4 +30,4 @@ response = requests.get(URL + file_to_download) open(pathOut + file_to_download, "wb").write(response.content) else: - pass \ No newline at end of file + pass diff --git a/ESDC/inputs-collect/extract-gz-gosif.py b/ESDC/inputs-collect/extract-gz-gosif.py index afe94a9..85871ff 100644 --- a/ESDC/inputs-collect/extract-gz-gosif.py +++ b/ESDC/inputs-collect/extract-gz-gosif.py @@ -1,16 +1,15 @@ import gzip import shutil import glob +import os from tqdm import tqdm pathOut = "~/data/SIF/GOSIF/source" pathOut = os.path.expanduser(pathOut) - files = glob.glob(f"{pathOut}/*") files.sort() - for file in tqdm(files): with gzip.open(file, 'rb') as f_in: with open(file.replace(".gz",""), 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) \ No newline at end of file + shutil.copyfileobj(f_in, f_out) diff --git a/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py b/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py index bb0ebbf..1999323 100644 --- a/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py +++ b/ESDC/inputs-preprocess/FLUXCOM/fluxcom-data-cube.py @@ -1,7 +1,7 @@ import xarray as xr import numpy as np import glob - +import os from tqdm import tqdm pathOut = "~/data/FLUXCOM/preprocess" @@ -49,4 +49,4 @@ def merge_datasets(i): fluxcom = fluxcom.chunk(dict(time=256,lat=256,lon=256)) print("Saving") -fluxcom.to_zarr(f"{pathOut}/fluxcom-8d-0.083deg-256x256x256.zarr") \ No newline at end of file +fluxcom.to_zarr(f"{pathOut}/fluxcom-8d-0.083deg-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py index 156c630..67d10ae 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py @@ -1,10 +1,11 @@ import xarray as xr import xesmf as xe import numpy as np +import os from tqdm import tqdm from datetime import datetime -pathOut = "~/data/SIF/GOSIF/output" +pathOut = "~/data/SIF/GOSIF/preprocess" pathOut = os.path.expanduser(pathOut) print("Reading") @@ -40,4 +41,4 @@ del dataset['sif'].encoding['chunks'] print("Saving") -dataset.to_zarr(f"{pathOut}/sif-gosif-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset.to_zarr(f"{pathOut}/sif-gosif-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py index a85e13c..a57cb1b 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py @@ -18,7 +18,8 @@ def to_xarray(file): filename = file.split("/")[-1] - date = np.datetime64(datetime.datetime.strptime(filename[6:13],'%Y%j')) + date_info = filename.split("_",1)[1].split(".")[0] + date = np.datetime64(datetime.datetime.strptime(date_info,'%Y%j')) ds = xr.open_dataset(file,engine = "rasterio",chunks = {"x":1024,"y":1024}).where(lambda x: x < 32766,other = np.nan) ds = ds.reset_coords().band_data.sel(band = 1).drop("band").rename({"y": "lat", "x":"lon"}) ds.name = "sif" @@ -28,4 +29,4 @@ def to_xarray(file): ds = ds.transpose("time","lat","lon") ds.to_zarr(f"{pathIn}/{filename.replace('.tif','.zarr')}") -[to_xarray(file) for file in tqdm(files)] \ No newline at end of file +[to_xarray(file) for file in tqdm(files)] From 840bdda93fde6c390d6a72a77f282ba346b1b447 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 12:03:58 +0200 Subject: [PATCH 02/17] fixing path --- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py | 2 +- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py | 2 +- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py index 67d10ae..aa838b1 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py @@ -9,7 +9,7 @@ pathOut = os.path.expanduser(pathOut) print("Reading") -dataset = xr.open_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") +dataset = xr.open_zarr(f"{pathOut}/sif-gosif-8d-0.25deg-1x1024x1024.zarr") print("Fixing time") keep_attrs = dataset.time.attrs diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py index ae7f746..83dbce7 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py @@ -33,4 +33,4 @@ datasets = xr.concat(datasets,dim = "time") -datasets.to_zarr(f"{pathOut}/sif-gosif-1x1024x1024.zarr") \ No newline at end of file +datasets.to_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") \ No newline at end of file diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index 9633bd8..e1b92cd 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -20,7 +20,7 @@ except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr(f"{pathIn}/sif-gosif-1x1024x1024.zarr") +datacube = xr.open_zarr(f"{pathIn}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" From fc245443e2eb72a7fbbc845967e4ab81691403c7 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 12:16:10 +0200 Subject: [PATCH 03/17] reverting and adjusting readme as there was the error in the order of steps. --- ESDC/README.md | 2 +- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py | 2 +- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py | 2 +- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ESDC/README.md b/ESDC/README.md index 0084c74..934d81e 100644 --- a/ESDC/README.md +++ b/ESDC/README.md @@ -188,8 +188,8 @@ inputs-preprocess/FLUXCOM/fluxcom-data-cube-8d-0.25deg.py # Resample to 0.25 deg # GOSIF: Preprocessing inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py # Convert .tif to .zarr inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py # Concatenate .zarr files -inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py # Resample to 0.25 degrees inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py # Add initial metadata +inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py # Resample to 0.25 degrees # GOME-2 JJ Method: Preprocessing inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py # Concatenate .nc files diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py index aa838b1..67d10ae 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py @@ -9,7 +9,7 @@ pathOut = os.path.expanduser(pathOut) print("Reading") -dataset = xr.open_zarr(f"{pathOut}/sif-gosif-8d-0.25deg-1x1024x1024.zarr") +dataset = xr.open_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") print("Fixing time") keep_attrs = dataset.time.attrs diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py index 83dbce7..c673671 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py @@ -33,4 +33,4 @@ datasets = xr.concat(datasets,dim = "time") -datasets.to_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") \ No newline at end of file +datacube = xr.open_zarr(f"{pathIn}/sif-gosif-1x1024x1024.zarr") \ No newline at end of file diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index e1b92cd..9633bd8 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -20,7 +20,7 @@ except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr(f"{pathIn}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") +datacube = xr.open_zarr(f"{pathIn}/sif-gosif-1x1024x1024.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" From 17d5b394f3375bbb9779338e99aa9cebbb740b11 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 12:18:39 +0200 Subject: [PATCH 04/17] fixing path --- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index 9633bd8..d772e9f 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -14,7 +14,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-gosif-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: From 0aea102328fdce2d7b3430c7d925cc12e85b11d3 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 10:20:49 +0000 Subject: [PATCH 05/17] adjusting import --- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index 9633bd8..3989e6c 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -52,4 +52,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gosif-8d-0.05deg-1x1024x1024.zarr") From 42ef12574a389f6e24ee00453d0127379e807bc5 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 12:21:53 +0200 Subject: [PATCH 06/17] adding import --- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index 87643a8..df73bc2 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -4,6 +4,7 @@ import rioxarray import xarray as xr import numpy as np +import os pathIn = "~/data/SIF/GOSIF/preprocess" pathIn = os.path.expanduser(pathIn) From 6d24f8cfcd2896e36fb6c19f12c4e5f1d27ce213 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 12:29:47 +0200 Subject: [PATCH 07/17] reverting path adjustment --- ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py index 67d10ae..ee1bfba 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py @@ -5,7 +5,7 @@ from tqdm import tqdm from datetime import datetime -pathOut = "~/data/SIF/GOSIF/preprocess" +pathOut = "~/data/SIF/GOSIF/output" pathOut = os.path.expanduser(pathOut) print("Reading") From 48a49ddda08ab78f4114b2bfdf3aeeaf364bf4a5 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 12:38:32 +0200 Subject: [PATCH 08/17] fixing path concatenation and import --- ESDC/inputs-collect/download-GOME2-SIF.py | 3 ++- ESDC/inputs-collect/download-GOSIF.py | 5 +++-- ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ESDC/inputs-collect/download-GOME2-SIF.py b/ESDC/inputs-collect/download-GOME2-SIF.py index cf88e76..6ff0d1c 100644 --- a/ESDC/inputs-collect/download-GOME2-SIF.py +++ b/ESDC/inputs-collect/download-GOME2-SIF.py @@ -19,4 +19,5 @@ file_to_download = f"GOME_{rm}_dcSIF_005deg_8day_{year}.nc" print(f"Downloading {file_to_download}") response = requests.get(URL + file_to_download) - open(pathOut + file_to_download, "wb").write(response.content) + file_path = os.path.join(pathOut, file_to_download) + open(file_path, "wb").write(response.content) diff --git a/ESDC/inputs-collect/download-GOSIF.py b/ESDC/inputs-collect/download-GOSIF.py index 3cafb8f..b629405 100644 --- a/ESDC/inputs-collect/download-GOSIF.py +++ b/ESDC/inputs-collect/download-GOSIF.py @@ -25,9 +25,10 @@ link = link.get('href') if link.endswith(".tif.gz"): file_to_download = link.split("/")[-1] - if not exists(pathOut + file_to_download): + file_path = os.path.join(pathOut, file_to_download) + if not exists(file_path): print(f"Downloading {file_to_download}") response = requests.get(URL + file_to_download) - open(pathOut + file_to_download, "wb").write(response.content) + open(file_path, "wb").write(response.content) else: pass diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py index fc02ebe..4f68fbe 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py @@ -4,6 +4,7 @@ import rioxarray import xarray as xr import numpy as np +import os pathIn = "~/data/SIF/GOME2-SIF/preprocess" From 0ccea3930ac572a011f2520b0a0be2cb909647aa Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 14:28:43 +0200 Subject: [PATCH 09/17] fixing path concatenation and import --- ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py | 2 +- ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py index 4f68fbe..81fa11c 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py @@ -16,7 +16,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-gome2-JJ-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py index 01f56b9..3de04c9 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py @@ -4,6 +4,7 @@ import rioxarray import xarray as xr import numpy as np +import os pathIn = "~/data/SIF/GOME2-SIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -14,7 +15,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-gome2-PK-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: From 53b53cd02e55c93f71a7fd091b2326159d1accbf Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 14:30:03 +0200 Subject: [PATCH 10/17] fixing variable --- ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py index 81fa11c..ab18dda 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py @@ -54,4 +54,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr(f"{pathOutput}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr",mode = "w") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr",mode = "w") \ No newline at end of file From bfa660ef2bf2ca44413f57f89701d2cb27c6cefa Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 14:32:40 +0200 Subject: [PATCH 11/17] adding import --- .../SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py index e571e99..5262b82 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py @@ -3,6 +3,7 @@ import numpy as np from tqdm import tqdm from datetime import datetime +import os pathOut = "~/data/SIF/GOME2-SIF/output" pathOut = os.path.expanduser(pathOut) From f3bb73bdd58d35ed6b78f29184c5f544d84040e6 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 14:33:29 +0200 Subject: [PATCH 12/17] adding import --- .../SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py index d862901..9d4b010 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py @@ -3,6 +3,7 @@ import numpy as np from tqdm import tqdm from datetime import datetime +import os pathOut = "~/data/SIF/GOME2-SIF/output" pathOut = os.path.expanduser(pathOut) From 1a20ff98fa73b508f2ab151ca3f6ec1887b4fb80 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 15:23:08 +0200 Subject: [PATCH 13/17] adding imports and fixing path --- .../SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py | 1 + ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py index 4640ed6..3b7579a 100644 --- a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-data-cube-0.25deg.py @@ -3,6 +3,7 @@ import numpy as np from tqdm import tqdm from datetime import datetime +import os pathIn = "~/data/SIF/RTSIF/preprocess" pathIn = os.path.expanduser(pathIn) diff --git a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py index a53a894..320dfa6 100644 --- a/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.py @@ -4,7 +4,7 @@ import rioxarray import xarray as xr import numpy as np - +import os pathIn = "path-to-RTSIF-cube" pathOut = "~/data/SIF/RTSIF/preprocess" @@ -13,7 +13,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("sif-rtsif-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/RTSIF/sif-rtsif-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: From e64e296d18d30ae25048eba4e8d399b8dcf4a343 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 15:52:29 +0200 Subject: [PATCH 14/17] fixing paths in cci aerosol --- .../CCI/aerosol/cci-aod550-8d-0.083deg.py | 6 +++++- .../CCI/aerosol/cci-aod550-8d-0.25deg.py | 1 + .../CCI/aerosol/cci-aod550-metadata-0.0833deg.py | 15 ++++++++++++--- .../CCI/aerosol/cci-aod550-metadata.py | 4 ++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py index 2c5663e..3de3aa5 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py @@ -8,6 +8,10 @@ import numpy as np from tqdm import tqdm import xarray as xr +import os + +pathOut = "~/data/CCI/aerosol/preprocess" +pathOut = os.path.expanduser(pathOut) print("Reading") store = new_data_store('cciodp') @@ -46,4 +50,4 @@ def resample_weekly(ds,year): dataset_8d = dataset_8d.chunk(dict(time=512,lat=128,lon=128)) print("Saving") -dataset_8d.to_zarr("~/data/cci-aod550-8d-0.083deg-512x128x128.zarr") +dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr") \ No newline at end of file diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py index f860cc3..7b541fa 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py @@ -8,6 +8,7 @@ import numpy as np from tqdm import tqdm import xarray as xr +import os pathOut = "~/data/CCI/aerosol/preprocess" pathOut = os.path.expanduser(pathOut) diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py index 3d20a5c..9b3e220 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py @@ -4,15 +4,24 @@ import rioxarray import xarray as xr import numpy as np +import os +pathIn = "~/data/CCI/aerosol/preprocess" +pathIn = os.path.expanduser(pathIn) -with open("cci-aod550-metadata-0.0833deg.yaml", "r") as stream: +pathOut = "~/data/CCI/aerosol/output" +pathOut = os.path.expanduser(pathOut) + +if not os.path.exists(pathOut): + os.makedirs(pathOut) + +with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr("/home/davemlz/data/cci-aod550-8d-0.083deg-512x128x128.zarr") +datacube = xr.open_zarr(f"{pathIn}/cci-aod550-8d-0.083deg-512x128x128.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" @@ -44,4 +53,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr("/home/davemlz/data/metadata/cci-aod550-8d-0.083deg-512x128x128.zarr") +datacube.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py index e3b0567..655fe14 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py @@ -4,7 +4,7 @@ import rioxarray import xarray as xr import numpy as np - +import os pathIn = "~/data/CCI/aerosol/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +15,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("cci-aod550-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: From 90c631b8daff1127172fc14399b1683732e27aa3 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 15:59:54 +0200 Subject: [PATCH 15/17] fixing paths in cci cloud --- .../CCI/cloud/cci-cloud-8d-0.083deg.py | 9 ++++++++- .../CCI/cloud/cci-cloud-metadata-0.0833deg.py | 15 ++++++++++++--- .../CCI/cloud/cci-cloud-metadata.py | 3 +-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py index 6799f29..806ea7e 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py @@ -8,6 +8,13 @@ import numpy as np from tqdm import tqdm import xarray as xr +import os + +pathOut = "~/data/CCI/cloud/preprocess" +pathOut = os.path.expanduser(pathOut) + +if not os.path.exists(pathOut): + os.makedirs(pathOut) print("Reading") store = new_data_store('cciodp') @@ -39,4 +46,4 @@ def get_dates_8d(year): dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) print("Saving") -dataset_8d.to_zarr("/net/scratch/dmontero/CCI/cci-cloud-8d-0.083deg-256x128x128.zarr") +dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py index 02d1fa5..53096bc 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py @@ -4,15 +4,24 @@ import rioxarray import xarray as xr import numpy as np +import os +pathIn = "~/data/CCI/cloud/preprocess" +pathIn = os.path.expanduser(pathIn) -with open("cci-cloud-metadata-0.0833deg.yaml", "r") as stream: +pathOut = "~/data/CCI/cloud/output" +pathOut = os.path.expanduser(pathOut) + +if not os.path.exists(pathOut): + os.makedirs(pathOut) + +with open("inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr("/net/scratch/dmontero/CCI/cci-cloud-8d-0.083deg-256x128x128.zarr") +datacube = xr.open_zarr(f"{pathIn}/cci-cloud-8d-0.083deg-256x128x128.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" @@ -44,4 +53,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr("/net/scratch/dmontero/CCI/metadata/cci-cloud-8d-0.083deg-256x128x128.zarr") +datacube.to_zarr(f"{pathOut}/cci-cloud-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py index 84437e0..4ff4921 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py @@ -14,8 +14,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - -with open("cci-cloud-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/cloud/cci-cloud-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: From de522e3bb95124249a9dc2471eaeb3a197a69400 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 16:08:48 +0200 Subject: [PATCH 16/17] fixing paths in cci and harmonizing imports --- .../CCI/aerosol/cci-aod550-8d-0.083deg.py | 42 ++++++++--------- .../CCI/aerosol/cci-aod550-8d-0.25deg.py | 46 +++++++++--------- .../aerosol/cci-aod550-metadata-0.0833deg.py | 13 +++-- .../CCI/aerosol/cci-aod550-metadata.py | 13 +++-- .../CCI/cloud/cci-cloud-8d-0.083deg.py | 40 ++++++++-------- .../CCI/cloud/cci-cloud-8d-0.25deg.py | 47 ++++++++++--------- .../CCI/cloud/cci-cloud-metadata-0.0833deg.py | 13 +++-- .../CCI/cloud/cci-cloud-metadata.py | 10 ++-- .../CCI/sm/cci-sm-8d-0.083deg.py | 21 ++++----- .../CCI/sm/cci-sm-8d-0.25deg.py | 25 ++++++---- .../CCI/sm/cci-sm-metadata-0.0833deg.py | 18 +++---- .../CCI/sm/cci-sm-metadata.py | 13 ++--- 12 files changed, 149 insertions(+), 152 deletions(-) diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py index 3de3aa5..9ebb0a4 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.083deg.py @@ -1,14 +1,9 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm import xarray as xr -import os +from tqdm import tqdm +from xcube.core.store import new_data_store pathOut = "~/data/CCI/aerosol/preprocess" pathOut = os.path.expanduser(pathOut) @@ -17,37 +12,42 @@ store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', + 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', variable_names=['AOD550_mean'], - time_range=['2002-05-20','2012-04-08'] + time_range=['2002-05-20', '2012-04-08'] ) -dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") +dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") last_year = 2012 first_year = 2002 -years = np.arange(first_year,last_year + 1) +years = np.arange(first_year, last_year + 1) -def resample_weekly(ds,year): + +def resample_weekly(ds, year): keep_attrs = ds.time.attrs - ds = ds.sel(time=slice(f"{year}-01-01",f"{year}-12-31")).resample(time="8D").mean() - ds['time'] = ds.time + np.timedelta64(4,"D") + ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31")).resample( + time="8D").mean() + ds['time'] = ds.time + np.timedelta64(4, "D") ds.time.attrs = keep_attrs - if year==2002: + if year == 2002: ds = ds.interp(coords=dict(time=dates_2002)) return ds + print("Resampling in time") -dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)] -dataset_8d = xr.concat(dataset_8d,dim="time") +dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)] +dataset_8d = xr.concat(dataset_8d, dim="time") new_lats = np.load("lat.npy") new_lons = np.load("lon.npy") print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=512,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=512, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.083deg-512x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py index 7b541fa..2fb3574 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-8d-0.25deg.py @@ -1,14 +1,9 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm import xarray as xr -import os +from tqdm import tqdm +from xcube.core.store import new_data_store pathOut = "~/data/CCI/aerosol/preprocess" pathOut = os.path.expanduser(pathOut) @@ -20,37 +15,42 @@ store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', + 'esacci.AEROSOL.day.L3C.AER_PRODUCTS.AATSR.Envisat.SU.4-3.r1', variable_names=['AOD550_mean'], - time_range=['2002-05-20','2012-04-08'] + time_range=['2002-05-20', '2012-04-08'] ) -dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") +dates_2002 = np.arange(np.datetime64("2002-05-21"), np.datetime64("2003-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") last_year = 2012 first_year = 2002 -years = np.arange(first_year,last_year + 1) +years = np.arange(first_year, last_year + 1) -def resample_weekly(ds,year): + +def resample_weekly(ds, year): keep_attrs = ds.time.attrs - ds = ds.sel(time=slice(f"{year}-01-01",f"{year}-12-31")).resample(time="8D").mean() - ds['time'] = ds.time + np.timedelta64(4,"D") + ds = ds.sel(time=slice(f"{year}-01-01", f"{year}-12-31")).resample( + time="8D").mean() + ds['time'] = ds.time + np.timedelta64(4, "D") ds.time.attrs = keep_attrs - if year==2002: + if year == 2002: ds = ds.interp(coords=dict(time=dates_2002)) return ds + print("Resampling in time") -dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)] -dataset_8d = xr.concat(dataset_8d,dim="time") +dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)] +dataset_8d = xr.concat(dataset_8d, dim="time") -new_lats = np.arange(-89.875,90,0.25) -new_lons = np.arange(-179.875,180,0.25) +new_lats = np.arange(-89.875, 90, 0.25) +new_lons = np.arange(-179.875, 180, 0.25) print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-aod550-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py index 9b3e220..9f72486 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/CCI/aerosol/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.yaml", "r") as stream: +with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata-0.0833deg.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py index 655fe14..2425ea8 100644 --- a/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py +++ b/ESDC/inputs-preprocess/CCI/aerosol/cci-aod550-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/CCI/aerosol/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/aerosol/cci-aod550-metadata.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py index 806ea7e..113f1bc 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.083deg.py @@ -1,14 +1,7 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm -import xarray as xr -import os +from xcube.core.store import new_data_store pathOut = "~/data/CCI/cloud/preprocess" pathOut = os.path.expanduser(pathOut) @@ -20,30 +13,37 @@ store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', - variable_names=['cot','cth','ctt'], - time_range=["2000-02-01","2014-12-31"] + 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', + variable_names=['cot', 'cth', 'ctt'], + time_range=["2000-02-01", "2014-12-31"] ) -dataset = dataset.drop([x for x in list(dataset.variables) if x not in ['time','lat','lon','cot','cth','ctt']]) +dataset = dataset.drop([x for x in list(dataset.variables) if + x not in ['time', 'lat', 'lon', 'cot', 'cth', 'ctt']]) + +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) def get_dates_8d(year): - return np.arange(np.datetime64(f"{year}-01-05"), np.datetime64(f"{year+1}-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") + return np.arange(np.datetime64(f"{year}-01-05"), + np.datetime64(f"{year + 1}-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") + -dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000,2015)]) -dates = dates[(dates >= np.datetime64("2000-02-15")) & (dates <= np.datetime64("2014-12-16"))] +dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000, 2015)]) +dates = dates[(dates >= np.datetime64("2000-02-15")) & ( + dates <= np.datetime64("2014-12-16"))] print("Resampling in time") -dataset_8d = dataset.interp(coords=dict(time=dates),method="nearest") +dataset_8d = dataset.interp(coords=dict(time=dates), method="nearest") new_lats = np.load("lat.npy") new_lons = np.load("lon.npy") print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py index 2fc0eae..653c23f 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-8d-0.25deg.py @@ -1,14 +1,7 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm -import xarray as xr -import os +from xcube.core.store import new_data_store pathOut = "~/data/CCI/cloud/preprocess" pathOut = os.path.expanduser(pathOut) @@ -20,30 +13,38 @@ store = new_data_store('cciodp') dataset = store.open_data( - 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', - variable_names=['cot','cth','ctt'], - time_range=["2000-02-01","2014-12-31"] + 'esacci.CLOUD.mon.L3C.CLD_PRODUCTS.MODIS.Terra.MODIS_TERRA.2-0.r1', + variable_names=['cot', 'cth', 'ctt'], + time_range=["2000-02-01", "2014-12-31"] ) -dataset = dataset.drop([x for x in list(dataset.variables) if x not in ['time','lat','lon','cot','cth','ctt']]) +dataset = dataset.drop([x for x in list(dataset.variables) if + x not in ['time', 'lat', 'lon', 'cot', 'cth', 'ctt']]) + +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) def get_dates_8d(year): - return np.arange(np.datetime64(f"{year}-01-05"), np.datetime64(f"{year+1}-01-01"), np.timedelta64(8, "D")).astype("datetime64[ns]") + return np.arange(np.datetime64(f"{year}-01-05"), + np.datetime64(f"{year + 1}-01-01"), + np.timedelta64(8, "D")).astype("datetime64[ns]") + -dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000,2015)]) -dates = dates[(dates >= np.datetime64("2000-02-15")) & (dates <= np.datetime64("2014-12-16"))] +dates = np.concatenate([get_dates_8d(year) for year in np.arange(2000, 2015)]) +dates = dates[(dates >= np.datetime64("2000-02-15")) & ( + dates <= np.datetime64("2014-12-16"))] print("Resampling in time") -dataset_8d = dataset.interp(coords=dict(time=dates),method="nearest") +dataset_8d = dataset.interp(coords=dict(time=dates), method="nearest") -new_lats = np.arange(-89.875,90,0.25) -new_lons = np.arange(-179.875,180,0.25) +new_lats = np.arange(-89.875, 90, 0.25) +new_lons = np.arange(-179.875, 180, 0.25) print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest",kwargs={"fill_value": "extrapolate"}) -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest", + kwargs={"fill_value": "extrapolate"}) +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-cloud-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py index 53096bc..fea92f3 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/CCI/cloud/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.yaml", "r") as stream: +with open("inputs-preprocess/CCI/cloud/cci-cloud-metadata-0.0833deg.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py index 4ff4921..63f73ef 100644 --- a/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py +++ b/ESDC/inputs-preprocess/CCI/cloud/cci-cloud-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/CCI/cloud/preprocess" pathIn = os.path.expanduser(pathIn) @@ -24,7 +22,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py index a3b89e2..816c714 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.083deg.py @@ -1,23 +1,20 @@ -from xcube.core.store import find_data_store_extensions -from xcube.core.store import get_data_store_params_schema -from xcube.core.store import new_data_store +import os -import shapely.geometry -from IPython.display import JSON -import matplotlib.pyplot as plt import numpy as np -from tqdm import tqdm import xarray as xr -print("Reading") -dataset_8d = xr.open_zarr("/net/scratch/dmontero/CCI/cci-sm-8d-0.25deg-256x128x128.zarr") +pathOut = "~/data/CCI/sm/preprocess" +pathOut = os.path.expanduser(pathOut) +print("Reading") +dataset_8d = xr.open_zarr(f"{pathOut}/cci-sm-8d-0.25deg-256x128x128.zarr") new_lats = np.load("lat.npy") new_lons = np.load("lon.npy") print("Resampling in space") -dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats,lon=new_lons),method="nearest") -dataset_8d = dataset_8d.chunk(dict(time=256,lat=128,lon=128)) +dataset_8d = dataset_8d.interp(coords=dict(lat=new_lats, lon=new_lons), + method="nearest") +dataset_8d = dataset_8d.chunk(dict(time=256, lat=128, lon=128)) print("Saving") -dataset_8d.to_zarr("/net/scratch/dmontero/CCI/cci-sm-8d-0.083deg-256x128x128.zarr") +dataset_8d.to_zarr(f"{pathOut}/cci-sm-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py index c2c4208..7b2cc5b 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-8d-0.25deg.py @@ -1,7 +1,9 @@ +import glob +import os + import numpy as np -from tqdm import tqdm import xarray as xr -import glob +from tqdm import tqdm pathOut = "~/data/CCI/sm/preprocess" pathOut = os.path.expanduser(pathOut) @@ -17,25 +19,28 @@ files.sort() dataset = [xr.open_zarr(file) for file in tqdm(files)] -dataset = xr.concat(dataset,dim="time") +dataset = xr.concat(dataset, dim="time") dataset = dataset.chunk(dict(time=256)) last_year = 2020 first_year = 1979 -years = np.arange(first_year,last_year + 1) +years = np.arange(first_year, last_year + 1) + -def resample_weekly(ds,year): +def resample_weekly(ds, year): keep_attrs = ds.time.attrs - ds = ds.sel(time=slice(f"{year}-01-01",f"{year+1}-01-01")).resample(time="8D").mean() - ds['time'] = ds.time + np.timedelta64(4,"D") + ds = ds.sel(time=slice(f"{year}-01-01", f"{year + 1}-01-01")).resample( + time="8D").mean() + ds['time'] = ds.time + np.timedelta64(4, "D") ds.time.attrs = keep_attrs return ds + print("Resampling in time") -dataset_8d = [resample_weekly(dataset,year) for year in tqdm(years)] -dataset_8d = xr.concat(dataset_8d,dim="time") +dataset_8d = [resample_weekly(dataset, year) for year in tqdm(years)] +dataset_8d = xr.concat(dataset_8d, dim="time") dataset_8d = dataset_8d.chunk(dict(time=256)) print("Saving") -dataset_8d.to_zarr(f"{pathOut}/cci-sm-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset_8d.to_zarr(f"{pathOut}/cci-sm-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py index 297594f..0b09ddc 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.py @@ -1,22 +1,24 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np +import yaml +pathOut = "~/data/CCI/sm/output" +pathOut = os.path.expanduser(pathOut) -with open("cci-sm-metadata-0.0833deg.yaml", "r") as stream: +with open("inputs-preprocess/CCI/sm/cci-sm-metadata-0.0833deg.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: print(exc) -datacube = xr.open_zarr("/net/scratch/dmontero/CCI/cci-sm-8d-0.083deg-256x128x128.zarr") +datacube = xr.open_zarr(f"{pathOut}/cci-sm-8d-0.083deg-256x128x128.zarr") datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -44,4 +46,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr("/net/scratch/dmontero/CCI/metadata/cci-sm-8d-0.083deg-256x128x128.zarr") +datacube.to_zarr(f"{pathOut}/cci-sm-8d-0.083deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py index 752f111..c5ef7ef 100644 --- a/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py +++ b/ESDC/inputs-preprocess/CCI/sm/cci-sm-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray -import xarray as xr -import numpy as np +import xarray as xr +import yaml pathOut = "~/data/CCI/sm/output" pathOut = os.path.expanduser(pathOut) @@ -14,8 +12,7 @@ pathIn = "~/data/CCI/sm/preprocess" pathIn = os.path.expanduser(pathIn) - -with open("cci-sm-metadata.yaml", "r") as stream: +with open("inputs-preprocess/CCI/sm/cci-sm-metadata.yaml", "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +22,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] From bf39a493c06c2bc862f00d717098bd50d168a17f Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 5 Apr 2023 16:20:47 +0200 Subject: [PATCH 17/17] harmonizing imports --- .../GOME2/sif-gome2-JJ-data-cube-0.25deg.py | 22 +++++++------- .../SIF/GOME2/sif-gome2-JJ-data-cube.py | 20 +++++-------- .../SIF/GOME2/sif-gome2-JJ-metadata.py | 17 +++++------ .../GOME2/sif-gome2-PK-data-cube-0.25deg.py | 22 +++++++------- .../SIF/GOME2/sif-gome2-PK-data-cube.py | 20 +++++-------- .../SIF/GOME2/sif-gome2-PK-metadata.py | 15 +++++----- .../SIF/GOSIF/sif-gosif-data-cube-0.25deg.py | 16 +++++----- .../SIF/GOSIF/sif-gosif-data-cube-part1.py | 29 ++++++++++--------- .../SIF/GOSIF/sif-gosif-data-cube-part2.py | 18 ++++-------- .../SIF/GOSIF/sif-gosif-metadata.py | 10 +++---- 10 files changed, 84 insertions(+), 105 deletions(-) diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py index 5262b82..bc9f8db 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube-0.25deg.py @@ -1,9 +1,7 @@ -import xarray as xr -import xesmf as xe -import numpy as np -from tqdm import tqdm -from datetime import datetime import os +from datetime import datetime + +import xarray as xr pathOut = "~/data/SIF/GOME2-SIF/output" pathOut = os.path.expanduser(pathOut) @@ -12,16 +10,18 @@ dataset = xr.open_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr") print("Interpolating") -dataset = dataset.coarsen(lat=5,lon=5).mean() -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) -dataset = dataset.interpolate_na(dim="time",fill_value="extrapolate") -dataset = dataset.chunk(dict(time=256,lat=128,lon=128)) +dataset = dataset.coarsen(lat=5, lon=5).mean() +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) +dataset = dataset.interpolate_na(dim="time", fill_value="extrapolate") +dataset = dataset.chunk(dict(time=256, lat=128, lon=128)) print("Adding attributes") dataset.attrs['date_modified'] = str(datetime.now()) dataset.attrs['time_coverage_end'] = str(dataset.time[-1].values) dataset.attrs['time_coverage_start'] = str(dataset.time[0].values) -dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + ['Downsampling to 0.25 deg with mean','Interpolating NA with linear interpolation'] +dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + [ + 'Downsampling to 0.25 deg with mean', + 'Interpolating NA with linear interpolation'] dataset.attrs['geospatial_lat_max'] = float(dataset.lat.max().values) dataset.attrs['geospatial_lat_min'] = float(dataset.lat.min().values) @@ -32,4 +32,4 @@ dataset.attrs['geospatial_lon_resolution'] = 0.25 print("Saving") -dataset.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py index 5faf440..e7d2f52 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-data-cube.py @@ -1,14 +1,7 @@ -import xarray as xr -import numpy as np import glob import os -import datetime -from tqdm import tqdm - -# pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/cubes/" -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import xarray as xr pathIn = "~/data/SIF/GOME2-SIF/source" pathIn = os.path.expanduser(pathIn) @@ -18,13 +11,14 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - + files = glob.glob(f"{pathIn}/*JJ*.nc") files.sort() -datasets = [xr.open_dataset(file,chunks = {'lat':256, 'lon':256, 'time':256}) for file in files] -datasets = xr.concat(datasets,dim = "time").drop("crs") -datasets = datasets.transpose("time","lat","lon") -datasets = datasets.chunk({'lat':256, 'lon':256, 'time':256}) +datasets = [xr.open_dataset(file, chunks={'lat': 256, 'lon': 256, 'time': 256}) + for file in files] +datasets = xr.concat(datasets, dim="time").drop("crs") +datasets = datasets.transpose("time", "lat", "lon") +datasets = datasets.chunk({'lat': 256, 'lon': 256, 'time': 256}) datasets.to_zarr(f"{pathOut}/sif-gome2-JJ-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py index ab18dda..86920c7 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.py @@ -1,11 +1,8 @@ -from tqdm import tqdm -from datetime import datetime -import yaml -import rioxarray -import xarray as xr -import numpy as np import os +from datetime import datetime +import xarray as xr +import yaml pathIn = "~/data/SIF/GOME2-SIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -16,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOME2/sif-gome2-JJ-metadata.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -26,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -54,4 +52,5 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr",mode = "w") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gome2-JJ-8d-0.05deg-256x256x256.zarr", + mode="w") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py index 9d4b010..31b06a9 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube-0.25deg.py @@ -1,9 +1,7 @@ -import xarray as xr -import xesmf as xe -import numpy as np -from tqdm import tqdm -from datetime import datetime import os +from datetime import datetime + +import xarray as xr pathOut = "~/data/SIF/GOME2-SIF/output" pathOut = os.path.expanduser(pathOut) @@ -12,16 +10,18 @@ dataset = xr.open_zarr(f"{pathOut}/sif-gome2-PK-8d-0.05deg-256x256x256.zarr") print("Interpolating") -dataset = dataset.coarsen(lat=5,lon=5).mean() -dataset = dataset.chunk(dict(time=-1,lat=64,lon=64)) -dataset = dataset.interpolate_na(dim="time",fill_value="extrapolate") -dataset = dataset.chunk(dict(time=256,lat=128,lon=128)) +dataset = dataset.coarsen(lat=5, lon=5).mean() +dataset = dataset.chunk(dict(time=-1, lat=64, lon=64)) +dataset = dataset.interpolate_na(dim="time", fill_value="extrapolate") +dataset = dataset.chunk(dict(time=256, lat=128, lon=128)) print("Adding attributes") dataset.attrs['date_modified'] = str(datetime.now()) dataset.attrs['time_coverage_end'] = str(dataset.time[-1].values) dataset.attrs['time_coverage_start'] = str(dataset.time[0].values) -dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + ['Downsampling to 0.25 deg with mean','Interpolating NA with linear interpolation'] +dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + [ + 'Downsampling to 0.25 deg with mean', + 'Interpolating NA with linear interpolation'] dataset.attrs['geospatial_lat_max'] = float(dataset.lat.max().values) dataset.attrs['geospatial_lat_min'] = float(dataset.lat.min().values) @@ -32,4 +32,4 @@ dataset.attrs['geospatial_lon_resolution'] = 0.25 print("Saving") -dataset.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.25deg-256x128x128.zarr") \ No newline at end of file +dataset.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.25deg-256x128x128.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py index 619340f..862c4c4 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-data-cube.py @@ -1,14 +1,7 @@ -import xarray as xr -import numpy as np import glob import os -import datetime -from tqdm import tqdm - -# pathOut = "/net/projects/deep_esdl/data/GOME2-SIF/cubes/" -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import xarray as xr pathIn = "~/data/SIF/GOME2-SIF/source" pathIn = os.path.expanduser(pathIn) @@ -18,13 +11,14 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - + files = glob.glob(f"{pathIn}/*PK*.nc") files.sort() -datasets = [xr.open_dataset(file,chunks = {'lat':256, 'lon':256, 'time':256}) for file in files] -datasets = xr.concat(datasets,dim = "time").drop("crs") -datasets = datasets.transpose("time","lat","lon") -datasets = datasets.chunk({'lat':256, 'lon':256, 'time':256}) +datasets = [xr.open_dataset(file, chunks={'lat': 256, 'lon': 256, 'time': 256}) + for file in files] +datasets = xr.concat(datasets, dim="time").drop("crs") +datasets = datasets.transpose("time", "lat", "lon") +datasets = datasets.chunk({'lat': 256, 'lon': 256, 'time': 256}) datasets.to_zarr(f"{pathOut}/sif-gome2-PK-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py index 3de04c9..9135f28 100644 --- a/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/SIF/GOME2-SIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -15,7 +13,8 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) -with open("inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.yaml", "r") as stream: +with open("inputs-preprocess/SIF/GOME2/sif-gome2-PK-metadata.yaml", + "r") as stream: try: metadata = yaml.safe_load(stream) except yaml.YAMLError as exc: @@ -25,7 +24,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"] @@ -53,4 +52,4 @@ sorted({**datacube.attrs, **additional_attrs}.items()) ) -datacube.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.05deg-256x256x256.zarr") \ No newline at end of file +datacube.to_zarr(f"{pathOut}/sif-gome2-PK-8d-0.05deg-256x256x256.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py index ee1bfba..ca1fae5 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-0.25deg.py @@ -1,10 +1,9 @@ -import xarray as xr -import xesmf as xe -import numpy as np import os -from tqdm import tqdm from datetime import datetime +import numpy as np +import xarray as xr + pathOut = "~/data/SIF/GOSIF/output" pathOut = os.path.expanduser(pathOut) @@ -13,21 +12,22 @@ print("Fixing time") keep_attrs = dataset.time.attrs -dataset["time"] = dataset.time + np.timedelta64(4,"D") +dataset["time"] = dataset.time + np.timedelta64(4, "D") dataset.time.attrs = keep_attrs print("Coarsing") -dataset = dataset.coarsen(lat=5,lon=5).mean() +dataset = dataset.coarsen(lat=5, lon=5).mean() print("Chunking") -dataset = dataset.chunk(dict(time=256,lat=128,lon=128)) +dataset = dataset.chunk(dict(time=256, lat=128, lon=128)) print("Adding attrs") dataset.attrs['date_modified'] = str(datetime.now()) dataset.attrs['time_coverage_end'] = str(dataset.time[-1].values) dataset.attrs['time_coverage_start'] = str(dataset.time[0].values) dataset.attrs['reported_day'] = 5.0 -dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + ['Downsampling to 0.25 deg with mean'] +dataset.attrs['processing_steps'] = dataset.attrs['processing_steps'] + [ + 'Downsampling to 0.25 deg with mean'] dataset.attrs['geospatial_lat_max'] = float(dataset.lat.max().values) dataset.attrs['geospatial_lat_min'] = float(dataset.lat.min().values) diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py index a57cb1b..f602c84 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part1.py @@ -1,14 +1,10 @@ -import xarray as xr -import numpy as np +import datetime import glob import os -import datetime -from tqdm import tqdm - -# pathOut = "/net/projects/deep_esdl/data/GOSIF/cubes/" -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import numpy as np +import xarray as xr +from tqdm import tqdm pathIn = "~/data/SIF/GOSIF/source" pathIn = os.path.expanduser(pathIn) @@ -16,17 +12,22 @@ files = glob.glob(f"{pathIn}/*.tif") files.sort() + def to_xarray(file): filename = file.split("/")[-1] - date_info = filename.split("_",1)[1].split(".")[0] - date = np.datetime64(datetime.datetime.strptime(date_info,'%Y%j')) - ds = xr.open_dataset(file,engine = "rasterio",chunks = {"x":1024,"y":1024}).where(lambda x: x < 32766,other = np.nan) - ds = ds.reset_coords().band_data.sel(band = 1).drop("band").rename({"y": "lat", "x":"lon"}) + date_info = filename.split("_", 1)[1].split(".")[0] + date = np.datetime64(datetime.datetime.strptime(date_info, '%Y%j')) + ds = xr.open_dataset(file, engine="rasterio", + chunks={"x": 1024, "y": 1024}).where( + lambda x: x < 32766, other=np.nan) + ds = ds.reset_coords().band_data.sel(band=1).drop("band").rename( + {"y": "lat", "x": "lon"}) ds.name = "sif" ds = ds.to_dataset() ds = ds.assign_coords({"time": date}).expand_dims("time") ds = ds * 0.0001 - ds = ds.transpose("time","lat","lon") - ds.to_zarr(f"{pathIn}/{filename.replace('.tif','.zarr')}") + ds = ds.transpose("time", "lat", "lon") + ds.to_zarr(f"{pathIn}/{filename.replace('.tif', '.zarr')}") + [to_xarray(file) for file in tqdm(files)] diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py index c673671..a3aac9c 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-data-cube-part2.py @@ -1,14 +1,8 @@ -import xarray as xr -import numpy as np import glob import os -import datetime -from tqdm import tqdm -# pathOut = "/net/projects/deep_esdl/data/GOSIF/cubes/" - -# if not os.path.exists(pathOut): -# os.mkdir(pathOut) +import xarray as xr +from tqdm import tqdm pathIn = "~/data/SIF/GOSIF/source" pathIn = os.path.expanduser(pathIn) @@ -18,7 +12,7 @@ if not os.path.exists(pathOut): os.makedirs(pathOut) - + files = glob.glob(f"{pathIn}/*.zarr") files.sort() @@ -30,7 +24,7 @@ ds = xr.open_zarr(file) ds["lon"] = ref.lon datasets.append(ds) - -datasets = xr.concat(datasets,dim = "time") -datacube = xr.open_zarr(f"{pathIn}/sif-gosif-1x1024x1024.zarr") \ No newline at end of file +datasets = xr.concat(datasets, dim="time") + +datacube = xr.open_zarr(f"{pathIn}/sif-gosif-1x1024x1024.zarr") diff --git a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py index df73bc2..de6f0e6 100644 --- a/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py +++ b/ESDC/inputs-preprocess/SIF/GOSIF/sif-gosif-metadata.py @@ -1,10 +1,8 @@ -from tqdm import tqdm +import os from datetime import datetime -import yaml -import rioxarray + import xarray as xr -import numpy as np -import os +import yaml pathIn = "~/data/SIF/GOSIF/preprocess" pathIn = os.path.expanduser(pathIn) @@ -25,7 +23,7 @@ datacube = datacube.rio.write_crs( "epsg:4326", grid_mapping_name="crs" -).reset_coords() +).reset_coords() del datacube.crs.attrs["spatial_ref"] datacube.attrs = metadata["global"]