Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wang xcdat clean #1198

Closed
wants to merge 15 commits into from
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,9 @@ ENV/
# Rope project settings
.ropeproject

# debug_data
pcmdi_metrics/monsoon_wang/*.nc
pcmdi_metrics/monsoon_wang/debug_regions_specs.py
pcmdi_metrics/monsoon_wang/test_param.py

test/
1 change: 1 addition & 0 deletions pcmdi_metrics/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@
get_time_bounds_key,
get_time_key,
select_subset,
da_to_ds,
)
185 changes: 137 additions & 48 deletions pcmdi_metrics/io/default_regions_define.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from typing import Union

import xarray as xr
import xcdat as xc

# from pcmdi_metrics.io import da_to_ds, get_longitude, select_subset
from .xcdat_dataset_io import da_to_ds, get_longitude, select_subset


def load_regions_specs():
regions_specs = {
Expand Down Expand Up @@ -45,7 +51,8 @@ def load_regions_specs():
# South American Monsoon
"SAMM": {"domain": {"latitude": (-45.0, 0.0), "longitude": (240.0, 330.0)}},
# North African Monsoon
"NAFM": {"domain": {"latitude": (0.0, 45.0), "longitude": (310.0, 60.0)}},
# "NAFM": {"domain": {"latitude": (0.0, 45.0), "longitude": (310.0, 60.0)}},
"NAFM": {"domain": {"latitude": (0.0, 45.0), "longitude": (-50, 60.0)}},
# South African Monsoon
"SAFM": {"domain": {"latitude": (-45.0, 0.0), "longitude": (0.0, 90.0)}},
# Asian Summer Monsoon
Expand All @@ -70,55 +77,137 @@ def load_regions_specs():
return regions_specs


def region_subset(ds, regions_specs, region=None):
"""
d: xarray.Dataset
regions_specs: dict
region: string
"""
# def region_subset(ds, regions_specs, region=None):
# """
# d: xarray.Dataset
# regions_specs: dict
# region: string
# """
#
# #print("list(regions_specs.keys())", list(regions_specs.keys()))
#
# if (region is None) or (
# (region is not None) and (region not in list(regions_specs.keys()))
# ):
# print("Error: region not defined")
# else:
# if "domain" in list(regions_specs[region].keys()):
# if "latitude" in list(regions_specs[region]["domain"].keys()):
# lat0 = regions_specs[region]["domain"]["latitude"][0]
# lat1 = regions_specs[region]["domain"]["latitude"][1]
# # proceed subset
# if "latitude" in (ds.coords.dims):
# ds = ds.sel(latitude=slice(lat0, lat1))
# elif "lat" in (ds.coords.dims):
# ds = ds.sel(lat=slice(lat0, lat1))
#
# if "longitude" in list(regions_specs[region]["domain"].keys()):
# lon0 = regions_specs[region]["domain"]["longitude"][0]
# lon1 = regions_specs[region]["domain"]["longitude"][1]
#
# # check original dataset longitude range
# if "longitude" in (ds.coords.dims):
# lon_min = ds.longitude.min()
# lon_max = ds.longitude.max()
# elif "lon" in (ds.coords.dims):
# lon_min = ds.lon.min()
# lon_max = ds.lon.max()
#
# # longitude range swap if needed
# if (
# min(lon0, lon1) < 0
# ): # when subset region lon is defined in (-180, 180) range
# if (
# min(lon_min, lon_max) < 0
# ): # if original data lon range is (-180, 180) no treatment needed
# pass
# else: # if original data lon range is (0, 360), convert swap lon
# ds = xc.swap_lon_axis(ds, to=(-180, 180))
#
# # proceed subset
# if "longitude" in (ds.coords.dims):
# ds = ds.sel(longitude=slice(lon0, lon1))
# elif "lon" in (ds.coords.dims):
# ds = ds.sel(lon=slice(lon0, lon1))
#
# return ds


def region_subset(
    ds: Union[xr.Dataset, xr.DataArray],
    region: str,
    data_var: str = "variable",
    regions_specs: dict = None,
    debug: bool = False,
) -> Union[xr.Dataset, xr.DataArray]:
    """Subset a Dataset or DataArray to a named region.

    The region's bounds are looked up under
    ``regions_specs[region]["domain"]``. Only the ``"latitude"`` and
    ``"longitude"`` entries of the domain are used; each is optional and is
    read as a ``(start, end)`` pair.

    Parameters
    ----------
    ds : Union[xr.Dataset, xr.DataArray]
        Data to subset. A DataArray is converted to a Dataset with
        ``da_to_ds`` for processing and converted back before returning.
    region : str
        Key of the region in ``regions_specs``.
    data_var : str, optional
        Name used for the data variable when ``ds`` is a DataArray,
        by default "variable". Not used when ``ds`` is a Dataset.
    regions_specs : dict, optional
        Mapping of region name to region specification, by default None,
        in which case the result of ``load_regions_specs()`` is used.
    debug : bool, optional
        Turn on debug print, by default False

    Returns
    -------
    Union[xr.Dataset, xr.DataArray]
        The subsetted data, of the same type as the input ``ds``.

    Raises
    ------
    KeyError
        Raised when ``region`` is not a key of ``regions_specs``.
    """
    # Remember the input type so that the same type can be returned.
    is_data_array = isinstance(ds, xr.DataArray)
    if is_data_array:
        ds = da_to_ds(ds, data_var)

    if regions_specs is None:
        regions_specs = load_regions_specs()

    if region not in regions_specs:
        # Same exception type as the plain dict lookup would raise, but with
        # a message that tells the caller what went wrong.
        raise KeyError(
            f"Region '{region}' is not defined; "
            f"available regions: {sorted(regions_specs, key=str)}"
        )

    region_spec = regions_specs[region]

    if "domain" in region_spec:
        domain = region_spec["domain"]

        if "latitude" in domain:
            lat0 = domain["latitude"][0]
            lat1 = domain["latitude"][1]
            # Latitude bounds are sorted before being handed to select_subset.
            ds = select_subset(ds, lat=(min(lat0, lat1), max(lat0, lat1)))
            if debug:
                print("region_subset, latitude subsetted, ds:", ds)

        if "longitude" in domain:
            lon0 = domain["longitude"][0]
            lon1 = domain["longitude"][1]

            # Longitude range of the incoming data.
            lon_coord = get_longitude(ds)
            lon_min = lon_coord.min().values.item()
            lon_max = lon_coord.max().values.item()

            # A negative region bound means the region is expressed in the
            # (-180, 180) convention. If the data has no negative longitudes
            # it is treated as (0, 360) and swapped to match the region.
            region_is_signed = min(lon0, lon1) < 0
            data_is_signed = min(lon_min, lon_max) < 0
            if region_is_signed and not data_is_signed:
                # NOTE(review): "time" is dropped before the swap, presumably
                # as a workaround for swap_lon_axis — confirm it is still
                # needed. errors="ignore" keeps this a no-op when the dataset
                # has no "time" variable, without hiding unrelated failures.
                ds = ds.drop_vars(["time"], errors="ignore")
                ds = xc.swap_lon_axis(ds, to=(-180, 180))

            # Longitude bounds are passed in the order given by the region
            # specification (they are not sorted, unlike latitude).
            ds = select_subset(ds, lon=(lon0, lon1))
            if debug:
                print("region_subset, longitude subsetted, ds:", ds)

    # Return the same type as the input.
    if is_data_array:
        return ds[data_var]
    return ds
30 changes: 30 additions & 0 deletions pcmdi_metrics/io/xcdat_dataset_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,36 @@
# Internal function


def da_to_ds(d: Union[xr.Dataset, xr.DataArray], var: str = "variable") -> xr.Dataset:
    """Convert an xarray DataArray to a Dataset.

    Parameters
    ----------
    d : Union[xr.Dataset, xr.DataArray]
        Input object. If a Dataset is given, a copy of it is returned without
        further processing.
    var : str, optional
        Name given to the data variable when ``d`` is a DataArray,
        by default "variable". Not used when ``d`` is already a Dataset.

    Returns
    -------
    xr.Dataset
        A copy of ``d`` when it is a Dataset; otherwise a Dataset that holds
        ``d`` under the name ``var``, after ``.bounds.add_missing_bounds()``
        has been applied to it.

    Raises
    ------
    TypeError
        Raised when ``d`` is neither an xarray DataArray nor an xarray Dataset.
    """
    if isinstance(d, xr.Dataset):
        return d.copy()
    if isinstance(d, xr.DataArray):
        # NOTE(review): the ``.bounds`` accessor is presumably registered by
        # xcdat, so xcdat needs to be imported in this module — confirm.
        return d.to_dataset(name=var).bounds.add_missing_bounds().copy()
    raise TypeError(
        "Input must be an instance of either xarray.DataArray or xarray.Dataset"
    )


def _find_key(
ds: Union[xr.Dataset, xr.DataArray], axis: str, potential_names: list
) -> str:
Expand Down
Loading
Loading