Skip to content

Commit

Permalink
reduce return formats (#5)
Browse files Browse the repository at this point in the history
* Default return is xarray

* remove txt reqs

* deleted data file

* qa

* return format

* varname and eg notebooks

* qa

* updates to methods

* updates to notebooks

* updates to methods

* refactor, notebooks and qa
  • Loading branch information
EddyCMWF authored Aug 31, 2023
1 parent 0217b23 commit 5536856
Show file tree
Hide file tree
Showing 15 changed files with 4,972 additions and 3,614 deletions.
27 changes: 13 additions & 14 deletions .github/workflows/on-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,19 @@ jobs:
runs-on: ubuntu-latest
needs: [combine-environments]
steps:
- uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha || github.ref }}
- name: Build distributions
run: |
$CONDA/bin/python -m pip install build
$CONDA/bin/python -m build
- name: Publish a Python distribution to PyPI
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
- uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha || github.ref }}
- name: Build distributions
run: |
$CONDA/bin/python -m pip install build
$CONDA/bin/python -m build
- name: Publish a Python distribution to PyPI
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
# BOPEN METHOD:
# steps:
# - uses: actions/checkout@v3
Expand All @@ -99,4 +99,3 @@ jobs:
# with:
# user: __token__
# password: ${{ secrets.PYPI_API_TOKEN }}

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# earthkit-climate

A toolkit for statistical analysis of climate and related observational data.
A toolkit for statistical analysis of temporal-geospatial data.

**DISCLAIMER**
This project is **BETA** and will be **Experimental** for the foreseeable future.
Expand Down
1 change: 0 additions & 1 deletion ci/combined-environment-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,3 @@ dependencies:
- xarray
- pip:
- git+https://github.com/ecmwf/multiurl

11 changes: 3 additions & 8 deletions earthkit/climate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,10 @@
except ImportError:
pass
else:
KWARG_TYPES = {
# "dataarray": xr.DataArray,
# "dataset": xr.Dataset,
}
aggregate = transform_module_inputs(aggregate)

aggregate = transform_module_inputs(aggregate, kwarg_types=KWARG_TYPES)
climatology = transform_module_inputs(climatology)

climatology = transform_module_inputs(climatology, kwarg_types=KWARG_TYPES)

shapes = transform_module_inputs(shapes, kwarg_types=KWARG_TYPES)
shapes = transform_module_inputs(shapes)

__all__ = ["__version__", "aggregate", "climatology", "shapes"]
70 changes: 56 additions & 14 deletions earthkit/climate/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
}


def daily_mean(dataarray, **kwargs):
def daily_mean(dataarray: T.Union[xr.Dataset, xr.DataArray], **kwargs):
"""
Calculate the daily mean.
Expand All @@ -46,7 +46,7 @@ def daily_mean(dataarray, **kwargs):
return resample(dataarray, frequency="D", dim="time", how="mean", **kwargs)


def daily_max(dataarray, **kwargs):
def daily_max(dataarray: T.Union[xr.Dataset, xr.DataArray], **kwargs):
"""
Calculate the daily max.
Expand All @@ -64,7 +64,7 @@ def daily_max(dataarray, **kwargs):
return resample(dataarray, frequency="D", dim="time", how="max", **kwargs)


def daily_min(dataarray, **kwargs):
def daily_min(dataarray: T.Union[xr.Dataset, xr.DataArray], **kwargs):
"""
Calculate the daily min.
Expand All @@ -82,7 +82,7 @@ def daily_min(dataarray, **kwargs):
return resample(dataarray, frequency="D", dim="time", how="min", **kwargs)


def monthly_mean(dataarray, **kwargs):
def monthly_mean(dataarray: T.Union[xr.Dataset, xr.DataArray], **kwargs):
"""
Calculate the monthly mean.
Expand All @@ -101,7 +101,7 @@ def monthly_mean(dataarray, **kwargs):


def resample(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str or int or float,
dim: str = "time",
how: str = "mean",
Expand Down Expand Up @@ -140,7 +140,7 @@ def resample(


def _groupby_time(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str = None,
bin_widths: int = None,
squeeze: bool = True,
Expand Down Expand Up @@ -175,7 +175,7 @@ def _groupby_time(


def _groupby_bins(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str,
bin_widths: int,
squeeze: bool,
Expand Down Expand Up @@ -280,6 +280,7 @@ def _reduce_dataarray(

def reduce(
dataarray: T.Union[xr.DataArray, xr.Dataset],
*args,
**kwargs,
):
"""
Expand Down Expand Up @@ -314,14 +315,57 @@ def reduce(
"""
if isinstance(dataarray, (xr.Dataset)):
return xr.Dataset(
[_reduce_dataarray(dataarray[var], **kwargs) for var in dataarray.data_vars]
)
out_ds = xr.Dataset().assign_attrs(dataarray.attrs)
for var in dataarray.data_vars:
out_da = _reduce_dataarray(dataarray[var], *args, **kwargs)
out_ds[out_da.name] = out_da
return out_ds
else:
return _reduce_dataarray(dataarray, **kwargs)
return _reduce_dataarray(dataarray, *args, **kwargs)


def rolling_reduce(
    dataarray: T.Union[xr.Dataset, xr.DataArray], *args, **kwargs
) -> T.Union[xr.Dataset, xr.DataArray]:
    """Return reduced data using a moving window over which to apply the reduction.

    A DataArray is passed straight to ``_rolling_reduce_dataarray``. A Dataset is
    processed one data variable at a time and the results are collected in a new
    Dataset that carries the attributes of the input Dataset.

    Parameters
    ----------
    dataarray : xr.DataArray or xr.Dataset
        Data over which the moving window is applied according to the reduction method.
    *args :
        Positional arguments forwarded unchanged to ``_rolling_reduce_dataarray``.
    windows :
        Windows for the rolling groups, for example `time=10` to perform a reduction
        in the time dimension with a bin size of 10. The rolling groups can be defined
        over any number of dimensions. **See documentation for xarray.DataArray.rolling**.
    min_periods : integer
        The minimum number of observations in the window required to have a value
        (otherwise result is NaN). Default is to set **min_periods** equal to the size
        of the window. **See documentation for xarray.DataArray.rolling**.
    center : bool
        Set the labels at the centre of the window.
        **See documentation for xarray.DataArray.rolling**.
    how_reduce : str
        Function to be applied for reduction. Default is 'mean'.
    how_dropna : str
        Determine if dimension is removed from the output when we have at least one NaN or
        all NaN. **how_dropna** can be 'None', 'any' or 'all'. Default is 'any'.
    **kwargs :
        Any kwargs that are compatible with the selected `how_reduce` method.

    Returns
    -------
    xr.DataArray or xr.Dataset (as provided)
    """
    # Single-array input: nothing to iterate over, delegate directly.
    if not isinstance(dataarray, xr.Dataset):
        return _rolling_reduce_dataarray(dataarray, *args, **kwargs)

    # Dataset input: start from an empty Dataset that keeps the input's attributes,
    # then add each reduced variable under the name of the returned DataArray.
    out_ds = xr.Dataset().assign_attrs(dataarray.attrs)
    for var in dataarray.data_vars:
        out_da = _rolling_reduce_dataarray(dataarray[var], *args, **kwargs)
        out_ds[out_da.name] = out_da
    return out_ds


def _rolling_reduce_dataarray(
dataarray: xr.DataArray, how_reduce="mean", how_dropna="any", **kwargs
) -> xr.DataArray:
"""Return reduced data using a moving window over which to apply the reduction.
Expand Down Expand Up @@ -363,12 +407,10 @@ def rolling_reduce(

# Any kwargs left after above reductions are kwargs for reduction method
reduce_kwargs = kwargs
# print("rolling kwargs: ", rolling_kwargs)
# Create rolling groups:
data_rolling = dataarray.rolling(**rolling_kwargs)
# print("reduce kwargs: ", reduce_kwargs)

data_windowed = reduce(data_rolling, how=how_reduce, **reduce_kwargs)
data_windowed = _reduce_dataarray(data_rolling, how=how_reduce, **reduce_kwargs)

data_windowed = _dropna(data_windowed, window_dims, how_dropna)

Expand Down
12 changes: 7 additions & 5 deletions earthkit/climate/climatology.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import typing as T

import xarray as xr

from . import aggregate


def mean(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str = None,
bin_widths: int = None,
time_dim: str = "time",
Expand Down Expand Up @@ -35,7 +37,7 @@ def mean(


def stdev(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str = None,
bin_widths: int = None,
time_dim: str = "time",
Expand Down Expand Up @@ -63,7 +65,7 @@ def stdev(
return aggregate.reduce(grouped_data, how="std", dim=time_dim)


def median(dataarray: xr.DataArray, **kwargs) -> xr.DataArray:
def median(dataarray: T.Union[xr.Dataset, xr.DataArray], **kwargs) -> xr.DataArray:
"""
Calculate the climatological median.
Expand All @@ -88,7 +90,7 @@ def median(dataarray: xr.DataArray, **kwargs) -> xr.DataArray:


def max(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str = None,
bin_widths: int = None,
time_dim: str = "time",
Expand Down Expand Up @@ -117,7 +119,7 @@ def max(


def min(
dataarray: xr.DataArray,
dataarray: T.Union[xr.Dataset, xr.DataArray],
frequency: str = None,
bin_widths: int = None,
time_dim: str = "time",
Expand Down
Loading

0 comments on commit 5536856

Please sign in to comment.