Skip to content

Commit

Permalink
MBCn with 5days-OddWindow (#79)
Browse files Browse the repository at this point in the history
<!-- Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [ ] This PR addresses an already opened issue (for bug fixes /
features)
  - This PR fixes #xyz
- [x] (If applicable) Documentation has been added / updated (for bug
fixes / features).
- [x] (If applicable) Tests have been added.
- [x] CHANGELOG.rst has been updated (with summary of main changes).
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added.

### What kind of change does this PR introduce?

* Small workaround that makes a 5-day grouping usable in MBCn; it is
compatible with odd window sizes only.

### Does this PR introduce a breaking change?

No

### Other information:

This doesn't respect the logic of `Grouper`, but to activate this
functionality, I pass a `Grouper("5D", window=7)`. I think only "time",
"time.season", "time.month" and "time.dayofyear" would be allowed as strings
in a proper implementation ("The usual grouping name as xarray understands
it. Ex: "time.month" or "time"."). To allow more complex groups, I guess
we would need to accept a pandas object or something similar.

Since MBCn doesn't currently use map_groups, implementing this hack was
relatively easy.

This was also designed specifically for a 365-day calendar, since 5 is a
factor of 365 (5*73).
  • Loading branch information
coxipi authored Feb 27, 2025
2 parents f16c84c + 90b2ff0 commit 36bed8d
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
with:
cache-downloads: true
cache-environment: true
environment-file: environment.yml
environment-file: environment-dev.yml
create-args: >-
pytest-reportlog
python=${{ matrix.python-version }}
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Changes
* Added `upstream` testing to the CI pipeline for both daily and push events. (:pull:`61`).
* Import last changes in xclim before the embargo (:pull:`80`).
* `xsdba` has begun the process of adoption of the OpenSSF Best Practices checklist. (:pull:`82`).
* Add a new grouping method specific to `MBCn`, which is called by passing `group=Grouper("5D", window=n)` where `n` is an odd positive integer. (:pull:`79`).

Fixes
^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ docs = [
"nc-time-axis >=1.4.1",
"pooch >=1.8.0",
"pygments",
"sphinx >=7.0.0,<8.2.0", # pinned until nbsphinx supports sphinx 8.2
"sphinx >=7.1.0,<8.2.0", # pinned until nbsphinx supports sphinx 8.2
"sphinx-autobuild >=2024.4.16",
"sphinx-autodoc-typehints",
"sphinx-codeautolink >=0.16.2",
Expand Down
18 changes: 9 additions & 9 deletions src/xsdba/_adjustment.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@

def _adapt_freq_hist(ds: xr.Dataset, adapt_freq_thresh: str):
"""Adapt frequency of null values of `hist` in order to match `ref`."""
# ADAPT: Drop context altogether?
# with units.context(infer_context(ds.ref.attrs.get("standard_name"))):
thresh = convert_units_to(adapt_freq_thresh, ds.ref)
dim = ["time"] + ["window"] * ("window" in ds.hist.dims)
return _adapt_freq.func(
Expand Down Expand Up @@ -345,10 +343,12 @@ def _npdft_adjust(sim, af_q, rots, quantiles, method, extrap):


def mbcn_adjust(
ref: xr.Dataset,
hist: xr.Dataset,
sim: xr.Dataset,
ref: xr.DataArray,
hist: xr.DataArray,
sim: xr.DataArray,
ds: xr.Dataset,
g_idxs: xr.DataArray,
gw_idxs: xr.DataArray,
pts_dims: Sequence[str],
interp: str,
extrapolation: str,
Expand All @@ -372,12 +372,14 @@ def mbcn_adjust(
training data.
sim : xr.DataArray
data to adjust (stacked with multivariate dimension).
g_idxs : xr.DataArray
Indices of the times in each time group.
gw_idxs : xr.DataArray
Indices of the times in each windowed time group.
ds : xr.Dataset
Dataset variables:
rot_matrices : Rotation matrices used in the training step.
af_q : Adjustment factors obtained in the training step for the npdf transform
g_idxs : Indices of the times in each time group
gw_idxs: Indices of the times in each windowed time group
pts_dims : [str, str]
The name of the "multivariate" dimension and its primed counterpart. Defaults to "multivar", which
is the normal case when using :py:func:`xsdba.stack_variables`, and "multivar_prime".
Expand Down Expand Up @@ -407,8 +409,6 @@ def mbcn_adjust(
rot_matrices = ds.rot_matrices
af_q = ds.af_q
quantiles = af_q.quantiles
g_idxs = ds.g_idxs
gw_idxs = ds.gw_idxs
gr_dim = gw_idxs.attrs["group_dim"]
win = gw_idxs.attrs["group"][1]

Expand Down
9 changes: 6 additions & 3 deletions src/xsdba/adjustment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
from __future__ import annotations

from copy import deepcopy
from inspect import signature
from typing import Any
from warnings import warn
Expand Down Expand Up @@ -1769,7 +1770,9 @@ class MBCn(TrainAdjust):
Notes
-----
* Only "time" and "time.dayofyear" (with a suitable window) are implemented as possible values for `group`.
* The grouping of time dimensions is passed through `base_kws`. Three types of grouping are allowed: "time" or `xsdba.Grouper("time")`;
"time.dayofyear" or `xsdba.Grouper("time.dayofyear", window)`; and `xsdba.Grouper("5D", window)`, where `window` must be an odd integer that
counts the number of 5-day subgroups. The window moves in 5-day strides too. This last option is specific to `MBCn`.
* The historical reference (:math:`T`, for "target"), simulated historical (:math:`H`) and simulated projected (:math:`S`)
datasets are constructed by stacking the timeseries of N variables together using ``xsdba.stack_variables``.
Expand Down Expand Up @@ -1909,14 +1912,14 @@ def _adjust(

g_idxs, gw_idxs = grouped_time_indexes(ref.time, self.group)
ds = self.ds.copy()
ds["g_idxs"] = g_idxs
ds["gw_idxs"] = gw_idxs

# adjust (adjust for npft transform, train/adjust for univariate bias correction)
out = mbcn_adjust(
ref=ref,
hist=hist,
sim=sim,
g_idxs=g_idxs,
gw_idxs=gw_idxs,
ds=ds,
pts_dims=self.pts_dims,
interp=self.interp,
Expand Down
5 changes: 4 additions & 1 deletion src/xsdba/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ def __init__(
dim, prop = group.split(".")
else:
dim, prop = group, "group"

# TODO : Remove this special workaround
# This will only work with MBCn
if group == "5D":
dim = "time"
if isinstance(add_dims, str):
add_dims = [add_dims]

Expand Down
32 changes: 32 additions & 0 deletions src/xsdba/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,38 @@ def _get_group_complement(da, group):
elif gr == "time":
gw_idxs = timeind.rename({"time": win_dim}).expand_dims({win_dim0: [-1]})
g_idxs = gw_idxs.copy()
# TODO : Implement a proper Grouper treatment
# This would normally not be allowed with sdba.Grouper.
# A proper implementation in Grouper may be given in the future, but here is the implementation
# that I used for a project
elif gr == "5D":
if win % 2 == 0:
raise ValueError(
f"Group 5D only works with an odd window, got `window` = {win}"
)

gr_dim = "five_days"
imin, imax = 0, times.size - 1

def _get_idxs(win):
    """Return the positional time indices of every 5-day group.

    The result has one entry per 5-day group along ``gr_dim`` (``365 // 5``
    entries). Along ``win_dim`` it holds the indices of the ``win``
    consecutive 5-day blocks centred on that group, repeated once for each
    distinct year found in ``times``. Any index outside ``[imin, imax]`` is
    replaced with -1.
    """
    # NOTE: a fixed 365-day year length is hard-coded in the offsets below.
    n_years = len(set(times.dt.year.values))
    first_block, stop_block = -(win - 1) // 2, (win - 1) // 2 + 1

    # Indices of the windowed blocks around the first 5-day group, per year.
    pieces = []
    for iyear in range(n_years):
        for iwin in range(first_block, stop_block):
            pieces.append(np.arange(5) + iwin * 5 + iyear * 365)
    first_group = np.concatenate(pieces)

    template = xr.DataArray(
        first_group,
        dims=[win_dim],
        coords={win_dim: np.arange(len(first_group))},
    )

    # Shift the template by 5 days for each successive group.
    shifted = []
    for i in range(365 // 5):
        shifted.append((template + i * 5).expand_dims({gr_dim: [i]}))
    table = xr.concat(shifted, dim=gr_dim)

    in_range = (table >= imin) & (table <= imax)
    return table.where(in_range, -1)

gw_idxs, g_idxs = _get_idxs(win), _get_idxs(1)

else:
raise NotImplementedError(f"Grouping {gr} not implemented.")
gw_idxs.attrs["group"] = (gr, win)
Expand Down
4 changes: 3 additions & 1 deletion tests/test_adjustment.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,9 @@ def test_different_times_training(self, timelonlatseries, random):
@pytest.mark.slow
class TestMBCn:
@pytest.mark.parametrize("use_dask", [True, False])
@pytest.mark.parametrize("group, window", [["time", 1], ["time.dayofyear", 31]])
@pytest.mark.parametrize(
"group, window", [["time", 1], ["time.dayofyear", 31], ["5D", 7]]
)
@pytest.mark.parametrize("period_dim", [None, "period"])
def test_simple(self, use_dask, group, window, period_dim, gosset):
group, window, period_dim, use_dask = "time", 1, None, False
Expand Down

0 comments on commit 36bed8d

Please sign in to comment.