From 12e02d1c118474fe4a1c904817ccecf5b5f85bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= Date: Mon, 21 Oct 2024 14:25:34 -0400 Subject: [PATCH] solve some docs problems, remove unused functions --- docs/api.rst | 81 +++++++++++++ docs/index.rst | 10 ++ docs/references.bib | 50 +++++++- docs/references.rst | 10 ++ docs/xsdba.rst | 10 +- src/xsdba/adjustment.py | 48 ++++---- src/xsdba/formatting.py | 257 ---------------------------------------- src/xsdba/locales.py | 3 +- src/xsdba/loess.py | 6 +- src/xsdba/processing.py | 18 +-- src/xsdba/typing.py | 3 +- src/xsdba/utils.py | 8 +- 12 files changed, 192 insertions(+), 312 deletions(-) create mode 100644 docs/api.rst create mode 100644 docs/references.rst diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..a1c0c11 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,81 @@ +=== +API +=== + +.. _xsdba-user-api: + +xsdba Module +=========== + +.. automodule:: xsdba.adjustment + :members: + :exclude-members: BaseAdjustment + :special-members: + :show-inheritance: + :noindex: + +.. automodule:: xsdba.processing + :members: + :noindex: + +.. automodule:: xsdba.detrending + :members: + :show-inheritance: + :exclude-members: BaseDetrend + :noindex: + +.. automodule:: xsdba.utils + :members: + :noindex: + +.. autoclass:: xsdba.base.Grouper + :members: + :class-doc-from: init + :noindex: + +.. automodule:: xsdba.nbutils + :members: + :noindex: + +.. automodule:: xsdba.loess + :members: + :noindex: + +.. automodule:: xsdba.properties + :members: + :exclude-members: StatisticalProperty + :noindex: + +.. automodule:: xsdba.measures + :members: + :exclude-members: StatisticalMeasure + :noindex: + +.. _`xsdba-developer-api`: + +xsdba Utilities +-------------- + +.. automodule:: xsdba.base + :members: + :show-inheritance: + :exclude-members: Grouper + :noindex: + +.. autoclass:: xsdba.detrending.BaseDetrend + :members: + :noindex: + +.. 
autoclass:: xsdba.adjustment.TrainAdjust + :members: + :noindex: + +.. autoclass:: xsdba.adjustment.Adjust + :members: + :noindex: + +.. autofunction:: xsdba.properties.StatisticalProperty + :noindex: + +.. autofunction:: xsdba.measures.StatisticalMeasure + :noindex: diff --git a/docs/index.rst b/docs/index.rst index abda240..c33c563 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,9 @@ Welcome to xsdba's documentation! releasing authors changelog + references + notebooks/example + notebooks/advanced_example .. toctree:: :maxdepth: 1 @@ -20,6 +23,13 @@ Welcome to xsdba's documentation! apidoc/modules + +.. toctree:: + :maxdepth: 2 + :caption: User API + + api + Indices and tables ================== * :ref:`genindex` diff --git a/docs/references.bib b/docs/references.bib index 735f687..ff6372d 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -37,7 +37,7 @@ @article{cannon_bias_2015 pages = {6938--6959}, } -@misc{cannon_mbc_2020, +@article{cannon_mbc_2020, title = {{MBC}: {Multivariate} {Bias} {Correction} of {Climate} {Model} {Outputs}}, copyright = {GPL-2}, shorttitle = {{MBC}}, @@ -62,7 +62,7 @@ @article{roy_extremeprecip_2023 year = {2023}, } -@misc{roy_juliaclimateclimatetoolsjl_2021, +@article{roy_juliaclimateclimatetoolsjl_2021, title = {{JuliaClimate}/{ClimateTools}.jl: v0.23.1}, shorttitle = {{JuliaClimate}/{ClimateTools}.jl}, url = {https://zenodo.org/record/5399172}, @@ -153,7 +153,7 @@ @article{szekely_testing_2004 year = {2004}, } -@misc{mezzadri_how_2007, +@article{mezzadri_how_2007, title = {How to generate random matrices from the classical compact groups}, url = {https://arxiv.org/abs/math-ph/0609050}, doi = {10.48550/arXiv.math-ph/0609050}, @@ -200,7 +200,7 @@ @article{cleveland_robust_1979 pages = {829--836}, } -@misc{gramfort_lowess_2015, +@article{gramfort_lowess_2015, title = {{LOWESS} : {Locally} weighted regression}, copyright = {BSD 3-Clause}, shorttitle = {{LOWESS}}, @@ -282,7 +282,7 @@ 
@article{francois_multivariate_2020 pages = {537--562}, } -@misc{jalbert_extreme_2022, +@article{jalbert_extreme_2022, title = {Extreme value analysis package for {Julia}.}, url = {https://github.com/jojal5/Extremes.jl}, abstract = {Extreme value analysis package for Julia}, @@ -430,3 +430,43 @@ @article{agbazo_characterizing_2020 keywords = {bias adjustment, climate simulations, physical inconsistency, univariate quantile mapping}, pages = {3868--3884}, } + + +@misc{robin_2021, + title = {{SBCK}: {Statistical} {Bias} {Correction} {Kit}}, + copyright = {GPL-3}, + shorttitle = {{SBCK}}, + url = {https://github.com/yrobink/SBCK-python}, + urldate = {2024-07-03}, + author = {Robin, Yoann}, + year = {2021}, +} + +@article{higham_1988, + title = {Computing a nearest symmetric positive semidefinite matrix}, + journal = {Linear Algebra and its Applications}, + volume = {103}, + pages = {103-118}, + year = {1988}, + issn = {0024-3795}, + doi = {https://doi.org/10.1016/0024-3795(88)90223-6}, + url = {https://www.sciencedirect.com/science/article/pii/0024379588902236}, + author = {Nicholas J. Higham}, + abstract = {The nearest symmetric positive semidefinite matrix in the Frobenius norm to an arbitrary real matrix A is shown to be (B + H)/2, where H is the symmetric polar factor of B=(A + AT)/2. In the 2-norm a nearest symmetric positive semidefinite matrix, and its distance δ2(A) from A, are given by a computationally challenging formula due to Halmos. We show how the bisection method can be applied to this formula to compute upper and lower bounds for δ2(A) differing by no more than a given amount. A key ingredient is a stable and efficient test for positive definiteness, based on an attempted Choleski decomposition. For accurate computation of δ2(A) we formulate the problem as one of zero finding and apply a hybrid Newton-bisection algorithm. 
Some numerical difficulties are discussed and illustrated by example.} +} + +@article{knol_1989, + title = "Least-squares approximation of an improper correlation matrix by a proper one", + abstract = "An algorithm is presented for the best least-squares fitting correlation matrix approximating a given missing value or improper correlation matrix. The proposed algorithm is based upon a solution for Mosier's oblique Procrustes rotation problem offered by ten Berge and Nevels. A necessary and sufficient condition is given for a solution to yield the unique global minimum of the least-squares function. Empirical verification of the condition indicates that the occurrence of non-optimal solutions with the proposed algorithm is very unlikely. A possible drawback of the optimal solution is that it is a singular matrix of necessity. In cases where singularity is undesirable, one may impose the additional nonsingularity constraint that the smallest eigenvalue of the solution be δ, where δ is an arbitrary small positive constant. Finally, it may be desirable to weight the squared errors of estimation differentially. A generalized solution is derived which satisfies the additional nonsingularity constraint and also allows for weighting. The generalized solution can readily be obtained from the standard “unweighted singular” solution by transforming the observed improper correlation matrix in a suitable way.", + keywords = "Missing value correlation, indefinite correlation matrix, IR-85889, tetrachoric correlation, constrained least-squares approximation", + author = "Knol, {Dirk L.} and {ten Berge}, {Jos M.F.}", + year = "1989", + doi = "10.1007/BF02294448", + language = "Undefined", + volume = "54", + pages = "53--61", + journal = "Psychometrika", + issn = "0033-3123", + publisher = "Springer", + number = "1", +} diff --git a/docs/references.rst b/docs/references.rst new file mode 100644 index 0000000..08873c0 --- /dev/null +++ b/docs/references.rst @@ -0,0 +1,10 @@ +.. 
only:: html + + ============ + Bibliography + ============ + + General References + ------------------ + +.. bibliography:: diff --git a/docs/xsdba.rst b/docs/xsdba.rst index 90ed6ad..2c917de 100644 --- a/docs/xsdba.rst +++ b/docs/xsdba.rst @@ -40,7 +40,7 @@ A generic bias adjustment process is laid out as follows: The train-adjust approach allows to inspect the trained adjustment object. The training information is stored in the underlying `Adj.ds` dataset and usually has a `af` variable with the adjustment factors. -Its layout and the other available variables vary between the different algorithm, refer to :ref:`Adjustment methods `. +Its layout and the other available variables vary between the different algorithm, refer to :ref:`Adjustment methods `. Parameters needed by the training and the adjustment are saved to the ``Adj.ds`` dataset as a `adj_params` attribute. Parameters passed to the `adjust` call are written to the history attribute in the output scenario DataArray. @@ -125,21 +125,19 @@ add them back on exit. User API ======== -See: :ref:`sdba-user-api` +See: :ref:`xsdba-user-api` Developer API ============= -See: :ref:`sdba-developer-api` +See: :ref:`xsdba-developer-api` .. only:: html or text - .. _sdba-footnotes: + .. _xsdba-footnotes: SDBA Footnotes ============== .. bibliography:: :style: xcstyle - :labelprefix: SDBA- - :keyprefix: sdba- diff --git a/src/xsdba/adjustment.py b/src/xsdba/adjustment.py index 3e3bfec..7397927 100644 --- a/src/xsdba/adjustment.py +++ b/src/xsdba/adjustment.py @@ -481,7 +481,7 @@ class DetrendedQuantileMapping(TrainAdjust): F^{-1}_{ref}\left\{F_{hist}\left[\frac{\overline{hist}\cdot sim}{\overline{sim}}\right]\right\}\frac{\overline{sim}}{\overline{hist}} where :math:`F` is the cumulative distribution function (CDF) and :math:`\overline{xyz}` is the linear trend of the data. - This equation is valid for multiplicative adjustment. Based on the DQM method of :cite:p:`sdba-cannon_bias_2015`.
+ This equation is valid for multiplicative adjustment. Based on the DQM method of :cite:p:`cannon_bias_2015`. Parameters ---------- @@ -592,7 +592,7 @@ class QuantileDeltaMapping(EmpiricalQuantileMapping): sim\frac{F^{-1}_{ref}\left[F_{sim}(sim)\right]}{F^{-1}_{hist}\left[F_{sim}(sim)\right]} where :math:`F` is the cumulative distribution function (CDF). This equation is valid for multiplicative adjustment. - The algorithm is based on the "QDM" method of :cite:p:`sdba-cannon_bias_2015`. + The algorithm is based on the "QDM" method of :cite:p:`cannon_bias_2015`. Parameters ---------- @@ -643,7 +643,7 @@ class ExtremeValues(TrainAdjust): r"""Adjustment correction for extreme values. The tail of the distribution of adjusted data is corrected according to the bias between the parametric Generalized - Pareto distributions of the simulated and reference data :cite:p:`sdba-roy_extremeprecip_2023`. The distributions are composed of the + Pareto distributions of the simulated and reference data :cite:p:`roy_extremeprecip_2023`. The distributions are composed of the maximal values of clusters of "large" values. With "large" values being those above `cluster_thresh`. Only extreme values, whose quantile within the pool of large values are above `q_thresh`, are re-adjusted. See `Notes`. @@ -704,7 +704,7 @@ class ExtremeValues(TrainAdjust): \tau = \left(\frac{1}{f}\frac{S - min(S)}{max(S) - min(S)}\right)^p Code based on an internal Matlab source and partly ib the `biascorrect_extremes` function of the julia package - "ClimateTools.jl" :cite:p:`sdba-roy_juliaclimateclimatetoolsjl_2021`. + "ClimateTools.jl" :cite:p:`roy_juliaclimateclimatetoolsjl_2021`. Because of limitations imposed by the lazy computing nature of the dask backend, it is not possible to know the number of cluster extremes in `ref` and `hist` at the @@ -802,7 +802,7 @@ class LOCI(TrainAdjust): r"""Local Intensity Scaling (LOCI) bias-adjustment. 
This bias adjustment method is designed to correct daily precipitation time series by considering wet and dry days - separately :cite:p:`sdba-schmidli_downscaling_2006`. + separately :cite:p:`schmidli_downscaling_2006`. Multiplicative adjustment factors are computed such that the mean of `hist` matches the mean of `ref` for values above a threshold. @@ -924,7 +924,7 @@ class PrincipalComponents(TrainAdjust): r"""Principal component adjustment. This bias-correction method maps model simulation values to the observation space through principal components - :cite:p:`sdba-hnilica_multisite_2017`. Values in the simulation space (multiple variables, or multiple sites) can be + :cite:p:`hnilica_multisite_2017`. Values in the simulation space (multiple variables, or multiple sites) can be thought of as coordinate along axes, such as variable, temperature, etc. Principal components (PC) are a linear combinations of the original variables where the coefficients are the eigenvectors of the covariance matrix. Values can then be expressed as coordinates along the PC axes. The method makes the assumption that bias-corrected @@ -984,7 +984,7 @@ class PrincipalComponents(TrainAdjust): References ---------- - :cite:cts:`hnilica_multisite_2017,sdba-alavoine_distinct_2022` + :cite:cts:`hnilica_multisite_2017,alavoine_distinct_2022` """ @classmethod @@ -1108,8 +1108,8 @@ class NpdfTransform(Adjust): This adjustment object combines both training and adjust steps in the `adjust` class method. - A multivariate bias-adjustment algorithm described by :cite:t:`sdba-cannon_multivariate_2018`, as part of the MBCn - algorithm, based on a color-correction algorithm described by :cite:t:`sdba-pitie_n-dimensional_2005`. + A multivariate bias-adjustment algorithm described by :cite:t:`cannon_multivariate_2018`, as part of the MBCn + algorithm, based on a color-correction algorithm described by :cite:t:`pitie_n-dimensional_2005`. 
This algorithm in itself, when used with QuantileDeltaMapping, is NOT trend-preserving. The full MBCn algorithm includes a reordering step provided here by :py:func:`xsdba.processing.reordering`. @@ -1168,23 +1168,23 @@ class NpdfTransform(Adjust): These three steps are repeated a certain number of times, prescribed by argument ``n_iter``. At each iteration, a new random rotation matrix is generated. - The original algorithm :cite:p:`sdba-pitie_n-dimensional_2005`, stops the iteration when some distance score converges. - Following cite:t:`sdba-cannon_multivariate_2018` and the MBCn implementation in :cite:t:`sdba-cannon_mbc_2020`, we + The original algorithm :cite:p:`pitie_n-dimensional_2005`, stops the iteration when some distance score converges. + Following :cite:t:`cannon_multivariate_2018` and the MBCn implementation in :cite:t:`cannon_mbc_2020`, we instead fix the number of iterations. - As done by cite:t:`sdba-cannon_multivariate_2018`, the distance score chosen is the "Energy distance" from - :cite:t:`sdba-szekely_testing_2004`. (see: :py:func:`xsdba.processing.escore`). + As done by :cite:t:`cannon_multivariate_2018`, the distance score chosen is the "Energy distance" from + :cite:t:`szekely_testing_2004`. (see: :py:func:`xsdba.processing.escore`). - The random matrices are generated following a method laid out by :cite:t:`sdba-mezzadri_how_2007`. + The random matrices are generated following a method laid out by :cite:t:`mezzadri_how_2007`. - This is only part of the full MBCn algorithm, see :ref:`notebooks/sdba:Statistical Downscaling and Bias-Adjustment` + This is only part of the full MBCn algorithm, see :ref:`notebooks/example:Statistical Downscaling and Bias-Adjustment` for an example on how to replicate the full method with xsdba. This includes a standardization of the simulated data beforehand, an initial univariate adjustment and the reordering of those adjusted series according to the rank structure of the output of this algorithm.
References ---------- - :cite:cts:`cannon_multivariate_2018,sdba-cannon_mbc_2020,sdba-pitie_n-dimensional_2005,sdba-mezzadri_how_2007,sdba-szekely_testing_2004` + :cite:cts:`cannon_multivariate_2018,cannon_mbc_2020,pitie_n-dimensional_2005,mezzadri_how_2007,szekely_testing_2004` """ @classmethod @@ -1266,8 +1266,8 @@ def _adjust( class MBCn(TrainAdjust): r"""Multivariate bias correction function using the N-dimensional probability density function transform. - A multivariate bias-adjustment algorithm described by :cite:t:`sdba-cannon_multivariate_2018` - based on a color-correction algorithm described by :cite:t:`sdba-pitie_n-dimensional_2005`. + A multivariate bias-adjustment algorithm described by :cite:t:`cannon_multivariate_2018` + based on a color-correction algorithm described by :cite:t:`pitie_n-dimensional_2005`. This algorithm in itself, when used with QuantileDeltaMapping, is NOT trend-preserving. The full MBCn algorithm includes a reordering step provided here by :py:func:`xsdba.processing.reordering`. @@ -1356,18 +1356,18 @@ class MBCn(TrainAdjust): 3. Reorder the dataset found in step 2. according to the ranks of the dataset found in step 1. - The original algorithm :cite:p:`sdba-pitie_n-dimensional_2005`, stops the iteration when some distance score converges. - Following cite:t:`sdba-cannon_multivariate_2018` and the MBCn implementation in :cite:t:`sdba-cannon_mbc_2020`, we + The original algorithm :cite:p:`pitie_n-dimensional_2005`, stops the iteration when some distance score converges. + Following :cite:t:`cannon_multivariate_2018` and the MBCn implementation in :cite:t:`cannon_mbc_2020`, we instead fix the number of iterations. - As done by cite:t:`sdba-cannon_multivariate_2018`, the distance score chosen is the "Energy distance" from - :cite:t:`sdba-szekely_testing_2004`. (see: :py:func:`xsdba.processing.escore`).
+ As done by cite:t:`cannon_multivariate_2018`, the distance score chosen is the "Energy distance" from + :cite:t:`szekely_testing_2004`. (see: :py:func:`xsdba.processing.escore`). - The random matrices are generated following a method laid out by :cite:t:`sdba-mezzadri_how_2007`. + The random matrices are generated following a method laid out by :cite:t:`mezzadri_how_2007`. References ---------- - :cite:cts:`cannon_multivariate_2018,sdba-cannon_mbc_2020,sdba-pitie_n-dimensional_2005,sdba-mezzadri_how_2007,sdba-szekely_testing_2004` + :cite:cts:`cannon_multivariate_2018,cannon_mbc_2020,pitie_n-dimensional_2005,mezzadri_how_2007,szekely_testing_2004` Notes ----- diff --git a/src/xsdba/formatting.py b/src/xsdba/formatting.py index 17ed526..4543ecf 100644 --- a/src/xsdba/formatting.py +++ b/src/xsdba/formatting.py @@ -192,103 +192,6 @@ def _match_value(self, value): ) -def parse_doc(doc: str) -> dict[str, str]: - """Crude regex parsing reading an indice docstring and extracting information needed in indicator construction. - - # TODO: Add such a notebook? The focus is not on the class Indicator here - The appropriate docstring syntax is detailed in :ref:`notebooks/extendxsdba:Defining new indices`. - - Parameters - ---------- - doc : str - The docstring of an indice function. - - Returns - ------- - dict - A dictionary with all parsed sections. 
- """ - if doc is None: - return {} - - out = {} - - sections = re.split(r"(\w+\s?\w+)\n\s+-{3,50}", doc) # obj.__doc__.split('\n\n') - intro = sections.pop(0) - if intro: - intro_content = list(map(str.strip, intro.strip().split("\n\n"))) - if len(intro_content) == 1: - out["title"] = intro_content[0] - elif len(intro_content) >= 2: - out["title"], abstract = intro_content[:2] - out["abstract"] = " ".join(map(str.strip, abstract.splitlines())) - - for i in range(0, len(sections), 2): - header, content = sections[i : i + 2] - - if header in ["Notes", "References"]: - out[header.lower()] = content.replace("\n ", "\n").strip() - elif header == "Parameters": - out["parameters"] = _parse_parameters(content) - elif header == "Returns": - rets = _parse_returns(content) - if rets: - meta = list(rets.values())[0] - if "long_name" in meta: - out["long_name"] = meta["long_name"] - return out - - -def _parse_parameters(section): - """Parse the 'parameters' section of a docstring into a dictionary. - - Works by mapping the parameter name to its description and, potentially, to its set of choices. - The type annotation are not parsed, except for fixed sets of values (listed as "{'a', 'b', 'c'}"). - The annotation parsing only accepts strings, numbers, `None` and `nan` (to represent `numpy.nan`). - """ - curr_key = None - params = {} - for line in section.split("\n"): - if line.startswith(" " * 6): # description - s = " " if params[curr_key]["description"] else "" - params[curr_key]["description"] += s + line.strip() - elif line.startswith(" " * 4) and ":" in line: # param title - name, annot = line.split(":", maxsplit=1) - curr_key = name.strip() - params[curr_key] = {"description": ""} - match = re.search(r".*(\{.*\}).*", annot) - if match: - try: - choices = literal_eval(match.groups()[0]) - params[curr_key]["choices"] = choices - except ValueError: # noqa: S110 - # If the literal_eval fails, we just ignore the choices. 
- pass - return params - - -def _parse_returns(section): - """Parse the returns section of a docstring into a dictionary mapping the parameter name to its description.""" - curr_key = None - params = {} - for line in section.split("\n"): - if line.strip(): - if line.startswith(" " * 6): # long_name - s = " " if params[curr_key]["long_name"] else "" - params[curr_key]["long_name"] += s + line.strip() - elif line.startswith(" " * 4): # param title - annot, *name = reversed(line.split(":", maxsplit=1)) - if name: - curr_key = name[0].strip() - else: - curr_key = None - params[curr_key] = {"long_name": ""} - annot, *unit = annot.split(",", maxsplit=1) - if unit: - params[curr_key]["units"] = unit[0].strip() - return params - - # XC def prefix_attrs(source: dict, keys: Sequence, prefix: str) -> dict: """Rename some keys of a dictionary by adding a prefix. @@ -537,163 +440,3 @@ def gen_call_string( elements.append(rep) return f"{funcname}({', '.join(elements)})" - - -# XC -def _gen_parameters_section( - parameters: dict[str, dict[str, Any]], allowed_periods: list[str] | None = None -) -> str: - """Generate the "parameters" section of the indicator docstring. - - Parameters - ---------- - parameters : dict - Parameters dictionary (`Ind.parameters`). - allowed_periods : list of str, optional - Restrict parameters to specific periods. Default: None. - - Returns - ------- - str - """ - section = "Parameters\n----------\n" - for name, param in parameters.items(): - desc_str = param.description - if param.kind == InputKind.FREQ_STR: - desc_str += ( - " See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset" - "-aliases for available options." - ) - if allowed_periods is not None: - desc_str += ( - f" Restricted to frequencies equivalent to one of {allowed_periods}" - ) - if param.kind == InputKind.VARIABLE: - defstr = f"Default : `ds.{param.default}`. 
" - elif param.kind == InputKind.OPTIONAL_VARIABLE: - defstr = "" - elif param.default is not _empty: - defstr = f"Default : {param.default}. " - else: - defstr = "Required. " - if "choices" in param: - annotstr = str(param.choices) - else: - annotstr = KIND_ANNOTATION[param.kind] - if "units" in param and param.units is not None: - unitstr = f"[Required units : {param.units}]" - else: - unitstr = "" - section += f"{name} {': ' if annotstr else ''}{annotstr}\n {desc_str}\n {defstr}{unitstr}\n" - return section - - -def _gen_returns_section(cf_attrs: Sequence[dict[str, Any]]) -> str: - """Generate the "Returns" section of an indicator's docstring. - - Parameters - ---------- - cf_attrs : Sequence[Dict[str, Any]] - The list of attributes, usually Indicator.cf_attrs. - - Returns - ------- - str - """ - section = "Returns\n-------\n" - for attrs in cf_attrs: - if not section.endswith("\n"): - section += "\n" - section += f"{attrs['var_name']} : DataArray\n" - section += f" {attrs.get('long_name', '')}" - if "standard_name" in attrs: - section += f" ({attrs['standard_name']})" - if "units" in attrs: - section += f" [{attrs['units']}]" - added_section = "" - for key, attr in attrs.items(): - if key not in ["long_name", "standard_name", "units", "var_name"]: - if callable(attr): - attr = "" - added_section += f" **{key}**: {attr};" - if added_section: - section = f"{section}, with additional attributes:{added_section[:-1]}" - section += "\n" - return section - - -def generate_indicator_docstring(ind) -> str: - """Generate an indicator's docstring from keywords. - - Parameters - ---------- - ind : Indicator - An Indicator instance. 
- - Returns - ------- - str - """ - header = f"{ind.title} (realm: {ind.realm})\n\n{ind.abstract}\n" - - special = "" - - if hasattr(ind, "missing"): # Only ResamplingIndicators - special += f'This indicator will check for missing values according to the method "{ind.missing}".\n' - if hasattr(ind.compute, "__module__"): - special += f"Based on indice :py:func:`~{ind.compute.__module__}.{ind.compute.__name__}`.\n" - if ind.injected_parameters: - special += "With injected parameters: " - special += ", ".join( - [f"{k}={v}" for k, v in ind.injected_parameters.items()] - ) - special += ".\n" - if ind.keywords: - special += f"Keywords : {ind.keywords}.\n" - - parameters = _gen_parameters_section( - ind.parameters, getattr(ind, "allowed_periods", None) - ) - - returns = _gen_returns_section(ind.cf_attrs) - - extras = "" - for section in ["notes", "references"]: - if getattr(ind, section): - extras += f"{section.capitalize()}\n{'-' * len(section)}\n{getattr(ind, section)}\n\n" - - doc = f"{header}\n{special}\n{parameters}\n{returns}\n{extras}" - return doc - - -def get_percentile_metadata(data: xr.DataArray, prefix: str) -> dict[str, str]: - """Get the metadata related to percentiles from the given DataArray as a dictionary. - - Parameters - ---------- - data : xr.DataArray - Must be a percentile DataArray, this means the necessary metadata - must be available in its attributes and coordinates. - prefix : str - The prefix to be used in the metadata key. - Usually this takes the form of "tasmin_per" or equivalent. - - Returns - ------- - dict - A mapping of the configuration used to compute these percentiles. 
- """ - # handle case where da was created with `quantile()` method - if "quantile" in data.coords: - percs = data.coords["quantile"].values * 100 - elif "percentiles" in data.coords: - percs = data.coords["percentiles"].values - else: - percs = "" - clim_bounds = data.attrs.get("climatology_bounds", "") - - return { - f"{prefix}_thresh": percs, - f"{prefix}_window": data.attrs.get("window", ""), - f"{prefix}_period": clim_bounds, - } diff --git a/src/xsdba/locales.py b/src/xsdba/locales.py index e733ae4..8bb3cf4 100644 --- a/src/xsdba/locales.py +++ b/src/xsdba/locales.py @@ -3,8 +3,7 @@ ==================== This module defines methods and object to help the internationalization of metadata for -climate indicators computed by xsdba. Go to :ref:`notebooks/customize:Adding translated metadata` to see -how to use this feature. +climate indicators computed by xsdba. All the methods and objects in this module use localization data given in JSON files. These files are expected to be defined as in this example for French: diff --git a/src/xsdba/loess.py b/src/xsdba/loess.py index 4cf1f53..95506e9 100644 --- a/src/xsdba/loess.py +++ b/src/xsdba/loess.py @@ -61,7 +61,7 @@ def _loess_nb( The arrays x and y contain an equal number of elements; each pair (x[i], y[i]) defines a data point in the scatter plot. The function returns the estimated (smooth) values of y. - Originally proposed in :cite:t:`sdba-cleveland_robust_1979`. + Originally proposed in :cite:t:`cleveland_robust_1979`. Users should call `utils.loess_smoothing`. See that function for the main documentation. @@ -189,7 +189,7 @@ def loess_smoothing( Returns a smoothed curve along given dimension. The regression is computed for each point using a subset of neighbouring points as given from evaluating the weighting function locally. - Follows the procedure of :cite:t:`sdba-cleveland_robust_1979`. + Follows the procedure of :cite:t:`cleveland_robust_1979`. 
Parameters ---------- @@ -218,7 +218,7 @@ def loess_smoothing( Notes ----- - As stated in :cite:t:`sdba-cleveland_robust_1979`, the weighting function :math:`W(x)` should respect the following + As stated in :cite:t:`cleveland_robust_1979`, the weighting function :math:`W(x)` should respect the following conditions: - :math:`W(x) > 0` for :math:`|x| < 1` diff --git a/src/xsdba/processing.py b/src/xsdba/processing.py index 3ffdd6f..0cff1b9 100644 --- a/src/xsdba/processing.py +++ b/src/xsdba/processing.py @@ -55,7 +55,7 @@ def adapt_freq( This is useful when the dry-day frequency in the simulations is higher than in the references. This function will create new non-null values for `sim`/`hist`, so that adjustment factors are less wet-biased. - Based on :cite:t:`sdba-themesl_empirical-statistical_2012`. + Based on :cite:t:`themesl_empirical-statistical_2012`. Parameters ---------- @@ -385,7 +385,7 @@ def escore( N: int = 0, scale: bool = False, ) -> xr.DataArray: - r"""Energy score, or energy dissimilarity metric, based on :cite:t:`sdba-szekely_testing_2004` and :cite:t:`sdba-cannon_multivariate_2018`. + r"""Energy score, or energy dissimilarity metric, based on :cite:t:`szekely_testing_2004` and :cite:t:`cannon_multivariate_2018`. Parameters ---------- @@ -414,7 +414,7 @@ def escore( ----- Explanation adapted from the "energy" R package documentation. The e-distance between two clusters :math:`C_i`, :math:`C_j` (tgt and sim) of size :math:`n_i,n_j` - proposed by :cite:t:`sdba-szekely_testing_2004` is defined by: + proposed by :cite:t:`szekely_testing_2004` is defined by: .. math:: @@ -429,13 +429,13 @@ def escore( :math:`\Vert\cdot\Vert` denotes Euclidean norm, :math:`X_{ip}` denotes the p-th observation in the i-th cluster. The input scaling and the factor :math:`\frac{1}{2}` in the first equation are additions of - :cite:t:`sdba-cannon_multivariate_2018` to the metric. 
With that factor, the test becomes identical to the one - defined by :cite:t:`sdba-baringhaus_new_2004`. - This version is tested against values taken from Alex Cannon's MBC R package :cite:p:`sdba-cannon_mbc_2020`. + :cite:t:`cannon_multivariate_2018` to the metric. With that factor, the test becomes identical to the one + defined by :cite:t:`baringhaus_new_2004`. + This version is tested against values taken from Alex Cannon's MBC R package :cite:p:`cannon_mbc_2020`. References ---------- - :cite:cts:`baringhaus_new_2004,sdba-cannon_multivariate_2018,sdba-cannon_mbc_2020,sdba-szekely_testing_2004`. + :cite:cts:`baringhaus_new_2004,cannon_multivariate_2018,cannon_mbc_2020,szekely_testing_2004`. """ pts_dim, obs_dim = dims @@ -501,7 +501,7 @@ def to_additive_space( ): r"""Transform a non-additive variable into an additive space by the means of a log or logit transformation. - Based on :cite:t:`sdba-alavoine_distinct_2022`. + Based on :cite:t:`alavoine_distinct_2022`. Parameters ---------- @@ -594,7 +594,7 @@ def from_additive_space( ): r"""Transform back to the physical space a variable that was transformed with `to_additive_space`. - Based on :cite:t:`sdba-alavoine_distinct_2022`. + Based on :cite:t:`alavoine_distinct_2022`. If parameters are not present on the attributes of the data, they must be all given are arguments. Parameters diff --git a/src/xsdba/typing.py b/src/xsdba/typing.py index d81eeca..759cbca 100644 --- a/src/xsdba/typing.py +++ b/src/xsdba/typing.py @@ -70,8 +70,7 @@ class InputKind(IntEnum): Annotation : ``str`` or ``str | None``. In most cases, this kind of parameter makes sense with choices indicated in the docstring's version of the annotation with curly braces. - # TOOO : what about this notebook? - See :ref:`notebooks/extendxclim:Defining new indices`. + # TODO : what about this notebook? removed reference to extendxclim """ DAY_OF_YEAR = 6 """A date, but without a year, in the MM-DD format.
diff --git a/src/xsdba/utils.py b/src/xsdba/utils.py index 2b23ea1..858df5f 100644 --- a/src/xsdba/utils.py +++ b/src/xsdba/utils.py @@ -617,7 +617,7 @@ def best_pc_orientation_simple( Given an inverse transform `Hinv` and a transform `R`, this returns the orientation minimizing the projected distance for a test point far from the origin. - This trick is inspired by the one exposed in :cite:t:`sdba-hnilica_multisite_2017`. For each possible orientation vector, + This trick is inspired by the one exposed in :cite:t:`hnilica_multisite_2017`. For each possible orientation vector, the test point is reprojected and the distance from the original point is computed. The orientation minimizing that distance is chosen. @@ -660,7 +660,7 @@ def best_pc_orientation_full( Hmean: np.ndarray, hist: np.ndarray, ) -> np.ndarray: - """Return best orientation vector for `A` according to the method of :cite:t:`sdba-alavoine_distinct_2022`. + """Return best orientation vector for `A` according to the method of :cite:t:`alavoine_distinct_2022`. Eigenvectors returned by `pc_matrix` do not have a defined orientation. Given an inverse transform `Hinv`, a transform `R`, the actual and target origins `Hmean` and `Rmean` and the matrix @@ -668,7 +668,7 @@ def best_pc_orientation_full( that maximizes the Spearman correlation coefficient of all variables. The correlation is computed for each variable individually, then averaged. - This trick is explained in :cite:t:`sdba-alavoine_distinct_2022`. + This trick is explained in :cite:t:`alavoine_distinct_2022`. See docstring of :py:func:`sdba.adjustment.PrincipalComponentAdjustment`. Parameters @@ -1060,7 +1060,7 @@ def eps_cholesky(M, nit=26): References ---------- - :cite:cts:`robin_2021,sdba-higham_1988,sdba-knol_1989` + :cite:cts:`robin_2021,higham_1988,knol_1989` """ MC = None try: