diff --git a/.binder/environment.yml b/.binder/environment.yml
index dcc99c35..6d65e763 100644
--- a/.binder/environment.yml
+++ b/.binder/environment.yml
@@ -7,5 +7,6 @@ dependencies:
   - netcdf4
   - pip
   - xarray
+  - pooch
   - pip:
     - git+https://github.com/xarray-contrib/cf-xarray
diff --git a/.deepsource.toml b/.deepsource.toml
new file mode 100644
index 00000000..81205269
--- /dev/null
+++ b/.deepsource.toml
@@ -0,0 +1,15 @@
+version = 1
+
+test_patterns = ["cf_xarray/tests/test_*.py"]
+
+exclude_patterns = [
+  "doc/**",
+  "ci/**"
+]
+
+[[analyzers]]
+name = "python"
+enabled = true
+
+  [analyzers.meta]
+  runtime_version = "3.x.x"
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f965ce9b..b3a2636f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -2,7 +2,7 @@ name: CI
 on:
   push:
     branches:
-      - "*"
+      - "main"
   pull_request:
     branches:
       - "*"
@@ -48,6 +48,29 @@ jobs:
           name: codecov-umbrella
           fail_ci_if_error: false

+  no-optional-deps:
+    name: no-optional-deps
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          channels: conda-forge
+          mamba-version: "*"
+          activate-environment: cf_xarray_test
+          auto-update-conda: false
+          python-version: ${{ matrix.python-version }}
+      - name: Set up conda environment
+        shell: bash -l {0}
+        run: |
+          mamba env update -f ci/environment-no-optional-deps.yml
+          python -m pip install -e .
+          conda list
+      - name: Run Tests
+        shell: bash -l {0}
+        run: |
+          pytest -n 2
+
   upstream-dev:
     name: upstream-dev
     runs-on: ubuntu-latest
diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
index dd8efaeb..601e28bd 100644
--- a/.github/workflows/pre-commit.yaml
+++ b/.github/workflows/pre-commit.yaml
@@ -2,7 +2,7 @@ name: pre-commit
 on:
   push:
-    branches: "*"
+    branches: "main"
   pull_request:
     branches:
       - "*"
diff --git a/.gitignore b/.gitignore
index b6e47617..24155863 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,7 +69,9 @@ instance/
 .scrapy

 # Sphinx documentation
-docs/_build/
+doc/_build/
+doc/generated/
+cf_xarray/tests/_build/

 # PyBuilder
 target/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ad688bd9..4af86d7a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:

   # isort should run before black as black sometimes tweaks the isort output
   - repo: https://github.com/timothycrosley/isort
-    rev: 5.6.4
+    rev: 5.7.0
    hooks:
      - id: isort
        files: .+\.py$
@@ -19,12 +19,12 @@ repos:
      - id: black

   - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.8.3
+    rev: 3.8.4
    hooks:
      - id: flake8

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.790 # Must match ci/requirements/*.yml
+    rev: v0.800 # Must match ci/requirements/*.yml
    hooks:
      - id: mypy
diff --git a/.tributors b/.tributors
new file mode 100644
index 00000000..f9edb2cb
--- /dev/null
+++ b/.tributors
@@ -0,0 +1,48 @@
+{
+  "dcherian": {
+    "name": "Deepak Cherian",
+    "bio": "physical oceanographer",
+    "blog": "http://www.cherian.net",
+    "orcid": "0000-0002-6861-8734",
+    "affiliation": "National Center for Atmospheric Research"
+  },
+  "malmans2": {
+    "name": "Mattia Almansi",
+    "blog": "https://malmans2.github.io",
+    "orcid": "0000-0001-6849-3647",
+    "affiliation": "National Oceanography Centre"
+  },
+  "aulemahal": {
+    "name": "Pascal Bourgault",
+    "bio": "Physical oceanography graduate turned climate science specialist and scientific developer.",
+    "orcid": "0000-0003-1192-0403",
+    "affiliation": "Ouranos Inc"
+  },
+  "keewis": {
+    "name": "Justus Magin"
+  },
+  "jukent": {
+    "name": "Julia Kent",
+    "affiliation": "National Center for Atmospheric Research",
+    "orcid": "0000-0002-5611-8986"
+  },
+  "kthyng": {
+    "name": "Kristen Thyng",
+    "bio": "MetOcean Data Scientist at Axiom Data Science. Associate Editor in Chief at the Journal for Open Source Software (JOSS). Wrote cmocean colormaps.",
+    "blog": "http://kristenthyng.com",
+    "orcid": "0000-0002-8746-614X",
+    "affiliation": "Axiom Data Science"
+  },
+  "jhamman": {
+    "name": "Joe Hamman",
+    "bio": "Scientist and Engineer and Human.\r\n",
+    "blog": "http://joehamman.com",
+    "orcid": "0000-0001-7479-8439",
+    "affiliation": "CarbonPlan"
+  },
+  "withshubh": {
+    "name": "Shubhendra Singh Chauhan",
+    "bio": "Developer Advocate at @deepsourcelabs šŸ„‘ \r\nšŸ‘ØšŸ»ā€šŸ’» work profile: @shubhendra-deepsource",
+    "blog": "camelcaseguy.com"
+  }
+}
diff --git a/.zenodo.json b/.zenodo.json
new file mode 100644
index 00000000..5e50bbd9
--- /dev/null
+++ b/.zenodo.json
@@ -0,0 +1,34 @@
+{
+  "creators": [
+    {
+      "name": "Deepak Cherian",
+      "affiliation": "National Center for Atmospheric Research",
+      "orcid": "0000-0002-6861-8734"
+    },
+    {
+      "name": "Mattia Almansi",
+      "affiliation": "National Oceanography Centre",
+      "orcid": "0000-0001-6849-3647"
+    },
+    {
+      "name": "Pascal Bourgault",
+      "affiliation": "Ouranos Inc",
+      "orcid": "0000-0003-1192-0403"
+    },
+    {
+      "name": "Julia Kent",
+      "affiliation": "National Center for Atmospheric Research",
+      "orcid": "0000-0002-5611-8986"
+    },
+    {
+      "name": "Justus Magin"
+    },
+    {
+      "name": "Kristen Thyng",
+      "affiliation": "Axiom Data Science",
+      "orcid": "0000-0002-8746-614X"
+    }
+  ],
+  "upload_type": "software",
+  "keywords": []
+}
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000..1607cb16
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,43 @@
+# YAML 1.2
+# Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/)
+cff-version: 1.0.3
+message: If you use this software, please cite it using these metadata.
+title: cf_xarray
+doi: 10.5281/zenodo.4749736
+repository-code: https://github.com/xarray-contrib/cf-xarray
+license: Apache-2.0
+version: 0.5.2
+date-released: 2021-05-11
+keywords:
+- cf-conventions
+- xarray
+- metadata
+
+authors:
+- affiliation: National Center for Atmospheric Research, USA
+  family-names: Cherian
+  given-names: Deepak
+  orcid: https://orcid.org/0000-0002-6861-8734
+- affiliation: National Oceanography Centre, Southampton, UK
+  family-names: Almansi
+  given-names: Mattia
+  orcid: https://orcid.org/0000-0001-6849-3647
+- affiliation: Ouranos, Inc.
+  family-names: Bourgault
+  given-names: Pascal
+  orcid: https://orcid.org/0000-0003-1192-0403
+- affiliation: National Center for Atmospheric Research, USA
+  family-names: Kent
+  given-names: Julia
+  orcid: https://orcid.org/0000-0002-5611-8986
+- family-names: Magin
+  given-names: Justus
+- family-names: Thielen
+  given-names: Jon
+  orcid: https://orcid.org/0000-0002-5479-0189
+  affiliation: Iowa State University, Ames, IA, USA
+- affiliation: Axiom Data Science
+  family-names: Thyng
+  given-names: Kristen
+  orcid: https://orcid.org/0000-0002-8746-614X
+...
diff --git a/README.rst b/README.rst
index e882f4f7..72a00c55 100644
--- a/README.rst
+++ b/README.rst
@@ -26,6 +26,8 @@
     :target: https://anaconda.org/conda-forge/cf_xarray
     :alt: Conda Version

+.. image:: https://zenodo.org/badge/267381269.svg
+   :target: https://zenodo.org/badge/latestdoi/267381269

 cf-xarray
 =========
@@ -34,4 +36,6 @@ A lightweight convenience wrapper for using CF attributes on xarray objects.

 For example you can use ``.cf.mean("latitude")`` instead of ``.mean("lat")`` if appropriate attributes are set! This allows you to write code that does not require knowledge of specific dimension or coordinate names particular to a dataset.

-See more in the introductory notebook `here `_.
+See more in the `introductory notebook `_.
+
+Try out our Earthcube 2021 Annual Meeting notebook `submission `_.
diff --git a/cf_xarray/__init__.py b/cf_xarray/__init__.py
index f9d08cce..10503e50 100644
--- a/cf_xarray/__init__.py
+++ b/cf_xarray/__init__.py
@@ -1,2 +1,10 @@
+from pkg_resources import DistributionNotFound, get_distribution
+
 from .accessor import CFAccessor  # noqa
 from .helpers import bounds_to_vertices, vertices_to_bounds  # noqa
+
+try:
+    __version__ = get_distribution("cf_xarray").version
+except DistributionNotFound:
+    # package is not installed
+    __version__ = "unknown"
diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py
index a7287286..ff0f5c4a 100644
--- a/cf_xarray/accessor.py
+++ b/cf_xarray/accessor.py
@@ -1,6 +1,7 @@
 import functools
 import inspect
 import itertools
+import re
 import warnings
 from collections import ChainMap
 from typing import (
@@ -14,14 +15,23 @@
     MutableMapping,
     Set,
     Tuple,
+    TypeVar,
     Union,
+    cast,
 )

 import xarray as xr
 from xarray import DataArray, Dataset
+from xarray.core.arithmetic import SupportsArithmetic

+from .criteria import coordinate_criteria, regex
 from .helpers import bounds_to_vertices
-from .utils import parse_cell_methods_attr
+from .utils import (
+    _is_datetime_like,
+    always_iterable,
+    invert_mappings,
+    parse_cell_methods_attr,
+)

 #: Classes wrapped by cf_xarray.
 _WRAPPED_CLASSES = (
@@ -42,94 +52,7 @@
 #: Cell measures understood by cf_xarray.
 _CELL_MEASURES = ("area", "volume")

-# Define the criteria for coordinate matches
-# Copied from metpy
-# Internally we only use X, Y, Z, T
-coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = {
-    "standard_name": {
-        "X": ("projection_x_coordinate",),
-        "Y": ("projection_y_coordinate",),
-        "T": ("time",),
-        "time": ("time",),
-        "vertical": (
-            "air_pressure",
-            "height",
-            "depth",
-            "geopotential_height",
-            # computed dimensional coordinate name
-            "altitude",
-            "height_above_geopotential_datum",
-            "height_above_reference_ellipsoid",
-            "height_above_mean_sea_level",
-        ),
-        "Z": (
-            "model_level_number",
-            "atmosphere_ln_pressure_coordinate",
-            "atmosphere_sigma_coordinate",
-            "atmosphere_hybrid_sigma_pressure_coordinate",
-            "atmosphere_hybrid_height_coordinate",
-            "atmosphere_sleve_coordinate",
-            "ocean_sigma_coordinate",
-            "ocean_s_coordinate",
-            "ocean_s_coordinate_g1",
-            "ocean_s_coordinate_g2",
-            "ocean_sigma_z_coordinate",
-            "ocean_double_sigma_coordinate",
-        ),
-        "latitude": ("latitude",),
-        "longitude": ("longitude",),
-    },
-    "_CoordinateAxisType": {
-        "T": ("Time",),
-        "Z": ("GeoZ", "Height", "Pressure"),
-        "Y": ("GeoY",),
-        "latitude": ("Lat",),
-        "X": ("GeoX",),
-        "longitude": ("Lon",),
-    },
-    "axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)},
-    "cartesian_axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)},
-    "positive": {"vertical": ("up", "down")},
-    "units": {
-        "latitude": (
-            "degree_north",
-            "degree_N",
-            "degreeN",
-            "degrees_north",
-            "degrees_N",
-            "degreesN",
-        ),
-        "longitude": (
-            "degree_east",
-            "degree_E",
-            "degreeE",
-            "degrees_east",
-            "degrees_E",
-            "degreesE",
-        ),
-    },
-}
-
-# "long_name" and "standard_name" criteria are the same. For convenience.
-coordinate_criteria["long_name"] = coordinate_criteria["standard_name"]
-
-#: regular expressions for guess_coord_axis
-regex = {
-    "time": "time[0-9]*|min|hour|day|week|month|year",
-    "vertical": (
-        "(lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|isobaric|pres|"
-        "isotherm)[a-z_]*[0-9]*"
-    ),
-    "Y": "y",
-    "latitude": "y?lat[a-z0-9]*",
-    "X": "x",
-    "longitude": "x?lon[a-z0-9]*",
-}
-regex["Z"] = regex["vertical"]
-regex["T"] = regex["time"]
-
-
-attrs = {
+ATTRS = {
     "X": {"axis": "X"},
     "T": {"axis": "T", "standard_name": "time"},
     "Y": {"axis": "Y"},
@@ -137,37 +60,21 @@
     "latitude": {"units": "degrees_north", "standard_name": "latitude"},
     "longitude": {"units": "degrees_east", "standard_name": "longitude"},
 }
-attrs["time"] = attrs["T"]
-attrs["vertical"] = attrs["Z"]
-
-
-def _is_datetime_like(da: DataArray) -> bool:
-    import numpy as np
-
-    if np.issubdtype(da.dtype, np.datetime64) or np.issubdtype(
-        da.dtype, np.timedelta64
-    ):
-        return True
-
-    try:
-        import cftime
-
-        if isinstance(da.data[0], cftime.datetime):
-            return True
-    except ImportError:
-        pass
-
-    return False
+ATTRS["time"] = ATTRS["T"]
+ATTRS["vertical"] = ATTRS["Z"]

 # Type for Mapper functions
 Mapper = Callable[[Union[DataArray, Dataset], str], List[str]]

+# Type for decorators
+F = TypeVar("F", bound=Callable[..., Any])
+

 def apply_mapper(
     mappers: Union[Mapper, Tuple[Mapper, ...]],
     obj: Union[DataArray, Dataset],
-    key: str,
+    key: Any,
     error: bool = True,
     default: Any = None,
 ) -> List[Any]:
""" - if default is None: - default = [] + + if not isinstance(key, str): + if default is None: + raise ValueError("`default` must be provided when `key` is not a string.") + return list(always_iterable(default)) + + default = [] if default is None else list(always_iterable(default)) def _apply_single_mapper(mapper): try: results = mapper(obj, key) - except Exception as e: - if error: + except KeyError as e: + if error or "I expected only one." in repr(e): raise e else: results = [] @@ -202,13 +114,15 @@ def _apply_single_mapper(mapper): for mapper in mappers: results.append(_apply_single_mapper(mapper)) - nresults = sum([bool(v) for v in results]) - if nresults > 1: - raise KeyError( - f"Multiple mappers succeeded with key {key!r}.\nI was using mappers: {mappers!r}." - f"I received results: {results!r}.\nPlease open an issue." - ) - if nresults == 0: + flat = list(itertools.chain(*results)) + # de-duplicate + if all(not isinstance(r, DataArray) for r in flat): + results = list(set(flat)) + else: + results = flat + + nresults = any(bool(v) for v in [results]) + if not nresults: if error: raise KeyError( f"cf-xarray cannot interpret key {key!r}. Perhaps some needed attributes are missing." @@ -216,33 +130,22 @@ def _apply_single_mapper(mapper): else: # none of the mappers worked. Return the default return default - return list(itertools.chain(*results)) - - -def _get_axis_coord_single(var: Union[DataArray, Dataset], key: str) -> List[str]: - """ Helper method for when we really want only one result per key. """ - results = _get_axis_coord(var, key) - if len(results) > 1: - raise KeyError( - f"Multiple results for {key!r} found: {results!r}. I expected only one." - ) - elif len(results) == 0: - raise KeyError(f"No results found for {key!r}.") return results -def _get_axis_coord_time_accessor( - var: Union[DataArray, Dataset], key: str -) -> List[str]: +def _get_groupby_time_accessor(var: Union[DataArray, Dataset], key: str) -> List[str]: + """ + Time variable accessor e.g. 'T.month' + """ """ Helper method for when our key name is of the nature "T.month" and we want to isolate the "T" for coordinate mapping Parameters ---------- - var: DataArray, Dataset + var : DataArray, Dataset DataArray belonging to the coordinate to be checked - key: str, [e.g. "T.month"] + key : str, [e.g. "T.month"] key to check for. Returns @@ -253,10 +156,13 @@ def _get_axis_coord_time_accessor( ----- Returns an empty list if there is no frequency extension specified. """ + if "." in key: key, ext = key.split(".", 1) - results = _get_axis_coord_single(var, key) + results = apply_mapper((_get_all,), var, key, error=False) + if len(results) > 1: + raise KeyError(f"Multiple results received for {key}.") return [v + "." + ext for v in results] else: @@ -269,14 +175,14 @@ def _get_axis_coord(var: Union[DataArray, Dataset], key: str) -> List[str]: Parameters ---------- - var: DataArray, Dataset + var : DataArray, Dataset DataArray belonging to the coordinate to be checked - key: str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"] + key : str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"] key to check for. - error: bool + error : bool raise errors when key is not found or interpretable. Use False and provide default to replicate dict.get(k, None). - default: Any + default : Any default value to return when error is False. 
@@ -269,14 +175,14 @@

     Parameters
     ----------
-    var: DataArray, Dataset
+    var : DataArray, Dataset
         DataArray belonging to the coordinate to be checked
-    key: str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"]
+    key : str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"]
         key to check for.
-    error: bool
+    error : bool
         raise errors when key is not found or interpretable. Use False and provide default
         to replicate dict.get(k, None).
-    default: Any
+    default : Any
        default value to return when error is False.

     Returns
@@ -327,16 +233,6 @@
     return list(results)


-def _get_measure_variable(
-    da: DataArray, key: str, error: bool = True, default: str = None
-) -> List[DataArray]:
-    """ tiny wrapper since xarray does not support providing str for weights."""
-    varnames = apply_mapper(_get_measure, da, key, error, default)
-    if len(varnames) > 1:
-        raise ValueError(f"Multiple measures found for key {key!r}: {varnames!r}.")
-    return [da[varnames[0]]]
-
-
 def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]:
     """
     Translate from cell measures to appropriate variable name.
@@ -344,9 +240,9 @@

     Parameters
     ----------
-    obj: DataArray, Dataset
+    obj : DataArray, Dataset
         DataArray belonging to the coordinate to be checked
-    key: str
+    key : str
         key to check for.

     Returns
@@ -371,32 +267,42 @@
     return list(results)


-#: Default mappers for common keys.
-_DEFAULT_KEY_MAPPERS: Mapping[str, Tuple[Mapper, ...]] = {
-    "dim": (_get_axis_coord,),
-    "dims": (_get_axis_coord,),  # transpose
-    "dimensions": (_get_axis_coord,),  # stack
-    "dims_dict": (_get_axis_coord,),  # swap_dims, rename_dims
-    "shifts": (_get_axis_coord,),  # shift, roll
-    "pad_width": (_get_axis_coord,),  # shift, roll
-    # "names": something_with_all_valid_keys? # set_coords, reset_coords
-    "coords": (_get_axis_coord,),  # interp
-    "indexers": (_get_axis_coord,),  # sel, isel, reindex
-    # "indexes": (_get_axis_coord,),  # set_index
-    "dims_or_levels": (_get_axis_coord,),  # reset_index
-    "window": (_get_axis_coord,),  # rolling_exp
-    "coord": (_get_axis_coord_single,),  # differentiate, integrate
-    "group": (_get_axis_coord_single, _get_axis_coord_time_accessor),
-    "indexer": (_get_axis_coord_single,),  # resample
-    "variables": (_get_axis_coord,),  # sortby
-    "weights": (_get_measure_variable,),  # type: ignore
-}
+def _get_bounds(obj: Union[DataArray, Dataset], key: str) -> List[str]:
+    """
+    Translate from key (either CF key or variable name) to its bounds' variable names.
+    This function interprets the ``bounds`` attribute on DataArrays.
+
+    Parameters
+    ----------
+    obj : DataArray, Dataset
+        DataArray belonging to the coordinate to be checked
+    key : str
+        key to check for.
+
+    Returns
+    -------
+    List[str], Variable name(s) in parent xarray object that are bounds of `key`
+    """
+    results = set()
+    for var in apply_mapper(_get_all, obj, key, error=False, default=[key]):
+        if "bounds" in obj[var].attrs:
+            results |= {obj[var].attrs["bounds"]}
+
+    return list(results)
""" + if name is None: + return [] + varnames = [] - for vname, var in ds.variables.items(): + if isinstance(obj, DataArray): + obj = obj.coords.to_dataset() + for vname, var in obj.variables.items(): stdname = var.attrs.get("standard_name", None) if stdname == name: varnames.append(str(vname)) @@ -404,6 +310,95 @@ def _get_with_standard_name(ds: Dataset, name: Union[str, List[str]]) -> List[st return varnames +def _get_all(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names + """ + all_mappers = (_get_axis_coord, _get_measure, _get_with_standard_name) + results = apply_mapper(all_mappers, obj, key, error=False, default=None) + return results + + +def _get_dims(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names present in .dims + """ + return [k for k in _get_all(obj, key) if k in obj.dims] + + +def _get_indexes(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names present in .indexes + """ + return [k for k in _get_all(obj, key) if k in obj.indexes] + + +def _get_coords(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names present in .coords + """ + return [k for k in _get_all(obj, key) if k in obj.coords] + + +def _variables(func: F) -> F: + @functools.wraps(func) + def wrapper(obj: Union[DataArray, Dataset], key: str) -> List[DataArray]: + return [obj[k] for k in func(obj, key)] + + return cast(F, wrapper) + + +def _single(func: F) -> F: + @functools.wraps(func) + def wrapper(obj: Union[DataArray, Dataset], key: str): + results = func(obj, key) + if len(results) > 1: + raise KeyError( + f"Multiple results for {key!r} found: {results!r}. I expected only one." + ) + elif len(results) == 0: + raise KeyError(f"No results found for {key!r}.") + return results + + wrapper.__doc__ = ( + func.__doc__.replace("One or more of", "One of") + if func.__doc__ + else func.__doc__ + ) + + return cast(F, wrapper) + + +#: Default mappers for common keys. 
+
+
+#: Default mappers for common keys.
+_DEFAULT_KEY_MAPPERS: Mapping[str, Tuple[Mapper, ...]] = {
+    "dim": (_get_dims,),
+    "dims": (_get_dims,),  # transpose
+    "drop_dims": (_get_dims,),  # drop_dims
+    "dims_dict": (_get_dims,),  # swap_dims, rename_dims
+    "shifts": (_get_dims,),  # shift, roll
+    "pad_width": (_get_dims,),  # shift, roll
+    "names": (_get_all,),  # set_coords, reset_coords, drop_vars
+    "name_dict": (_get_all,),  # rename, rename_vars
+    "new_name_or_name_dict": (_get_all,),  # rename
+    "labels": (_get_indexes,),  # drop_sel
+    "coords": (_get_dims,),  # interp
+    "indexers": (_get_dims,),  # sel, isel, reindex
+    # "indexes": (_single(_get_dims),),  # set_index this decodes keys but not values
+    "dims_or_levels": (_get_dims,),  # reset_index
+    "window": (_get_dims,),  # rolling_exp
+    "coord": (_single(_get_coords),),  # differentiate, integrate
+    "group": (_single(_get_all), _get_groupby_time_accessor),  # groupby
+    "indexer": (_single(_get_indexes),),  # resample
+    "variables": (_get_all,),  # sortby
+    "weights": (_variables(_single(_get_all)),),  # type: ignore
+    "chunks": (_get_dims,),  # chunk
+}
+
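Each key above is a keyword argument somewhere in the xarray API, paired with the mapper that translates CF names appearing in it. Illustrative calls, assuming ``ds`` has a ``"lon"`` dimension coordinate whose attrs include ``axis: "X"``:

    ds.cf.isel(X=0)             # "indexers": _get_dims -> ds.isel(lon=0)
    ds.cf.rename({"X": "x2"})   # "name_dict": _get_all -> ds.rename({"lon": "x2"})
    ds.cf.chunk({"X": 10})      # "chunks": _get_dims   -> ds.chunk({"lon": 10})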

 def _guess_bounds_dim(da):
     """
     Guess bounds values given a 1D coordinate variable.
@@ -429,26 +424,19 @@
     can be used for arguments.
     """

-    # this list will need to be updated any time a new mapper is added
-    mapper_docstrings = {
-        _get_axis_coord: f"One or more of {(_AXIS_NAMES + _COORD_NAMES)!r}",
-        _get_axis_coord_single: f"One of {(_AXIS_NAMES + _COORD_NAMES)!r}",
-        # _get_measure_variable: f"One of {_CELL_MEASURES!r}",
-    }
-
     sig = inspect.signature(func)
     string = ""
     for k in set(sig.parameters.keys()) & set(_DEFAULT_KEY_MAPPERS):
         mappers = _DEFAULT_KEY_MAPPERS.get(k, [])
-        docstring = "; ".join(
-            mapper_docstrings.get(mapper, "unknown. please open an issue.")
+        docstring = ";\n\t\t\t".join(
+            mapper.__doc__ if mapper.__doc__ else "unknown. please open an issue."
             for mapper in mappers
         )
         string += f"\t\t{k}: {docstring} \n"

     for param in sig.parameters:
         if sig.parameters[param].kind is inspect.Parameter.VAR_KEYWORD:
-            string += f"\t\t{param}: {mapper_docstrings[_get_axis_coord]} \n\n"
+            string += f"\t\t{param}: {_get_all.__doc__} \n\n"
     return (
         f"\n\tThe following arguments will be processed by cf_xarray: \n{string}"
         "\n\t----\n\t"
@@ -468,24 +456,31 @@

     Parameters
     ----------
-    obj : DataArray, Dataset
     attr : Name of attribute in obj that will be shadowed.
     accessor : High level accessor object: CFAccessor
     key_mappers : dict
         dict(key_name: mapper)
-    wrap_classes: bool
+    wrap_classes : bool
         Should we wrap the return value with _CFWrappedClass?
         Only True for the high level CFAccessor.
         Facilitates code reuse for _CFWrappedClass and _CFWrapppedPlotMethods
        For both of those, wrap_classes is False.
-    extra_decorator: Callable (optional)
+    extra_decorator : Callable (optional)
        An extra decorator, if necessary. This is used by _CFPlotMethods to set default
        kwargs based on CF attributes.
     """
     try:
         attribute: Union[Mapping, Callable] = getattr(obj, attr)
     except AttributeError:
+        if getattr(
+            CFDatasetAccessor if isinstance(obj, DataArray) else CFDataArrayAccessor,
+            attr,
+            None,
+        ):
+            raise AttributeError(
+                f"{obj.__class__.__name__+'.cf'!r} object has no attribute {attr!r}"
+            )
         raise AttributeError(
             f"{attr!r} is not a valid attribute on the underlying xarray object."
         )
@@ -493,23 +488,29 @@
     if isinstance(attribute, Mapping):
         if not attribute:
             return dict(attribute)
-        # attributes like chunks / sizes
-        newmap = dict()
-        unused_keys = set(attribute.keys())
-        for key in _AXIS_NAMES + _COORD_NAMES:
-            value = set(apply_mapper(_get_axis_coord, obj, key, error=False))
-            unused_keys -= value
-            if value:
-                good_values = value & set(obj.dims)
-                if not good_values:
-                    continue
-                if len(good_values) > 1:
+
+        newmap = {}
+        inverted = invert_mappings(
+            accessor.axes,
+            accessor.coordinates,
+            accessor.cell_measures,
+            accessor.standard_names,
+        )
+        unused_keys = set(attribute.keys()) - set(inverted)
+        for key, value in attribute.items():
+            for name in inverted[key]:
+                if name in newmap:
                     raise AttributeError(
-                        f"cf_xarray can't wrap attribute {attr!r} because there are multiple values for {key!r} viz. {good_values!r}. "
-                        f"There is no unique mapping from {key!r} to a value in {attr!r}."
+                        f"cf_xarray can't wrap attribute {attr!r} because there are multiple values for {name!r}. "
+                        f"There is no unique mapping from {name!r} to a value in {attr!r}."
                     )
-                newmap.update({key: attribute[good_values.pop()]})
+            newmap.update(dict.fromkeys(inverted[key], value))
         newmap.update({key: attribute[key] for key in unused_keys})
+
+        skip = {"data_vars": ["coords"], "coords": None}
+        if attr in ["coords", "data_vars"]:
+            for key in newmap:
+                newmap[key] = _getitem(accessor, key, skip=skip[attr])
         return newmap

     elif isinstance(attribute, Callable):  # type: ignore
@@ -538,7 +539,174 @@
         return wrapper

-class _CFWrappedClass:
+def _getitem(
+    accessor: "CFAccessor", key: Union[str, List[str]], skip: List[str] = None
+) -> Union[DataArray, Dataset]:
+    """
+    Index into obj using key. Attaches CF associated variables.
+
+    Parameters
+    ----------
+    accessor : CFAccessor
+    key : str, List[str]
+    skip : str, optional
+        One of ["coords", "measures"], avoid clashes with special coord names
+    """
+
+    obj = accessor._obj
+    kind = str(type(obj).__name__)
+    scalar_key = isinstance(key, str)
+
+    if scalar_key:
+        key = (key,)  # type: ignore
+
+    if skip is None:
+        skip = []
+
+    def drop_bounds(names):
+        # sometimes bounds variables have the same standard_name as the
+        # actual variable. It seems practical to ignore them when indexing
+        # with a scalar key. Hopefully these will soon get decoded to IntervalIndex
+        # and we can move on...
+        if scalar_key:
+            bounds = set([obj[k].attrs.get("bounds", None) for k in names])
+            names = set(names) - bounds
+        return names
+
+    def check_results(names, key):
+        if scalar_key and len(names) > 1:
+            raise KeyError(
+                f"Receive multiple variables for key {key!r}: {names}. "
+                f"Expected only one. Please pass a list [{key!r}] "
+                f"instead to get all variables matching {key!r}."
+            )
+
+    try:
+        measures = accessor._get_all_cell_measures()
+    except ValueError:
+        measures = []
+        warnings.warn("Ignoring bad cell_measures attribute.", UserWarning)
+
+    varnames: List[Hashable] = []
+    coords: List[Hashable] = []
+    successful = dict.fromkeys(key, False)
+    for k in key:
+        if "coords" not in skip and k in _AXIS_NAMES + _COORD_NAMES:
+            names = _get_all(obj, k)
+            names = drop_bounds(names)
+            check_results(names, k)
+            successful[k] = bool(names)
+            coords.extend(names)
+        elif "measures" not in skip and k in measures:
+            measure = _get_all(obj, k)
+            check_results(measure, k)
+            successful[k] = bool(measure)
+            if measure:
+                varnames.extend(measure)
+        else:
+            stdnames = set(_get_with_standard_name(obj, k))
+            objcoords = set(obj.coords)
+            stdnames = drop_bounds(stdnames)
+            if "coords" in skip:
+                stdnames -= objcoords
+            check_results(stdnames, k)
+            successful[k] = bool(stdnames)
+            varnames.extend(stdnames - objcoords)
+            coords.extend(stdnames & objcoords)

+    # these are not special names but could be variable names in underlying object
+    # we allow this so that we can return variables with appropriate CF auxiliary variables
+    varnames.extend([k for k, v in successful.items() if not v])
+    allnames = varnames + coords
+
+    try:
+        for name in allnames:
+            extravars = accessor.get_associated_variable_names(
+                name, skip_bounds=scalar_key, error=False
+            )
+            coords.extend(itertools.chain(*extravars.values()))
+
+        if isinstance(obj, DataArray):
+            ds = obj._to_temp_dataset()
+        else:
+            ds = obj
+
+        if scalar_key:
+            if len(allnames) == 1:
+                da: DataArray = ds.reset_coords()[allnames[0]]  # type: ignore
+                if allnames[0] in coords:
+                    coords.remove(allnames[0])
+                for k1 in coords:
+                    da.coords[k1] = ds.variables[k1]
+                return da
+            else:
+                raise KeyError(
+                    f"Received scalar key {key[0]!r} but multiple results: {allnames!r}. "
+                    f"Please pass a list instead (['{key[0]}']) to get back a Dataset "
+                    f"with {allnames!r}."
+                )
+
+        ds = ds.reset_coords()[varnames + coords]
+        if isinstance(obj, DataArray):
+            if scalar_key and len(ds.variables) == 1:
+                # single dimension coordinates
+                assert coords
+                assert not varnames
+
+                return ds[coords[0]]
+
+            elif scalar_key and len(ds.variables) > 1:
+                raise NotImplementedError(
+                    "Not sure what to return when given scalar key for DataArray and it has multiple values. "
+                    "Please open an issue."
+                )
+
+        return ds.set_coords(coords)
+
+    except KeyError:
+        raise KeyError(
+            f"{kind}.cf does not understand the key {k!r}. "
+            f"Use 'repr({kind}.cf)' (or '{kind}.cf' in a Jupyter environment) to see a list of key names that can be interpreted."
+        )
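``_getitem`` now backs both accessors' ``__getitem__``. The scalar-versus-list contract, as a sketch (assuming ``ds`` has a ``"lat"`` variable with ``standard_name: "latitude"``):

    ds.cf["latitude"]    # one match -> the DataArray ds["lat"], with associated
                         # coordinate/ancillary variables attached
    ds.cf[["latitude"]]  # list key -> a Dataset, even for a single match
    # multiple matches for a scalar key raise KeyError with a hint to pass a list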
+
+
+def _possible_x_y_plot(obj, key):
+    """Guesses a name for an x/y variable if possible."""
+    # in priority order
+    x_criteria = [
+        ("coordinates", "longitude"),
+        ("axes", "X"),
+        ("coordinates", "time"),
+        ("axes", "T"),
+    ]
+    y_criteria = [
+        ("coordinates", "vertical"),
+        ("axes", "Z"),
+        ("coordinates", "latitude"),
+        ("axes", "Y"),
+    ]
+
+    def _get_possible(accessor, criteria):
+        # is_scalar depends on NON_NUMPY_SUPPORTED_TYPES
+        # importing a private function seems better than
+        # maintaining that variable!
+        from xarray.core.utils import is_scalar
+
+        for attr, key in criteria:
+            value = getattr(accessor, attr).get(key)
+            if not value or len(value) > 1:
+                continue
+            if not is_scalar(accessor._obj[value[0]]):
+                return value[0]
+        return None
+
+    if key == "x":
+        return _get_possible(obj.cf, x_criteria)
+    elif key == "y":
+        return _get_possible(obj.cf, y_criteria)
+
+
+class _CFWrappedClass(SupportsArithmetic):
     """
     This class is used to wrap any class in _WRAPPED_CLASSES.
     """
@@ -566,6 +734,9 @@
             key_mappers=_DEFAULT_KEY_MAPPERS,
         )

+    def __iter__(self):
+        return iter(self.wrapped)
+

 class _CFWrappedPlotMethods:
     """
@@ -580,35 +751,43 @@
     def _plot_decorator(self, func):
         """
         This decorator is used to set default kwargs on plotting functions.
-
-        For now, this is setting ``xincrease`` and ``yincrease``. It could set
-        other arguments in the future.
+        For now, this can
+        1. set ``xincrease`` and ``yincrease``.
+        2. automatically set ``x`` or ``y``.
         """
         valid_keys = self.accessor.keys()

         @functools.wraps(func)
         def _plot_wrapper(*args, **kwargs):
-            if "x" in kwargs:
-                if kwargs["x"] in valid_keys:
-                    xvar = self.accessor[kwargs["x"]]
-                else:
-                    xvar = self._obj[kwargs["x"]]
-                if "positive" in xvar.attrs:
-                    if xvar.attrs["positive"] == "down":
-                        kwargs.setdefault("xincrease", False)
-                    else:
-                        kwargs.setdefault("xincrease", True)
+            def _process_x_or_y(kwargs, key):
+                if key not in kwargs:
+                    kwargs[key] = _possible_x_y_plot(self._obj, key)

-            if "y" in kwargs:
-                if kwargs["y"] in valid_keys:
-                    yvar = self.accessor[kwargs["y"]]
-                else:
-                    yvar = self._obj[kwargs["y"]]
-                if "positive" in yvar.attrs:
-                    if yvar.attrs["positive"] == "down":
-                        kwargs.setdefault("yincrease", False)
+                value = kwargs.get(key)
+                if value:
+                    if value in valid_keys:
+                        var = self.accessor[value]
                     else:
-                        kwargs.setdefault("yincrease", True)
+                        var = self._obj[value]
+                    if "positive" in var.attrs:
+                        if var.attrs["positive"] == "down":
+                            kwargs.setdefault(f"{key}increase", False)
+                        else:
+                            kwargs.setdefault(f"{key}increase", True)
+                return kwargs
+
+            is_line_plot = (func.__name__ == "line") or (
+                func.__name__ == "wrapper"
+                and (kwargs.get("hue") or self._obj.ndim == 1)
+            )
+            if is_line_plot:
+                if not kwargs.get("hue"):
+                    kwargs = _process_x_or_y(kwargs, "x")
+                    if not kwargs.get("x"):
+                        kwargs = _process_x_or_y(kwargs, "y")
+            else:
+                kwargs = _process_x_or_y(kwargs, "x")
+                kwargs = _process_x_or_y(kwargs, "y")

             return func(*args, **kwargs)
@@ -622,7 +801,7 @@ def __call__(self, *args, **kwargs):
             obj=self._obj,
             attr="plot",
             accessor=self.accessor,
-            key_mappers=dict.fromkeys(self._keys, (_get_axis_coord_single,)),
+            key_mappers=dict.fromkeys(self._keys, (_single(_get_all),)),
         )
         return self._plot_decorator(plot)(*args, **kwargs)

@@ -634,7 +813,7 @@ def __getattr__(self, attr):
             obj=self._obj.plot,
             attr=attr,
             accessor=self.accessor,
-            key_mappers=dict.fromkeys(self._keys, (_get_axis_coord_single,)),
+            key_mappers=dict.fromkeys(self._keys, (_single(_get_all),)),
             # TODO: "extra_decorator" is more complex than I would like it to be.
             # Not sure if there is a better way though
             extra_decorator=self._plot_decorator,
         )
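With ``_possible_x_y_plot`` wired into ``_plot_decorator``, plots can infer ``x``/``y`` from CF attributes when the caller omits them. A hedged sketch of the intent (names hypothetical):

    da.cf.plot()       # 2-D: x guessed via longitude/X, y via vertical/Z/latitude/Y
    da.cf.plot(x="X")  # explicit CF keys are still translated as before
    # a variable whose attrs["positive"] == "down" flips {x,y}increase to False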
@@ -723,12 +902,12 @@

         Parameters
         ----------
-        kwargs: Mapping
+        kwargs : Mapping
             Mapping from kwarg name to value
-        key_mappers: Mapping
+        key_mappers : Mapping
             Mapping from kwarg name to a Mapper function that will convert a
             given CF "special" name to an xarray name.
-        var_kws: List[str]
+        var_kws : List[str]
             List of variable kwargs that need special treatment.
             e.g. **indexers_kwargs in isel

@@ -740,14 +919,16 @@

         # allow multiple return values here.
         # these are valid for .sel, .isel, .coarsen
-        all_mappers = ChainMap(key_mappers, dict.fromkeys(var_kws, (_get_axis_coord,)))
+        all_mappers = ChainMap(
+            key_mappers,
+            dict.fromkeys(var_kws, (_get_all,)),
+        )

         for key in set(all_mappers) & set(kwargs):
             value = kwargs[key]
             mappers = all_mappers[key]

-            if isinstance(value, str):
-                value = [value]
+            value = always_iterable(value)

             if isinstance(value, dict):
                 # this for things like isel where **kwargs captures things like T=5
@@ -793,8 +974,6 @@
         for vkw in var_kws:
             if vkw in kwargs:
                 maybe_update = {
-                    # TODO: this is assuming key_mappers[k] is always
-                    # _get_axis_coord_single
                     k: apply_mapper(
                         key_mappers[k], self._obj, v, error=False, default=[v]
                     )[0]
@@ -828,30 +1007,81 @@
         """
         Print a string repr to screen.
         """
-        text = "Axes:\n"
-        axes = self.axes
-        for key in _AXIS_NAMES:
-            text += f"\t{key}: {axes[key] if key in axes else []}\n"
-
-        text += "\nCoordinates:\n"
-        coords = self.coordinates
-        for key in _COORD_NAMES:
-            text += f"\t{key}: {coords[key] if key in coords else []}\n"
-
-        text += "\nCell Measures:\n"
-        measures = self.cell_measures
-        for key in sorted(self._get_all_cell_measures()):
-            text += f"\t{key}: {measures[key] if key in measures else []}\n"
-
-        text += "\nStandard Names:\n"
-        if isinstance(self._obj, DataArray):
-            text += "\tunsupported\n"
-        else:
-            for key, value in sorted(self.standard_names.items()):
-                if key not in _COORD_NAMES:
-                    text += f"\t{key}: {value}\n"
-        print(text)
+        warnings.warn(
+            "'obj.cf.describe()' will be removed in a future version. "
+            "Use instead 'repr(obj.cf)' or 'obj.cf' in a Jupyter environment.",
+            DeprecationWarning,
+        )
+        print(repr(self))
+
+    def __repr__(self):
+
+        coords = self._obj.coords
+        dims = self._obj.dims
+
+        def make_text_section(subtitle, attr, valid_values, default_keys=None):
+
+            vardict = getattr(self, attr, {})
+
+            star = " * "
+            tab = len(star) * " "
+            subtitle = f"- {subtitle}:"
+
+            # Sort keys if there aren't extra keys,
+            # preserve default keys order otherwise.
+            default_keys = [] if not default_keys else list(default_keys)
+            extra_keys = list(set(vardict) - set(default_keys))
+            ordered_keys = sorted(vardict) if extra_keys else default_keys
+            vardict = {key: vardict[key] for key in ordered_keys if key in vardict}
+
+            # Keep only valid values (e.g., coords or data_vars)
+            vardict = {
+                key: set(value).intersection(valid_values)
+                for key, value in vardict.items()
+                if set(value).intersection(valid_values)
+            }
+
+            # Star for keys with dims only, tab otherwise
+            rows = [
+                f"{star if set(value) <= set(dims) else tab}{key}: {sorted(value)}"
+                for key, value in vardict.items()
+            ]
+
+            # Append missing default keys followed by n/a
+            if default_keys:
+                missing_keys = [key for key in default_keys if key not in vardict]
+                if missing_keys:
+                    rows += [tab + ", ".join(missing_keys) + ": n/a"]
+            elif not rows:
+                rows = [tab + "n/a"]
+
+            # Add subtitle to the first row, align other rows
+            rows = [
+                "\n" + subtitle + row if i == 0 else len(subtitle) * " " + row
+                for i, row in enumerate(rows)
+            ]
+
+            return "\n".join(rows) + "\n"
+
+        text = "Coordinates:"
+        text += make_text_section("CF Axes", "axes", coords, _AXIS_NAMES)
+        text += make_text_section("CF Coordinates", "coordinates", coords, _COORD_NAMES)
+        text += make_text_section(
+            "Cell Measures", "cell_measures", coords, _CELL_MEASURES
+        )
+        text += make_text_section("Standard Names", "standard_names", coords)
+        text += make_text_section("Bounds", "bounds", coords)
+        if isinstance(self._obj, Dataset):
+            data_vars = self._obj.data_vars
+            text += "\nData Variables:"
+            text += make_text_section(
+                "Cell Measures", "cell_measures", data_vars, _CELL_MEASURES
+            )
+            text += make_text_section("Standard Names", "standard_names", data_vars)
+            text += make_text_section("Bounds", "bounds", data_vars)
+
+        return text

     def get_valid_keys(self) -> Set[str]:
@@ -888,17 +1118,14 @@
         This is useful for checking whether a key is valid for indexing, i.e.
         that the attributes necessary to allow indexing by that key exist.

-        However, it will only return the Axis names, not Coordinate names.
+        However, it will only return the Axis names present in ``.coords``, not Coordinate names.

         Returns
         -------
         Dictionary of valid Axis names that can be used with ``__getitem__`` or ``.cf[key]``.
         Will be ("X", "Y", "Z", "T") or a subset thereof.
         """
-        vardict = {
-            key: apply_mapper(_get_axis_coord, self._obj, key, error=False)
-            for key in _AXIS_NAMES
-        }
+        vardict = {key: _get_coords(self._obj, key) for key in _AXIS_NAMES}

         return {k: sorted(v) for k, v in vardict.items() if v}
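``__repr__`` plus the reworked properties replace ``describe()``. Programmatic counterparts, for a hypothetical dataset whose ``"lon"`` coordinate carries ``axis: "X"`` and longitude units:

    ds.cf.axes         # {"X": ["lon"]}
    ds.cf.coordinates  # {"longitude": ["lon"]}
    print(ds.cf)       # renders the "Coordinates:"/"Data Variables:" sections built above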
""" - vardict = { - key: apply_mapper(_get_axis_coord, self._obj, key, error=False) - for key in _COORD_NAMES - } + vardict = {key: _get_coords(self._obj, key) for key in _COORD_NAMES} return {k: sorted(v) for k, v in vardict.items() if v} @@ -947,10 +1171,10 @@ def cell_measures(self) -> Dict[str, List[str]]: da.attrs.get("cell_measures", "") for da in obj.data_vars.values() ] - measures: Dict[str, List[str]] = dict() + keys = {} for attr in all_attrs: - for key, value in parse_cell_methods_attr(attr).items(): - measures[key] = measures.setdefault(key, []) + [value] + keys.update(parse_cell_methods_attr(attr)) + measures = {key: _get_all(self._obj, key) for key in keys} return {k: sorted(set(v)) for k, v in measures.items() if v} @@ -966,24 +1190,23 @@ def get_standard_names(self) -> List[str]: @property def standard_names(self) -> Dict[str, List[str]]: """ - Returns a sorted list of standard names in Dataset. + Returns a dictionary mapping standard names to variable names. Parameters ---------- - - obj: DataArray, Dataset + obj : DataArray, Dataset Xarray object to process Returns ------- - Dictionary of standard names in dataset + Dictionary mapping standard names to variable names. """ if isinstance(self._obj, Dataset): variables = self._obj.variables elif isinstance(self._obj, DataArray): variables = self._obj.coords - vardict: Dict[str, List[str]] = dict() + vardict: Dict[str, List[str]] = {} for k, v in variables.items(): if "standard_name" in v.attrs: std_name = v.attrs["standard_name"] @@ -991,7 +1214,9 @@ def standard_names(self) -> Dict[str, List[str]]: return {k: sorted(v) for k, v in vardict.items()} - def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: + def get_associated_variable_names( + self, name: Hashable, skip_bounds: bool = False, error: bool = True + ) -> Dict[str, List[str]]: """ Returns a dict mapping 1. "ancillary_variables" @@ -1002,12 +1227,13 @@ def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: Parameters ---------- - - name: Hashable + name : Hashable + skip_bounds : bool, optional + error: bool, optional + Raise or ignore errors. 
@@ -991,7 +1214,9 @@
         return {k: sorted(v) for k, v in vardict.items()}

-    def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]:
+    def get_associated_variable_names(
+        self, name: Hashable, skip_bounds: bool = False, error: bool = True
+    ) -> Dict[str, List[str]]:
         """
         Returns a dict mapping
         1. "ancillary_variables"
@@ -1002,12 +1227,13 @@

         Parameters
         ----------
-
-        name: Hashable
+        name : Hashable
+        skip_bounds : bool, optional
+        error: bool, optional
+            Raise or ignore errors.

         Returns
         ------
-
         Dict with keys "ancillary_variables", "cell_measures", "coordinates", "bounds"
         """
         keys = ["ancillary_variables", "cell_measures", "coordinates", "bounds"]
@@ -1018,9 +1244,20 @@
             coords["coordinates"] = attrs_or_encoding["coordinates"].split(" ")

         if "cell_measures" in attrs_or_encoding:
-            coords["cell_measures"] = list(
-                parse_cell_methods_attr(attrs_or_encoding["cell_measures"]).values()
-            )
+            try:
+                coords["cell_measures"] = list(
+                    parse_cell_methods_attr(attrs_or_encoding["cell_measures"]).values()
+                )
+            except ValueError as e:
+                if error:
+                    msg = e.args[0] + " Ignore this error by passing 'error=False'"
+                    raise ValueError(msg)
+                else:
+                    warnings.warn(
+                        f"Ignoring bad cell_measures attribute: {attrs_or_encoding['cell_measures']}",
+                        UserWarning,
+                    )
+                    coords["cell_measures"] = []

         if (
             isinstance(self._obj, Dataset)
@@ -1030,13 +1267,13 @@
                 "ancillary_variables"
             ].split(" ")

-        if "bounds" in attrs_or_encoding:
-            coords["bounds"] = [attrs_or_encoding["bounds"]]
-
-        for dim in self._obj[name].dims:
-            dbounds = self._obj[dim].attrs.get("bounds", None)
-            if dbounds:
-                coords["bounds"].append(dbounds)
+        if not skip_bounds:
+            if "bounds" in attrs_or_encoding:
+                coords["bounds"] = [attrs_or_encoding["bounds"]]
+            for dim in self._obj[name].dims:
+                dbounds = self._obj[dim].attrs.get("bounds", None)
+                if dbounds:
+                    coords["bounds"].append(dbounds)

         allvars = itertools.chain(*coords.values())
         missing = set(allvars) - set(self._maybe_to_dataset().variables)
@@ -1053,106 +1290,6 @@

         return coords

-    def __getitem__(self, key: Union[str, List[str]]):
-
-        kind = str(type(self._obj).__name__)
-        scalar_key = isinstance(key, str)
-
-        if isinstance(self._obj, DataArray) and not scalar_key:
-            raise KeyError(
-                f"Cannot use a list of keys with DataArrays. Expected a single string. Received {key!r} instead."
-            )
-
-        if scalar_key:
-            key = (key,)  # type: ignore
-
-        def check_results(names, k):
-            if scalar_key and len(names) > 1:
-                raise ValueError(
-                    f"Receive multiple variables for key {k!r}: {names}. "
-                    f"Expected only one. Please pass a list [{k!r}] "
-                    f"instead to get all variables matching {k!r}."
-                )
-
-        varnames: List[Hashable] = []
-        coords: List[Hashable] = []
-        successful = dict.fromkeys(key, False)
-        for k in key:
-            if k in _AXIS_NAMES + _COORD_NAMES:
-                names = _get_axis_coord(self._obj, k)
-                check_results(names, k)
-                successful[k] = bool(names)
-                coords.extend(names)
-            elif k in self._get_all_cell_measures():
-                measure = _get_measure(self._obj, k)
-                check_results(measure, k)
-                successful[k] = bool(measure)
-                if measure:
-                    varnames.extend(measure)
-            elif not isinstance(self._obj, DataArray):
-                stdnames = set(_get_with_standard_name(self._obj, k))
-                check_results(stdnames, k)
-                successful[k] = bool(stdnames)
-                objcoords = set(self._obj.coords)
-                varnames.extend(stdnames - objcoords)
-                coords.extend(stdnames & objcoords)
-
-        # these are not special names but could be variable names in underlying object
-        # we allow this so that we can return variables with appropriate CF auxiliary variables
-        varnames.extend([k for k, v in successful.items() if not v])
-        allnames = varnames + coords
-
-        try:
-            for name in allnames:
-                extravars = self.get_associated_variable_names(name)
-                # we cannot return bounds variables with scalar keys
-                if scalar_key:
-                    extravars.pop("bounds")
-                coords.extend(itertools.chain(*extravars.values()))
-
-            if isinstance(self._obj, DataArray):
-                ds = self._obj._to_temp_dataset()
-            else:
-                ds = self._obj
-
-            if scalar_key:
-                if len(allnames) == 1:
-                    da: DataArray = ds.reset_coords()[allnames[0]]  # type: ignore
-                    if allnames[0] in coords:
-                        coords.remove(allnames[0])
-                    for k1 in coords:
-                        da.coords[k1] = ds.variables[k1]
-                    return da
-                else:
-                    raise ValueError(
-                        f"Received scalar key {key[0]!r} but multiple results: {allnames!r}. "
-                        f"Please pass a list instead (['{key[0]}']) to get back a Dataset "
-                        f"with {allnames!r}."
-                    )
-
-            ds = ds.reset_coords()[varnames + coords]
-            if isinstance(self._obj, DataArray):
-                if scalar_key and len(ds.variables) == 1:
-                    # single dimension coordinates
-                    assert coords
-                    assert not varnames
-
-                    return ds[coords[0]]
-
-                elif scalar_key and len(ds.variables) > 1:
-                    raise NotImplementedError(
-                        "Not sure what to return when given scalar key for DataArray and it has multiple values. "
-                        "Please open an issue."
-                    )
-
-            return ds.set_coords(coords)
-
-        except KeyError:
-            raise KeyError(
-                f"{kind}.cf does not understand the key {k!r}. "
-                f"Use {kind}.cf.describe() to see a list of key names that can be interpreted."
-            )
-
     def _maybe_to_dataset(self, obj=None) -> Dataset:
         if obj is None:
             obj = self._obj
@@ -1170,7 +1307,9 @@
         return obj

     def rename_like(
-        self, other: Union[DataArray, Dataset]
+        self,
+        other: Union[DataArray, Dataset],
+        skip: Union[str, Iterable[str]] = None,
     ) -> Union[DataArray, Dataset]:
         """
         Renames variables in object to match names of like-variables in ``other``.
@@ -1184,39 +1323,102 @@

         Parameters
         ----------
-        other: DataArray, Dataset
+        other : DataArray, Dataset
             Variables will be renamed to match variable names in this xarray object
+        skip: str, Iterable[str], optional
+            Limit the renaming excluding
+            ("axes", "bounds", cell_measures", "coordinates", "standard_names")
+            or a subset thereof.

         Returns
         -------
         DataArray or Dataset with renamed variables
         """
+        skip = [skip] if isinstance(skip, str) else skip or []
+
         ourkeys = self.keys()
         theirkeys = other.cf.keys()

-        good_keys = set(_COORD_NAMES) & ourkeys & theirkeys
-        if not good_keys:
-            raise ValueError(
-                "No common coordinate variables between these two objects."
-            )
-
-        renamer = {}
+        good_keys = ourkeys & theirkeys
+        keydict = {}
         for key in good_keys:
-            ours = _get_axis_coord_single(self._obj, key)[0]
-            theirs = _get_axis_coord_single(other, key)[0]
-            renamer[ours] = theirs
-
+            ours = set(apply_mapper(_get_all, self._obj, key))
+            theirs = set(apply_mapper(_get_all, other, key))
+            for attr in skip:
+                ours.difference_update(getattr(self, attr).get(key, []))
+                theirs.difference_update(getattr(other.cf, attr).get(key, []))
+            if ours and theirs:
+                keydict[key] = dict(ours=list(ours), theirs=list(theirs))
+
+        def get_renamer_and_conflicts(keydict):
+            conflicts = {}
+            for k0, v0 in keydict.items():
+                if len(v0["ours"]) > 1 or len(v0["theirs"]) > 1:
+                    conflicts[k0] = v0
+                    continue
+                for v1 in keydict.values():
+                    # Conflicts have same ours but different theirs or vice versa
+                    if (v0["ours"] == v1["ours"]) != (v0["theirs"] == v1["theirs"]):
+                        conflicts[k0] = v0
+                        break
+
+            renamer = {
+                v["ours"][0]: v["theirs"][0]
+                for k, v in keydict.items()
+                if k not in conflicts
+            }
+
+            return renamer, conflicts
+
+        # Run get_renamer_and_conflicts twice.
+        # The second time add the bounds associated with variables to rename
+        renamer, conflicts = get_renamer_and_conflicts(keydict)
+        if "bounds" not in skip:
+            for k, v in renamer.items():
+                ours = set(getattr(self, "bounds", {}).get(k, []))
+                theirs = set(getattr(other.cf, "bounds", {}).get(v, []))
+                if ours and theirs:
+                    ours.update(keydict.get(k, {}).get("ours", []))
+                    theirs.update(keydict.get(k, {}).get("theirs", []))
+                    keydict[k] = dict(ours=list(ours), theirs=list(theirs))
+            renamer, conflicts = get_renamer_and_conflicts(keydict)
+
+        # Rename and warn
+        if conflicts:
+            warnings.warn(
+                "Conflicting variables skipped:\n"
+                + "\n".join(
+                    [
+                        f"{sorted(v['ours'])}: {sorted(v['theirs'])} ({k})"
+                        for k, v in sorted(
+                            conflicts.items(), key=lambda item: sorted(item[1]["ours"])
+                        )
+                    ]
+                ),
+                UserWarning,
+            )
         newobj = self._obj.rename(renamer)

-        # rename variable names in the coordinates attribute
+        # rename variable names in the attributes
         # if present
         ds = self._maybe_to_dataset(newobj)
         for _, variable in ds.variables.items():
-            coordinates = variable.attrs.get("coordinates", None)
-            if coordinates:
-                for k, v in renamer.items():
-                    coordinates = coordinates.replace(k, v)
-                variable.attrs["coordinates"] = coordinates
+            for attr in ("bounds", "coordinates", "cell_measures"):
+                if attr == "cell_measures":
+                    varlist = [
+                        f"{k}: {renamer.get(v, v)}"
+                        for k, v in parse_cell_methods_attr(
+                            variable.attrs.get(attr, "")
+                        ).items()
+                    ]
+                else:
+                    varlist = [
+                        renamer.get(var, var)
+                        for var in variable.attrs.get(attr, "").split()
+                    ]
+
+                if varlist:
+                    variable.attrs[attr] = " ".join(varlist)
         return self._maybe_to_dataarray(ds)
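A sketch of ``rename_like`` with the new ``skip`` parameter (object names hypothetical). Only unambiguous matches are renamed; ambiguous ones are reported via the warning constructed above:

    # rename ds1's coordinate variables to match ds2, but ignore matches
    # that were made purely via standard_name
    renamed = ds1.cf.rename_like(ds2, skip="standard_names")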

     def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]:
@@ -1229,15 +1431,13 @@

         Parameters
         ----------
-        verbose: bool
+        verbose : bool
             Print extra info to screen

         Returns
         -------
         DataArray or Dataset with appropriate attributes added
         """
-        import re
-
         obj = self._obj.copy(deep=True)
         for var in obj.coords.variables:
             if obj[var].ndim == 1 and _is_datetime_like(obj[var]):
@@ -1245,68 +1445,219 @@
                 if verbose:
                     print(
                         f"I think {var!r} is of type 'time'. It has a datetime-like type."
                     )
-                obj[var].attrs = dict(ChainMap(obj[var].attrs, attrs["time"]))
+                obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS["time"]))
                 continue  # prevent second detection

-            for axis, pattern in regex.items():
+            for name, pattern in regex.items():
                 # match variable names
-                if re.match(pattern, var.lower()):
+                if pattern.match(var.lower()):
                     if verbose:
                         print(
-                            f"I think {var!r} is of type {axis!r}. It matched {pattern!r}"
+                            f"I think {var!r} is of type {name!r}. It matched {pattern!r}"
                         )
-                    obj[var].attrs = dict(ChainMap(obj[var].attrs, attrs[axis]))
+                    obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS[name]))
         return obj

+    def drop(self, *args, **kwargs):
+        raise NotImplementedError(
+            "cf-xarray does not support .drop."
+            "Please use .cf.drop_vars or .cf.drop_sel as appropriate."
+        )
+
+    def stack(self, dimensions=None, **dimensions_kwargs):
+        # stack needs to rewrite the _values_ of a dict
+        # our other machinery rewrites the _keys_ of a dict
+        # This seems somewhat rare, so do it explicitly for now
+
+        if dimensions is None:
+            dimensions = dimensions_kwargs
+        for key, values in dimensions.items():
+            updates = [
+                apply_mapper(
+                    (_single(_get_dims),), self._obj, v, error=True, default=[v]
+                )
+                for v in values
+            ]
+            dimensions.update({key: tuple(itertools.chain(*updates))})
+        return self._obj.stack(dimensions)
+
+    def differentiate(
+        self, coord, *xr_args, positive_upward: bool = False, **xr_kwargs
+    ):
+        """
+        Differentiate an xarray object.
+
+        Parameters
+        ----------
+        positive_upward: optional, bool
+            Change sign of the derivative based on the ``"positive"`` attribute of ``coord``
+            so that positive values indicate increasing upward.
+            If ``positive=="down"``, then multiplied by -1.
+
+        Notes
+        -----
+        ``xr_args``, ``xr_kwargs`` are passed directly to the underlying xarray function.
+
+        See Also
+        --------
+        DataArray.cf.differentiate
+        Dataset.cf.differentiate
+        xarray.DataArray.differentiate: underlying xarray function
+        xarray.Dataset.differentiate: underlying xarray function
+        """
+        coord = apply_mapper(
+            (_single(_get_coords),), self._obj, coord, error=False, default=[coord]
+        )[0]
+        result = self._obj.differentiate(coord, *xr_args, **xr_kwargs)
+        if positive_upward:
+            coord = self._obj[coord]
+            attrs = coord.attrs
+            if "positive" not in attrs:
+                raise ValueError(
+                    f"positive_upward=True and 'positive' attribute not present on {coord.name}"
+                )
+            if attrs["positive"] not in ["up", "down"]:
+                raise ValueError(
+                    f"positive_upward=True and received attrs['positive']={attrs['positive']}. Expected one of ['up', 'down'] "
+                )
+            if attrs["positive"] == "down":
+                result *= -1
+        return result
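Usage sketch for the sign-aware derivative, assuming a vertical coordinate whose attrs include ``positive: "down"`` (typical for ocean depth):

    dTdz = ds.cf.differentiate("Z", positive_upward=True)
    # "Z" resolves to e.g. "depth"; positive == "down" multiplies the result by -1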


 @xr.register_dataset_accessor("cf")
 class CFDatasetAccessor(CFAccessor):
+    def __getitem__(self, key: Union[str, List[str]]) -> Union[DataArray, Dataset]:
+        """
+        Index into a Dataset making use of CF attributes.
+
+        Parameters
+        ----------
+
+        key: str, Iterable[str], optional
+            One of
+              - axes names: "X", "Y", "Z", "T"
+              - coordinate names: "longitude", "latitude", "vertical", "time"
+              - cell measures: "area", "volume", or other names present in the \
+                ``cell_measures`` attribute
+              - standard names: names present in ``standard_name`` attribute
+
+        Returns
+        -------
+        DataArray or Dataset
+            ``Dataset.cf[str]`` will return a DataArray, \
+            ``Dataset.cf[List[str]]``` will return a Dataset.
+
+        Notes
+        -----
+        In all cases, associated CF variables will be attached as coordinate variables
+        by parsing attributes such as ``bounds``, ``ancillary_variables``, etc.
+
+        ``bounds`` variables will not be attached when a DataArray is returned. This
+        is a limitation of the xarray data model.
+        """
+        return _getitem(self, key)
+
+    @property
+    def formula_terms(self) -> Dict[str, Dict[str, str]]:
+        """
+        Property that returns a dictionary
+        {parametric_coord_name: {standard_term_name: variable_name}}
+        """
+        return {
+            dim: self._obj[dim].cf.formula_terms for dim in _get_dims(self._obj, "Z")
+        }
+
+    @property
+    def bounds(self) -> Dict[str, List[str]]:
+        """
+        Property that returns a dictionary mapping valid keys
+        to the variable names of their bounds.
+
+        Returns
+        -------
+        Dictionary mapping valid keys to the variable names of their bounds.
+        """
+
+        obj = self._obj
+        keys = self.keys() | set(obj.variables)
+
+        vardict = {
+            key: apply_mapper(_get_bounds, obj, key, error=False) for key in keys
+        }
+
+        return {k: sorted(v) for k, v in vardict.items() if v}
+
     def get_bounds(self, key: str) -> DataArray:
         """
         Get bounds variable corresponding to key.

         Parameters
         ----------
-        key: str
+        key : str
             Name of variable whose bounds are desired

         Returns
         -------
         DataArray
         """
-        name = apply_mapper(
-            _get_axis_coord_single, self._obj, key, error=False, default=[key]
-        )[0]
-        bounds = self._obj[name].attrs["bounds"]
-        obj = self._maybe_to_dataset()
-        return obj[bounds]

-    def add_bounds(self, dims: Union[Hashable, Iterable[Hashable]]):
+        return apply_mapper(_variables(_single(_get_bounds)), self._obj, key)[0]
+
+    def get_bounds_dim_name(self, key: str) -> str:
+        """
+        Get bounds dim name for variable corresponding to key.
+
+        Parameters
+        ----------
+        key : str
+            Name of variable whose bounds dimension name is desired.
+
+        Returns
+        -------
+        str
+        """
+        crd = self[key]
+        bounds = self.get_bounds(key)
+        bounds_dims = set(bounds.dims) - set(crd.dims)
+        assert len(bounds_dims) == 1
+        bounds_dim = bounds_dims.pop()
+        assert self._obj.sizes[bounds_dim] in [2, 4]
+        return bounds_dim
+
+    def add_bounds(self, keys: Union[str, Iterable[str]]):
         """
         Returns a new object with bounds variables. The bounds values are guessed assuming
         equal spacing on either side of a coordinate label.

         Parameters
         ----------
-        dims: Hashable or Iterable[Hashable]
-            Either a single dimension name or a list of dimension names.
+        keys : str or Iterable[str]
+            Either a single key or a list of keys corresponding to dimensions.

         Returns
         -------
         DataArray or Dataset with bounds variables added and appropriate "bounds" attribute set.

+        Raises
+        ------
+        KeyError
+
         Notes
         -----
-
         The bounds variables are automatically named f"{dim}_bounds" where ``dim``
         is a dimension name.
         """
-        if isinstance(dims, Hashable):
-            dimensions = (dims,)
-        else:
-            dimensions = dims
+        if isinstance(keys, str):
+            keys = [keys]
+
+        dimensions = set()
+        for key in keys:
+            dimensions.update(
+                apply_mapper(_get_dims, self._obj, key, error=False, default=[key])
+            )

-        bad_dims: Set[Hashable] = set(dimensions) - set(self._obj.dims)
+        bad_dims: Set[str] = dimensions - set(self._obj.dims)
         if bad_dims:
             raise ValueError(
                 f"{bad_dims!r} are not dimensions in the underlying object."
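Sketch of the bounds machinery on a dataset with a 1-D ``"lon"`` dimension coordinate identified as ``"X"`` (names hypothetical):

    ds = ds.cf.add_bounds("X")        # adds "lon_bounds", sets lon.attrs["bounds"]
    ds.cf.bounds                      # e.g. {"X": ["lon_bounds"], "lon": ["lon_bounds"], ...}
    ds.cf.get_bounds("longitude")     # the "lon_bounds" DataArray
    ds.cf.get_bounds_dim_name("lon")  # name of the size-2 bounds dimension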
``s_rho`` becomes ``z_rho`` @@ -1417,7 +1768,6 @@ def decode_vertical_coords(self, prefix="z"): Notes ----- - Will only decode when the ``formula_terms`` and ``standard_name`` attributes are set on the parameter (e.g ``s_rho`` ) @@ -1426,36 +1776,29 @@ def decode_vertical_coords(self, prefix="z"): .. warning:: Very lightly tested. Please double check the results. """ - import re - ds = self._obj - dims = _get_axis_coord(ds, "Z") requirements = { "ocean_s_coordinate_g1": {"depth_c", "depth", "s", "C", "eta"}, "ocean_s_coordinate_g2": {"depth_c", "depth", "s", "C", "eta"}, } - for dim in dims: + allterms = self.formula_terms + for dim in allterms: suffix = dim.split("_") zname = f"{prefix}_" + "_".join(suffix[1:]) - if ( - "formula_terms" not in ds[dim].attrs - or "standard_name" not in ds[dim].attrs - ): + if "standard_name" not in ds[dim].attrs: continue - - formula_terms = ds[dim].attrs["formula_terms"] stdname = ds[dim].attrs["standard_name"] # map "standard" formula term names to actual variable names terms = {} - for mapping in re.sub(": ", ":", formula_terms).split(" "): - key, value = mapping.split(":") + for key, value in allterms[dim].items(): if value not in ds: raise KeyError( - f"Variable {value!r} is required to decode coordinate for {dim} but it is absent in the Dataset." + f"Variable {value!r} is required to decode coordinate for {dim!r}" + " but it is absent in the Dataset." ) terms[key] = ds[value] @@ -1483,10 +1826,65 @@ def decode_vertical_coords(self, prefix="z"): else: raise NotImplementedError( - f"Coordinate function for {stdname} not implemented yet. Contributions welcome!" + f"Coordinate function for {stdname!r} not implemented yet. Contributions welcome!" ) @xr.register_dataarray_accessor("cf") class CFDataArrayAccessor(CFAccessor): + @property + def formula_terms(self) -> Dict[str, str]: + """ + Property that returns a dictionary + {standard_term_name: variable_name} + for the parametric vertical coordinate associated with this DataArray. + """ + da = self._obj + if "formula_terms" not in da.attrs: + var = da[_single(_get_dims)(da, "Z")[0]] + else: + var = da + terms = {} + formula_terms = var.attrs.get("formula_terms", "") + for mapping in re.sub(r"\s*:\s*", ":", formula_terms).split(): + key, value = mapping.split(":") + terms[key] = value + return terms + + def __getitem__(self, key: Union[str, List[str]]) -> DataArray: + """ + Index into a DataArray making use of CF attributes. + + Parameters + ---------- + key : str + One of + - axes names: "X", "Y", "Z", "T" + - coordinate names: "longitude", "latitude", "vertical", "time" + - cell measures: "area", "volume", or other names present in the \ ``cell_measures`` attribute + - standard names: names present in ``standard_name`` attribute of \ coordinate variables + + Returns + ------- + DataArray + + Raises + ------ + KeyError + ``DataArray.cf[List[str]]`` will raise KeyError. + + Notes + ----- + Associated CF variables will be attached as coordinate variables + by parsing attributes such as ``cell_measures``, ``coordinates``, etc. + """ + + if not isinstance(key, str): + raise KeyError( + f"Cannot use a list of keys with DataArrays. Expected a single string. Received {key!r} instead." + ) + + return _getitem(self, key) + pass diff --git a/cf_xarray/criteria.py b/cf_xarray/criteria.py new file mode 100644 index 00000000..9fbd7e40 --- /dev/null +++ b/cf_xarray/criteria.py @@ -0,0 +1,94 @@ +""" +Criteria for identifying axes and coordinate variables. +Reused with modification from MetPy under the terms of the BSD 3-Clause License.
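The ``formula_terms`` parsing added above is compact enough to sketch standalone. The attribute value below is the ROMS-style string used elsewhere in this changeset (the terms on ``romsds.s_rho``); CF encodes formula terms as blank-separated ``term: variable`` pairs:

```python
import re

formula_terms = "s: s_rho C: Cs_r eta: zeta depth: h depth_c: hc"

# normalize "term: variable" pairs, then split on whitespace
terms = {}
for mapping in re.sub(r"\s*:\s*", ":", formula_terms).split():
    key, value = mapping.split(":")
    terms[key] = value

assert terms == {
    "s": "s_rho", "C": "Cs_r", "eta": "zeta", "depth": "h", "depth_c": "hc"
}
```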
+Copyright (c) 2017 MetPy Developers. +""" + + +import copy +import re +from typing import MutableMapping, Tuple + +coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { + "standard_name": { + "X": ("projection_x_coordinate",), + "Y": ("projection_y_coordinate",), + "T": ("time",), + "time": ("time",), + "vertical": ( + "air_pressure", + "height", + "depth", + "geopotential_height", + # computed dimensional coordinate name + "altitude", + "height_above_geopotential_datum", + "height_above_reference_ellipsoid", + "height_above_mean_sea_level", + ), + "Z": ( + "model_level_number", + "atmosphere_ln_pressure_coordinate", + "atmosphere_sigma_coordinate", + "atmosphere_hybrid_sigma_pressure_coordinate", + "atmosphere_hybrid_height_coordinate", + "atmosphere_sleve_coordinate", + "ocean_sigma_coordinate", + "ocean_s_coordinate", + "ocean_s_coordinate_g1", + "ocean_s_coordinate_g2", + "ocean_sigma_z_coordinate", + "ocean_double_sigma_coordinate", + ), + "latitude": ("latitude",), + "longitude": ("longitude",), + }, + "_CoordinateAxisType": { + "T": ("Time",), + "Z": ("GeoZ", "Height", "Pressure"), + "Y": ("GeoY",), + "latitude": ("Lat",), + "X": ("GeoX",), + "longitude": ("Lon",), + }, + "axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)}, + "cartesian_axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)}, + "positive": {"vertical": ("up", "down")}, + "units": { + "latitude": ( + "degree_north", + "degree_N", + "degreeN", + "degrees_north", + "degrees_N", + "degreesN", + ), + "longitude": ( + "degree_east", + "degree_E", + "degreeE", + "degrees_east", + "degrees_E", + "degreesE", + ), + }, +} + +# "long_name" and "standard_name" criteria are the same. For convenience. +coordinate_criteria["long_name"] = copy.deepcopy(coordinate_criteria["standard_name"]) +coordinate_criteria["long_name"]["X"] += ("cell index along first dimension",) +coordinate_criteria["long_name"]["Y"] += ("cell index along second dimension",) + +#: regular expressions for guess_coord_axis +regex = { + "time": re.compile("\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*"), + "Z": re.compile( + "(z|nav_lev|gdep|lv_|[o]*lev|bottom_top|sigma|h(ei)?ght|altitude|depth|" + "isobaric|pres|isotherm)[a-z_]*[0-9]*" + ), + "Y": re.compile("y|j|nlat|nj"), + "latitude": re.compile("y?(nav_lat|lat|gphi)[a-z0-9]*"), + "X": re.compile("x|i|nlon|ni"), + "longitude": re.compile("x?(nav_lon|lon|glam)[a-z0-9]*"), +} +regex["T"] = regex["time"] diff --git a/cf_xarray/tests/datasets.py b/cf_xarray/datasets.py similarity index 61% rename from cf_xarray/tests/datasets.py rename to cf_xarray/datasets.py index bc8f46c0..de593471 100644 --- a/cf_xarray/tests/datasets.py +++ b/cf_xarray/datasets.py @@ -70,7 +70,6 @@ anc["q_detection_limit"] = xr.DataArray( 1e-3, attrs=dict(standard_name="specific_humidity detection_minimum", units="g/g") ) -anc multiple = xr.Dataset() @@ -122,7 +121,7 @@ romsds["temp"] = ( ("ocean_time", "s_rho"), [np.linspace(20, 30, 30)] * 2, - {"coordinates": "z_rho_dummy"}, + {"coordinates": "z_rho_dummy", "standard_name": "sea_water_potential_temperature"}, ) romsds["temp"].encoding["coordinates"] = "s_rho" romsds.coords["z_rho_dummy"] = ( @@ -188,3 +187,95 @@ lat_vertices=xr.DataArray(lat_vertices, dims=("x_vertices", "y_vertices")), ), ) + +forecast = xr.decode_cf( + xr.Dataset.from_dict( + { + "coords": { + "L": { + "dims": ("L",), + "attrs": { + "long_name": "Lead", + "standard_name": "forecast_period", + "pointwidth": 1.0, + "gridtype": 0, + "units": "months", + }, + "data": [0, 1], + }, + "M": 
{ + "dims": ("M",), + "attrs": { + "standard_name": "realization", + "long_name": "Ensemble Member", + "pointwidth": 1.0, + "gridtype": 0, + "units": "unitless", + }, + "data": [0, 1, 2], + }, + "S": { + "dims": ("S",), + "attrs": { + "calendar": "360_day", + "long_name": "Forecast Start Time", + "standard_name": "forecast_reference_time", + "pointwidth": 0, + "gridtype": 0, + "units": "months since 1960-01-01", + }, + "data": [0, 1, 2, 3], + }, + "X": { + "dims": ("X",), + "attrs": { + "standard_name": "longitude", + "pointwidth": 1.0, + "gridtype": 1, + "units": "degree_east", + }, + "data": [0, 1, 2, 3, 4], + }, + "Y": { + "dims": ("Y",), + "attrs": { + "standard_name": "latitude", + "pointwidth": 1.0, + "gridtype": 0, + "units": "degree_north", + }, + "data": [0, 1, 2, 3, 4, 5], + }, + }, + "attrs": {"Conventions": "IRIDL"}, + "dims": {"L": 2, "M": 3, "S": 4, "X": 5, "Y": 6}, + "data_vars": { + "sst": { + "dims": ("S", "L", "M", "Y", "X"), + "attrs": { + "pointwidth": 0, + "PDS_TimeRange": 3, + "center": "US Weather Service - National Met. Center", + "grib_name": "TMP", + "gribNumBits": 21, + "gribcenter": 7, + "gribparam": 11, + "gribleveltype": 1, + "GRIBgridcode": 3, + "process": 'Spectral Statistical Interpolation (SSI) analysis from "Final" run.', + "PTVersion": 2, + "gribfield": 1, + "units": "Celsius_scale", + "scale_min": -69.97389221191406, + "scale_max": 43.039306640625, + "long_name": "Sea Surface Temperature", + "standard_name": "sea_surface_temperature", + }, + "data": np.arange(np.prod((4, 2, 3, 6, 5))).reshape( + (4, 2, 3, 6, 5) + ), + } + }, + } + ) +) diff --git a/cf_xarray/helpers.py b/cf_xarray/helpers.py index ae00afcc..33304523 100644 --- a/cf_xarray/helpers.py +++ b/cf_xarray/helpers.py @@ -17,7 +17,7 @@ def bounds_to_vertices( Parameters ---------- - bounds: DataArray + bounds : DataArray The bounds to convert. Must be of shape (N, 2) or (N, M, 4). bounds_dim : str The name of the bounds dimension of `bounds` (the one of length 2 or 4). @@ -93,7 +93,7 @@ def vertices_to_bounds( Parameters ---------- - bounds: DataArray + bounds : DataArray The bounds to convert. Must be of shape (N, 2) or (N, M, 4). out_dims : Sequence[str], The name of the dimension in the output. The first is the 'bounds' diff --git a/cf_xarray/scripts/make_doc.py b/cf_xarray/scripts/make_doc.py new file mode 100644 index 00000000..256007c8 --- /dev/null +++ b/cf_xarray/scripts/make_doc.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +import os + +from pandas import DataFrame + +from cf_xarray.accessor import _AXIS_NAMES, _COORD_NAMES +from cf_xarray.criteria import coordinate_criteria, regex + + +def main(): + """ + Make all additional files needed to build the documentations. 
+ """ + + make_criteria_csv() + make_regex_csv() + + +def make_criteria_csv(): + """ + Make criteria tables: + _build/csv/{all,axes,coords}_criteria.csv + """ + + csv_dir = "_build/csv" + os.makedirs(csv_dir, exist_ok=True) + + # Criteria tables + df = DataFrame.from_dict(coordinate_criteria) + df = df.dropna(1, how="all") + df = df.applymap(lambda x: ", ".join(sorted(x)) if isinstance(x, tuple) else x) + df = df.sort_index(0).sort_index(1) + + # All criteria + df.to_csv(os.path.join(csv_dir, "all_criteria.csv")) + + # Axes and coordinates + for keys, name in zip([_AXIS_NAMES, _COORD_NAMES], ["axes", "coords"]): + subdf = df.loc[sorted(keys)].dropna(1, how="all") + subdf = subdf.dropna(1, how="all").transpose() + subdf.to_csv(os.path.join(csv_dir, f"{name}_criteria.csv")) + + +def make_regex_csv(): + """ + Make regex tables: + _build/csv/all_regex.csv + """ + + csv_dir = "_build/csv" + os.makedirs(csv_dir, exist_ok=True) + df = DataFrame(regex, index=[0]) + df = df.applymap(lambda x: f"``{x}``") + df = df.sort_index(1).transpose() + df.to_csv(os.path.join(csv_dir, "all_regex.csv"), header=False) + + +if __name__ == "__main__": + main() diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 41fd13f6..5da8548f 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -1,15 +1,28 @@ +import itertools +from textwrap import dedent + import matplotlib as mpl import numpy as np import pandas as pd import pytest import xarray as xr from matplotlib import pyplot as plt +from xarray import Dataset from xarray.testing import assert_allclose, assert_identical import cf_xarray # noqa +from ..datasets import ( + airds, + anc, + ds_no_attrs, + forecast, + mollwds, + multiple, + popds, + romsds, +) from . import raise_if_dask_computes -from .datasets import airds, anc, ds_no_attrs, multiple, popds mpl.use("Agg") @@ -19,17 +32,99 @@ objects = datasets + dataarrays -def test_describe(capsys): - airds.cf.describe() - actual = capsys.readouterr().out - expected = ( - "Axes:\n\tX: ['lon']\n\tY: ['lat']\n\tZ: []\n\tT: ['time']\n" - "\nCoordinates:\n\tlongitude: ['lon']\n\tlatitude: ['lat']" - "\n\tvertical: []\n\ttime: ['time']\n" - "\nCell Measures:\n\tarea: ['cell_area']\n\tvolume: []\n" - "\nStandard Names:\n\tair_temperature: ['air']\n\n" - ) - assert actual == expected +def assert_dicts_identical(dict1, dict2): + assert dict1.keys() == dict2.keys() + for k in dict1: + assert_identical(dict1[k], dict2[k]) + + +def test_repr(): + # Dataset. 
+ # Stars: axes, coords, and std names + actual = airds.cf.__repr__() + expected = """\ + Coordinates: + - CF Axes: * X: ['lon'] + * Y: ['lat'] + * T: ['time'] + Z: n/a + + - CF Coordinates: * longitude: ['lon'] + * latitude: ['lat'] + * time: ['time'] + vertical: n/a + + - Cell Measures: area: ['cell_area'] + volume: n/a + + - Standard Names: * latitude: ['lat'] + * longitude: ['lon'] + * time: ['time'] + + - Bounds: n/a + + Data Variables: + - Cell Measures: area, volume: n/a + + - Standard Names: air_temperature: ['air'] + + - Bounds: n/a + """ + assert actual == dedent(expected) + + # DataArray (Coordinates section same as Dataset) + assert airds.cf.__repr__().startswith(airds["air"].cf.__repr__()) + actual = airds["air"].cf.__repr__() + expected = """\ + Coordinates: + - CF Axes: * X: ['lon'] + * Y: ['lat'] + * T: ['time'] + Z: n/a + + - CF Coordinates: * longitude: ['lon'] + * latitude: ['lat'] + * time: ['time'] + vertical: n/a + + - Cell Measures: area: ['cell_area'] + volume: n/a + + - Standard Names: * latitude: ['lat'] + * longitude: ['lon'] + * time: ['time'] + + - Bounds: n/a + """ + assert actual == dedent(expected) + + # Empty Standard Names + actual = popds.cf.__repr__() + expected = """\ + Coordinates: + - CF Axes: * X: ['nlon'] + * Y: ['nlat'] + Z, T: n/a + + - CF Coordinates: longitude: ['TLONG', 'ULONG'] + latitude: ['TLAT', 'ULAT'] + vertical, time: n/a + + - Cell Measures: area, volume: n/a + + - Standard Names: n/a + + - Bounds: n/a + + Data Variables: + - Cell Measures: area, volume: n/a + + - Standard Names: sea_water_potential_temperature: ['TEMP'] + sea_water_x_velocity: ['UVEL'] + + - Bounds: n/a + """ + assert actual == dedent(expected) def test_axes(): @@ -52,7 +147,7 @@ def test_coordinates(): assert actual == expected -def test_cell_measures(capsys): +def test_cell_measures(): ds = airds.copy(deep=True) ds["foo"] = xr.DataArray(ds["cell_area"], attrs=dict(standard_name="foo_std_name")) ds["air"].attrs["cell_measures"] += " foo_measure: foo" @@ -68,13 +163,20 @@ def test_cell_measures(capsys): actual = ds.cf.cell_measures assert actual == expected - ds.cf.describe() - actual = capsys.readouterr().out - expected = ( - "\nCell Measures:\n\tarea: ['cell_area']\n\tfoo_measure: ['foo']\n\tvolume: ['foo']\n" - "\nStandard Names:\n\tair_temperature: ['air']\n\tfoo_std_name: ['foo']\n\n" - ) - assert actual.endswith(expected) + # Additional cell measure in repr + actual = ds.cf.__repr__() + expected = """\ + Data Variables: + - Cell Measures: foo_measure: ['foo'] + volume: ['foo'] + area: n/a + + - Standard Names: air_temperature: ['air'] + foo_std_name: ['foo'] + + - Bounds: n/a + """ + assert actual.endswith(dedent(expected)) def test_standard_names(): @@ -95,9 +197,13 @@ def test_getitem_standard_name(): expected = airds["air"] assert_identical(actual, expected) + actual = airds.lat.cf["latitude"] + expected = airds["lat"] + assert_identical(actual, expected) + ds = airds.copy(deep=True) ds["air2"] = ds.air - with pytest.raises(ValueError): + with pytest.raises(KeyError): ds.cf["air_temperature"] actual = ds.cf[["air_temperature"]] expected = ds[["air", "air2"]] @@ -125,19 +231,60 @@ def test_getitem_ancillary_variables(): def test_rename_like(): original = popds.copy(deep=True) - with pytest.raises(KeyError): - popds.cf.rename_like(airds) + # it'll match for axis: X (lon, nlon) and coordinate="longitude" (lon, TLONG) + # so delete the axis attributes + newair = airds.copy(deep=True) + del newair.lon.attrs["axis"] + del newair.lat.attrs["axis"] - renamed = 
popds.cf["TEMP"].cf.rename_like(airds) + renamed = popds.cf["TEMP"].cf.rename_like(newair) for k in ["TLONG", "TLAT"]: assert k not in renamed.coords assert k in original.coords - assert original.TEMP.attrs["coordinates"] == "TLONG TLAT" + assert original.TEMP.attrs["coordinates"] == "TLONG TLAT" assert "lon" in renamed.coords assert "lat" in renamed.coords assert renamed.attrs["coordinates"] == "lon lat" + # standard name matching + newroms = romsds.expand_dims(latitude=[1], longitude=[1]).cf.guess_coord_axis() + renamed = popds.cf["UVEL"].cf.rename_like(newroms) + assert renamed.attrs["coordinates"] == "longitude latitude" + assert "longitude" in renamed.coords + assert "latitude" in renamed.coords + assert "ULON" not in renamed.coords + assert "ULAT" not in renamed.coords + + # should change "temp" to "TEMP" + renamed = romsds.cf.rename_like(popds) + assert "temp" not in renamed + assert "TEMP" in renamed + + # skip conflicting variables + da = popds.cf["TEMP"] + with pytest.warns(UserWarning, match="Conflicting variables skipped:.*"): + expected = {"longitude": ["TLONG"], "latitude": ["TLAT"]} + actual = da.cf.rename_like(airds).cf.coordinates + assert expected == actual + expected = {"longitude": ["lon"], "latitude": ["lat"]} + actual = da.cf.rename_like(airds, skip="axes").cf.coordinates + assert expected == actual + + # rename bounds + original = airds.cf[["air"]].cf.add_bounds("lon") + other = popds.cf[["TEMP"]].cf.add_bounds("nlon") + renamed = original.cf.rename_like(other, skip="coordinates") + assert renamed.cf.bounds["nlon"] == ["nlon_bounds"] + + # rename cell measures + other = airds.cf["air"].cf.rename(area="CELL_AREA") + other.attrs["cell_measures"] = other.attrs["cell_measures"].replace( + "cell_area", "CELL_AREA" + ) + renamed = airds.cf["air"].cf.rename_like(other) + assert renamed.cf.cell_measures["area"] == ["CELL_AREA"] + @pytest.mark.parametrize("obj", objects) @pytest.mark.parametrize( @@ -148,15 +295,7 @@ def test_rename_like(): ("groupby", {"group": "time"}, {"group": "T"}), ("groupby", {"group": "time.month"}, {"group": "T.month"}), ("groupby_bins", {"group": "lat", "bins": 5}, {"group": "latitude", "bins": 5}), - pytest.param( - "coarsen", - {"lon": 2, "lat": 5}, - {"X": 2, "Y": 5}, - marks=pytest.mark.skip( - reason="xarray GH4120. any test after this will fail since attrs are lost" - ), - ), - # groupby("time.day")? 
+ ("coarsen", {"lon": 2, "lat": 5}, {"X": 2, "Y": 5}), ), ) def test_wrapped_classes(obj, attr, xrkwargs, cfkwargs): @@ -192,8 +331,13 @@ def test_weighted(obj): with raise_if_dask_computes(max_computes=2): # weights are checked for nans expected = obj.weighted(obj["cell_area"]).sum("lat") - actual = obj.cf.weighted("area").sum("Y") - assert_identical(expected, actual) + actuals = [ + obj.cf.weighted("area").sum("Y"), + obj.cf.weighted(obj["cell_area"]).sum("Y"), + obj.cf.weighted(weights=obj["cell_area"]).sum("Y"), + ] + for actual in actuals: + assert_identical(expected, actual) @pytest.mark.parametrize("obj", objects) @@ -280,18 +424,26 @@ def test_dataarray_getitem(): with pytest.raises(KeyError): air.cf[["longitude"]] with pytest.raises(KeyError): - air.cf[["longitude", "latitude"]], + air.cf[["longitude", "latitude"]] + air["cell_area"].attrs["standard_name"] = "area_grid_cell" + assert_identical(air.cf["area_grid_cell"], air.cell_area.reset_coords(drop=True)) -@pytest.mark.parametrize("obj", dataarrays) -def test_dataarray_plot(obj): - rv = obj.isel(time=1).cf.plot(x="X", y="Y") +def test_dataarray_plot(): + + obj = airds.air + + rv = obj.isel(time=1).transpose("lon", "lat").cf.plot() assert isinstance(rv, mpl.collections.QuadMesh) + assert all(v > 180 for v in rv.axes.get_xlim()) + assert all(v < 200 for v in rv.axes.get_ylim()) plt.close() - rv = obj.isel(time=1).cf.plot.contourf(x="X", y="Y") + rv = obj.isel(time=1).transpose("lon", "lat").cf.plot.contourf() assert isinstance(rv, mpl.contour.QuadContourSet) + assert all(v > 180 for v in rv.axes.get_xlim()) + assert all(v < 200 for v in rv.axes.get_ylim()) plt.close() rv = obj.cf.plot(x="X", y="Y", col="T") @@ -301,7 +453,34 @@ def test_dataarray_plot(obj): plt.close() rv = obj.isel(lat=[0, 1], lon=1).cf.plot.line(x="T", hue="Y") - assert all([isinstance(line, mpl.lines.Line2D) for line in rv]) + assert all(isinstance(line, mpl.lines.Line2D) for line in rv) + plt.close() + + # set y automatically + rv = obj.isel(time=0, lon=1).cf.plot.line() + np.testing.assert_equal(rv[0].get_ydata(), obj.lat.data) + plt.close() + + # don't set y automatically + rv = obj.isel(time=0, lon=1).cf.plot.line(x="lat") + np.testing.assert_equal(rv[0].get_xdata(), obj.lat.data) + plt.close() + + rv = obj.isel(time=0, lon=1).cf.plot(x="lat") + np.testing.assert_equal(rv[0].get_xdata(), obj.lat.data) + plt.close() + + # various line plots and automatic guessing + rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot.line() + np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) + plt.close() + + # rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot(hue="Y") + # np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) + # plt.close() + + rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot.line() + np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) plt.close() obj = obj.copy(deep=True) @@ -324,18 +503,14 @@ def test_dataset_plot(obj): ("longitude", "lon"), ("latitude", "lat"), ("time", "time"), - pytest.param( - "area", - "cell_area", - marks=pytest.mark.xfail(reason="measures not implemented for dataset"), - ), + ("area", "cell_area"), ), ) def test_getitem(obj, key, expected_key): assert key in obj.cf actual = obj.cf[key] - expected = obj[expected_key] + expected = obj[expected_key].reset_coords(drop=True) assert_identical(actual, expected) @@ -349,11 +524,51 @@ def test_getitem_errors(obj): obj2.cf["X"] -def test_getitem_regression(): +def test_getitem_ignores_bad_measure_attribute(): + air2 = airds.copy(deep=True) + air2.air.attrs["cell_measures"] = "asd" + with 
pytest.warns(UserWarning): + assert_identical(air2.air.drop_vars("cell_area"), air2.cf["air"]) + + with pytest.raises(ValueError): + air2.cf.cell_measures + with pytest.raises(ValueError): + air2.air.cf.cell_measures + with pytest.raises(ValueError): + air2.cf.get_associated_variable_names("air", error=True) + with pytest.warns(UserWarning): + air2.cf.get_associated_variable_names("air", error=False) + + +def test_getitem_clash_standard_name(): ds = xr.Dataset() ds.coords["area"] = xr.DataArray(np.ones(10), attrs={"standard_name": "cell_area"}) assert_identical(ds.cf["cell_area"], ds["area"].reset_coords(drop=True)) + ds = xr.Dataset() + ds["time"] = ( + "time", + np.arange(10), + {"standard_name": "time", "bounds": "time_bounds"}, + ) + ds["time_bounds"] = ( + ("time", "bounds"), + np.ones((10, 2)), + {"standard_name": "time"}, + ) + + ds["lat"] = ( + "lat", + np.arange(10), + {"units": "degrees_north", "bounds": "lat_bounds"}, + ) + ds["lat_bounds"] = ( + ("lat", "bounds"), + np.ones((10, 2)), + {"units": "degrees_north"}, + ) + assert_identical(ds["lat"], ds.cf["latitude"]) + def test_getitem_uses_coordinates(): # POP-like dataset @@ -384,7 +599,7 @@ def test_plot_xincrease_yincrease(): ds.lon.attrs["positive"] = "down" ds.lat.attrs["positive"] = "down" - f, ax = plt.subplots(1, 1) + _, ax = plt.subplots(1, 1) ds.air.isel(time=1).cf.plot(ax=ax, x="X", y="Y") for lim in [ax.get_xlim(), ax.get_ylim()]: @@ -394,7 +609,7 @@ def test_plot_xincrease_yincrease(): @pytest.mark.parametrize("dims", ["lat", "time", ["lat", "lon"]]) @pytest.mark.parametrize("obj", [airds]) def test_add_bounds(obj, dims): - expected = dict() + expected = {} expected["lat"] = xr.concat( [ obj.lat.copy(data=np.arange(76.25, 16.0, -2.5)), @@ -434,9 +649,18 @@ def test_add_bounds(obj, dims): assert added[dim].attrs["bounds"] == name assert_allclose(added[name].reset_coords(drop=True), expected[dim]) + # Test multiple dimensions + assert not {"x1_bounds", "x2_bounds"} <= set(multiple.variables) + assert {"x1_bounds", "x2_bounds"} <= set(multiple.cf.add_bounds("X").variables) + def test_bounds(): ds = airds.copy(deep=True).cf.add_bounds("lat") + + actual = ds.cf.bounds + expected = {"Y": ["lat_bounds"], "lat": ["lat_bounds"], "latitude": ["lat_bounds"]} + assert ds.cf.bounds == expected + actual = ds.cf[["lat"]] expected = ds[["lat", "lat_bounds"]] assert_identical(actual, expected) @@ -454,6 +678,28 @@ def test_bounds(): expected = ds["lat_bounds"] assert_identical(actual, expected) + # Do not attempt to get bounds when extracting a DataArray + # raise a warning when extracting a Dataset and bounds do not exists + ds["time"].attrs["bounds"] = "foo" + with pytest.warns(None) as record: + ds.cf["air"] + assert len(record) == 0 + with pytest.warns(UserWarning, match="{'foo'} not found in object"): + ds.cf[["air"]] + + # Dataset has bounds + expected = """\ + - Bounds: Y: ['lat_bounds'] + lat: ['lat_bounds'] + latitude: ['lat_bounds'] + """ + assert dedent(expected) in ds.cf.__repr__() + + # DataArray does not have bounds + expected = airds.cf["air"].cf.__repr__() + actual = ds.cf["air"].cf.__repr__() + assert actual == expected + def test_bounds_to_vertices(): # All available @@ -481,34 +727,115 @@ def test_bounds_to_vertices(): assert "time_bounds" in dsc +def test_get_bounds_dim_name(): + ds = airds.copy(deep=True).cf.add_bounds("lat") + assert ds.cf.get_bounds_dim_name("latitude") == "bounds" + assert ds.cf.get_bounds_dim_name("lat") == "bounds" + + assert mollwds.cf.get_bounds_dim_name("longitude") == "bounds" + assert 
mollwds.cf.get_bounds_dim_name("lon") == "bounds" + + def test_docstring(): assert "One of ('X'" in airds.cf.groupby.__doc__ + assert "Time variable accessor e.g. 'T.month'" in airds.cf.groupby.__doc__ assert "One or more of ('X'" in airds.cf.mean.__doc__ + assert "present in .dims" in airds.cf.drop_dims.__doc__ + assert "present in .coords" in airds.cf.integrate.__doc__ + assert "present in .indexes" in airds.cf.resample.__doc__ + + # Make sure docs are up to date + get_all_doc = cf_xarray.accessor._get_all.__doc__ + all_keys = ( + cf_xarray.accessor._AXIS_NAMES + + cf_xarray.accessor._COORD_NAMES + + cf_xarray.accessor._CELL_MEASURES + ) + expected = f"One or more of {all_keys!r}, or arbitrary measures, or standard names" + assert get_all_doc.split() == expected.split() + for name in ["dims", "indexes", "coords"]: + actual = getattr(cf_xarray.accessor, f"_get_{name}").__doc__ + expected = get_all_doc + f" present in .{name}" + assert actual.split() == expected.split() + + +def _make_names(prefixes): + suffixes = ["", "a", "_a", "0", "_0", "a_0a"] + return [ + f"{prefix}{suffix}" for prefix, suffix in itertools.product(prefixes, suffixes) + ] + + +_TIME_NAMES = ["t"] + _make_names( + [ + "time", + "min", + "hour", + "day", + "week", + "month", + "year", + ] +) +_VERTICAL_NAMES = _make_names( + [ + "z", + "lv_1", + "bottom_top", + "sigma", + "sigma_w", + "hght", + "height", + "altitude", + "depth", + "isobaric", + "pressure", + "isotherm", + "gdep", + "nav_lev", + ] +) +_X_NAMES = _make_names(["x", "nlon", "i", "ni"]) +_Y_NAMES = _make_names(["y", "nlat", "j", "nj"]) +_Z_NAMES = _VERTICAL_NAMES + ["olevel", "level", "zlevel"] +_LATITUDE_NAMES = _make_names(["lat", "latitude", "gphi", "nav_lat"]) +_LONGITUDE_NAMES = _make_names(["lon", "longitude", "glam", "nav_lon"]) + + +@pytest.mark.parametrize( + "kind, names", + [ + ["X", _X_NAMES], + ["Y", _Y_NAMES], + ["Z", _Z_NAMES], + ["T", _TIME_NAMES], + ["latitude", _LATITUDE_NAMES], + ["longitude", _LONGITUDE_NAMES], + ], +) +def test_guess_coord_axis(kind, names): + from cf_xarray.accessor import ATTRS + for varname in names: + ds = xr.Dataset() + ds[varname] = (varname, [1, 2, 3, 4, 5]) + dsnew = ds.cf.guess_coord_axis() + assert dsnew[varname].attrs == ATTRS[kind] -def test_guess_coord_axis(): + varname = varname.upper() + ds[varname] = (varname, [1, 2, 3, 4, 5]) + dsnew = ds.cf.guess_coord_axis() + assert dsnew[varname].attrs == ATTRS[kind] + + +def test_guess_coord_axis_datetime(): ds = xr.Dataset() ds["time"] = ("time", pd.date_range("2001-01-01", "2001-04-01")) - ds["lon_rho"] = ("lon_rho", [1, 2, 3, 4, 5]) - ds["lat_rho"] = ("lat_rho", [1, 2, 3, 4, 5]) - ds["x1"] = ("x1", [1, 2, 3, 4, 5]) - ds["y1"] = ("y1", [1, 2, 3, 4, 5]) - dsnew = ds.cf.guess_coord_axis() assert dsnew.time.attrs == {"standard_name": "time", "axis": "T"} - assert dsnew.lon_rho.attrs == { - "standard_name": "longitude", - "units": "degrees_east", - } - assert dsnew.lat_rho.attrs == { - "standard_name": "latitude", - "units": "degrees_north", - } - assert dsnew.x1.attrs == {"axis": "X"} - assert dsnew.y1.attrs == {"axis": "Y"} -def test_dicts(): +def test_attributes(): actual = airds.cf.sizes expected = {"X": 50, "Y": 25, "T": 4, "longitude": 50, "latitude": 25, "time": 4} assert actual == expected @@ -539,6 +866,37 @@ def test_dicts(): expected = {"lon": 50, "Y": 25, "T": 4, "latitude": 25, "time": 4} assert actual == expected + actual = popds.cf.data_vars + expected = { + "sea_water_x_velocity": popds.cf["UVEL"], + "sea_water_potential_temperature": popds.cf["TEMP"], 
+ } + assert_dicts_identical(actual, expected) + + actual = multiple.cf.data_vars + expected = dict(multiple.data_vars) + assert_dicts_identical(actual, expected) + + # check that data_vars contains ancillary variables + assert_identical(anc.cf.data_vars["specific_humidity"], anc.cf["specific_humidity"]) + + # clash between var name and "special" CF name + # Regression test for #126 + data = np.random.rand(4, 3) + times = pd.date_range("2000-01-01", periods=4) + locs = [30, 60, 90] + coords = [("time", times, {"axis": "T"}), ("space", locs)] + foo = xr.DataArray(data, coords, dims=["time", "space"]) + ds1 = xr.Dataset({"T": foo}) + assert_identical(ds1.cf.data_vars["T"], ds1["T"]) + + # multiple latitudes but only one latitude data_var + ds = popds.copy(deep=True) + for var in ["ULAT", "TLAT"]: + ds[var].attrs["standard_name"] = "latitude" + ds = ds.reset_coords("ULAT") + assert_identical(ds.cf.data_vars["latitude"], ds.cf["ULAT"]) + def test_missing_variable_in_coordinates(): airds.air.attrs["coordinates"] = "lat lon time" @@ -548,7 +906,7 @@ def test_missing_variable_in_coordinates(): def test_Z_vs_vertical_ROMS(): - from .datasets import romsds + from ..datasets import romsds assert_identical(romsds.s_rho.reset_coords(drop=True), romsds.temp.cf["Z"]) assert_identical( @@ -574,8 +932,6 @@ def test_Z_vs_vertical_ROMS(): def test_param_vcoord_ocean_s_coord(): - from .datasets import romsds - romsds.s_rho.attrs["standard_name"] = "ocean_s_coordinate_g2" Zo_rho = (romsds.hc * romsds.s_rho + romsds.Cs_r * romsds.h) / ( romsds.hc + romsds.h @@ -606,3 +962,270 @@ def test_param_vcoord_ocean_s_coord(): copy.s_rho.attrs["formula_terms"] = "s: s_rho C: Cs_r depth: h depth_c: hc" with pytest.raises(KeyError): copy.cf.decode_vertical_coords() + + +def test_formula_terms(): + srhoterms = { + "s": "s_rho", + "C": "Cs_r", + "eta": "zeta", + "depth": "h", + "depth_c": "hc", + } + assert romsds.cf.formula_terms == {"s_rho": srhoterms} + assert romsds["temp"].cf.formula_terms == srhoterms + assert romsds["s_rho"].cf.formula_terms == srhoterms + + s_rho = romsds["s_rho"].copy(deep=True) + del s_rho.attrs["standard_name"] + del s_rho.s_rho.attrs["standard_name"] # TODO: xarray bug + assert s_rho.cf.formula_terms == srhoterms + + with pytest.raises(KeyError): + # x,y,t variable + romsds["zeta"].cf.formula_terms + + +def test_standard_name_mapper(): + da = xr.DataArray( + np.arange(6), + dims="time", + coords={ + "label": ( + "time", + ["A", "B", "B", "A", "B", "C"], + {"standard_name": "standard_label"}, + ) + }, + ) + + actual = da.cf.groupby("standard_label").mean() + expected = da.cf.groupby("label").mean() + assert_identical(actual, expected) + + actual = da.cf.sortby("standard_label") + expected = da.sortby("label") + assert_identical(actual, expected) + + assert cf_xarray.accessor._get_with_standard_name(da, None) == [] + + +@pytest.mark.parametrize("obj", objects) +@pytest.mark.parametrize("attr", ["drop_vars", "set_coords"]) +def test_drop_vars_and_set_coords(obj, attr): + + # DataArray object has no attribute set_coords + if not isinstance(obj, Dataset) and attr == "set_coords": + return + + # Get attribute + expected = getattr(obj, attr) + actual = getattr(obj.cf, attr) + + # Axis + assert_identical(expected("lon"), actual("X")) + # Coordinate + assert_identical(expected("lon"), actual("longitude")) + # Cell measure + assert_identical(expected("cell_area"), actual("area")) + # Variables + if isinstance(obj, Dataset): + assert_identical(expected("air"), actual("air_temperature")) + 
assert_identical(expected(obj.variables), actual(obj.cf.keys())) + + +@pytest.mark.parametrize("obj", objects) +def test_drop_sel_and_reset_coords(obj): + + # Axis + assert_identical(obj.drop_sel(lat=75), obj.cf.drop_sel(Y=75)) + # Coordinate + assert_identical(obj.drop_sel(lat=75), obj.cf.drop_sel(latitude=75)) + + # Cell measure + assert_identical(obj.reset_coords("cell_area"), obj.cf.reset_coords("area")) + # Variable + if isinstance(obj, Dataset): + assert_identical( + obj.reset_coords("air"), obj.cf.reset_coords("air_temperature") + ) + + +@pytest.mark.parametrize("ds", datasets) +def test_drop_dims(ds): + + # Add data_var and coord to test _get_dims + ds["lon_var"] = ds["lon"] + ds = ds.assign_coords(lon_coord=ds["lon"]) + + # Axis and coordinate + for cf_name in ["X", "longitude"]: + assert_identical(ds.drop_dims("lon"), ds.cf.drop_dims(cf_name)) + + +@pytest.mark.parametrize("obj", objects) +def test_rename(obj): + + cf_dict = { + "air_temperature" if isinstance(obj, Dataset) else "longitude": "renamed" + } + xr_dict = {"air" if isinstance(obj, Dataset) else "lon": "renamed"} + assert_identical(obj.rename(xr_dict), obj.cf.rename(cf_dict)) + assert_identical(obj.rename(**xr_dict), obj.cf.rename(**cf_dict)) + + +@pytest.mark.parametrize("ds", datasets) +def test_differentiate(ds): + + # Add data_var and coord to test _get_coords + ds["lon_var"] = ds["lon"] + ds = ds.assign_coords(lon_coord=ds["lon"]) + + # Coordinate + assert_identical(ds.differentiate("lon"), ds.cf.differentiate("lon")) + + # Multiple coords (test error raised by _single) + with pytest.raises(KeyError, match=".*I expected only one."): + assert_identical(ds.differentiate("lon"), ds.cf.differentiate("X")) + + +def test_new_standard_name_mappers(): + assert_identical(forecast.cf.mean("realization"), forecast.mean("M")) + assert_identical( + forecast.cf.mean(["realization", "forecast_period"]), forecast.mean(["M", "L"]) + ) + assert_identical(forecast.cf.chunk({"realization": 1}), forecast.chunk({"M": 1})) + assert_identical(forecast.cf.isel({"realization": 1}), forecast.isel({"M": 1})) + assert_identical(forecast.cf.isel(**{"realization": 1}), forecast.isel(**{"M": 1})) + assert_identical( + forecast.cf.groupby("forecast_reference_time.month").mean(), + forecast.groupby("S.month").mean(), + ) + + +def test_possible_x_y_plot(): + from ..accessor import _possible_x_y_plot + + # choose axes + assert _possible_x_y_plot(airds.air.isel(time=1), "x") == "lon" + assert _possible_x_y_plot(airds.air.isel(time=1), "y") == "lat" + assert _possible_x_y_plot(airds.air.isel(lon=1), "y") == "lat" + assert _possible_x_y_plot(airds.air.isel(lon=1), "x") == "time" + + # choose coordinates over axes + assert _possible_x_y_plot(popds.UVEL, "x") == "ULONG" + assert _possible_x_y_plot(popds.UVEL, "y") == "ULAT" + assert _possible_x_y_plot(popds.TEMP, "x") == "TLONG" + assert _possible_x_y_plot(popds.TEMP, "y") == "TLAT" + + assert _possible_x_y_plot(popds.UVEL.drop_vars("ULONG"), "x") == "nlon" + + # choose X over T, Y over Z + def makeds(*dims): + coords = {dim: (dim, np.arange(3), {"axis": dim}) for dim in dims} + return xr.DataArray(np.zeros((3, 3)), dims=dims, coords=coords) + + yzds = makeds("Y", "Z") + assert _possible_x_y_plot(yzds, "y") == "Z" + assert _possible_x_y_plot(yzds, "x") is None + + xtds = makeds("X", "T") + assert _possible_x_y_plot(xtds, "y") is None + assert _possible_x_y_plot(xtds, "x") == "X" + + +def test_groupby_special_ops(): + cfgrouped = airds.cf.groupby_bins("latitude", np.arange(20, 50, 10)) + grouped = 
airds.groupby_bins("lat", np.arange(20, 50, 10)) + + # __iter__ + for (label, group), (cflabel, cfgroup) in zip(grouped, cfgrouped): + assert label == cflabel + assert_identical(group, cfgroup) + + # arithmetic + expected = grouped - grouped.mean() + actual = grouped - cfgrouped.mean() + assert_identical(expected, actual) + + +@pytest.mark.parametrize("obj", objects) +def test_stack(obj): + expected = obj.stack(latlon=["lat", "lon"]) + actual = obj.cf.stack(latlon=["latitude", "longitude"]) + assert_identical(expected, actual) + + actual = obj.cf.stack({"latlon": ["latitude", "longitude"]}) + assert_identical(expected, actual) + + +da = xr.DataArray( + np.arange(10)[::-1], # like ocean temperature + dims="z", + coords={"z": ("z", np.arange(10))}, + name="test", +) + + +@pytest.mark.parametrize("obj", [da, da.to_dataset()]) +def test_differentiate_positive_upward(obj): + obj.z.attrs["positive"] = "down" + expected = obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2) + assert_identical(expected, actual) + + obj.z.attrs["positive"] = "up" + expected = obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + obj.z.attrs["positive"] = "down" + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + obj = obj.isel(z=slice(None, None, -1)) + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + obj = obj.isel(z=slice(None, None, -1)) + + with xr.set_options(keep_attrs=True): + da["z"] = obj.z * -1 + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + obj = obj.isel(z=slice(None, None, -1)) + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + del obj.z.attrs["positive"] + with pytest.raises(ValueError): + obj.cf.differentiate("z", positive_upward=True) + + obj.z.attrs["positive"] = "zzz" + with pytest.raises(ValueError): + obj.cf.differentiate("z", positive_upward=True) + + +def test_cmip6_attrs(): + da = xr.DataArray( + np.ones((10, 10)), + dims=("nlon", "nlat"), + coords={ + "nlon": ( + "nlon", + np.arange(10), + {"long_name": "cell index along first dimension"}, + ), + "nlat": ( + "nlat", + np.arange(10), + {"long_name": "cell index along second dimension"}, + ), + }, + ) + assert da.cf.axes["X"] == ["nlon"] + assert da.cf.axes["Y"] == ["nlat"] diff --git a/cf_xarray/tests/test_helpers.py b/cf_xarray/tests/test_helpers.py index 046d7b8a..3fc7c3e9 100644 --- a/cf_xarray/tests/test_helpers.py +++ b/cf_xarray/tests/test_helpers.py @@ -3,7 +3,7 @@ import cf_xarray as cfxr # noqa -from .datasets import airds, mollwds +from ..datasets import airds, mollwds try: from dask.array import Array as DaskArray diff --git a/cf_xarray/tests/test_scripts.py b/cf_xarray/tests/test_scripts.py new file mode 100644 index 00000000..73221ee0 --- /dev/null +++ b/cf_xarray/tests/test_scripts.py @@ -0,0 +1,32 @@ +import os + +from cf_xarray.scripts import make_doc + + +def remove_if_exists(paths): + paths = [paths] if isinstance(paths, str) else paths + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def test_make_doc(): + + # Create/remove files from tests/, + # always return to original working directory + owd = os.getcwd() + 
os.chdir(os.path.dirname(__file__)) + try: + names = [ + "axes_criteria", + "coords_criteria", + "all_criteria", + "all_regex", + ] + tables_to_check = [f"_build/csv/{name}.csv" for name in names] + remove_if_exists(tables_to_check) + + make_doc.main() + assert all(os.path.exists(path) for path in tables_to_check) + finally: + os.chdir(owd) diff --git a/cf_xarray/tests/test_units.py b/cf_xarray/tests/test_units.py new file mode 100644 index 00000000..17398d4b --- /dev/null +++ b/cf_xarray/tests/test_units.py @@ -0,0 +1,60 @@ +r"""Tests the operation of cf_xarray's ported unit support code. + +Reused with modification from MetPy under the terms of the BSD 3-Clause License. +Copyright (c) 2017 MetPy Developers. +""" + +import pytest + +pytest.importorskip("pint") + +from ..units import units + + +def test_added_degrees_units(): + """Test that our added degrees units are present in the registry.""" + # Test equivalence of abbreviations/aliases to our defined names + assert str(units("degrees_N").units) == "degrees_north" + assert str(units("degreesN").units) == "degrees_north" + assert str(units("degree_north").units) == "degrees_north" + assert str(units("degree_N").units) == "degrees_north" + assert str(units("degreeN").units) == "degrees_north" + assert str(units("degrees_E").units) == "degrees_east" + assert str(units("degreesE").units) == "degrees_east" + assert str(units("degree_east").units) == "degrees_east" + assert str(units("degree_E").units) == "degrees_east" + assert str(units("degreeE").units) == "degrees_east" + + # Test equivalence of our defined units to base units + assert units("degrees_north") == units("degrees") + assert units("degrees_north").to_base_units().units == units.radian + assert units("degrees_east") == units("degrees") + assert units("degrees_east").to_base_units().units == units.radian + + +def test_gpm_unit(): + """Test that the gpm unit does alias to meters.""" + x = 1 * units("gpm") + assert str(x.units) == "meter" + + +def test_psu_unit(): + """Test that the psu unit are present in the registry.""" + x = 1 * units("psu") + assert str(x.units) == "practical_salinity_unit" + + +def test_percent_units(): + """Test that percent sign units are properly parsed and interpreted.""" + assert str(units("%").units) == "percent" + + +@pytest.mark.xfail(reason="not supported by pint, yet: hgrecco/pint#1295") +def test_udunits_power_syntax(): + """Test that UDUNITS style powers are properly parsed and interpreted.""" + assert units("m2 s-2").units == units.m ** 2 / units.s ** 2 + + +def test_udunits_power_syntax_parse_units(): + """Test that UDUNITS style powers are properly parsed and interpreted.""" + assert units.parse_units("m2 s-2") == units.m ** 2 / units.s ** 2 diff --git a/cf_xarray/units.py b/cf_xarray/units.py new file mode 100644 index 00000000..4aad7a4f --- /dev/null +++ b/cf_xarray/units.py @@ -0,0 +1,58 @@ +r"""Module to provide unit support via pint approximating UDUNITS/CF. + +Reused with modification from MetPy under the terms of the BSD 3-Clause License. +Copyright (c) 2015,2017,2019 MetPy Developers. +""" +import functools +import re +import warnings + +import pint +from pint import ( # noqa: F401 + DimensionalityError, + UndefinedUnitError, + UnitStrippedWarning, +) + +# Create registry, with preprocessors for UDUNITS-style powers (m2 s-2) and percent signs +units = pint.UnitRegistry( + autoconvert_offset_to_baseunit=True, + preprocessors=[ + functools.partial( + re.compile( + r"(?<=[A-Za-z])(?![A-Za-z])(? 
Mapping[Hashable, T]: - if pos_kwargs is not None: - if not is_dict_like(pos_kwargs): - raise ValueError( - "the first argument to .%s must be a dictionary" % func_name - ) - if kw_kwargs: - raise ValueError( - "cannot specify both keyword and positional " - "arguments to .%s" % func_name - ) - return pos_kwargs - else: - # Need an explicit cast to appease mypy due to invariance; see - # https://github.com/python/mypy/issues/6228 - return cast(Mapping[Hashable, T], kw_kwargs) - - -def is_dict_like(value: Any) -> bool: - return hasattr(value, "keys") and hasattr(value, "__getitem__") - - -# copied from xarray -class UncachedAccessor: - """Acts like a property, but on both classes and class instances - This class is necessary because some tools (e.g. pydoc and sphinx) - inspect classes for which property returns itself and not the - accessor. - """ +from collections import defaultdict +from typing import Any, Dict, Iterable + +from xarray import DataArray + + +def _is_datetime_like(da: DataArray) -> bool: + import numpy as np + + if np.issubdtype(da.dtype, np.datetime64) or np.issubdtype( + da.dtype, np.timedelta64 + ): + return True - def __init__(self, accessor): - self._accessor = accessor + try: + import cftime - def __get__(self, obj, cls): - if obj is None: - return self._accessor + if isinstance(da.data[0], cftime.datetime): + return True + except ImportError: + pass - return self._accessor(obj) + return False def parse_cell_methods_attr(attr: str) -> Dict[str, str]: @@ -55,7 +29,7 @@ def parse_cell_methods_attr(attr: str) -> Dict[str, str]: Parameters ---------- - attr: str + attr : str String to parse Returns @@ -67,3 +41,19 @@ def parse_cell_methods_attr(attr: str) -> Dict[str, str]: raise ValueError(f"attrs['cell_measures'] = {attr!r} is malformed.") return dict(zip(strings[slice(0, None, 2)], strings[slice(1, None, 2)])) + + +def invert_mappings(*mappings): + """Takes a set of mappings and iterates through, inverting to make a + new mapping of value: set(keys). 
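A toy run of ``invert_mappings`` with hypothetical inputs shows the inversion and the set-union deduplication described here:

```python
from collections import defaultdict

axes = {"X": ["lon"], "Y": ["lat"]}
coords = {"longitude": ["lon"], "latitude": ["lat"]}

# same merge as invert_mappings(axes, coords)
merged = defaultdict(set)
for mapping in (axes, coords):
    for k, v in mapping.items():
        for name in v:
            merged[name] |= {k}

assert dict(merged) == {"lon": {"X", "longitude"}, "lat": {"Y", "latitude"}}
```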
Keys are deduplicated to avoid clashes between + standard_name and coordinate names.""" + merged = defaultdict(set) + for mapping in mappings: + for k, v in mapping.items(): + for name in v: + merged[name] |= {k} + return merged + + +def always_iterable(obj: Any) -> Iterable: + return [obj] if not isinstance(obj, (tuple, list, set, dict)) else obj diff --git a/ci/doc.yml b/ci/doc.yml index c3f77bc5..cb226a04 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -6,6 +6,7 @@ dependencies: - python=3.8 - matplotlib-base - netcdf4 + - pooch - xarray - sphinx - nbsphinx @@ -14,6 +15,6 @@ dependencies: - ipython - ipykernel - pandas + - pydata-sphinx-theme - pip: - git+https://github.com/xarray-contrib/cf-xarray - - sphinx-book-theme diff --git a/ci/environment-no-optional-deps.yml b/ci/environment-no-optional-deps.yml new file mode 100644 index 00000000..67b99aa9 --- /dev/null +++ b/ci/environment-no-optional-deps.yml @@ -0,0 +1,13 @@ +name: cf_xarray_test +channels: + - conda-forge +dependencies: + - pytest-cov + - pytest + - pytest-xdist + - dask + - matplotlib-base + - netcdf4 + - pandas + - pooch + - xarray diff --git a/ci/environment.yml b/ci/environment.yml index 8adb46be..5c5d951c 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -9,4 +9,6 @@ dependencies: - matplotlib-base - netcdf4 - pandas + - pint + - pooch - xarray diff --git a/ci/upstream-dev-env.yml b/ci/upstream-dev-env.yml index 222f8343..69bd2a56 100644 --- a/ci/upstream-dev-env.yml +++ b/ci/upstream-dev-env.yml @@ -9,5 +9,7 @@ dependencies: - matplotlib-base - netcdf4 - pandas + - pooch - pip: - git+https://github.com/pydata/xarray + - git+https://github.com/hgrecco/pint diff --git a/doc/api.rst b/doc/api.rst index 0a66c4db..d409b429 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1,60 +1,92 @@ -.. currentmodule:: xarray - API === +.. currentmodule:: cf_xarray + +Top-level API +------------- + +.. autosummary:: + :toctree: generated/ + + bounds_to_vertices + vertices_to_bounds + + +.. currentmodule:: xarray + DataArray --------- +.. _daattr: + +Attributes +~~~~~~~~~~ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst + DataArray.cf.axes + DataArray.cf.cell_measures + DataArray.cf.coordinates + DataArray.cf.formula_terms + DataArray.cf.standard_names DataArray.cf.plot + +.. _dameth: + +Methods +~~~~~~~ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst - DataArray.cf.describe - DataArray.cf.standard_names - DataArray.cf.keys - DataArray.cf.axes - DataArray.cf.coordinates - DataArray.cf.cell_measures + DataArray.cf.__getitem__ + DataArray.cf.__repr__ + DataArray.cf.differentiate DataArray.cf.guess_coord_axis + DataArray.cf.keys DataArray.cf.rename_like Dataset ------- +.. _dsattr: + +Attributes +~~~~~~~~~~ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst + Dataset.cf.axes + Dataset.cf.bounds + Dataset.cf.cell_measures + Dataset.cf.coordinates + Dataset.cf.formula_terms + Dataset.cf.standard_names + +.. _dsmeth: + +Methods +~~~~~~~ + .. 
autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst + Dataset.cf.__getitem__ + Dataset.cf.__repr__ Dataset.cf.add_bounds + Dataset.cf.bounds_to_vertices Dataset.cf.decode_vertical_coords - Dataset.cf.describe + Dataset.cf.differentiate Dataset.cf.get_bounds - Dataset.cf.bounds_to_vertices - Dataset.cf.standard_names - Dataset.cf.keys - Dataset.cf.axes - Dataset.cf.coordinates + Dataset.cf.get_bounds_dim_name Dataset.cf.guess_coord_axis + Dataset.cf.keys Dataset.cf.rename_like - -.. currentmodule:: cf_xarray - -Top-level API -------------- - -.. autosummary:: - :toctree: generated/ - - bounds_to_vertices - vertices_to_bounds diff --git a/doc/conf.py b/doc/conf.py index c28ba2e3..170ac980 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,6 +18,9 @@ import sphinx_autosummary_accessors import cf_xarray # noqa +from cf_xarray.scripts import make_doc + +make_doc.main() # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -312,6 +315,10 @@ autosummary_generate = True autodoc_typehints = "none" - +autodoc_default_options = { + "members": True, + "undoc-members": True, + "private-members": True, +} napoleon_use_param = True napoleon_use_rtype = True diff --git a/doc/contributing.rst b/doc/contributing.rst index 5377cb34..cdd95d51 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -32,6 +32,11 @@ This dictionary contains criteria for identifying axis and coords using CF attri ~accessor.coordinate_criteria +.. csv-table:: + :file: _build/csv/all_criteria.csv + :header-rows: 1 + :stub-columns: 1 + Classes ~~~~~~~ diff --git a/doc/criteria.rst b/doc/criteria.rst new file mode 100644 index 00000000..7aca1e39 --- /dev/null +++ b/doc/criteria.rst @@ -0,0 +1,36 @@ +.. currentmodule:: xarray + +.. _criteria: + +CF Criteria +----------- + +Attributes +~~~~~~~~~~ +Criteria for identifying variables using CF attributes. + +Axes +==== + +.. csv-table:: + :file: _build/csv/axes_criteria.csv + :header-rows: 1 + :stub-columns: 1 + +Coordinates +=========== + +.. csv-table:: + :file: _build/csv/coords_criteria.csv + :header-rows: 1 + :stub-columns: 1 + + +Names +~~~~~ +Regex used by :py:meth:`DataArray.cf.guess_coord_axis` and :py:meth:`Dataset.cf.guess_coord_axis` for identifying variables using their names. + +.. 
csv-table:: + :file: _build/csv/all_regex.csv + :stub-columns: 1 + diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 64b55cbd..a331c504 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -20,11 +20,25 @@ }, "outputs": [], "source": [ - "import cf_xarray\n", + "import cf_xarray as cfxr\n", "import numpy as np\n", - "import xarray as xr\n", - "\n", - "xr.set_options(display_style=\"text\") # work around issue 57" + "import xarray as xr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`cf_xarray` works best when `xarray` keeps attributes by default.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "xr.set_options(keep_attrs=True)" ] }, { @@ -70,48 +84,8 @@ }, "outputs": [], "source": [ - "pop = xr.Dataset()\n", + "from cf_xarray.datasets import popds as pop\n", "\n", - "# set 2D coordinate variables as latitude, longitude\n", - "pop.coords[\"TLONG\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " np.ones((20, 30)),\n", - " {\"units\": \"degrees_east\"},\n", - ")\n", - "pop.coords[\"TLAT\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " 2 * np.ones((20, 30)),\n", - " {\"units\": \"degrees_north\"},\n", - ")\n", - "pop.coords[\"ULONG\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " 0.5 * np.ones((20, 30)),\n", - " {\"units\": \"degrees_east\"},\n", - ")\n", - "pop.coords[\"ULAT\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " 2.5 * np.ones((20, 30)),\n", - " {\"units\": \"degrees_north\"},\n", - ")\n", - "\n", - "# set dimensions as X, Y\n", - "pop[\"nlon\"] = (\"nlon\", np.arange(pop.sizes[\"nlon\"]), {\"axis\": \"X\"})\n", - "pop[\"nlat\"] = (\"nlat\", np.arange(pop.sizes[\"nlat\"]), {\"axis\": \"Y\"})\n", - "\n", - "# actual data vriables with coordinates attribute set\n", - "pop[\"UVEL\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " np.ones((20, 30)) * 15,\n", - " {\"coordinates\": \"ULONG ULAT\", \"standard_name\": \"sea_water_x_velocity\"},\n", - ")\n", - "pop[\"TEMP\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " np.ones((20, 30)) * 15,\n", - " {\n", - " \"coordinates\": \"TLONG TLAT\",\n", - " \"standard_name\": \"sea_water_potential_temperature\",\n", - " },\n", - ")\n", "pop" ] }, @@ -134,17 +108,18 @@ }, "outputs": [], "source": [ - "multiple = xr.Dataset()\n", - "multiple.coords[\"x1\"] = (\"x1\", range(30), {\"axis\": \"X\"})\n", - "multiple.coords[\"y1\"] = (\"y1\", range(20), {\"axis\": \"Y\"})\n", - "multiple.coords[\"x2\"] = (\"x2\", range(10), {\"axis\": \"X\"})\n", - "multiple.coords[\"y2\"] = (\"y2\", range(5), {\"axis\": \"Y\"})\n", + "from cf_xarray.datasets import multiple\n", "\n", - "multiple[\"v1\"] = ((\"x1\", \"y1\"), np.ones((30, 20)) * 15)\n", - "multiple[\"v2\"] = ((\"x2\", \"y2\"), np.ones((10, 5)) * 15)\n", "multiple" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This dataset has ancillary variables\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -156,29 +131,8 @@ }, "outputs": [], "source": [ - "# This dataset has ancillary variables\n", + "from cf_xarray.datasets import anc\n", "\n", - "anc = xr.Dataset()\n", - "anc[\"q\"] = (\n", - " (\"x\", \"y\"),\n", - " np.random.randn(10, 20),\n", - " dict(\n", - " standard_name=\"specific_humidity\",\n", - " units=\"g/g\",\n", - " ancillary_variables=\"q_error_limit q_detection_limit\",\n", - " ),\n", - ")\n", - "anc[\"q_error_limit\"] = (\n", - " (\"x\", \"y\"),\n", - " np.random.randn(10, 20),\n", - " dict(standard_name=\"specific_humidity 
standard_error\", units=\"g/g\"),\n", - ")\n", - "anc[\"q_detection_limit\"] = xr.DataArray(\n", - " 1e-3,\n", - " attrs=dict(\n", - " standard_name=\"specific_humidity detection_minimum\", units=\"g/g\"\n", - " ),\n", - ")\n", "anc" ] }, @@ -186,7 +140,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## What attributes have been discovered?\n" + "## What attributes have been discovered?\n", + "\n", + "The criteria for identifying variables using CF attributes are listed\n", + "[here](../criteria.rst).\n" ] }, { @@ -211,8 +168,7 @@ "`'X'` axis as being represented by the `lon` variable.\n", "\n", "It can also use the `standard_name` and `units` attributes to infer that `lon`\n", - "is \"Longitude\". To see variable names that `cf_xarray` can infer, use\n", - "`.cf.describe()`\n" + "is \"Longitude\". To see variable names that `cf_xarray` can infer, use `ds.cf`\n" ] }, { @@ -226,7 +182,7 @@ }, "outputs": [], "source": [ - "ds.cf.describe()" + "ds.cf" ] }, { @@ -249,7 +205,7 @@ }, "outputs": [], "source": [ - "pop.cf.describe()" + "pop.cf" ] }, { @@ -270,7 +226,7 @@ }, "outputs": [], "source": [ - "multiple.cf.describe()" + "multiple.cf" ] }, { @@ -464,6 +420,13 @@ "pop.cf[[\"sea_water_potential_temperature\", \"UVEL\"]]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that ancillary variables are included as coordinate variables\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -635,15 +598,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Feature: Renaming coordinate variables\n", + "## Feature: Renaming variables\n", "\n", - "`cf_xarray` lets you rewrite coordinate variables in one dataset to like\n", - "variables in another dataset. This can only be done when a one-to-one mapping is\n", - "possible\n", + "`cf_xarray` lets you rewrite variables in one dataset to like variables in\n", + "another dataset.\n", "\n", - "In this example, `TLONG` and `TLAT` are renamed to `lon` and `lat` i.e. their\n", - "counterparts in `ds`. Note the the `coordinates` attribute is appropriately\n", - "changed.\n" + "In this example, a one-to-one mapping is not possible and the coordinate\n", + "variables are not renamed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "da = pop.cf[\"TEMP\"]\n", + "da.cf.rename_like(ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we exclude all axes (variables with `axis` attribute), a one-to-one mapping\n", + "is possible. In this example, `TLONG` and `TLAT` are renamed to `lon` and `lat`\n", + "i.e. their counterparts in `ds`. 
Note the `coordinates` attribute is\n", + "appropriately changed.\n" ] }, { @@ -657,7 +638,7 @@ }, "outputs": [], "source": [ - "pop.cf[\"TEMP\"].cf.rename_like(ds)" + "da.cf.rename_like(ds, skip=\"axes\")" ] }, { @@ -949,6 +930,7 @@ " * 110e3\n", ")\n", "# and set proper attributes\n", + "ds[\"cell_area\"].attrs = dict(standard_name=\"cell_area\", units=\"m2\")\n", "ds.air.attrs[\"cell_measures\"] = \"area: cell_area\"" ] }, @@ -987,15 +969,20 @@ "ds_bnds" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also convert each bounds variable independently with the top-level\n", + "functions.\n" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# We can convert each bounds variable independently with the helper:\n", - "import cf_xarray as cfxr\n", - "\n", "lat_bounds = ds_bnds.cf.get_bounds(\"latitude\")\n", "\n", "lat_vertices = cfxr.bounds_to_vertices(lat_bounds, bounds_dim=\"bounds\")\n", @@ -1030,7 +1017,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.8.10" }, "toc": { "base_numbering": 1, diff --git a/doc/index.rst b/doc/index.rst index b0150926..1e999316 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -39,6 +39,7 @@ Table of contents :maxdepth: 2 examples/introduction + criteria whats-new roadmap contributing diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a00f5d8d..9f444669 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -1,19 +1,60 @@ +.. currentmodule:: xarray + What's New ---------- -v0.4.0 (unreleased) +v0.5.3 (unreleased) =================== +- Begin adding support for units with a unit registry for pint arrays. :pr:`197`. + By `Jon Thielen`_ and `Justus Magin`_. +- :py:meth:`Dataset.cf.rename_like` also updates the ``bounds`` and ``cell_measures`` attributes. By `Mattia Almansi`_. + +v0.5.2 (May 11, 2021) +===================== + +- Add some explicit support for CMIP6 output. By `Deepak Cherian`_. +- Replace the ``dims`` argument of :py:meth:`Dataset.cf.add_bounds` with ``keys``, allowing the use of CF keys. By `Mattia Almansi`_. +- Added :py:attr:`DataArray.cf.formula_terms` and :py:attr:`Dataset.cf.formula_terms`. + By `Deepak Cherian`_. +- Added :py:attr:`Dataset.cf.bounds` to return a dictionary mapping valid keys to the variable names of their bounds. By `Mattia Almansi`_. +- :py:meth:`DataArray.cf.differentiate` and :py:meth:`Dataset.cf.differentiate` can optionally correct + the sign of the derivative by interpreting the ``"positive"`` attribute. By `Deepak Cherian`_. + +v0.5.1 (Feb 24, 2021) +===================== + +Minor bugfix release, thanks to `Pascal Bourgault`_. + +v0.5.0 (Feb 24, 2021) +===================== + +- Replace ``cf.describe()`` with :py:meth:`Dataset.cf.__repr__`. By `Mattia Almansi`_. +- Automatically set ``x`` or ``y`` for :py:attr:`DataArray.cf.plot`. By `Deepak Cherian`_. +- Added scripts to document :ref:`criteria` with tables. By `Mattia Almansi`_. +- Support for ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. +- Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_. +- Allow :py:meth:`DataArray.cf.__getitem__` with standard names. By `Deepak Cherian`_. +- Rewrite the ``values`` of :py:attr:`Dataset.coords` and :py:attr:`Dataset.data_vars` with objects returned + by :py:meth:`Dataset.cf.__getitem__`. This allows extraction of DataArrays when there are clashes
+  between DataArray names and "special" CF names like ``T``.
+  (:issue:`129`, :pr:`130`). By `Deepak Cherian`_.
+- Retrieve bounds dimension name with :py:meth:`Dataset.cf.get_bounds_dim_name`. By `Pascal Bourgault`_.
+- Fix iteration and arithmetic with ``GroupBy`` objects. By `Deepak Cherian`_.
+
+v0.4.0 (Jan 22, 2021)
+=====================
 
 - Support for arbitrary cell measures indexing. By `Mattia Almansi`_.
+- Avoid using ``grid_latitude`` and ``grid_longitude`` for detecting latitude and longitude variables.
+  By `Pascal Bourgault`_.
 
 v0.3.1 (Nov 25, 2020)
 =====================
 
-- Support ``Dataset.cf.cell_measures``. By `Deepak Cherian`_.
-- Added ``.axes`` to return a dictionary mapping available Axis standard names to variable names of an xarray object, ``.coordinates`` for Coordinates,
-  ``.cell_measures`` for Cell Measures, and ``.standard_names`` for all variables. `Kristen Thyng`_ and `Mattia Almansi`_.
-- Changed ``get_valid_keys()`` to ``.keys()``. `Kristen Thyng`_.
-- Added ``.cf.decode_vertical_coords`` for decoding of parameterized vertical coordinate variables.
+- Support :py:attr:`Dataset.cf.cell_measures`. By `Deepak Cherian`_.
+- Added :py:attr:`Dataset.cf.axes` to return a dictionary mapping available Axis standard names to variable names of an xarray object, :py:attr:`Dataset.cf.coordinates` for Coordinates, :py:attr:`Dataset.cf.cell_measures` for Cell Measures, and :py:attr:`Dataset.cf.standard_names` for all variables. `Kristen Thyng`_ and `Mattia Almansi`_.
+- Changed :py:meth:`Dataset.cf.get_valid_keys` to :py:meth:`Dataset.cf.keys`. `Kristen Thyng`_.
+- Added :py:meth:`Dataset.cf.decode_vertical_coords` for decoding of parameterized vertical coordinate variables.
   (:issue:`34`, :pr:`103`). `Deepak Cherian`_.
-- Added top-level ``bounds_to_vertices`` and ``vertices_to_bounds`` as well as ``.cf.bounds_to_vertices``
+- Added top-level :py:func:`~cf_xarray.bounds_to_vertices` and :py:func:`~cf_xarray.vertices_to_bounds` as well as :py:meth:`Dataset.cf.bounds_to_vertices`
   to convert from coordinate bounds in a CF format (shape (nx, 2)) to a vertices format (shape (nx+1)).
   (:pr:`108`). `Pascal Bourgault`_.
@@ -29,7 +70,7 @@ model in particular. Thanks to Kristen Thyng for opening many issues.
 v0.2.1 (Aug 06, 2020)
 =====================
 - Support for the ``bounds`` attribute. (:pr:`68`, :issue:`32`). `Deepak Cherian`_.
-- Add ``.cf.guess_coord_axis`` to automagically guess axis and coord names, and add
+- Add :py:meth:`Dataset.cf.guess_coord_axis` to automagically guess axis and coord names, and add
   appropriate attributes. (:pr:`67`, :issue:`46`). `Deepak Cherian`_.
 
 v0.2.0 (Jul 28, 2020)
 =====================
 
 - ``cf_xarray`` is now available on conda-forge. Thanks to `Anderson Banihirwe`_ and `Filipe Fernandes`_
 - Remap datetime accessor syntax for groupby. E.g. ``.cf.groupby("T.month")`` → ``.cf.groupby("ocean_time.month")``. (:pr:`64`, :issue:`6`). `Julia Kent`_.
-- Added ``.cf.rename_like`` to rename matching variables. Only coordinate variables
+- Added :py:meth:`Dataset.cf.rename_like` to rename matching variables. Only coordinate variables
   i.e. those that match the criteria for ``("latitude", "longitude", "vertical", "time")``
   are renamed for now. (:pr:`55`) `Deepak Cherian`_.
-- Added ``.cf.add_bounds`` to add guessed bounds for 1D coordinates. (:pr:`53`) `Deepak Cherian`_.
+- Added :py:meth:`Dataset.cf.add_bounds` to add guessed bounds for 1D coordinates. (:pr:`53`) `Deepak Cherian`_.
 
 v0.1.5
 ======
 
+
+- Begin documenting things for contributors in :ref:`contribut`.
+- Parse ``ancillary_variables`` attribute. These variables are converted to coordinate variables.
+- Support :py:meth:`Dataset.reset_index`.
 - Wrap ``.sizes`` and ``.chunks``. (:pr:`42`) `Deepak Cherian`_.
 
       >>> ds.cf.sizes
       {'X': 53, 'Y': 25, 'T': 2920, 'longitude': 53, 'latitude': 25, 'time': 2920}
 
-- Begin documenting things for contributors in :ref:`contribut`.
-- Parse ``ancillary_variables`` attribute. These variables are converted to coordinate variables.
-- Support ``reset_index``
 
 v0.1.4
 ======
@@ -66,6 +108,8 @@ v0.1.3
 - Support expanding key to multiple dimension names.
 
 .. _`Mattia Almansi`: https://github.com/malmans2
+.. _`Justus Magin`: https://github.com/keewis
+.. _`Jon Thielen`: https://github.com/jthielen
 .. _`Anderson Banihirwe`: https://github.com/andersy005
 .. _`Pascal Bourgault`: https://github.com/aulemahal
 .. _`Deepak Cherian`: https://github.com/dcherian
diff --git a/setup.cfg b/setup.cfg
index 2c8128c7..af5a4284 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -41,7 +41,7 @@ skip_gitignore = true
 force_to_top = true
 default_section = THIRDPARTY
 known_first_party = cf_xarray
-known_third_party = dask,matplotlib,numpy,pandas,pytest,setuptools,sphinx_autosummary_accessors,xarray
+known_third_party = dask,matplotlib,numpy,pandas,pint,pkg_resources,pytest,setuptools,sphinx_autosummary_accessors,xarray
 
 # Most of the numerical computing stack doesn't have type annotations yet.
 [mypy-affine.*]
@@ -116,6 +116,7 @@ test = pytest
 nobeep = True
 
 [rstcheck]
-ignore_roles=pr,issue
-ignore_directives=ipython,autodata
+report=warning
+ignore_roles=pr,issue,py:meth,py:attr
+ignore_directives=ipython,autodata,csv-table
 ignore_messages=(is not referenced\.$)
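
For reference, the renaming behaviour exercised in the notebook changes above can be sketched as follows. This is a minimal, illustrative example only, not part of the diff: the datasets are synthetic stand-ins for the `ds` (air temperature) and `pop` (POP model) datasets used in the documentation, and it assumes a cf_xarray version that supports the `skip` argument of `rename_like` shown in the diff.

# Illustrative sketch -- synthetic stand-ins for the notebook's datasets;
# assumes cf_xarray with rename_like(..., skip="axes") available.
import numpy as np
import xarray as xr
import cf_xarray as cfxr  # importing registers the .cf accessor

# Target dataset: 1D lon/lat coordinates carrying CF attributes.
ds = xr.Dataset(
    coords={
        "lon": (
            "lon",
            np.arange(4.0),
            {"standard_name": "longitude", "units": "degrees_east", "axis": "X"},
        ),
        "lat": (
            "lat",
            np.arange(3.0),
            {"standard_name": "latitude", "units": "degrees_north", "axis": "Y"},
        ),
    }
)

# Source dataset: POP-style 2D coordinates named TLONG/TLAT.
pop = xr.Dataset(
    {"TEMP": (("nlat", "nlon"), np.zeros((3, 4)))},
    coords={
        "TLONG": (
            ("nlat", "nlon"),
            np.zeros((3, 4)),
            {"standard_name": "longitude", "units": "degrees_east"},
        ),
        "TLAT": (
            ("nlat", "nlon"),
            np.zeros((3, 4)),
            {"standard_name": "latitude", "units": "degrees_north"},
        ),
    },
)

da = pop.cf["TEMP"]
# Skipping axis-based matches leaves one-to-one longitude/latitude mappings,
# so TLONG/TLAT are renamed to their counterparts in ds.
renamed = da.cf.rename_like(ds, skip="axes")
print(sorted(renamed.coords))  # expected: ['lat', 'lon']

# The bounds helpers from the notebook's bounds section work similarly:
ds_b = ds.cf.add_bounds("lat")  # adds a "lat_bounds" variable
lat_bounds = ds_b.cf.get_bounds("latitude")
lat_vertices = cfxr.bounds_to_vertices(lat_bounds, bounds_dim="bounds")

If a CF key matched more than one candidate variable on either side, `rename_like` would leave those variables untouched, which is exactly the one-to-one behaviour the notebook text describes.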