From 450f6a43995b39357d7a16854e0f2aceffc1408d Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 22 Jan 2021 07:33:06 -0700 Subject: [PATCH 01/57] RLS 0.4.0 --- doc/whats-new.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a00f5d8d..02613193 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -1,9 +1,13 @@ +.. currentmodule:: cf_xarray + What's New ---------- -v0.4.0 (unreleased) -=================== +v0.4.0 (Jan 22, 2021) +===================== - Support for arbitrary cell measures indexing. By `Mattia Almansi`_. +- Avoid using ``grid_latitude`` and ``grid_longitude`` for detecting latitude and longitude variables. + By `Pascal Bourgault`_. v0.3.1 (Nov 25, 2020) ===================== @@ -13,7 +17,7 @@ v0.3.1 (Nov 25, 2020) - Changed ``get_valid_keys()`` to ``.keys()``. `Kristen Thyng`_. - Added ``.cf.decode_vertical_coords`` for decoding of parameterized vertical coordinate variables. (:issue:`34`, :pr:`103`). `Deepak Cherian`_. -- Added top-level ``bounds_to_vertices`` and ``vertices_to_bounds`` as well as ``.cf.bounds_to_vertices`` +- Added top-level :py:func:`bounds_to_vertices` and :py:func:`vertices_to_bounds` as well as ``.cf.bounds_to_vertices`` to convert from coordinate bounds in a CF format (shape (nx, 2)) to a vertices format (shape (nx+1)). (:pr:`108`). `Pascal Bourgault`_. From a46b52a841eee14b5cc06690f7edea5eb962c0d8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 25 Jan 2021 18:16:47 -0700 Subject: [PATCH 02/57] Use standard name mapper in more places. 
--- cf_xarray/accessor.py | 53 +++++++++++++++++++------------- cf_xarray/tests/test_accessor.py | 22 +++++++++++++ doc/whats-new.rst | 5 +++ 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 537c31d0..747b0d53 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -201,13 +201,15 @@ def _apply_single_mapper(mapper): for mapper in mappers: results.append(_apply_single_mapper(mapper)) - nresults = sum([bool(v) for v in results]) - if nresults > 1: - raise KeyError( - f"Multiple mappers succeeded with key {key!r}.\nI was using mappers: {mappers!r}." - f"I received results: {results!r}.\nPlease open an issue." - ) - if nresults == 0: + flat = list(itertools.chain(*results)) + # de-duplicate + if all(not isinstance(r, DataArray) for r in flat): + results = list(set(flat)) + else: + results = flat + + nresults = any([bool(v) for v in [results]]) + if not nresults: if error: raise KeyError( f"cf-xarray cannot interpret key {key!r}. Perhaps some needed attributes are missing." @@ -215,7 +217,7 @@ def _apply_single_mapper(mapper): else: # none of the mappers worked. Return the default return default - return list(itertools.chain(*results)) + return results def _get_axis_coord_single(var: Union[DataArray, Dataset], key: str) -> List[str]: @@ -370,6 +372,21 @@ def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: return list(results) +def _get_with_standard_name( + obj: Union[DataArray, Dataset], name: Union[str, List[str]] +) -> List[str]: + """ returns a list of variable names with standard name == name. """ + varnames = [] + if isinstance(obj, DataArray): + obj = obj._to_temp_dataset() + for vname, var in obj.variables.items(): + stdname = var.attrs.get("standard_name", None) + if stdname == name: + varnames.append(str(vname)) + + return varnames + + #: Default mappers for common keys. 
_DEFAULT_KEY_MAPPERS: Mapping[str, Tuple[Mapper, ...]] = { "dim": (_get_axis_coord,), @@ -385,24 +402,18 @@ def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: "dims_or_levels": (_get_axis_coord,), # reset_index "window": (_get_axis_coord,), # rolling_exp "coord": (_get_axis_coord_single,), # differentiate, integrate - "group": (_get_axis_coord_single, _get_axis_coord_time_accessor), + "group": ( + _get_axis_coord_single, + _get_axis_coord_time_accessor, + _get_with_standard_name, + ), "indexer": (_get_axis_coord_single,), # resample - "variables": (_get_axis_coord,), # sortby + "variables": (_get_axis_coord, _get_with_standard_name), # sortby "weights": (_get_measure_variable,), # type: ignore + "chunks": (_get_axis_coord,), # chunk } -def _get_with_standard_name(ds: Dataset, name: Union[str, List[str]]) -> List[str]: - """ returns a list of variable names with standard name == name. """ - varnames = [] - for vname, var in ds.variables.items(): - stdname = var.attrs.get("standard_name", None) - if stdname == name: - varnames.append(str(vname)) - - return varnames - - def _guess_bounds_dim(da): """ Guess bounds values given a 1D coordinate variable. 
diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 188636be..4cc173fa 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -610,3 +610,25 @@ def test_param_vcoord_ocean_s_coord(): copy.s_rho.attrs["formula_terms"] = "s: s_rho C: Cs_r depth: h depth_c: hc" with pytest.raises(KeyError): copy.cf.decode_vertical_coords() + + +def test_standard_name_mapper(): + da = xr.DataArray( + np.arange(6), + dims="time", + coords={ + "label": ( + "time", + ["A", "B", "B", "A", "B", "C"], + {"standard_name": "standard_label"}, + ) + }, + ) + + actual = da.cf.groupby("standard_label").mean() + expected = da.cf.groupby("label").mean() + assert_identical(actual, expected) + + actual = da.cf.sortby("standard_label") + expected = da.sortby("label") + assert_identical(actual, expected) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 02613193..dd74d1db 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,6 +3,11 @@ What's New ---------- +v0.4.1 (unreleased) +=================== + +- Support for using ``standard_name`` in more functions. By `Deepak Cherian`_ + v0.4.0 (Jan 22, 2021) ===================== - Support for arbitrary cell measures indexing. By `Mattia Almansi`_. 
From 3dc7f3c684b61fa1bdee98ada7ac1a16888cc52b Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 26 Jan 2021 08:20:15 -0700 Subject: [PATCH 03/57] Fix da.cf.describe() not showing standard_names --- cf_xarray/accessor.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 747b0d53..38ae4a82 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -854,12 +854,9 @@ def describe(self): text += f"\t{key}: {measures[key] if key in measures else []}\n" text += "\nStandard Names:\n" - if isinstance(self._obj, DataArray): - text += "\tunsupported\n" - else: - for key, value in sorted(self.standard_names.items()): - if key not in _COORD_NAMES: - text += f"\t{key}: {value}\n" + for key, value in sorted(self.standard_names.items()): + if key not in _COORD_NAMES: + text += f"\t{key}: {value}\n" print(text) From 78f1683f9689ef258269a6c1c029171736055578 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 26 Jan 2021 11:25:32 -0700 Subject: [PATCH 04/57] Rework attribute rewriting, .cf.data_vars, .cf.coords (#130) * Fix .cf.data_vars * Return "CF DataArrays" in .cf.data_vars,.cf.coords * Fix DataArray.cf["standard_name"] Closes #129 Closes #126 --- cf_xarray/accessor.py | 262 ++++++++++++++++++------------- cf_xarray/tests/test_accessor.py | 37 ++++- doc/whats-new.rst | 7 +- 3 files changed, 191 insertions(+), 115 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 38ae4a82..fc1dfc1a 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -2,7 +2,7 @@ import inspect import itertools import warnings -from collections import ChainMap +from collections import ChainMap, defaultdict from typing import ( Any, Callable, @@ -159,6 +159,18 @@ def _is_datetime_like(da: DataArray) -> bool: return False +def invert_mappings(*mappings): + """Takes a set of mappings and iterates through, inverting to make a + new mapping of value: set(keys). 
Keys are deduplicated to avoid clashes between + standard_name and coordinate names.""" + merged = defaultdict(set) + for mapping in mappings: + for k, v in mapping.items(): + for name in v: + merged[name] |= set([k]) + return merged + + # Type for Mapper functions Mapper = Callable[[Union[DataArray, Dataset], str], List[str]] @@ -503,23 +515,29 @@ def _getattr( if isinstance(attribute, Mapping): if not attribute: return dict(attribute) - # attributes like chunks / sizes + newmap = dict() - unused_keys = set(attribute.keys()) - for key in _AXIS_NAMES + _COORD_NAMES: - value = set(apply_mapper(_get_axis_coord, obj, key, error=False)) - unused_keys -= value - if value: - good_values = value & set(obj.dims) - if not good_values: - continue - if len(good_values) > 1: + inverted = invert_mappings( + accessor.axes, + accessor.coordinates, + accessor.cell_measures, + accessor.standard_names, + ) + unused_keys = set(attribute.keys()) - set(inverted) + for key, value in attribute.items(): + for name in inverted[key]: + if name in newmap: raise AttributeError( - f"cf_xarray can't wrap attribute {attr!r} because there are multiple values for {key!r} viz. {good_values!r}. " - f"There is no unique mapping from {key!r} to a value in {attr!r}." + f"cf_xarray can't wrap attribute {attr!r} because there are multiple values for {name!r}. " + f"There is no unique mapping from {name!r} to a value in {attr!r}." 
) - newmap.update({key: attribute[good_values.pop()]}) + newmap.update(dict.fromkeys(inverted[key], value)) newmap.update({key: attribute[key] for key in unused_keys}) + + skip = {"data_vars": ["coords"], "coords": None} + if attr in ["coords", "data_vars"]: + for key in newmap: + newmap[key] = _getitem(accessor, key, skip=skip[attr]) return newmap elif isinstance(attribute, Callable): # type: ignore @@ -548,6 +566,123 @@ def wrapper(*args, **kwargs): return wrapper +def _getitem( + accessor: "CFAccessor", key: Union[str, List[str]], skip: List[str] = None +) -> Union[DataArray, Dataset]: + """ + Index into obj using key. Attaches CF associated variables. + + Parameters + ---------- + accessor: CFAccessor + key: str, List[str] + skip: str, optional + One of ["coords", "measures"], avoid clashes with special coord names + """ + + obj = accessor._obj + kind = str(type(obj).__name__) + scalar_key = isinstance(key, str) + + if isinstance(obj, DataArray) and not scalar_key: + raise KeyError( + f"Cannot use a list of keys with DataArrays. Expected a single string. Received {key!r} instead." + ) + + if scalar_key: + key = (key,) # type: ignore + + if skip is None: + skip = [] + + def check_results(names, k): + if scalar_key and len(names) > 1: + raise ValueError( + f"Receive multiple variables for key {k!r}: {names}. " + f"Expected only one. Please pass a list [{k!r}] " + f"instead to get all variables matching {k!r}." 
+ ) + + varnames: List[Hashable] = [] + coords: List[Hashable] = [] + successful = dict.fromkeys(key, False) + for k in key: + if "coords" not in skip and k in _AXIS_NAMES + _COORD_NAMES: + names = _get_axis_coord(obj, k) + check_results(names, k) + successful[k] = bool(names) + coords.extend(names) + elif "measures" not in skip and k in accessor._get_all_cell_measures(): + measure = _get_measure(obj, k) + check_results(measure, k) + successful[k] = bool(measure) + if measure: + varnames.extend(measure) + else: + stdnames = set(_get_with_standard_name(obj, k)) + check_results(stdnames, k) + successful[k] = bool(stdnames) + objcoords = set(obj.coords) + varnames.extend(stdnames - objcoords) + coords.extend(stdnames & objcoords) + + # these are not special names but could be variable names in underlying object + # we allow this so that we can return variables with appropriate CF auxiliary variables + varnames.extend([k for k, v in successful.items() if not v]) + allnames = varnames + coords + + try: + for name in allnames: + extravars = accessor.get_associated_variable_names(name) + # we cannot return bounds variables with scalar keys + if scalar_key: + extravars.pop("bounds") + coords.extend(itertools.chain(*extravars.values())) + + if isinstance(obj, DataArray): + ds = obj._to_temp_dataset() + else: + ds = obj + + if scalar_key: + if len(allnames) == 1: + da: DataArray = ds.reset_coords()[allnames[0]] # type: ignore + if allnames[0] in coords: + coords.remove(allnames[0]) + for k1 in coords: + da.coords[k1] = ds.variables[k1] + return da + else: + raise ValueError( + f"Received scalar key {key[0]!r} but multiple results: {allnames!r}. " + f"Please pass a list instead (['{key[0]}']) to get back a Dataset " + f"with {allnames!r}." 
+ ) + + ds = ds.reset_coords()[varnames + coords] + if isinstance(obj, DataArray): + if scalar_key and len(ds.variables) == 1: + # single dimension coordinates + assert coords + assert not varnames + + return ds[coords[0]] + + elif scalar_key and len(ds.variables) > 1: + raise NotImplementedError( + "Not sure what to return when given scalar key for DataArray and it has multiple values. " + "Please open an issue." + ) + + return ds.set_coords(coords) + + except KeyError: + raise KeyError( + f"{kind}.cf does not understand the key {k!r}. " + f"Use {kind}.cf.describe() to see a list of key names that can be interpreted." + ) + + class _CFWrappedClass: """ This class is used to wrap any class in _WRAPPED_CLASSES. @@ -1061,104 +1196,7 @@ def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: return coords def __getitem__(self, key: Union[str, List[str]]): - - kind = str(type(self._obj).__name__) - scalar_key = isinstance(key, str) - - if isinstance(self._obj, DataArray) and not scalar_key: - raise KeyError( - f"Cannot use a list of keys with DataArrays. Expected a single string. Received {key!r} instead." - ) - - if scalar_key: - key = (key,) # type: ignore - - def check_results(names, k): - if scalar_key and len(names) > 1: - raise ValueError( - f"Receive multiple variables for key {k!r}: {names}. " - f"Expected only one. Please pass a list [{k!r}] " - f"instead to get all variables matching {k!r}." 
- ) - - varnames: List[Hashable] = [] - coords: List[Hashable] = [] - successful = dict.fromkeys(key, False) - for k in key: - if k in _AXIS_NAMES + _COORD_NAMES: - names = _get_axis_coord(self._obj, k) - check_results(names, k) - successful[k] = bool(names) - coords.extend(names) - elif k in self._get_all_cell_measures(): - measure = _get_measure(self._obj, k) - check_results(measure, k) - successful[k] = bool(measure) - if measure: - varnames.extend(measure) - elif not isinstance(self._obj, DataArray): - stdnames = set(_get_with_standard_name(self._obj, k)) - check_results(stdnames, k) - successful[k] = bool(stdnames) - objcoords = set(self._obj.coords) - varnames.extend(stdnames - objcoords) - coords.extend(stdnames & objcoords) - - # these are not special names but could be variable names in underlying object - # we allow this so that we can return variables with appropriate CF auxiliary variables - varnames.extend([k for k, v in successful.items() if not v]) - allnames = varnames + coords - - try: - for name in allnames: - extravars = self.get_associated_variable_names(name) - # we cannot return bounds variables with scalar keys - if scalar_key: - extravars.pop("bounds") - coords.extend(itertools.chain(*extravars.values())) - - if isinstance(self._obj, DataArray): - ds = self._obj._to_temp_dataset() - else: - ds = self._obj - - if scalar_key: - if len(allnames) == 1: - da: DataArray = ds.reset_coords()[allnames[0]] # type: ignore - if allnames[0] in coords: - coords.remove(allnames[0]) - for k1 in coords: - da.coords[k1] = ds.variables[k1] - return da - else: - raise ValueError( - f"Received scalar key {key[0]!r} but multiple results: {allnames!r}. " - f"Please pass a list instead (['{key[0]}']) to get back a Dataset " - f"with {allnames!r}." 
- ) - - ds = ds.reset_coords()[varnames + coords] - if isinstance(self._obj, DataArray): - if scalar_key and len(ds.variables) == 1: - # single dimension coordinates - assert coords - assert not varnames - - return ds[coords[0]] - - elif scalar_key and len(ds.variables) > 1: - raise NotImplementedError( - "Not sure what to return when given scalar key for DataArray and it has multiple values. " - "Please open an issue." - ) - - return ds.set_coords(coords) - - except KeyError: - raise KeyError( - f"{kind}.cf does not understand the key {k!r}. " - f"Use {kind}.cf.describe() to see a list of key names that can be interpreted." - ) + return _getitem(self, key) def _maybe_to_dataset(self, obj=None) -> Dataset: if obj is None: diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 4cc173fa..240c9c50 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -19,6 +19,12 @@ objects = datasets + dataarrays +def assert_dicts_identical(dict1, dict2): + assert dict1.keys() == dict2.keys() + for k in dict1: + assert_identical(dict1[k], dict2[k]) + + def test_describe(capsys): airds.cf.describe() actual = capsys.readouterr().out @@ -280,7 +286,10 @@ def test_dataarray_getitem(): with pytest.raises(KeyError): air.cf[["longitude"]] with pytest.raises(KeyError): - air.cf[["longitude", "latitude"]], + air.cf[["longitude", "latitude"]] + + air["cell_area"].attrs["standard_name"] = "area_grid_cell" + assert_identical(air.cf["area_grid_cell"], air.cell_area.reset_coords(drop=True)) @pytest.mark.parametrize("obj", dataarrays) @@ -512,7 +521,7 @@ def test_guess_coord_axis(): assert dsnew.y1.attrs == {"axis": "Y"} -def test_dicts(): +def test_attributes(): actual = airds.cf.sizes expected = {"X": 50, "Y": 25, "T": 4, "longitude": 50, "latitude": 25, "time": 4} assert actual == expected @@ -543,6 +552,30 @@ def test_dicts(): expected = {"lon": 50, "Y": 25, "T": 4, "latitude": 25, "time": 4} assert actual == expected + actual = 
popds.cf.data_vars + expected = { + "sea_water_x_velocity": popds.cf["UVEL"], + "sea_water_potential_temperature": popds.cf["TEMP"], + } + assert_dicts_identical(actual, expected) + + actual = multiple.cf.data_vars + expected = dict(multiple.data_vars) + assert_dicts_identical(actual, expected) + + # check that data_vars contains ancillary variables + assert_identical(anc.cf.data_vars["specific_humidity"], anc.cf["specific_humidity"]) + + # clash between var name and "special" CF name + # Regression test for #126 + data = np.random.rand(4, 3) + times = pd.date_range("2000-01-01", periods=4) + locs = [30, 60, 90] + coords = [("time", times, {"axis": "T"}), ("space", locs)] + foo = xr.DataArray(data, coords, dims=["time", "space"]) + ds1 = xr.Dataset({"T": foo}) + assert_identical(ds1.cf.data_vars["T"], ds1["T"]) + def test_missing_variable_in_coordinates(): airds.air.attrs["coordinates"] = "lat lon time" diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dd74d1db..7a8e9051 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,7 +6,12 @@ What's New v0.4.1 (unreleased) =================== -- Support for using ``standard_name`` in more functions. By `Deepak Cherian`_ +- Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ +- Allow ``DataArray.cf[]`` with standard names. By `Deepak Cherian`_ +- Rewrite the ``values`` of ``.cf.coords`` and ``.cf.data_vars`` with objects returned + by ``.cf.__getitem___``. This allows extraction of DataArrays when there are clashes + between DataArray names and "special" CF names like ``T``. + (:issue:`129`, :pr:`130`). 
By `Deepak Cherian`_ v0.4.0 (Jan 22, 2021) ===================== From d2a495cd90e058769fcb5e6bccd7d412789ed257 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 26 Jan 2021 11:38:59 -0700 Subject: [PATCH 05/57] Clean up utils (#131) --- cf_xarray/accessor.py | 35 ++-------------------- cf_xarray/utils.py | 70 +++++++++++++++++-------------------------- 2 files changed, 30 insertions(+), 75 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index fc1dfc1a..adb5bc3a 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -2,7 +2,7 @@ import inspect import itertools import warnings -from collections import ChainMap, defaultdict +from collections import ChainMap from typing import ( Any, Callable, @@ -21,7 +21,7 @@ from xarray import DataArray, Dataset from .helpers import bounds_to_vertices -from .utils import parse_cell_methods_attr +from .utils import _is_datetime_like, invert_mappings, parse_cell_methods_attr #: Classes wrapped by cf_xarray. _WRAPPED_CLASSES = ( @@ -140,37 +140,6 @@ attrs["vertical"] = attrs["Z"] -def _is_datetime_like(da: DataArray) -> bool: - import numpy as np - - if np.issubdtype(da.dtype, np.datetime64) or np.issubdtype( - da.dtype, np.timedelta64 - ): - return True - - try: - import cftime - - if isinstance(da.data[0], cftime.datetime): - return True - except ImportError: - pass - - return False - - -def invert_mappings(*mappings): - """Takes a set of mappings and iterates through, inverting to make a - new mapping of value: set(keys). 
Keys are deduplicated to avoid clashes between - standard_name and coordinate names.""" - merged = defaultdict(set) - for mapping in mappings: - for k, v in mapping.items(): - for name in v: - merged[name] |= set([k]) - return merged - - # Type for Mapper functions Mapper = Callable[[Union[DataArray, Dataset], str], List[str]] diff --git a/cf_xarray/utils.py b/cf_xarray/utils.py index e222133f..b3ed7348 100644 --- a/cf_xarray/utils.py +++ b/cf_xarray/utils.py @@ -1,52 +1,26 @@ -from typing import Any, Dict, Hashable, Mapping, Optional, TypeVar, cast +from collections import defaultdict +from typing import Dict -K = TypeVar("K") -V = TypeVar("V") -T = TypeVar("T") +from xarray import DataArray -def either_dict_or_kwargs( - pos_kwargs: Optional[Mapping[Hashable, T]], - kw_kwargs: Mapping[str, T], - func_name: str, -) -> Mapping[Hashable, T]: - if pos_kwargs is not None: - if not is_dict_like(pos_kwargs): - raise ValueError( - "the first argument to .%s must be a dictionary" % func_name - ) - if kw_kwargs: - raise ValueError( - "cannot specify both keyword and positional " - "arguments to .%s" % func_name - ) - return pos_kwargs - else: - # Need an explicit cast to appease mypy due to invariance; see - # https://github.com/python/mypy/issues/6228 - return cast(Mapping[Hashable, T], kw_kwargs) +def _is_datetime_like(da: DataArray) -> bool: + import numpy as np + if np.issubdtype(da.dtype, np.datetime64) or np.issubdtype( + da.dtype, np.timedelta64 + ): + return True -def is_dict_like(value: Any) -> bool: - return hasattr(value, "keys") and hasattr(value, "__getitem__") + try: + import cftime + if isinstance(da.data[0], cftime.datetime): + return True + except ImportError: + pass -# copied from xarray -class UncachedAccessor: - """Acts like a property, but on both classes and class instances - This class is necessary because some tools (e.g. pydoc and sphinx) - inspect classes for which property returns itself and not the - accessor. 
- """ - - def __init__(self, accessor): - self._accessor = accessor - - def __get__(self, obj, cls): - if obj is None: - return self._accessor - - return self._accessor(obj) + return False def parse_cell_methods_attr(attr: str) -> Dict[str, str]: @@ -67,3 +41,15 @@ def parse_cell_methods_attr(attr: str) -> Dict[str, str]: raise ValueError(f"attrs['cell_measures'] = {attr!r} is malformed.") return dict(zip(strings[slice(0, None, 2)], strings[slice(1, None, 2)])) + + +def invert_mappings(*mappings): + """Takes a set of mappings and iterates through, inverting to make a + new mapping of value: set(keys). Keys are deduplicated to avoid clashes between + standard_name and coordinate names.""" + merged = defaultdict(set) + for mapping in mappings: + for k, v in mapping.items(): + for name in v: + merged[name] |= set([k]) + return merged From 2be16191985aebcc8d67fed91604e77791dfebee Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 26 Jan 2021 13:05:24 -0700 Subject: [PATCH 06/57] Update doc build (#132) --- ci/doc.yml | 2 +- doc/examples/introduction.ipynb | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/ci/doc.yml b/ci/doc.yml index c3f77bc5..9a4a5a15 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -14,6 +14,6 @@ dependencies: - ipython - ipykernel - pandas + - pydata-sphinx-theme - pip: - git+https://github.com/xarray-contrib/cf-xarray - - sphinx-book-theme diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 64b55cbd..0eab2ec1 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -22,9 +22,7 @@ "source": [ "import cf_xarray\n", "import numpy as np\n", - "import xarray as xr\n", - "\n", - "xr.set_options(display_style=\"text\") # work around issue 57" + "import xarray as xr" ] }, { @@ -1030,7 +1028,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.8" }, "toc": { "base_numbering": 1, From 
674751b59fc5fe32759eda145b1cb6c917633367 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 26 Jan 2021 13:19:23 -0700 Subject: [PATCH 07/57] Update mapper docstrings --- cf_xarray/accessor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index adb5bc3a..6500bfee 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -424,14 +424,16 @@ def _build_docstring(func): mapper_docstrings = { _get_axis_coord: f"One or more of {(_AXIS_NAMES + _COORD_NAMES)!r}", _get_axis_coord_single: f"One of {(_AXIS_NAMES + _COORD_NAMES)!r}", - # _get_measure_variable: f"One of {_CELL_MEASURES!r}", + _get_axis_coord_time_accessor: "Time variable accessor e.g. 'T.month'", + _get_with_standard_name: "Standard names", + _get_measure_variable: f"One of {_CELL_MEASURES!r}", } sig = inspect.signature(func) string = "" for k in set(sig.parameters.keys()) & set(_DEFAULT_KEY_MAPPERS): mappers = _DEFAULT_KEY_MAPPERS.get(k, []) - docstring = "; ".join( + docstring = ";\n\t\t\t".join( mapper_docstrings.get(mapper, "unknown. 
please open an issue.") for mapper in mappers ) From 1a15e73e66802a3541c31435df485e3cafe4d80d Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 28 Jan 2021 23:22:38 +0000 Subject: [PATCH 08/57] Support drop* and *_coords methods (#145) * implement names and labels * no need of _get_with_key * add tests * fix tests * add what's new --- .github/workflows/ci.yaml | 2 +- .github/workflows/pre-commit.yaml | 2 +- cf_xarray/accessor.py | 7 ++++- cf_xarray/tests/test_accessor.py | 43 +++++++++++++++++++++++++++++++ doc/whats-new.rst | 1 + 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f965ce9b..6eb0518b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -2,7 +2,7 @@ name: CI on: push: branches: - - "*" + - "main" pull_request: branches: - "*" diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index dd8efaeb..601e28bd 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -2,7 +2,7 @@ name: pre-commit on: push: - branches: "*" + branches: "main" pull_request: branches: - "*" diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 6500bfee..d173ebd7 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -376,7 +376,12 @@ def _get_with_standard_name( "dims_dict": (_get_axis_coord,), # swap_dims, rename_dims "shifts": (_get_axis_coord,), # shift, roll "pad_width": (_get_axis_coord,), # shift, roll - # "names": something_with_all_valid_keys? 
# set_coords, reset_coords + "names": ( + _get_axis_coord, + _get_measure, + _get_with_standard_name, + ), # set_coords, reset_coords, drop_vars + "labels": (_get_axis_coord, _get_measure, _get_with_standard_name), # drop "coords": (_get_axis_coord,), # interp "indexers": (_get_axis_coord,), # sel, isel, reindex # "indexes": (_get_axis_coord,), # set_index diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 240c9c50..dc966825 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -4,6 +4,7 @@ import pytest import xarray as xr from matplotlib import pyplot as plt +from xarray import Dataset from xarray.testing import assert_allclose, assert_identical import cf_xarray # noqa @@ -665,3 +666,45 @@ def test_standard_name_mapper(): actual = da.cf.sortby("standard_label") expected = da.sortby("label") assert_identical(actual, expected) + + +@pytest.mark.parametrize("obj", objects) +@pytest.mark.parametrize("attr", ["drop", "drop_vars", "set_coords"]) +@pytest.mark.filterwarnings("ignore:dropping .* using `drop` .* deprecated") +def test_drop_vars_and_set_coords(obj, attr): + + # DataArray object has no attribute set_coords + if not isinstance(obj, Dataset) and attr == "set_coords": + return + + # Get attribute + expected = getattr(obj, attr) + actual = getattr(obj.cf, attr) + + # Axis + assert_identical(expected("lon"), actual("X")) + # Coordinate + assert_identical(expected("lon"), actual("longitude")) + # Cell measure + assert_identical(expected("cell_area"), actual("area")) + # Variables + if isinstance(obj, Dataset): + assert_identical(expected("air"), actual("air_temperature")) + assert_identical(expected(obj.variables), actual(obj.cf.keys())) + + +@pytest.mark.parametrize("obj", objects) +def test_drop_sel_and_reset_coords(obj): + + # Axis + assert_identical(obj.drop_sel(lat=75), obj.cf.drop_sel(Y=75)) + # Coordinate + assert_identical(obj.drop_sel(lat=75), obj.cf.drop_sel(latitude=75)) + + # Cell 
measure + assert_identical(obj.reset_coords("cell_area"), obj.cf.reset_coords("area")) + # Variable + if isinstance(obj, Dataset): + assert_identical( + obj.reset_coords("air"), obj.cf.reset_coords("air_temperature") + ) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7a8e9051..25ee2264 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.4.1 (unreleased) =================== +- Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ - Allow ``DataArray.cf[]`` with standard names. By `Deepak Cherian`_ - Rewrite the ``values`` of ``.cf.coords`` and ``.cf.data_vars`` with objects returned From aa93aba208f6d3bd7d21978d1cfd17822d5ae77f Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Fri, 29 Jan 2021 14:49:41 +0000 Subject: [PATCH 09/57] add support for drop_dims (#146) --- cf_xarray/accessor.py | 1 + cf_xarray/tests/test_accessor.py | 8 ++++++++ doc/whats-new.rst | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index d173ebd7..20ec811b 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -372,6 +372,7 @@ def _get_with_standard_name( _DEFAULT_KEY_MAPPERS: Mapping[str, Tuple[Mapper, ...]] = { "dim": (_get_axis_coord,), "dims": (_get_axis_coord,), # transpose + "drop_dims": (_get_axis_coord,), # drop_dims "dimensions": (_get_axis_coord,), # stack "dims_dict": (_get_axis_coord,), # swap_dims, rename_dims "shifts": (_get_axis_coord,), # shift, roll diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index dc966825..26099fdf 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -708,3 +708,11 @@ def test_drop_sel_and_reset_coords(obj): assert_identical( obj.reset_coords("air"), obj.cf.reset_coords("air_temperature") ) + + 
+@pytest.mark.parametrize("ds", datasets) +def test_drop_dims(ds): + + # Axis and coordinate + for cf_name in ["X", "longitude"]: + assert_identical(ds.drop_dims("lon"), ds.cf.drop_dims(cf_name)) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 25ee2264..998e1ec0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,7 +6,7 @@ What's New v0.4.1 (unreleased) =================== -- Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. +- Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ - Allow ``DataArray.cf[]`` with standard names. By `Deepak Cherian`_ - Rewrite the ``values`` of ``.cf.coords`` and ``.cf.data_vars`` with objects returned From cea21cd27298f05be18f62c31cf883c87557ce40 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Sun, 31 Jan 2021 01:26:40 +0000 Subject: [PATCH 10/57] add coordinate criteria table in docs (#147) * add coordinate criteria table in docs * add criteria in index * add regex table and what's new * fix make docstring --- .gitignore | 4 ++- cf_xarray/scripts/make_doc.py | 59 +++++++++++++++++++++++++++++++++ cf_xarray/tests/test_scripts.py | 32 ++++++++++++++++++ doc/conf.py | 3 ++ doc/contributing.rst | 5 +++ doc/criteria.rst | 34 +++++++++++++++++++ doc/examples/introduction.ipynb | 7 ++-- doc/index.rst | 1 + doc/whats-new.rst | 1 + setup.cfg | 4 +-- 10 files changed, 145 insertions(+), 5 deletions(-) create mode 100644 cf_xarray/scripts/make_doc.py create mode 100644 cf_xarray/tests/test_scripts.py create mode 100644 doc/criteria.rst diff --git a/.gitignore b/.gitignore index b6e47617..24155863 100644 --- a/.gitignore +++ b/.gitignore @@ -69,7 +69,9 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +doc/_build/ +doc/generated/ +cf_xarray/tests/_build/ # PyBuilder 
target/ diff --git a/cf_xarray/scripts/make_doc.py b/cf_xarray/scripts/make_doc.py new file mode 100644 index 00000000..203f3c41 --- /dev/null +++ b/cf_xarray/scripts/make_doc.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +import os + +from pandas import DataFrame + +from cf_xarray.accessor import _AXIS_NAMES, _COORD_NAMES, coordinate_criteria, regex + + +def main(): + """ + Make all additional files needed to build the documentations. + """ + + make_criteria_csv() + make_regex_csv() + + +def make_criteria_csv(): + """ + Make criteria tables: + _build/csv/{all,axes,coords}_criteria.csv + """ + + csv_dir = "_build/csv" + os.makedirs(csv_dir, exist_ok=True) + + # Criteria tables + df = DataFrame.from_dict(coordinate_criteria) + df = df.dropna(1, how="all") + df = df.applymap(lambda x: ", ".join(sorted(x)) if isinstance(x, tuple) else x) + df = df.sort_index(0).sort_index(1) + + # All criteria + df.to_csv(os.path.join(csv_dir, "all_criteria.csv")) + + # Axes and coordinates + for keys, name in zip([_AXIS_NAMES, _COORD_NAMES], ["axes", "coords"]): + subdf = df.loc[sorted(keys)].dropna(1, how="all") + subdf = subdf.dropna(1, how="all").transpose() + subdf.to_csv(os.path.join(csv_dir, f"{name}_criteria.csv")) + + +def make_regex_csv(): + """ + Make regex tables: + _build/csv/all_regex.csv + """ + + csv_dir = "_build/csv" + os.makedirs(csv_dir, exist_ok=True) + df = DataFrame(regex, index=[0]) + df = df.applymap(lambda x: f"``{x}``") + df = df.sort_index(1).transpose() + df.to_csv(os.path.join(csv_dir, "all_regex.csv"), header=False) + + +if __name__ == "__main__": + main() diff --git a/cf_xarray/tests/test_scripts.py b/cf_xarray/tests/test_scripts.py new file mode 100644 index 00000000..73221ee0 --- /dev/null +++ b/cf_xarray/tests/test_scripts.py @@ -0,0 +1,32 @@ +import os + +from cf_xarray.scripts import make_doc + + +def remove_if_exists(paths): + paths = [paths] if isinstance(paths, str) else paths + for path in paths: + if os.path.exists(path): + os.remove(path) + + 
+def test_make_doc(): + + # Create/remove files from tests/, + # always return to original working directory + owd = os.getcwd() + os.chdir(os.path.dirname(__file__)) + try: + names = [ + "axes_criteria", + "coords_criteria", + "all_criteria", + "all_regex", + ] + tables_to_check = [f"_build/csv/{name}.csv" for name in names] + remove_if_exists(tables_to_check) + + make_doc.main() + assert all(os.path.exists(path) for path in tables_to_check) + finally: + os.chdir(owd) diff --git a/doc/conf.py b/doc/conf.py index c28ba2e3..36fcab62 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,6 +18,9 @@ import sphinx_autosummary_accessors import cf_xarray # noqa +from cf_xarray.scripts import make_doc + +make_doc.main() # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the diff --git a/doc/contributing.rst b/doc/contributing.rst index 5377cb34..cdd95d51 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -32,6 +32,11 @@ This dictionary contains criteria for identifying axis and coords using CF attri ~accessor.coordinate_criteria +.. csv-table:: + :file: _build/csv/all_criteria.csv + :header-rows: 1 + :stub-columns: 1 + Classes ~~~~~~~ diff --git a/doc/criteria.rst b/doc/criteria.rst new file mode 100644 index 00000000..c07fe2bd --- /dev/null +++ b/doc/criteria.rst @@ -0,0 +1,34 @@ +.. currentmodule:: xarray + +CF Criteria +----------- + +Attributes +~~~~~~~~~~ +Criteria for identifying variables using CF attributes. + +Axes +==== + +.. csv-table:: + :file: _build/csv/axes_criteria.csv + :header-rows: 1 + :stub-columns: 1 + +Coordinates +=========== + +.. csv-table:: + :file: _build/csv/coords_criteria.csv + :header-rows: 1 + :stub-columns: 1 + + +Names +~~~~~ +Regex used by :meth:`DataArray.cf.guess_coord_axis` and :meth:`Dataset.cf.guess_coord_axis` for identifying variables using their names. + +.. 
csv-table:: + :file: _build/csv/all_regex.csv + :stub-columns: 1 + diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 0eab2ec1..121c3c12 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -184,7 +184,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## What attributes have been discovered?\n" + "## What attributes have been discovered?\n", + "\n", + "The criteria for identifying variables using CF attributes are listed\n", + "[here](../criteria.rst).\n" ] }, { @@ -1028,7 +1031,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.7.3" }, "toc": { "base_numbering": 1, diff --git a/doc/index.rst b/doc/index.rst index b0150926..1e999316 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -39,6 +39,7 @@ Table of contents :maxdepth: 2 examples/introduction + criteria whats-new roadmap contributing diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 998e1ec0..32ab89f0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.4.1 (unreleased) =================== +- Added scripts to document CF criteria with tables. By `Mattia Almansi`_. - Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ - Allow ``DataArray.cf[]`` with standard names. 
By `Deepak Cherian`_ diff --git a/setup.cfg b/setup.cfg index 2c8128c7..742d8f27 100644 --- a/setup.cfg +++ b/setup.cfg @@ -116,6 +116,6 @@ test = pytest nobeep = True [rstcheck] -ignore_roles=pr,issue -ignore_directives=ipython,autodata +ignore_roles=pr,issue,meth +ignore_directives=ipython,autodata,csv-table ignore_messages=(is not referenced\.$) From 291ec716bec994b1cb69911c6338e86547e6b330 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 30 Jan 2021 18:59:24 -0700 Subject: [PATCH 11/57] Better links in whats-new --- doc/api.rst | 24 ++++++++++++++---------- doc/conf.py | 6 +++++- doc/criteria.rst | 2 +- doc/whats-new.rst | 32 ++++++++++++++++---------------- setup.cfg | 2 +- 5 files changed, 37 insertions(+), 29 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 0a66c4db..556ba413 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -10,19 +10,20 @@ DataArray :toctree: generated/ :template: autosummary/accessor_attribute.rst + DataArray.cf.axes + DataArray.cf.cell_measures + DataArray.cf.coordinates + DataArray.cf.standard_names DataArray.cf.plot .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst + DataArray.cf.__getitem__ DataArray.cf.describe - DataArray.cf.standard_names - DataArray.cf.keys - DataArray.cf.axes - DataArray.cf.coordinates - DataArray.cf.cell_measures DataArray.cf.guess_coord_axis + DataArray.cf.keys DataArray.cf.rename_like Dataset @@ -32,20 +33,23 @@ Dataset :toctree: generated/ :template: autosummary/accessor_attribute.rst + Dataset.cf.axes + Dataset.cf.cell_measures + Dataset.cf.coordinates + Dataset.cf.standard_names + .. 
autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst + DataArray.cf.__getitem__ Dataset.cf.add_bounds + Dataset.cf.bounds_to_vertices Dataset.cf.decode_vertical_coords Dataset.cf.describe Dataset.cf.get_bounds - Dataset.cf.bounds_to_vertices - Dataset.cf.standard_names - Dataset.cf.keys - Dataset.cf.axes - Dataset.cf.coordinates Dataset.cf.guess_coord_axis + Dataset.cf.keys Dataset.cf.rename_like .. currentmodule:: cf_xarray diff --git a/doc/conf.py b/doc/conf.py index 36fcab62..170ac980 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -315,6 +315,10 @@ autosummary_generate = True autodoc_typehints = "none" - +autodoc_default_options = { + "members": True, + "undoc-members": True, + "private-members": True, +} napoleon_use_param = True napoleon_use_rtype = True diff --git a/doc/criteria.rst b/doc/criteria.rst index c07fe2bd..4e683ed3 100644 --- a/doc/criteria.rst +++ b/doc/criteria.rst @@ -26,7 +26,7 @@ Coordinates Names ~~~~~ -Regex used by :meth:`DataArray.cf.guess_coord_axis` and :meth:`Dataset.cf.guess_coord_axis` for identifying variables using their names. +Regex used by :py:meth:`DataArray.cf.guess_coord_axis` and :py:meth:`Dataset.cf.guess_coord_axis` for identifying variables using their names. .. csv-table:: :file: _build/csv/all_regex.csv diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 32ab89f0..f73d1672 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -1,4 +1,4 @@ -.. currentmodule:: cf_xarray +.. currentmodule:: xarray What's New ---------- @@ -9,9 +9,9 @@ v0.4.1 (unreleased) - Added scripts to document CF criteria with tables. By `Mattia Almansi`_. - Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ -- Allow ``DataArray.cf[]`` with standard names. 
By `Deepak Cherian`_ -- Rewrite the ``values`` of ``.cf.coords`` and ``.cf.data_vars`` with objects returned - by ``.cf.__getitem___``. This allows extraction of DataArrays when there are clashes +- Allow :py:meth:`DataArray.cf.__getitem__` with standard names. By `Deepak Cherian`_ +- Rewrite the ``values`` of :py:attr:`Dataset.coords` and :py:attr:`Dataset.data_vars` with objects returned + by :py:meth:`Dataset.cf.__getitem__`. This allows extraction of DataArrays when there are clashes between DataArray names and "special" CF names like ``T``. (:issue:`129`, :pr:`130`). By `Deepak Cherian`_ @@ -23,13 +23,12 @@ v0.4.0 (Jan 22, 2021) v0.3.1 (Nov 25, 2020) ===================== -- Support ``Dataset.cf.cell_measures``. By `Deepak Cherian`_. -- Added ``.axes`` to return a dictionary mapping available Axis standard names to variable names of an xarray object, ``.coordinates`` for Coordinates, - ``.cell_measures`` for Cell Measures, and ``.standard_names`` for all variables. `Kristen Thyng`_ and `Mattia Almansi`_. -- Changed ``get_valid_keys()`` to ``.keys()``. `Kristen Thyng`_. -- Added ``.cf.decode_vertical_coords`` for decoding of parameterized vertical coordinate variables. +- Support :py:attr:`Dataset.cf.cell_measures`. By `Deepak Cherian`_. +- Added :py:attr:`Dataset.cf.axes` to return a dictionary mapping available Axis standard names to variable names of an xarray object, :py:attr:`Dataset.cf.coordinates` for Coordinates, :py:attr:`Dataset.cf.cell_measures` for Cell Measures, and :py:attr:`Dataset.cf.standard_names` for all variables. `Kristen Thyng`_ and `Mattia Almansi`_. +- Changed :py:meth:`Dataset.cf.get_valid_keys` to :py:meth:`Dataset.cf.keys`. `Kristen Thyng`_. +- Added :py:meth:`Dataset.cf.decode_vertical_coords` for decoding of parameterized vertical coordinate variables. (:issue:`34`, :pr:`103`). `Deepak Cherian`_. 
-- Added top-level :py:func:`bounds_to_vertices` and :py:func:`vertices_to_bounds` as well as ``.cf.bounds_to_vertices`` +- Added top-level :py:func:`~cf_xarray.bounds_to_vertices` and :py:func:`~cf_xarray.vertices_to_bounds` as well as :py:meth:`Dataset.cf.bounds_to_vertices` to convert from coordinate bounds in a CF format (shape (nx, 2)) to a vertices format (shape (nx+1)). (:pr:`108`). `Pascal Bourgault`_. @@ -45,7 +44,7 @@ model in particular. Thanks to Kristen Thyng for opening many issues. v0.2.1 (Aug 06, 2020) ===================== - Support for the ``bounds`` attribute. (:pr:`68`, :issue:`32`). `Deepak Cherian`_. -- Add ``.cf.guess_coord_axis`` to automagically guess axis and coord names, and add +- Add :py:meth:`Dataset.cf.guess_coord_axis` to automagically guess axis and coord names, and add appropriate attributes. (:pr:`67`, :issue:`46`). `Deepak Cherian`_. v0.2.0 (Jul 28, 2020) @@ -54,21 +53,22 @@ v0.2.0 (Jul 28, 2020) - ``cf_xarray`` is now available on conda-forge. Thanks to `Anderson Banihirwe`_ and `Filipe Fernandes`_ - Remap datetime accessor syntax for groupby. E.g. ``.cf.groupby("T.month")`` → ``.cf.groupby("ocean_time.month")``. (:pr:`64`, :issue:`6`). `Julia Kent`_. -- Added ``.cf.rename_like`` to rename matching variables. Only coordinate variables +- Added :py:meth:`Dataset.cf.rename_like` to rename matching variables. Only coordinate variables i.e. those that match the criteria for ``("latitude", "longitude", "vertical", "time")`` are renamed for now. (:pr:`55`) `Deepak Cherian`_. -- Added ``.cf.add_bounds`` to add guessed bounds for 1D coordinates. (:pr:`53`) `Deepak Cherian`_. +- Added :py:meth:`Dataset.cf.add_bounds` to add guessed bounds for 1D coordinates. (:pr:`53`) `Deepak Cherian`_. v0.1.5 ====== + +- Begin documenting things for contributors in :ref:`contribut`. +- Parse ``ancillary_variables`` attribute. These variables are converted to coordinate variables. 
+- Support :py:meth:`Dataset.reset_index` - Wrap ``.sizes`` and ``.chunks``. (:pr:`42`) `Deepak Cherian`_. >>> ds.cf.sizes {'X': 53, 'Y': 25, 'T': 2920, 'longitude': 53, 'latitude': 25, 'time': 2920} -- Begin documenting things for contributors in :ref:`contribut`. -- Parse ``ancillary_variables`` attribute. These variables are converted to coordinate variables. -- Support ``reset_index`` v0.1.4 ====== diff --git a/setup.cfg b/setup.cfg index 742d8f27..2eeb679b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -116,6 +116,6 @@ test = pytest nobeep = True [rstcheck] -ignore_roles=pr,issue,meth +ignore_roles=pr,issue,py:meth,py:attr ignore_directives=ipython,autodata,csv-table ignore_messages=(is not referenced\.$) From 68cc78e4bdfccb5eb368515d5cee98d49ba54659 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 30 Jan 2021 19:27:37 -0700 Subject: [PATCH 12/57] Update pre-commit --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ad688bd9..4af86d7a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/timothycrosley/isort - rev: 5.6.4 + rev: 5.7.0 hooks: - id: isort files: .+\.py$ @@ -19,12 +19,12 @@ repos: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.8.3 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.790 # Must match ci/requirements/*.yml + rev: v0.800 # Must match ci/requirements/*.yml hooks: - id: mypy From 654a709a169cf7fe0632aa670a1418461e66abfd Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 2 Feb 2021 09:53:36 -0700 Subject: [PATCH 13/57] docs fixes (#154) --- cf_xarray/accessor.py | 81 +++++++++++++++++++++--- cf_xarray/{tests => }/datasets.py | 0 cf_xarray/tests/test_accessor.py | 6 +- cf_xarray/tests/test_helpers.py | 2 +- doc/api.rst | 50 +++++++++++---- 
doc/criteria.rst | 2 + doc/examples/introduction.ipynb | 102 ++++++++---------------------- doc/whats-new.rst | 2 +- setup.cfg | 1 + 9 files changed, 142 insertions(+), 104 deletions(-) rename cf_xarray/{tests => }/datasets.py (100%) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 20ec811b..ac6dbe78 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -561,11 +561,6 @@ def _getitem( kind = str(type(obj).__name__) scalar_key = isinstance(key, str) - if isinstance(obj, DataArray) and not scalar_key: - raise KeyError( - f"Cannot use a list of keys with DataArrays. Expected a single string. Received {key!r} instead." - ) - if scalar_key: key = (key,) # type: ignore @@ -1172,9 +1167,6 @@ def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: return coords - def __getitem__(self, key: Union[str, List[str]]): - return _getitem(self, key) - def _maybe_to_dataset(self, obj=None) -> Dataset: if obj is None: obj = self._obj @@ -1283,6 +1275,37 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: @xr.register_dataset_accessor("cf") class CFDatasetAccessor(CFAccessor): + def __getitem__(self, key: Union[str, List[str]]) -> Union[DataArray, Dataset]: + """ + Index into a Dataset making use of CF attributes. + + Parameters + ---------- + + key: str, Iterable[str], optional + One of + - axes names: "X", "Y", "Z", "T" + - coordinate names: "longitude", "latitude", "vertical", "time" + - cell measures: "area", "volume", or other names present in the \ + ``cell_measures`` attribute + - standard names: names present in ``standard_name`` attribute + + Returns + ------- + DataArray or Dataset + ``Dataset.cf[str]`` will return a DataArray, \ + ``Dataset.cf[List[str]]``` will return a Dataset. + + Notes + ----- + In all cases, associated CF variables will be attached as coordinate variables + by parsing attributes such as ``bounds``, ``ancillary_variables``, etc. 
+ + ``bounds`` variables will not be attached when a DataArray is returned. This + is a limitation of the xarray data model. + """ + return _getitem(self, key) + def get_bounds(self, key: str) -> DataArray: """ Get bounds variable corresponding to key. @@ -1317,9 +1340,12 @@ def add_bounds(self, dims: Union[Hashable, Iterable[Hashable]]): ------- DataArray or Dataset with bounds variables added and appropriate "bounds" attribute set. + Raises + ------ + KeyError + Notes ----- - The bounds variables are automatically named f"{dim}_bounds" where ``dim`` is a dimension name. """ @@ -1511,4 +1537,41 @@ def decode_vertical_coords(self, prefix="z"): @xr.register_dataarray_accessor("cf") class CFDataArrayAccessor(CFAccessor): + def __getitem__(self, key: Union[str, List[str]]) -> DataArray: + """ + Index into a DataArray making use of CF attributes. + + Parameters + ---------- + key: str, Iterable[str], optional + One of + - axes names: "X", "Y", "Z", "T" + - coordinate names: "longitude", "latitude", "vertical", "time" + - cell measures: "area", "volume", or other names present in the \ + ``cell_measures`` attribute + - standard names: names present in ``standard_name`` attribute of \ + coordinate variables + + Returns + ------- + DataArray + + Raises + ------ + KeyError + ``DataArray.cf[List[str]]`` will raise KeyError. + + Notes + ----- + Associated CF variables will be attached as coordinate variables + by parsing attributes such as ``cell_measures``, ``coordinates`` etc. + """ + + if not isinstance(key, str): + raise KeyError( + f"Cannot use a list of keys with DataArrays. Expected a single string. Received {key!r} instead." 
+ ) + + return _getitem(self, key) + pass diff --git a/cf_xarray/tests/datasets.py b/cf_xarray/datasets.py similarity index 100% rename from cf_xarray/tests/datasets.py rename to cf_xarray/datasets.py diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 26099fdf..216e6058 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -9,8 +9,8 @@ import cf_xarray # noqa +from ..datasets import airds, anc, ds_no_attrs, multiple, popds, romsds from . import raise_if_dask_computes -from .datasets import airds, anc, ds_no_attrs, multiple, popds mpl.use("Agg") @@ -586,7 +586,7 @@ def test_missing_variable_in_coordinates(): def test_Z_vs_vertical_ROMS(): - from .datasets import romsds + from ..datasets import romsds assert_identical(romsds.s_rho.reset_coords(drop=True), romsds.temp.cf["Z"]) assert_identical( @@ -612,8 +612,6 @@ def test_Z_vs_vertical_ROMS(): def test_param_vcoord_ocean_s_coord(): - from .datasets import romsds - romsds.s_rho.attrs["standard_name"] = "ocean_s_coordinate_g2" Zo_rho = (romsds.hc * romsds.s_rho + romsds.Cs_r * romsds.h) / ( romsds.hc + romsds.h diff --git a/cf_xarray/tests/test_helpers.py b/cf_xarray/tests/test_helpers.py index 046d7b8a..3fc7c3e9 100644 --- a/cf_xarray/tests/test_helpers.py +++ b/cf_xarray/tests/test_helpers.py @@ -3,7 +3,7 @@ import cf_xarray as cfxr # noqa -from .datasets import airds, mollwds +from ..datasets import airds, mollwds try: from dask.array import Array as DaskArray diff --git a/doc/api.rst b/doc/api.rst index 556ba413..381e3cb4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1,11 +1,28 @@ -.. currentmodule:: xarray - API === +.. currentmodule:: cf_xarray + +Top-level API +------------- + +.. autosummary:: + :toctree: generated/ + + bounds_to_vertices + vertices_to_bounds + + +.. currentmodule:: xarray + DataArray --------- +.. _daattr: + +Attributes +~~~~~~~~~~ + .. 
autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst @@ -16,6 +33,12 @@ DataArray DataArray.cf.standard_names DataArray.cf.plot + +.. _dameth: + +Methods +~~~~~~~ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst @@ -29,6 +52,11 @@ DataArray Dataset ------- +.. _dsattr: + +Attributes +~~~~~~~~~~ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst @@ -38,11 +66,16 @@ Dataset Dataset.cf.coordinates Dataset.cf.standard_names +.. _dsmeth: + +Methods +~~~~~~~ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst - DataArray.cf.__getitem__ + Dataset.cf.__getitem__ Dataset.cf.add_bounds Dataset.cf.bounds_to_vertices Dataset.cf.decode_vertical_coords @@ -51,14 +84,3 @@ Dataset Dataset.cf.guess_coord_axis Dataset.cf.keys Dataset.cf.rename_like - -.. currentmodule:: cf_xarray - -Top-level API -------------- - -.. autosummary:: - :toctree: generated/ - - bounds_to_vertices - vertices_to_bounds diff --git a/doc/criteria.rst b/doc/criteria.rst index 4e683ed3..7aca1e39 100644 --- a/doc/criteria.rst +++ b/doc/criteria.rst @@ -1,5 +1,7 @@ .. currentmodule:: xarray +.. 
_criteria: + CF Criteria ----------- diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 121c3c12..e598edc3 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -20,7 +20,7 @@ }, "outputs": [], "source": [ - "import cf_xarray\n", + "import cf_xarray as cfxr\n", "import numpy as np\n", "import xarray as xr" ] @@ -68,48 +68,8 @@ }, "outputs": [], "source": [ - "pop = xr.Dataset()\n", - "\n", - "# set 2D coordinate variables as latitude, longitude\n", - "pop.coords[\"TLONG\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " np.ones((20, 30)),\n", - " {\"units\": \"degrees_east\"},\n", - ")\n", - "pop.coords[\"TLAT\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " 2 * np.ones((20, 30)),\n", - " {\"units\": \"degrees_north\"},\n", - ")\n", - "pop.coords[\"ULONG\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " 0.5 * np.ones((20, 30)),\n", - " {\"units\": \"degrees_east\"},\n", - ")\n", - "pop.coords[\"ULAT\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " 2.5 * np.ones((20, 30)),\n", - " {\"units\": \"degrees_north\"},\n", - ")\n", + "from cf_xarray.datasets import popds as pop\n", "\n", - "# set dimensions as X, Y\n", - "pop[\"nlon\"] = (\"nlon\", np.arange(pop.sizes[\"nlon\"]), {\"axis\": \"X\"})\n", - "pop[\"nlat\"] = (\"nlat\", np.arange(pop.sizes[\"nlat\"]), {\"axis\": \"Y\"})\n", - "\n", - "# actual data vriables with coordinates attribute set\n", - "pop[\"UVEL\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " np.ones((20, 30)) * 15,\n", - " {\"coordinates\": \"ULONG ULAT\", \"standard_name\": \"sea_water_x_velocity\"},\n", - ")\n", - "pop[\"TEMP\"] = (\n", - " (\"nlat\", \"nlon\"),\n", - " np.ones((20, 30)) * 15,\n", - " {\n", - " \"coordinates\": \"TLONG TLAT\",\n", - " \"standard_name\": \"sea_water_potential_temperature\",\n", - " },\n", - ")\n", "pop" ] }, @@ -132,17 +92,18 @@ }, "outputs": [], "source": [ - "multiple = xr.Dataset()\n", - "multiple.coords[\"x1\"] = (\"x1\", range(30), {\"axis\": \"X\"})\n", - 
"multiple.coords[\"y1\"] = (\"y1\", range(20), {\"axis\": \"Y\"})\n", - "multiple.coords[\"x2\"] = (\"x2\", range(10), {\"axis\": \"X\"})\n", - "multiple.coords[\"y2\"] = (\"y2\", range(5), {\"axis\": \"Y\"})\n", + "from cf_xarray.datasets import multiple\n", "\n", - "multiple[\"v1\"] = ((\"x1\", \"y1\"), np.ones((30, 20)) * 15)\n", - "multiple[\"v2\"] = ((\"x2\", \"y2\"), np.ones((10, 5)) * 15)\n", "multiple" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This dataset has ancillary variables\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -154,29 +115,8 @@ }, "outputs": [], "source": [ - "# This dataset has ancillary variables\n", + "from cf_xarray.datasets import anc\n", "\n", - "anc = xr.Dataset()\n", - "anc[\"q\"] = (\n", - " (\"x\", \"y\"),\n", - " np.random.randn(10, 20),\n", - " dict(\n", - " standard_name=\"specific_humidity\",\n", - " units=\"g/g\",\n", - " ancillary_variables=\"q_error_limit q_detection_limit\",\n", - " ),\n", - ")\n", - "anc[\"q_error_limit\"] = (\n", - " (\"x\", \"y\"),\n", - " np.random.randn(10, 20),\n", - " dict(standard_name=\"specific_humidity standard_error\", units=\"g/g\"),\n", - ")\n", - "anc[\"q_detection_limit\"] = xr.DataArray(\n", - " 1e-3,\n", - " attrs=dict(\n", - " standard_name=\"specific_humidity detection_minimum\", units=\"g/g\"\n", - " ),\n", - ")\n", "anc" ] }, @@ -465,6 +405,13 @@ "pop.cf[[\"sea_water_potential_temperature\", \"UVEL\"]]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that ancillary variables are included as coordinate variables\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -988,15 +935,20 @@ "ds_bnds" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also convert each bounds variable independently with the top-level\n", + "functions\n" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# We can convert each bounds variable 
independently with the helper:\n", - "import cf_xarray as cfxr\n", - "\n", "lat_bounds = ds_bnds.cf.get_bounds(\"latitude\")\n", "\n", "lat_vertices = cfxr.bounds_to_vertices(lat_bounds, bounds_dim=\"bounds\")\n", @@ -1031,7 +983,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.7.8" }, "toc": { "base_numbering": 1, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f73d1672..597455ce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,7 +6,7 @@ What's New v0.4.1 (unreleased) =================== -- Added scripts to document CF criteria with tables. By `Mattia Almansi`_. +- Added scripts to document :ref:`criteria` with tables. By `Mattia Almansi`_. - Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ - Allow :py:meth:`DataArray.cf.__getitem__` with standard names. 
By `Deepak Cherian`_ diff --git a/setup.cfg b/setup.cfg index 2eeb679b..2236d52d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -116,6 +116,7 @@ test = pytest nobeep = True [rstcheck] +report=warning ignore_roles=pr,issue,py:meth,py:attr ignore_directives=ipython,autodata,csv-table ignore_messages=(is not referenced\.$) From 9d32c1807af4e49413b83360e2e2627e487b6257 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 2 Feb 2021 10:25:58 -0700 Subject: [PATCH 14/57] Automatically choose x,y for plots (#148) --- cf_xarray/accessor.py | 81 ++++++++++++++++++++++++-------- cf_xarray/tests/test_accessor.py | 67 ++++++++++++++++++++++++-- doc/whats-new.rst | 1 + 3 files changed, 126 insertions(+), 23 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index ac6dbe78..3be768ac 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -655,6 +655,42 @@ def check_results(names, k): ) +def _possible_x_y_plot(obj, key): + """Guesses a name for an x/y variable if possible.""" + # in priority order + x_criteria = [ + ("coordinates", "longitude"), + ("axes", "X"), + ("coordinates", "time"), + ("axes", "T"), + ] + y_criteria = [ + ("coordinates", "vertical"), + ("axes", "Z"), + ("coordinates", "latitude"), + ("axes", "Y"), + ] + + def _get_possible(accessor, criteria): + # is_scalar depends on NON_NUMPY_SUPPORTED_TYPES + # importing a private function seems better than + # maintaining that variable! + from xarray.core.utils import is_scalar + + for attr, key in criteria: + value = getattr(accessor, attr).get(key) + if not value or len(value) > 1: + continue + if not is_scalar(accessor._obj[value[0]]): + return value[0] + return None + + if key == "x": + return _get_possible(obj.cf, x_criteria) + elif key == "y": + return _get_possible(obj.cf, y_criteria) + + class _CFWrappedClass: """ This class is used to wrap any class in _WRAPPED_CLASSES. 
@@ -705,27 +741,34 @@ def _plot_decorator(self, func): @functools.wraps(func) def _plot_wrapper(*args, **kwargs): - if "x" in kwargs: - if kwargs["x"] in valid_keys: - xvar = self.accessor[kwargs["x"]] - else: - xvar = self._obj[kwargs["x"]] - if "positive" in xvar.attrs: - if xvar.attrs["positive"] == "down": - kwargs.setdefault("xincrease", False) - else: - kwargs.setdefault("xincrease", True) + def _process_x_or_y(kwargs, key): + if key not in kwargs: + kwargs[key] = _possible_x_y_plot(self._obj, key) - if "y" in kwargs: - if kwargs["y"] in valid_keys: - yvar = self.accessor[kwargs["y"]] - else: - yvar = self._obj[kwargs["y"]] - if "positive" in yvar.attrs: - if yvar.attrs["positive"] == "down": - kwargs.setdefault("yincrease", False) + value = kwargs.get(key) + if value: + if value in valid_keys: + var = self.accessor[value] else: - kwargs.setdefault("yincrease", True) + var = self._obj[value] + if "positive" in var.attrs: + if var.attrs["positive"] == "down": + kwargs.setdefault(f"{key}increase", False) + else: + kwargs.setdefault(f"{key}increase", True) + return kwargs + + is_line_plot = (func.__name__ == "line") or ( + func.__name__ == "wrapper" and kwargs.get("hue") + ) + if is_line_plot: + if not kwargs.get("hue"): + kwargs = _process_x_or_y(kwargs, "x") + if not kwargs.get("x"): + kwargs = _process_x_or_y(kwargs, "y") + else: + kwargs = _process_x_or_y(kwargs, "x") + kwargs = _process_x_or_y(kwargs, "y") return func(*args, **kwargs) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 216e6058..4fee67e5 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -293,15 +293,20 @@ def test_dataarray_getitem(): assert_identical(air.cf["area_grid_cell"], air.cell_area.reset_coords(drop=True)) -@pytest.mark.parametrize("obj", dataarrays) -def test_dataarray_plot(obj): +def test_dataarray_plot(): + + obj = airds.air - rv = obj.isel(time=1).cf.plot(x="X", y="Y") + rv = 
obj.isel(time=1).transpose("lon", "lat").cf.plot() assert isinstance(rv, mpl.collections.QuadMesh) + assert all(v > 180 for v in rv.axes.get_xlim()) + assert all(v < 200 for v in rv.axes.get_ylim()) plt.close() - rv = obj.isel(time=1).cf.plot.contourf(x="X", y="Y") + rv = obj.isel(time=1).transpose("lon", "lat").cf.plot.contourf() assert isinstance(rv, mpl.contour.QuadContourSet) + assert all(v > 180 for v in rv.axes.get_xlim()) + assert all(v < 200 for v in rv.axes.get_ylim()) plt.close() rv = obj.cf.plot(x="X", y="Y", col="T") @@ -316,6 +321,29 @@ def test_dataarray_plot(obj): assert all([isinstance(line, mpl.lines.Line2D) for line in rv]) plt.close() + # set y automatically + rv = obj.isel(time=0, lon=1).cf.plot.line() + np.testing.assert_equal(rv[0].get_ydata(), obj.lat.data) + plt.close() + + # don't set y automatically + rv = obj.isel(time=0, lon=1).cf.plot.line(x="lat") + np.testing.assert_equal(rv[0].get_xdata(), obj.lat.data) + plt.close() + + # various line plots and automatic guessing + rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot.line() + np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) + plt.close() + + # rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot(hue="Y") + # np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) + # plt.close() + + rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot.line() + np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) + plt.close() + obj = obj.copy(deep=True) obj.time.attrs.clear() rv = obj.cf.plot(x="X", y="Y", col="time") @@ -714,3 +742,34 @@ def test_drop_dims(ds): # Axis and coordinate for cf_name in ["X", "longitude"]: assert_identical(ds.drop_dims("lon"), ds.cf.drop_dims(cf_name)) + + +def test_possible_x_y_plot(): + from ..accessor import _possible_x_y_plot + + # choose axes + assert _possible_x_y_plot(airds.air.isel(time=1), "x") == "lon" + assert _possible_x_y_plot(airds.air.isel(time=1), "y") == "lat" + assert _possible_x_y_plot(airds.air.isel(lon=1), "y") == "lat" + assert 
_possible_x_y_plot(airds.air.isel(lon=1), "x") == "time" + + # choose coordinates over axes + assert _possible_x_y_plot(popds.UVEL, "x") == "ULONG" + assert _possible_x_y_plot(popds.UVEL, "y") == "ULAT" + assert _possible_x_y_plot(popds.TEMP, "x") == "TLONG" + assert _possible_x_y_plot(popds.TEMP, "y") == "TLAT" + + assert _possible_x_y_plot(popds.UVEL.drop_vars("ULONG"), "x") == "nlon" + + # choose X over T, Y over Z + def makeds(*dims): + coords = {dim: (dim, np.arange(3), {"axis": dim}) for dim in dims} + return xr.DataArray(np.zeros((3, 3)), dims=dims, coords=coords) + + yzds = makeds("Y", "Z") + assert _possible_x_y_plot(yzds, "y") == "Z" + assert _possible_x_y_plot(yzds, "x") is None + + xtds = makeds("X", "T") + assert _possible_x_y_plot(xtds, "y") is None + assert _possible_x_y_plot(xtds, "x") == "X" diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 597455ce..04e92161 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.4.1 (unreleased) =================== +- Automatically set ``x`` or ``y`` for :py:attr:`DataArray.cf.plot`. By `Deepak Cherian`_. - Added scripts to document :ref:`criteria` with tables. By `Mattia Almansi`_. - Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ From d73d9f5158fe01d1135635cc043162fbc8d63750 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 2 Feb 2021 10:35:54 -0700 Subject: [PATCH 15/57] Add standard name mapper in more places. 
(#151) --- cf_xarray/accessor.py | 43 ++++++++------- cf_xarray/datasets.py | 92 ++++++++++++++++++++++++++++++++ cf_xarray/tests/test_accessor.py | 17 +++++- 3 files changed, 131 insertions(+), 21 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 3be768ac..26e19e9d 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -213,9 +213,7 @@ def _get_axis_coord_single(var: Union[DataArray, Dataset], key: str) -> List[str return results -def _get_axis_coord_time_accessor( - var: Union[DataArray, Dataset], key: str -) -> List[str]: +def _get_groupby_time_accessor(var: Union[DataArray, Dataset], key: str) -> List[str]: """ Helper method for when our key name is of the nature "T.month" and we want to isolate the "T" for coordinate mapping @@ -238,7 +236,11 @@ def _get_axis_coord_time_accessor( if "." in key: key, ext = key.split(".", 1) - results = _get_axis_coord_single(var, key) + results = apply_mapper( + (_get_axis_coord, _get_with_standard_name), var, key, error=False + ) + if len(results) > 1: + raise KeyError(f"Multiple results received for {key}.") return [v + "." + ext for v in results] else: @@ -370,34 +372,34 @@ def _get_with_standard_name( #: Default mappers for common keys. 
_DEFAULT_KEY_MAPPERS: Mapping[str, Tuple[Mapper, ...]] = { - "dim": (_get_axis_coord,), - "dims": (_get_axis_coord,), # transpose - "drop_dims": (_get_axis_coord,), # drop_dims - "dimensions": (_get_axis_coord,), # stack - "dims_dict": (_get_axis_coord,), # swap_dims, rename_dims - "shifts": (_get_axis_coord,), # shift, roll - "pad_width": (_get_axis_coord,), # shift, roll + "dim": (_get_axis_coord, _get_with_standard_name), + "dims": (_get_axis_coord, _get_with_standard_name), # transpose + "drop_dims": (_get_axis_coord, _get_with_standard_name), # drop_dims + "dimensions": (_get_axis_coord, _get_with_standard_name), # stack + "dims_dict": (_get_axis_coord, _get_with_standard_name), # swap_dims, rename_dims + "shifts": (_get_axis_coord, _get_with_standard_name), # shift, roll + "pad_width": (_get_axis_coord, _get_with_standard_name), # shift, roll "names": ( _get_axis_coord, _get_measure, _get_with_standard_name, ), # set_coords, reset_coords, drop_vars "labels": (_get_axis_coord, _get_measure, _get_with_standard_name), # drop - "coords": (_get_axis_coord,), # interp - "indexers": (_get_axis_coord,), # sel, isel, reindex + "coords": (_get_axis_coord, _get_with_standard_name), # interp + "indexers": (_get_axis_coord, _get_with_standard_name), # sel, isel, reindex # "indexes": (_get_axis_coord,), # set_index - "dims_or_levels": (_get_axis_coord,), # reset_index - "window": (_get_axis_coord,), # rolling_exp + "dims_or_levels": (_get_axis_coord, _get_with_standard_name), # reset_index + "window": (_get_axis_coord, _get_with_standard_name), # rolling_exp "coord": (_get_axis_coord_single,), # differentiate, integrate "group": ( _get_axis_coord_single, - _get_axis_coord_time_accessor, + _get_groupby_time_accessor, _get_with_standard_name, ), "indexer": (_get_axis_coord_single,), # resample "variables": (_get_axis_coord, _get_with_standard_name), # sortby "weights": (_get_measure_variable,), # type: ignore - "chunks": (_get_axis_coord,), # chunk + "chunks": 
(_get_axis_coord, _get_with_standard_name), # chunk } @@ -430,7 +432,7 @@ def _build_docstring(func): mapper_docstrings = { _get_axis_coord: f"One or more of {(_AXIS_NAMES + _COORD_NAMES)!r}", _get_axis_coord_single: f"One of {(_AXIS_NAMES + _COORD_NAMES)!r}", - _get_axis_coord_time_accessor: "Time variable accessor e.g. 'T.month'", + _get_groupby_time_accessor: "Time variable accessor e.g. 'T.month'", _get_with_standard_name: "Standard names", _get_measure_variable: f"One of {_CELL_MEASURES!r}", } @@ -900,7 +902,10 @@ def _rewrite_values( # allow multiple return values here. # these are valid for .sel, .isel, .coarsen - all_mappers = ChainMap(key_mappers, dict.fromkeys(var_kws, (_get_axis_coord,))) + all_mappers = ChainMap( + key_mappers, + dict.fromkeys(var_kws, (_get_axis_coord, _get_with_standard_name)), + ) for key in set(all_mappers) & set(kwargs): value = kwargs[key] diff --git a/cf_xarray/datasets.py b/cf_xarray/datasets.py index bc8f46c0..4e9e6c7b 100644 --- a/cf_xarray/datasets.py +++ b/cf_xarray/datasets.py @@ -188,3 +188,95 @@ lat_vertices=xr.DataArray(lat_vertices, dims=("x_vertices", "y_vertices")), ), ) + +forecast = xr.decode_cf( + xr.Dataset.from_dict( + { + "coords": { + "L": { + "dims": ("L",), + "attrs": { + "long_name": "Lead", + "standard_name": "forecast_period", + "pointwidth": 1.0, + "gridtype": 0, + "units": "months", + }, + "data": [0, 1], + }, + "M": { + "dims": ("M",), + "attrs": { + "standard_name": "realization", + "long_name": "Ensemble Member", + "pointwidth": 1.0, + "gridtype": 0, + "units": "unitless", + }, + "data": [0, 1, 2], + }, + "S": { + "dims": ("S",), + "attrs": { + "calendar": "360_day", + "long_name": "Forecast Start Time", + "standard_name": "forecast_reference_time", + "pointwidth": 0, + "gridtype": 0, + "units": "months since 1960-01-01", + }, + "data": [0, 1, 2, 3], + }, + "X": { + "dims": ("X",), + "attrs": { + "standard_name": "longitude", + "pointwidth": 1.0, + "gridtype": 1, + "units": "degree_east", + }, + 
"data": [0, 1, 2, 3, 4], + }, + "Y": { + "dims": ("Y",), + "attrs": { + "standard_name": "latitude", + "pointwidth": 1.0, + "gridtype": 0, + "units": "degree_north", + }, + "data": [0, 1, 2, 3, 4, 5], + }, + }, + "attrs": {"Conventions": "IRIDL"}, + "dims": {"L": 2, "M": 3, "S": 4, "X": 5, "Y": 6}, + "data_vars": { + "sst": { + "dims": ("S", "L", "M", "Y", "X"), + "attrs": { + "pointwidth": 0, + "PDS_TimeRange": 3, + "center": "US Weather Service - National Met. Center", + "grib_name": "TMP", + "gribNumBits": 21, + "gribcenter": 7, + "gribparam": 11, + "gribleveltype": 1, + "GRIBgridcode": 3, + "process": 'Spectral Statistical Interpolation (SSI) analysis from "Final" run.', + "PTVersion": 2, + "gribfield": 1, + "units": "Celsius_scale", + "scale_min": -69.97389221191406, + "scale_max": 43.039306640625, + "long_name": "Sea Surface Temperature", + "standard_name": "sea_surface_temperature", + }, + "data": np.arange(np.prod((4, 2, 3, 6, 5))).reshape( + (4, 2, 3, 6, 5) + ), + } + }, + } + ) +) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 4fee67e5..fcf575ad 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -9,7 +9,7 @@ import cf_xarray # noqa -from ..datasets import airds, anc, ds_no_attrs, multiple, popds, romsds +from ..datasets import airds, anc, ds_no_attrs, forecast, multiple, popds, romsds from . import raise_if_dask_computes mpl.use("Agg") @@ -163,7 +163,6 @@ def test_rename_like(): reason="xarray GH4120. any test after this will fail since attrs are lost" ), ), - # groupby("time.day")? 
), ) def test_wrapped_classes(obj, attr, xrkwargs, cfkwargs): @@ -744,6 +743,20 @@ def test_drop_dims(ds): assert_identical(ds.drop_dims("lon"), ds.cf.drop_dims(cf_name)) +def test_new_standard_name_mappers(): + assert_identical(forecast.cf.mean("realization"), forecast.mean("M")) + assert_identical( + forecast.cf.mean(["realization", "forecast_period"]), forecast.mean(["M", "L"]) + ) + assert_identical(forecast.cf.chunk({"realization": 1}), forecast.chunk({"M": 1})) + assert_identical(forecast.cf.isel({"realization": 1}), forecast.isel({"M": 1})) + assert_identical(forecast.cf.isel(**{"realization": 1}), forecast.isel(**{"M": 1})) + assert_identical( + forecast.cf.groupby("forecast_reference_time.month").mean(), + forecast.groupby("S.month").mean(), + ) + + def test_possible_x_y_plot(): from ..accessor import _possible_x_y_plot From 36a9b01994362753577cdc17eec1f55bb89bf3b2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 3 Feb 2021 09:00:47 -0700 Subject: [PATCH 16/57] Support GroupBy iteration and arithmetic (#158) Fixes #157 --- cf_xarray/accessor.py | 6 +++++- cf_xarray/tests/test_accessor.py | 15 +++++++++++++++ doc/whats-new.rst | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 26e19e9d..56617c4d 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -19,6 +19,7 @@ import xarray as xr from xarray import DataArray, Dataset +from xarray.core.arithmetic import SupportsArithmetic from .helpers import bounds_to_vertices from .utils import _is_datetime_like, invert_mappings, parse_cell_methods_attr @@ -693,7 +694,7 @@ def _get_possible(accessor, criteria): return _get_possible(obj.cf, y_criteria) -class _CFWrappedClass: +class _CFWrappedClass(SupportsArithmetic): """ This class is used to wrap any class in _WRAPPED_CLASSES. 
""" @@ -721,6 +722,9 @@ def __getattr__(self, attr): key_mappers=_DEFAULT_KEY_MAPPERS, ) + def __iter__(self): + return iter(self.wrapped) + class _CFWrappedPlotMethods: """ diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index fcf575ad..0901874a 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -786,3 +786,18 @@ def makeds(*dims): xtds = makeds("X", "T") assert _possible_x_y_plot(xtds, "y") is None assert _possible_x_y_plot(xtds, "x") == "X" + + +def test_groupby_special_ops(): + cfgrouped = airds.cf.groupby_bins("latitude", np.arange(20, 50, 10)) + grouped = airds.groupby_bins("lat", np.arange(20, 50, 10)) + + # __iter__ + for (label, group), (cflabel, cfgroup) in zip(grouped, cfgrouped): + assert label == cflabel + assert_identical(group, cfgroup) + + # arithmetic + expected = grouped - grouped.mean() + actual = grouped - cfgrouped.mean() + assert_identical(expected, actual) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 04e92161..8d518333 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,6 +15,7 @@ v0.4.1 (unreleased) by :py:meth:`Dataset.cf.__getitem__`. This allows extraction of DataArrays when there are clashes between DataArray names and "special" CF names like ``T``. (:issue:`129`, :pr:`130`). By `Deepak Cherian`_ +- Fix iteration and arithemtic with ``GroupBy`` objects. By `Deepak Cherian`_. 
v0.4.0 (Jan 22, 2021) ===================== From c41f2faa166597520bec5b9c362c74d1827e6b2a Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 3 Feb 2021 11:02:24 -0500 Subject: [PATCH 17/57] Add "get_bounds_dim_name" (#159) --- cf_xarray/accessor.py | 21 +++++++++++++++++++++ cf_xarray/tests/test_accessor.py | 20 +++++++++++++++++++- doc/api.rst | 1 + doc/whats-new.rst | 3 ++- 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 56617c4d..a6a90427 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1378,6 +1378,27 @@ def get_bounds(self, key: str) -> DataArray: obj = self._maybe_to_dataset() return obj[bounds] + def get_bounds_dim_name(self, key: str) -> str: + """ + Get bounds dim name for variable corresponding to key. + + Parameters + ---------- + key : str + Name of variable whose bounds dimension name is desired. + + Returns + ------- + str + """ + crd = self[key] + bounds = self.get_bounds(key) + bounds_dims = set(bounds.dims) - set(crd.dims) + assert len(bounds_dims) == 1 + bounds_dim = bounds_dims.pop() + assert self._obj.sizes[bounds_dim] in [2, 4] + return bounds_dim + def add_bounds(self, dims: Union[Hashable, Iterable[Hashable]]): """ Returns a new object with bounds variables. The bounds values are guessed assuming diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 0901874a..5e241470 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -9,7 +9,16 @@ import cf_xarray # noqa -from ..datasets import airds, anc, ds_no_attrs, forecast, multiple, popds, romsds +from ..datasets import ( + airds, + anc, + ds_no_attrs, + forecast, + mollwds, + multiple, + popds, + romsds, +) from . 
import raise_if_dask_computes mpl.use("Agg") @@ -522,6 +531,15 @@ def test_bounds_to_vertices(): assert "time_bounds" in dsc +def test_get_bounds_dim_name(): + ds = airds.copy(deep=True).cf.add_bounds("lat") + assert ds.cf.get_bounds_dim_name("latitude") == "bounds" + assert ds.cf.get_bounds_dim_name("lat") == "bounds" + + assert mollwds.cf.get_bounds_dim_name("longitude") == "bounds" + assert mollwds.cf.get_bounds_dim_name("lon") == "bounds" + + def test_docstring(): assert "One of ('X'" in airds.cf.groupby.__doc__ assert "One or more of ('X'" in airds.cf.mean.__doc__ diff --git a/doc/api.rst b/doc/api.rst index 381e3cb4..a03bfa60 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -81,6 +81,7 @@ Methods Dataset.cf.decode_vertical_coords Dataset.cf.describe Dataset.cf.get_bounds + Dataset.cf.get_bounds_dim_name Dataset.cf.guess_coord_axis Dataset.cf.keys Dataset.cf.rename_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8d518333..9bd3ebae 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -15,7 +15,8 @@ v0.4.1 (unreleased) by :py:meth:`Dataset.cf.__getitem__`. This allows extraction of DataArrays when there are clashes between DataArray names and "special" CF names like ``T``. (:issue:`129`, :pr:`130`). By `Deepak Cherian`_ -- Fix iteration and arithemtic with ``GroupBy`` objects. By `Deepak Cherian`_. +- Retrieve bounds dimension name with :py:meth:`Dataset.cf.get_bounds_dim_name`. By `Pascal Bourgault`_. +- Fix iteration and arithmetic with ``GroupBy`` objects. By `Deepak Cherian`_. 
v0.4.0 (Jan 22, 2021) ===================== From ccd493907739e02b77e029316b8c012880585031 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 3 Feb 2021 19:42:20 +0000 Subject: [PATCH 18/57] replace describe with __repr__ (#150) * show coords std names * add star * introduce __repr__ * more compact * update doc * whats new * minor fix --- cf_xarray/accessor.py | 97 ++++++++++++++++++++------- cf_xarray/tests/test_accessor.py | 110 +++++++++++++++++++++++++------ doc/api.rst | 4 +- doc/examples/introduction.ipynb | 9 ++- doc/whats-new.rst | 1 + 5 files changed, 173 insertions(+), 48 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index a6a90427..132f95dc 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -654,7 +654,7 @@ def check_results(names, k): except KeyError: raise KeyError( f"{kind}.cf does not understand the key {k!r}. " - f"Use {kind}.cf.describe() to see a list of key names that can be interpreted." + f"Use 'repr({kind}.cf)' (or '{kind}.cf' in a Jupyter environment) to see a list of key names that can be interpreted." ) @@ -997,27 +997,80 @@ def describe(self): """ Print a string repr to screen. """ - text = "Axes:\n" - axes = self.axes - for key in _AXIS_NAMES: - text += f"\t{key}: {axes[key] if key in axes else []}\n" - - text += "\nCoordinates:\n" - coords = self.coordinates - for key in _COORD_NAMES: - text += f"\t{key}: {coords[key] if key in coords else []}\n" - - text += "\nCell Measures:\n" - measures = self.cell_measures - for key in sorted(self._get_all_cell_measures()): - text += f"\t{key}: {measures[key] if key in measures else []}\n" - - text += "\nStandard Names:\n" - for key, value in sorted(self.standard_names.items()): - if key not in _COORD_NAMES: - text += f"\t{key}: {value}\n" - - print(text) + + warnings.warn( + "'obj.cf.describe()' will be removed in a future version. 
" + "Use instead 'repr(obj.cf)' or 'obj.cf' in a Jupyter environment.", + DeprecationWarning, + ) + print(repr(self)) + + def __repr__(self): + + coords = self._obj.coords + dims = self._obj.dims + + def make_text_section(subtitle, vardict, valid_values, valid_keys=None): + + star = " * " + tab = len(star) * " " + subtitle = f"- {subtitle}:" + + # Sort keys + if not valid_keys: + # Alphabetical order + vardict = {key: vardict[key] for key in sorted(vardict)} + else: + # Hardcoded order + vardict = {key: vardict[key] for key in valid_keys if key in vardict} + + # Keep only valid values (e.g., coords or data_vars) + vardict = { + key: set(value).intersection(valid_values) + for key, value in vardict.items() + if set(value).intersection(valid_values) + } + + # Star for keys with dims only, tab otherwise + rows = [ + f"{star if set(value) <= set(dims) else tab}{key}: {sorted(value)}" + for key, value in vardict.items() + ] + + # Add valid keys missing followed by n/a + if valid_keys: + missing_keys = [key for key in valid_keys if key not in vardict] + if missing_keys: + rows += [tab + ", ".join(missing_keys) + ": n/a"] + elif not rows: + rows = [tab + "n/a"] + + # Add subtitle to the first row, align other rows + rows = [ + "\n" + subtitle + row if i == 0 else len(subtitle) * " " + row + for i, row in enumerate(rows) + ] + + return "\n".join(rows) + "\n" + + text = "Coordinates:" + text += make_text_section("CF Axes", self.axes, coords, _AXIS_NAMES) + text += make_text_section( + "CF Coordinates", self.coordinates, coords, _COORD_NAMES + ) + text += make_text_section( + "Cell Measures", self.cell_measures, coords, _CELL_MEASURES + ) + text += make_text_section("Standard Names", self.standard_names, coords) + if isinstance(self._obj, Dataset): + data_vars = self._obj.data_vars + text += "\nData Variables:" + text += make_text_section( + "Cell Measures", self.cell_measures, data_vars, _CELL_MEASURES + ) + text += make_text_section("Standard Names", self.standard_names, 
data_vars) + + return text def get_valid_keys(self) -> Set[str]: diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 5e241470..e5567ee6 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -1,3 +1,5 @@ +from textwrap import dedent + import matplotlib as mpl import numpy as np import pandas as pd @@ -35,17 +37,83 @@ def assert_dicts_identical(dict1, dict2): assert_identical(dict1[k], dict2[k]) -def test_describe(capsys): - airds.cf.describe() - actual = capsys.readouterr().out - expected = ( - "Axes:\n\tX: ['lon']\n\tY: ['lat']\n\tZ: []\n\tT: ['time']\n" - "\nCoordinates:\n\tlongitude: ['lon']\n\tlatitude: ['lat']" - "\n\tvertical: []\n\ttime: ['time']\n" - "\nCell Measures:\n\tarea: ['cell_area']\n\tvolume: []\n" - "\nStandard Names:\n\tair_temperature: ['air']\n\n" - ) - assert actual == expected +def test_repr(): + # Dataset. + # Stars: axes, coords, and std names + actual = airds.cf.__repr__() + expected = """\ + Coordinates: + - CF Axes: * X: ['lon'] + * Y: ['lat'] + * T: ['time'] + Z: n/a + + - CF Coordinates: * longitude: ['lon'] + * latitude: ['lat'] + * time: ['time'] + vertical: n/a + + - Cell Measures: area: ['cell_area'] + volume: n/a + + - Standard Names: * latitude: ['lat'] + * longitude: ['lon'] + * time: ['time'] + + Data Variables: + - Cell Measures: area, volume: n/a + + - Standard Names: air_temperature: ['air'] + """ + assert actual == dedent(expected) + + # DataArray (Coordinates section same as Dataset) + assert airds.cf.__repr__().startswith(airds["air"].cf.__repr__()) + actual = airds["air"].cf.__repr__() + expected = """\ + Coordinates: + - CF Axes: * X: ['lon'] + * Y: ['lat'] + * T: ['time'] + Z: n/a + + - CF Coordinates: * longitude: ['lon'] + * latitude: ['lat'] + * time: ['time'] + vertical: n/a + + - Cell Measures: area: ['cell_area'] + volume: n/a + + - Standard Names: * latitude: ['lat'] + * longitude: ['lon'] + * time: ['time'] + """ + assert actual == 
dedent(expected) + + # Empty Standard Names + actual = popds.cf.__repr__() + expected = """\ + Coordinates: + - CF Axes: * X: ['nlon'] + * Y: ['nlat'] + Z, T: n/a + + - CF Coordinates: longitude: ['TLONG', 'ULONG'] + latitude: ['TLAT', 'ULAT'] + vertical, time: n/a + + - Cell Measures: area, volume: n/a + + - Standard Names: n/a + + Data Variables: + - Cell Measures: area, volume: n/a + + - Standard Names: sea_water_potential_temperature: ['TEMP'] + sea_water_x_velocity: ['UVEL'] + """ + assert actual == dedent(expected) def test_axes(): @@ -68,7 +136,7 @@ def test_coordinates(): assert actual == expected -def test_cell_measures(capsys): +def test_cell_measures(): ds = airds.copy(deep=True) ds["foo"] = xr.DataArray(ds["cell_area"], attrs=dict(standard_name="foo_std_name")) ds["air"].attrs["cell_measures"] += " foo_measure: foo" @@ -84,13 +152,17 @@ def test_cell_measures(capsys): actual = ds.cf.cell_measures assert actual == expected - ds.cf.describe() - actual = capsys.readouterr().out - expected = ( - "\nCell Measures:\n\tarea: ['cell_area']\n\tfoo_measure: ['foo']\n\tvolume: ['foo']\n" - "\nStandard Names:\n\tair_temperature: ['air']\n\tfoo_std_name: ['foo']\n\n" - ) - assert actual.endswith(expected) + # Additional cell measure in repr + actual = ds.cf.__repr__() + expected = """\ + Data Variables: + - Cell Measures: volume: ['foo'] + area: n/a + + - Standard Names: air_temperature: ['air'] + foo_std_name: ['foo'] + """ + assert actual.endswith(dedent(expected)) def test_standard_names(): diff --git a/doc/api.rst b/doc/api.rst index a03bfa60..2f753fb2 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -44,7 +44,7 @@ Methods :template: autosummary/accessor_method.rst DataArray.cf.__getitem__ - DataArray.cf.describe + DataArray.cf.__repr__ DataArray.cf.guess_coord_axis DataArray.cf.keys DataArray.cf.rename_like @@ -76,10 +76,10 @@ Methods :template: autosummary/accessor_method.rst Dataset.cf.__getitem__ + Dataset.cf.__repr__ Dataset.cf.add_bounds 
Dataset.cf.bounds_to_vertices Dataset.cf.decode_vertical_coords - Dataset.cf.describe Dataset.cf.get_bounds Dataset.cf.get_bounds_dim_name Dataset.cf.guess_coord_axis diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index e598edc3..3a7c1c94 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -152,8 +152,7 @@ "`'X'` axis as being represented by the `lon` variable.\n", "\n", "It can also use the `standard_name` and `units` attributes to infer that `lon`\n", - "is \"Longitude\". To see variable names that `cf_xarray` can infer, use\n", - "`.cf.describe()`\n" + "is \"Longitude\". To see variable names that `cf_xarray` can infer, use `ds.cf`\n" ] }, { @@ -167,7 +166,7 @@ }, "outputs": [], "source": [ - "ds.cf.describe()" + "ds.cf" ] }, { @@ -190,7 +189,7 @@ }, "outputs": [], "source": [ - "pop.cf.describe()" + "pop.cf" ] }, { @@ -211,7 +210,7 @@ }, "outputs": [], "source": [ - "multiple.cf.describe()" + "multiple.cf" ] }, { diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9bd3ebae..e6be6541 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.4.1 (unreleased) =================== +- Replace ``cf.describe()`` with :py:meth:`Dataset.cf.__repr__`. By `Mattia Almansi`_. - Automatically set ``x`` or ``y`` for :py:attr:`DataArray.cf.plot`. By `Deepak Cherian`_. - Added scripts to document :ref:`criteria` with tables. By `Mattia Almansi`_. - Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. 
From b36645f50f620c4027b7e877ca3dafee9d0f53bb Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 4 Feb 2021 10:42:05 +0000 Subject: [PATCH 19/57] fix cell measures __repr__ (#162) * fix cell measures * sort measures * better comments --- cf_xarray/accessor.py | 21 ++++++++++----------- cf_xarray/tests/test_accessor.py | 3 ++- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 132f95dc..ed8dea3e 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1010,19 +1010,18 @@ def __repr__(self): coords = self._obj.coords dims = self._obj.dims - def make_text_section(subtitle, vardict, valid_values, valid_keys=None): + def make_text_section(subtitle, vardict, valid_values, default_keys=None): star = " * " tab = len(star) * " " subtitle = f"- {subtitle}:" - # Sort keys - if not valid_keys: - # Alphabetical order - vardict = {key: vardict[key] for key in sorted(vardict)} - else: - # Hardcoded order - vardict = {key: vardict[key] for key in valid_keys if key in vardict} + # Sort keys if there aren't extra keys, + # preserve default keys order otherwise. 
+ default_keys = [] if not default_keys else list(default_keys) + extra_keys = list(set(vardict) - set(default_keys)) + ordered_keys = sorted(vardict) if extra_keys else default_keys + vardict = {key: vardict[key] for key in ordered_keys if key in vardict} # Keep only valid values (e.g., coords or data_vars) vardict = { @@ -1037,9 +1036,9 @@ def make_text_section(subtitle, vardict, valid_values, valid_keys=None): for key, value in vardict.items() ] - # Add valid keys missing followed by n/a - if valid_keys: - missing_keys = [key for key in valid_keys if key not in vardict] + # Append missing default keys followed by n/a + if default_keys: + missing_keys = [key for key in default_keys if key not in vardict] if missing_keys: rows += [tab + ", ".join(missing_keys) + ": n/a"] elif not rows: diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index e5567ee6..138c6290 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -156,7 +156,8 @@ def test_cell_measures(): actual = ds.cf.__repr__() expected = """\ Data Variables: - - Cell Measures: volume: ['foo'] + - Cell Measures: foo_measure: ['foo'] + volume: ['foo'] area: n/a - Standard Names: air_temperature: ['air'] From d7f0c15b15181f5b9bc67bea721dbdea7dbf2e8d Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Fri, 5 Feb 2021 14:58:15 +0000 Subject: [PATCH 20/57] Do not get bounds when extracting a DataArray (#164) --- cf_xarray/accessor.py | 28 ++++++++++++++++------------ cf_xarray/tests/test_accessor.py | 9 +++++++++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index ed8dea3e..1e588b3a 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -12,6 +12,7 @@ List, Mapping, MutableMapping, + Optional, Set, Tuple, Union, @@ -608,10 +609,9 @@ def check_results(names, k): try: for name in allnames: - extravars = accessor.get_associated_variable_names(name) - # we cannot return bounds 
variables with scalar keys - if scalar_key: - extravars.pop("bounds") + extravars = accessor.get_associated_variable_names( + name, skip_bounds=scalar_key + ) coords.extend(itertools.chain(*extravars.values())) if isinstance(obj, DataArray): @@ -1209,7 +1209,9 @@ def standard_names(self) -> Dict[str, List[str]]: return {k: sorted(v) for k, v in vardict.items()} - def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: + def get_associated_variable_names( + self, name: Hashable, skip_bounds: Optional[bool] = None + ) -> Dict[str, List[str]]: """ Returns a dict mapping 1. "ancillary_variables" @@ -1223,6 +1225,8 @@ def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: name: Hashable + skip_bounds: bool, optional + Returns ------ @@ -1248,13 +1252,13 @@ def get_associated_variable_names(self, name: Hashable) -> Dict[str, List[str]]: "ancillary_variables" ].split(" ") - if "bounds" in attrs_or_encoding: - coords["bounds"] = [attrs_or_encoding["bounds"]] - - for dim in self._obj[name].dims: - dbounds = self._obj[dim].attrs.get("bounds", None) - if dbounds: - coords["bounds"].append(dbounds) + if not skip_bounds: + if "bounds" in attrs_or_encoding: + coords["bounds"] = [attrs_or_encoding["bounds"]] + for dim in self._obj[name].dims: + dbounds = self._obj[dim].attrs.get("bounds", None) + if dbounds: + coords["bounds"].append(dbounds) allvars = itertools.chain(*coords.values()) missing = set(allvars) - set(self._maybe_to_dataset().variables) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 138c6290..c09da277 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -577,6 +577,15 @@ def test_bounds(): expected = ds["lat_bounds"] assert_identical(actual, expected) + # Do not attempt to get bounds when extracting a DataArray + # raise a warning when extracting a Dataset and bounds do not exists + ds["time"].attrs["bounds"] = "foo" + with pytest.warns(None) 
as record: + ds.cf["air"] + assert len(record) == 0 + with pytest.warns(UserWarning, match="{'foo'} not found in object"): + ds.cf[["air"]] + def test_bounds_to_vertices(): # All available From 19b5d20218ba156ca86fbc78eab7f35d72da348d Mon Sep 17 00:00:00 2001 From: Shubhendra Singh Chauhan Date: Mon, 8 Feb 2021 00:10:19 +0530 Subject: [PATCH 21/57] Fix: code quality issues (#167) - Removed unnecessary comprehension - Used literal syntax instead of function calls to create data structure - Used literal syntax to create data structure - added .deepsource.toml file --- .deepsource.toml | 15 +++++++++++++++ cf_xarray/accessor.py | 8 ++++---- cf_xarray/tests/test_accessor.py | 4 ++-- cf_xarray/utils.py | 2 +- 4 files changed, 22 insertions(+), 7 deletions(-) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000..81205269 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,15 @@ +version = 1 + +test_patterns = ["cf_xarray/tests/test_*.py"] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 1e588b3a..ee5f3009 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -191,7 +191,7 @@ def _apply_single_mapper(mapper): else: results = flat - nresults = any([bool(v) for v in [results]]) + nresults = any(bool(v) for v in [results]) if not nresults: if error: raise KeyError( @@ -497,7 +497,7 @@ def _getattr( if not attribute: return dict(attribute) - newmap = dict() + newmap = {} inverted = invert_mappings( accessor.axes, accessor.coordinates, @@ -1165,7 +1165,7 @@ def cell_measures(self) -> Dict[str, List[str]]: da.attrs.get("cell_measures", "") for da in obj.data_vars.values() ] - measures: Dict[str, List[str]] = dict() + measures: Dict[str, List[str]] = {} for attr in all_attrs: for key, value in parse_cell_methods_attr(attr).items(): 
measures[key] = measures.setdefault(key, []) + [value] @@ -1201,7 +1201,7 @@ def standard_names(self) -> Dict[str, List[str]]: elif isinstance(self._obj, DataArray): variables = self._obj.coords - vardict: Dict[str, List[str]] = dict() + vardict: Dict[str, List[str]] = {} for k, v in variables.items(): if "standard_name" in v.attrs: std_name = v.attrs["standard_name"] diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index c09da277..a913f6da 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -399,7 +399,7 @@ def test_dataarray_plot(): plt.close() rv = obj.isel(lat=[0, 1], lon=1).cf.plot.line(x="T", hue="Y") - assert all([isinstance(line, mpl.lines.Line2D) for line in rv]) + assert all(isinstance(line, mpl.lines.Line2D) for line in rv) plt.close() # set y automatically @@ -517,7 +517,7 @@ def test_plot_xincrease_yincrease(): @pytest.mark.parametrize("dims", ["lat", "time", ["lat", "lon"]]) @pytest.mark.parametrize("obj", [airds]) def test_add_bounds(obj, dims): - expected = dict() + expected = {} expected["lat"] = xr.concat( [ obj.lat.copy(data=np.arange(76.25, 16.0, -2.5)), diff --git a/cf_xarray/utils.py b/cf_xarray/utils.py index b3ed7348..98a45ad9 100644 --- a/cf_xarray/utils.py +++ b/cf_xarray/utils.py @@ -51,5 +51,5 @@ def invert_mappings(*mappings): for mapping in mappings: for k, v in mapping.items(): for name in v: - merged[name] |= set([k]) + merged[name] |= {k} return merged From d05c59f7432938c7dc2562f08cb0430ada7026e6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 7 Feb 2021 15:08:12 -0700 Subject: [PATCH 22/57] velin docstring fixes (#168) --- cf_xarray/accessor.py | 55 +++++++++++++++++++------------------------ cf_xarray/helpers.py | 4 ++-- cf_xarray/utils.py | 2 +- 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index ee5f3009..524495f4 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -222,9 
+222,9 @@ def _get_groupby_time_accessor(var: Union[DataArray, Dataset], key: str) -> List Parameters ---------- - var: DataArray, Dataset + var : DataArray, Dataset DataArray belonging to the coordinate to be checked - key: str, [e.g. "T.month"] + key : str, [e.g. "T.month"] key to check for. Returns @@ -255,14 +255,14 @@ def _get_axis_coord(var: Union[DataArray, Dataset], key: str) -> List[str]: Parameters ---------- - var: DataArray, Dataset + var : DataArray, Dataset DataArray belonging to the coordinate to be checked - key: str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"] + key : str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"] key to check for. - error: bool + error : bool raise errors when key is not found or interpretable. Use False and provide default to replicate dict.get(k, None). - default: Any + default : Any default value to return when error is False. Returns @@ -330,9 +330,9 @@ def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: Parameters ---------- - obj: DataArray, Dataset + obj : DataArray, Dataset DataArray belonging to the coordinate to be checked - key: str + key : str key to check for. Returns @@ -471,18 +471,17 @@ def _getattr( Parameters ---------- - obj : DataArray, Dataset attr : Name of attribute in obj that will be shadowed. accessor : High level accessor object: CFAccessor key_mappers : dict dict(key_name: mapper) - wrap_classes: bool + wrap_classes : bool Should we wrap the return value with _CFWrappedClass? Only True for the high level CFAccessor. Facilitates code reuse for _CFWrappedClass and _CFWrapppedPlotMethods For both of those, wrap_classes is False. - extra_decorator: Callable (optional) + extra_decorator : Callable (optional) An extra decorator, if necessary. This is used by _CFPlotMethods to set default kwargs based on CF attributes. 
""" @@ -555,9 +554,9 @@ def _getitem( Parameters ---------- - accessor: CFAccessor - key: str, List[str] - skip: str, optional + accessor : CFAccessor + key : str, List[str] + skip : str, optional One of ["coords", "measures"], avoid clashes with special coord names """ @@ -889,12 +888,12 @@ def _rewrite_values( Parameters ---------- - kwargs: Mapping + kwargs : Mapping Mapping from kwarg name to value - key_mappers: Mapping + key_mappers : Mapping Mapping from kwarg name to a Mapper function that will convert a given CF "special" name to an xarray name. - var_kws: List[str] + var_kws : List[str] List of variable kwargs that need special treatment. e.g. **indexers_kwargs in isel @@ -1188,8 +1187,7 @@ def standard_names(self) -> Dict[str, List[str]]: Parameters ---------- - - obj: DataArray, Dataset + obj : DataArray, Dataset Xarray object to process Returns @@ -1222,14 +1220,10 @@ def get_associated_variable_names( Parameters ---------- - - name: Hashable - - skip_bounds: bool, optional - + name : Hashable + skip_bounds : bool, optional Returns ------ - Dict with keys "ancillary_variables", "cell_measures", "coordinates", "bounds" """ keys = ["ancillary_variables", "cell_measures", "coordinates", "bounds"] @@ -1306,7 +1300,7 @@ def rename_like( Parameters ---------- - other: DataArray, Dataset + other : DataArray, Dataset Variables will be renamed to match variable names in this xarray object Returns @@ -1351,7 +1345,7 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: Parameters ---------- - verbose: bool + verbose : bool Print extra info to screen Returns @@ -1420,7 +1414,7 @@ def get_bounds(self, key: str) -> DataArray: Parameters ---------- - key: str + key : str Name of variable whose bounds are desired Returns @@ -1462,7 +1456,7 @@ def add_bounds(self, dims: Union[Hashable, Iterable[Hashable]]): Parameters ---------- - dims: Hashable or Iterable[Hashable] + dims : Hashable or Iterable[Hashable] Either a single dimension name or 
a list of dimension names. Returns @@ -1584,7 +1578,7 @@ def decode_vertical_coords(self, prefix="z"): Parameters ---------- - prefix: str, optional + prefix : str, optional Prefix for newly created z variables. E.g. ``s_rho`` becomes ``z_rho`` @@ -1594,7 +1588,6 @@ def decode_vertical_coords(self, prefix="z"): Notes ----- - Will only decode when the ``formula_terms`` and ``standard_name`` attributes are set on the parameter (e.g ``s_rho`` ) diff --git a/cf_xarray/helpers.py b/cf_xarray/helpers.py index ae00afcc..33304523 100644 --- a/cf_xarray/helpers.py +++ b/cf_xarray/helpers.py @@ -17,7 +17,7 @@ def bounds_to_vertices( Parameters ---------- - bounds: DataArray + bounds : DataArray The bounds to convert. Must be of shape (N, 2) or (N, M, 4). bounds_dim : str The name of the bounds dimension of `bounds` (the one of length 2 or 4). @@ -93,7 +93,7 @@ def vertices_to_bounds( Parameters ---------- - bounds: DataArray + bounds : DataArray The bounds to convert. Must be of shape (N, 2) or (N, M, 4). out_dims : Sequence[str], The name of the dimension in the output. 
The first is the 'bounds' diff --git a/cf_xarray/utils.py b/cf_xarray/utils.py index 98a45ad9..e05e3501 100644 --- a/cf_xarray/utils.py +++ b/cf_xarray/utils.py @@ -29,7 +29,7 @@ def parse_cell_methods_attr(attr: str) -> Dict[str, str]: Parameters ---------- - attr: str + attr : str String to parse Returns From f96d708a62798d1263278ca37102f77ee93efe6f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 9 Feb 2021 03:56:37 -0700 Subject: [PATCH 23/57] Test regexes for _guess_coord_axis (#172) --- cf_xarray/accessor.py | 12 ++--- cf_xarray/tests/test_accessor.py | 84 ++++++++++++++++++++++++++------ 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 524495f4..3bf5c426 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -116,7 +116,7 @@ #: regular expressions for guess_coord_axis regex = { - "time": "time[0-9]*|min|hour|day|week|month|year", + "time": "(time|min|hour|day|week|month|year)[0-9]*", "vertical": ( "(lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|isobaric|pres|" "isotherm)[a-z_]*[0-9]*" @@ -130,7 +130,7 @@ regex["T"] = regex["time"] -attrs = { +ATTRS = { "X": {"axis": "X"}, "T": {"axis": "T", "standard_name": "time"}, "Y": {"axis": "Y"}, @@ -138,8 +138,8 @@ "latitude": {"units": "degrees_north", "standard_name": "latitude"}, "longitude": {"units": "degrees_east", "standard_name": "longitude"}, } -attrs["time"] = attrs["T"] -attrs["vertical"] = attrs["Z"] +ATTRS["time"] = ATTRS["T"] +ATTRS["vertical"] = ATTRS["Z"] # Type for Mapper functions @@ -1361,7 +1361,7 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: print( f"I think {var!r} is of type 'time'. It has a datetime-like type." 
) - obj[var].attrs = dict(ChainMap(obj[var].attrs, attrs["time"])) + obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS["time"])) continue # prevent second detection for axis, pattern in regex.items(): @@ -1371,7 +1371,7 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: print( f"I think {var!r} is of type {axis!r}. It matched {pattern!r}" ) - obj[var].attrs = dict(ChainMap(obj[var].attrs, attrs[axis])) + obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS[axis])) return obj diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index a913f6da..097ac6eb 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -1,3 +1,4 @@ +import itertools from textwrap import dedent import matplotlib as mpl @@ -627,26 +628,77 @@ def test_docstring(): assert "One or more of ('X'" in airds.cf.mean.__doc__ -def test_guess_coord_axis(): +def _make_names(prefixes): + suffixes = ["", "a", "_a", "0", "_0"] + return [ + f"{prefix}{suffix}" for prefix, suffix in itertools.product(prefixes, suffixes) + ] + + +_TIME_NAMES = _make_names( + [ + "time", + "min", + "hour", + "day", + "week", + "month", + "year", + ] +) +_VERTICAL_NAMES = _make_names( + [ + "lv_1", + "bottom_top", + "sigma", + "sigma_w", + "hght", + "height", + "altitude", + "depth", + "isobaric", + "pressure", + "isotherm", + ] +) +_X_NAMES = _make_names(["x"]) +_Y_NAMES = _make_names(["y"]) +_Z_NAMES = _VERTICAL_NAMES +_LATITUDE_NAMES = _make_names(["lat", "latitude"]) +_LONGITUDE_NAMES = _make_names(["lon", "longitude"]) + + +@pytest.mark.parametrize( + "kind, names", + [ + ["X", _X_NAMES], + ["Y", _Y_NAMES], + ["Z", _Z_NAMES], + ["T", _TIME_NAMES], + ["latitude", _LATITUDE_NAMES], + ["longitude", _LONGITUDE_NAMES], + ], +) +def test_guess_coord_axis(kind, names): + from cf_xarray.accessor import ATTRS + + for varname in names: + ds = xr.Dataset() + ds[varname] = (varname, [1, 2, 3, 4, 5]) + dsnew = ds.cf.guess_coord_axis() + assert 
dsnew[varname].attrs == ATTRS[kind] + + varname = varname.upper() + ds[varname] = (varname, [1, 2, 3, 4, 5]) + dsnew = ds.cf.guess_coord_axis() + assert dsnew[varname].attrs == ATTRS[kind] + + +def test_guess_coord_axis_datetime(): ds = xr.Dataset() ds["time"] = ("time", pd.date_range("2001-01-01", "2001-04-01")) - ds["lon_rho"] = ("lon_rho", [1, 2, 3, 4, 5]) - ds["lat_rho"] = ("lat_rho", [1, 2, 3, 4, 5]) - ds["x1"] = ("x1", [1, 2, 3, 4, 5]) - ds["y1"] = ("y1", [1, 2, 3, 4, 5]) - dsnew = ds.cf.guess_coord_axis() assert dsnew.time.attrs == {"standard_name": "time", "axis": "T"} - assert dsnew.lon_rho.attrs == { - "standard_name": "longitude", - "units": "degrees_east", - } - assert dsnew.lat_rho.attrs == { - "standard_name": "latitude", - "units": "degrees_north", - } - assert dsnew.x1.attrs == {"axis": "X"} - assert dsnew.y1.attrs == {"axis": "Y"} def test_attributes(): From 189b60b346262a4e86ebddd17b49d85d7b1134a4 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 10 Feb 2021 00:00:44 +0000 Subject: [PATCH 24/57] Add NEMO regex (#169) --- cf_xarray/accessor.py | 10 +++++----- cf_xarray/tests/test_accessor.py | 11 +++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 3bf5c426..4061884c 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -116,15 +116,15 @@ #: regular expressions for guess_coord_axis regex = { - "time": "(time|min|hour|day|week|month|year)[0-9]*", + "time": "\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*", "vertical": ( - "(lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|isobaric|pres|" - "isotherm)[a-z_]*[0-9]*" + "(z|nav_lev|gdep|lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|" + "isobaric|pres|isotherm)[a-z_]*[0-9]*" ), "Y": "y", - "latitude": "y?lat[a-z0-9]*", + "latitude": "y?(nav_lat|lat|gphi)[a-z0-9]*", "X": "x", - "longitude": "x?lon[a-z0-9]*", + "longitude": "x?(nav_lon|lon|glam)[a-z0-9]*", } regex["Z"] = regex["vertical"] regex["T"] = regex["time"] 
diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 097ac6eb..a08c5e55 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -629,13 +629,13 @@ def test_docstring(): def _make_names(prefixes): - suffixes = ["", "a", "_a", "0", "_0"] + suffixes = ["", "a", "_a", "0", "_0", "a_0a"] return [ f"{prefix}{suffix}" for prefix, suffix in itertools.product(prefixes, suffixes) ] -_TIME_NAMES = _make_names( +_TIME_NAMES = ["t"] + _make_names( [ "time", "min", @@ -648,6 +648,7 @@ def _make_names(prefixes): ) _VERTICAL_NAMES = _make_names( [ + "z", "lv_1", "bottom_top", "sigma", @@ -659,13 +660,15 @@ def _make_names(prefixes): "isobaric", "pressure", "isotherm", + "gdep", + "nav_lev", ] ) _X_NAMES = _make_names(["x"]) _Y_NAMES = _make_names(["y"]) _Z_NAMES = _VERTICAL_NAMES -_LATITUDE_NAMES = _make_names(["lat", "latitude"]) -_LONGITUDE_NAMES = _make_names(["lon", "longitude"]) +_LATITUDE_NAMES = _make_names(["lat", "latitude", "gphi", "nav_lat"]) +_LONGITUDE_NAMES = _make_names(["lon", "longitude", "glam", "nav_lon"]) @pytest.mark.parametrize( From ed3dcc15452960b6d479ae6913727228362e98ad Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 9 Feb 2021 17:04:24 -0700 Subject: [PATCH 25/57] minor cleanups (#173) --- cf_xarray/accessor.py | 8 ++++---- cf_xarray/datasets.py | 1 - cf_xarray/tests/test_accessor.py | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 4061884c..01c2ce4d 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -167,7 +167,7 @@ def _apply_single_mapper(mapper): try: results = mapper(obj, key) - except Exception as e: + except KeyError as e: if error: raise e else: @@ -319,7 +319,7 @@ def _get_measure_variable( """ tiny wrapper since xarray does not support providing str for weights.""" varnames = apply_mapper(_get_measure, da, key, error, default) if len(varnames) > 1: - raise ValueError(f"Multiple 
measures found for key {key!r}: {varnames!r}.") + raise KeyError(f"Multiple measures found for key {key!r}: {varnames!r}.") return [da[varnames[0]]] @@ -572,7 +572,7 @@ def _getitem( def check_results(names, k): if scalar_key and len(names) > 1: - raise ValueError( + raise KeyError( f"Receive multiple variables for key {k!r}: {names}. " f"Expected only one. Please pass a list [{k!r}] " f"instead to get all variables matching {k!r}." @@ -627,7 +627,7 @@ def check_results(names, k): da.coords[k1] = ds.variables[k1] return da else: - raise ValueError( + raise KeyError( f"Received scalar key {key[0]!r} but multiple results: {allnames!r}. " f"Please pass a list instead (['{key[0]}']) to get back a Dataset " f"with {allnames!r}." diff --git a/cf_xarray/datasets.py b/cf_xarray/datasets.py index 4e9e6c7b..fea548bb 100644 --- a/cf_xarray/datasets.py +++ b/cf_xarray/datasets.py @@ -70,7 +70,6 @@ anc["q_detection_limit"] = xr.DataArray( 1e-3, attrs=dict(standard_name="specific_humidity detection_minimum", units="g/g") ) -anc multiple = xr.Dataset() diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index a08c5e55..2682ccf9 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -187,7 +187,7 @@ def test_getitem_standard_name(): ds = airds.copy(deep=True) ds["air2"] = ds.air - with pytest.raises(ValueError): + with pytest.raises(KeyError): ds.cf["air_temperature"] actual = ds.cf[["air_temperature"]] expected = ds[["air", "air2"]] @@ -508,7 +508,7 @@ def test_plot_xincrease_yincrease(): ds.lon.attrs["positive"] = "down" ds.lat.attrs["positive"] = "down" - f, ax = plt.subplots(1, 1) + _, ax = plt.subplots(1, 1) ds.air.isel(time=1).cf.plot(ax=ax, x="X", y="Y") for lim in [ax.get_xlim(), ax.get_ylim()]: From 9a7ed41798575888b9ef36649a0c198a06ee0a33 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Tue, 23 Feb 2021 17:59:00 +0000 Subject: [PATCH 26/57] Rework mappers to be more general (#174) Co-authored-by: dcherian 
--- cf_xarray/accessor.py | 226 +++++++++++++++++-------------- cf_xarray/datasets.py | 2 +- cf_xarray/tests/test_accessor.py | 69 +++++++++- doc/examples/introduction.ipynb | 2 +- doc/whats-new.rst | 2 +- 5 files changed, 194 insertions(+), 107 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 01c2ce4d..739fad8e 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -15,7 +15,9 @@ Optional, Set, Tuple, + TypeVar, Union, + cast, ) import xarray as xr @@ -145,6 +147,9 @@ # Type for Mapper functions Mapper = Callable[[Union[DataArray, Dataset], str], List[str]] +# Type for decorators +F = TypeVar("F", bound=Callable[..., Any]) + def apply_mapper( mappers: Union[Mapper, Tuple[Mapper, ...]], @@ -168,7 +173,7 @@ def _apply_single_mapper(mapper): try: results = mapper(obj, key) except KeyError as e: - if error: + if error or "I expected only one." in repr(e): raise e else: results = [] @@ -203,19 +208,10 @@ def _apply_single_mapper(mapper): return results -def _get_axis_coord_single(var: Union[DataArray, Dataset], key: str) -> List[str]: - """ Helper method for when we really want only one result per key. """ - results = _get_axis_coord(var, key) - if len(results) > 1: - raise KeyError( - f"Multiple results for {key!r} found: {results!r}. I expected only one." - ) - elif len(results) == 0: - raise KeyError(f"No results found for {key!r}.") - return results - - def _get_groupby_time_accessor(var: Union[DataArray, Dataset], key: str) -> List[str]: + """ + Time variable accessor e.g. 'T.month' + """ """ Helper method for when our key name is of the nature "T.month" and we want to isolate the "T" for coordinate mapping @@ -235,12 +231,11 @@ def _get_groupby_time_accessor(var: Union[DataArray, Dataset], key: str) -> List ----- Returns an empty list if there is no frequency extension specified. """ + if "." 
in key: key, ext = key.split(".", 1) - results = apply_mapper( - (_get_axis_coord, _get_with_standard_name), var, key, error=False - ) + results = apply_mapper((_get_all,), var, key, error=False) if len(results) > 1: raise KeyError(f"Multiple results received for {key}.") return [v + "." + ext for v in results] @@ -313,16 +308,6 @@ def _get_axis_coord(var: Union[DataArray, Dataset], key: str) -> List[str]: return list(results) -def _get_measure_variable( - da: DataArray, key: str, error: bool = True, default: str = None -) -> List[DataArray]: - """ tiny wrapper since xarray does not support providing str for weights.""" - varnames = apply_mapper(_get_measure, da, key, error, default) - if len(varnames) > 1: - raise KeyError(f"Multiple measures found for key {key!r}: {varnames!r}.") - return [da[varnames[0]]] - - def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: """ Translate from cell measures to appropriate variable name. @@ -361,6 +346,9 @@ def _get_with_standard_name( obj: Union[DataArray, Dataset], name: Union[str, List[str]] ) -> List[str]: """ returns a list of variable names with standard name == name. 
""" + if name is None: + return [] + varnames = [] if isinstance(obj, DataArray): obj = obj._to_temp_dataset() @@ -372,36 +360,91 @@ def _get_with_standard_name( return varnames +def _get_all(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names + """ + all_mappers = (_get_axis_coord, _get_measure, _get_with_standard_name) + results = apply_mapper(all_mappers, obj, key, error=False, default=None) + return results + + +def _get_dims(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names present in .dims + """ + return [k for k in _get_all(obj, key) if k in obj.dims] + + +def _get_indexes(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names present in .indexes + """ + return [k for k in _get_all(obj, key) if k in obj.indexes] + + +def _get_coords(obj: Union[DataArray, Dataset], key: str) -> List[str]: + """ + One or more of ('X', 'Y', 'Z', 'T', 'longitude', 'latitude', 'vertical', 'time', + 'area', 'volume'), or arbitrary measures, or standard names present in .coords + """ + return [k for k in _get_all(obj, key) if k in obj.coords] + + +def _variables(func: F) -> F: + @functools.wraps(func) + def wrapper(obj: Union[DataArray, Dataset], key: str) -> List[DataArray]: + return [obj[k] for k in func(obj, key)] + + return cast(F, wrapper) + + +def _single(func: F) -> F: + @functools.wraps(func) + def wrapper(obj: Union[DataArray, Dataset], key: str): + results = func(obj, key) + if len(results) > 1: + raise KeyError( + f"Multiple results for {key!r} found: {results!r}. I expected only one." 
+ ) + elif len(results) == 0: + raise KeyError(f"No results found for {key!r}.") + return results + + wrapper.__doc__ = ( + func.__doc__.replace("One or more of", "One of") + if func.__doc__ + else func.__doc__ + ) + + return cast(F, wrapper) + + #: Default mappers for common keys. _DEFAULT_KEY_MAPPERS: Mapping[str, Tuple[Mapper, ...]] = { - "dim": (_get_axis_coord, _get_with_standard_name), - "dims": (_get_axis_coord, _get_with_standard_name), # transpose - "drop_dims": (_get_axis_coord, _get_with_standard_name), # drop_dims - "dimensions": (_get_axis_coord, _get_with_standard_name), # stack - "dims_dict": (_get_axis_coord, _get_with_standard_name), # swap_dims, rename_dims - "shifts": (_get_axis_coord, _get_with_standard_name), # shift, roll - "pad_width": (_get_axis_coord, _get_with_standard_name), # shift, roll - "names": ( - _get_axis_coord, - _get_measure, - _get_with_standard_name, - ), # set_coords, reset_coords, drop_vars - "labels": (_get_axis_coord, _get_measure, _get_with_standard_name), # drop - "coords": (_get_axis_coord, _get_with_standard_name), # interp - "indexers": (_get_axis_coord, _get_with_standard_name), # sel, isel, reindex - # "indexes": (_get_axis_coord,), # set_index - "dims_or_levels": (_get_axis_coord, _get_with_standard_name), # reset_index - "window": (_get_axis_coord, _get_with_standard_name), # rolling_exp - "coord": (_get_axis_coord_single,), # differentiate, integrate - "group": ( - _get_axis_coord_single, - _get_groupby_time_accessor, - _get_with_standard_name, - ), - "indexer": (_get_axis_coord_single,), # resample - "variables": (_get_axis_coord, _get_with_standard_name), # sortby - "weights": (_get_measure_variable,), # type: ignore - "chunks": (_get_axis_coord, _get_with_standard_name), # chunk + "dim": (_get_dims,), + "dims": (_get_dims,), # transpose + "drop_dims": (_get_dims,), # drop_dims + "dimensions": (_get_dims,), # stack + "dims_dict": (_get_dims,), # swap_dims, rename_dims + "shifts": (_get_dims,), # shift, roll + 
"pad_width": (_get_dims,), # shift, roll + "names": (_get_all,), # set_coords, reset_coords, drop_vars + "labels": (_get_indexes,), # drop_sel + "coords": (_get_dims,), # interp + "indexers": (_get_dims,), # sel, isel, reindex + # "indexes": (_single(_get_dims),), # set_index this decodes keys but not values + "dims_or_levels": (_get_dims,), # reset_index + "window": (_get_dims,), # rolling_exp + "coord": (_single(_get_coords),), # differentiate, integrate + "group": (_single(_get_all), _get_groupby_time_accessor), # groupby + "indexer": (_single(_get_indexes),), # resample + "variables": (_get_all,), # sortby + "weights": (_variables(_single(_get_all)),), # type: ignore + "chunks": (_get_dims,), # chunk } @@ -430,28 +473,19 @@ def _build_docstring(func): can be used for arguments. """ - # this list will need to be updated any time a new mapper is added - mapper_docstrings = { - _get_axis_coord: f"One or more of {(_AXIS_NAMES + _COORD_NAMES)!r}", - _get_axis_coord_single: f"One of {(_AXIS_NAMES + _COORD_NAMES)!r}", - _get_groupby_time_accessor: "Time variable accessor e.g. 'T.month'", - _get_with_standard_name: "Standard names", - _get_measure_variable: f"One of {_CELL_MEASURES!r}", - } - sig = inspect.signature(func) string = "" for k in set(sig.parameters.keys()) & set(_DEFAULT_KEY_MAPPERS): mappers = _DEFAULT_KEY_MAPPERS.get(k, []) docstring = ";\n\t\t\t".join( - mapper_docstrings.get(mapper, "unknown. please open an issue.") + mapper.__doc__ if mapper.__doc__ else "unknown. please open an issue." 
for mapper in mappers ) string += f"\t\t{k}: {docstring} \n" for param in sig.parameters: if sig.parameters[param].kind is inspect.Parameter.VAR_KEYWORD: - string += f"\t\t{param}: {mapper_docstrings[_get_axis_coord]} \n\n" + string += f"\t\t{param}: {_get_all.__doc__} \n\n" return ( f"\n\tThe following arguments will be processed by cf_xarray: \n{string}" "\n\t----\n\t" @@ -583,12 +617,12 @@ def check_results(names, k): successful = dict.fromkeys(key, False) for k in key: if "coords" not in skip and k in _AXIS_NAMES + _COORD_NAMES: - names = _get_axis_coord(obj, k) + names = _get_all(obj, k) check_results(names, k) successful[k] = bool(names) coords.extend(names) elif "measures" not in skip and k in accessor._get_all_cell_measures(): - measure = _get_measure(obj, k) + measure = _get_all(obj, k) check_results(measure, k) successful[k] = bool(measure) if measure: @@ -738,9 +772,9 @@ def __init__(self, obj, accessor): def _plot_decorator(self, func): """ This decorator is used to set default kwargs on plotting functions. - - For now, this is setting ``xincrease`` and ``yincrease``. It could set - other arguments in the future. + For now, this can + 1. set ``xincrease`` and ``yincrease``. + 2. automatically set ``x`` or ``y``. 
""" valid_keys = self.accessor.keys() @@ -764,7 +798,8 @@ def _process_x_or_y(kwargs, key): return kwargs is_line_plot = (func.__name__ == "line") or ( - func.__name__ == "wrapper" and kwargs.get("hue") + func.__name__ == "wrapper" + and (kwargs.get("hue") or self._obj.ndim == 1) ) if is_line_plot: if not kwargs.get("hue"): @@ -787,7 +822,7 @@ def __call__(self, *args, **kwargs): obj=self._obj, attr="plot", accessor=self.accessor, - key_mappers=dict.fromkeys(self._keys, (_get_axis_coord_single,)), + key_mappers=dict.fromkeys(self._keys, (_single(_get_all),)), ) return self._plot_decorator(plot)(*args, **kwargs) @@ -799,7 +834,7 @@ def __getattr__(self, attr): obj=self._obj.plot, attr=attr, accessor=self.accessor, - key_mappers=dict.fromkeys(self._keys, (_get_axis_coord_single,)), + key_mappers=dict.fromkeys(self._keys, (_single(_get_all),)), # TODO: "extra_decorator" is more complex than I would like it to be. # Not sure if there is a better way though extra_decorator=self._plot_decorator, @@ -907,7 +942,7 @@ def _rewrite_values( # these are valid for .sel, .isel, .coarsen all_mappers = ChainMap( key_mappers, - dict.fromkeys(var_kws, (_get_axis_coord, _get_with_standard_name)), + dict.fromkeys(var_kws, (_get_all,)), ) for key in set(all_mappers) & set(kwargs): @@ -961,8 +996,6 @@ def _rewrite_values( for vkw in var_kws: if vkw in kwargs: maybe_update = { - # TODO: this is assuming key_mappers[k] is always - # _get_axis_coord_single k: apply_mapper( key_mappers[k], self._obj, v, error=False, default=[v] )[0] @@ -1105,17 +1138,14 @@ def axes(self) -> Dict[str, List[str]]: This is useful for checking whether a key is valid for indexing, i.e. that the attributes necessary to allow indexing by that key exist. - However, it will only return the Axis names, not Coordinate names. + However, it will only return the Axis names present in ``.coords``, not Coordinate names. Returns ------- Dictionary of valid Axis names that can be used with ``__getitem__`` or ``.cf[key]``. 
Will be ("X", "Y", "Z", "T") or a subset thereof. """ - vardict = { - key: apply_mapper(_get_axis_coord, self._obj, key, error=False) - for key in _AXIS_NAMES - } + vardict = {key: _get_coords(self._obj, key) for key in _AXIS_NAMES} return {k: sorted(v) for k, v in vardict.items() if v} @@ -1127,17 +1157,14 @@ def coordinates(self) -> Dict[str, List[str]]: This is useful for checking whether a key is valid for indexing, i.e. that the attributes necessary to allow indexing by that key exist. - However, it will only return the Coordinate names, not Axis names. + However, it will only return the Coordinate names present in ``.coords``, not Axis names. Returns ------- Dictionary of valid Coordinate names that can be used with ``__getitem__`` or ``.cf[key]``. Will be ("longitude", "latitude", "vertical", "time") or a subset thereof. """ - vardict = { - key: apply_mapper(_get_axis_coord, self._obj, key, error=False) - for key in _COORD_NAMES - } + vardict = {key: _get_coords(self._obj, key) for key in _COORD_NAMES} return {k: sorted(v) for k, v in vardict.items() if v} @@ -1164,10 +1191,10 @@ def cell_measures(self) -> Dict[str, List[str]]: da.attrs.get("cell_measures", "") for da in obj.data_vars.values() ] - measures: Dict[str, List[str]] = {} + keys = {} for attr in all_attrs: - for key, value in parse_cell_methods_attr(attr).items(): - measures[key] = measures.setdefault(key, []) + [value] + keys.update(parse_cell_methods_attr(attr)) + measures = {key: _get_all(self._obj, key) for key in keys} return {k: sorted(set(v)) for k, v in measures.items() if v} @@ -1310,16 +1337,11 @@ def rename_like( ourkeys = self.keys() theirkeys = other.cf.keys() - good_keys = set(_COORD_NAMES) & ourkeys & theirkeys - if not good_keys: - raise ValueError( - "No common coordinate variables between these two objects." 
- ) - + good_keys = ourkeys & theirkeys renamer = {} for key in good_keys: - ours = _get_axis_coord_single(self._obj, key)[0] - theirs = _get_axis_coord_single(other, key)[0] + ours = _single(_get_all)(self._obj, key)[0] + theirs = _single(_get_all)(other, key)[0] renamer[ours] = theirs newobj = self._obj.rename(renamer) @@ -1374,6 +1396,12 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS[axis])) return obj + def drop(self, *args, **kwargs): + raise NotImplementedError( + "cf-xarray does not support .drop." + "Please use .cf.drop_vars or .cf.drop_sel as appropriate." + ) + @xr.register_dataset_accessor("cf") class CFDatasetAccessor(CFAccessor): @@ -1422,7 +1450,7 @@ def get_bounds(self, key: str) -> DataArray: DataArray """ name = apply_mapper( - _get_axis_coord_single, self._obj, key, error=False, default=[key] + _single(_get_all), self._obj, key, error=False, default=[key] )[0] bounds = self._obj[name].attrs["bounds"] obj = self._maybe_to_dataset() @@ -1599,7 +1627,7 @@ def decode_vertical_coords(self, prefix="z"): import re ds = self._obj - dims = _get_axis_coord(ds, "Z") + dims = _get_dims(ds, "Z") requirements = { "ocean_s_coordinate_g1": {"depth_c", "depth", "s", "C", "eta"}, diff --git a/cf_xarray/datasets.py b/cf_xarray/datasets.py index fea548bb..de593471 100644 --- a/cf_xarray/datasets.py +++ b/cf_xarray/datasets.py @@ -121,7 +121,7 @@ romsds["temp"] = ( ("ocean_time", "s_rho"), [np.linspace(20, 30, 30)] * 2, - {"coordinates": "z_rho_dummy"}, + {"coordinates": "z_rho_dummy", "standard_name": "sea_water_potential_temperature"}, ) romsds["temp"].encoding["coordinates"] = "s_rho" romsds.coords["z_rho_dummy"] = ( diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 2682ccf9..e54e2199 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -215,10 +215,13 @@ def test_getitem_ancillary_variables(): def 
test_rename_like(): original = popds.copy(deep=True) - with pytest.raises(KeyError): - popds.cf.rename_like(airds) + # it'll match for axis: X (lon, nlon) and coordinate="longitude" (lon, TLONG) + # so delete the axis attributes + newair = airds.copy(deep=True) + del newair.lon.attrs["axis"] + del newair.lat.attrs["axis"] - renamed = popds.cf["TEMP"].cf.rename_like(airds) + renamed = popds.cf["TEMP"].cf.rename_like(newair) for k in ["TLONG", "TLAT"]: assert k not in renamed.coords assert k in original.coords @@ -228,6 +231,20 @@ def test_rename_like(): assert "lat" in renamed.coords assert renamed.attrs["coordinates"] == "lon lat" + # standard name matching + newroms = romsds.expand_dims(latitude=[1], longitude=[1]).cf.guess_coord_axis() + renamed = popds.cf["UVEL"].cf.rename_like(newroms) + assert renamed.attrs["coordinates"] == "longitude latitude" + assert "longitude" in renamed.coords + assert "latitude" in renamed.coords + assert "ULON" not in renamed.coords + assert "ULAT" not in renamed.coords + + # should change "temp" to "TEMP" + renamed = romsds.cf.rename_like(popds) + assert "temp" not in renamed + assert "TEMP" in renamed + @pytest.mark.parametrize("obj", objects) @pytest.mark.parametrize( @@ -413,6 +430,10 @@ def test_dataarray_plot(): np.testing.assert_equal(rv[0].get_xdata(), obj.lat.data) plt.close() + rv = obj.isel(time=0, lon=1).cf.plot(x="lat") + np.testing.assert_equal(rv[0].get_xdata(), obj.lat.data) + plt.close() + # various line plots and automatic guessing rv = obj.cf.isel(T=1, Y=[0, 1, 2]).cf.plot.line() np.testing.assert_equal(rv[0].get_xdata(), obj.lon.data) @@ -625,7 +646,25 @@ def test_get_bounds_dim_name(): def test_docstring(): assert "One of ('X'" in airds.cf.groupby.__doc__ + assert "Time variable accessor e.g. 
'T.month'" in airds.cf.groupby.__doc__ assert "One or more of ('X'" in airds.cf.mean.__doc__ + assert "present in .dims" in airds.cf.drop_dims.__doc__ + assert "present in .coords" in airds.cf.integrate.__doc__ + assert "present in .indexes" in airds.cf.resample.__doc__ + + # Make sure docs are up to date + get_all_doc = cf_xarray.accessor._get_all.__doc__ + all_keys = ( + cf_xarray.accessor._AXIS_NAMES + + cf_xarray.accessor._COORD_NAMES + + cf_xarray.accessor._CELL_MEASURES + ) + expected = f"One or more of {all_keys!r}, or arbitrary measures, or standard names" + assert get_all_doc.split() == expected.split() + for name in ["dims", "indexes", "coords"]: + actual = getattr(cf_xarray.accessor, f"_get_{name}").__doc__ + expected = get_all_doc + f" present in .{name}" + assert actual.split() == expected.split() def _make_names(prefixes): @@ -847,10 +886,11 @@ def test_standard_name_mapper(): expected = da.sortby("label") assert_identical(actual, expected) + assert cf_xarray.accessor._get_with_standard_name(da, None) == [] + @pytest.mark.parametrize("obj", objects) -@pytest.mark.parametrize("attr", ["drop", "drop_vars", "set_coords"]) -@pytest.mark.filterwarnings("ignore:dropping .* using `drop` .* deprecated") +@pytest.mark.parametrize("attr", ["drop_vars", "set_coords"]) def test_drop_vars_and_set_coords(obj, attr): # DataArray object has no attribute set_coords @@ -893,11 +933,30 @@ def test_drop_sel_and_reset_coords(obj): @pytest.mark.parametrize("ds", datasets) def test_drop_dims(ds): + # Add data_var and coord to test _get_dims + ds["lon_var"] = ds["lon"] + ds = ds.assign_coords(lon_coord=ds["lon"]) + # Axis and coordinate for cf_name in ["X", "longitude"]: assert_identical(ds.drop_dims("lon"), ds.cf.drop_dims(cf_name)) +@pytest.mark.parametrize("ds", datasets) +def test_differentiate(ds): + + # Add data_var and coord to test _get_coords + ds["lon_var"] = ds["lon"] + ds = ds.assign_coords(lon_coord=ds["lon"]) + + # Coordinate + 
assert_identical(ds.differentiate("lon"), ds.cf.differentiate("lon")) + + # Multiple coords (test error raised by _single) + with pytest.raises(KeyError, match=".*I expected only one."): + assert_identical(ds.differentiate("lon"), ds.cf.differentiate("X")) + + def test_new_standard_name_mappers(): assert_identical(forecast.cf.mean("realization"), forecast.mean("M")) assert_identical( diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 3a7c1c94..87d0f3bb 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -982,7 +982,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.9.1" }, "toc": { "base_numbering": 1, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e6be6541..787d51fb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -9,7 +9,7 @@ v0.4.1 (unreleased) - Replace ``cf.describe()`` with :py:meth:`Dataset.cf.__repr__`. By `Mattia Almansi`_. - Automatically set ``x`` or ``y`` for :py:attr:`DataArray.cf.plot`. By `Deepak Cherian`_. - Added scripts to document :ref:`criteria` with tables. By `Mattia Almansi`_. -- Support for ``.drop()``, ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. +- Support for ``.drop_vars()``, ``.drop_sel()``, ``.drop_dims()``, ``.set_coords()``, ``.reset_coords()``. By `Mattia Almansi`_. - Support for using ``standard_name`` in more functions. (:pr:`128`) By `Deepak Cherian`_ - Allow :py:meth:`DataArray.cf.__getitem__` with standard names. 
By `Deepak Cherian`_ - Rewrite the ``values`` of :py:attr:`Dataset.coords` and :py:attr:`Dataset.data_vars` with objects returned From 8e9183a7b49273a3111abf3c50a1f78532af1b34 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 24 Feb 2021 09:43:01 +0000 Subject: [PATCH 27/57] Prepare for v0.5.0 --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 787d51fb..03259394 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,7 +3,7 @@ What's New ---------- -v0.4.1 (unreleased) +v0.5.0 (Feb 24, 2021) =================== - Replace ``cf.describe()`` with :py:meth:`Dataset.cf.__repr__`. By `Mattia Almansi`_. From af85a2a43d53032c0c95d1f4acd1c536abcbbbf4 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Wed, 24 Feb 2021 09:59:55 +0000 Subject: [PATCH 28/57] Fully underline rst title --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 03259394..4427c1a2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,7 +4,7 @@ What's New ---------- v0.5.0 (Feb 24, 2021) -=================== +===================== - Replace ``cf.describe()`` with :py:meth:`Dataset.cf.__repr__`. By `Mattia Almansi`_. - Automatically set ``x`` or ``y`` for :py:attr:`DataArray.cf.plot`. By `Deepak Cherian`_. 
From 54b897a5afed5aea2f6fca3d89ad282bfe549aff Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 24 Feb 2021 14:59:40 -0500 Subject: [PATCH 29/57] Do not return instances (#180) --- cf_xarray/accessor.py | 2 +- cf_xarray/tests/test_accessor.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 739fad8e..47734149 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -351,7 +351,7 @@ def _get_with_standard_name( varnames = [] if isinstance(obj, DataArray): - obj = obj._to_temp_dataset() + obj = obj.coords.to_dataset() for vname, var in obj.variables.items(): stdname = var.attrs.get("standard_name", None) if stdname == name: diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index e54e2199..fd911ce5 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -185,6 +185,10 @@ def test_getitem_standard_name(): expected = airds["air"] assert_identical(actual, expected) + actual = airds.lat.cf["latitude"] + expected = airds["lat"] + assert_identical(actual, expected) + ds = airds.copy(deep=True) ds["air2"] = ds.air with pytest.raises(KeyError): From 8c21df8999fb3cd46d44d143a858361a7f59d8b4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 24 Feb 2021 15:02:46 -0700 Subject: [PATCH 30/57] Update whats-new.rst --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4427c1a2..c5ac51c0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,6 +3,11 @@ What's New ---------- +v0.5.1 (Feb 24, 2021) +===================== + +Minor bugfix release, thanks to `Pasical Bourgault`_. 
+ v0.5.0 (Feb 24, 2021) ===================== From 12d09c5d549cbd9a15a694b0c0f3bbbaf7b08baf Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 24 Feb 2021 15:04:21 -0700 Subject: [PATCH 31/57] typo --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c5ac51c0..9b8acd52 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,7 +6,7 @@ What's New v0.5.1 (Feb 24, 2021) ===================== -Minor bugfix release, thanks to `Pasical Bourgault`_. +Minor bugfix release, thanks to `Pascal Bourgault`_. v0.5.0 (Feb 24, 2021) ===================== From 7a8c620cebe44ad27a6f71c89a5f88cf5ec7dc11 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Fri, 26 Feb 2021 17:28:18 +0000 Subject: [PATCH 32/57] cf.data_vars skips coords (#181) --- cf_xarray/accessor.py | 4 +++- cf_xarray/tests/test_accessor.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 47734149..4fbce98c 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -629,9 +629,11 @@ def check_results(names, k): varnames.extend(measure) else: stdnames = set(_get_with_standard_name(obj, k)) + objcoords = set(obj.coords) + if "coords" in skip: + stdnames -= objcoords check_results(stdnames, k) successful[k] = bool(stdnames) - objcoords = set(obj.coords) varnames.extend(stdnames - objcoords) coords.extend(stdnames & objcoords) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index fd911ce5..e5d7f4df 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -802,6 +802,13 @@ def test_attributes(): ds1 = xr.Dataset({"T": foo}) assert_identical(ds1.cf.data_vars["T"], ds1["T"]) + # multiple latitudes but only one latitude data_var + ds = popds.copy(deep=True) + for var in ["ULAT", "TLAT"]: + ds[var].attrs["standard_name"] = "latitude" + ds = ds.reset_coords("ULAT") + 
assert_identical(ds.cf.data_vars["latitude"], ds.cf["ULAT"]) + def test_missing_variable_in_coordinates(): airds.air.attrs["coordinates"] = "lat lon time" From 1408f5d027fe26b64c131f0dff30759e39a81609 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Mon, 8 Mar 2021 15:12:33 +0000 Subject: [PATCH 33/57] rename_like warns about conflicting variables (#183) --- cf_xarray/accessor.py | 37 ++++++++++++++++++++++++++++---- cf_xarray/tests/test_accessor.py | 4 ++++ doc/examples/introduction.ipynb | 31 ++++++++++++++++++++------ 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 4fbce98c..40fdecbb 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1340,12 +1340,41 @@ def rename_like( theirkeys = other.cf.keys() good_keys = ourkeys & theirkeys - renamer = {} + keydict = {} for key in good_keys: - ours = _single(_get_all)(self._obj, key)[0] - theirs = _single(_get_all)(other, key)[0] - renamer[ours] = theirs + ours = _get_all(self._obj, key) + theirs = _get_all(other, key) + keydict[key] = dict(ours=ours, theirs=theirs) + + conflicts = {} + for k0, v0 in keydict.items(): + if len(v0["ours"]) > 1 or len(v0["theirs"]) > 1: + conflicts[k0] = v0 + continue + for v1 in keydict.values(): + # Conflicts have same ours but different theirs or vice versa + if sum([v0["ours"] == v1["ours"], v0["theirs"] == v1["theirs"]]) == 1: + conflicts[k0] = v0 + break + if conflicts: + warnings.warn( + "Conflicting variables skipped:\n" + + "\n".join( + [ + f"{sorted(v['ours'])}: {sorted(v['theirs'])} ({k})" + for k, v in sorted( + conflicts.items(), key=lambda item: sorted(item[1]["ours"]) + ) + ] + ), + UserWarning, + ) + renamer = { + v["ours"][0]: v["theirs"][0] + for k, v in keydict.items() + if k not in conflicts + } newobj = self._obj.rename(renamer) # rename variable names in the coordinates attribute diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index e5d7f4df..9f196ca0 
100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -249,6 +249,10 @@ def test_rename_like(): assert "temp" not in renamed assert "TEMP" in renamed + # skip conflicting variables + with pytest.warns(UserWarning, match="Conflicting variables skipped:.*"): + popds.cf.rename_like(airds) + @pytest.mark.parametrize("obj", objects) @pytest.mark.parametrize( diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 87d0f3bb..290444f8 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -582,13 +582,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Feature: Renaming coordinate variables\n", + "## Feature: Renaming variables\n", "\n", - "`cf_xarray` lets you rewrite coordinate variables in one dataset to like\n", - "variables in another dataset. This can only be done when a one-to-one mapping is\n", - "possible\n", + "`cf_xarray` lets you rewrite variables in one dataset to like variables in\n", + "another dataset.\n", "\n", - "In this example, `TLONG` and `TLAT` are renamed to `lon` and `lat` i.e. their\n", + "In this example, a one-to-one mapping is not possible and the coordinate\n", + "variables are not renamed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "da = pop.cf[\"TEMP\"]\n", + "da.cf.rename_like(ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we drop the `X` and `Y` axes, a one-to-one mapping is possible. In this\n", + "example, `TLONG` and `TLAT` are renamed to `lon` and `lat` i.e. their\n", "counterparts in `ds`. 
Note the the `coordinates` attribute is appropriately\n", "changed.\n" ] @@ -604,7 +622,8 @@ }, "outputs": [], "source": [ - "pop.cf[\"TEMP\"].cf.rename_like(ds)" + "da = da.cf.drop_vars([\"X\", \"Y\"])\n", + "da.cf.rename_like(ds)" ] }, { From 24e5ead544092740a323df851dc67f9f07fb5898 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Sun, 14 Mar 2021 13:05:56 +0000 Subject: [PATCH 34/57] add name_dict and new_name_or_name_dict (#185) --- cf_xarray/accessor.py | 2 ++ cf_xarray/tests/test_accessor.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 40fdecbb..02b2dcf5 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -433,6 +433,8 @@ def wrapper(obj: Union[DataArray, Dataset], key: str): "shifts": (_get_dims,), # shift, roll "pad_width": (_get_dims,), # shift, roll "names": (_get_all,), # set_coords, reset_coords, drop_vars + "name_dict": (_get_all,), # rename, rename_vars + "new_name_or_name_dict": (_get_all,), # rename "labels": (_get_indexes,), # drop_sel "coords": (_get_dims,), # interp "indexers": (_get_dims,), # sel, isel, reindex diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 9f196ca0..037555a6 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -957,6 +957,17 @@ def test_drop_dims(ds): assert_identical(ds.drop_dims("lon"), ds.cf.drop_dims(cf_name)) +@pytest.mark.parametrize("obj", objects) +def test_rename(obj): + + cf_dict = { + "air_temperature" if isinstance(obj, Dataset) else "longitude": "renamed" + } + xr_dict = {"air" if isinstance(obj, Dataset) else "lon": "renamed"} + assert_identical(obj.rename(xr_dict), obj.cf.rename(cf_dict)) + assert_identical(obj.rename(**xr_dict), obj.cf.rename(**cf_dict)) + + @pytest.mark.parametrize("ds", datasets) def test_differentiate(ds): From fb228b65017d165bbd3758b98480424545fb1751 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 1 Apr 2021 14:39:46 -0600 
Subject: [PATCH 35/57] Support stack (#192) --- cf_xarray/accessor.py | 18 +++++++++++++++++- cf_xarray/tests/test_accessor.py | 10 ++++++++++ ci/doc.yml | 1 + ci/environment.yml | 1 + ci/upstream-dev-env.yml | 1 + 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 02b2dcf5..d392c45b 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -428,7 +428,6 @@ def wrapper(obj: Union[DataArray, Dataset], key: str): "dim": (_get_dims,), "dims": (_get_dims,), # transpose "drop_dims": (_get_dims,), # drop_dims - "dimensions": (_get_dims,), # stack "dims_dict": (_get_dims,), # swap_dims, rename_dims "shifts": (_get_dims,), # shift, roll "pad_width": (_get_dims,), # shift, roll @@ -1435,6 +1434,23 @@ def drop(self, *args, **kwargs): "Please use .cf.drop_vars or .cf.drop_sel as appropriate." ) + def stack(self, dimensions=None, **dimensions_kwargs): + # stack needs to rewrite the _values_ of a dict + # our other machinery rewrites the _keys_ of a dict + # This seems somewhat rare, so do it explicitly for now + + if dimensions is None: + dimensions = dimensions_kwargs + for key, values in dimensions.items(): + updates = [ + apply_mapper( + (_single(_get_dims),), self._obj, v, error=True, default=[v] + ) + for v in values + ] + dimensions.update({key: tuple(itertools.chain(*updates))}) + return self._obj.stack(dimensions) + @xr.register_dataset_accessor("cf") class CFDatasetAccessor(CFAccessor): diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 037555a6..19ee8316 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -1041,3 +1041,13 @@ def test_groupby_special_ops(): expected = grouped - grouped.mean() actual = grouped - cfgrouped.mean() assert_identical(expected, actual) + + +@pytest.mark.parametrize("obj", objects) +def test_stack(obj): + expected = obj.stack(latlon=["lat", "lon"]) + actual = obj.cf.stack(latlon=["latitude", 
"longitude"]) + assert_identical(expected, actual) + + actual = obj.cf.stack({"latlon": ["latitude", "longitude"]}) + assert_identical(expected, actual) diff --git a/ci/doc.yml b/ci/doc.yml index 9a4a5a15..cb226a04 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -6,6 +6,7 @@ dependencies: - python=3.8 - matplotlib-base - netcdf4 + - pooch - xarray - sphinx - nbsphinx diff --git a/ci/environment.yml b/ci/environment.yml index 8adb46be..67b99aa9 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -9,4 +9,5 @@ dependencies: - matplotlib-base - netcdf4 - pandas + - pooch - xarray diff --git a/ci/upstream-dev-env.yml b/ci/upstream-dev-env.yml index 222f8343..58007acd 100644 --- a/ci/upstream-dev-env.yml +++ b/ci/upstream-dev-env.yml @@ -9,5 +9,6 @@ dependencies: - matplotlib-base - netcdf4 - pandas + - pooch - pip: - git+https://github.com/pydata/xarray From b5ffbe5754fbfea7eeaf298e4e7a7e998260d102 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 7 Apr 2021 12:02:19 -0600 Subject: [PATCH 36/57] Updates to getitem (#196) --- cf_xarray/accessor.py | 53 +++++++++++++++++++++++++------- cf_xarray/tests/test_accessor.py | 42 ++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 12 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index d392c45b..34f7aa0b 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -12,7 +12,6 @@ List, Mapping, MutableMapping, - Optional, Set, Tuple, TypeVar, @@ -605,24 +604,41 @@ def _getitem( if skip is None: skip = [] - def check_results(names, k): + def drop_bounds(names): + # sometimes bounds variables have the same standard_name as the + # actual variable. It seems practical to ignore them when indexing + # with a scalar key. Hopefully these will soon get decoded to IntervalIndex + # and we can move on... 
+ if scalar_key: + bounds = set([obj[k].attrs.get("bounds", None) for k in names]) + names = set(names) - bounds + return names + + def check_results(names, key): if scalar_key and len(names) > 1: raise KeyError( - f"Receive multiple variables for key {k!r}: {names}. " - f"Expected only one. Please pass a list [{k!r}] " - f"instead to get all variables matching {k!r}." + f"Receive multiple variables for key {key!r}: {names}. " + f"Expected only one. Please pass a list [{key!r}] " + f"instead to get all variables matching {key!r}." ) + try: + measures = accessor._get_all_cell_measures() + except ValueError: + measures = [] + warnings.warn("Ignoring bad cell_measures attribute.", UserWarning) + varnames: List[Hashable] = [] coords: List[Hashable] = [] successful = dict.fromkeys(key, False) for k in key: if "coords" not in skip and k in _AXIS_NAMES + _COORD_NAMES: names = _get_all(obj, k) + names = drop_bounds(names) check_results(names, k) successful[k] = bool(names) coords.extend(names) - elif "measures" not in skip and k in accessor._get_all_cell_measures(): + elif "measures" not in skip and k in measures: measure = _get_all(obj, k) check_results(measure, k) successful[k] = bool(measure) @@ -631,6 +647,7 @@ def check_results(names, k): else: stdnames = set(_get_with_standard_name(obj, k)) objcoords = set(obj.coords) + stdnames = drop_bounds(stdnames) if "coords" in skip: stdnames -= objcoords check_results(stdnames, k) @@ -646,7 +663,7 @@ def check_results(names, k): try: for name in allnames: extravars = accessor.get_associated_variable_names( - name, skip_bounds=scalar_key + name, skip_bounds=scalar_key, error=False ) coords.extend(itertools.chain(*extravars.values())) @@ -1238,7 +1255,7 @@ def standard_names(self) -> Dict[str, List[str]]: return {k: sorted(v) for k, v in vardict.items()} def get_associated_variable_names( - self, name: Hashable, skip_bounds: Optional[bool] = None + self, name: Hashable, skip_bounds: bool = False, error: bool = True ) -> 
Dict[str, List[str]]: """ Returns a dict mapping @@ -1252,6 +1269,9 @@ def get_associated_variable_names( ---------- name : Hashable skip_bounds : bool, optional + error: bool, optional + Raise or ignore errors. + Returns ------ Dict with keys "ancillary_variables", "cell_measures", "coordinates", "bounds" @@ -1264,9 +1284,20 @@ def get_associated_variable_names( coords["coordinates"] = attrs_or_encoding["coordinates"].split(" ") if "cell_measures" in attrs_or_encoding: - coords["cell_measures"] = list( - parse_cell_methods_attr(attrs_or_encoding["cell_measures"]).values() - ) + try: + coords["cell_measures"] = list( + parse_cell_methods_attr(attrs_or_encoding["cell_measures"]).values() + ) + except ValueError as e: + if error: + msg = e.args[0] + " Ignore this error by passing 'error=False'" + raise ValueError(msg) + else: + warnings.warn( + f"Ignoring bad cell_measures attribute: {attrs_or_encoding['cell_measures']}", + UserWarning, + ) + coords["cell_measures"] = [] if ( isinstance(self._obj, Dataset) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 19ee8316..b931c59c 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -502,11 +502,51 @@ def test_getitem_errors(obj): obj2.cf["X"] -def test_getitem_regression(): +def test_getitem_ignores_bad_measure_attribute(): + air2 = airds.copy(deep=True) + air2.air.attrs["cell_measures"] = "asd" + with pytest.warns(UserWarning): + assert_identical(air2.air.drop_vars("cell_area"), air2.cf["air"]) + + with pytest.raises(ValueError): + air2.cf.cell_measures + with pytest.raises(ValueError): + air2.air.cf.cell_measures + with pytest.raises(ValueError): + air2.cf.get_associated_variable_names("air", error=True) + with pytest.warns(UserWarning): + air2.cf.get_associated_variable_names("air", error=False) + + +def test_getitem_clash_standard_name(): ds = xr.Dataset() ds.coords["area"] = xr.DataArray(np.ones(10), attrs={"standard_name": "cell_area"}) 
assert_identical(ds.cf["cell_area"], ds["area"].reset_coords(drop=True)) + ds = xr.Dataset() + ds["time"] = ( + "time", + np.arange(10), + {"standard_name": "time", "bounds": "time_bounds"}, + ) + ds["time_bounds"] = ( + ("time", "bounds"), + np.ones((10, 2)), + {"standard_name": "time"}, + ) + + ds["lat"] = ( + "lat", + np.arange(10), + {"units": "degrees_north", "bounds": "lat_bounds"}, + ) + ds["lat_bounds"] = ( + ("lat", "bounds"), + np.ones((10, 2)), + {"units": "degrees_north"}, + ) + assert_identical(ds["lat"], ds.cf["latitude"]) + def test_getitem_uses_coordinates(): # POP-like dataset From 95379149b4dc7493f72a05cff252b6c354baefc3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 9 Apr 2021 10:48:24 -0600 Subject: [PATCH 37/57] Add differentiate with positive_upward flag. (#198) * Add differentiate with follow_positive flag. xref #190 * fix test * Switch to 'positive_upward' + better docstring * Also test dataset * [skip-ci] fix whats-new * fix docstring * Update cf_xarray/accessor.py Co-authored-by: Mattia Almansi Co-authored-by: Mattia Almansi --- cf_xarray/accessor.py | 40 +++++++++++++++++++++++++ cf_xarray/tests/test_accessor.py | 51 ++++++++++++++++++++++++++++++++ doc/api.rst | 2 ++ doc/whats-new.rst | 6 ++++ 4 files changed, 99 insertions(+) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 34f7aa0b..bd56c781 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1482,6 +1482,46 @@ def stack(self, dimensions=None, **dimensions_kwargs): dimensions.update({key: tuple(itertools.chain(*updates))}) return self._obj.stack(dimensions) + def differentiate( + self, coord, *xr_args, positive_upward: bool = False, **xr_kwargs + ): + """ + Parameters + ---------- + xr_args, xr_kwargs are passed directly to the underlying xarray function. 
+ The following are added by cf_xarray: + + positive_upward: optional, bool + Change sign of the derivative based on the ``"positive"`` attribute of ``coord`` + so that positive values indicate increasing upward. + If ``positive=="down"``, then multiplied by -1. + + See Also + -------- + DataArray.cf.differentiate + Dataset.cf.differentiate + xarray.DataArray.differentiate: underlying xarray function + xarray.Dataset.differentiate: underlying xarray function + """ + coord = apply_mapper( + (_single(_get_coords),), self._obj, coord, error=False, default=[coord] + )[0] + result = self._obj.differentiate(coord, *xr_args, **xr_kwargs) + if positive_upward: + coord = self._obj[coord] + attrs = coord.attrs + if "positive" not in attrs: + raise ValueError( + f"positive_upward=True and 'positive' attribute not present on {coord.name}" + ) + if attrs["positive"] not in ["up", "down"]: + raise ValueError( + f"positive_upward=True and received attrs['positive']={attrs['positive']}. Expected one of ['up', 'down'] " + ) + if attrs["positive"] == "down": + result *= -1 + return result + @xr.register_dataset_accessor("cf") class CFDatasetAccessor(CFAccessor): diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index b931c59c..71d35441 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -1091,3 +1091,54 @@ def test_stack(obj): actual = obj.cf.stack({"latlon": ["latitude", "longitude"]}) assert_identical(expected, actual) + + +da = xr.DataArray( + np.arange(10)[::-1], # like ocean temperature + dims="z", + coords={"z": ("z", np.arange(10))}, + name="test", +) + + +@pytest.mark.parametrize("obj", [da, da.to_dataset()]) +def test_differentiate_positive_upward(obj): + obj.z.attrs["positive"] = "down" + expected = obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2) + assert_identical(expected, actual) + + obj.z.attrs["positive"] = "up" + expected = obj.differentiate("z", 2) + actual = 
obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + obj.z.attrs["positive"] = "down" + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + obj = obj.isel(z=slice(None, None, -1)) + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + obj = obj.isel(z=slice(None, None, -1)) + + with xr.set_options(keep_attrs=True): + da["z"] = obj.z * -1 + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + obj = obj.isel(z=slice(None, None, -1)) + expected = -1 * obj.differentiate("z", 2) + actual = obj.cf.differentiate("z", 2, positive_upward=True) + assert_identical(expected, actual) + + del obj.z.attrs["positive"] + with pytest.raises(ValueError): + obj.cf.differentiate("z", positive_upward=True) + + obj.z.attrs["positive"] = "zzz" + with pytest.raises(ValueError): + obj.cf.differentiate("z", positive_upward=True) diff --git a/doc/api.rst b/doc/api.rst index 2f753fb2..4c987f51 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -45,6 +45,7 @@ Methods DataArray.cf.__getitem__ DataArray.cf.__repr__ + DataArray.cf.differentiate DataArray.cf.guess_coord_axis DataArray.cf.keys DataArray.cf.rename_like @@ -80,6 +81,7 @@ Methods Dataset.cf.add_bounds Dataset.cf.bounds_to_vertices Dataset.cf.decode_vertical_coords + Dataset.cf.differentiate Dataset.cf.get_bounds Dataset.cf.get_bounds_dim_name Dataset.cf.guess_coord_axis diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9b8acd52..1760fc56 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,6 +3,12 @@ What's New ---------- +v0.5.2 (unreleased) +=================== + +- :py:meth:`DataArray.cf.differentiate` and :py:meth:`Dataset.cf.differentiate` can optionally correct + sign of the derivative by interpreting the 
``"positive"`` attribute. By `Deepak Cherian`_. + v0.5.1 (Feb 24, 2021) ===================== From 9e86af8d0b555c37826e3a357832a5700857b6a0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 19 Apr 2021 15:15:09 -0600 Subject: [PATCH 38/57] Refactor out coordinate criteria to criteria.py (#205) Closes #28 --- cf_xarray/accessor.py | 88 +-------------------------------- cf_xarray/criteria.py | 91 +++++++++++++++++++++++++++++++++++ cf_xarray/scripts/make_doc.py | 3 +- 3 files changed, 94 insertions(+), 88 deletions(-) create mode 100644 cf_xarray/criteria.py diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index bd56c781..35692bfc 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -23,6 +23,7 @@ from xarray import DataArray, Dataset from xarray.core.arithmetic import SupportsArithmetic +from .criteria import coordinate_criteria, regex from .helpers import bounds_to_vertices from .utils import _is_datetime_like, invert_mappings, parse_cell_methods_attr @@ -44,93 +45,6 @@ #: Cell measures understood by cf_xarray. 
_CELL_MEASURES = ("area", "volume") -# Define the criteria for coordinate matches -# Copied from metpy -# Internally we only use X, Y, Z, T -coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { - "standard_name": { - "X": ("projection_x_coordinate",), - "Y": ("projection_y_coordinate",), - "T": ("time",), - "time": ("time",), - "vertical": ( - "air_pressure", - "height", - "depth", - "geopotential_height", - # computed dimensional coordinate name - "altitude", - "height_above_geopotential_datum", - "height_above_reference_ellipsoid", - "height_above_mean_sea_level", - ), - "Z": ( - "model_level_number", - "atmosphere_ln_pressure_coordinate", - "atmosphere_sigma_coordinate", - "atmosphere_hybrid_sigma_pressure_coordinate", - "atmosphere_hybrid_height_coordinate", - "atmosphere_sleve_coordinate", - "ocean_sigma_coordinate", - "ocean_s_coordinate", - "ocean_s_coordinate_g1", - "ocean_s_coordinate_g2", - "ocean_sigma_z_coordinate", - "ocean_double_sigma_coordinate", - ), - "latitude": ("latitude",), - "longitude": ("longitude",), - }, - "_CoordinateAxisType": { - "T": ("Time",), - "Z": ("GeoZ", "Height", "Pressure"), - "Y": ("GeoY",), - "latitude": ("Lat",), - "X": ("GeoX",), - "longitude": ("Lon",), - }, - "axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)}, - "cartesian_axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)}, - "positive": {"vertical": ("up", "down")}, - "units": { - "latitude": ( - "degree_north", - "degree_N", - "degreeN", - "degrees_north", - "degrees_N", - "degreesN", - ), - "longitude": ( - "degree_east", - "degree_E", - "degreeE", - "degrees_east", - "degrees_E", - "degreesE", - ), - }, -} - -# "long_name" and "standard_name" criteria are the same. For convenience. 
-coordinate_criteria["long_name"] = coordinate_criteria["standard_name"] - -#: regular expressions for guess_coord_axis -regex = { - "time": "\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*", - "vertical": ( - "(z|nav_lev|gdep|lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|" - "isobaric|pres|isotherm)[a-z_]*[0-9]*" - ), - "Y": "y", - "latitude": "y?(nav_lat|lat|gphi)[a-z0-9]*", - "X": "x", - "longitude": "x?(nav_lon|lon|glam)[a-z0-9]*", -} -regex["Z"] = regex["vertical"] -regex["T"] = regex["time"] - - ATTRS = { "X": {"axis": "X"}, "T": {"axis": "T", "standard_name": "time"}, diff --git a/cf_xarray/criteria.py b/cf_xarray/criteria.py new file mode 100644 index 00000000..ace02de4 --- /dev/null +++ b/cf_xarray/criteria.py @@ -0,0 +1,91 @@ +""" +Criteria for identifying axes and coordinate variables. +Reused with modification from MetPy under the terms of the BSD 3-Clause License. +Copyright (c) 2017 MetPy Developers. +""" + + +from typing import MutableMapping, Tuple + +coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { + "standard_name": { + "X": ("projection_x_coordinate",), + "Y": ("projection_y_coordinate",), + "T": ("time",), + "time": ("time",), + "vertical": ( + "air_pressure", + "height", + "depth", + "geopotential_height", + # computed dimensional coordinate name + "altitude", + "height_above_geopotential_datum", + "height_above_reference_ellipsoid", + "height_above_mean_sea_level", + ), + "Z": ( + "model_level_number", + "atmosphere_ln_pressure_coordinate", + "atmosphere_sigma_coordinate", + "atmosphere_hybrid_sigma_pressure_coordinate", + "atmosphere_hybrid_height_coordinate", + "atmosphere_sleve_coordinate", + "ocean_sigma_coordinate", + "ocean_s_coordinate", + "ocean_s_coordinate_g1", + "ocean_s_coordinate_g2", + "ocean_sigma_z_coordinate", + "ocean_double_sigma_coordinate", + ), + "latitude": ("latitude",), + "longitude": ("longitude",), + }, + "_CoordinateAxisType": { + "T": ("Time",), + "Z": ("GeoZ", "Height", "Pressure"), + 
"Y": ("GeoY",), + "latitude": ("Lat",), + "X": ("GeoX",), + "longitude": ("Lon",), + }, + "axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)}, + "cartesian_axis": {"T": ("T",), "Z": ("Z",), "Y": ("Y",), "X": ("X",)}, + "positive": {"vertical": ("up", "down")}, + "units": { + "latitude": ( + "degree_north", + "degree_N", + "degreeN", + "degrees_north", + "degrees_N", + "degreesN", + ), + "longitude": ( + "degree_east", + "degree_E", + "degreeE", + "degrees_east", + "degrees_E", + "degreesE", + ), + }, +} + +# "long_name" and "standard_name" criteria are the same. For convenience. +coordinate_criteria["long_name"] = coordinate_criteria["standard_name"] + +#: regular expressions for guess_coord_axis +regex = { + "time": "\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*", + "vertical": ( + "(z|nav_lev|gdep|lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|" + "isobaric|pres|isotherm)[a-z_]*[0-9]*" + ), + "Y": "y", + "latitude": "y?(nav_lat|lat|gphi)[a-z0-9]*", + "X": "x", + "longitude": "x?(nav_lon|lon|glam)[a-z0-9]*", +} +regex["Z"] = regex["vertical"] +regex["T"] = regex["time"] diff --git a/cf_xarray/scripts/make_doc.py b/cf_xarray/scripts/make_doc.py index 203f3c41..256007c8 100644 --- a/cf_xarray/scripts/make_doc.py +++ b/cf_xarray/scripts/make_doc.py @@ -4,7 +4,8 @@ from pandas import DataFrame -from cf_xarray.accessor import _AXIS_NAMES, _COORD_NAMES, coordinate_criteria, regex +from cf_xarray.accessor import _AXIS_NAMES, _COORD_NAMES +from cf_xarray.criteria import coordinate_criteria, regex def main(): From be8b23e0c8237e4c7a96b1a8a8e4ff967d538ad1 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Tue, 20 Apr 2021 17:06:14 +0100 Subject: [PATCH 39/57] add skip to rename_like (#206) --- cf_xarray/accessor.py | 22 +++++++++++++++++----- cf_xarray/tests/test_accessor.py | 8 +++++++- doc/examples/introduction.ipynb | 11 +++++------ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 
35692bfc..5be0efbd 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1261,7 +1261,9 @@ def _maybe_to_dataarray(self, obj=None): return obj def rename_like( - self, other: Union[DataArray, Dataset] + self, + other: Union[DataArray, Dataset], + skip: Union[str, Iterable[str]] = None, ) -> Union[DataArray, Dataset]: """ Renames variables in object to match names of like-variables in ``other``. @@ -1277,20 +1279,30 @@ def rename_like( ---------- other : DataArray, Dataset Variables will be renamed to match variable names in this xarray object + skip: str, Iterable[str], optional + Limit the renaming excluding + ("axes", "cell_measures", "coordinates", "standard_names") + or a subset thereof. Returns ------- DataArray or Dataset with renamed variables """ + skip = [skip] if isinstance(skip, str) else skip or [] + ourkeys = self.keys() theirkeys = other.cf.keys() good_keys = ourkeys & theirkeys keydict = {} for key in good_keys: - ours = _get_all(self._obj, key) - theirs = _get_all(other, key) - keydict[key] = dict(ours=ours, theirs=theirs) + ours = set(_get_all(self._obj, key)) + theirs = set(_get_all(other, key)) + for attr in skip: + ours -= set(getattr(self, attr).get(key, [])) + theirs -= set(getattr(other.cf, attr).get(key, [])) + if ours and theirs: + keydict[key] = dict(ours=list(ours), theirs=list(theirs)) conflicts = {} for k0, v0 in keydict.items(): @@ -1299,7 +1311,7 @@ def rename_like( continue for v1 in keydict.values(): # Conflicts have same ours but different theirs or vice versa - if sum([v0["ours"] == v1["ours"], v0["theirs"] == v1["theirs"]]) == 1: + if (v0["ours"] == v1["ours"]) != (v0["theirs"] == v1["theirs"]): conflicts[k0] = v0 break if conflicts: diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 71d35441..bab850a9 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -250,8 +250,14 @@ def test_rename_like(): assert "TEMP" in renamed # skip conflicting 
variables + da = popds.cf["TEMP"] with pytest.warns(UserWarning, match="Conflicting variables skipped:.*"): - popds.cf.rename_like(airds) + expected = {"longitude": ["TLONG"], "latitude": ["TLAT"]} + actual = da.cf.rename_like(airds).cf.coordinates + assert expected == actual + expected = {"longitude": ["lon"], "latitude": ["lat"]} + actual = da.cf.rename_like(airds, skip="axes").cf.coordinates + assert expected == actual @pytest.mark.parametrize("obj", objects) diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 290444f8..1a2a38ef 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -605,10 +605,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If we drop the `X` and `Y` axes, a one-to-one mapping is possible. In this\n", - "example, `TLONG` and `TLAT` are renamed to `lon` and `lat` i.e. their\n", - "counterparts in `ds`. Note the the `coordinates` attribute is appropriately\n", - "changed.\n" + "If we exclude all axes (variables with `axis` attribute), a one-to-one mapping\n", + "is possible. In this example, `TLONG` and `TLAT` are renamed to `lon` and `lat`\n", + "i.e. their counterparts in `ds`. 
Note the the `coordinates` attribute is\n", + "appropriately changed.\n" ] }, { @@ -622,8 +622,7 @@ }, "outputs": [], "source": [ - "da = da.cf.drop_vars([\"X\", \"Y\"])\n", - "da.cf.rename_like(ds)" + "da.cf.rename_like(ds, skip=\"axes\")" ] }, { From fb352cfeec19cf5890906ad25097f47c6f9f472c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 20 Apr 2021 10:26:23 -0600 Subject: [PATCH 40/57] Add __version__ (#208) Co-authored-by: Anderson Banihirwe --- cf_xarray/__init__.py | 8 ++++++++ setup.cfg | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cf_xarray/__init__.py b/cf_xarray/__init__.py index f9d08cce..a0a37531 100644 --- a/cf_xarray/__init__.py +++ b/cf_xarray/__init__.py @@ -1,2 +1,10 @@ +from pkg_resources import DistributionNotFound, get_distribution + from .accessor import CFAccessor # noqa from .helpers import bounds_to_vertices, vertices_to_bounds # noqa + +try: + __version__ = get_distribution("cf_xarray").version +except DistributionNotFound: + # package is not installed + __version__ = 'unknown' diff --git a/setup.cfg b/setup.cfg index 2236d52d..b0c2485d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,7 +41,7 @@ skip_gitignore = true force_to_top = true default_section = THIRDPARTY known_first_party = cf_xarray -known_third_party = dask,matplotlib,numpy,pandas,pytest,setuptools,sphinx_autosummary_accessors,xarray +known_third_party = dask,matplotlib,numpy,pandas,pkg_resources,pytest,setuptools,sphinx_autosummary_accessors,xarray # Most of the numerical computing stack doesn't have type annotations yet. 
[mypy-affine.*] From fd7e48603a9b00c20390b1cda36edc8b0d748715 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 20 Apr 2021 11:29:51 -0600 Subject: [PATCH 41/57] Fix black --- cf_xarray/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf_xarray/__init__.py b/cf_xarray/__init__.py index a0a37531..10503e50 100644 --- a/cf_xarray/__init__.py +++ b/cf_xarray/__init__.py @@ -7,4 +7,4 @@ __version__ = get_distribution("cf_xarray").version except DistributionNotFound: # package is not installed - __version__ = 'unknown' + __version__ = "unknown" From c6c4b5df00ef31ce7921d38b7385170e702d4988 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 20 Apr 2021 13:29:15 -0600 Subject: [PATCH 42/57] Compile regexes --- cf_xarray/accessor.py | 10 ++++------ cf_xarray/criteria.py | 13 +++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 5be0efbd..e22e0019 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1363,8 +1363,6 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: ------- DataArray or Dataset with appropriate attributes added """ - import re - obj = self._obj.copy(deep=True) for var in obj.coords.variables: if obj[var].ndim == 1 and _is_datetime_like(obj[var]): @@ -1375,14 +1373,14 @@ def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS["time"])) continue # prevent second detection - for axis, pattern in regex.items(): + for name, pattern in regex.items(): # match variable names - if re.match(pattern, var.lower()): + if pattern.match(var.lower()): if verbose: print( - f"I think {var!r} is of type {axis!r}. It matched {pattern!r}" + f"I think {var!r} is of type {name!r}. 
It matched {pattern!r}" ) - obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS[axis])) + obj[var].attrs = dict(ChainMap(obj[var].attrs, ATTRS[name])) return obj def drop(self, *args, **kwargs): diff --git a/cf_xarray/criteria.py b/cf_xarray/criteria.py index ace02de4..060e9938 100644 --- a/cf_xarray/criteria.py +++ b/cf_xarray/criteria.py @@ -5,6 +5,7 @@ """ +import re from typing import MutableMapping, Tuple coordinate_criteria: MutableMapping[str, MutableMapping[str, Tuple]] = { @@ -77,15 +78,15 @@ #: regular expressions for guess_coord_axis regex = { - "time": "\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*", - "vertical": ( + "time": re.compile("\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*"), + "vertical": re.compile( "(z|nav_lev|gdep|lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|" "isobaric|pres|isotherm)[a-z_]*[0-9]*" ), - "Y": "y", - "latitude": "y?(nav_lat|lat|gphi)[a-z0-9]*", - "X": "x", - "longitude": "x?(nav_lon|lon|glam)[a-z0-9]*", + "Y": re.compile("y"), + "latitude": re.compile("y?(nav_lat|lat|gphi)[a-z0-9]*"), + "X": re.compile("x"), + "longitude": re.compile("x?(nav_lon|lon|glam)[a-z0-9]*"), } regex["Z"] = regex["vertical"] regex["T"] = regex["time"] From 64e86954c46642df6f11323d1442fbc70c8fd1bb Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 20 Apr 2021 13:32:15 -0600 Subject: [PATCH 43/57] Update some tests. 
--- cf_xarray/tests/test_accessor.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index bab850a9..77253114 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -269,14 +269,7 @@ def test_rename_like(): ("groupby", {"group": "time"}, {"group": "T"}), ("groupby", {"group": "time.month"}, {"group": "T.month"}), ("groupby_bins", {"group": "lat", "bins": 5}, {"group": "latitude", "bins": 5}), - pytest.param( - "coarsen", - {"lon": 2, "lat": 5}, - {"X": 2, "Y": 5}, - marks=pytest.mark.skip( - reason="xarray GH4120. any test after this will fail since attrs are lost" - ), - ), + ("coarsen", {"lon": 2, "lat": 5}, {"X": 2, "Y": 5}), ), ) def test_wrapped_classes(obj, attr, xrkwargs, cfkwargs): @@ -483,18 +476,14 @@ def test_dataset_plot(obj): ("longitude", "lon"), ("latitude", "lat"), ("time", "time"), - pytest.param( - "area", - "cell_area", - marks=pytest.mark.xfail(reason="measures not implemented for dataset"), - ), + ("area", "cell_area"), ), ) def test_getitem(obj, key, expected_key): assert key in obj.cf actual = obj.cf[key] - expected = obj[expected_key] + expected = obj[expected_key].reset_coords(drop=True) assert_identical(actual, expected) From 36b98febea8ec42420ccf80978ad29f41293d649 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 22 Apr 2021 13:40:33 +0100 Subject: [PATCH 44/57] add bounds property (#214) --- cf_xarray/accessor.py | 85 +++++++++++++++++++++++++------- cf_xarray/tests/test_accessor.py | 30 +++++++++++ doc/api.rst | 1 + doc/whats-new.rst | 1 + 4 files changed, 100 insertions(+), 17 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index e22e0019..a49761fb 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -255,6 +255,31 @@ def _get_measure(obj: Union[DataArray, Dataset], key: str) -> List[str]: return list(results) +def _get_bounds(obj: Union[DataArray, 
Dataset], key: str) -> List[str]: + """ + Translate from key (either CF key or variable name) to its bounds' variable names. + This function interprets the ``bounds`` attribute on DataArrays. + + Parameters + ---------- + obj : DataArray, Dataset + DataArray belonging to the coordinate to be checked + key : str + key to check for. + + Returns + ------- + List[str], Variable name(s) in parent xarray object that are bounds of `key` + """ + + results = set() + for var in apply_mapper(_get_all, obj, key, error=False, default=[key]): + if "bounds" in obj[var].attrs: + results |= {obj[var].attrs["bounds"]} + + return list(results) + + def _get_with_standard_name( obj: Union[DataArray, Dataset], name: Union[str, List[str]] ) -> List[str]: @@ -436,6 +461,14 @@ def _getattr( try: attribute: Union[Mapping, Callable] = getattr(obj, attr) except AttributeError: + if getattr( + CFDatasetAccessor if isinstance(obj, DataArray) else CFDataArrayAccessor, + attr, + None, + ): + raise AttributeError( + f"{obj.__class__.__name__+'.cf'!r} object has no attribute {attr!r}" + ) raise AttributeError( f"{attr!r} is not a valid attribute on the underlying xarray object." 
) @@ -976,7 +1009,9 @@ def __repr__(self): coords = self._obj.coords dims = self._obj.dims - def make_text_section(subtitle, vardict, valid_values, default_keys=None): + def make_text_section(subtitle, attr, valid_values, default_keys=None): + + vardict = getattr(self, attr, {}) star = " * " tab = len(star) * " " @@ -1019,21 +1054,21 @@ def make_text_section(subtitle, vardict, valid_values, default_keys=None): return "\n".join(rows) + "\n" text = "Coordinates:" - text += make_text_section("CF Axes", self.axes, coords, _AXIS_NAMES) + text += make_text_section("CF Axes", "axes", coords, _AXIS_NAMES) + text += make_text_section("CF Coordinates", "coordinates", coords, _COORD_NAMES) text += make_text_section( - "CF Coordinates", self.coordinates, coords, _COORD_NAMES + "Cell Measures", "cell_measures", coords, _CELL_MEASURES ) - text += make_text_section( - "Cell Measures", self.cell_measures, coords, _CELL_MEASURES - ) - text += make_text_section("Standard Names", self.standard_names, coords) + text += make_text_section("Standard Names", "standard_names", coords) + text += make_text_section("Bounds", "bounds", coords) if isinstance(self._obj, Dataset): data_vars = self._obj.data_vars text += "\nData Variables:" text += make_text_section( - "Cell Measures", self.cell_measures, data_vars, _CELL_MEASURES + "Cell Measures", "cell_measures", data_vars, _CELL_MEASURES ) - text += make_text_section("Standard Names", self.standard_names, data_vars) + text += make_text_section("Standard Names", "standard_names", data_vars) + text += make_text_section("Bounds", "bounds", data_vars) return text @@ -1144,7 +1179,7 @@ def get_standard_names(self) -> List[str]: @property def standard_names(self) -> Dict[str, List[str]]: """ - Returns a sorted list of standard names in Dataset. + Returns a dictionary mapping standard names to variable names. 
Parameters ---------- @@ -1153,7 +1188,7 @@ def standard_names(self) -> Dict[str, List[str]]: Returns ------- - Dictionary of standard names in dataset + Dictionary mapping standard names to variable names. """ if isinstance(self._obj, Dataset): variables = self._obj.variables @@ -1480,6 +1515,26 @@ def __getitem__(self, key: Union[str, List[str]]) -> Union[DataArray, Dataset]: """ return _getitem(self, key) + @property + def bounds(self) -> Dict[str, List[str]]: + """ + Property that returns a dictionary mapping valid keys + to the variable names of their bounds. + + Returns + ------- + Dictionary mapping valid keys to the variable names of their bounds. + """ + + obj = self._obj + keys = self.keys() | set(obj.variables) + + vardict = { + key: apply_mapper(_get_bounds, obj, key, error=False) for key in keys + } + + return {k: sorted(v) for k, v in vardict.items() if v} + def get_bounds(self, key: str) -> DataArray: """ Get bounds variable corresponding to key. @@ -1493,12 +1548,8 @@ def get_bounds(self, key: str) -> DataArray: ------- DataArray """ - name = apply_mapper( - _single(_get_all), self._obj, key, error=False, default=[key] - )[0] - bounds = self._obj[name].attrs["bounds"] - obj = self._maybe_to_dataset() - return obj[bounds] + + return apply_mapper(_variables(_single(_get_bounds)), self._obj, key)[0] def get_bounds_dim_name(self, key: str) -> str: """ diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 77253114..41bbb9f8 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -61,10 +61,14 @@ def test_repr(): * longitude: ['lon'] * time: ['time'] + - Bounds: n/a + Data Variables: - Cell Measures: area, volume: n/a - Standard Names: air_temperature: ['air'] + + - Bounds: n/a """ assert actual == dedent(expected) @@ -89,6 +93,8 @@ def test_repr(): - Standard Names: * latitude: ['lat'] * longitude: ['lon'] * time: ['time'] + + - Bounds: n/a """ assert actual == dedent(expected) @@ -108,11 
+114,15 @@ def test_repr(): - Standard Names: n/a + - Bounds: n/a + Data Variables: - Cell Measures: area, volume: n/a - Standard Names: sea_water_potential_temperature: ['TEMP'] sea_water_x_velocity: ['UVEL'] + + - Bounds: n/a """ assert actual == dedent(expected) @@ -163,6 +173,8 @@ def test_cell_measures(): - Standard Names: air_temperature: ['air'] foo_std_name: ['foo'] + + - Bounds: n/a """ assert actual.endswith(dedent(expected)) @@ -625,6 +637,11 @@ def test_add_bounds(obj, dims): def test_bounds(): ds = airds.copy(deep=True).cf.add_bounds("lat") + + actual = ds.cf.bounds + expected = {"Y": ["lat_bounds"], "lat": ["lat_bounds"], "latitude": ["lat_bounds"]} + assert ds.cf.bounds == expected + actual = ds.cf[["lat"]] expected = ds[["lat", "lat_bounds"]] assert_identical(actual, expected) @@ -651,6 +668,19 @@ def test_bounds(): with pytest.warns(UserWarning, match="{'foo'} not found in object"): ds.cf[["air"]] + # Dataset has bounds + expected = """\ + - Bounds: Y: ['lat_bounds'] + lat: ['lat_bounds'] + latitude: ['lat_bounds'] + """ + assert dedent(expected) in ds.cf.__repr__() + + # DataArray does not have bounds + expected = airds.cf["air"].cf.__repr__() + actual = ds.cf["air"].cf.__repr__() + assert actual == expected + def test_bounds_to_vertices(): # All available diff --git a/doc/api.rst b/doc/api.rst index 4c987f51..49aa425c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -63,6 +63,7 @@ Attributes :template: autosummary/accessor_attribute.rst Dataset.cf.axes + Dataset.cf.bounds Dataset.cf.cell_measures Dataset.cf.coordinates Dataset.cf.standard_names diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1760fc56..3a54cbd1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.5.2 (unreleased) =================== +- Added :py:attr:`Dataset.cf.axes` to return a dictionary mapping valid keys to the variable names of their bounds. By `Mattia Almansi`_. 
- :py:meth:`DataArray.cf.differentiate` and :py:meth:`Dataset.cf.differentiate` can optionally correct sign of the derivative by interpreting the ``"positive"`` attribute. By `Deepak Cherian`_. From 3252c747a53723256544e35e6389e5402b6440ef Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 22 Apr 2021 13:59:13 +0100 Subject: [PATCH 45/57] Update whats-new.rst (#217) --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3a54cbd1..29bf48e4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,7 +6,7 @@ What's New v0.5.2 (unreleased) =================== -- Added :py:attr:`Dataset.cf.axes` to return a dictionary mapping valid keys to the variable names of their bounds. By `Mattia Almansi`_. +- Added :py:attr:`Dataset.cf.bounds` to return a dictionary mapping valid keys to the variable names of their bounds. By `Mattia Almansi`_. - :py:meth:`DataArray.cf.differentiate` and :py:meth:`Dataset.cf.differentiate` can optionally correct sign of the derivative by interpreting the ``"positive"`` attribute. By `Deepak Cherian`_. From e4fa084e557ddfbb8f3cbba3df109009f73e5361 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 23 Apr 2021 20:32:51 -0600 Subject: [PATCH 46/57] Add .cf.formula_terms (#213) --- cf_xarray/accessor.py | 59 +++++++++++++++++++++++--------- cf_xarray/tests/test_accessor.py | 22 ++++++++++++ doc/api.rst | 2 ++ doc/whats-new.rst | 2 ++ 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index a49761fb..c0205afb 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1,6 +1,7 @@ import functools import inspect import itertools +import re import warnings from collections import ChainMap from typing import ( @@ -1445,16 +1446,19 @@ def differentiate( self, coord, *xr_args, positive_upward: bool = False, **xr_kwargs ): """ + Differentiate an xarray object. 
+ Parameters ---------- - xr_args, xr_kwargs are passed directly to the underlying xarray function. - The following are added by cf_xarray: - positive_upward: optional, bool Change sign of the derivative based on the ``"positive"`` attribute of ``coord`` so that positive values indicate increasing upward. If ``positive=="down"``, then multiplied by -1. + Notes + ----- + ``xr_args``, ``xr_kwargs`` are passed directly to the underlying xarray function. + See Also -------- DataArray.cf.differentiate @@ -1515,6 +1519,16 @@ def __getitem__(self, key: Union[str, List[str]]) -> Union[DataArray, Dataset]: """ return _getitem(self, key) + @property + def formula_terms(self) -> Dict[str, Dict[str, str]]: + """ + Property that returns a dictionary + {parametric_coord_name: {standard_term_name: variable_name}} + """ + return { + dim: self._obj[dim].cf.formula_terms for dim in _get_dims(self._obj, "Z") + } + @property def bounds(self) -> Dict[str, List[str]]: """ @@ -1719,36 +1733,29 @@ def decode_vertical_coords(self, prefix="z"): .. warning:: Very lightly tested. Please double check the results. 
""" - import re - ds = self._obj - dims = _get_dims(ds, "Z") requirements = { "ocean_s_coordinate_g1": {"depth_c", "depth", "s", "C", "eta"}, "ocean_s_coordinate_g2": {"depth_c", "depth", "s", "C", "eta"}, } - for dim in dims: + allterms = self.formula_terms + for dim in allterms: suffix = dim.split("_") zname = f"{prefix}_" + "_".join(suffix[1:]) - if ( - "formula_terms" not in ds[dim].attrs - or "standard_name" not in ds[dim].attrs - ): + if "standard_name" not in ds[dim].attrs: continue - - formula_terms = ds[dim].attrs["formula_terms"] stdname = ds[dim].attrs["standard_name"] # map "standard" formula term names to actual variable names terms = {} - for mapping in re.sub(": ", ":", formula_terms).split(" "): - key, value = mapping.split(":") + for key, value in allterms[dim].items(): if value not in ds: raise KeyError( - f"Variable {value!r} is required to decode coordinate for {dim} but it is absent in the Dataset." + f"Variable {value!r} is required to decode coordinate for {dim!r}" + " but it is absent in the Dataset." ) terms[key] = ds[value] @@ -1776,12 +1783,30 @@ def decode_vertical_coords(self, prefix="z"): else: raise NotImplementedError( - f"Coordinate function for {stdname} not implemented yet. Contributions welcome!" + f"Coordinate function for {stdname!r} not implemented yet. Contributions welcome!" 
) @xr.register_dataarray_accessor("cf") class CFDataArrayAccessor(CFAccessor): + @property + def formula_terms(self) -> Dict[str, str]: + """ + Property that returns a dictionary + {parametric_coord_name: {standard_term_name: variable_name}} + """ + da = self._obj + if "formula_terms" not in da.attrs: + var = da[_single(_get_dims)(da, "Z")[0]] + else: + var = da + terms = {} + formula_terms = var.attrs.get("formula_terms", "") + for mapping in re.sub(r"\s*:\s*", ":", formula_terms).split(): + key, value = mapping.split(":") + terms[key] = value + return terms + def __getitem__(self, key: Union[str, List[str]]) -> DataArray: """ Index into a DataArray making use of CF attributes. diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 41bbb9f8..6f4060d4 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -945,6 +945,28 @@ def test_param_vcoord_ocean_s_coord(): copy.cf.decode_vertical_coords() +def test_formula_terms(): + srhoterms = { + "s": "s_rho", + "C": "Cs_r", + "eta": "zeta", + "depth": "h", + "depth_c": "hc", + } + assert romsds.cf.formula_terms == {"s_rho": srhoterms} + assert romsds["temp"].cf.formula_terms == srhoterms + assert romsds["s_rho"].cf.formula_terms == srhoterms + + s_rho = romsds["s_rho"].copy(deep=True) + del s_rho.attrs["standard_name"] + del s_rho.s_rho.attrs["standard_name"] # TODO: xarray bug + assert s_rho.cf.formula_terms == srhoterms + + with pytest.raises(KeyError): + # x,y,t variable + romsds["zeta"].cf.formula_terms + + def test_standard_name_mapper(): da = xr.DataArray( np.arange(6), diff --git a/doc/api.rst b/doc/api.rst index 49aa425c..d409b429 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -30,6 +30,7 @@ Attributes DataArray.cf.axes DataArray.cf.cell_measures DataArray.cf.coordinates + DataArray.cf.formula_terms DataArray.cf.standard_names DataArray.cf.plot @@ -66,6 +67,7 @@ Attributes Dataset.cf.bounds Dataset.cf.cell_measures Dataset.cf.coordinates + 
Dataset.cf.formula_terms Dataset.cf.standard_names .. _dsmeth: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 29bf48e4..af1a88be 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,8 @@ What's New v0.5.2 (unreleased) =================== +- Added :py:attr:`DataArray.cf.formula_terms` and :py:attr:`Dataset.cf.formula_terms`. + By `Deepak Cherian`_. - Added :py:attr:`Dataset.cf.bounds` to return a dictionary mapping valid keys to the variable names of their bounds. By `Mattia Almansi`_. - :py:meth:`DataArray.cf.differentiate` and :py:meth:`Dataset.cf.differentiate` can optionally correct sign of the derivative by interpreting the ``"positive"`` attribute. By `Deepak Cherian`_. From ad3d4980cb373eed3bf6bbc149647cfe45244bb4 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Mon, 3 May 2021 20:52:17 +0100 Subject: [PATCH 47/57] `add_bounds` uses `keys` rather than `dims` (#221) --- cf_xarray/accessor.py | 20 ++++++++++++-------- cf_xarray/tests/test_accessor.py | 4 ++++ doc/whats-new.rst | 1 + 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index c0205afb..14d7314e 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1586,15 +1586,15 @@ def get_bounds_dim_name(self, key: str) -> str: assert self._obj.sizes[bounds_dim] in [2, 4] return bounds_dim - def add_bounds(self, dims: Union[Hashable, Iterable[Hashable]]): + def add_bounds(self, keys: Union[str, Iterable[str]]): """ Returns a new object with bounds variables. The bounds values are guessed assuming equal spacing on either side of a coordinate label. Parameters ---------- - dims : Hashable or Iterable[Hashable] - Either a single dimension name or a list of dimension names. + keys : str or Iterable[str] + Either a single key or a list of keys corresponding to dimensions. 
Returns ------- @@ -1609,12 +1609,16 @@ def add_bounds(self, dims: Union[Hashable, Iterable[Hashable]]): The bounds variables are automatically named f"{dim}_bounds" where ``dim`` is a dimension name. """ - if isinstance(dims, Hashable): - dimensions = (dims,) - else: - dimensions = dims + if isinstance(keys, str): + keys = [keys] + + dimensions = set() + for key in keys: + dimensions.update( + apply_mapper(_get_dims, self._obj, key, error=False, default=[key]) + ) - bad_dims: Set[Hashable] = set(dimensions) - set(self._obj.dims) + bad_dims: Set[str] = dimensions - set(self._obj.dims) if bad_dims: raise ValueError( f"{bad_dims!r} are not dimensions in the underlying object." diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 6f4060d4..dfa5ebc9 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -634,6 +634,10 @@ def test_add_bounds(obj, dims): assert added[dim].attrs["bounds"] == name assert_allclose(added[name].reset_coords(drop=True), expected[dim]) + # Test multiple dimensions + assert not {"x1_bounds", "x2_bounds"} <= set(multiple.variables) + assert {"x1_bounds", "x2_bounds"} <= set(multiple.cf.add_bounds("X").variables) + def test_bounds(): ds = airds.copy(deep=True).cf.add_bounds("lat") diff --git a/doc/whats-new.rst b/doc/whats-new.rst index af1a88be..3e5fae81 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.5.2 (unreleased) =================== +- Replace the ``dims`` argument of :py:meth:`Dataset.cf.add_bounds` with ``keys``, allowing to use CF keys. By `Mattia Almansi`_. - Added :py:attr:`DataArray.cf.formula_terms` and :py:attr:`Dataset.cf.formula_terms`. By `Deepak Cherian`_. - Added :py:attr:`Dataset.cf.bounds` to return a dictionary mapping valid keys to the variable names of their bounds. By `Mattia Almansi`_. 
From d2d750f321b60700dbcbf58b413586978f45579b Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Mon, 10 May 2021 16:04:55 +0100 Subject: [PATCH 48/57] Add pooch to binder environment (#223) --- .binder/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.binder/environment.yml b/.binder/environment.yml index dcc99c35..6d65e763 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -7,5 +7,6 @@ dependencies: - netcdf4 - pip - xarray + - pooch - pip: - git+https://github.com/xarray-contrib/cf-xarray From 5682bdf30e952cf77edae31d995506ea44f6168f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 11 May 2021 10:28:03 -0600 Subject: [PATCH 49/57] Some CMIP6 support both for autoguessing and long_name == cell index along (first|second) dimension --- cf_xarray/criteria.py | 14 ++++++++------ cf_xarray/tests/test_accessor.py | 27 ++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/cf_xarray/criteria.py b/cf_xarray/criteria.py index 060e9938..9fbd7e40 100644 --- a/cf_xarray/criteria.py +++ b/cf_xarray/criteria.py @@ -5,6 +5,7 @@ """ +import copy import re from typing import MutableMapping, Tuple @@ -74,19 +75,20 @@ } # "long_name" and "standard_name" criteria are the same. For convenience. 
-coordinate_criteria["long_name"] = coordinate_criteria["standard_name"] +coordinate_criteria["long_name"] = copy.deepcopy(coordinate_criteria["standard_name"]) +coordinate_criteria["long_name"]["X"] += ("cell index along first dimension",) +coordinate_criteria["long_name"]["Y"] += ("cell index along second dimension",) #: regular expressions for guess_coord_axis regex = { "time": re.compile("\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*"), - "vertical": re.compile( - "(z|nav_lev|gdep|lv_|bottom_top|sigma|h(ei)?ght|altitude|depth|" + "Z": re.compile( + "(z|nav_lev|gdep|lv_|[o]*lev|bottom_top|sigma|h(ei)?ght|altitude|depth|" "isobaric|pres|isotherm)[a-z_]*[0-9]*" ), - "Y": re.compile("y"), + "Y": re.compile("y|j|nlat|nj"), "latitude": re.compile("y?(nav_lat|lat|gphi)[a-z0-9]*"), - "X": re.compile("x"), + "X": re.compile("x|i|nlon|ni"), "longitude": re.compile("x?(nav_lon|lon|glam)[a-z0-9]*"), } -regex["Z"] = regex["vertical"] regex["T"] = regex["time"] diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index dfa5ebc9..e89a4a25 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -780,9 +780,9 @@ def _make_names(prefixes): "nav_lev", ] ) -_X_NAMES = _make_names(["x"]) -_Y_NAMES = _make_names(["y"]) -_Z_NAMES = _VERTICAL_NAMES +_X_NAMES = _make_names(["x", "nlon", "i", "ni"]) +_Y_NAMES = _make_names(["y", "nlat", "j", "nj"]) +_Z_NAMES = _VERTICAL_NAMES + ["olevel", "level", "zlevel"] _LATITUDE_NAMES = _make_names(["lat", "latitude", "gphi", "nav_lat"]) _LONGITUDE_NAMES = _make_names(["lon", "longitude", "glam", "nav_lon"]) @@ -1193,3 +1193,24 @@ def test_differentiate_positive_upward(obj): obj.z.attrs["positive"] = "zzz" with pytest.raises(ValueError): obj.cf.differentiate("z", positive_upward=True) + + +def test_cmip6_attrs(): + da = xr.DataArray( + np.ones((10, 10)), + dims=("nlon", "nlat"), + coords={ + "nlon": ( + "nlon", + np.arange(10), + {"long_name": "cell index along first dimension"}, + ), 
+ "nlat": ( + "nlat", + np.arange(10), + {"long_name": "cell index along second dimension"}, + ), + }, + ) + assert da.cf.axes["X"] == ["nlon"] + assert da.cf.axes["Y"] == ["nlat"] From 5774665377d744e64a0dcca96d2a0a1a7ec61e47 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 11 May 2021 11:37:56 -0600 Subject: [PATCH 50/57] v0.5.2 --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3e5fae81..7177bddf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,9 +3,10 @@ What's New ---------- -v0.5.2 (unreleased) -=================== +v0.5.2 (May 11, 2021) +===================== +- Add some explicit support for CMIP6 output. By `Deepak Cherian`_. - Replace the ``dims`` argument of :py:meth:`Dataset.cf.add_bounds` with ``keys``, allowing to use CF keys. By `Mattia Almansi`_. - Added :py:attr:`DataArray.cf.formula_terms` and :py:attr:`Dataset.cf.formula_terms`. By `Deepak Cherian`_. From 748436b40619d843abc2fb1e0c37f51f4a296e5f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 14 May 2021 13:10:02 -0600 Subject: [PATCH 51/57] Add earthcube 2021 notebook link to readme --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index e882f4f7..6af4f1a0 100644 --- a/README.rst +++ b/README.rst @@ -35,3 +35,5 @@ A lightweight convenience wrapper for using CF attributes on xarray objects. For example you can use ``.cf.mean("latitude")`` instead of ``.mean("lat")`` if appropriate attributes are set! This allows you to write code that does not require knowledge of specific dimension or coordinate names particular to a dataset. See more in the introductory notebook `here `_. + +Try out our Earthcube 2021 Annual Meeting notebook submission `here `_. 
From 5d3e5fe0f428faa6d8c3b4efd6fe9a625303570e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 14 May 2021 14:00:02 -0600 Subject: [PATCH 52/57] Update README.rst --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 6af4f1a0..84a8573c 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,6 @@ A lightweight convenience wrapper for using CF attributes on xarray objects. For example you can use ``.cf.mean("latitude")`` instead of ``.mean("lat")`` if appropriate attributes are set! This allows you to write code that does not require knowledge of specific dimension or coordinate names particular to a dataset. -See more in the introductory notebook `here `_. +See more in the `introductory notebook `_. -Try out our Earthcube 2021 Annual Meeting notebook submission `here `_. +Try out our Earthcube 2021 Annual Meeting notebook `submission `_. From 24bf4d38bbe737a1dee3a18710189bd19ddf88dc Mon Sep 17 00:00:00 2001 From: Jon Thielen Date: Tue, 18 May 2021 13:00:00 -0500 Subject: [PATCH 53/57] Add unit support to cf-xarray (#197) Co-authored-by: keewis Co-authored-by: dcherian Co-authored-by: Deepak Cherian Co-authored-by: Keewis --- .github/workflows/ci.yaml | 23 +++++++++++ cf_xarray/tests/test_units.py | 60 +++++++++++++++++++++++++++++ cf_xarray/units.py | 58 ++++++++++++++++++++++++++++ ci/environment-no-optional-deps.yml | 13 +++++++ ci/environment.yml | 1 + ci/upstream-dev-env.yml | 1 + doc/whats-new.rst | 7 ++++ setup.cfg | 2 +- 8 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 cf_xarray/tests/test_units.py create mode 100644 cf_xarray/units.py create mode 100644 ci/environment-no-optional-deps.yml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6eb0518b..b3a2636f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -48,6 +48,29 @@ jobs: name: codecov-umbrella fail_ci_if_error: false + no-optional-deps: + name: no-optional-deps + 
runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge + mamba-version: "*" + activate-environment: cf_xarray_test + auto-update-conda: false + python-version: ${{ matrix.python-version }} + - name: Set up conda environment + shell: bash -l {0} + run: | + mamba env update -f ci/environment-no-optional-deps.yml + python -m pip install -e . + conda list + - name: Run Tests + shell: bash -l {0} + run: | + pytest -n 2 + upstream-dev: name: upstream-dev runs-on: ubuntu-latest diff --git a/cf_xarray/tests/test_units.py b/cf_xarray/tests/test_units.py new file mode 100644 index 00000000..17398d4b --- /dev/null +++ b/cf_xarray/tests/test_units.py @@ -0,0 +1,60 @@ +r"""Tests the operation of cf_xarray's ported unit support code. + +Reused with modification from MetPy under the terms of the BSD 3-Clause License. +Copyright (c) 2017 MetPy Developers. +""" + +import pytest + +pytest.importorskip("pint") + +from ..units import units + + +def test_added_degrees_units(): + """Test that our added degrees units are present in the registry.""" + # Test equivalence of abbreviations/aliases to our defined names + assert str(units("degrees_N").units) == "degrees_north" + assert str(units("degreesN").units) == "degrees_north" + assert str(units("degree_north").units) == "degrees_north" + assert str(units("degree_N").units) == "degrees_north" + assert str(units("degreeN").units) == "degrees_north" + assert str(units("degrees_E").units) == "degrees_east" + assert str(units("degreesE").units) == "degrees_east" + assert str(units("degree_east").units) == "degrees_east" + assert str(units("degree_E").units) == "degrees_east" + assert str(units("degreeE").units) == "degrees_east" + + # Test equivalence of our defined units to base units + assert units("degrees_north") == units("degrees") + assert units("degrees_north").to_base_units().units == units.radian + assert units("degrees_east") == units("degrees") + 
assert units("degrees_east").to_base_units().units == units.radian + + +def test_gpm_unit(): + """Test that the gpm unit does alias to meters.""" + x = 1 * units("gpm") + assert str(x.units) == "meter" + + +def test_psu_unit(): + """Test that the psu unit are present in the registry.""" + x = 1 * units("psu") + assert str(x.units) == "practical_salinity_unit" + + +def test_percent_units(): + """Test that percent sign units are properly parsed and interpreted.""" + assert str(units("%").units) == "percent" + + +@pytest.mark.xfail(reason="not supported by pint, yet: hgrecco/pint#1295") +def test_udunits_power_syntax(): + """Test that UDUNITS style powers are properly parsed and interpreted.""" + assert units("m2 s-2").units == units.m ** 2 / units.s ** 2 + + +def test_udunits_power_syntax_parse_units(): + """Test that UDUNITS style powers are properly parsed and interpreted.""" + assert units.parse_units("m2 s-2") == units.m ** 2 / units.s ** 2 diff --git a/cf_xarray/units.py b/cf_xarray/units.py new file mode 100644 index 00000000..4aad7a4f --- /dev/null +++ b/cf_xarray/units.py @@ -0,0 +1,58 @@ +r"""Module to provide unit support via pint approximating UDUNITS/CF. + +Reused with modification from MetPy under the terms of the BSD 3-Clause License. +Copyright (c) 2015,2017,2019 MetPy Developers. +""" +import functools +import re +import warnings + +import pint +from pint import ( # noqa: F401 + DimensionalityError, + UndefinedUnitError, + UnitStrippedWarning, +) + +# Create registry, with preprocessors for UDUNITS-style powers (m2 s-2) and percent signs +units = pint.UnitRegistry( + autoconvert_offset_to_baseunit=True, + preprocessors=[ + functools.partial( + re.compile( + r"(?<=[A-Za-z])(?![A-Za-z])(? 
Date: Thu, 27 May 2021 14:58:39 +0100 Subject: [PATCH 54/57] Don't apply mappers to DataArrays (#227) --- cf_xarray/accessor.py | 21 +++++++++++++++------ cf_xarray/tests/test_accessor.py | 9 +++++++-- cf_xarray/utils.py | 6 +++++- doc/examples/introduction.ipynb | 19 ++++++++++++++++++- 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 14d7314e..231e1008 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -26,7 +26,12 @@ from .criteria import coordinate_criteria, regex from .helpers import bounds_to_vertices -from .utils import _is_datetime_like, invert_mappings, parse_cell_methods_attr +from .utils import ( + _is_datetime_like, + always_iterable, + invert_mappings, + parse_cell_methods_attr, +) #: Classes wrapped by cf_xarray. _WRAPPED_CLASSES = ( @@ -68,7 +73,7 @@ def apply_mapper( mappers: Union[Mapper, Tuple[Mapper, ...]], obj: Union[DataArray, Dataset], - key: str, + key: Any, error: bool = True, default: Any = None, ) -> List[Any]: @@ -79,8 +84,13 @@ def apply_mapper( It should return a list in all other cases including when there are no results for a good key. 
""" - if default is None: - default = [] + + if not isinstance(key, str): + if default is None: + raise ValueError("`default` must be provided when `key` is not a string.") + return list(always_iterable(default)) + + default = [] if default is None else list(always_iterable(default)) def _apply_single_mapper(mapper): @@ -917,8 +927,7 @@ def _rewrite_values( value = kwargs[key] mappers = all_mappers[key] - if isinstance(value, str): - value = [value] + value = always_iterable(value) if isinstance(value, dict): # this for things like isel where **kwargs captures things like T=5 diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index e89a4a25..1c337b30 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -317,8 +317,13 @@ def test_weighted(obj): with raise_if_dask_computes(max_computes=2): # weights are checked for nans expected = obj.weighted(obj["cell_area"]).sum("lat") - actual = obj.cf.weighted("area").sum("Y") - assert_identical(expected, actual) + actuals = [ + obj.cf.weighted("area").sum("Y"), + obj.cf.weighted(obj["cell_area"]).sum("Y"), + obj.cf.weighted(weights=obj["cell_area"]).sum("Y"), + ] + for actual in actuals: + assert_identical(expected, actual) @pytest.mark.parametrize("obj", objects) diff --git a/cf_xarray/utils.py b/cf_xarray/utils.py index e05e3501..70c61afd 100644 --- a/cf_xarray/utils.py +++ b/cf_xarray/utils.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Dict +from typing import Any, Dict, Iterable from xarray import DataArray @@ -53,3 +53,7 @@ def invert_mappings(*mappings): for name in v: merged[name] |= {k} return merged + + +def always_iterable(obj: Any) -> Iterable: + return [obj] if not isinstance(obj, (tuple, list, set, dict)) else obj diff --git a/doc/examples/introduction.ipynb b/doc/examples/introduction.ipynb index 1a2a38ef..a331c504 100644 --- a/doc/examples/introduction.ipynb +++ b/doc/examples/introduction.ipynb @@ -25,6 +25,22 @@ 
"import xarray as xr" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`cf_xarray` works best when `xarray` keeps attributes by default.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "xr.set_options(keep_attrs=True)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -914,6 +930,7 @@ " * 110e3\n", ")\n", "# and set proper attributes\n", + "ds[\"cell_area\"].attrs = dict(standard_name=\"cell_area\", units=\"m2\")\n", "ds.air.attrs[\"cell_measures\"] = \"area: cell_area\"" ] }, @@ -1000,7 +1017,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.8.10" }, "toc": { "base_numbering": 1, From dfde8e30432296a6da71e5f4afeac370b0868db4 Mon Sep 17 00:00:00 2001 From: Mattia Almansi Date: Thu, 27 May 2021 14:59:19 +0100 Subject: [PATCH 55/57] Improve `rename_like` (#222) Co-authored-by: Deepak Cherian --- cf_xarray/accessor.py | 81 ++++++++++++++++++++++---------- cf_xarray/tests/test_accessor.py | 16 ++++++- doc/whats-new.rst | 1 + 3 files changed, 71 insertions(+), 27 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 231e1008..b8efa011 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -1326,7 +1326,7 @@ def rename_like( Variables will be renamed to match variable names in this xarray object skip: str, Iterable[str], optional Limit the renaming excluding - ("axes", "cell_measures", "coordinates", "standard_names") + ("axes", "bounds", cell_measures", "coordinates", "standard_names") or a subset thereof. 
Returns @@ -1341,24 +1341,48 @@ def rename_like( good_keys = ourkeys & theirkeys keydict = {} for key in good_keys: - ours = set(_get_all(self._obj, key)) - theirs = set(_get_all(other, key)) + ours = set(apply_mapper(_get_all, self._obj, key)) + theirs = set(apply_mapper(_get_all, other, key)) for attr in skip: - ours -= set(getattr(self, attr).get(key, [])) - theirs -= set(getattr(other.cf, attr).get(key, [])) + ours.difference_update(getattr(self, attr).get(key, [])) + theirs.difference_update(getattr(other.cf, attr).get(key, [])) if ours and theirs: keydict[key] = dict(ours=list(ours), theirs=list(theirs)) - conflicts = {} - for k0, v0 in keydict.items(): - if len(v0["ours"]) > 1 or len(v0["theirs"]) > 1: - conflicts[k0] = v0 - continue - for v1 in keydict.values(): - # Conflicts have same ours but different theirs or vice versa - if (v0["ours"] == v1["ours"]) != (v0["theirs"] == v1["theirs"]): + def get_renamer_and_conflicts(keydict): + conflicts = {} + for k0, v0 in keydict.items(): + if len(v0["ours"]) > 1 or len(v0["theirs"]) > 1: conflicts[k0] = v0 - break + continue + for v1 in keydict.values(): + # Conflicts have same ours but different theirs or vice versa + if (v0["ours"] == v1["ours"]) != (v0["theirs"] == v1["theirs"]): + conflicts[k0] = v0 + break + + renamer = { + v["ours"][0]: v["theirs"][0] + for k, v in keydict.items() + if k not in conflicts + } + + return renamer, conflicts + + # Run get_renamer_and_conflicts twice. 
+ # The second time add the bounds associated with variables to rename + renamer, conflicts = get_renamer_and_conflicts(keydict) + if "bounds" not in skip: + for k, v in renamer.items(): + ours = set(getattr(self, "bounds", {}).get(k, [])) + theirs = set(getattr(other.cf, "bounds", {}).get(v, [])) + if ours and theirs: + ours.update(keydict.get(k, {}).get("ours", [])) + theirs.update(keydict.get(k, {}).get("theirs", [])) + keydict[k] = dict(ours=list(ours), theirs=list(theirs)) + renamer, conflicts = get_renamer_and_conflicts(keydict) + + # Rename and warn if conflicts: warnings.warn( "Conflicting variables skipped:\n" @@ -1372,23 +1396,28 @@ def rename_like( ), UserWarning, ) - - renamer = { - v["ours"][0]: v["theirs"][0] - for k, v in keydict.items() - if k not in conflicts - } newobj = self._obj.rename(renamer) - # rename variable names in the coordinates attribute + # rename variable names in the attributes # if present ds = self._maybe_to_dataset(newobj) for _, variable in ds.variables.items(): - coordinates = variable.attrs.get("coordinates", None) - if coordinates: - for k, v in renamer.items(): - coordinates = coordinates.replace(k, v) - variable.attrs["coordinates"] = coordinates + for attr in ("bounds", "coordinates", "cell_measures"): + if attr == "cell_measures": + varlist = [ + f"{k}: {renamer.get(v, v)}" + for k, v in parse_cell_methods_attr( + variable.attrs.get(attr, "") + ).items() + ] + else: + varlist = [ + renamer.get(var, var) + for var in variable.attrs.get(attr, "").split() + ] + + if varlist: + variable.attrs[attr] = " ".join(varlist) return self._maybe_to_dataarray(ds) def guess_coord_axis(self, verbose: bool = False) -> Union[DataArray, Dataset]: diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 1c337b30..47e5d03e 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -241,7 +241,7 @@ def test_rename_like(): for k in ["TLONG", "TLAT"]: assert k not in renamed.coords 
assert k in original.coords - assert original.TEMP.attrs["coordinates"] == "TLONG TLAT" + assert original.TEMP.attrs["coordinates"] == "TLONG TLAT" assert "lon" in renamed.coords assert "lat" in renamed.coords @@ -271,6 +271,20 @@ def test_rename_like(): actual = da.cf.rename_like(airds, skip="axes").cf.coordinates assert expected == actual + # rename bounds + original = airds.cf[["air"]].cf.add_bounds("lon") + other = popds.cf[["TEMP"]].cf.add_bounds("nlon") + renamed = original.cf.rename_like(other, skip="coordinates") + assert renamed.cf.bounds["nlon"] == ["nlon_bounds"] + + # rename cell measures + other = airds.cf["air"].cf.rename(area="CELL_AREA") + other.attrs["cell_measures"] = other.attrs["cell_measures"].replace( + "cell_area", "CELL_AREA" + ) + renamed = airds.cf["air"].cf.rename_like(other) + assert renamed.cf.cell_measures["area"] == ["CELL_AREA"] + @pytest.mark.parametrize("obj", objects) @pytest.mark.parametrize( diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fb03011f..9f444669 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -7,6 +7,7 @@ v0.5.3 (unreleased) =================== - Begin adding support for units with a unit registry for pint arrays. :pr:`197`. By `Jon Thielen`_ and `Justus Magin`_. +- :py:meth:`Dataset.cf.rename_like` also updates the ``bounds`` and ``cell_measures`` attributes. By `Mattia Almansi`_. v0.5.2 (May 11, 2021) ===================== From 820ca440392cd3ca5b9074d3983875435eaecce2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 7 Jun 2021 12:23:31 -0600 Subject: [PATCH 56/57] Add zenodo badge --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 84a8573c..72a00c55 100644 --- a/README.rst +++ b/README.rst @@ -26,6 +26,8 @@ :target: https://anaconda.org/conda-forge/cf_xarray :alt: Conda Version +.. 
image:: https://zenodo.org/badge/267381269.svg + :target: https://zenodo.org/badge/latestdoi/267381269 cf-xarray ========= From e11cf07f49ab121633131a024843644045304505 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 8 Jun 2021 16:41:11 -0600 Subject: [PATCH 57/57] Add CITATION.cff, tributors, zenodo.json (#231) Co-authored-by: Jon Thielen Co-authored-by: malmans2 --- .tributors | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ .zenodo.json | 34 ++++++++++++++++++++++++++++++++++ CITATION.cff | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 .tributors create mode 100644 .zenodo.json create mode 100644 CITATION.cff diff --git a/.tributors b/.tributors new file mode 100644 index 00000000..f9edb2cb --- /dev/null +++ b/.tributors @@ -0,0 +1,48 @@ +{ + "dcherian": { + "name": "Deepak Cherian", + "bio": "physical oceanographer", + "blog": "http://www.cherian.net", + "orcid": "0000-0002-6861-8734", + "affiliation": "National Center for Atmospheric Research" + }, + "malmans2": { + "name": "Mattia Almansi", + "blog": "https://malmans2.github.io", + "orcid": "0000-0001-6849-3647", + "affiliation": "National Oceanography Centre" + }, + "aulemahal": { + "name": "Pascal Bourgault", + "bio": "Physical oceanography graduate turned climate science specialist and scientific developer.", + "orcid": "0000-0003-1192-0403", + "affiliation": "Ouranos Inc" + }, + "keewis": { + "name": "Justus Magin" + }, + "jukent": { + "name": "Julia Kent", + "affiliation": "National Center for Atmospheric Research", + "orcid": "0000-0002-5611-8986" + }, + "kthyng": { + "name": "Kristen Thyng", + "bio": "MetOcean Data Scientist at Axiom Data Science. Associate Editor in Chief at the Journal for Open Source Software (JOSS). 
Wrote cmocean colormaps.", + "blog": "http://kristenthyng.com", + "orcid": "0000-0002-8746-614X", + "affiliation": "Axiom Data Science" + }, + "jhamman": { + "name": "Joe Hamman", + "bio": "Scientist and Engineer and Human.\r\n", + "blog": "http://joehamman.com", + "orcid": "0000-0001-7479-8439", + "affiliation": "CarbonPlan" + }, + "withshubh": { + "name": "Shubhendra Singh Chauhan", + "bio": "Developer Advocate at @deepsourcelabs 🥑 \r\n👨🏻‍💻 work profile: @shubhendra-deepsource", + "blog": "camelcaseguy.com" + } +} diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 00000000..5e50bbd9 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,34 @@ +{ + "creators": [ + { + "name": "Deepak Cherian", + "affiliation": "National Center for Atmospheric Research", + "orcid": "0000-0002-6861-8734" + }, + { + "name": "Mattia Almansi", + "affiliation": "National Oceanography Centre", + "orcid": "0000-0001-6849-3647" + }, + { + "name": "Pascal Bourgault", + "affiliation": "Ouranos Inc", + "orcid": "0000-0003-1192-0403" + }, + { + "name": "Julia Kent", + "affiliation": "National Center for Atmospheric Research", + "orcid": "0000-0002-5611-8986" + }, + { + "name": "Justus Magin" + }, + { + "name": "Kristen Thyng", + "affiliation": "Axiom Data Science", + "orcid": "0000-0002-8746-614X" + } + ], + "upload_type": "software", + "keywords": [] +} diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..1607cb16 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,43 @@ +# YAML 1.2 +# Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/) +cff-version: 1.0.3 +message: If you use this software, please cite it using these metadata.
+title: cf_xarray +doi: 10.5281/zenodo.4749736 +repository-code: https://github.com/xarray-contrib/cf-xarray +license: Apache-2.0 +version: 0.5.2 +date-released: 2021-05-11 +keywords: +- cf-conventions +- xarray +- metadata + +authors: +- affiliation: National Center for Atmospheric Research, USA + family-names: Cherian + given-names: Deepak + orcid: https://orcid.org/0000-0002-6861-8734 +- affiliation: National Oceanography Centre, Southampton, UK + family-names: Almansi + given-names: Mattia + orcid: https://orcid.org/0000-0001-6849-3647 +- affiliation: Ouranos, Inc. + family-names: Bourgault + given-names: Pascal + orcid: https://orcid.org/0000-0003-1192-0403 +- affiliation: National Center for Atmospheric Research, USA + family-names: Kent + given-names: Julia + orcid: https://orcid.org/0000-0002-5611-8986 +- family-names: Magin + given-names: Justus +- family-names: Thielen + given-names: Jon + orcid: https://orcid.org/0000-0002-5479-0189 + affiliation: Iowa State University, Ames, IA, USA +- affiliation: Axiom Data Science + family-names: Thyng + given-names: Kristen + orcid: https://orcid.org/0000-0002-8746-614X +...