Accessor (#5)

* convert to accessors: use accessors instead of a Dataset subclass, which should be more stable for future xarray versions
ActivitySim · Mar 3, 2022 · e4251b1 · e4251b1
1 parent 183e9ca
commit e4251b1
Show file tree

Hide file tree

Showing 18 changed files with 1,357 additions and 1,418 deletions.
diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -84,7 +84,7 @@ jobs:
         auto-update-conda: false
     - name: Install Jupyterbook and ruamel.yaml
       run: |
-        mamba install jupyter-book ruamel.yaml -c conda-forge
+        mamba install jupyter-book ruamel.yaml sphinx-autosummary-accessors -c conda-forge
     - name: Install sharrow
       run: |
         python -m pip install --no-deps -e .

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -5,6 +5,7 @@ repos:
   hooks:
   - id: check-yaml
   - id: end-of-file-fixer
+    exclude: .*\.ipynb
   - id: trailing-whitespace
 
 - repo: https://github.com/kynan/nbstripout

diff --git a/docs/_config.yml b/docs/_config.yml
@@ -35,11 +35,14 @@ html:
 sphinx:
   extra_extensions:
   - sphinx.ext.autodoc
+  - sphinx.ext.autosummary
   - sphinx.ext.napoleon
   - sphinx.ext.viewcode
   - sphinx.ext.intersphinx
+  - sphinx_autosummary_accessors
   config:
     add_module_names: false
+    napoleon_use_rtype: false
     html_theme_options:
       home_page_in_toc: false
       search_bar_text: Search these docs...

diff --git a/docs/api.rst b/docs/api.rst
@@ -73,54 +73,54 @@ Convenience
 --------------------------------------------------------------------------------
 Dataset
 --------------------------------------------------------------------------------
-.. autoclass:: sharrow.Dataset
-    :show-inheritance:
+Sharrow uses the :py:class:`xarray.Dataset` class extensively.  Refer to the
+`xarray documentation <https://docs.xarray.dev/en/stable/>`_ for standard usage.
+The attributes and methods documented here are added to :py:class:`xarray.Dataset`
+when you import sharrow.
 
 Constructors
 ~~~~~~~~~~~~
-.. automethod:: sharrow.Dataset.construct
-.. automethod:: sharrow.Dataset.from_table
-.. automethod:: sharrow.Dataset.from_omx
-.. automethod:: sharrow.Dataset.from_omx_3d
-.. automethod:: sharrow.Dataset.from_zarr
-.. automethod:: sharrow.Dataset.from_named_objects
+
+The sharrow library provides several constructors for :py:class:`Dataset` objects.
+These functions can be found in the :py:mod:`sharrow.dataset` module.
+
+.. autofunction:: sharrow.dataset.construct
+.. autofunction:: sharrow.dataset.from_table
+.. autofunction:: sharrow.dataset.from_omx
+.. autofunction:: sharrow.dataset.from_omx_3d
+.. autofunction:: sharrow.dataset.from_zarr
+.. autofunction:: sharrow.dataset.from_named_objects
 
 Editing
 ~~~~~~~
-.. automethod:: sharrow.Dataset.update
 .. automethod:: sharrow.Dataset.ensure_integer
 
 Indexing
 ~~~~~~~~
-.. automethod:: sharrow.Dataset.at
-.. automethod:: sharrow.Dataset.iat
-.. automethod:: sharrow.Dataset.at_df
-.. automethod:: sharrow.Dataset.iat_df
+.. autoaccessor:: sharrow.Dataset.iloc
+.. autoaccessor:: sharrow.Dataset.at
+.. autoaccessormethod:: sharrow.Dataset.at.df
+.. autoaccessor:: sharrow.Dataset.iat
+.. autoaccessormethod:: sharrow.Dataset.iat.df
 
-Convenience
-~~~~~~~~~~~
-In many ways, a dataset with a single dimensions is like a pandas DataFrame,
-with the one dimension giving the rows, and the variables as columns.  This
-analogy eventually breaks down (DataFrame columns are ordered, Dataset
-variables are not) but the similarities are enought that it's sometimes convenient
-to have `loc` and `iloc` functionality enabled.  This only works for indexing on
-the rows, but if there's only the one dimension the complexity of `sel` and `isel`
-are not needed.
-
-.. autoattribute:: sharrow.Dataset.loc
-.. autoattribute:: sharrow.Dataset.iloc
 
 Shared Memory
 ~~~~~~~~~~~~~
-.. automethod:: sharrow.Dataset.to_shared_memory
-.. automethod:: sharrow.Dataset.from_shared_memory
-.. automethod:: sharrow.Dataset.release_shared_memory
-.. automethod:: sharrow.Dataset.preload_shared_memory_size
-.. autoattribute:: sharrow.Dataset.shared_memory_key
-.. autoattribute:: sharrow.Dataset.shared_memory_size
-.. autoattribute:: sharrow.Dataset.is_shared_memory
+Sharrow's shared memory system is consolidated into the :py:class:`Dataset.shm`
+accessor.
+
+.. autoaccessormethod:: sharrow.Dataset.shm.to_shared_memory
+.. autoaccessormethod:: sharrow.Dataset.shm.from_shared_memory
+.. autoaccessormethod:: sharrow.Dataset.shm.release_shared_memory
+.. autoaccessormethod:: sharrow.Dataset.shm.preload_shared_memory_size
+.. autoaccessorattribute:: sharrow.Dataset.shm.shared_memory_key
+.. autoaccessorattribute:: sharrow.Dataset.shm.shared_memory_size
+.. autoaccessorattribute:: sharrow.Dataset.shm.is_shared_memory
 
 Digital Encoding
 ~~~~~~~~~~~~~~~~
-.. autoattribute:: sharrow.Dataset.digital_encodings
-.. automethod:: sharrow.Dataset.set_digital_encoding
+Sharrow's digital encoding management is consolidated into the
+:py:class:`Dataset.digital_encoding` accessor.
+
+.. autoaccessormethod:: sharrow.Dataset.digital_encoding.info
+.. autoaccessormethod:: sharrow.Dataset.digital_encoding.set
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -7,3 +7,4 @@ xarray >= 0.20.0
 numba >= 0.53
 numexpr
 filelock
+sphinx-autosummary-accessors
diff --git a/docs/walkthrough/encoding.ipynb b/docs/walkthrough/encoding.ipynb
@@ -108,7 +108,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "f17a0eef",
    "metadata": {},
@@ -253,6 +252,63 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "03ef433c",
+   "metadata": {},
+   "source": [
+    "To manage the digital encodings across an entire dataset, sharrow implements\n",
+    "a `digital_encoding` accessor.  You can use it to apply encodings to one or more\n",
+    "variables in a simple fashion."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e73b78f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "skims_encoded = skims_encoded.digital_encoding.set(['DISTWALK', 'DISTBIKE'], scale=0.01, offset=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7ebda187",
+   "metadata": {},
+   "source": [
+    "And you can review the encodings for every variable in the dataset like this:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5eb8600",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "skims_encoded.digital_encoding.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd549b5e",
+   "metadata": {
+    "tags": [
+     "remove_cell"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# TEST\n",
+    "assert skims_encoded.digital_encoding.info() == {\n",
+    " 'DIST': {'scale': 0.01, 'offset': 0, 'missing_value': None},\n",
+    " 'DISTBIKE': {'scale': 0.01, 'offset': 0, 'missing_value': None},\n",
+    " 'DISTWALK': {'scale': 0.01, 'offset': 0, 'missing_value': None},\n",
+    "}"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "0ba5279d",
@@ -380,7 +436,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "d743105b",
    "metadata": {},
@@ -545,7 +600,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.9.10"
   },
   "toc": {
    "base_numbering": 1,

diff --git a/docs/walkthrough/one-dim.ipynb b/docs/walkthrough/one-dim.ipynb
@@ -114,6 +114,40 @@
     "assert persons.index.name == 'PERID'"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f4b2e1a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5d0b407",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sh.Dataset(persons)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fb5a7c20",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a6f02d7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "id": "077d2d46",
@@ -306,7 +340,7 @@
     "    ),\n",
     "    extra_vars={\n",
     "        'short_i_wait_mult': 0.75,\n",
-    "        'shortwait': 3.0,\n",
+    "        'shortwait': 3,\n",
     "    },\n",
     ")"
    ]
@@ -770,7 +804,6 @@
   }
  ],
  "metadata": {
-  "celltoolbar": "Tags",
   "kernelspec": {
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
@@ -786,7 +819,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.9"
+   "version": "3.9.10"
   },
   "toc": {
    "base_numbering": 1,

diff --git a/docs/walkthrough/two-dim.ipynb b/docs/walkthrough/two-dim.ipynb
@@ -122,9 +122,7 @@
     "The skims, on the other hand, are not just simple tabular data, but rather a \n",
     "multi-dimensional representation of the transportation system, indexed by origin.\n",
     "destination, and time of day. Rather than using a single DataFrame for this data,\n",
-    "we store it as a multi-dimensional `xarray.Dataset` — or, more exactly, a \n",
-    "`sharrow.Dataset`, which is a subclass from the xarray version that adds some \n",
-    "useful features we'll see later."
+    "we store it as a multi-dimensional `xarray.Dataset`."
    ]
   },
   {
@@ -247,12 +245,22 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "base = sh.Dataset.from_named_objects(\n",
+    "base = sh.dataset.from_named_objects(\n",
     "    workers.index, \n",
     "    landuse.index,\n",
     ")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2ba1f96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "base"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "824c3c0c",
@@ -842,7 +850,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.2"
+   "version": "3.9.10"
   },
   "toc": {
    "base_numbering": 1,

diff --git a/sharrow/__init__.py b/sharrow/__init__.py
@@ -1,6 +1,6 @@
 from xarray import DataArray
 
-from . import example_data
+from . import example_data, selectors, shared_memory
 from ._version import version as __version__
 from .dataset import Dataset
 from .digital_encoding import array_decode, array_encode

diff --git a/sharrow/accessors.py b/sharrow/accessors.py
@@ -0,0 +1,31 @@
+import xarray as xr
+
+
+def register_dataset_method(func):
+    """
+    Register a function as a method-like accessor on xarray Datasets.
+
+    The function is registered under its own ``__name__`` by way of
+    ``xr.register_dataset_accessor``.
+
+    Parameters
+    ----------
+    func : callable
+        Invoked as ``func(dataset, *args, **kwargs)``.
+
+    Returns
+    -------
+    object
+        Whatever the xarray registration decorator returns when applied
+        to the internal ``wrapper``.
+    """
+    # NOTE(review): xarray is expected to call the registered accessor with
+    # the Dataset it is accessed from -- confirm against the xarray docs.
+    def wrapper(dataset):
+        # Bind `dataset` as the first positional argument of `func`.
+        def f(*args, **kwargs):
+            return func(dataset, *args, **kwargs)
+
+        return f
+
+    # Only the docstring is carried over from `func` to the wrapper.
+    wrapper.__doc__ = func.__doc__
+    return xr.register_dataset_accessor(func.__name__)(wrapper)
+
+
+def register_dataarray_method(func):
+    """
+    Register a function as a method-like accessor on xarray DataArrays.
+
+    The function is registered under its own ``__name__`` by way of
+    ``xr.register_dataarray_accessor``.
+
+    Parameters
+    ----------
+    func : callable
+        Invoked as ``func(dataarray, *args, **kwargs)``.
+
+    Returns
+    -------
+    object
+        Whatever the xarray registration decorator returns when applied
+        to the internal ``wrapper``.
+    """
+    # NOTE(review): xarray is expected to call the registered accessor with
+    # the DataArray it is accessed from -- confirm against the xarray docs.
+    def wrapper(dataarray):
+        # Bind `dataarray` as the first positional argument of `func`.
+        def f(*args, **kwargs):
+            return func(dataarray, *args, **kwargs)
+
+        return f
+
+    # Only the docstring is carried over from `func` to the wrapper.
+    wrapper.__doc__ = func.__doc__
+    return xr.register_dataarray_accessor(func.__name__)(wrapper)
+
+
+def register_dataarray_staticmethod(func):
+    """
+    Register `func` itself, unwrapped, as a DataArray accessor.
+
+    The accessor name is ``func.__name__``.  No closure binding a
+    DataArray instance is created here; `func` is handed directly to
+    ``xr.register_dataarray_accessor``.
+
+    Parameters
+    ----------
+    func : callable
+        The function to register.
+
+    Returns
+    -------
+    object
+        Whatever the xarray registration decorator returns for `func`.
+    """
+    # NOTE(review): presumably meant for class-level use such as
+    # ``xr.DataArray.<name>(...)``; behavior on instance access depends on
+    # how xarray instantiates accessors -- confirm.
+    return xr.register_dataarray_accessor(func.__name__)(func)
+
+
+def register_dataset_staticmethod(func):
+    """
+    Register `func` itself, unwrapped, as a Dataset accessor.
+
+    The accessor name is ``func.__name__``.  No closure binding a
+    Dataset instance is created here; `func` is handed directly to
+    ``xr.register_dataset_accessor``.
+
+    Parameters
+    ----------
+    func : callable
+        The function to register.
+
+    Returns
+    -------
+    object
+        Whatever the xarray registration decorator returns for `func`.
+    """
+    # NOTE(review): presumably meant for class-level use such as
+    # ``xr.Dataset.<name>(...)``; behavior on instance access depends on
+    # how xarray instantiates accessors -- confirm.
+    return xr.register_dataset_accessor(func.__name__)(func)