Patch small items (#63)

* permit numpy >= 2
* add iloc to DataArray
* fix from_named_objects when using Series to init
* test_from_named_objects
* test_dataarray_iloc
* ruffen
ActivitySim · Sep 13, 2024 · 31bc3f6 · 31bc3f6
1 parent b491fca
commit 31bc3f6
Show file tree

Hide file tree

Showing 3 changed files with 82 additions and 4 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -10,7 +10,7 @@ name = "sharrow"
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-    "numpy >= 1.19, <2",
+    "numpy >= 1.19",
     "pandas >= 1.2",
     "pyarrow",
     "xarray",
@@ -59,7 +59,6 @@ select = [
     "B", # flake8-bugbear
 ]
 ignore = ["B905", "D1"]
-ignore-init-module-imports = true
 per-file-ignores = { "*.ipynb" = [
     "E402", # allow imports to appear anywhere in Jupyter Notebooks
     "E501", # allow long lines in Jupyter Notebooks

diff --git a/sharrow/dataset.py b/sharrow/dataset.py
@@ -91,7 +91,7 @@ def construct(source):
     Parameters
     ----------
     source : pandas.DataFrame, pyarrow.Table, xarray.Dataset, or Sequence[str]
-        The source from which to create a Dataset.  DataFrames and Tables
+        The source from which to create a Dataset.  DataFrame and Table objects
         are converted to Datasets that have one dimension (the rows) and
         separate variables for each of the columns.  A list of strings
         creates a dataset with those named empty variables.
@@ -1065,6 +1065,38 @@ def __getitem__(self, key: Mapping[Hashable, Any]) -> Dataset:
         return self.dataset.isel(key)
 
 
+@xr.register_dataarray_accessor("iloc")
+class _iLocArrayIndexer:
+    """
+    Purely integer-location based indexing for selection by position.
+
+    A DataArray with a single dimension is much like a pandas Series, so it
+    is sometimes convenient to index it positionally with a bare integer or
+    slice, as with `Series.iloc`, rather than spelling out the dimension
+    name for `isel`.  For arrays with more than one dimension the key must
+    be dict-like, mapping dimension names to positional indexers; it is
+    passed to `isel` unchanged.  Only lookups are implemented here, so
+    item assignment through `iloc` is not supported.
+    """
+
+    __slots__ = ("dataarray",)
+
+    def __init__(self, dataarray: DataArray):
+        self.dataarray = dataarray
+
+    def __getitem__(self, key: Any) -> DataArray:
+        if not is_dict_like(key):
+            dims = self.dataarray.dims
+            if len(dims) != 1:
+                raise TypeError(
+                    "can only lookup dictionaries from DataArray.iloc, "
+                    "unless there is only one dimension"
+                )
+            # exactly one dimension, so a bare key can only refer to it
+            key = {dims[0]: key}
+        return self.dataarray.isel(key)
+
+
 xr.Dataset.rename_dims_and_coords = xr.Dataset.rename
 
 
@@ -1182,6 +1214,8 @@ def _to_ast_literal(x):
         return _to_ast_literal(x.to_list())
     elif isinstance(x, np.ndarray):
         return _to_ast_literal(list(x))
+    elif isinstance(x, np.str_):
+        return repr(str(x))
     else:
         return repr(x)
 
@@ -1448,7 +1482,7 @@ def from_named_objects(*args):
             raise ValueError(f"argument {n} has no name") from None
         if name is None:
             raise ValueError(f"the name for argument {n} is None")
-        objs[name] = a
+        objs[name] = np.asarray(a)
     return xr.Dataset(objs)
 
 

diff --git a/sharrow/tests/test_datasets.py b/sharrow/tests/test_datasets.py
@@ -5,6 +5,7 @@
 import numpy as np
 import openmatrix
 import pandas as pd
+import pytest
 import xarray as xr
 from pytest import approx
 
@@ -133,3 +134,47 @@ def test_deferred_load_to_shared_memory():
         xr.testing.assert_equal(d0, d1)
         d2 = xr.Dataset.shm.from_shared_memory(token)
         xr.testing.assert_equal(d0, d2)
+
+
+def test_from_named_objects():
+    """Named Series, Index and DataArray inputs each yield their own dim."""
+    from sharrow.dataset import from_named_objects
+
+    squares = [1, 4, 9, 16]
+    primes = pd.Series([2, 3, 5, 7, 11], name="Primes")
+    named = [
+        ctor(squares, name="Squares") for ctor in (pd.Series, pd.Index, xr.DataArray)
+    ]
+    for obj in named:
+        ds = from_named_objects(obj, primes)
+        assert "Squares" in ds.dims
+        assert "Primes" in ds.dims
+        assert ds.sizes == {"Squares": 4, "Primes": 5}
+    with pytest.raises(ValueError):  # a plain list carries no name
+        from_named_objects(squares, primes)
+
+
+def test_dataarray_iloc():
+    """Exercise positional `iloc` lookups on 1-d and 2-d DataArrays."""
+    values = [1, 4, 9, 16, 25, 36]
+    squares = xr.DataArray(values, name="Squares", dims="s")
+
+    assert squares.iloc[1] == 4
+    # each slice taken via iloc must match the same slice of the raw values
+    for window in (np.s_[1:], np.s_[:2], np.s_[2:4], np.s_[:-2], np.s_[-2:]):
+        expected = xr.DataArray(values[window], dims="s")
+        xr.testing.assert_equal(squares.iloc[window], expected)
+
+    with pytest.raises(TypeError):
+        squares.iloc[1] = 5  # assignment not allowed
+
+    primes = xr.DataArray([2, 3, 5, 7, 11], name="Primes", dims="p")
+    grid = squares * primes
+
+    with pytest.raises(TypeError):
+        _tmp = grid.iloc[1]  # not allowed for 2D arrays
+
+    assert grid.iloc[{"s": 1, "p": 2}] == 20
+
+    corner = grid.iloc[{"s": slice(1, 2), "p": slice(2, 4)}]
+    xr.testing.assert_equal(corner, xr.DataArray([[20, 28]], dims=["s", "p"]))