Fix OMX construction with arbitrary zone ids (#27)
* fallback to dynamic version

* version infer

* fix dataset from_omx for arbitrary zone ids
jpn-- authored Aug 26, 2022
1 parent 1fe091b commit 7c500f5
Showing 5 changed files with 83 additions and 5 deletions.
2 changes: 1 addition & 1 deletion sharrow/__init__.py
@@ -1,7 +1,7 @@
 from xarray import DataArray

 from . import dataset, example_data, selectors, shared_memory, sparse
-from ._version import version as __version__
+from ._infer_version import __version__, __version_tuple__
 from .dataset import Dataset
 from .digital_encoding import array_decode, array_encode
 from .flows import Flow
13 changes: 13 additions & 0 deletions sharrow/_infer_version.py
@@ -0,0 +1,13 @@
+try:
+    from ._version import __version__, __version_tuple__
+except ImportError:
+    # Package is not "installed", parse git tag at runtime
+    from importlib.metadata import PackageNotFoundError, version
+
+    try:
+        __version__ = version(__package__)
+    except PackageNotFoundError:
+        # package is not installed
+        __version__ = "999.999"
+
+    __version_tuple__ = __version__.split(".")
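
With this fallback module in place, `sharrow.__version__` should resolve from the generated `_version.py` when it exists, otherwise from installed package metadata, and finally from the "999.999" placeholder. A minimal sketch of checking the result at runtime (illustrative, not part of the commit):

import sharrow

print(sharrow.__version__)        # a real release string, or "999.999" when no metadata is found
print(sharrow.__version_tuple__)  # version components; split from the string in the fallback path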
34 changes: 31 additions & 3 deletions sharrow/dataset.py
@@ -230,16 +230,18 @@ def from_omx(
     ----------
     omx : openmatrix.File or larch.OMX
         An OMX-format file, opened for reading.
-    index_names : tuple, default ("otaz", "dtaz", "time_period")
+    index_names : tuple, default ("otaz", "dtaz")
         Should be a tuple of length 3, giving the names of the three
         dimensions. The first two names are the native dimensions from
         the open matrix file, the last is the name of the implicit
         dimension that is created by parsing array names.
-    indexes : str, optional
+    indexes : str or tuple[str], optional
         The name of a 'lookup' in the OMX file, which will be used to
         populate the coordinates for the two native dimensions. Or,
         specify "one-based" or "zero-based" to assume sequential and
-        consecutive numbering starting with 1 or 0 respectively.
+        consecutive numbering starting with 1 or 0 respectively. For
+        non-square OMX data, this must be given as a tuple, relating
+        indexes as above for each dimension of `index_names`.
     renames : Mapping or Collection, optional
         Limit the import only to these data elements. If given as a
         mapping, the keys will be the names of variables in the resulting
@@ -256,9 +258,11 @@
     # handle both larch.OMX and openmatrix.open_file versions
     if "lar" in type(omx).__module__:
         omx_data = omx.data
+        omx_lookup = omx.lookup
         omx_shape = omx.shape
     else:
         omx_data = omx.root["data"]
+        omx_lookup = omx.root["lookup"]
         omx_shape = omx.shape()

     arrays = {}
@@ -285,6 +289,30 @@
             index_names[0]: zero_based(omx_shape[0]),
             index_names[1]: zero_based(omx_shape[1]),
         }
+    elif isinstance(indexes, str):
+        if indexes in omx_lookup:
+            if omx_shape[0] != omx_shape[1]:
+                raise ValueError("singleton arbitrary coordinates on non-square arrays")
+            ixs = np.asarray(omx_lookup[indexes])
+            indexes = {
+                index_names[0]: ixs,
+                index_names[1]: ixs,
+            }
+        else:
+            raise KeyError(f"{indexes} not found in OMX lookups")
+    elif isinstance(indexes, tuple):
+        indexes_ = {}
+        for n, (name, i) in enumerate(zip(index_names, indexes)):
+            if i == "one-based":
+                indexes_[name] = one_based(omx_shape[n])
+            elif i == "zero-based":
+                indexes_[name] = zero_based(omx_shape[n])
+            elif isinstance(i, str):
+                if i in omx_lookup:
+                    indexes_[name] = np.asarray(omx_lookup[i])
+                else:
+                    raise KeyError(f"{i} not found in OMX lookups")
+        indexes = indexes_
     if indexes is not None:
         d["coords"] = {
             index_name: {"dims": index_name, "data": index}
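
The expanded `indexes` handling documented above accepts either a single lookup name (square matrices with arbitrary zone ids) or a tuple with one entry per dimension. A usage sketch; the file names and lookup names ("skims.omx", "taz_ids", "maz_to_taz.omx", "maz_ids", "omaz") are hypothetical, not part of this commit:

import openmatrix
import sharrow as sh

# Square skim matrix: one lookup supplies the zone ids for both dimensions.
with openmatrix.open_file("skims.omx", mode="r") as f:
    skims = sh.dataset.from_omx(f, indexes="taz_ids")

# Non-square data: give one index spec per dimension, mixing a lookup name
# with "one-based"/"zero-based" as needed.
with openmatrix.open_file("maz_to_taz.omx", mode="r") as f:
    maz_taz = sh.dataset.from_omx(
        f,
        index_names=("omaz", "dtaz"),
        indexes=("maz_ids", "one-based"),
    )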
2 changes: 1 addition & 1 deletion sharrow/flows.py
@@ -16,7 +16,7 @@
 import pandas as pd
 import xarray as xr

-from . import __version__
+from ._infer_version import __version__
 from .aster import expression_for_numba, extract_all_name_tokens, extract_names_2
 from .filewrite import blacken, rewrite
 from .relationships import DataTree
37 changes: 37 additions & 0 deletions sharrow/tests/test_datasets.py
@@ -0,0 +1,37 @@
+import tempfile
+from pathlib import Path
+
+import numpy as np
+import openmatrix
+from pytest import approx
+
+import sharrow as sh
+
+
+def test_dataset_construct_with_zoneids():
+    tempdir = tempfile.TemporaryDirectory()
+    t = Path(tempdir.name)
+
+    with openmatrix.open_file(t.joinpath("dummy5.omx"), mode="w") as out:
+        out.create_carray("/data", "Eye", obj=np.eye(5, dtype=np.float32))
+        out.create_carray("/lookup", "Zone", obj=np.asarray([11, 22, 33, 44, 55]))
+        shp = np.empty(2, dtype=int)
+        shp[0] = 5
+        shp[1] = 5
+        out.root._v_attrs.SHAPE = shp
+
+    with openmatrix.open_file(t.joinpath("dummy5.omx"), mode="r") as back:
+        ds = sh.dataset.from_omx(back, indexes="Zone")
+
+    assert sorted(ds.coords) == ["dtaz", "otaz"]
+    assert ds.coords["otaz"].values == approx(np.asarray([11, 22, 33, 44, 55]))
+    assert sorted(ds.variables) == ["Eye", "dtaz", "otaz"]
+    assert ds["Eye"].data == approx(np.eye(5, dtype=np.float32))
+
+    with openmatrix.open_file(t.joinpath("dummy5.omx"), mode="r") as back:
+        ds0 = sh.dataset.from_omx(back, indexes="zero-based")
+        assert ds0.coords["otaz"].values == approx(np.asarray([0, 1, 2, 3, 4]))
+
+    with openmatrix.open_file(t.joinpath("dummy5.omx"), mode="r") as back:
+        ds1 = sh.dataset.from_omx(back, indexes="one-based")
+        assert ds1.coords["otaz"].values == approx(np.asarray([1, 2, 3, 4, 5]))
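
Because the arbitrary zone ids become real coordinates, label-based selection works directly on the resulting Dataset. A small follow-on sketch (illustrative, not part of the test) reusing the dummy5.omx fixture built above:

with openmatrix.open_file(t.joinpath("dummy5.omx"), mode="r") as back:
    ds = sh.dataset.from_omx(back, indexes="Zone")
    assert ds["Eye"].sel(otaz=22, dtaz=22).item() == 1.0  # diagonal entry, selected by zone id
    assert ds["Eye"].sel(otaz=11, dtaz=55).item() == 0.0  # off-diagonal entry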
