Skip to content

Commit

Permalink
Extendable zarr arrays (#802)
Browse files Browse the repository at this point in the history
* add parameter to from_uri

* growable zarr array

* pass max_depth to server

* revert max_shaape

* checkpoint

* checkpoint

* Add endpoint link for append

* fix errant code in write_block

* Add append link to arrays, client sends shape and axis

* zarr adapter is called for write_block

* fix growth of zarr array

* update database for size in array append_block

* precommit cleanup

* fix test

* fix database issue

* add client call docstring

* WIP: Move type_aliases to root and fix misspelled name

* WIP: Rework to use PATCH /array/full

* Appending works

* Rename 'grow' to 'extend'.

* Raise if assumptions are not met

* Improve usability. Test.

* Update imports after rebase.

* Use Python 3.9 compatibile typing.

* Fix regression in refresh

* Reference documentation

* Add example to docstring.

* Test overwrite and out-of-order updates.

* Change from 'slice' to 'offset'.

* update array test

* add extend array to writing tutorial

* whitespace

* Finesse docs

* Data type of patch must match.

* Update CHANGELOG

---------

Co-authored-by: Dan Allan <dallan@bnl.gov>
  • Loading branch information
dylanmcreynolds and danielballan authored Nov 18, 2024
1 parent a5fd789 commit 2459624
Show file tree
Hide file tree
Showing 29 changed files with 345 additions and 52 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ Write the date in place of the "Unreleased" in the case a new version is release

# Changelog

## Unreleased

### Added

- Add HTTP endpoint `PATCH /array/full/{path}` to enable updating and
optionally _extending_ an existing array.
- Add associated Python client method `ArrayClient.patch`.

## v0.1.0b11 (2024-11-14)

### Added
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/python-client.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ Tiled currently includes two clients for each structure family:
tiled.client.array.DaskArrayClient.export
tiled.client.array.DaskArrayClient.write
tiled.client.array.DaskArrayClient.write_block
tiled.client.array.DaskArrayClient.patch
```

```{eval-rst}
Expand Down
38 changes: 24 additions & 14 deletions docs/source/tutorials/writing.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
# Writing Data

```{warning}
This is a highly experimental feature, recently introduced and included for
evaluation by early users. At this time we do not recommend using it for
anything important.
```

In this tutorial we will start Tiled in a mode where the client can
write (upload) data for later retrieving, search, or sharing.

Expand Down Expand Up @@ -48,21 +41,38 @@ where `...` will be whatever secret was printed at server startup above.

## Write data

Write array and tabular data.
Write array.

```python
# Write simple Python list (which gets converted to numpy.array).
>>> client.write_array([1, 2, 3], metadata={"color": "red", "barcode": 10})
<ArrayClient shape=(3,) chunks=((3,),) dtype=int64>

# Write an array.
>>> import numpy
>>> client.write_array(numpy.array([4, 5, 6]), metadata={"color": "blue", "barcode": 11})
<ArrayClient shape=(3,) chunks=((3,),) dtype=int64>

# Write a table (DataFrame).
# Write a Python list (which gets converted to numpy array).
>>> client.write_array([1, 2, 3], metadata={"color": "red", "barcode": 10})
<ArrayClient shape=(3,) chunks=((3,),) dtype=int64>

# Create an array and grow it by one.
>>> new_array = client.write_array([1, 2, 3])
>>> new_array
<ArrayClient shape=(3,) chunks=((3,),) dtype=int64>

# Extend the array. This array has only one dimension, here we extend by one
# along that dimension.
>>> new_array.patch([4], offset=(3,), extend=True)
>>> new_array
<ArrayClient shape=(4,) chunks=((3, 1),) dtype=int64>
>>> new_array.read()
array([1, 2, 3, 4])
```

Write tabular data in a pandas DataFrame.

```python
>>> import pandas
>>> client.write_dataframe(pandas.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}), metadata={"color": "green", "barcode": 12})
>>> df = pandas.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
>>> client.write_dataframe(df, metadata={"color": "green", "barcode": 12})
<DataFrameClient ['x', 'y']>
```

Expand Down
2 changes: 1 addition & 1 deletion tiled/_tests/test_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
SparseAdapter,
TableAdapter,
)
from tiled.adapters.type_alliases import JSON, Filters, NDSlice, Scopes
from tiled.server.schemas import Principal, PrincipalType
from tiled.structures.array import ArrayStructure, BuiltinDtype
from tiled.structures.awkward import AwkwardStructure
from tiled.structures.core import Spec, StructureFamily
from tiled.structures.sparse import COOStructure
from tiled.structures.table import TableStructure
from tiled.type_aliases import JSON, Filters, NDSlice, Scopes


class CustomArrayAdapter:
Expand Down
53 changes: 53 additions & 0 deletions tiled/_tests/test_writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,59 @@ def test_write_array_chunked(tree):
assert result.specs == specs


def test_extend_array(tree):
"Extend an array with additional data, expanding its shape."
with Context.from_app(
build_app(tree, validation_registry=validation_registry)
) as context:
client = from_context(context)

a = numpy.ones((3, 2, 2))
new_data = numpy.ones((1, 2, 2)) * 2
full_array = numpy.concatenate((a, new_data), axis=0)

# Upload a (3, 2, 2) array.
ac = client.write_array(a)
assert ac.shape == a.shape

# Patching data into a region beyond the current extent of the array
# raises a ValueError (catching a 409 from the server).
with pytest.raises(ValueError):
ac.patch(new_data, offset=(3,))
# With extend=True, the array is expanded.
ac.patch(new_data, offset=(3,), extend=True)
# The local cache of the structure is updated.
assert ac.shape == full_array.shape
actual = ac.read()
# The array has the expected shape and data.
assert actual.shape == full_array.shape
numpy.testing.assert_equal(actual, full_array)

# Overwrite data (do not extend).
revised_data = numpy.ones((1, 2, 2)) * 3
revised_array = full_array.copy()
revised_array[3, :, :] = 3
ac.patch(revised_data, offset=(3,))
numpy.testing.assert_equal(ac.read(), revised_array)

# Extend out of order.
ones = numpy.ones((1, 2, 2))
ac.patch(ones * 7, offset=(7,), extend=True)
ac.patch(ones * 5, offset=(5,), extend=True)
ac.patch(ones * 6, offset=(6,), extend=True)
numpy.testing.assert_equal(ac[5:6], ones * 5)
numpy.testing.assert_equal(ac[6:7], ones * 6)
numpy.testing.assert_equal(ac[7:8], ones * 7)

# Offset given as an int is acceptable.
ac.patch(ones * 8, offset=8, extend=True)
numpy.testing.assert_equal(ac[8:9], ones * 8)

# Data type must match.
with pytest.raises(ValueError):
ac.patch(ones.astype("uint8"), offset=9, extend=True)


def test_write_dataframe_full(tree):
with Context.from_app(
build_app(tree, validation_registry=validation_registry)
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

from ..structures.array import ArrayStructure
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON, NDSlice
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


class ArrayAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from ..structures.core import Spec, StructureFamily
from ..structures.data_source import Asset, DataSource, Management
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import ensure_uri, path_from_uri
from .array import ArrayAdapter
from .protocols import AccessPolicy
from .type_alliases import JSON


class ArrowAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/awkward.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

from ..structures.awkward import AwkwardStructure
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON
from .awkward_directory_container import DirectoryContainer
from .protocols import AccessPolicy
from .type_alliases import JSON


class AwkwardAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/awkward_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from ..server.schemas import Asset
from ..structures.awkward import AwkwardStructure
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON
from ..utils import path_from_uri
from .awkward import AwkwardAdapter
from .awkward_directory_container import DirectoryContainer
from .protocols import AccessPolicy
from .type_alliases import JSON


class AwkwardBuffersAdapter(AwkwardAdapter):
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from ..structures.core import Spec, StructureFamily
from ..structures.data_source import Asset, DataSource, Management
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import ensure_uri, path_from_uri
from .array import ArrayAdapter
from .dataframe import DataFrameAdapter
from .protocols import AccessPolicy
from .table import TableAdapter
from .type_alliases import JSON


def read_csv(
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
from ..structures.array import ArrayStructure
from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import node_repr, path_from_uri
from .array import ArrayAdapter
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .type_alliases import JSON

SWMR_DEFAULT = bool(int(os.getenv("TILED_HDF5_SWMR_DEFAULT", "0")))
INLINED_DEPTH = int(os.getenv("TILED_HDF5_INLINED_CONTENTS_MAX_DEPTH", "7"))
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/jpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .sequence import FileSequenceAdapter
from .type_alliases import JSON, NDSlice


class JPEGAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@
from ..server.schemas import SortingItem
from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import UNCHANGED, Sentinel
from .protocols import AccessPolicy, AnyAdapter
from .type_alliases import JSON
from .utils import IndexersMixin

if sys.version_info < (3, 9):
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
from ..server.schemas import Asset
from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from ..utils import path_from_uri
from .dataframe import DataFrameAdapter
from .protocols import AccessPolicy
from .type_alliases import JSON


class ParquetDatasetAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from ..structures.core import Spec, StructureFamily
from ..structures.sparse import SparseStructure
from ..structures.table import TableStructure
from ..type_aliases import JSON, Filters, NDSlice, Scopes
from .awkward_directory_container import DirectoryContainer
from .type_alliases import JSON, Filters, NDSlice, Scopes


class BaseAdapter(Protocol):
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


def force_reshape(arr: np.array, desired_shape: Tuple[int, ...]) -> np.array:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

from ..structures.core import Spec, StructureFamily
from ..structures.sparse import COOStructure
from ..type_aliases import JSON, NDSlice
from .array import slice_and_shape_from_block_and_chunks
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


class COOAdapter:
Expand Down
19 changes: 7 additions & 12 deletions tiled/adapters/sparse_blocks_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
from ..server.schemas import Asset
from ..structures.core import Spec, StructureFamily
from ..structures.sparse import COOStructure
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .type_alliases import JSON, NDSlice


def load_block(uri: str) -> Tuple[List[int], Tuple[NDArray[Any], Any]]:
Expand Down Expand Up @@ -113,18 +113,13 @@ def write_block(
self,
data: Union[dask.dataframe.DataFrame, pandas.DataFrame],
block: Tuple[int, ...],
slice: NDSlice = ...,
) -> None:
"""
Parameters
----------
data :
block :
Returns
-------
"""
if slice != ...:
raise NotImplementedError(
"Writing into a slice of a sparse block is not yet supported."
)
"Write into a block of the array."
uri = self.blocks[block]
data.to_parquet(path_from_uri(uri))

Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

from ..structures.core import Spec, StructureFamily
from ..structures.table import TableStructure
from ..type_aliases import JSON
from .array import ArrayAdapter
from .protocols import AccessPolicy
from .type_alliases import JSON


class TableAdapter:
Expand Down
2 changes: 1 addition & 1 deletion tiled/adapters/tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec, StructureFamily
from ..type_aliases import JSON, NDSlice
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .sequence import FileSequenceAdapter
from .type_alliases import JSON, NDSlice


class TiffAdapter:
Expand Down
Loading

0 comments on commit 2459624

Please sign in to comment.