diff --git a/src/silx/app/utils/parseutils.py b/src/silx/app/utils/parseutils.py index ecf8c0e9ff..3a1e81d3de 100644 --- a/src/silx/app/utils/parseutils.py +++ b/src/silx/app/utils/parseutils.py @@ -31,6 +31,7 @@ from collections.abc import Sequence import glob import logging +import urllib.parse from typing import Any from collections.abc import Generator, Iterable from pathlib import Path @@ -76,7 +77,7 @@ def to_bool(thing: Any, default: bool | None = None) -> bool: def filenames_to_dataurls( filenames: Iterable[str | Path], slices: Sequence[int] = tuple(), -) -> Generator[object]: +) -> Generator["DataUrl" | str]: """Expand filenames and HDF5 data path in files input argument""" # Imports here so they are performed after setting HDF5_USE_FILE_LOCKING and logging level import silx.io @@ -87,6 +88,12 @@ def filenames_to_dataurls( extra_slices = tuple(slices) for filename in filenames: + if isinstance(filename, str) and urllib.parse.urlparse( + filename + ).scheme.startswith("zarr+"): + yield filename + continue + url = DataUrl(filename) for file_path in sorted(silx.utils.files.expand_filenames([url.file_path()])): diff --git a/src/silx/app/view/main.py b/src/silx/app/view/main.py index d82fa6543f..a12ce8bea0 100644 --- a/src/silx/app/view/main.py +++ b/src/silx/app/view/main.py @@ -32,6 +32,7 @@ import signal import sys import traceback +import urllib.parse from silx.app.utils import parseutils @@ -178,7 +179,7 @@ def exceptHook(type_, value, trace): for url in parseutils.filenames_to_dataurls(options.files, options.slices): # TODO: Would be nice to add a process widget and a cancel button try: - window.appendFile(url.path()) + window.appendFile(url if isinstance(url, str) else url.path()) except OSError as e: _logger.error(e.args[0]) _logger.debug("Backtrace", exc_info=True) diff --git a/src/silx/io/meson.build b/src/silx/io/meson.build index 55e7350acc..f80a1c37ce 100644 --- a/src/silx/io/meson.build +++ b/src/silx/io/meson.build @@ -18,6 +18,7 @@ 
py.install_sources([ 'spech5.py', 'url.py', 'utils.py', + 'zarrh5.py', ], subdir: 'silx/io', # Folder relative to site-packages to install to ) diff --git a/src/silx/io/utils.py b/src/silx/io/utils.py index 55faa23593..9d23f88673 100644 --- a/src/silx/io/utils.py +++ b/src/silx/io/utils.py @@ -32,6 +32,7 @@ import sys import time import logging +import urllib.parse from collections.abc import Generator import numpy @@ -49,6 +50,12 @@ except ImportError as e: h5pyd = None +try: + from .zarrh5 import ZarrH5 +except ImportError as e: + ZarrH5 = None + + logger = logging.getLogger(__name__) NEXUS_HDF5_EXT = [".h5", ".nx5", ".nxs", ".hdf", ".hdf5", ".cxi"] @@ -689,34 +696,46 @@ def open(filename): # pylint:disable=redefined-builtin :raises: IOError if the file can't be loaded or path can't be found :rtype: h5py-like node """ - url = DataUrl(filename) + url = urllib.parse.urlparse(filename) + if url.scheme.startswith("zarr+"): + if ZarrH5 is None: + raise IOError( + f"Zarr support is not available, please install zarr, cannot open: {filename}" + ) + try: + return ZarrH5(filename) + except Exception as e: + raise IOError(f"Failed to open URL with zarr: {type(e)} {e}") - if url.scheme() in [None, "file", "silx"]: + data_url = DataUrl(filename) + if data_url.scheme() in [None, "file", "silx"]: # That's a local file - if not url.is_valid(): + if not data_url.is_valid(): raise OSError("URL '%s' is not valid" % filename) - h5_file = _open_local_file(url.file_path()) - elif url.scheme() in ("http", "https"): + h5_file = _open_local_file(data_url.file_path()) + elif data_url.scheme() in ("http", "https"): return _open_url_with_h5pyd(filename) else: - raise OSError(f"Unsupported URL scheme {url.scheme}: {filename}") + raise OSError(f"Unsupported URL scheme {data_url.scheme}: {filename}") - if url.data_path() in [None, "/", ""]: # The full file is requested - if url.data_slice(): + if data_url.data_path() in [None, "/", ""]: # The full file is requested + if data_url.data_slice(): 
raise OSError(f"URL '{filename}' containing slicing is not supported") return h5_file else: # Only a children is requested - if url.data_path() not in h5_file: - msg = f"File '{filename}' does not contain path '{url.data_path()}'." + if data_url.data_path() not in h5_file: + msg = f"File '{filename}' does not contain path '{data_url.data_path()}'." raise OSError(msg) - node = h5_file[url.data_path()] + node = h5_file[data_url.data_path()] - if url.data_slice() is not None: + if data_url.data_slice() is not None: from . import _sliceh5 # Lazy-import to avoid circular dependency try: - return _sliceh5.DatasetSlice(node, url.data_slice(), attrs=node.attrs) + return _sliceh5.DatasetSlice( + node, data_url.data_slice(), attrs=node.attrs + ) except ValueError: raise OSError( f"URL {filename} contains slicing, but it is not a dataset" diff --git a/src/silx/io/zarrh5.py b/src/silx/io/zarrh5.py new file mode 100644 index 0000000000..10d8ac58e6 --- /dev/null +++ b/src/silx/io/zarrh5.py @@ -0,0 +1,151 @@ +# /*########################################################################## +# Copyright (C) 2025 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ############################################################################*/
"""
Provides a wrapper to expose `Zarr <https://zarr.dev>`_ groups and arrays
through the :mod:`silx.io.commonh5` node classes.

This is a preview feature.
"""
from __future__ import annotations

import logging
import urllib.parse
from collections.abc import Generator

import numpy
import zarr

from . import commonh5


_logger = logging.getLogger(__name__)


def _children(
    group: zarr.Group,
) -> Generator[ZarrDataset | ZarrGroup, None, None]:
    """Yield a commonh5 wrapper for each direct child of a zarr group.

    :param group: Zarr group whose ``items()`` are wrapped.
    :return: Generator of :class:`ZarrGroup` (for sub-groups) and
        :class:`ZarrDataset` (for arrays). Children of any other type are
        skipped and reported with a warning.
    """
    for name, item in group.items():
        if isinstance(item, zarr.Group):
            yield ZarrGroup(name, item)
        elif isinstance(item, zarr.Array):
            yield ZarrDataset(name, item)
        else:
            _logger.warning("Cannot map child %s: Ignored", name)


class ZarrH5(commonh5.File):
    """Zarr client wrapper

    Opens the zarr group designated by a URL and exposes its direct children
    as commonh5 nodes. The wrapper is read-only.
    """

    def __init__(
        self,
        name: str,
        mode: str | None = None,
        attrs: dict | None = None,
    ) -> None:
        """
        :param name: URL of the zarr group, optionally prefixed with ``zarr+``.
            The query string is converted to ``storage_options`` passed to
            ``zarr.open_group``; when a key is repeated its last value wins.
        :param mode: Only ``"r"`` or ``None`` are accepted.
        :param attrs: Extra attributes, merged over the zarr group attributes
            (entries of `attrs` take precedence).
        :raises ValueError: If `mode` is not supported or if the URL contains
            a fragment.
        """
        # Explicit check rather than `assert`, which is stripped under -O
        if mode not in ("r", None):
            raise ValueError(
                f"Unsupported mode {mode!r}: Zarr support is read-only"
            )

        prefix = "zarr+"
        if name.startswith(prefix):
            name = name[len(prefix) :]
        full_url = urllib.parse.urlparse(name)
        if full_url.fragment:
            raise ValueError("URL fragment is not supported")

        # URL without params, query and fragment: this is what zarr opens
        base_url = urllib.parse.urlunparse(
            (full_url.scheme, full_url.netloc, full_url.path, "", "", "")
        )

        # quick&dirty storage_options parsing: it would need pydantic model
        storage_options = {}
        for key, values in urllib.parse.parse_qs(full_url.query).items():
            value = values[-1]
            if key == "use_ssl":
                # Only option converted to bool; others are kept as strings
                value = value.lower() == "true"
            storage_options[key] = value

        # Open read-only: zarr's default mode ('a') is read/write and creates
        # the group when it does not exist.
        self.__group = zarr.open_group(
            base_url, mode="r", storage_options=storage_options
        )

        if attrs is None:
            attrs = {}
        super().__init__(
            base_url.rstrip("/"), mode, attrs={**self.__group.attrs, **attrs}
        )

        for child in _children(self.__group):
            self.add_node(child)

        _logger.warning(
            "Zarr support is a preview feature: This may change or be removed without notice."
        )

    def close(self) -> None:
        """Close the file and drop the reference to the zarr group."""
        super().close()
        self.__group = None


class ZarrGroup(commonh5.LazyLoadableGroup):
    """Zarr Group wrapper

    Children are wrapped only when ``_create_child`` is called.
    """

    def __init__(
        self,
        name: str,
        group: zarr.Group,
        parent: ZarrH5 | ZarrGroup | None = None,
        attrs: dict | None = None,
    ) -> None:
        """
        :param name: Name of this node.
        :param group: Wrapped zarr group.
        :param parent: Parent node, if any.
        :param attrs: Attributes forwarded to the base class.
        """
        # NOTE(review): unlike ZarrH5, the attributes of the zarr group itself
        # are not merged into `attrs` here - confirm whether this is intended.
        super().__init__(name, parent, attrs)
        self.__group = group

    def _create_child(self) -> None:
        """Wrap and add every direct child of the zarr group."""
        for child in _children(self.__group):
            self.add_node(child)


class ZarrDataset(commonh5.Dataset):
    """Zarr Array wrapper

    Accessors delegate to the wrapped zarr array returned by ``_get_data()``.
    """

    def __init__(
        self,
        name: str,
        array: zarr.Array,
        parent: ZarrH5 | ZarrGroup | None = None,
        attrs: dict | None = None,
    ) -> None:
        """
        :param name: Name of this node.
        :param array: Wrapped zarr array.
        :param parent: Parent node, if any.
        :param attrs: Attributes forwarded to the base class.
        """
        super().__init__(name, array, parent, attrs)

    @property
    def shape(self) -> tuple[int, ...]:
        """Shape of the wrapped array."""
        return self._get_data().shape

    @property
    def size(self) -> int:
        """Size of the wrapped array."""
        return self._get_data().size

    def __len__(self) -> int:
        return len(self._get_data())

    def __getitem__(self, item):
        return self._get_data()[item]

    @property
    def value(self) -> numpy.ndarray:
        """Whole content of the array, read with ``[()]``."""
        return self._get_data()[()]

    @property
    def compression(self):
        """Codec id of the array compressor, or None if there is none."""
        compressor = self._get_data().compressor
        if compressor is None:
            # No compressor set: nothing to dereference
            return None
        return compressor.codec_id

    @property
    def chunks(self):
        """Chunk shape of the wrapped array."""
        return self._get_data().chunks