Commit

Add docstrings
martindurant committed Nov 8, 2023
1 parent 508c091 commit d293cff
Showing 4 changed files with 44 additions and 6 deletions.
9 changes: 9 additions & 0 deletions kerchunk/fits.py
@@ -5,6 +5,8 @@
import numpy as np
import zarr

from fsspec.implementations.reference import LazyReferenceMapper


from kerchunk.utils import class_factory
from kerchunk.codecs import AsciiTableCodec, VarArrCodec
@@ -56,6 +58,11 @@ def process_file(
primary_attr_to_group: bool
Whether the output top-level group contains the attributes of the primary extension
(which often contains no data, just a general description)
out: dict-like or None
This allows you to supply an fsspec.implementations.reference.LazyReferenceMapper
to write out parquet as the references get filled, or some other dictionary-like class
to customise how references get stored.
Returns
-------
@@ -181,6 +188,8 @@ def process_file(
if k != "COMMENT"
}
)
if isinstance(out, LazyReferenceMapper):
out.flush()
return out


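A minimal sketch (not part of the commit) of how the new out= parameter can be used with a parquet-backed reference store. The file names and record_size are illustrative, and the exact signature of LazyReferenceMapper.create may differ between fsspec versions:

import fsspec
from fsspec.implementations.reference import LazyReferenceMapper

import kerchunk.fits

fs = fsspec.filesystem("file")
# an on-disk parquet store that receives references as they are generated
out = LazyReferenceMapper.create(root="fits_refs.parq", fs=fs, record_size=10000)
refs = kerchunk.fits.process_file("example.fits", out=out)
# per the change above, process_file() flushes a LazyReferenceMapper itself,
# so the parquet files are complete when it returns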
4 changes: 4 additions & 0 deletions kerchunk/hdf.py
@@ -65,6 +65,10 @@ class SingleHdf5ToZarr:
encode: save the ID-to-value mapping in a codec, to produce the real values at read
time; requires this library to be available. Can be efficient storage where there
are few unique values.
out: dict-like or None
This allows you to supply an fsspec.implementations.reference.LazyReferenceMapper
to write out parquet as the references get filled, or some other dictionary-like class
to customise how references get stored.
"""

def __init__(
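Since out only needs dict-like item assignment, here is a hedged sketch of the "other dictionary-like class" case: a hypothetical dict subclass that reports each reference as SingleHdf5ToZarr stores it (LoggingRefs and the file name are made up for illustration):

from kerchunk.hdf import SingleHdf5ToZarr

class LoggingRefs(dict):
    """Hypothetical dict-like store that reports each reference written."""
    def __setitem__(self, key, value):
        print("storing reference:", key)
        super().__setitem__(key, value)

# any local HDF5 file; references accumulate in the supplied mapping
with open("example.h5", "rb") as f:
    SingleHdf5ToZarr(f, "example.h5", out=LoggingRefs()).translate()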
4 changes: 4 additions & 0 deletions kerchunk/netCDF3.py
@@ -50,6 +50,10 @@ def __init__(
subchunking, and there is never subchunking for coordinate/dimension arrays.
E.g., if an array contains 10,000 bytes, and this value is 6000, there will
be two output chunks, split on the biggest available dimension. [TBC]
out: dict-like or None
This allows you to supply an fsspec.implementations.reference.LazyReferenceMapper
to write out parquet as the references get filled, or some other dictionary-like class
to customise how references get stored.
args, kwargs: passed to scipy superclass ``scipy.io.netcdf.netcdf_file``
"""
assert kwargs.pop("mmap", False) is False
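The equivalent sketch for netCDF3, using NetCDF3ToZarr (the class this __init__ belongs to); the file and store names are illustrative:

import fsspec
from fsspec.implementations.reference import LazyReferenceMapper

from kerchunk.netCDF3 import NetCDF3ToZarr

fs = fsspec.filesystem("file")
out = LazyReferenceMapper.create(root="nc_refs.parq", fs=fs, record_size=10000)
# references are written into ``out`` instead of a plain in-memory dict
refs = NetCDF3ToZarr("example.nc", out=out).translate()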
33 changes: 27 additions & 6 deletions kerchunk/zarr.py
@@ -1,31 +1,52 @@
import fsspec
from fsspec.implementations.reference import LazyReferenceMapper

from kerchunk.utils import do_inline, class_factory
from kerchunk.utils import class_factory


def single_zarr(uri_or_store, storage_options=None, inline_threshold=100, inline=None):
def single_zarr(
uri_or_store, storage_options=None, inline_threshold=100, inline=None, out=None
):
"""kerchunk-style view on zarr mapper
This is a similar process to zarr's consolidate_metadata, but does not
need to be held in the original file tree. You do not need zarr itself
to do this.
This is useful for testing, so that we can pass hand-made zarrs to combine.
Parameters
----------
uri_or_store: str or dict-like
storage_options: dict or None
given to fsspec
out: dict-like or None
This allows you to supply an fsspec.implementations.reference.LazyReferenceMapper
to write out parquet as the references get filled, or some other dictionary-like class
to customise how references get stored.
Returns
-------
reference dict-like
"""
inline_threshold = inline or inline_threshold
if isinstance(uri_or_store, str):
mapper = fsspec.get_mapper(uri_or_store, **(storage_options or {}))
else:
mapper = uri_or_store

refs = {}
refs = out or {}
for k in mapper:
if k.startswith("."):
refs[k] = mapper[k]
else:
refs[k] = [fsspec.utils._unstrip_protocol(mapper._key_to_str(k), mapper.fs)]
if inline_threshold:
refs = do_inline(refs, inline_threshold, remote_options=storage_options)
# from kerchunk.utils import do_inline
# inline_threshold = inline or inline_threshold
# if inline_threshold:
# # this never does anything since we don't have the chunk sizes
# refs = do_inline(refs, inline_threshold, remote_options=storage_options)
if isinstance(refs, LazyReferenceMapper):
refs.flush()
return refs


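Finally, a sketch of the reworked single_zarr writing references straight to parquet (paths illustrative). Note that, per the change above, flush() is called inside single_zarr when a LazyReferenceMapper is supplied:

import fsspec
from fsspec.implementations.reference import LazyReferenceMapper

from kerchunk.zarr import single_zarr

fs = fsspec.filesystem("file")
out = LazyReferenceMapper.create(root="zarr_refs.parq", fs=fs, record_size=10000)
# references accumulate directly in ``out``; the returned value is the same object
refs = single_zarr("example.zarr", out=out)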
