diff --git a/kerchunk/__init__.py b/kerchunk/__init__.py index 398482ca..efd19771 100644 --- a/kerchunk/__init__.py +++ b/kerchunk/__init__.py @@ -10,3 +10,15 @@ __version__ = "9999" __all__ = ["__version__"] + + +def set_reference_filesystem_cachable(cachable=True): + """While experimenting with kerchunk and referenceFS, it can be convenient to not cache FS instances + + You may wish to call this function with ``False`` before any kerchunking session; leaving + the instances cachable (the default) is what end-users will want, since it will be + more efficient. + """ + import fsspec + + fsspec.get_filesystem_class("reference").cachable = cachable diff --git a/kerchunk/combine.py b/kerchunk/combine.py index 3836c12c..12a37ba5 100644 --- a/kerchunk/combine.py +++ b/kerchunk/combine.py @@ -157,6 +157,38 @@ def append( target_options=None, **kwargs, ): + """ + Update an existing combined reference set with new references + + There are two main usage patterns: + + - if the input ``original_refs`` is JSON, the combine happens in memory and the + output should be written to JSON. This could then be optionally converted to parquet in a + separate step + - if ``original_refs`` is a lazy parquet reference set, then it will be amended in-place + + If you want to extend JSON references and output to parquet, you must first convert to + parquet in the location you would like the final product to live. + + The other arguments should be the same as they were at the creation of the original combined + reference set. + + NOTE: if the original combine used a postprocess function, it may be that this process + fails, as the combine is done "before" postprocessing. Functions that only add information + (such as setting attrs) would be OK. + + Parameters + ---------- + path: list of reference sets to add. 
If remote/target options would be different + to ``original_refs``, these can be given as dicts or LazyReferenceMapper instances + original_refs: combined reference set to be extended + remote_protocol, remote_options, target_options: referring to ``original_refs`` + kwargs: to MultiZarrToZarr + + Returns + ------- + MultiZarrToZarr + """ import xarray as xr fs = fsspec.filesystem(