diff --git a/kerchunk/combine.py b/kerchunk/combine.py index 6dc71be..eb891de 100644 --- a/kerchunk/combine.py +++ b/kerchunk/combine.py @@ -95,6 +95,8 @@ class MultiZarrToZarr: from scratch. Assumes the same coordinates are being concatenated. """ + inline: int + def __init__( self, path, @@ -106,7 +108,7 @@ def __init__( target_options=None, remote_protocol=None, remote_options=None, - inline_threshold=500, + inline_threshold: int = 500, preprocess=None, postprocess=None, out=None, @@ -584,9 +586,13 @@ def second_pass(self): key = key.rstrip(".") ref = fs.references.get(fn) - if isinstance(ref, list) and ( - (len(ref) > 1 and ref[2] < self.inline) - or fs.info(fn)["size"] < self.inline + if ( + self.inline > 0 + and isinstance(ref, list) + and ( + (len(ref) > 1 and ref[2] < self.inline) + or fs.info(fn)["size"] < self.inline + ) ): to_download[key] = fn else: diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py index efd7e05..1399492 100644 --- a/kerchunk/tests/test_combine.py +++ b/kerchunk/tests/test_combine.py @@ -768,6 +768,29 @@ def test_inline(refs): assert ref.references["data/0.0.0"].startswith("base64:") +def test_no_inline(refs): + """Ensure that inline_threshold=0 disables MultiZarrToZarr checking file size.""" + ds = xr.Dataset(dict(x=[1, 2, 3])) + ds["y"] = 3 + ds["x"] + store = fsspec.get_mapper("memory://zarr_store") + ds.to_zarr(store, mode="w", consolidated=False) + ref = kerchunk.utils.consolidate(store) + # This type of reference with no offset or total size is produced by + # kerchunk.zarr.single_zarr or equivalently ZarrToZarr.translate. 
+ ref["refs"]["y/0"] = ["file:///tmp/some/data-that-shouldnt-be-accessed"] + + mzz_no_inline = MultiZarrToZarr([ref], concat_dims=["x"], inline_threshold=0) + # Should be okay because inline_threshold=0 so we don't check the file size + # in order to see if it should be inlined + mzz_no_inline.translate() + + mzz_inline = MultiZarrToZarr([ref], concat_dims=["x"], inline_threshold=1) + with pytest.raises(FileNotFoundError): + # Should raise because we check the file size to see if it should be inlined, + # and the example was engineered so that the file doesn't exist. + mzz_inline.translate() + + def test_merge_vars(): a = dict({"version": 1, "refs": dict({"item1": 1})}) b = dict({"version": 1, "refs": dict({"item2": 2})})