Skip to content

PCodec broken - UnknownCodecError #3482

@rabernat

Description

@rabernat

Zarr version

3.1.3

Numcodecs version

0.16.1

Python Version

3.12

Operating System

Linux

Installation

pip

Description

I am unable to use PCodec in Zarr 3.1.3. I can write an array with PCodec as the serializer but I can't read it back.

Steps to reproduce

import zarr
from zarr.storage import MemoryStore
from zarr.codecs.numcodecs import PCodec
from zarr.buffer import default_buffer_prototype
import json

store = MemoryStore()
array = zarr.create_array(shape=1000, chunks=500, dtype='f4', serializer=PCodec(), store=store)
print(array.info)
# Type               : Array
# Zarr format        : 3
# Data type          : Float32(endianness='little')
# Fill value         : 0.0
# Shape              : (1000,)
# Chunk shape        : (500,)
# Order              : C
# Read-only          : False
# Store type         : MemoryStore
# Filters            : ()
# Serializer         : PCodec(codec_name='numcodecs.pcodec', codec_config={})
# Compressors        : (ZstdCodec(level=0, checksum=False),)
# No. bytes          : 4000 (3.9K)

buf = await store.get("zarr.json", default_buffer_prototype())
print(json.loads(buf.to_bytes()))
#{'shape': [1000],
# 'data_type': 'float32',
# 'chunk_grid': {'name': 'regular', 'configuration': {'chunk_shape': [500]}},
# 'chunk_key_encoding': {'name': 'default',
#  'configuration': {'separator': '/'}},
# 'fill_value': 0.0,
# 'codecs': [{'name': 'numcodecs.pcodec', 'configuration': {}},
#  {'name': 'zstd', 'configuration': {'level': 0, 'checksum': False}}],
# 'attributes': {},
# 'zarr_format': 3,
# 'node_type': 'array',
# 'storage_transformers': []}


zarr.open(store)  # errors
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py:74](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py#line=73), in parse_codecs(data)
     73 try:
---> 74     out += (get_codec_class(name_parsed).from_dict(c),)
     75 except KeyError as e:

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/registry.py:184](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/registry.py#line=183), in get_codec_class(key, reload_config)
    183     return list(codec_classes.values())[-1]
--> 184 selected_codec_cls = codec_classes[config_entry]
    186 if selected_codec_cls:

KeyError: 'zarr.codecs.numcodecs.PCodec'

The above exception was the direct cause of the following exception:

UnknownCodecError                         Traceback (most recent call last)
Cell In[44], line 8
      6 array = zarr.create_array(shape=1000, chunks=500, dtype='f4', serializer=PCodec(), store=store)
      7 print(array.info)
----> 8 zarr.open(store)

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/api/synchronous.py:209](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/api/synchronous.py#line=208), in open(store, mode, zarr_version, zarr_format, path, storage_options, **kwargs)
    171 def open(
    172     store: StoreLike | None = None,
    173     *,
   (...)    179     **kwargs: Any,  # TODO: type kwargs as valid args to async_api.open
    180 ) -> Array | Group:
    181     """Open a group or array using file-mode-like semantics.
    182 
    183     Parameters
   (...)    207         Return type depends on what exists in the given store.
    208     """
--> 209     obj = sync(
    210         async_api.open(
    211             store=store,
    212             mode=mode,
    213             zarr_version=zarr_version,
    214             zarr_format=zarr_format,
    215             path=path,
    216             storage_options=storage_options,
    217             **kwargs,
    218         )
    219     )
    220     if isinstance(obj, AsyncArray):
    221         return Array(obj)

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/sync.py:163](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/sync.py#line=162), in sync(coro, loop, timeout)
    160 return_result = next(iter(finished)).result()
    162 if isinstance(return_result, BaseException):
--> 163     raise return_result
    164 else:
    165     return return_result

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/sync.py:119](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/sync.py#line=118), in _runner(coro)
    114 """
    115 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
    116 exception, the exception will be returned.
    117 """
    118 try:
--> 119     return await coro
    120 except Exception as ex:
    121     return ex

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/api/asynchronous.py:381](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/api/asynchronous.py#line=380), in open(store, mode, zarr_version, zarr_format, path, storage_options, **kwargs)
    379     is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array"
    380     if is_v3_array or zarr_format == 2:
--> 381         return AsyncArray(
    382             store_path=store_path, metadata=_metadata_dict, config=kwargs.get("config")
    383         )
    384 except (AssertionError, FileNotFoundError, NodeTypeValidationError):
    385     pass

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/array.py:333](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/array.py#line=332), in AsyncArray.__init__(self, metadata, store_path, config)
    327 def __init__(
    328     self,
    329     metadata: ArrayMetadata | ArrayMetadataDict,
    330     store_path: StorePath,
    331     config: ArrayConfigLike | None = None,
    332 ) -> None:
--> 333     metadata_parsed = parse_array_metadata(metadata)
    334     config_parsed = parse_array_config(config)
    336     object.__setattr__(self, "metadata", metadata_parsed)

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/array.py:184](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/array.py#line=183), in parse_array_metadata(data)
    182 zarr_format = data.get("zarr_format")
    183 if zarr_format == 3:
--> 184     meta_out = ArrayV3Metadata.from_dict(data)
    185     if len(meta_out.storage_transformers) > 0:
    186         msg = (
    187             f"Array metadata contains storage transformers: {meta_out.storage_transformers}."
    188             "Arrays with storage transformers are not supported in zarr-python at this time."
    189         )

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py:332](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py#line=331), in ArrayV3Metadata.from_dict(cls, data)
    329 # attributes key is optional, normalize missing to `None`
    330 _data["attributes"] = _data.pop("attributes", None)
--> 332 return cls(**_data, fill_value=fill_value_parsed, data_type=data_type)

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py:186](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py#line=185), in ArrayV3Metadata.__init__(self, shape, data_type, chunk_grid, chunk_key_encoding, fill_value, codecs, attributes, dimension_names, storage_transformers)
    184 fill_value_parsed = data_type.cast_scalar(fill_value)
    185 attributes_parsed = parse_attributes(attributes)
--> 186 codecs_parsed_partial = parse_codecs(codecs)
    187 storage_transformers_parsed = parse_storage_transformers(storage_transformers)
    189 array_spec = ArraySpec(
    190     shape=shape_parsed,
    191     dtype=data_type,
   (...)    194     prototype=default_buffer_prototype(),  # TODO: prototype is not needed here.
    195 )

File [/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py:76](https://cluster-voqqm.dask.host/opt/coiled/env/lib/python3.12/site-packages/zarr/core/metadata/v3.py#line=75), in parse_codecs(data)
     74             out += (get_codec_class(name_parsed).from_dict(c),)
     75         except KeyError as e:
---> 76             raise UnknownCodecError(f"Unknown codec: {e.args[0]!r}") from e
     78 return out

UnknownCodecError: Unknown codec: 'zarr.codecs.numcodecs.PCodec'

Additional output

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Potential issues with the zarr-python library

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions