Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ repos:
- typing_extensions
- universal-pathlib
- obstore>=0.5.1
- zarr-metadata>=0.1.1
# Tests
- pytest
- hypothesis
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ dependencies = [
'google-crc32c>=1.5',
'typing_extensions>=4.13',
'donfig>=0.8',
'zarr-metadata>=0.1.1',
]

dynamic = [
Expand Down
40 changes: 18 additions & 22 deletions src/zarr/codecs/blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,22 @@
import numcodecs
from numcodecs.blosc import Blosc
from packaging.version import Version
from zarr_metadata.v3.codec.blosc import (
BloscCName as _BloscCName,
)
from zarr_metadata.v3.codec.blosc import (
BloscCodecConfiguration as _BloscCodecConfiguration,
)
from zarr_metadata.v3.codec.blosc import (
BloscCodecObject as _BloscCodecObject,
)
from zarr_metadata.v3.codec.blosc import (
BloscShuffle as _BloscShuffle,
)

from zarr.abc.codec import BytesBytesCodec
from zarr.core.buffer.cpu import as_numpy_array_wrapper
from zarr.core.common import JSON, NamedRequiredConfig, parse_enum, parse_named_configuration
from zarr.core.common import JSON, parse_enum, parse_named_configuration
from zarr.core.dtype.common import HasItemSize

if TYPE_CHECKING:
Expand All @@ -21,14 +33,14 @@
from zarr.core.array_spec import ArraySpec
from zarr.core.buffer import Buffer

Shuffle = Literal["noshuffle", "shuffle", "bitshuffle"]
"""The shuffle values permitted for the blosc codec"""
# Re-export under zarr-python's historical names.
Shuffle = _BloscShuffle
CName = _BloscCName
BloscConfigV3 = _BloscCodecConfiguration
BloscJSON_V3 = _BloscCodecObject

SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle")

CName = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"]
"""The codec identifiers used in the blosc codec """


class BloscConfigV2(TypedDict):
"""Configuration for the V2 Blosc codec"""
Expand All @@ -40,22 +52,6 @@ class BloscConfigV2(TypedDict):
typesize: NotRequired[int]


class BloscConfigV3(TypedDict):
"""Configuration for the V3 Blosc codec"""

cname: CName
clevel: int
shuffle: Shuffle
blocksize: int
typesize: int


class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]):
"""
The JSON form of the Blosc codec in Zarr V3.
"""


class BloscShuffle(Enum):
"""
Enum for shuffle filter used by blosc.
Expand Down
15 changes: 3 additions & 12 deletions src/zarr/codecs/cast_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from collections.abc import Mapping
from dataclasses import dataclass, replace
from typing import TYPE_CHECKING, Final, Literal, TypedDict, cast
from typing import TYPE_CHECKING, Final, TypedDict, cast

import numpy as np

Expand All @@ -23,6 +23,8 @@
if TYPE_CHECKING:
from typing import NotRequired, Self

from zarr_metadata.v3.codec.cast_value import OutOfRangeMode, RoundingMode

from zarr.core.array_spec import ArraySpec
from zarr.core.buffer import NDBuffer
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
Expand All @@ -33,17 +35,6 @@ class ScalarMapJSON(TypedDict):
decode: NotRequired[list[tuple[object, object]]]


RoundingMode = Literal[
"nearest-even",
"towards-zero",
"towards-positive",
"towards-negative",
"nearest-away",
]

OutOfRangeMode = Literal["clamp", "wrap"]


class ScalarMap(TypedDict, total=False):
"""
The normalized, in-memory form of a scalar map.
Expand Down
16 changes: 5 additions & 11 deletions src/zarr/core/metadata/v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import warnings
from collections.abc import Iterable, Sequence
from functools import cached_property
from typing import TYPE_CHECKING, Any, TypedDict, cast
from typing import TYPE_CHECKING, Any, cast

from zarr.abc.metadata import Metadata
from zarr.abc.numcodec import Numcodec, _is_numcodec
Expand All @@ -29,8 +29,11 @@
from dataclasses import dataclass, field, fields, replace

import numpy as np
from zarr_metadata.v2.array import ArrayMetadataV2 as _ArrayMetadataV2

from zarr.core.array_spec import ArrayConfig, ArraySpec

# NOTE: `ArrayV2MetadataDict`, assigned after the imports below, re-exports the v2 array metadata JSON shape (`_ArrayMetadataV2`) under zarr-python's historical name.
from zarr.core.chunk_key_encodings import parse_separator
from zarr.core.common import (
JSON,
Expand All @@ -42,18 +45,9 @@
from zarr.core.config import config, parse_indexing_order
from zarr.core.metadata.common import parse_attributes


class ArrayV2MetadataDict(TypedDict):
"""
A typed dictionary model for Zarr format 2 metadata.
"""

zarr_format: Literal[2]
attributes: dict[str, JSON]


# Union of acceptable types for v2 compressors
type CompressorLikev2 = dict[str, JSON] | Numcodec | None
ArrayV2MetadataDict = _ArrayMetadataV2


@dataclass(frozen=True, kw_only=True)
Expand Down
44 changes: 18 additions & 26 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import json
from collections.abc import Iterable, Mapping, Sequence
from dataclasses import dataclass, field, replace
from typing import TYPE_CHECKING, Any, Final, Literal, NotRequired, TypeGuard, cast
from typing import TYPE_CHECKING, Any, Final, Literal, TypeGuard, cast

from typing_extensions import TypedDict
from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3

from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
from zarr.abc.metadata import Metadata
Expand Down Expand Up @@ -139,14 +140,12 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
)


class AllowedExtraField(TypedDict, extra_items=JSON): # type: ignore[call-arg]
"""
This class models allowed extra fields in array metadata.
They must have ``must_understand`` set to ``False``, and may contain
arbitrary additional JSON data.
"""
AllowedExtraField = ExtensionFieldV3
"""Alias for `zarr_metadata.v3.array.ExtensionFieldV3`.

must_understand: Literal[False]
`must_understand` is typed as `bool` to match the spec (extension authors who
*understand* a field may set it to `True`); the runtime guard
`check_allowed_extra_field` enforces that zarr-python accepts only `False`."""


def check_allowed_extra_field(data: object) -> TypeGuard[AllowedExtraField]:
Expand Down Expand Up @@ -421,25 +420,12 @@ def parse_chunk_grid(
raise ValueError(f"Unknown chunk grid name: {name!r}")


class ArrayMetadataJSON_V3(TypedDict, extra_items=AllowedExtraField): # type: ignore[call-arg]
"""
A typed dictionary model for zarr v3 array metadata.

Extra keys are permitted if they conform to ``AllowedExtraField``
(i.e. they are mappings with ``must_understand: false``).
"""
ArrayMetadataJSON_V3 = ArrayMetadataV3
"""Alias for `zarr_metadata.v3.array.ArrayMetadataV3`.

zarr_format: Literal[3]
node_type: Literal["array"]
data_type: str | NamedConfig[str, Mapping[str, JSON]]
shape: tuple[int, ...]
chunk_grid: str | NamedConfig[str, Mapping[str, JSON]]
chunk_key_encoding: str | NamedConfig[str, Mapping[str, JSON]]
fill_value: JSON
codecs: tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]
attributes: NotRequired[Mapping[str, JSON]]
storage_transformers: NotRequired[tuple[str | NamedConfig[str, Mapping[str, JSON]], ...]]
dimension_names: NotRequired[tuple[str | None, ...]]
The TypedDict from the metadata package is the canonical model of the v3
array metadata document; this alias preserves the historical zarr-python
name. Extra keys are permitted if they conform to `ExtensionFieldV3`."""


"""
Expand Down Expand Up @@ -671,6 +657,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
)

def to_dict(self) -> dict[str, JSON]:
"""Serialize as a JSON-shaped dict matching `ArrayMetadataV3`.

The return type is `dict[str, JSON]` rather than `ArrayMetadataV3` so the
result composes with other zarr-python metadata serialization paths
that traffic in `dict[str, JSON]` (notably consolidated metadata).
"""
out_dict = super().to_dict()
extra_fields = out_dict.pop("extra_fields")
out_dict = out_dict | extra_fields # type: ignore[operator]
Expand Down
Loading
Loading