From f2226f52e9e9fdd1acd95f6235c0058dc18668a9 Mon Sep 17 00:00:00 2001
From: Alexandr Plashchinsky
 <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
Date: Fri, 29 May 2026 17:42:59 -0700
Subject: [PATCH 01/45] rebase with upstream

Signed-off-by: Alexandr Plashchinsky <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
---
 .../config/vocabularies/Data/accept.txt       |    1 +
 python/ray/data/__init__.py                   |    2 +
 .../_internal/datasource/zarrv2_datasource.py |  835 ++++++++++++++
 python/ray/data/read_api.py                   |  246 ++++
 .../ray/data/tests/datasource/test_zarrv2.py  | 1007 +++++++++++++++++
 5 files changed, 2091 insertions(+)
 create mode 100644 python/ray/data/_internal/datasource/zarrv2_datasource.py
 create mode 100644 python/ray/data/tests/datasource/test_zarrv2.py

diff --git a/.vale/styles/config/vocabularies/Data/accept.txt b/.vale/styles/config/vocabularies/Data/accept.txt
index 790ff698ef70..133400798975 100644
--- a/.vale/styles/config/vocabularies/Data/accept.txt
+++ b/.vale/styles/config/vocabularies/Data/accept.txt
@@ -56,5 +56,6 @@ YOLO
 [Ss]harded
 [Pp]arameterization(s)?
 repr
+Zarr
 [Uu]psample(d|s)?
 [Rr]ebatch(ing|ed|es)?
diff --git a/python/ray/data/__init__.py b/python/ray/data/__init__.py
index f58c59873284..1b6589f033f2 100644
--- a/python/ray/data/__init__.py
+++ b/python/ray/data/__init__.py
@@ -82,6 +82,7 @@
     read_unity_catalog,
     read_videos,
     read_webdataset,
+    read_zarr
 )
 
 # Module-level cached global functions for callable classes. It needs to be defined here
@@ -190,6 +191,7 @@
     "read_tfrecords",
     "read_unity_catalog",
     "read_videos",
+    "read_zarr",
     "read_webdataset",
     "KafkaAuthConfig",
     "Preprocessor",
diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
new file mode 100644
index 000000000000..d441a79b20ab
--- /dev/null
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -0,0 +1,835 @@
+"""Zarr v2 datasource for Ray Data.
+
+Two output schemas, selected at the call site:
+
+* Long-form (default). Each output row corresponds to one chunk of one
+  array. Arrays in the same call need not share any dimension; they coexist
+  as separate rows distinguished by an ``array`` column.
+* Wide-form (``align_axis_0=True``). Each output row is one axis-0 chunk
+  shared across all selected arrays; the row carries one column per array
+  plus ``t_start`` / ``t_stop`` for the global range.
+
+See :class:`ZarrV2Datasource` for the row schemas and
+:func:`ray.data.read_zarr` for the public API.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import math
+from collections.abc import Callable, Iterable, Sequence
+from dataclasses import dataclass
+from itertools import product
+from typing import TYPE_CHECKING, Any, List, Optional
+
+import numpy as np
+import pandas as pd
+from fsspec.spec import AbstractFileSystem
+
+from ray._common.retry import call_with_retry
+from ray.data._internal.util import _check_import
+from ray.data.block import BlockMetadata
+from ray.data.context import DataContext
+from ray.data.datasource.datasource import Datasource, ReadTask
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    import pyarrow
+    from zarr import Array as ZarrArray
+    from zarr.hierarchy import Group as ZarrGroup
+
+    ZarrRoot = ZarrGroup | ZarrArray
+
+
+REQUIRED_ZARRAY_KEYS = ("shape", "chunks", "dtype")
+
+# Zarr-specific transient-error patterns appended to the user's
+# ``DataContext.retried_io_errors`` when reading chunks. The defaults in
+# ``DataContext`` cover AWS-flavored object-store errors; these cover the
+# kind of network-layer messages that bubble up through fsspec/numcodecs
+# when reading chunked array data over HTTPS/S3/GCS.
+_ZARR_TRANSIENT_ERROR_PATTERNS = (
+    "Connection reset",
+    "Read timeout",
+    "Connection refused",
+    "network",
+    "socket",
+    "HTTP error",
+)
+
+
+@dataclass(frozen=True)
+class ZarrArrayMeta:
+    """Validated ``.zarray`` metadata for a single Zarr v2 array."""
+
+    shape: tuple[int, ...]
+    chunks: tuple[int, ...]
+    dtype: str
+
+    @classmethod
+    def from_json(cls, raw_meta: dict[str, Any], array_path: str) -> ZarrArrayMeta:
+        """Validate and parse a ``.zarray`` JSON object into a ZarrArrayMeta.
+
+        Raises ``ValueError`` if any of ``shape``/``chunks``/``dtype`` is
+        missing. ``array_path`` is included in the error message so callers
+        don't have to thread context themselves.
+        """
+        missing = [k for k in REQUIRED_ZARRAY_KEYS if k not in raw_meta]
+        if missing:
+            raise ValueError(
+                f"Invalid .zarray metadata for array path {array_path!r}: "
+                f"missing required key(s) {missing}."
+            )
+        return cls(
+            shape=tuple(int(x) for x in raw_meta["shape"]),
+            chunks=tuple(int(x) for x in raw_meta["chunks"]),
+            dtype=str(raw_meta["dtype"]),
+        )
+
+    @property
+    def rank(self) -> int:
+        return len(self.shape)
+
+    @property
+    def itemsize(self) -> int:
+        """Bytes per element."""
+        return np.dtype(self.dtype).itemsize
+
+    def effective_chunks(
+        self,
+        array_name: str,
+        user_chunk_shape: tuple[int, ...] | dict[str, tuple[int, ...]] | None,
+    ) -> tuple[int, ...]:
+        """Resolve the user's ``chunk_shapes`` override(s) against this array's chunks.
+
+        When ``user_chunk_shape`` is a single sequence, it is treated as a
+        prefix that overrides the leading axes; trailing axes keep the
+        array's native chunk values. This lets a single
+        ``chunk_shapes=[16]`` apply meaningfully across arrays of different
+        ranks (e.g., 4-D images alongside 2-D poses).
+
+        When ``user_chunk_shape`` is a dict, it is interpreted as a
+        per-array mapping from array path to that array's override prefix.
+        Arrays omitted from the mapping keep their native chunks.
+
+        - ``None`` → use native chunks unchanged.
+        - shorter than rank → override leading axes, keep native for the rest.
+        - same length as rank → use as-is (list or tuple; returned as a tuple).
+        - longer than rank → ``ValueError``.
+
+        Example with array shape ``(200, 28, 28)``, native chunks ``(50, 28, 28)``:
+
+            user=None              → (50, 28, 28)
+            user=(16,)             → (16, 28, 28)
+            user=(16, 14)          → (16, 14, 28)
+            user=(16, 14, 14)      → (16, 14, 14)
+            user=(16, 14, 14, 1)   → ValueError
+        """
+        if user_chunk_shape is None:
+            return self.chunks
+
+        if isinstance(user_chunk_shape, dict):
+            user_chunk_shape = user_chunk_shape.get(array_name)
+            if user_chunk_shape is None:
+                return self.chunks
+        if len(user_chunk_shape) > self.rank:
+            raise ValueError(
+                f"chunk_shapes override for array {array_name!r} has "
+                f"{len(user_chunk_shape)} axes but array of shape "
+                f"{self.shape!r} has rank {self.rank}. Each chunk_shapes "
+                f"override may not be longer than its target array's rank."
+            )
+        return tuple(user_chunk_shape) + self.chunks[len(user_chunk_shape) :]
+
+    def grid_shape(self, chunks: tuple[int, ...]) -> tuple[int, ...]:
+        """Number of chunks per axis under ``chunks`` (exact integer ceiling)."""
+        return tuple(-(-s // c) for s, c in zip(self.shape, chunks))
+
+    def chunk_slices(
+        self, chunk_index: tuple[int, ...], chunks: tuple[int, ...]
+    ) -> tuple[tuple[int, int], ...]:
+        """Per-axis ``(start, stop)`` for ``array[chunk_index]`` under ``chunks``.
+
+        Trailing-edge chunks are clamped to ``shape[i]``, so they may be
+        shorter than ``chunks[i]``. No padding is applied.
+        """
+        return tuple(
+            (i * c, min((i + 1) * c, s))
+            for i, c, s in zip(chunk_index, chunks, self.shape)
+        )
+
+
+# ---------------------------------------------------------------------------
+# Metadata discovery
+# ---------------------------------------------------------------------------
+
+
+def _load_metadata_from_zmetadata_file(
+    fs, z_meta_path: str
+) -> dict[str, ZarrArrayMeta]:
+    """Load all arrays listed in a consolidated ``.zmetadata`` file."""
+    with fs.open(z_meta_path, "rb") as f:
+        consolidated = json.load(f)
+    if "metadata" not in consolidated:
+        raise ValueError(
+            f"Missing 'metadata' key in consolidated metadata at {z_meta_path}."
+        )
+    out: dict[str, ZarrArrayMeta] = {}
+    for key, value in consolidated["metadata"].items():
+        # Match the final path segment exactly so e.g. "x.zarray" is not an array.
+        array_path, _, leaf = key.rpartition("/")
+        if leaf == ".zarray":
+            out[array_path] = ZarrArrayMeta.from_json(value, array_path)
+    return out
+
+
+def _load_metadata_from_array_paths(
+    fs, store_path: str, array_paths: Iterable[str]
+) -> dict[str, ZarrArrayMeta]:
+    """Load ``.zarray`` files for the user's explicit array paths.
+
+    Each path is normalized via :func:`zarr.util.normalize_storage_path`,
+    which strips surrounding slashes, collapses doubles, and rejects
+    ``.``/``..`` segments. Raises ``ValueError`` if a requested path has
+    no ``.zarray`` file at the expected location.
+    """
+    from zarr.util import normalize_storage_path
+
+    store_root = store_path.rstrip("/")
+    out: dict[str, ZarrArrayMeta] = {}
+    for raw in array_paths:
+        normalized = normalize_storage_path(raw)
+        zarray_path = (
+            f"{store_root}/{normalized}/.zarray"
+            if normalized
+            else f"{store_root}/.zarray"
+        )
+        try:
+            with fs.open(zarray_path, "r") as f:
+                raw_meta = json.load(f)
+        except FileNotFoundError as e:
+            raise ValueError(
+                f"Array path {raw!r} not found: no .zarray file at {zarray_path}"
+            ) from e
+        out[normalized] = ZarrArrayMeta.from_json(raw_meta, normalized)
+    return out
+
+
+def _load_metadata_full_scan(fs, store_path: str) -> dict[str, ZarrArrayMeta]:
+    """Recursively walk ``store_path`` for ``.zarray`` files.
+
+    Each discovered relative path is canonicalized via
+    :func:`zarr.util.normalize_storage_path` so the output keys match the
+    format used by the other metadata-loading paths regardless of whether
+    the underlying ``fs.walk`` yields trailing slashes.
+    """
+    from zarr.util import normalize_storage_path
+
+    store_root = store_path.rstrip("/")
+    store_prefix = store_root + "/"
+    out: dict[str, ZarrArrayMeta] = {}
+    for dirpath, _, filenames in fs.walk(store_path):
+        if ".zarray" not in filenames:
+            continue
+        dirpath = dirpath.rstrip("/")
+        if dirpath == store_root:
+            array_path = ""
+        else:
+            array_path = normalize_storage_path(dirpath.removeprefix(store_prefix))
+        zarray_path = f"{dirpath}/.zarray"
+        try:
+            with fs.open(zarray_path, "r") as f:
+                raw = json.load(f)
+        except FileNotFoundError:
+            continue
+        out[array_path] = ZarrArrayMeta.from_json(raw, array_path)
+    return out
+
+
+# ---------------------------------------------------------------------------
+# Chunk reading
+# ---------------------------------------------------------------------------
+
+
+def _read_chunk(
+    root: ZarrRoot,
+    array_name: str,
+    chunk_slices: tuple[tuple[int, int], ...],
+    *,
+    match: Optional[Sequence[str]] = None,
+    max_attempts: int = 10,
+    max_backoff_s: int = 32,
+) -> np.ndarray:
+    """Read ``array[chunk_slices]`` from a Zarr root with transient-error retry.
+
+    ``chunk_slices`` is an N-tuple of ``(start, stop)`` pairs, one per axis.
+    For a 0-D (scalar) array it is the empty tuple ``()``, which reads the
+    single element.
+
+    Retries are delegated to :func:`ray._common.retry.call_with_retry`,
+    matching the pattern used by other Ray Data datasources (lance,
+    iceberg). ``match`` defaults to ``DataContext.retried_io_errors``
+    (covers the AWS-flavored object-store transient errors) plus a small
+    set of zarr-specific network patterns. Pass an explicit ``match``
+    sequence to override.
+    """
+    indexer = tuple(slice(s, e) for s, e in chunk_slices)
+
+    def _read() -> np.ndarray:
+        arr = root if array_name == "" else root[array_name]
+        return arr[indexer]
+
+    if match is None:
+        match = list(DataContext.get_current().retried_io_errors) + list(
+            _ZARR_TRANSIENT_ERROR_PATTERNS
+        )
+    return call_with_retry(
+        _read,
+        description=f"read zarr chunk array={array_name!r} slices={chunk_slices}",
+        match=match,
+        max_attempts=max_attempts,
+        max_backoff_s=max_backoff_s,
+    )
+
+
+@dataclass(frozen=True)
+class _ChunkDescriptor:
+    """One long-form row's worth of read work: which chunk of which array."""
+
+    array_name: str
+    chunk_index: tuple[int, ...]
+    chunk_slices: tuple[tuple[int, int], ...]
+
+
+@dataclass(frozen=True)
+class _AlignedChunkDescriptor:
+    """One wide-row's worth of read work: a global axis-0 range across N aligned arrays.
+
+    The row "owns" the range ``[t_start, t_stop)`` and reports those as
+    columns. When ``overlap > 0``, the row's actual data extends to
+    ``t_stop_data`` (which is ``min(t_stop + overlap, shape[0])``); the
+    trailing slice is the lookahead from the next row's owned range so
+    sliding windows that start in this row's owned range can reach their
+    full tail without crossing a Ray Data row boundary.
+    """
+
+    chunk_index: int
+    t_start: int
+    t_stop: int
+    t_stop_data: int
+
+
+def _create_read_fn(
+    batch: list[_ChunkDescriptor],
+    root: ZarrRoot,
+) -> Callable[[], Iterable[pd.DataFrame]]:
+    """Build a read-task callable that materializes one DataFrame for one batch.
+
+    Each output row carries ``(array, chunk_index, chunk)``. ``chunk`` is
+    the data at its natural shape — possibly shorter than the nominal chunk
+    shape at trailing boundaries.
+
+    The caller is expected to pass batches whose chunks all come from one
+    array. Arrow's tensor extension requires all tensor elements in a
+    column to share rank, so mixing 4-D image chunks with 1-D label chunks
+    in one block would fail at conversion time.
+    :meth:`ZarrV2Datasource.get_read_tasks` enforces this by allocating one
+    batch per array.
+    """
+
+    def read_fn() -> Iterable[pd.DataFrame]:
+        yield pd.DataFrame(
+            {
+                "array": [d.array_name for d in batch],
+                "chunk_index": [d.chunk_index for d in batch],
+                "chunk_slices": [d.chunk_slices for d in batch],
+                "chunk": [
+                    _read_chunk(root, d.array_name, d.chunk_slices) for d in batch
+                ],
+            }
+        )
+
+    return read_fn
+
+
+def _create_aligned_read_fn(
+    batch: list[_AlignedChunkDescriptor],
+    aligned_array_names: list[str],
+    root: ZarrRoot,
+) -> Callable[[], Iterable[pd.DataFrame]]:
+    """Build a read-task callable for aligned (wide-row) reads.
+
+    Each output row carries ``t_start``, ``t_stop``, and one column per
+    aligned array holding that array's ``[t_start:t_stop_data, ...]`` slice
+    (owned range plus any trailing lookahead) at its natural shape. All
+    arrays in one row share the same axis-0 range.
+    """
+
+    def read_fn() -> Iterable[pd.DataFrame]:
+        cols: dict[str, list] = {
+            "t_start": [d.t_start for d in batch],
+            "t_stop": [d.t_stop for d in batch],
+        }
+        for name in aligned_array_names:
+            cols[name] = [
+                _read_chunk(root, name, ((d.t_start, d.t_stop_data),)) for d in batch
+            ]
+        yield pd.DataFrame(cols)
+
+    return read_fn
+
+
+def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]]:
+    from zarr.util import normalize_storage_path
+
+    normalized_chunk_shapes: dict[str, tuple[int, ...]] = {}
+    original_keys_by_normalized: dict[str, str] = {}
+
+    for k, v in chunk_shapes.items():
+        if not isinstance(k, str):
+            raise ValueError(
+                "chunk_shapes dict keys must be array-path strings, "
+                f"got key {k!r} of type {type(k).__name__}"
+            )
+
+        if not isinstance(v, (tuple, list)) or not v:
+            raise ValueError(
+                f"chunk_shapes[{k!r}] must be non-empty sequence of "
+                f"positive integers (list or tuple), got {v!r}"
+            )
+        if any(isinstance(x, bool) or not isinstance(x, int) or x <= 0 for x in v):
+            raise ValueError(
+                f"chunk_shapes[{k!r}] must be a non-empty sequence of "
+                f"positive integers (list or tuple), got {v!r}"
+            )
+
+        normalized_key = normalize_storage_path(k)
+
+        if normalized_key in original_keys_by_normalized:
+            prev_key = original_keys_by_normalized[normalized_key]
+            raise ValueError(
+                "chunk_shapes contains duplicate array paths after normalization: "
+                f"{prev_key!r} and {k!r} both normalize to {normalized_key!r}"
+            )
+
+        original_keys_by_normalized[normalized_key] = k
+        normalized_chunk_shapes[normalized_key] = tuple(v)
+    return normalized_chunk_shapes
+
+
+# ---------------------------------------------------------------------------
+# Datasource
+# ---------------------------------------------------------------------------
+
+
+class ZarrV2Datasource(Datasource):
+    """Reads one or more Zarr v2 arrays into a Ray Data ``Dataset``.
+
+    Two output schemas, selected at the call site via ``align_axis_0``:
+
+    Long-form (default, ``align_axis_0=False``) — one row per chunk per
+    array. Columns:
+
+    * ``array``: the source array's path within the store
+      (e.g., ``"data/camera0_rgb"``, or ``""`` for a root-level array).
+    * ``chunk_index``: the N-D position of this chunk in the array's chunk
+      grid, as a tuple of ints.
+    * ``chunk_slices``: per-axis ``(start, stop)`` of this chunk in the
+      source array's coordinate space.
+    * ``chunk``: the chunk's data as an ``ndarray`` at its natural shape
+      (possibly shorter at trailing boundaries — no padding).
+
+    Arrays in the same call need not share any dimension; they coexist as
+    separate rows distinguished by ``array``.
+
+    Wide-form (opt-in, ``align_axis_0=True``) — one row per axis-0
+    chunk, with one column per selected array. Columns:
+
+    * ``t_start`` / ``t_stop``: global axis-0 range of this row.
+    * ``<array_name>``: that array's ``[t_start:t_stop, ...]`` slice
+      (one column per selected array).
+
+    All selected arrays must share ``shape[0]`` and must end up with the
+    same axis-0 chunk size after :paramref:`chunk_shapes` resolution; if
+    they don't, ``__init__`` raises ``ValueError`` with a hint on how to
+    fix the selection. Use :paramref:`array_paths` to pick which
+    arrays to read — ``align_axis_0`` itself does not filter.
+
+    See :func:`ray.data.read_zarr` for the public API.
+    """
+
+    def __init__(
+        self,
+        path: str,
+        filesystem: pyarrow.fs.FileSystem | AbstractFileSystem | None = None,
+        chunk_shapes: dict[str, list] | list | None = None,
+        array_paths: list[str] | None = None,
+        allow_full_metadata_scan: bool = False,
+        align_axis_0: bool = False,
+        overlap: int = 0,
+    ) -> None:
+        super().__init__()
+        _check_import(self, module="zarr", package="zarr")
+
+        self.allow_full_metadata_scan = allow_full_metadata_scan
+        self.paths = [str(path)]
+
+        # Resolve filesystem + store path. The order of precedence:
+        #   1. Explicit ``filesystem=`` always wins.
+        #   2. ``.zip`` URL/path: auto-wrap with fsspec's ZipFileSystem.
+        #   3. Otherwise delegate to Ray Data's standard URL to filesystem
+        #      helper (the same one every other ``read_*`` API uses).
+        if filesystem is None and self.paths[0].endswith(".zip"):
+            import fsspec
+
+            self._fs = fsspec.filesystem("zip", fo=self.paths[0])
+            self._store_path = ""
+        elif filesystem is None:
+            from fsspec.implementations.arrow import ArrowFSWrapper
+
+            from ray.data.datasource.path_util import (
+                _resolve_paths_and_filesystem,
+            )
+
+            resolved_paths, pa_fs = _resolve_paths_and_filesystem([self.paths[0]])
+            self._fs = ArrowFSWrapper(pa_fs)
+            self._store_path = resolved_paths[0].rstrip("/")
+        else:
+            from pyarrow.fs import FileSystem
+
+            if isinstance(filesystem, AbstractFileSystem):
+                self._fs = filesystem
+            elif isinstance(filesystem, FileSystem):
+                from fsspec.implementations.arrow import ArrowFSWrapper
+
+                self._fs = ArrowFSWrapper(filesystem)
+            else:
+                raise TypeError(
+                    f"filesystem must be pyarrow.fs.FileSystem or "
+                    f"fsspec.spec.AbstractFileSystem, got "
+                    f"{type(filesystem).__name__}"
+                )
+            self._store_path = self.paths[0].rstrip("/")
+
+        if chunk_shapes is not None and not isinstance(
+            chunk_shapes, (tuple, list, dict)
+        ):
+            raise ValueError(
+                f"chunk_shapes must be a non-empty sequence of positive "
+                f"integers (list or tuple), or a dict, got {chunk_shapes!r}"
+            )
+
+        self.chunk_shapes: tuple[int, ...] | dict[str, tuple[int, ...]] | None = None
+        if chunk_shapes is not None:
+            if isinstance(chunk_shapes, dict):
+                self.chunk_shapes = _validate_chunk_shapes_dict(chunk_shapes)
+            else:
+                if not chunk_shapes or any(
+                    isinstance(x, bool) or not isinstance(x, int) or x <= 0
+                    for x in chunk_shapes
+                ):
+                    raise ValueError(
+                        "chunk_shapes must be a non-empty sequence of positive integers "
+                        f"(list or tuple), got {chunk_shapes!r}"
+                    )
+
+                self.chunk_shapes = tuple(chunk_shapes)
+
+        self._metadata_by_path = self._load_metadata(array_paths)
+        if not self._metadata_by_path:
+            raise ValueError(
+                f"No arrays discovered in Zarr store at {self.paths[0]!r}."
+            )
+
+        # Reject per-array overrides that do not correspond to any selected
+        # array in this read.
+        if isinstance(self.chunk_shapes, dict):
+            unknown_chunk_shape_keys = sorted(
+                set(self.chunk_shapes) - set(self._metadata_by_path)
+            )
+            if unknown_chunk_shape_keys:
+                raise ValueError(
+                    f"Unknown array path(s) in chunk_shapes: {unknown_chunk_shape_keys}"
+                )
+
+        if align_axis_0 is False:
+            self._aligned_array_names: list[str] | None = None
+        elif align_axis_0 is True:
+            shape0_by_array = {
+                name: meta.shape[0] if meta.shape else 0
+                for name, meta in self._metadata_by_path.items()
+            }
+            if len(set(shape0_by_array.values())) > 1:
+                raise ValueError(
+                    f"All selected arrays must share shape[0] when "
+                    f"align_axis_0=True. Got: {shape0_by_array}. Pass a "
+                    f"shape-compatible subset via array_paths=[...]."
+                )
+            self._aligned_array_names = list(self._metadata_by_path.keys())
+        else:
+            raise TypeError(
+                f"align_axis_0 must be a bool, got " f"{type(align_axis_0).__name__}"
+            )
+
+        # Validate overlap. Only meaningful when arrays are co-iterated as
+        # wide rows, since the trailing lookahead is exposed via the
+        # per-array column being longer than ``t_stop - t_start``.
+        if isinstance(overlap, bool) or not isinstance(overlap, int) or overlap < 0:
+            raise ValueError(f"overlap must be a non-negative integer, got {overlap!r}")
+        if overlap and self._aligned_array_names is None:
+            raise ValueError(
+                "overlap requires align_axis_0=True. In the default long-form "
+                "(chunk-per-row) mode, there's no wide row to extend forward — "
+                "the ``chunk_slices`` column on each chunk row already exposes "
+                "the global axis-0 range."
+            )
+        self.overlap = overlap
+
+        # Resolve per-array chunk geometry. ``effective_chunks`` raises a
+        # ``ValueError`` if a shared ``chunk_shapes`` prefix or any per-array
+        # ``chunk_shapes`` override is longer than the target array's rank —
+        # so this loop is also where rank validation happens.
+        self._array_chunks: dict[str, tuple[int, ...]] = {}
+        self._array_grids: dict[str, tuple[int, ...]] = {}
+        for name, meta in self._metadata_by_path.items():
+            chunks = meta.effective_chunks(name, self.chunk_shapes)
+            self._array_chunks[name] = chunks
+            self._array_grids[name] = meta.grid_shape(chunks)
+
+        # If aligned, all listed arrays must share the same axis-0 chunk size
+        # so each wide row corresponds to one axis-0 step across every array.
+        if self._aligned_array_names is not None:
+            axis_0_chunks = {
+                name: self._array_chunks[name][0] for name in self._aligned_array_names
+            }
+            unique = set(axis_0_chunks.values())
+            if len(unique) > 1:
+                raise ValueError(
+                    f"Aligned arrays must share the same axis-0 chunk size. "
+                    f"Got: {axis_0_chunks}. Pass chunk_shapes=[N] (or a "
+                    f"per-array chunk_shapes dict that resolves all aligned "
+                    f"arrays to the same axis-0 prefix) to re-tile them."
+                )
+
+        # Lazy zarr import: ``zarr`` is a hard dep of this datasource (gated
+        # by ``_check_import`` above) but ``import ray.data`` shouldn't drag
+        # it in for users who never call ``read_zarr``.
+        import zarr
+
+        self.root = zarr.open(self._fs.get_mapper(self._store_path), mode="r")
+
+    def estimate_inmemory_data_size(self) -> Optional[int]:
+        """Total bytes = sum over selected arrays of ``prod(shape) * itemsize``."""
+        return sum(
+            math.prod(meta.shape) * meta.itemsize
+            for meta in self._metadata_by_path.values()
+        )
+
+    def get_read_tasks(
+        self,
+        parallelism: int,
+        per_task_row_limit: Optional[int] = None,
+        data_context: Optional["DataContext"] = None,
+    ) -> List[ReadTask]:
+        """Enumerate every chunk and wrap it (or batches of chunks) in ReadTasks.
+
+        Long-form mode (default): one task per per-array chunk batch.
+        Per-array batching keeps each block's ``chunk`` column rank-uniform
+        (Arrow's tensor extension requires this). ``parallelism`` is
+        treated as a per-array budget — each array's chunks are split into
+        at most ``min(parallelism, n_chunks_for_array)`` tasks.
+
+        Aligned mode (``align_axis_0=True``): one task per batch of
+        aligned axis-0 chunks. Each yielded row carries ``t_start``,
+        ``t_stop``, and one column per selected array containing that
+        array's slice for the row's axis-0 range.
+        """
+        # ``data_context`` is part of the Datasource ABC; this datasource
+        # doesn't read anything off it today (no context-aware behavior).
+        # Threaded through to the helpers so they keep the same signature
+        # in case a future change needs it.
+        if self._aligned_array_names is not None:
+            return self._get_aligned_read_tasks(
+                parallelism, per_task_row_limit, data_context
+            )
+        return self._get_long_form_read_tasks(
+            parallelism, per_task_row_limit, data_context
+        )
+
+    def _get_long_form_read_tasks(
+        self,
+        parallelism: int,
+        per_task_row_limit: Optional[int] = None,
+        data_context: Optional["DataContext"] = None,
+    ) -> List[ReadTask]:
+        """Long-form read tasks. See :meth:`get_read_tasks` for semantics."""
+        read_tasks: List[ReadTask] = []
+        for name, meta in self._metadata_by_path.items():
+            chunks = self._array_chunks[name]
+            grid = self._array_grids[name]
+            descriptors = [
+                _ChunkDescriptor(
+                    array_name=name,
+                    chunk_index=chunk_index,
+                    chunk_slices=meta.chunk_slices(chunk_index, chunks),
+                )
+                for chunk_index in product(*(range(n) for n in grid))
+            ]
+            if not descriptors:
+                continue
+            n_tasks = max(1, min(parallelism, len(descriptors)))  # never divide by 0
+            batch_size = math.ceil(len(descriptors) / n_tasks)
+            for start in range(0, len(descriptors), batch_size):
+                batch = descriptors[start : start + batch_size]
+                read_tasks.append(
+                    ReadTask(
+                        _create_read_fn(batch, self.root),
+                        BlockMetadata(
+                            num_rows=len(batch),
+                            size_bytes=self._estimate_long_form_batch_mem_size(batch),
+                            input_files=(self.paths[0],),
+                            exec_stats=None,
+                        ),
+                        per_task_row_limit=per_task_row_limit,
+                    )
+                )
+        return read_tasks
+
+    def _estimate_long_form_batch_mem_size(self, batch: list[_ChunkDescriptor]) -> int:
+        """Sum in-memory bytes across all chunks in one long-form batch."""
+        return sum(
+            math.prod(stop - start for start, stop in desc.chunk_slices)
+            * self._metadata_by_path[desc.array_name].itemsize
+            for desc in batch
+        )
+
+    def _get_aligned_read_tasks(
+        self,
+        parallelism: int,
+        per_task_row_limit: Optional[int] = None,
+        data_context: Optional["DataContext"] = None,
+    ) -> List[ReadTask]:
+        """Aligned read tasks. See :meth:`get_read_tasks` for semantics."""
+        assert self._aligned_array_names is not None
+        # All aligned arrays share the same axis-0 chunk size (validated in
+        # ``__init__``) and the same shape[0]. Read the geometry off the first.
+        first_name = self._aligned_array_names[0]
+        axis_0_chunk = self._array_chunks[first_name][0]
+        shape0 = self._metadata_by_path[first_name].shape[0]
+
+        descriptors = [
+            _AlignedChunkDescriptor(
+                chunk_index=i,
+                t_start=i * axis_0_chunk,
+                t_stop=min((i + 1) * axis_0_chunk, shape0),
+                t_stop_data=min((i + 1) * axis_0_chunk + self.overlap, shape0),
+            )
+            for i in range(-(-shape0 // axis_0_chunk))
+        ]
+        if not descriptors:
+            return []
+
+        n_tasks = max(1, min(parallelism, len(descriptors)))  # never divide by 0
+        batch_size = math.ceil(len(descriptors) / n_tasks)
+
+        read_tasks: List[ReadTask] = []
+        for start in range(0, len(descriptors), batch_size):
+            batch = descriptors[start : start + batch_size]
+            read_tasks.append(
+                ReadTask(
+                    _create_aligned_read_fn(
+                        batch, self._aligned_array_names, self.root
+                    ),
+                    BlockMetadata(
+                        num_rows=len(batch),
+                        size_bytes=self._estimate_aligned_batch_mem_size(batch),
+                        input_files=(self.paths[0],),
+                        exec_stats=None,
+                    ),
+                    per_task_row_limit=per_task_row_limit,
+                )
+            )
+        return read_tasks
+
+    def _estimate_aligned_batch_mem_size(
+        self, batch: list[_AlignedChunkDescriptor]
+    ) -> int:
+        """Sum bytes across all (row, aligned-array) pairs in a wide-row batch.
+
+        Accounts for the trailing overlap data each row carries: the row's
+        per-array slice covers ``[t_start, t_stop_data)``, not just
+        ``[t_start, t_stop)``.
+        """
+        assert self._aligned_array_names is not None
+        return sum(
+            (desc.t_stop_data - desc.t_start)
+            * (math.prod(meta.shape[1:]) if len(meta.shape) > 1 else 1)
+            * meta.itemsize
+            for desc in batch
+            for meta in (
+                self._metadata_by_path[name] for name in self._aligned_array_names
+            )
+        )
+
+    def _load_metadata(self, array_paths) -> dict[str, ZarrArrayMeta]:
+        """Discover and load ``.zarray`` metadata for the selected arrays.
+
+        Discovery prefers consolidated ``.zmetadata`` when it exists. If the
+        store has no ``.zmetadata``, the datasource falls back to reading each
+        requested array's ``.zarray`` directly (when ``array_paths`` is given)
+        or to a recursive scan (when ``allow_full_metadata_scan`` is set).
+        If ``array_paths`` is given, the discovered set is filtered down to it;
+        any requested paths that aren't present in the store raise a
+        ``ValueError`` listing what is available.
+        """
+        fs, store_path = self._fs, self._store_path
+
+        z_meta_path = f"{store_path.rstrip('/')}/.zmetadata"
+        if fs.exists(z_meta_path):
+            logger.debug("Loading .zmetadata file")
+            all_arrays = _load_metadata_from_zmetadata_file(fs, z_meta_path)
+        elif array_paths:
+            logger.debug("No .zmetadata; reading requested .zarray files directly")
+            all_arrays = _load_metadata_from_array_paths(fs, store_path, array_paths)
+        elif self.allow_full_metadata_scan:
+            logger.info(
+                "No array_paths provided and no .zmetadata found; "
+                "executing full scan of Zarr store metadata"
+            )
+            all_arrays = _load_metadata_full_scan(fs, store_path)
+            if not all_arrays:
+                # ``fs.walk`` silently returns nothing on filesystems without
+                # directory-listing support (most commonly plain HTTP/HTTPS).
+                # That's distinct from "store exists but has no arrays", so
+                # surface the likely cause.
+                raise ValueError(
+                    f"Full-store scan of {self.paths[0]!r} found no .zarray "
+                    "files. This can occur if the filesystem does not "
+                    "support recursive directory listing (e.g., plain "
+                    "HTTP/HTTPS without an object-store listing API). Pass "
+                    "array_paths=[...] with explicit array names to read "
+                    "from this kind of store."
+                )
+        else:
+            raise ValueError(
+                "No array_paths were provided and this Zarr store does not "
+                "contain .zmetadata. Pass array_paths=[...] or set "
+                "allow_full_metadata_scan=True."
+            )
+
+        if array_paths:
+            from zarr.util import normalize_storage_path
+
+            requested = {normalize_storage_path(p) for p in array_paths}
+
+            missing = sorted(requested - all_arrays.keys())
+            if missing:
+                raise ValueError(
+                    f"Array(s) not found: {', '.join(repr(m) for m in missing)}. "
+                    f"Available: {', '.join(repr(a) for a in sorted(all_arrays))}"
+                )
+            all_arrays = {k: v for k, v in all_arrays.items() if k in requested}
+
+        return all_arrays
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 6a80aad43310..461343798611 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -28,6 +28,7 @@
 from ray.data._internal.datasource.bigquery_datasource import BigQueryDatasource
 from ray.data._internal.datasource.binary_datasource import BinaryDatasource
 from ray.data._internal.datasource.clickhouse_datasource import ClickHouseDatasource
+from ray.data._internal.datasource.zarrv2_datasource import ZarrV2Datasource
 from ray.data._internal.datasource.csv_datasource import CSVDatasource
 from ray.data._internal.datasource.databricks_credentials import (
     DatabricksCredentialProvider,
@@ -923,6 +924,251 @@ def read_videos(
         concurrency=concurrency,
         override_num_blocks=override_num_blocks,
     )
+
+@PublicAPI(stability="alpha")
+def read_zarr(
+    path: str,
+    filesystem: "pyarrow.fs.FileSystem | fsspec.spec.AbstractFileSystem | None" = None,
+    chunk_shapes: dict[str, list] | list | None = None,
+    array_paths: list[str] | None = None,
+    allow_full_metadata_scan: bool = False,
+    align_axis_0: bool = False,
+    overlap: int = 0,
+    *,
+    concurrency: Optional[int] = None,
+    override_num_blocks: Optional[int] = None,
+    num_cpus: Optional[float] = None,
+    num_gpus: Optional[float] = None,
+    memory: Optional[float] = None,
+    ray_remote_args: Optional[Dict[str, Any]] = None,
+):
+    """Creates a :class:`~ray.data.Dataset` from a Zarr v2 store.
+
+    Two output schemas, selected by ``align_axis_0``:
+
+    Default (long-form, ``align_axis_0=False``) — one row per chunk of
+    one array. Columns:
+
+    * ``array``: the source array's path (e.g., ``"data/camera0_rgb"``, or
+      ``""`` for a root-level array).
+    * ``chunk_index``: the N-D index of this chunk in its array's chunk grid.
+    * ``chunk_slices``: per-axis ``(start, stop)`` of this chunk in the
+      source array's coordinate space — useful for mapping a chunk back
+      to its global position without recomputing from the chunk shape.
+    * ``chunk``: the chunk's data at its natural shape
+      (possibly shorter at trailing boundaries — no padding is applied).
+
+    Arrays read in the same call need not share any dimension. Different
+    ranks, shapes, dtypes, and native chunk sizes coexist as separate rows.
+
+    Aligned (wide-form, ``align_axis_0=True``) — one row per axis-0
+    chunk, with one column per selected array. Columns:
+
+    * ``t_start``, ``t_stop``: global axis-0 range of this row.
+    * ``<array_name>``: that array's ``[t_start:t_stop, ...]`` slice as
+      one column per selected array.
+
+    All selected arrays must share ``shape[0]`` and must end up with the
+    same axis-0 chunk size after ``chunk_shapes`` resolution; if they
+    don't, ``read_zarr`` raises ``ValueError`` with a hint pointing at the
+    largest aligned subset. Use ``array_paths`` to pick which arrays to
+    read — ``align_axis_0`` itself does not filter.
+
+    Which schema do I want? Stay on the default (long-form) when
+    reading one array, or when the arrays in the store don't all share
+    ``shape[0]`` (e.g., CMIP6 data variables alongside lat/lon coords,
+    anndata's ``X`` alongside ``var/*``, or OME-Zarr image+label arrays at
+    different resolutions). Switch to ``align_axis_0=True`` when you want
+    paired multi-array rows where each row is one "sample" or "timestep"
+    of every array at once — the canonical cases are supervised ML data
+    (paired ``images`` + ``labels``) and robotics imitation learning
+    (paired ``image`` + ``state`` + ``action`` at each timestep).
+
+    Metadata discovery follows these rules:
+
+    * If the store contains ``.zmetadata``, the datasource reads it and treats
+      it as the canonical list of arrays. If ``array_paths`` is provided, the
+      discovered set is filtered down to those paths.
+    * Otherwise, if ``array_paths`` is provided, the datasource reads each
+      requested array's ``.zarray`` file directly. The store doesn't need a
+      ``.zmetadata`` in this case.
+    * Otherwise, if ``allow_full_metadata_scan=True``, the datasource
+      recursively scans the store for ``.zarray`` files. This can be slow or
+      expensive for large remote stores, so it's disabled by default.
+      Before setting ``allow_full_metadata_scan=True``, consider consolidating
+      metadata with ``zarr.consolidate_metadata``.
+    * Otherwise, the datasource raises a :class:`ValueError`.
+
+    Each array's ``.zarray`` metadata must include the keys ``"shape"``,
+    ``"chunks"``, and ``"dtype"``. Reads fail if any discovered array metadata
+    is missing one or more of these required fields.
+
+    ``filesystem`` accepts either a :class:`pyarrow.fs.FileSystem` (as the rest
+    of Ray Data does) or an :class:`fsspec.spec.AbstractFileSystem` (as Zarr's
+    own ecosystem does). pyarrow filesystems are wrapped internally into fsspec
+    via :class:`fsspec.implementations.arrow.ArrowFSWrapper` because Zarr's
+    storage layer requires fsspec. For non-local stores, passing an explicit
+    filesystem is recommended so authentication and backend settings are
+    explicit. If ``filesystem`` is omitted, the datasource infers it from
+    ``path``.
+
+    Examples:
+        Read every array in a store with each array's native chunking
+        (long-form, 4 ``images`` chunks + 1 ``labels`` chunk).
+
+        >>> import ray
+        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
+        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+        ... )
+        >>> ds.count()  # doctest: +SKIP
+        5
+
+        Aligned read: paired ``(images, labels)`` per row. ``align_axis_0``
+        validates that all selected arrays share ``shape[0]``.
+
+        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
+        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+        ...     align_axis_0=True,
+        ...     chunk_shapes=[50],
+        ... )
+        >>> ds.count()  # doctest: +SKIP
+        4
+
+        Per-array overrides: retile only selected arrays while leaving
+        others at their native chunking.
+
+        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
+        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+        ...     chunk_shapes={"images": [50], "labels": [50]},
+        ... )
+
+    Custom codecs:
+        Zarr stores compressed with codecs not bundled in ``numcodecs`` (e.g.,
+        ``imagecodecs_jpegxl`` for UMI camera arrays) require the codec
+        package to be imported and registered in every Ray worker, not
+        just the driver. Use ``ray.init`` with a worker setup hook::
+
+            def _register_codecs():
+                import imagecodecs.numcodecs
+                imagecodecs.numcodecs.register_codecs()
+            ray.init(runtime_env={"worker_process_setup_hook": _register_codecs})
+
+        Driver-side ``.zmetadata`` parsing succeeds without this, but chunk
+        decode in workers will fail with a ``numcodecs`` registry lookup
+        error.
+
+    Anonymous cloud buckets:
+        S3 anonymous reads use the standard URL convention
+        ``s3://anonymous@<bucket>/<key>``. GCS does not have this idiom;
+        instead, pass ``filesystem=pyarrow.fs.GcsFileSystem(anonymous=True)``
+        explicitly.
+
+    Array attributes (``.zattrs``):
+        ``read_zarr`` does not surface each array's ``.zattrs`` (the
+        user-attribute store from the Zarr v2 spec) in the row schema —
+        attrs are invariant per array, so duplicating them on every row
+        would just bloat the output. Read them once (for example with the
+        ``zarr`` python package) if you require them in your job.
+
+    Args:
+        path: Path to the Zarr v2 store.
+        filesystem: Optional preconfigured filesystem. Accepts either a
+            :class:`pyarrow.fs.FileSystem` or an :class:`fsspec.spec.AbstractFileSystem`.
+            pyarrow filesystems are wrapped internally with
+            :class:`fsspec.implementations.arrow.ArrowFSWrapper` because
+            Zarr's storage layer requires fsspec. Use this for private
+            buckets, custom credentials, anonymous/public cloud access, or
+            any storage backend configuration that shouldn't be inferred
+            internally. Recommended for non-local Zarr stores; for local
+            paths it's usually fine to omit. If omitted, the datasource
+            infers the filesystem from ``path``.
+        chunk_shapes: Optional override(s) for chunk geometry along the
+            leading axes. Accepts either:
+
+            * A sequence of positive integers (list or tuple), applied as
+              a shared prefix to every selected array, overriding the
+              leading axes and keeping trailing axes at each array's
+              native chunking.
+            * A dict mapping array paths to per-array prefix overrides,
+              for cases where only some arrays should be re-tiled or
+              different arrays should use different leading-axis chunks.
+              Arrays omitted from the dict keep their native chunks.
+
+            ``chunk_shapes=[16]`` re-tiles a 4-D array with native chunks
+            ``(1, 224, 224, 3)`` into ``(16, 224, 224, 3)`` and a 1-D
+            array with native chunks ``(50,)`` into ``(16,)``.
+            ``chunk_shapes={"images": [16], "labels": [64]}`` applies
+            different axis-0 overrides to different arrays in the same
+            read.
+
+            A shared list/tuple override may not be longer than the
+            smallest selected array's rank. Each per-array dict override
+            may not be longer than its target array's rank. If ``None``
+            (the default), every array keeps its native chunks.
+        array_paths: Optional list of array paths within the Zarr store to
+            read. If unspecified, all arrays discovered in the store are
+            included.
+        allow_full_metadata_scan: If ``True``, recursively scan the store for
+            ``.zarray`` files when ``array_paths`` is unspecified and
+            ``.zmetadata`` is missing. This may be slow or expensive for large
+            remote stores, so it is disabled by default.
+        align_axis_0: Opt-in switch to the wide-form schema. Pass ``True``
+            to emit one row per axis-0 chunk with one column per selected
+            array, plus ``t_start`` and ``t_stop`` columns naming the
+            global axis-0 range. All selected arrays must share
+            ``shape[0]`` and must end up with the same effective axis-0
+            chunk size after ``chunk_shapes`` resolution. The
+            default (``False``) uses the long-form chunk-per-row schema.
+        overlap: When set with ``align_axis_0``, extends each row's per-array
+            data forward by ``overlap`` timesteps from the next row's owned
+            range (clipped at the end of the store). Used for sliding-window
+            pipelines: with ``overlap=K-1``, any window of length ``K``
+            starting in this row's owned ``[t_start, t_stop)`` fits
+            entirely within the row's per-array slice, so a downstream
+            ``flat_map`` doesn't need cross-row state. The row's ownership
+            (the ``t_start``/``t_stop`` columns) is unchanged; only
+            ``chunk.shape[0]`` of each per-array column grows by up to
+            ``overlap``. Requires ``align_axis_0=True``. Defaults to ``0`` —
+            no overlap, each row's data exactly covers its owned range.
+        concurrency: The maximum number of Ray tasks to run concurrently. Set this
+            to control number of tasks to run concurrently. This doesn't change the
+            total number of tasks run or the total number of output blocks. By default,
+            concurrency is dynamically decided based on the available resources.
+        override_num_blocks: Override the number of output blocks from all read tasks.
+            By default, the number of output blocks is dynamically decided based on
+            input data size and available resources. You shouldn't manually set this
+            value in most cases.
+        num_cpus: The number of CPUs to reserve for each parallel read worker.
+        num_gpus: The number of GPUs to reserve for each parallel read worker. For
+            example, specify `num_gpus=1` to request 1 GPU for each parallel read
+            worker.
+        memory: The heap memory in bytes to reserve for each parallel read worker.
+        ray_remote_args: kwargs passed to :meth:`~ray.remote` in the read tasks.
+
+    Returns:
+        A :class:`~ray.data.Dataset` of long-form chunk rows by default
+        (``array``, ``chunk_index``, ``chunk_slices``, ``chunk``), or
+        wide-form aligned rows (``t_start``, ``t_stop``, plus one column
+        per aligned array) when ``align_axis_0`` is set.
+    """
+    datasource = ZarrV2Datasource(
+        path=path,
+        filesystem=filesystem,
+        chunk_shapes=chunk_shapes,
+        array_paths=array_paths,
+        allow_full_metadata_scan=allow_full_metadata_scan,
+        align_axis_0=align_axis_0,
+        overlap=overlap,
+    )
+    return read_datasource(
+        datasource,
+        ray_remote_args=ray_remote_args,
+        num_cpus=num_cpus,
+        num_gpus=num_gpus,
+        memory=memory,
+        concurrency=concurrency,
+        override_num_blocks=override_num_blocks,
+    )
 
 
 @PublicAPI(stability="alpha")
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
new file mode 100644
index 000000000000..908450c728d4
--- /dev/null
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -0,0 +1,1007 @@
+import json
+import os
+from pathlib import Path
+
+import fsspec
+import numpy as np
+import pandas as pd
+import pyarrow.fs
+import pytest
+import zarr
+from pytest_lazy_fixtures import lf as lazy_fixture
+
+import ray
+from ray.data._internal.datasource import zarrv2_datasource
+from ray.data.tests.conftest import *  # noqa: F401, F403
+
+
+def _execute_read_tasks(tasks) -> pd.DataFrame:
+    frames = [block for task in tasks for block in task()]
+    return pd.concat(frames, ignore_index=True)
+
+
+def _reconstruct_array(df: pd.DataFrame, array_name: str) -> np.ndarray:
+    """Concatenate all chunks of one array from a long-form result frame.
+
+    Assumes the array is chunked along axis 0 only. For tests with
+    higher-dim arrays, use ``_reconstruct_nd`` (which orders chunks by
+    ``chunk_index`` and concatenates axis 0 first).
+    """
+    sub = df[df["array"] == array_name].sort_values("chunk_index")
+    return np.concatenate(list(sub["chunk"]), axis=0)
+
+
+def _write_real_zarr_store(
+    store_path: Path,
+    arrays: dict,  # {name: (data, chunks)}
+) -> Path:
+    """Write a real Zarr v2 store from numpy arrays and consolidate metadata."""
+    root = zarr.open_group(str(store_path), mode="w")
+    for name, (data, chunks) in arrays.items():
+        root.create_dataset(name, data=data, chunks=chunks, dtype=data.dtype)
+    zarr.consolidate_metadata(str(store_path))
+    return store_path
+
+
+@pytest.fixture
+def zarrv2_group_store(tmp_path) -> Path:
+    """Two root-level arrays (one 2-D, one 1-D), axis-0-aligned (shape[0]==5)."""
+    return _write_real_zarr_store(
+        tmp_path / "group.zarr",
+        {
+            "images": (np.arange(20, dtype="<i4").reshape(5, 4), (2, 4)),
+            "nested": (np.arange(5, dtype="|u1"), (2,)),
+        },
+    )
+
+
+@pytest.fixture
+def zarrv2_root_store(tmp_path) -> Path:
+    """Single-array store with the array sitting directly at the store root."""
+    store_path = tmp_path / "root.zarr"
+    arr = zarr.open(
+        str(store_path),
+        mode="w",
+        shape=(5, 4),
+        chunks=(2, 4),
+        dtype="<i4",
+    )
+    arr[:] = np.arange(20, dtype="<i4").reshape(5, 4)
+    zarr.consolidate_metadata(str(store_path))
+    return store_path
+
+
+@pytest.fixture
+def local_fsspec_fs():
+    """fsspec local filesystem (for parametrized cross-fs read tests)."""
+    return fsspec.filesystem("file")
+
+
+@pytest.fixture
+def heterogeneous_zarrv2_store(tmp_path) -> Path:
+    """A store mixing different ranks, shape[0]s, dtypes, and native chunk sizes.
+
+    Mirrors the UMI-style real-world layout where ``data/*`` arrays share an
+    axis-0 timestep count but differ in everything else, and ``meta/*``
+    arrays live in a separate axis-0 universe entirely. The chunk-per-row
+    datasource handles all of these in one read; nothing has to align.
+    """
+    store_path = tmp_path / "heterogeneous.zarr"
+    root = zarr.open_group(str(store_path), mode="w")
+    # 4-D image tensor with tiny axis-0 chunks (1 image per chunk).
+    root.create_dataset(
+        "data/camera0_rgb",
+        data=np.arange(20 * 2 * 2 * 3, dtype="|u1").reshape(20, 2, 2, 3),
+        chunks=(1, 2, 2, 3),
+    )
+    # 2-D pose array, same shape[0]=20, much larger axis-0 chunks (10).
+    root.create_dataset(
+        "data/robot0_eef_pos",
+        data=np.arange(20 * 3, dtype="<f4").reshape(20, 3),
+        chunks=(10, 3),
+    )
+    # Episode-boundary metadata: separate axis-0 universe.
+    root.create_dataset(
+        "meta/episode_ends",
+        data=np.array([5, 12, 20], dtype="<i8"),
+        chunks=(3,),
+    )
+    zarr.consolidate_metadata(str(store_path))
+    return store_path
+
+
+@pytest.fixture
+def unconsolidated_zarrv2_store(tmp_path) -> Path:
+    """Two arrays at the store root, no ``.zmetadata``.
+
+    Exercises the no-``.zmetadata`` code paths (per-array ``.zarray``
+    discovery and full-store walk) — the common shape of real-world stores
+    behind plain HTTPS or other listing-less filesystems.
+    """
+    store_path = tmp_path / "unconsolidated.zarr"
+    root = zarr.open_group(str(store_path), mode="w")
+    root.create_dataset(
+        "images", data=np.arange(20, dtype="<i4").reshape(5, 4), chunks=(2, 4)
+    )
+    root.create_dataset("nested", data=np.arange(5, dtype="|u1"), chunks=(2,))
+    return store_path
+
+
+@pytest.fixture
+def aligned_zarrv2_store(tmp_path) -> Path:
+    """Three arrays sharing ``shape[0]=8``, different ranks and native chunks.
+
+    Models the UMI-style case where data arrays co-stride on the timestep
+    axis but differ in everything else.
+    """
+    store_path = tmp_path / "aligned.zarr"
+    root = zarr.open_group(str(store_path), mode="w")
+    root.create_dataset(
+        "img",
+        data=np.arange(8 * 4 * 4 * 3, dtype="|u1").reshape(8, 4, 4, 3),
+        chunks=(2, 4, 4, 3),
+    )
+    root.create_dataset(
+        "state",
+        data=np.arange(8 * 3, dtype="<f4").reshape(8, 3),
+        chunks=(4, 3),  # different native axis-0 chunks than img
+    )
+    root.create_dataset(
+        "label",
+        data=np.arange(8, dtype="<i8"),
+        chunks=(8,),
+    )
+    zarr.consolidate_metadata(str(store_path))
+    return store_path
+
+
+@pytest.fixture
+def zarr_zip_store(tmp_path) -> Path:
+    """A small Zarr store packed into a ``.zip`` for URL-detection tests."""
+    src = tmp_path / "src.zarr"
+    _write_real_zarr_store(
+        src,
+        {
+            "data": (np.arange(12, dtype="<i4").reshape(6, 2), (3, 2)),
+        },
+    )
+    zip_path = tmp_path / "store.zarr.zip"
+    import shutil
+
+    shutil.make_archive(
+        base_name=str(tmp_path / "store.zarr"),
+        format="zip",
+        root_dir=str(src),
+    )
+    assert zip_path.exists()
+    return zip_path
+
+
+# ---------------------------------------------------------------------------
+# Metadata discovery
+# ---------------------------------------------------------------------------
+
+
+def test_normalizes_requested_root_array_path(zarrv2_root_store):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(zarrv2_root_store),
+        array_paths=[""],
+    )
+    assert list(datasource._metadata_by_path) == [""]
+
+
+def test_normalizes_requested_array_paths(zarrv2_group_store):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(zarrv2_group_store),
+        array_paths=["images/", "nested"],
+    )
+    assert list(datasource._metadata_by_path) == ["images", "nested"]
+
+
+def test_rejects_missing_array_paths(zarrv2_group_store):
+    with pytest.raises(
+        ValueError,
+        match=r"Array\(s\) not found: 'missing'\. Available: 'images', 'nested'",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_group_store),
+            array_paths=["missing"],
+        )
+
+
+def test_requires_consolidated_metadata(tmp_path):
+    store_path = tmp_path / "broken.zarr"
+    store_path.mkdir()
+    (store_path / ".zmetadata").write_text(json.dumps({}))
+
+    with pytest.raises(ValueError, match="Missing 'metadata'"):
+        zarrv2_datasource.ZarrV2Datasource(str(store_path))
+
+
+def test_rejects_empty_full_scan_with_actionable_error(tmp_path):
+    empty_store = tmp_path / "empty.zarr"
+    empty_store.mkdir()  # no .zmetadata, no .zarray files anywhere
+
+    with pytest.raises(
+        ValueError, match=r"Full-store scan of .* found no \.zarray files.*"
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(empty_store), allow_full_metadata_scan=True
+        )
+
+
+def test_loads_per_array_zarray_without_zmetadata(unconsolidated_zarrv2_store):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(unconsolidated_zarrv2_store),
+        array_paths=["images", "nested"],
+    )
+    assert set(datasource._metadata_by_path) == {"images", "nested"}
+
+
+def test_full_scan_discovers_arrays_without_zmetadata(unconsolidated_zarrv2_store):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(unconsolidated_zarrv2_store),
+        allow_full_metadata_scan=True,
+    )
+    assert set(datasource._metadata_by_path) == {"images", "nested"}
+
+
+def test_requires_array_paths_or_full_scan_when_unconsolidated(
+    unconsolidated_zarrv2_store,
+):
+    with pytest.raises(
+        ValueError,
+        match=(
+            r"No array_paths were provided and this Zarr store does not "
+            r"contain \.zmetadata"
+        ),
+    ):
+        zarrv2_datasource.ZarrV2Datasource(str(unconsolidated_zarrv2_store))
+
+
+def test_array_paths_missing_zarray_file_raises_value_error(
+    unconsolidated_zarrv2_store,
+):
+    with pytest.raises(
+        ValueError,
+        match=r"Array path 'missing' not found: no \.zarray file at",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(unconsolidated_zarrv2_store),
+            array_paths=["missing"],
+        )
+
+
+def test_rejects_zmetadata_with_malformed_zarray_entry(tmp_path):
+    store_path = tmp_path / "malformed.zarr"
+    store_path.mkdir()
+    (store_path / ".zmetadata").write_text(
+        json.dumps(
+            {
+                "metadata": {
+                    "broken/.zarray": {"shape": [5], "chunks": [2]},  # no dtype
+                }
+            }
+        )
+    )
+
+    with pytest.raises(
+        ValueError,
+        match=r"missing required key\(s\) \['dtype'\]",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(str(store_path))
+
+
+# ---------------------------------------------------------------------------
+# ZarrArrayMeta
+# ---------------------------------------------------------------------------
+
+
+def test_zarr_array_meta_from_json_parses_required_fields():
+    meta = zarrv2_datasource.ZarrArrayMeta.from_json(
+        {"shape": [5, 3], "chunks": [2, 3], "dtype": "<f8", "extra": "ignored"},
+        "some/path",
+    )
+    assert meta.shape == (5, 3)
+    assert meta.chunks == (2, 3)
+    assert meta.dtype == "<f8"
+    assert meta.rank == 2
+    assert meta.itemsize == 8
+
+
+# ---------------------------------------------------------------------------
+# chunk_shapes validation
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "chunk_shapes",
+    ["invalid", 42, b"bytes", {1, 2}],
+)
+def test_rejects_invalid_chunk_shapes(zarrv2_group_store, chunk_shapes):
+    """Non-list/non-tuple/non-dict inputs are rejected at construction time."""
+    with pytest.raises(
+        ValueError,
+        match="chunk_shapes must be a non-empty sequence of positive integers",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_group_store),
+            chunk_shapes=chunk_shapes,
+        )
+
+
+@pytest.mark.parametrize(
+    "chunk_shapes,array_paths,expected",
+    [
+        # No chunk_shapes: every array reads at its native chunk size.
+        # 4-D image with tiny chunks coexists with 2-D pose with big chunks —
+        # nothing is forced into a shared min/max.
+        (
+            None,
+            None,
+            {
+                "data/camera0_rgb": (1, 2, 2, 3),
+                "data/robot0_eef_pos": (10, 3),
+                "meta/episode_ends": (3,),
+            },
+        ),
+        # ``[5]`` prefix overrides axis 0 across arrays of all ranks at once.
+        (
+            [5],
+            None,
+            {
+                "data/camera0_rgb": (5, 2, 2, 3),
+                "data/robot0_eef_pos": (5, 3),
+                "meta/episode_ends": (5,),
+            },
+        ),
+        # Length-2 prefix overrides axes 0+1; needs every selected array to
+        # have rank >= 2, so we filter out ``meta/episode_ends`` (rank 1).
+        (
+            [5, 1],
+            ["data/camera0_rgb", "data/robot0_eef_pos"],
+            {
+                "data/camera0_rgb": (5, 1, 2, 3),
+                "data/robot0_eef_pos": (5, 1),
+            },
+        ),
+        # Per-array overrides may retile only some arrays while others keep
+        # their native chunks.
+        (
+            {
+                "data/camera0_rgb": [5],
+                "data/robot0_eef_pos": [7],
+            },
+            None,
+            {
+                "data/camera0_rgb": (5, 2, 2, 3),
+                "data/robot0_eef_pos": (7, 3),
+                "meta/episode_ends": (3,),
+            },
+        ),
+    ],
+)
+def test_chunk_shapes_resolution_across_mixed_rank(
+    heterogeneous_zarrv2_store, chunk_shapes, array_paths, expected
+):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(heterogeneous_zarrv2_store),
+        chunk_shapes=chunk_shapes,
+        array_paths=array_paths,
+    )
+    assert datasource._array_chunks == expected
+
+
+@pytest.mark.parametrize(
+    "chunk_shapes",
+    [
+        {"images": 1},
+        {"images": None},
+        {"images": []},
+        {"images": [0]},
+        {"images": [1.5]},
+    ],
+)
+def test_rejects_invalid_chunk_shapes_dict_values(zarrv2_group_store, chunk_shapes):
+    with pytest.raises(
+        ValueError,
+        match=r"chunk_shapes\['images'\] must be .*positive integers",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_group_store),
+            chunk_shapes=chunk_shapes,
+        )
+
+
+def test_rejects_invalid_chunk_shapes_dict_keys(zarrv2_group_store):
+    with pytest.raises(
+        ValueError,
+        match="chunk_shapes dict keys must be array-path strings",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_group_store),
+            chunk_shapes={1: [2]},
+        )
+
+
+def test_rejects_duplicate_normalized_chunk_shapes_keys(zarrv2_group_store):
+    with pytest.raises(
+        ValueError,
+        match="duplicate array paths after normalization",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_group_store),
+            chunk_shapes={"images": [2], "/images/": [3]},
+        )
+
+
+def test_rejects_unknown_chunk_shapes_keys(zarrv2_group_store):
+    with pytest.raises(
+        ValueError,
+        match="Unknown array path\\(s\\) in chunk_shapes",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_group_store),
+            chunk_shapes={"does_not_exist": [2]},
+        )
+
+
+# ---------------------------------------------------------------------------
+# align_axis_0 (wide-form mode)
+# ---------------------------------------------------------------------------
+
+
+def test_align_axis_0_emits_wide_rows(aligned_zarrv2_store):
+    """Wide-row schema: ``t_start``, ``t_stop``, one column per selected array."""
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(aligned_zarrv2_store),
+        align_axis_0=True,
+        chunk_shapes=[4],
+    )
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
+    assert set(df.columns) == {"t_start", "t_stop", "img", "state", "label"}
+    # shape[0]=8, chunk_shapes=[4] -> 2 rows.
+    assert len(df) == 2
+    # Reconstruct each array by concatenating slices in order.
+    img_recon = np.concatenate(list(df["img"]), axis=0)
+    assert img_recon.shape == (8, 4, 4, 3)
+    state_recon = np.concatenate(list(df["state"]), axis=0)
+    assert state_recon.shape == (8, 3)
+    label_recon = np.concatenate(list(df["label"]), axis=0)
+    assert label_recon.shape == (8,)
+    # t_start/t_stop are correct.
+    starts = sorted(df["t_start"].tolist())
+    stops = sorted(df["t_stop"].tolist())
+    assert starts == [0, 4]
+    assert stops == [4, 8]
+
+
+@pytest.mark.parametrize(
+    "array_paths,extra_cols",
+    [
+        # No filter: all discovered arrays end up aligned.
+        (None, {"img", "state", "label"}),
+        # array_paths selects which arrays to read; align_axis_0 just
+        # asserts that the selected set is mutually aligned.
+        (["img", "state"], {"img", "state"}),
+    ],
+)
+def test_align_axis_0_column_set(aligned_zarrv2_store, array_paths, extra_cols):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(aligned_zarrv2_store),
+        array_paths=array_paths,
+        align_axis_0=True,
+        chunk_shapes=[4],
+    )
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
+    assert set(df.columns) == {"t_start", "t_stop"} | extra_cols
+
+
+def test_align_axis_0_accepts_per_array_chunk_shapes(aligned_zarrv2_store):
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(aligned_zarrv2_store),
+        align_axis_0=True,
+        chunk_shapes={"img": [4], "state": [4], "label": [4]},
+    )
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
+    assert len(df) == 2
+    assert sorted(zip(df["t_start"], df["t_stop"])) == [(0, 4), (4, 8)]
+
+
+def test_align_axis_0_rejects_misaligned_shape0(heterogeneous_zarrv2_store):
+    """Misalignment raises with the per-array shape[0] breakdown."""
+    with pytest.raises(
+        ValueError,
+        match=r"All selected arrays must share shape\[0\]",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(heterogeneous_zarrv2_store),
+            align_axis_0=True,
+            chunk_shapes=[5],
+        )
+
+
+def test_align_axis_0_rejects_non_bool(aligned_zarrv2_store):
+    """``align_axis_0`` must be a bool — no list form."""
+    with pytest.raises(TypeError, match=r"align_axis_0 must be a bool"):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(aligned_zarrv2_store),
+            align_axis_0=["img", "state"],
+        )
+
+
+def test_align_axis_0_rejects_divergent_axis_0_chunks(aligned_zarrv2_store):
+    """If aligned arrays end up with different axis-0 chunks, error clearly.
+
+    The native chunks differ (img=2, state=4, label=8) — without a
+    ``chunk_shapes`` re-tile they all stay at native, and the validator
+    catches the mismatch.
+    """
+    with pytest.raises(
+        ValueError, match="Aligned arrays must share the same axis-0 chunk size"
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(aligned_zarrv2_store),
+            align_axis_0=True,
+        )
+
+
+# ---------------------------------------------------------------------------
+# overlap (aligned-mode lookahead)
+# ---------------------------------------------------------------------------
+
+
+def test_overlap_extends_chunk_data(aligned_zarrv2_store):
+    """``overlap=N`` makes each row's per-array slice cover ``N`` extra timesteps.
+
+    Aligned store has shape[0]=8, ``chunk_shapes=[4]`` -> rows own [0,4) and [4,8).
+    With ``overlap=2``, row 0's data covers [0,6) and row 1's data covers [4,8)
+    (clipped at the store end since 4+4+2 > 8).
+    """
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(aligned_zarrv2_store),
+        align_axis_0=True,
+        chunk_shapes=[4],
+        overlap=2,
+    )
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
+    # Ownership unchanged: 2 rows of width 4 each.
+    assert sorted(zip(df["t_start"], df["t_stop"])) == [(0, 4), (4, 8)]
+    # Data extents: row 0 has 6 timesteps, row 1 has 4 (clipped at shape[0]=8).
+    rows = sorted(df.to_dict("records"), key=lambda r: r["t_start"])
+    assert rows[0]["img"].shape[0] == 6  # 4 owned + 2 overlap
+    assert rows[0]["state"].shape[0] == 6
+    assert rows[1]["img"].shape[0] == 4  # 4 owned + 0 overlap (clipped)
+    assert rows[1]["state"].shape[0] == 4
+
+
+def test_overlap_requires_align_axis_0(aligned_zarrv2_store):
+    """``overlap`` in long-form (no ``align_axis_0``) is a clear error."""
+    with pytest.raises(ValueError, match="overlap requires align_axis_0=True"):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(aligned_zarrv2_store),
+            overlap=2,
+        )
+
+
+def test_overlap_rejects_negative_and_non_int(aligned_zarrv2_store):
+    for bad in (-1, 1.5, "two"):
+        with pytest.raises(ValueError, match="overlap must be a non-negative integer"):
+            zarrv2_datasource.ZarrV2Datasource(
+                str(aligned_zarrv2_store),
+                align_axis_0=True,
+                chunk_shapes=[4],
+                overlap=bad,
+            )
+
+
+def test_overlap_enables_windowing_without_cross_row_loss(aligned_zarrv2_store):
+    window_len = 3
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(aligned_zarrv2_store),
+        align_axis_0=True,
+        chunk_shapes=[4],
+        overlap=window_len - 1,
+    )
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
+    starts = []
+    for _, row in df.iterrows():
+        t_start, t_stop = row["t_start"], row["t_stop"]
+        img = row["img"]
+        for local in range(t_stop - t_start):
+            if local + window_len > img.shape[0]:
+                continue  # only triggers at very end of store
+            starts.append(t_start + local)
+    # 8 timesteps, window_len=3 -> valid global starts are [0,6) = 6 windows.
+    # Without overlap the 2 windows straddling the row boundary (starts 2, 3)
+    # would be lost (~33%); with overlap=2 all 6 are captured.
+    assert sorted(starts) == [0, 1, 2, 3, 4, 5]
+
+
+def test_chunk_shapes_rejected_when_longer_than_smallest_array(
+    heterogeneous_zarrv2_store,
+):
+    """A shared ``chunk_shapes`` override longer than a target rank is an error."""
+    with pytest.raises(
+        ValueError,
+        match=r"chunk_shapes override for array .* has 2 axes but array of shape .* has rank 1",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(heterogeneous_zarrv2_store),
+            chunk_shapes=[2, 2],  # OK for 2-D and 4-D, fails for 1-D episode_ends
+        )
+
+
+# ---------------------------------------------------------------------------
+# Filesystem handling
+# ---------------------------------------------------------------------------
+
+
+def test_accepts_pyarrow_fs_filesystem(zarrv2_group_store):
+    """A pyarrow.fs.FileSystem passed in is wrapped into fsspec internally."""
+    datasource = zarrv2_datasource.ZarrV2Datasource(
+        str(zarrv2_group_store),
+        filesystem=pyarrow.fs.LocalFileSystem(),
+    )
+    from fsspec.spec import AbstractFileSystem
+
+    assert isinstance(datasource._fs, AbstractFileSystem)
+    assert set(datasource._metadata_by_path) == {"images", "nested"}
+
+
+def test_rejects_unsupported_filesystem_type():
+    """Filesystem that's neither pyarrow.fs nor fsspec raises ``TypeError``."""
+    with pytest.raises(
+        TypeError,
+        match=r"filesystem must be pyarrow\.fs\.FileSystem or",
+    ):
+        zarrv2_datasource.ZarrV2Datasource(
+            "/tmp/some.zarr",
+            filesystem="not-a-filesystem",
+        )
+
+
+# ---------------------------------------------------------------------------
+# .zarr.zip URL support
+# ---------------------------------------------------------------------------
+
+
+def test_reads_zarr_zip_local_path(zarr_zip_store):
+    """A local ``.zarr.zip`` path auto-wires fsspec's ZipFileSystem."""
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(zarr_zip_store))
+    # The store has one array "data" of shape (6, 2) chunks (3, 2) -> 2 chunks.
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=2))
+    assert len(df) == 2
+    assert set(df["array"]) == {"data"}
+    recon = _reconstruct_array(df, "data")
+    np.testing.assert_array_equal(recon, np.arange(12, dtype="<i4").reshape(6, 2))
+
+
+# ---------------------------------------------------------------------------
+# Read task generation and execution (end-to-end)
+# ---------------------------------------------------------------------------
+
+
+def test_get_read_tasks_batches_chunks_by_parallelism(tmp_path):
+    """5 chunks split across parallelism=3 produces batches [2, 2, 1]."""
+    store_path = tmp_path / "store.zarr"
+    _write_real_zarr_store(
+        store_path,
+        {"images": (np.arange(5 * 4, dtype="<i4").reshape(5, 4), (1, 4))},
+    )
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(store_path))
+
+    read_tasks = datasource.get_read_tasks(parallelism=3)
+
+    assert len(read_tasks) == 3
+    assert [task.metadata.num_rows for task in read_tasks] == [2, 2, 1]
+    assert all(task.metadata.input_files == (str(store_path),) for task in read_tasks)
+
+
+def test_long_form_schema_and_materialization(tmp_path):
+    """End-to-end: long-form rows are emitted with the expected columns and data."""
+    store_path = tmp_path / "aligned.zarr"
+    images_src = np.arange(20, dtype="<i4").reshape(5, 4)
+    labels_src = np.arange(5, dtype="|u1")
+    _write_real_zarr_store(
+        store_path,
+        {
+            "images": (images_src, (2, 4)),
+            "labels": (labels_src, (2,)),
+        },
+    )
+
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(store_path))
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=16))
+
+    # Schema is the long-form quad.
+    assert list(df.columns) == ["array", "chunk_index", "chunk_slices", "chunk"]
+    # 3 chunks for images (5/2), 3 chunks for labels (5/2) = 6 rows total.
+    assert len(df) == 6
+    assert set(df["array"]) == {"images", "labels"}
+
+    np.testing.assert_array_equal(_reconstruct_array(df, "images"), images_src)
+    np.testing.assert_array_equal(_reconstruct_array(df, "labels"), labels_src)
+
+    # ``chunk_slices`` matches the actual chunk shape and indexes back to
+    # the source array: arr[start:stop, ...] equals the chunk.
+    for _, row in df.iterrows():
+        slices = row["chunk_slices"]
+        chunk = row["chunk"]
+        assert len(slices) == chunk.ndim
+        for axis, (start, stop) in enumerate(slices):
+            assert stop - start == chunk.shape[axis]
+        if row["array"] == "images":
+            np.testing.assert_array_equal(
+                chunk,
+                images_src[slices[0][0] : slices[0][1], slices[1][0] : slices[1][1]],
+            )
+
+
+def test_chunk_shapes_override_changes_grid(tmp_path):
+    """User-supplied chunk_shapes controls the chunk grid and row count."""
+    store_path = tmp_path / "tile.zarr"
+    src = np.arange(10, dtype="<i4")
+    _write_real_zarr_store(store_path, {"data": (src, (2,))})  # native: 5 chunks
+
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(store_path), chunk_shapes=[5])
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=16))
+    assert sorted(chunk.shape[0] for chunk in df["chunk"]) == [5, 5]
+
+
+def test_heterogeneous_store_emits_one_row_per_chunk(heterogeneous_zarrv2_store):
+    """Mixed-rank/shape/dtype arrays each contribute their chunk count to the output."""
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(heterogeneous_zarrv2_store))
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=16))
+
+    # Expected chunk counts:
+    #   data/camera0_rgb       shape=(20,2,2,3) chunks=(1,2,2,3) → 20 chunks
+    #   data/robot0_eef_pos    shape=(20,3)     chunks=(10,3)    → 2 chunks
+    #   meta/episode_ends      shape=(3,)       chunks=(3,)      → 1 chunk
+    counts = df.groupby("array").size().to_dict()
+    assert counts == {
+        "data/camera0_rgb": 20,
+        "data/robot0_eef_pos": 2,
+        "meta/episode_ends": 1,
+    }
+
+
+# ---------------------------------------------------------------------------
+# _read_chunk retry behavior
+# ---------------------------------------------------------------------------
+
+
+class _ScriptedArray:
+    """Stand-in for a zarr Array: each ``[indexer]`` returns the next scripted item.
+
+    Items can be either an exception (raised) or an ndarray (returned). Used
+    to drive ``_read_chunk`` through specific retry scenarios without
+    touching the network.
+    """
+
+    def __init__(self, *responses) -> None:
+        self._responses = list(responses)
+
+    def __getitem__(self, key):
+        item = self._responses.pop(0)
+        if isinstance(item, BaseException):
+            raise item
+        return item
+
+
+class _ScriptedRoot:
+    """Stand-in for a zarr Group: name → :class:`_ScriptedArray`."""
+
+    def __init__(self, **arrays) -> None:
+        self._arrays = arrays
+
+    def __getitem__(self, name):
+        return self._arrays[name]
+
+
+def test_read_chunk_retries_then_succeeds():
+    """Retryable network errors retried with backoff, eventual read succeeds.
+
+    Uses default ``match`` patterns (``DataContext.retried_io_errors`` plus
+    zarr-specific entries like ``"Connection reset"`` and ``"Read timeout"``).
+    """
+    expected = np.array([1, 2, 3], dtype="<i4")
+    arr = _ScriptedArray(
+        ConnectionError("Connection reset by peer"),
+        TimeoutError("Read timeout"),
+        expected,
+    )
+    root = _ScriptedRoot(x=arr)
+
+    out = zarrv2_datasource._read_chunk(
+        root, "x", ((0, 3),), max_attempts=5, max_backoff_s=0
+    )
+    np.testing.assert_array_equal(out, expected)
+
+
+def test_read_chunk_exhausts_retries():
+    arr = _ScriptedArray(
+        ConnectionError("Connection reset"),
+        ConnectionError("Connection reset"),
+        ConnectionError("Connection reset"),
+    )
+    root = _ScriptedRoot(x=arr)
+
+    # call_with_retry re-raises the last exception itself (with ``from None``)
+    # rather than wrapping in a RuntimeError. Match against the original
+    # exception type to pin that behaviour.
+    with pytest.raises(ConnectionError, match="Connection reset"):
+        zarrv2_datasource._read_chunk(
+            root, "x", ((0, 3),), max_attempts=3, max_backoff_s=0
+        )
+
+
+# ---------------------------------------------------------------------------
+# Estimator
+# ---------------------------------------------------------------------------
+
+
+def test_estimate_inmemory_data_size(tmp_path):
+    """Estimate = sum over arrays of numel * dtype.itemsize."""
+    store_path = tmp_path / "est.zarr"
+    _write_real_zarr_store(
+        store_path,
+        {
+            "a": (np.zeros((5, 4), dtype="<i4"), (2, 4)),
+            "b": (np.zeros(5, dtype="|u1"), (2,)),
+        },
+    )
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(store_path))
+    # 5*4*4 (a) + 5*1 (b) = 80 + 5 = 85
+    assert datasource.estimate_inmemory_data_size() == 85
+
+
+# ---------------------------------------------------------------------------
+# Cross-filesystem end-to-end (Ray Data convention)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "fs",
+    [
+        None,
+        lazy_fixture("local_fs"),  # pyarrow.fs (gets wrapped to fsspec internally)
+        lazy_fixture("local_fsspec_fs"),  # native fsspec
+    ],
+)
+def test_read_zarr_basic_across_filesystems(ray_start_regular_shared, fs, local_path):
+    """Round-trip a real Zarr store through read_zarr for each filesystem flavor.
+
+    Mirrors the parametrized read-path coverage other Ray Data datasources use
+    (lance, parquet, json, hudi, …) — exercises None / pyarrow.fs / fsspec
+    input shapes against the same store written to a local path.
+    """
+    store_path = os.path.join(local_path, "data.zarr")
+    images_src = np.arange(20, dtype="<i4").reshape(5, 4)
+    labels_src = np.arange(5, dtype="|u1")
+    _write_real_zarr_store(
+        Path(store_path),
+        {
+            "images": (images_src, (2, 4)),
+            "labels": (labels_src, (2,)),
+        },
+    )
+
+    ds = ray.data.read_zarr(store_path, filesystem=fs)
+
+    # 3 chunks each for images and labels (5/2 → ceil = 3) → 6 rows total.
+    assert ds.count() == 6
+    df = pd.DataFrame(ds.take_all())
+    np.testing.assert_array_equal(_reconstruct_array(df, "images"), images_src)
+    np.testing.assert_array_equal(_reconstruct_array(df, "labels"), labels_src)
+
+
+# ---------------------------------------------------------------------------
+# Public-bucket integration test
+# ---------------------------------------------------------------------------
+
+
+def test_read_zarr_integration_public_s3(ray_start_regular_shared):
+    """End-to-end read against a real Zarr store in a public S3 bucket.
+
+    Uses ``s3://anonymous@ray-example-data/mnist-tiny.zarr`` — a 200-sample
+    MNIST subset with two arrays:
+      * ``images``  shape (200, 28, 28), chunks (50, 28, 28)  → 4 chunks
+      * ``labels``  shape (200,),        chunks (200,)        → 1 chunk
+
+    Under the chunk-per-row schema the total row count is 4 + 1 = 5.
+    """
+    ds = ray.data.read_zarr("s3://anonymous@ray-example-data/mnist-tiny.zarr")
+
+    assert ds.count() == 5
+    df = pd.DataFrame(ds.take_all())
+    assert set(df["array"]) == {"images", "labels"}
+    image_rows = df[df["array"] == "images"]
+    label_rows = df[df["array"] == "labels"]
+    assert {c.shape for c in image_rows["chunk"]} == {(50, 28, 28)}
+    assert {c.shape for c in label_rows["chunk"]} == {(200,)}
+    assert all(c.dtype == np.uint8 for c in image_rows["chunk"])
+    assert all(c.dtype == np.uint8 for c in label_rows["chunk"])
+
+
+# ---------------------------------------------------------------------------
+# Custom codec registration in Ray workers
+# ---------------------------------------------------------------------------
+
+
+# Hook string registers a custom (non-built-in) codec in each worker process.
+# numcodecs.registry is process-local — built-in codecs (blosc, gzip, zstd)
+# register themselves at import time, but anything else (including
+# ``imagecodecs_jpegxl``) must be explicitly registered in every process
+# that decodes chunks. Ray workers are separate Python processes, so the
+# driver's registration does NOT propagate. The standard fix is to run
+# this registration in each worker via ``runtime_env``'s
+# ``worker_process_setup_hook``.
+_CUSTOM_CODEC_HOOK = """
+import numcodecs
+import numpy as np
+
+class _RayZarrTestCodec(numcodecs.abc.Codec):
+    codec_id = "ray_zarr_test_codec"
+
+    def encode(self, buf):
+        return bytes(buf)
+
+    def decode(self, buf, out=None):
+        arr = np.frombuffer(buf, dtype=np.uint8)
+        if out is not None:
+            out[:] = arr.view(out.dtype)
+            return out
+        return arr.copy()
+
+numcodecs.register_codec(_RayZarrTestCodec)
+"""
+
+
+def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
+    """``worker_process_setup_hook`` runs once per worker, before any task,
+    registering the codec in the worker's process. Chunk decode succeeds.
+
+    Builds a tiny Zarr store compressed with a custom codec that numcodecs
+    doesn't auto-register. The driver registers the codec in-process to write
+    the store; Ray workers need their own registration to decode chunks,
+    which the ``worker_process_setup_hook`` arranges.
+    """
+    import numcodecs
+
+    # Register driver-side so we can write the store.
+    exec(_CUSTOM_CODEC_HOOK, {})
+
+    store_path = tmp_path / "codec_test.zarr"
+    arr = zarr.open(
+        str(store_path),
+        mode="w",
+        shape=(8,),
+        chunks=(4,),
+        dtype="u1",
+        compressor=numcodecs.get_codec({"id": "ray_zarr_test_codec"}),
+    )
+    arr[:] = np.arange(8, dtype="u1")
+    zarr.consolidate_metadata(str(store_path))
+
+    if ray.is_initialized():
+        ray.shutdown()
+    ray.init(
+        num_cpus=1,
+        logging_level="ERROR",
+        log_to_driver=False,
+        runtime_env={"worker_process_setup_hook": _CUSTOM_CODEC_HOOK},
+    )
+    try:
+        ds = ray.data.read_zarr(str(store_path))
+        rows = sorted(ds.take_all(), key=lambda r: tuple(r["chunk_index"]))
+        recon = np.concatenate([r["chunk"] for r in rows])
+        np.testing.assert_array_equal(recon, np.arange(8, dtype="u1"))
+    finally:
+        ray.shutdown()
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(pytest.main(["-v", __file__]))

From 86c61afec6c6a6f981eaf19e914aadc8ca89c810 Mon Sep 17 00:00:00 2001
From: Alexandr Plashchinsky
 <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
Date: Fri, 29 May 2026 18:01:13 -0700
Subject: [PATCH 02/45] linting fixes

Signed-off-by: Alexandr Plashchinsky <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
---
 python/ray/data/__init__.py | 2 +-
 python/ray/data/read_api.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/ray/data/__init__.py b/python/ray/data/__init__.py
index 1b6589f033f2..feda7cc3c586 100644
--- a/python/ray/data/__init__.py
+++ b/python/ray/data/__init__.py
@@ -82,7 +82,7 @@
     read_unity_catalog,
     read_videos,
     read_webdataset,
-    read_zarr
+    read_zarr,
 )
 
 # Module-level cached global functions for callable classes. It needs to be defined here
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 461343798611..58bb3f183e33 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -28,7 +28,6 @@
 from ray.data._internal.datasource.bigquery_datasource import BigQueryDatasource
 from ray.data._internal.datasource.binary_datasource import BinaryDatasource
 from ray.data._internal.datasource.clickhouse_datasource import ClickHouseDatasource
-from ray.data._internal.datasource.zarrv2_datasource import ZarrV2Datasource
 from ray.data._internal.datasource.csv_datasource import CSVDatasource
 from ray.data._internal.datasource.databricks_credentials import (
     DatabricksCredentialProvider,
@@ -67,6 +66,7 @@
 from ray.data._internal.datasource.uc_datasource import UnityCatalogConnector
 from ray.data._internal.datasource.video_datasource import VideoDatasource
 from ray.data._internal.datasource.webdataset_datasource import WebDatasetDatasource
+from ray.data._internal.datasource.zarrv2_datasource import ZarrV2Datasource
 from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
 from ray.data._internal.logical.interfaces import LogicalPlan
 from ray.data._internal.logical.operators import (
@@ -122,6 +122,7 @@
     import daft
     import dask
     import datasets
+    import fsspec.spec
     import mars
     import modin
     import pandas
@@ -924,7 +925,8 @@ def read_videos(
         concurrency=concurrency,
         override_num_blocks=override_num_blocks,
     )
-    
+
+
 @PublicAPI(stability="alpha")
 def read_zarr(
     path: str,

From f8553e4c897b3ea44568d445ab91e154210a2648 Mon Sep 17 00:00:00 2001
From: Alexandr Plashchinsky
 <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
Date: Fri, 29 May 2026 18:31:28 -0700
Subject: [PATCH 03/45] lint checking: pyrefly type fixes, align_axis_0 bool check up front

Signed-off-by: Alexandr Plashchinsky <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
---
 pyrefly.toml                                  |  2 ++
 .../_internal/datasource/zarrv2_datasource.py | 19 ++++++++++---------
 .../ray/data/tests/datasource/test_zarrv2.py  | 15 ++++++++++-----
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/pyrefly.toml b/pyrefly.toml
index 6275ebe1122c..17dae8f95385 100644
--- a/pyrefly.toml
+++ b/pyrefly.toml
@@ -65,4 +65,6 @@ ignore-missing-imports = [
     "rapidsmpf.*",
     "rmm.*",
     "confluent_kafka.*",
+    "zarr.*",
+    "numcodecs.*",
 ]
diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index d441a79b20ab..0c67590d4ac7 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -36,7 +36,7 @@
 logger = logging.getLogger(__name__)
 
 if TYPE_CHECKING:
-    import pyarrow
+    from pyarrow import fs as pyarrow_fs
     from zarr import Array as ZarrArray
     from zarr.hierarchy import Group as ZarrGroup
 
@@ -463,7 +463,7 @@ class ZarrV2Datasource(Datasource):
     def __init__(
         self,
         path: str,
-        filesystem: pyarrow.fs.FileSystem | AbstractFileSystem | None = None,
+        filesystem: pyarrow_fs.FileSystem | AbstractFileSystem | None = None,
         chunk_shapes: dict[str, list] | list | None = None,
         array_paths: list[str] | None = None,
         allow_full_metadata_scan: bool = False,
@@ -554,9 +554,14 @@ def __init__(
                     f"Unknown array path(s) in chunk_shapes: {unknown_chunk_shape_keys}"
                 )
 
-        if align_axis_0 is False:
-            self._aligned_array_names: list[str] | None = None
-        elif align_axis_0 is True:
+        if not isinstance(align_axis_0, bool):
+            raise TypeError(
+                f"align_axis_0 must be a bool, got {type(align_axis_0).__name__}"
+            )
+
+        if not align_axis_0:
+            self._aligned_array_names = None
+        else:
             shape0_by_array = {
                 name: meta.shape[0] if meta.shape else 0
                 for name, meta in self._metadata_by_path.items()
@@ -568,10 +573,6 @@ def __init__(
                     f"shape-compatible subset via array_paths=[...]."
                 )
             self._aligned_array_names = list(self._metadata_by_path.keys())
-        else:
-            raise TypeError(
-                f"align_axis_0 must be a bool, got " f"{type(align_axis_0).__name__}"
-            )
 
         # Validate overlap. Only meaningful when arrays are co-iterated as
         # wide rows, since the trailing lookahead is exposed via the
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 908450c728d4..1338f6d3f491 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -14,6 +14,9 @@
 from ray.data._internal.datasource import zarrv2_datasource
 from ray.data.tests.conftest import *  # noqa: F401, F403
 
+import logging
+from typing import Any, cast
+
 
 def _execute_read_tasks(tasks) -> pd.DataFrame:
     frames = [block for task in tasks for block in task()]
@@ -420,7 +423,7 @@ def test_rejects_invalid_chunk_shapes_dict_keys(zarrv2_group_store):
     ):
         zarrv2_datasource.ZarrV2Datasource(
             str(zarrv2_group_store),
-            chunk_shapes={1: [2]},
+            chunk_shapes=cast(Any, {1: [2]}),
         )
 
 
@@ -526,7 +529,7 @@ def test_align_axis_0_rejects_non_bool(aligned_zarrv2_store):
     with pytest.raises(TypeError, match=r"align_axis_0 must be a bool"):
         zarrv2_datasource.ZarrV2Datasource(
             str(aligned_zarrv2_store),
-            align_axis_0=["img", "state"],
+            align_axis_0=cast(Any, ["img", "state"]),
         )
 
 
@@ -585,12 +588,14 @@ def test_overlap_requires_align_axis_0(aligned_zarrv2_store):
 
 
 def test_overlap_rejects_negative_and_non_int(aligned_zarrv2_store):
-    for bad in (-1, 1.5, "two"):
+    bad_values: list[Any] = [-1, 1.5, "two"]
+
+    for bad in bad_values:
         with pytest.raises(ValueError, match="overlap must be a non-negative integer"):
             zarrv2_datasource.ZarrV2Datasource(
                 str(aligned_zarrv2_store),
                 align_axis_0=True,
-                chunk_shapes=[4],
+                chunk_shape=[4],
                 overlap=bad,
             )
 
@@ -988,7 +993,7 @@ def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
         ray.shutdown()
     ray.init(
         num_cpus=1,
-        logging_level="ERROR",
+        logging_level=logging.ERROR,
         log_to_driver=False,
         runtime_env={"worker_process_setup_hook": _CUSTOM_CODEC_HOOK},
     )

From 53f18a1470fc1086f4e78cf9bbd4d90cd8a45833 Mon Sep 17 00:00:00 2001
From: Alexandr Plashchinsky
 <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
Date: Fri, 29 May 2026 18:44:23 -0700
Subject: [PATCH 04/45] linting

Signed-off-by: Alexandr Plashchinsky <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
---
 python/ray/data/tests/datasource/test_zarrv2.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 1338f6d3f491..d00f48f545f4 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -1,6 +1,8 @@
 import json
+import logging
 import os
 from pathlib import Path
+from typing import Any, cast
 
 import fsspec
 import numpy as np
@@ -14,9 +16,6 @@
 from ray.data._internal.datasource import zarrv2_datasource
 from ray.data.tests.conftest import *  # noqa: F401, F403
 
-import logging
-from typing import Any, cast
-
 
 def _execute_read_tasks(tasks) -> pd.DataFrame:
     frames = [block for task in tasks for block in task()]

From 0c00739058e749025baefb0c962e19d4db955430 Mon Sep 17 00:00:00 2001
From: Alexandr Plashchinsky
 <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
Date: Fri, 29 May 2026 18:56:22 -0700
Subject: [PATCH 05/45] debugging tests: fix chunk_shapes kwarg typo in overlap test

Signed-off-by: Alexandr Plashchinsky <alexandr.plashchinsky@alexandrplashchinsky-H765G66H9V.local>
---
 python/ray/data/tests/datasource/test_zarrv2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index d00f48f545f4..9e25a36ad778 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -594,7 +594,7 @@ def test_overlap_rejects_negative_and_non_int(aligned_zarrv2_store):
             zarrv2_datasource.ZarrV2Datasource(
                 str(aligned_zarrv2_store),
                 align_axis_0=True,
-                chunk_shape=[4],
+                chunk_shapes=[4],
                 overlap=bad,
             )
 

From f3945b0548221cdb2e6e146b77ce85839a7c859f Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 4 Jun 2026 14:53:32 +0200
Subject: [PATCH 06/45] [data] zarr datasource: docs, CI test wiring, py3.11+
 gating, deps

Builds on read_zarr with the pieces needed to land it:

- docs: add read_zarr to the Loading Data API reference
- ci: register the test_zarrv2 py_test target so the suite runs
- compat: gate zarr to Python 3.11+ (zarr 2.18.4+ requires 3.11) via a
  python_version marker in py313 data-test-requirements + pytest.importorskip
  in the test (skips on py3.10)
- retry: replace guessed _ZARR_TRANSIENT_ERROR_PATTERNS with a grounded
  transport/HTTP-S3 token list (+ classifier test, + follow-up note)
- locks: regenerate requirements_compiled_py3.13 and the data-* deplocks

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 doc/source/data/api/loading_data.rst          |  9 +++
 .../ci/data-base-ci_depset_py3.11.lock        | 37 ++++++++++++
 .../ci/data-base-ci_depset_py3.12.lock        | 37 ++++++++++++
 .../data-pyarrow-latest-ci_depset_py3.12.lock | 35 ++++++++++++
 .../ci/relaxed_data-ci_depset_py3.12.lock     | 37 ++++++++++++
 python/ray/data/BUILD.bazel                   | 14 +++++
 .../_internal/datasource/zarrv2_datasource.py | 56 +++++++++++++++----
 .../ray/data/tests/datasource/test_zarrv2.py  | 50 ++++++++++++++++-
 .../ml/py313/data-test-requirements.txt       |  1 +
 python/requirements_compiled_py3.13.txt       | 10 ++++
 10 files changed, 274 insertions(+), 12 deletions(-)

diff --git a/doc/source/data/api/loading_data.rst b/doc/source/data/api/loading_data.rst
index ef25cdbedd5e..d613fd8666d6 100644
--- a/doc/source/data/api/loading_data.rst
+++ b/doc/source/data/api/loading_data.rst
@@ -352,6 +352,15 @@ WebDataset
 
    read_webdataset
 
+Zarr
+^^^^
+
+.. autosummary::
+   :nosignatures:
+   :toctree: doc/
+
+   read_zarr
+
 Partitioning API
 ^^^^^^^^^^^^^^^^
 
diff --git a/python/deplocks/ci/data-base-ci_depset_py3.11.lock b/python/deplocks/ci/data-base-ci_depset_py3.11.lock
index 98ee0e2a2e29..d50a629aedab 100644
--- a/python/deplocks/ci/data-base-ci_depset_py3.11.lock
+++ b/python/deplocks/ci/data-base-ci_depset_py3.11.lock
@@ -311,6 +311,11 @@ arro3-core==0.8.0 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   deltalake
+asciitree==0.3.3 \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1209,6 +1214,7 @@ deprecated==1.3.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   deltalake
+    #   numcodecs
 dill==0.3.8 \
     --hash=sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca \
     --hash=sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7
@@ -1338,6 +1344,12 @@ fastavro==1.12.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   -r python/requirements/ml/py313/data-test-requirements.txt
+fasteners==0.20 ; sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2730,6 +2742,23 @@ networkx==3.6.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   torch
+numcodecs==0.15.1 \
+    --hash=sha256:1d471a1829ce52d3f365053a2bd1379e32e369517557c4027ddf5ac0d99c591e \
+    --hash=sha256:1dfdea4a67108205edfce99c1cb6cd621343bc7abb7e16a041c966776920e7de \
+    --hash=sha256:698f1d59511488b8fe215fadc1e679a4c70d894de2cca6d8bf2ab770eed34dfd \
+    --hash=sha256:a34f0fe5e5f3b837bbedbeb98794a6d4a12eeeef8d4697b523905837900b5e1c \
+    --hash=sha256:a4f7bdb26f1b34423cb56d48e75821223be38040907c9b5954eeb7463e7eb03c \
+    --hash=sha256:b0a9d9cd29a0088220682dda4a9898321f7813ff7802be2bbb545f6e3d2f10ff \
+    --hash=sha256:bef8c8e64fab76677324a07672b10c31861775d03fc63ed5012ca384144e4bb9 \
+    --hash=sha256:c3a09e22140f2c691f7df26303ff8fa2dadcf26d7d0828398c0bc09b69e5efa3 \
+    --hash=sha256:cdfaef9f5f2ed8f65858db801f1953f1007c9613ee490a1c56233cd78b505ed5 \
+    --hash=sha256:daed6066ffcf40082da847d318b5ab6123d69ceb433ba603cb87c323a541a8bc \
+    --hash=sha256:e2547fa3a7ffc9399cfd2936aecb620a3db285f2630c86c8a678e477741a4b3c \
+    --hash=sha256:e3d82b70500cf61e8d115faa0d0a76be6ecdc24a16477ee3279d711699ad85f3 \
+    --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2865,6 +2894,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   numexpr
     #   pandas
     #   pylance
@@ -2884,6 +2914,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via
@@ -5686,6 +5717,12 @@ yarl==1.23.0 \
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   aiohttp
     #   delta-sharing
+zarr==2.18.7 \
+    --hash=sha256:ac3dc4033e9ae4e9d7b5e27c97ea3eaf1003cc0a07f010bd83d5134bf8c4b223 \
+    --hash=sha256:b2b8f66f14dac4af66b180d2338819981b981f70e196c9a66e6bfaa9e59572f5
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   -r python/requirements/ml/py313/data-test-requirements.txt
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/data-base-ci_depset_py3.12.lock b/python/deplocks/ci/data-base-ci_depset_py3.12.lock
index 4af259189727..003f57a4a433 100644
--- a/python/deplocks/ci/data-base-ci_depset_py3.12.lock
+++ b/python/deplocks/ci/data-base-ci_depset_py3.12.lock
@@ -304,6 +304,11 @@ arro3-core==0.8.0 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   deltalake
+asciitree==0.3.3 \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1195,6 +1200,7 @@ deprecated==1.3.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   deltalake
+    #   numcodecs
 dill==0.3.8 \
     --hash=sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca \
     --hash=sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7
@@ -1328,6 +1334,12 @@ fastavro==1.12.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   -r python/requirements/ml/py313/data-test-requirements.txt
+fasteners==0.20 ; sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2719,6 +2731,23 @@ networkx==3.6.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   torch
+numcodecs==0.15.1 \
+    --hash=sha256:1d471a1829ce52d3f365053a2bd1379e32e369517557c4027ddf5ac0d99c591e \
+    --hash=sha256:1dfdea4a67108205edfce99c1cb6cd621343bc7abb7e16a041c966776920e7de \
+    --hash=sha256:698f1d59511488b8fe215fadc1e679a4c70d894de2cca6d8bf2ab770eed34dfd \
+    --hash=sha256:a34f0fe5e5f3b837bbedbeb98794a6d4a12eeeef8d4697b523905837900b5e1c \
+    --hash=sha256:a4f7bdb26f1b34423cb56d48e75821223be38040907c9b5954eeb7463e7eb03c \
+    --hash=sha256:b0a9d9cd29a0088220682dda4a9898321f7813ff7802be2bbb545f6e3d2f10ff \
+    --hash=sha256:bef8c8e64fab76677324a07672b10c31861775d03fc63ed5012ca384144e4bb9 \
+    --hash=sha256:c3a09e22140f2c691f7df26303ff8fa2dadcf26d7d0828398c0bc09b69e5efa3 \
+    --hash=sha256:cdfaef9f5f2ed8f65858db801f1953f1007c9613ee490a1c56233cd78b505ed5 \
+    --hash=sha256:daed6066ffcf40082da847d318b5ab6123d69ceb433ba603cb87c323a541a8bc \
+    --hash=sha256:e2547fa3a7ffc9399cfd2936aecb620a3db285f2630c86c8a678e477741a4b3c \
+    --hash=sha256:e3d82b70500cf61e8d115faa0d0a76be6ecdc24a16477ee3279d711699ad85f3 \
+    --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 numpy==2.2.6 \
     --hash=sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff \
     --hash=sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47 \
@@ -2793,6 +2822,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   pandas
     #   pylance
     #   ray
@@ -2810,6 +2840,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via
@@ -5547,6 +5578,12 @@ yarl==1.23.0 \
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   aiohttp
     #   delta-sharing
+zarr==2.18.7 \
+    --hash=sha256:ac3dc4033e9ae4e9d7b5e27c97ea3eaf1003cc0a07f010bd83d5134bf8c4b223 \
+    --hash=sha256:b2b8f66f14dac4af66b180d2338819981b981f70e196c9a66e6bfaa9e59572f5
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   -r python/requirements/ml/py313/data-test-requirements.txt
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.12.lock b/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.12.lock
index 6c415a391192..6d477ce337d5 100644
--- a/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.12.lock
+++ b/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.12.lock
@@ -300,6 +300,11 @@ arro3-core==0.8.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
     #   deltalake
+asciitree==0.3.3 \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1160,6 +1165,7 @@ deprecated==1.3.1 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
     #   deltalake
+    #   numcodecs
 distlib==0.4.0 \
     --hash=sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16 \
     --hash=sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d
@@ -1279,6 +1285,12 @@ fastavro==1.12.1 \
     --hash=sha256:eaa7ab3769beadcebb60f0539054c7755f63bd9cf7666e2c15e615ab605f89a8 \
     --hash=sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
+fasteners==0.20 ; sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2620,6 +2632,23 @@ networkx==3.6.1 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
     #   torch
+numcodecs==0.15.1 \
+    --hash=sha256:1d471a1829ce52d3f365053a2bd1379e32e369517557c4027ddf5ac0d99c591e \
+    --hash=sha256:1dfdea4a67108205edfce99c1cb6cd621343bc7abb7e16a041c966776920e7de \
+    --hash=sha256:698f1d59511488b8fe215fadc1e679a4c70d894de2cca6d8bf2ab770eed34dfd \
+    --hash=sha256:a34f0fe5e5f3b837bbedbeb98794a6d4a12eeeef8d4697b523905837900b5e1c \
+    --hash=sha256:a4f7bdb26f1b34423cb56d48e75821223be38040907c9b5954eeb7463e7eb03c \
+    --hash=sha256:b0a9d9cd29a0088220682dda4a9898321f7813ff7802be2bbb545f6e3d2f10ff \
+    --hash=sha256:bef8c8e64fab76677324a07672b10c31861775d03fc63ed5012ca384144e4bb9 \
+    --hash=sha256:c3a09e22140f2c691f7df26303ff8fa2dadcf26d7d0828398c0bc09b69e5efa3 \
+    --hash=sha256:cdfaef9f5f2ed8f65858db801f1953f1007c9613ee490a1c56233cd78b505ed5 \
+    --hash=sha256:daed6066ffcf40082da847d318b5ab6123d69ceb433ba603cb87c323a541a8bc \
+    --hash=sha256:e2547fa3a7ffc9399cfd2936aecb620a3db285f2630c86c8a678e477741a4b3c \
+    --hash=sha256:e3d82b70500cf61e8d115faa0d0a76be6ecdc24a16477ee3279d711699ad85f3 \
+    --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
+    #   zarr
 numpy==2.2.6 \
     --hash=sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff \
     --hash=sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47 \
@@ -2691,6 +2720,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   pandas
     #   pylance
     #   raydp
@@ -2707,6 +2737,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
@@ -5341,6 +5372,10 @@ yarl==1.23.0 \
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
     #   aiohttp
     #   delta-sharing
+zarr==2.18.7 \
+    --hash=sha256:ac3dc4033e9ae4e9d7b5e27c97ea3eaf1003cc0a07f010bd83d5134bf8c4b223 \
+    --hash=sha256:b2b8f66f14dac4af66b180d2338819981b981f70e196c9a66e6bfaa9e59572f5
+    # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock b/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
index e5ba282fbe86..882432c52bd6 100644
--- a/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
+++ b/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
@@ -303,6 +303,11 @@ arro3-core==0.8.0 \
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   deltalake
+asciitree==0.3.3 \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+# via
+#   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1188,6 +1193,7 @@ deprecated==1.3.1 \
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   deltalake
+#   numcodecs
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   datasets
@@ -1318,6 +1324,12 @@ fastavro==1.12.1 \
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   -r python/requirements/ml/py313/data-test-requirements.txt
+fasteners==0.20; sys_platform != "emscripten" \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+# via
+#   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   zarr
 fastrlock==0.8.3; sys_platform != "darwin" \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2686,6 +2698,23 @@ networkx==3.6.1 \
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   torch
+numcodecs==0.15.1 \
+    --hash=sha256:1d471a1829ce52d3f365053a2bd1379e32e369517557c4027ddf5ac0d99c591e \
+    --hash=sha256:1dfdea4a67108205edfce99c1cb6cd621343bc7abb7e16a041c966776920e7de \
+    --hash=sha256:698f1d59511488b8fe215fadc1e679a4c70d894de2cca6d8bf2ab770eed34dfd \
+    --hash=sha256:a34f0fe5e5f3b837bbedbeb98794a6d4a12eeeef8d4697b523905837900b5e1c \
+    --hash=sha256:a4f7bdb26f1b34423cb56d48e75821223be38040907c9b5954eeb7463e7eb03c \
+    --hash=sha256:b0a9d9cd29a0088220682dda4a9898321f7813ff7802be2bbb545f6e3d2f10ff \
+    --hash=sha256:bef8c8e64fab76677324a07672b10c31861775d03fc63ed5012ca384144e4bb9 \
+    --hash=sha256:c3a09e22140f2c691f7df26303ff8fa2dadcf26d7d0828398c0bc09b69e5efa3 \
+    --hash=sha256:cdfaef9f5f2ed8f65858db801f1953f1007c9613ee490a1c56233cd78b505ed5 \
+    --hash=sha256:daed6066ffcf40082da847d318b5ab6123d69ceb433ba603cb87c323a541a8bc \
+    --hash=sha256:e2547fa3a7ffc9399cfd2936aecb620a3db285f2630c86c8a678e477741a4b3c \
+    --hash=sha256:e3d82b70500cf61e8d115faa0d0a76be6ecdc24a16477ee3279d711699ad85f3 \
+    --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
+# via
+#   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   zarr
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   bokeh
@@ -2704,6 +2733,7 @@ networkx==3.6.1 \
 #   keras
 #   ml-dtypes
 #   modin
+#   numcodecs
 #   pandas
 #   pylance
 #   ray
@@ -2721,6 +2751,7 @@ networkx==3.6.1 \
 #   torchtext
 #   torchvision
 #   webdataset
+#   zarr
 nvidia-nccl-cu12==2.27.5; platform_machine != "aarch64" and sys_platform == "linux" \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
 # via
@@ -5344,6 +5375,12 @@ yarl==1.23.0 \
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   aiohttp
 #   delta-sharing
+zarr==2.18.7 \
+    --hash=sha256:ac3dc4033e9ae4e9d7b5e27c97ea3eaf1003cc0a07f010bd83d5134bf8c4b223 \
+    --hash=sha256:b2b8f66f14dac4af66b180d2338819981b981f70e196c9a66e6bfaa9e59572f5
+# via
+#   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   -r python/requirements/ml/py313/data-test-requirements.txt
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/ray/data/BUILD.bazel b/python/ray/data/BUILD.bazel
index 4b532e5a68b5..6b8c5d7dddef 100644
--- a/python/ray/data/BUILD.bazel
+++ b/python/ray/data/BUILD.bazel
@@ -1822,6 +1822,20 @@ py_test(
     ],
 )
 
+py_test(
+    name = "test_zarrv2",
+    size = "medium",
+    srcs = ["tests/datasource/test_zarrv2.py"],
+    tags = [
+        "exclusive",
+        "team:data",
+    ],
+    deps = [
+        ":conftest",
+        "//:ray_lib",
+    ],
+)
+
 py_test(
     name = "test_zip",
     size = "small",
diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 0c67590d4ac7..5e0580c37936 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -45,18 +45,52 @@
 
 REQUIRED_ZARRAY_KEYS = ("shape", "chunks", "dtype")
 
-# Zarr-specific transient-error patterns appended to the user's
-# ``DataContext.retried_io_errors`` when reading chunks. The defaults in
-# ``DataContext`` cover AWS-flavored object-store errors; these cover the
-# kind of network-layer messages that bubble up through fsspec/numcodecs
-# when reading chunked array data over HTTPS/S3/GCS.
+# Conservative, *grounded* allow-list of retry triggers for chunk reads. These
+# are matched (substring first, then regex) by ``call_with_retry`` against the
+# ``"module.ClassName: message"`` string that
+# ``ray._common.retry.format_exception`` produces, and are merged on top of the
+# user's ``DataContext.retried_io_errors`` (which already covers PyArrow's
+# ``AWS Error ...`` strings). Modeled on ``DEFAULT_ICEBERG_CATALOG_RETRIED_ERRORS``
+# in ``ray.data.context``: we match transient transport *exception types* and
+# transient HTTP/S3 status codes / reason phrases
+#
+# This is an allow-list, so it doubles as the fail-safe: anything not listed is
+# NOT retried
+#
+# NOTE(Artur):
+#   1. Prefer matching exception *types* (``isinstance``) over strings once
+#      ``call_with_retry`` supports it (see the TODO in
+#      ``python/ray/_common/retry.py``); type matching is immune to message and
+#      library-version drift.
+#   2. The authoritative retry budget belongs in the storage layer -- botocore
+#      adaptive retries, ``pyarrow.fs.S3FileSystem(retry_strategy=...)``, gcsfs --
+#      configured via the ``filesystem`` argument. This list should remain a thin
+#      outer net, not the primary retry mechanism.
 _ZARR_TRANSIENT_ERROR_PATTERNS = (
-    "Connection reset",
-    "Read timeout",
-    "Connection refused",
-    "network",
-    "socket",
-    "HTTP error",
+    # Transient transport / network exception types (matched against the
+    # "ClassName:" prefix; a bare class name matches as a substring).
+    "ConnectionError",
+    "ConnectionResetError",
+    "ConnectionRefusedError",
+    "ConnectionAbortedError",
+    "TimeoutError",
+    "EndpointConnectionError",
+    "ServerDisconnectedError",
+    "ClientConnectorError",
+    "ClientOSError",
+    "IncompleteRead",
+    # Transient HTTP / S3 throttling and server-side responses (object stores
+    # put these in the message text). Status codes use a regex word boundary so
+    # we match 429/5xx but not, for example, 403/404.
+    r"\b(?:429|500|502|503|504)\b",
+    "Too Many Requests",
+    "Service Unavailable",
+    "Internal Server Error",
+    "SlowDown",
+    "ServiceUnavailable",
+    "InternalError",
+    "RequestTimeout",
+    "ThrottlingException",
 )
 
 
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 9e25a36ad778..0dd67336c5ee 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -9,13 +9,16 @@
 import pandas as pd
 import pyarrow.fs
 import pytest
-import zarr
 from pytest_lazy_fixtures import lf as lazy_fixture
 
 import ray
 from ray.data._internal.datasource import zarrv2_datasource
 from ray.data.tests.conftest import *  # noqa: F401, F403
 
+# zarr v2 requires Python 3.11+ (2.18.4+ dropped py3.10), so it isn't installed
+# on py3.10; skip the whole module there instead of hard-failing on import.
+zarr = pytest.importorskip("zarr")
+
 
 def _execute_read_tasks(tasks) -> pd.DataFrame:
     frames = [block for task in tasks for block in task()]
@@ -1005,6 +1008,51 @@ def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
         ray.shutdown()
 
 
+@pytest.mark.parametrize(
+    "error_str, retryable",
+    [
+        # Transient transport / network errors -> retry.
+        ("ConnectionResetError: [Errno 104] Connection reset by peer", True),
+        ("TimeoutError: The read operation timed out", True),
+        ("botocore.exceptions.ReadTimeoutError: Read timeout on endpoint URL", True),
+        ("botocore.exceptions.EndpointConnectionError: Could not connect", True),
+        (
+            "aiohttp.client_exceptions.ServerDisconnectedError: Server disconnected",
+            True,
+        ),
+        # Throttling / 5xx surfaced in the message text -> retry.
+        (
+            "botocore.exceptions.ClientError: An error occurred (SlowDown) when "
+            "calling the GetObject operation",
+            True,
+        ),
+        ("OSError: Server returned HTTP status 503 Service Unavailable", True),
+        # Non-transient -> must NOT retry (the allow-list is the fail-safe).
+        ("FileNotFoundError: Array metadata '.zarray' not found", False),
+        (
+            "botocore.exceptions.ClientError: An error occurred (403) when calling "
+            "the GetObject operation: Access Denied",
+            False,
+        ),
+        # A numcodecs decode failure is data corruption, not a transient error.
+        ("ValueError: blosc: invalid compressed buffer", False),
+        ("KeyError: 'chunk 0.0 is missing'", False),
+    ],
+)
+def test_zarr_transient_error_classification(error_str, retryable):
+    """The retry allow-list matches genuine transport/throttling errors, and
+    crucially does NOT match decode-corruption or non-429 4xx errors. Note that
+    a generic ``ClientError`` is retried only for the right code/reason
+    (``SlowDown`` -> retry, ``403`` -> no retry)."""
+    from ray._common.retry import matches_error
+
+    matched = any(
+        matches_error(pattern, error_str)
+        for pattern in zarrv2_datasource._ZARR_TRANSIENT_ERROR_PATTERNS
+    )
+    assert matched is retryable
+
+
 if __name__ == "__main__":
     import sys
 
diff --git a/python/requirements/ml/py313/data-test-requirements.txt b/python/requirements/ml/py313/data-test-requirements.txt
index 445ccba43aa1..0729466ed4e6 100644
--- a/python/requirements/ml/py313/data-test-requirements.txt
+++ b/python/requirements/ml/py313/data-test-requirements.txt
@@ -36,3 +36,4 @@ tensorflow-metadata>=1.17.0
 tf-keras
 torchvision==0.24.0
 confluent-kafka
+zarr<3 ; python_version >= '3.11'  # zarr 2.18.4+ requires py3.11+ (v2 API)
diff --git a/python/requirements_compiled_py3.13.txt b/python/requirements_compiled_py3.13.txt
index 5bc8b363171f..fd78069c15fc 100644
--- a/python/requirements_compiled_py3.13.txt
+++ b/python/requirements_compiled_py3.13.txt
@@ -128,6 +128,8 @@ arro3-core==0.8.0
     # via deltalake
 arrow==1.4.0
     # via isoduration
+asciitree==0.3.3
+    # via zarr
 asgiref==3.9.2
     # via
     #   -r python/requirements/py313/test-requirements.txt
@@ -470,6 +472,7 @@ deltalake==1.5.0
 deprecated==1.3.1
     # via
     #   deltalake
+    #   numcodecs
     #   pymoo
 dill==0.3.8
     # via
@@ -552,6 +555,7 @@ fasteners==0.20
     # via
     #   google-apitools
     #   gsutil
+    #   zarr
 fastjsonschema==2.21.2
     # via nbformat
 fastrlock==0.8.3 ; sys_platform != "darwin"
@@ -1266,6 +1270,8 @@ numba==0.61.2
     # via
     #   -r python/requirements/py313/test-requirements.txt
     #   statsforecast
+numcodecs==0.15.1
+    # via zarr
 numexpr==2.14.1
     # via
     #   -r python/requirements/ml/py313/rllib-test-requirements.txt
@@ -1321,6 +1327,7 @@ numpy==2.2.6
     #   mujoco
     #   nevergrad
     #   numba
+    #   numcodecs
     #   numexpr
     #   onnx
     #   onnxruntime
@@ -1361,6 +1368,7 @@ numpy==2.2.6
     #   utilsforecast
     #   webdataset
     #   xgboost
+    #   zarr
     #   zoopt
 nvidia-nccl-cu12==2.27.5 ; platform_system == "Linux" and platform_machine != "aarch64"
     # via
@@ -2722,6 +2730,8 @@ yq==3.2.2
     # via
     #   -r python/requirements/lint-requirements.txt
     #   -r python/requirements/py313/test-requirements.txt
+zarr==2.18.7 ; python_version >= "3.11"
+    # via -r python/requirements/ml/py313/data-test-requirements.txt
 zict==3.0.0
     # via distributed
 zipp==3.23.1

From 16e5d504c4f37d9ca4c0bdeafbc3dc00685339f9 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 4 Jun 2026 16:13:02 +0200
Subject: [PATCH 07/45] [data] read_zarr: fail fast on zarr-python 3.x with a
 clear error

The datasource uses zarr-python 2.x APIs (zarr.util.normalize_storage_path, .zarray metadata, zarr.open(fs.get_mapper(...))) that were removed/reworked in zarr 3.x, but _check_import only verified zarr was importable. A zarr>=3 install therefore failed mid-read with a cryptic ImportError. Add a version check at construction with an actionable message + a test.

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../data/_internal/datasource/zarrv2_datasource.py | 14 ++++++++++++++
 python/ray/data/tests/datasource/test_zarrv2.py    |  8 ++++++++
 2 files changed, 22 insertions(+)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 5e0580c37936..2f38662a1b95 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -507,6 +507,20 @@ def __init__(
         super().__init__()
         _check_import(self, module="zarr", package="zarr")
 
+        # This datasource targets Zarr v2 stores via zarr-python 2.x APIs
+        # (``zarr.util.normalize_storage_path``, ``.zarray`` metadata,
+        # ``zarr.open(fs.get_mapper(...))``) that were removed/reworked in
+        # zarr-python 3.x. Fail fast with an actionable message rather than a
+        # cryptic ImportError mid-read if an incompatible version is installed.
+        import zarr
+
+        if int(zarr.__version__.split(".")[0]) >= 3:
+            raise ImportError(
+                f"read_zarr supports zarr-python 2.x (Zarr v2 stores), but found "
+                f"zarr=={zarr.__version__}. Install a compatible version with "
+                f"`pip install 'zarr<3'`."
+            )
+
         self.allow_full_metadata_scan = allow_full_metadata_scan
         self.paths = [str(path)]
 
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 0dd67336c5ee..22939f8e23f1 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -1053,6 +1053,14 @@ def test_zarr_transient_error_classification(error_str, retryable):
     assert matched is retryable
 
 
+def test_rejects_zarr_v3(tmp_path, monkeypatch):
+    """read_zarr targets zarr-python 2.x; an incompatible v3 install must raise a
+    clear, actionable error at construction, not a cryptic ImportError mid-read."""
+    monkeypatch.setattr(zarr, "__version__", "3.0.1")
+    with pytest.raises(ImportError, match=r"zarr-python 2\.x"):
+        zarrv2_datasource.ZarrV2Datasource(str(tmp_path))
+
+
 if __name__ == "__main__":
     import sys
 

From bd6ecfd64992f569ce78d99dc3d31c103f49eb83 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 4 Jun 2026 16:35:41 +0200
Subject: [PATCH 08/45] [data] read_zarr: delegate transient-I/O retries to the
 filesystem

Drop the bespoke _ZARR_TRANSIENT_ERROR_PATTERNS allow-list and the
Ray-level call_with_retry in _read_chunk. As with the Iceberg read
datasource (which hands read I/O to its FileIO), transient-error retries
are now owned by the storage backend behind the fsspec/pyarrow
filesystem -- s3fs/botocore and pyarrow.fs.S3FileSystem retry by default
and are tunable via the filesystem passed to read_zarr. Remove the
now-obsolete retry tests/helpers and document the guidance on the
filesystem arg.

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py |  89 ++------------
 python/ray/data/read_api.py                   |   6 +-
 .../ray/data/tests/datasource/test_zarrv2.py  | 115 ------------------
 3 files changed, 16 insertions(+), 194 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 2f38662a1b95..089131af8427 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -18,7 +18,7 @@
 import json
 import logging
 import math
-from collections.abc import Callable, Iterable, Sequence
+from collections.abc import Callable, Iterable
 from dataclasses import dataclass
 from itertools import product
 from typing import TYPE_CHECKING, Any, List, Optional
@@ -27,10 +27,8 @@
 import pandas as pd
 from fsspec.spec import AbstractFileSystem
 
-from ray._common.retry import call_with_retry
 from ray.data._internal.util import _check_import
 from ray.data.block import BlockMetadata
-from ray.data.context import DataContext
 from ray.data.datasource.datasource import Datasource, ReadTask
 
 logger = logging.getLogger(__name__)
@@ -40,59 +38,13 @@
     from zarr import Array as ZarrArray
     from zarr.hierarchy import Group as ZarrGroup
 
+    from ray.data.context import DataContext
+
     ZarrRoot = ZarrGroup | ZarrArray
 
 
 REQUIRED_ZARRAY_KEYS = ("shape", "chunks", "dtype")
 
-# Conservative, *grounded* allow-list of retry triggers for chunk reads. These
-# are matched (substring first, then regex) by ``call_with_retry`` against the
-# ``"module.ClassName: message"`` string that
-# ``ray._common.retry.format_exception`` produces, and are merged on top of the
-# user's ``DataContext.retried_io_errors`` (which already covers PyArrow's
-# ``AWS Error ...`` strings). Modeled on ``DEFAULT_ICEBERG_CATALOG_RETRIED_ERRORS``
-# in ``ray.data.context``: we match transient transport *exception types* and
-# transient HTTP/S3 status codes / reason phrases
-#
-# This is an allow-list, so it doubles as the fail-safe: anything not listed is
-# NOT retried
-#
-# NOTE(Artur):
-#   1. Prefer matching exception *types* (``isinstance``) over strings once
-#      ``call_with_retry`` supports it (see the TODO in
-#      ``python/ray/_common/retry.py``); type matching is immune to message and
-#      library-version drift.
-#   2. The authoritative retry budget belongs in the storage layer -- botocore
-#      adaptive retries, ``pyarrow.fs.S3FileSystem(retry_strategy=...)``, gcsfs --
-#      configured via the ``filesystem`` argument. This list should remain a thin
-#      outer net, not the primary retry mechanism.
-_ZARR_TRANSIENT_ERROR_PATTERNS = (
-    # Transient transport / network exception types (matched against the
-    # "ClassName:" prefix; a bare class name matches as a substring).
-    "ConnectionError",
-    "ConnectionResetError",
-    "ConnectionRefusedError",
-    "ConnectionAbortedError",
-    "TimeoutError",
-    "EndpointConnectionError",
-    "ServerDisconnectedError",
-    "ClientConnectorError",
-    "ClientOSError",
-    "IncompleteRead",
-    # Transient HTTP / S3 throttling and server-side responses (object stores
-    # put these in the message text). Status codes use a regex word boundary so
-    # we match 429/5xx but not, for example, 403/404.
-    r"\b(?:429|500|502|503|504)\b",
-    "Too Many Requests",
-    "Service Unavailable",
-    "Internal Server Error",
-    "SlowDown",
-    "ServiceUnavailable",
-    "InternalError",
-    "RequestTimeout",
-    "ThrottlingException",
-)
-
 
 @dataclass(frozen=True)
 class ZarrArrayMeta:
@@ -291,41 +243,22 @@ def _read_chunk(
     root: ZarrRoot,
     array_name: str,
     chunk_slices: tuple[tuple[int, int], ...],
-    *,
-    match: Optional[Sequence[str]] = None,
-    max_attempts: int = 10,
-    max_backoff_s: int = 32,
 ) -> np.ndarray:
-    """Read ``array[chunk_slices]`` from a Zarr root with transient-error retry.
+    """Read ``array[chunk_slices]`` from a Zarr root.
 
     ``chunk_slices`` is an N-tuple of ``(start, stop)`` pairs, one per axis.
     For a 0-D (scalar) array it is the empty tuple ``()``, which reads the
     single element.
 
-    Retries are delegated to :func:`ray._common.retry.call_with_retry`,
-    matching the pattern used by other Ray Data datasources (lance,
-    iceberg). ``match`` defaults to ``DataContext.retried_io_errors``
-    (covers the AWS-flavored object-store transient errors) plus a small
-    set of zarr-specific network patterns. Pass an explicit ``match``
-    sequence to override.
+    Transient I/O errors (throttling, 5xx, connection resets, timeouts) are
+    retried by the underlying filesystem/storage backend, which owns the retry
+    policy: ``s3fs``/botocore and ``pyarrow.fs.S3FileSystem`` retry by default
+    and are tunable on the ``filesystem`` passed to ``read_zarr`` (e.g. botocore
+    ``retries`` config or pyarrow ``retry_strategy``).
     """
     indexer = tuple(slice(s, e) for s, e in chunk_slices)
-
-    def _read() -> np.ndarray:
-        arr = root if array_name == "" else root[array_name]
-        return arr[indexer]
-
-    if match is None:
-        match = list(DataContext.get_current().retried_io_errors) + list(
-            _ZARR_TRANSIENT_ERROR_PATTERNS
-        )
-    return call_with_retry(
-        _read,
-        description=f"read zarr chunk array={array_name!r} slices={chunk_slices}",
-        match=match,
-        max_attempts=max_attempts,
-        max_backoff_s=max_backoff_s,
-    )
+    arr = root if array_name == "" else root[array_name]
+    return arr[indexer]
 
 
 @dataclass(frozen=True)
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 58bb3f183e33..4b17f960dce0 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -1083,7 +1083,11 @@ def read_zarr(
             any storage backend configuration that shouldn't be inferred
             internally. Recommended for non-local Zarr stores; for local
             paths it's usually fine to omit. If omitted, the datasource
-            infers the filesystem from ``path``.
+            infers the filesystem from ``path``. Transient-error retries
+            (throttling, 5xx, timeouts) are handled by this filesystem, so
+            configure retry behavior here -- e.g. the botocore ``retries``
+            config on an ``s3fs.S3FileSystem`` or ``retry_strategy`` on a
+            ``pyarrow.fs.S3FileSystem``.
         chunk_shapes: Optional override(s) for chunk geometry along the
             leading axes. Accepts either:
 
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 22939f8e23f1..3e3d7cd7e46d 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -773,76 +773,6 @@ def test_heterogeneous_store_emits_one_row_per_chunk(heterogeneous_zarrv2_store)
     }
 
 
-# ---------------------------------------------------------------------------
-# _read_chunk retry behavior
-# ---------------------------------------------------------------------------
-
-
-class _ScriptedArray:
-    """Stand-in for a zarr Array: each ``[indexer]`` returns the next scripted item.
-
-    Items can be either an exception (raised) or an ndarray (returned). Used
-    to drive ``_read_chunk`` through specific retry scenarios without
-    touching the network.
-    """
-
-    def __init__(self, *responses) -> None:
-        self._responses = list(responses)
-
-    def __getitem__(self, key):
-        item = self._responses.pop(0)
-        if isinstance(item, BaseException):
-            raise item
-        return item
-
-
-class _ScriptedRoot:
-    """Stand-in for a zarr Group: name → :class:`_ScriptedArray`."""
-
-    def __init__(self, **arrays) -> None:
-        self._arrays = arrays
-
-    def __getitem__(self, name):
-        return self._arrays[name]
-
-
-def test_read_chunk_retries_then_succeeds():
-    """Retryable network errors retried with backoff, eventual read succeeds.
-
-    Uses default ``match`` patterns (``DataContext.retried_io_errors`` plus
-    zarr-specific entries like ``"Connection reset"`` and ``"Read timeout"``).
-    """
-    expected = np.array([1, 2, 3], dtype="<i4")
-    arr = _ScriptedArray(
-        ConnectionError("Connection reset by peer"),
-        TimeoutError("Read timeout"),
-        expected,
-    )
-    root = _ScriptedRoot(x=arr)
-
-    out = zarrv2_datasource._read_chunk(
-        root, "x", ((0, 3),), max_attempts=5, max_backoff_s=0
-    )
-    np.testing.assert_array_equal(out, expected)
-
-
-def test_read_chunk_exhausts_retries():
-    arr = _ScriptedArray(
-        ConnectionError("Connection reset"),
-        ConnectionError("Connection reset"),
-        ConnectionError("Connection reset"),
-    )
-    root = _ScriptedRoot(x=arr)
-
-    # call_with_retry re-raises the last exception itself (with ``from None``)
-    # rather than wrapping in a RuntimeError. Match against the original
-    # exception type to pin that behaviour.
-    with pytest.raises(ConnectionError, match="Connection reset"):
-        zarrv2_datasource._read_chunk(
-            root, "x", ((0, 3),), max_attempts=3, max_backoff_s=0
-        )
-
-
 # ---------------------------------------------------------------------------
 # Estimator
 # ---------------------------------------------------------------------------
@@ -1008,51 +938,6 @@ def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
         ray.shutdown()
 
 
-@pytest.mark.parametrize(
-    "error_str, retryable",
-    [
-        # Transient transport / network errors -> retry.
-        ("ConnectionResetError: [Errno 104] Connection reset by peer", True),
-        ("TimeoutError: The read operation timed out", True),
-        ("botocore.exceptions.ReadTimeoutError: Read timeout on endpoint URL", True),
-        ("botocore.exceptions.EndpointConnectionError: Could not connect", True),
-        (
-            "aiohttp.client_exceptions.ServerDisconnectedError: Server disconnected",
-            True,
-        ),
-        # Throttling / 5xx surfaced in the message text -> retry.
-        (
-            "botocore.exceptions.ClientError: An error occurred (SlowDown) when "
-            "calling the GetObject operation",
-            True,
-        ),
-        ("OSError: Server returned HTTP status 503 Service Unavailable", True),
-        # Non-transient -> must NOT retry (the allow-list is the fail-safe).
-        ("FileNotFoundError: Array metadata '.zarray' not found", False),
-        (
-            "botocore.exceptions.ClientError: An error occurred (403) when calling "
-            "the GetObject operation: Access Denied",
-            False,
-        ),
-        # A numcodecs decode failure is data corruption, not a transient error.
-        ("ValueError: blosc: invalid compressed buffer", False),
-        ("KeyError: 'chunk 0.0 is missing'", False),
-    ],
-)
-def test_zarr_transient_error_classification(error_str, retryable):
-    """The retry allow-list matches genuine transport/throttling errors, and
-    crucially does NOT match decode-corruption or non-429 4xx errors. Note that
-    a generic ``ClientError`` is retried only for the right code/reason
-    (``SlowDown`` -> retry, ``403`` -> no retry)."""
-    from ray._common.retry import matches_error
-
-    matched = any(
-        matches_error(pattern, error_str)
-        for pattern in zarrv2_datasource._ZARR_TRANSIENT_ERROR_PATTERNS
-    )
-    assert matched is retryable
-
-
 def test_rejects_zarr_v3(tmp_path, monkeypatch):
     """read_zarr targets zarr-python 2.x; an incompatible v3 install must raise a
     clear, actionable error at construction, not a cryptic ImportError mid-read."""

From 67de5fc5113f6494eb4f1ebc57dfa854c8f53a6a Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 4 Jun 2026 17:02:38 +0200
Subject: [PATCH 09/45] [data] read_zarr: address review findings (explicit-FS
 scheme, .zarray binary, parallelism=0)

Three cursor-bot review findings on the datasource:

- Explicit filesystem= now strips the URI scheme from the store path
  (split_protocol), matching the filesystem=None branch. pyarrow filesystems
  can't resolve a gs:// or s3:// prefix, which broke the documented 'pass an
  explicit filesystem for cloud' path.
- .zarray metadata is opened in binary mode ('rb') like .zmetadata, avoiding
  text-mode fragility on some fsspec backends (json.load handles bytes).
- get_read_tasks guards parallelism<=0 with max(1, ...) instead of dividing by
  zero.

Adds regression tests for the scheme-strip and parallelism=0 cases.

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 16 +++++++++----
 .../ray/data/tests/datasource/test_zarrv2.py  | 24 +++++++++++++++++++
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 089131af8427..0cfc04227568 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -25,6 +25,7 @@
 
 import numpy as np
 import pandas as pd
+from fsspec.core import split_protocol
 from fsspec.spec import AbstractFileSystem
 
 from ray.data._internal.util import _check_import
@@ -193,7 +194,7 @@ def _load_metadata_from_array_paths(
             else f"{store_root}/.zarray"
         )
         try:
-            with fs.open(zarray_path, "r") as f:
+            with fs.open(zarray_path, "rb") as f:
                 raw_meta = json.load(f)
         except FileNotFoundError as e:
             raise ValueError(
@@ -226,7 +227,7 @@ def _load_metadata_full_scan(fs, store_path: str) -> dict[str, ZarrArrayMeta]:
             array_path = normalize_storage_path(dirpath.removeprefix(store_prefix))
         zarray_path = f"{dirpath}/.zarray"
         try:
-            with fs.open(zarray_path, "r") as f:
+            with fs.open(zarray_path, "rb") as f:
                 raw = json.load(f)
         except FileNotFoundError:
             continue
@@ -492,7 +493,12 @@ def __init__(
                     f"fsspec.spec.AbstractFileSystem, got "
                     f"{type(filesystem).__name__}"
                 )
-            self._store_path = self.paths[0].rstrip("/")
+            # Strip any URI scheme (e.g. ``gs://`` / ``s3://``) so the path is
+            # backend-relative; pyarrow filesystems (wrapped in
+            # ``ArrowFSWrapper``) require this. Mirrors the ``filesystem is None``
+            # branch, which strips the scheme via ``_resolve_paths_and_filesystem``.
+            _, store_path = split_protocol(self.paths[0])
+            self._store_path = store_path.rstrip("/")
 
         if chunk_shapes is not None and not isinstance(
             chunk_shapes, (tuple, list, dict)
@@ -661,7 +667,7 @@ def _get_long_form_read_tasks(
             ]
             if not descriptors:
                 continue
-            n_tasks = min(parallelism, len(descriptors))
+            n_tasks = max(1, min(parallelism, len(descriptors)))
             batch_size = math.ceil(len(descriptors) / n_tasks)
             for start in range(0, len(descriptors), batch_size):
                 batch = descriptors[start : start + batch_size]
@@ -713,7 +719,7 @@ def _get_aligned_read_tasks(
         if not descriptors:
             return []
 
-        n_tasks = min(parallelism, len(descriptors))
+        n_tasks = max(1, min(parallelism, len(descriptors)))
         batch_size = math.ceil(len(descriptors) / n_tasks)
 
         read_tasks: List[ReadTask] = []
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 3e3d7cd7e46d..9a40df331aa8 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -946,6 +946,30 @@ def test_rejects_zarr_v3(tmp_path, monkeypatch):
         zarrv2_datasource.ZarrV2Datasource(str(tmp_path))
 
 
+def test_explicit_filesystem_strips_uri_scheme(tmp_path):
+    """An explicit ``filesystem=`` plus a scheme-prefixed path must strip the
+    scheme so the store path is backend-relative. Regression: pyarrow
+    filesystems can't resolve a ``file://`` / ``gs://`` prefix in the path."""
+    store_path = tmp_path / "scheme.zarr"
+    _write_real_zarr_store(store_path, {"data": (np.arange(6, dtype="<i4"), (2,))})
+
+    ds = zarrv2_datasource.ZarrV2Datasource(
+        f"file://{store_path}", filesystem=pyarrow.fs.LocalFileSystem()
+    )
+    assert ds._store_path == str(store_path)
+    df = _execute_read_tasks(ds.get_read_tasks(parallelism=2))
+    assert len(df) == 3
+
+
+def test_get_read_tasks_parallelism_zero(tmp_path):
+    """parallelism=0 must not divide by zero; fall back to a single task."""
+    store_path = tmp_path / "p0.zarr"
+    _write_real_zarr_store(store_path, {"data": (np.arange(10, dtype="<i4"), (2,))})
+    ds = zarrv2_datasource.ZarrV2Datasource(str(store_path))
+    tasks = ds.get_read_tasks(parallelism=0)
+    assert len(tasks) >= 1
+
+
 if __name__ == "__main__":
     import sys
 

From 09c6be0a5e45863653551ef0ab2ab09c7d5a525a Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 4 Jun 2026 18:50:55 +0200
Subject: [PATCH 10/45] [data] read_zarr: remove duplicate lazy 'import zarr'

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/_internal/datasource/zarrv2_datasource.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 0cfc04227568..9d180913ab66 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -601,11 +601,6 @@ def __init__(
                     f"arrays to the same axis-0 prefix) to re-tile them."
                 )
 
-        # Lazy zarr import: ``zarr`` is a hard dep of this datasource (gated
-        # by ``_check_import`` above) but ``import ray.data`` shouldn't drag
-        # it in for users who never call ``read_zarr``.
-        import zarr
-
         self.root = zarr.open(self._fs.get_mapper(self._store_path), mode="r")
 
     def estimate_inmemory_data_size(self) -> Optional[int]:

From a06e9b313c69527e03b768564229f5a35efd9f4b Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 00:28:39 +0200
Subject: [PATCH 11/45] [data] read_zarr: support zarr on py3.10 (pin zarr
 2.18.3 / numcodecs 0.13.1 for py<3.11)

zarr 2.18.4+ and numcodecs 0.14+ dropped py3.10, so the py3.10 data CI
variants (arrow v17/v23/nightly/mongo) couldn't install zarr and
test_zarrv2 collected zero tests -- which Ray's bazel wrapper treats as a
failure. Pin zarr<2.18.4 and numcodecs 0.13.1 for py<3.11 (the versions
the py3.10 base image already resolves), and marker-gate the numcodecs
constraint in requirements_compiled_py3.13.txt to py3.11+ so the py3.10
data locks can resolve zarr. read_zarr now works and is tested on py3.10
(zarr 2.18.3) and py3.11+ (zarr 2.18.7).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../ci/data-base-ci_depset_py3.10.lock        | 38 +++++++++++++++++++
 .../ci/data-base-ci_depset_py3.11.lock        |  1 +
 .../ci/data-base-ci_depset_py3.12.lock        |  1 +
 .../ci/data-mongo-ci_depset_py3.10.lock       | 38 +++++++++++++++++++
 .../data-pyarrow-latest-ci_depset_py3.10.lock | 38 +++++++++++++++++++
 ...data-pyarrow-nightly-ci_depset_py3.10.lock | 38 +++++++++++++++++++
 .../ci/data-pyarrow-v17-ci_depset_py3.10.lock | 38 +++++++++++++++++++
 .../ci/relaxed_data-ci_depset_py3.10.lock     | 38 +++++++++++++++++++
 .../ci/relaxed_data-ci_depset_py3.12.lock     |  1 +
 .../ml/py313/data-test-requirements.txt       |  6 +++
 python/requirements_compiled_py3.13.txt       |  6 ++-
 11 files changed, 241 insertions(+), 2 deletions(-)

diff --git a/python/deplocks/ci/data-base-ci_depset_py3.10.lock b/python/deplocks/ci/data-base-ci_depset_py3.10.lock
index b60d94ce5019..5c1e645190d8 100644
--- a/python/deplocks/ci/data-base-ci_depset_py3.10.lock
+++ b/python/deplocks/ci/data-base-ci_depset_py3.10.lock
@@ -311,6 +311,11 @@ arro3-core==0.8.0 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   deltalake
+asciitree==0.3.3 ; python_full_version < '3.11' \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1334,6 +1339,12 @@ fastavro==1.12.1 \
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   -r python/requirements/ml/py313/data-test-requirements.txt
+fasteners==0.20 ; python_full_version < '3.11' and sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2721,6 +2732,27 @@ networkx==3.2.1 \
     --hash=sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6 \
     --hash=sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2
     # via torch
+numcodecs==0.13.1 ; python_full_version < '3.11' \
+    --hash=sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f \
+    --hash=sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15 \
+    --hash=sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc \
+    --hash=sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666 \
+    --hash=sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6 \
+    --hash=sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf \
+    --hash=sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917 \
+    --hash=sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b \
+    --hash=sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43 \
+    --hash=sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701 \
+    --hash=sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176 \
+    --hash=sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b \
+    --hash=sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28 \
+    --hash=sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc \
+    --hash=sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53 \
+    --hash=sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca \
+    --hash=sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab
+    # via
+    #   -r python/requirements/ml/py313/data-test-requirements.txt
+    #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2856,6 +2888,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   numexpr
     #   pandas
     #   pylance
@@ -2875,6 +2908,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via
@@ -5659,6 +5693,10 @@ yarl==1.23.0 \
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
     #   aiohttp
     #   delta-sharing
+zarr==2.18.3 ; python_full_version < '3.11' \
+    --hash=sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce \
+    --hash=sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd
+    # via -r python/requirements/ml/py313/data-test-requirements.txt
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/data-base-ci_depset_py3.11.lock b/python/deplocks/ci/data-base-ci_depset_py3.11.lock
index 5141a673082f..ba0ba6a0c804 100644
--- a/python/deplocks/ci/data-base-ci_depset_py3.11.lock
+++ b/python/deplocks/ci/data-base-ci_depset_py3.11.lock
@@ -2758,6 +2758,7 @@ numcodecs==0.15.1 \
     --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   -r python/requirements/ml/py313/data-test-requirements.txt
     #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
diff --git a/python/deplocks/ci/data-base-ci_depset_py3.12.lock b/python/deplocks/ci/data-base-ci_depset_py3.12.lock
index ae0cc65bd389..1531317181d2 100644
--- a/python/deplocks/ci/data-base-ci_depset_py3.12.lock
+++ b/python/deplocks/ci/data-base-ci_depset_py3.12.lock
@@ -2747,6 +2747,7 @@ numcodecs==0.15.1 \
     --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
     # via
     #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+    #   -r python/requirements/ml/py313/data-test-requirements.txt
     #   zarr
 numpy==2.2.6 \
     --hash=sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff \
diff --git a/python/deplocks/ci/data-mongo-ci_depset_py3.10.lock b/python/deplocks/ci/data-mongo-ci_depset_py3.10.lock
index 08b72a50062d..eb65e8caa49a 100644
--- a/python/deplocks/ci/data-mongo-ci_depset_py3.10.lock
+++ b/python/deplocks/ci/data-mongo-ci_depset_py3.10.lock
@@ -303,6 +303,11 @@ arro3-core==0.8.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   deltalake
+asciitree==0.3.3 ; python_full_version < '3.11' \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1280,6 +1285,12 @@ fastavro==1.12.1 \
     --hash=sha256:eaa7ab3769beadcebb60f0539054c7755f63bd9cf7666e2c15e615ab605f89a8 \
     --hash=sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+fasteners==0.20 ; python_full_version < '3.11' and sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2603,6 +2614,27 @@ networkx==3.2.1 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   torch
+numcodecs==0.13.1 ; python_full_version < '3.11' \
+    --hash=sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f \
+    --hash=sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15 \
+    --hash=sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc \
+    --hash=sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666 \
+    --hash=sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6 \
+    --hash=sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf \
+    --hash=sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917 \
+    --hash=sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b \
+    --hash=sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43 \
+    --hash=sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701 \
+    --hash=sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176 \
+    --hash=sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b \
+    --hash=sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28 \
+    --hash=sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc \
+    --hash=sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53 \
+    --hash=sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca \
+    --hash=sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2734,6 +2766,7 @@ numpy==2.2.6 \
     #   jaxlib
     #   keras
     #   ml-dtypes
+    #   numcodecs
     #   numexpr
     #   pandas
     #   pymars
@@ -2751,6 +2784,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
@@ -5503,6 +5537,10 @@ yarl==1.23.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   aiohttp
+zarr==2.18.3 ; python_full_version < '3.11' \
+    --hash=sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce \
+    --hash=sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd
+    # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.10.lock b/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.10.lock
index 38d4c5e71be7..1b92b26fec04 100644
--- a/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.10.lock
+++ b/python/deplocks/ci/data-pyarrow-latest-ci_depset_py3.10.lock
@@ -304,6 +304,11 @@ arro3-core==0.8.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   deltalake
+asciitree==0.3.3 ; python_full_version < '3.11' \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1283,6 +1288,12 @@ fastavro==1.12.1 \
     --hash=sha256:eaa7ab3769beadcebb60f0539054c7755f63bd9cf7666e2c15e615ab605f89a8 \
     --hash=sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+fasteners==0.20 ; python_full_version < '3.11' and sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2624,6 +2635,27 @@ networkx==3.2.1 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   torch
+numcodecs==0.13.1 ; python_full_version < '3.11' \
+    --hash=sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f \
+    --hash=sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15 \
+    --hash=sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc \
+    --hash=sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666 \
+    --hash=sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6 \
+    --hash=sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf \
+    --hash=sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917 \
+    --hash=sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b \
+    --hash=sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43 \
+    --hash=sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701 \
+    --hash=sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176 \
+    --hash=sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b \
+    --hash=sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28 \
+    --hash=sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc \
+    --hash=sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53 \
+    --hash=sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca \
+    --hash=sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2756,6 +2788,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   numexpr
     #   pandas
     #   pylance
@@ -2774,6 +2807,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
@@ -5450,6 +5484,10 @@ yarl==1.23.0 \
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   aiohttp
     #   delta-sharing
+zarr==2.18.3 ; python_full_version < '3.11' \
+    --hash=sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce \
+    --hash=sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd
+    # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/data-pyarrow-nightly-ci_depset_py3.10.lock b/python/deplocks/ci/data-pyarrow-nightly-ci_depset_py3.10.lock
index 7bb4ef1c13ab..beaab51d573f 100644
--- a/python/deplocks/ci/data-pyarrow-nightly-ci_depset_py3.10.lock
+++ b/python/deplocks/ci/data-pyarrow-nightly-ci_depset_py3.10.lock
@@ -303,6 +303,11 @@ arro3-core==0.8.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   deltalake
+asciitree==0.3.3 ; python_full_version < '3.11' \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1276,6 +1281,12 @@ fastavro==1.12.1 \
     --hash=sha256:eaa7ab3769beadcebb60f0539054c7755f63bd9cf7666e2c15e615ab605f89a8 \
     --hash=sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+fasteners==0.20 ; python_full_version < '3.11' and sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2614,6 +2625,27 @@ networkx==3.2.1 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   torch
+numcodecs==0.13.1 ; python_full_version < '3.11' \
+    --hash=sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f \
+    --hash=sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15 \
+    --hash=sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc \
+    --hash=sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666 \
+    --hash=sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6 \
+    --hash=sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf \
+    --hash=sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917 \
+    --hash=sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b \
+    --hash=sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43 \
+    --hash=sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701 \
+    --hash=sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176 \
+    --hash=sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b \
+    --hash=sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28 \
+    --hash=sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc \
+    --hash=sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53 \
+    --hash=sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca \
+    --hash=sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2746,6 +2778,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   numexpr
     #   pandas
     #   pylance
@@ -2764,6 +2797,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
@@ -5435,6 +5469,10 @@ yarl==1.23.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   aiohttp
+zarr==2.18.3 ; python_full_version < '3.11' \
+    --hash=sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce \
+    --hash=sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd
+    # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/data-pyarrow-v17-ci_depset_py3.10.lock b/python/deplocks/ci/data-pyarrow-v17-ci_depset_py3.10.lock
index 03381ea5e291..fe3557b3bf46 100644
--- a/python/deplocks/ci/data-pyarrow-v17-ci_depset_py3.10.lock
+++ b/python/deplocks/ci/data-pyarrow-v17-ci_depset_py3.10.lock
@@ -305,6 +305,11 @@ arro3-core==0.8.0 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   deltalake
+asciitree==0.3.3 ; python_full_version < '3.11' \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1294,6 +1299,12 @@ fastavro==1.12.1 \
     --hash=sha256:eaa7ab3769beadcebb60f0539054c7755f63bd9cf7666e2c15e615ab605f89a8 \
     --hash=sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+fasteners==0.20 ; python_full_version < '3.11' and sys_platform != 'emscripten' \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 fastrlock==0.8.3 ; sys_platform != 'darwin' \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2657,6 +2668,27 @@ networkx==3.2.1 \
     # via
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   torch
+numcodecs==0.13.1 ; python_full_version < '3.11' \
+    --hash=sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f \
+    --hash=sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15 \
+    --hash=sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc \
+    --hash=sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666 \
+    --hash=sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6 \
+    --hash=sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf \
+    --hash=sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917 \
+    --hash=sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b \
+    --hash=sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43 \
+    --hash=sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701 \
+    --hash=sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176 \
+    --hash=sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b \
+    --hash=sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28 \
+    --hash=sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc \
+    --hash=sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53 \
+    --hash=sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca \
+    --hash=sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab
+    # via
+    #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+    #   zarr
 numexpr==2.14.1 ; python_full_version < '3.12' \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2791,6 +2823,7 @@ numpy==2.2.6 \
     #   keras
     #   ml-dtypes
     #   modin
+    #   numcodecs
     #   numexpr
     #   pandas
     #   pyarrow
@@ -2810,6 +2843,7 @@ numpy==2.2.6 \
     #   torchtext
     #   torchvision
     #   webdataset
+    #   zarr
 nvidia-nccl-cu12==2.27.5 ; platform_machine != 'aarch64' and sys_platform == 'linux' \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
     # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
@@ -5467,6 +5501,10 @@ yarl==1.23.0 \
     #   -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
     #   aiohttp
     #   delta-sharing
+zarr==2.18.3 ; python_full_version < '3.11' \
+    --hash=sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce \
+    --hash=sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd
+    # via -r python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock b/python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
index a211f1f4f3ae..ccf2b0e37f7c 100644
--- a/python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
+++ b/python/deplocks/ci/relaxed_data-ci_depset_py3.10.lock
@@ -310,6 +310,11 @@ arro3-core==0.8.0 \
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   deltalake
+asciitree==0.3.3; python_full_version < "3.11" \
+    --hash=sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e
+# via
+#   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   zarr
 asn1crypto==1.5.1 \
     --hash=sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c \
     --hash=sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67
@@ -1324,6 +1329,12 @@ fastavro==1.12.1 \
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   -r python/requirements/ml/py313/data-test-requirements.txt
+fasteners==0.20; python_full_version < "3.11" and sys_platform != "emscripten" \
+    --hash=sha256:55dce8792a41b56f727ba6e123fcaee77fd87e638a6863cec00007bfea84c8d8 \
+    --hash=sha256:9422c40d1e350e4259f509fb2e608d6bc43c0136f79a00db1b49046029d0b3b7
+# via
+#   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   zarr
 fastrlock==0.8.3; sys_platform != "darwin" \
     --hash=sha256:001fd86bcac78c79658bac496e8a17472d64d558cd2227fdc768aa77f877fe40 \
     --hash=sha256:04bb5eef8f460d13b8c0084ea5a9d3aab2c0573991c880c0a34a56bb14951d30 \
@@ -2688,6 +2699,27 @@ networkx==3.2.1 \
     --hash=sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6 \
     --hash=sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2
 # via torch
+numcodecs==0.13.1; python_full_version < "3.11" \
+    --hash=sha256:233bc7f26abce24d57e44ea8ebeb5cd17084690b4e7409dd470fdb75528d615f \
+    --hash=sha256:237b7171609e868a20fd313748494444458ccd696062f67e198f7f8f52000c15 \
+    --hash=sha256:2a86f5367af9168e30f99727ff03b27d849c31ad4522060dde0bce2923b3a8bc \
+    --hash=sha256:2eda97dd2f90add98df6d295f2c6ae846043396e3d51a739ca5db6c03b5eb666 \
+    --hash=sha256:3501a848adaddce98a71a262fee15cd3618312692aa419da77acd18af4a6a3f6 \
+    --hash=sha256:3f593c7506b0ab248961a3b13cb148cc6e8355662ff124ac591822310bc55ecf \
+    --hash=sha256:5195bea384a6428f8afcece793860b1ab0ae28143c853f0b2b20d55a8947c917 \
+    --hash=sha256:796b3e6740107e4fa624cc636248a1580138b3f1c579160f260f76ff13a4261b \
+    --hash=sha256:7a60d75179fd6692e301ddfb3b266d51eb598606dcae7b9fc57f986e8d65cb43 \
+    --hash=sha256:80d3071465f03522e776a31045ddf2cfee7f52df468b977ed3afdd7fe5869701 \
+    --hash=sha256:90d3065ae74c9342048ae0046006f99dcb1388b7288da5a19b3bddf9c30c3176 \
+    --hash=sha256:96add4f783c5ce57cc7e650b6cac79dd101daf887c479a00a29bc1487ced180b \
+    --hash=sha256:96e42f73c31b8c24259c5fac6adba0c3ebf95536e37749dc6c62ade2989dca28 \
+    --hash=sha256:a3cf37881df0898f3a9c0d4477df88133fe85185bffe57ba31bcc2fa207709bc \
+    --hash=sha256:da2230484e6102e5fa3cc1a5dd37ca1f92dfbd183d91662074d6f7574e3e8f53 \
+    --hash=sha256:e5db4824ebd5389ea30e54bc8aeccb82d514d28b6b68da6c536b8fa4596f4bca \
+    --hash=sha256:eda7d7823c9282e65234731fd6bd3986b1f9e035755f7fed248d7d366bb291ab
+# via
+#   -r python/requirements/ml/py313/data-test-requirements.txt
+#   zarr
 numexpr==2.14.1; python_full_version < "3.12" \
     --hash=sha256:03130afa04edf83a7b590d207444f05a00363c9b9ea5d81c0f53b1ea13fad55a \
     --hash=sha256:05f9366d23a2e991fd5a8b5e61a17558f028ba86158a4552f8f239b005cdf83c \
@@ -2767,6 +2799,7 @@ numexpr==2.14.1; python_full_version < "3.12" \
 #   keras
 #   ml-dtypes
 #   modin
+#   numcodecs
 #   numexpr
 #   pandas
 #   pylance
@@ -2786,6 +2819,7 @@ numexpr==2.14.1; python_full_version < "3.12" \
 #   torchtext
 #   torchvision
 #   webdataset
+#   zarr
 nvidia-nccl-cu12==2.27.5; platform_machine != "aarch64" and sys_platform == "linux" \
     --hash=sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457
 # via
@@ -5456,6 +5490,10 @@ yarl==1.23.0 \
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
 #   aiohttp
 #   delta-sharing
+zarr==2.18.3; python_full_version < "3.11" \
+    --hash=sha256:2580d8cb6dd84621771a10d31c4d777dca8a27706a1a89b29f42d2d37e2df5ce \
+    --hash=sha256:b1f7dfd2496f436745cdd4c7bcf8d3b4bc1dceef5fdd0d589c87130d842496dd
+# via -r python/requirements/ml/py313/data-test-requirements.txt
 zict==3.0.0 \
     --hash=sha256:5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae \
     --hash=sha256:e321e263b6a97aafc0790c3cfb3c04656b7066e6738c37fffcca95d803c9fba5
diff --git a/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock b/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
index b0c073f29b5d..a80945e03499 100644
--- a/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
+++ b/python/deplocks/ci/relaxed_data-ci_depset_py3.12.lock
@@ -2714,6 +2714,7 @@ numcodecs==0.15.1 \
     --hash=sha256:eeed77e4d6636641a2cc605fbc6078c7a8f2cc40f3dfa2b3f61e52e6091b04ff
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
+#   -r python/requirements/ml/py313/data-test-requirements.txt
 #   zarr
 # via
 #   -c /tmp/ray-deps/requirements_compiled_py3.13.txt
diff --git a/python/requirements/ml/py313/data-test-requirements.txt b/python/requirements/ml/py313/data-test-requirements.txt
index 0729466ed4e6..1d7dca98db71 100644
--- a/python/requirements/ml/py313/data-test-requirements.txt
+++ b/python/requirements/ml/py313/data-test-requirements.txt
@@ -37,3 +37,9 @@ tf-keras
 torchvision==0.24.0
 confluent-kafka
 zarr<3 ; python_version >= '3.11'  # zarr 2.18.4+ requires py3.11+ (v2 API)
+zarr>=2.18,<2.18.4 ; python_version < '3.11'  # 2.18.3: last v2 line supporting py3.10
+# numcodecs is zarr's codec dep; 0.14+ dropped py3.10. Pin per-Python with exact
+# versions so the markers survive pip-compile -- the compiled-constraint pin must
+# stay gated to py3.11+, otherwise the py3.10 data locks can't resolve zarr.
+numcodecs==0.15.1 ; python_version >= '3.11'
+numcodecs==0.13.1 ; python_version < '3.11'
diff --git a/python/requirements_compiled_py3.13.txt b/python/requirements_compiled_py3.13.txt
index 432882d7321b..cfde6eee2eea 100644
--- a/python/requirements_compiled_py3.13.txt
+++ b/python/requirements_compiled_py3.13.txt
@@ -1270,8 +1270,10 @@ numba==0.61.2
     # via
     #   -r python/requirements/py313/test-requirements.txt
     #   statsforecast
-numcodecs==0.15.1
-    # via zarr
+numcodecs==0.15.1 ; python_version >= "3.11"
+    # via
+    #   -r python/requirements/ml/py313/data-test-requirements.txt
+    #   zarr
 numexpr==2.14.1
     # via
     #   -r python/requirements/ml/py313/rllib-test-requirements.txt

From 4aa5437ea9764269ae7bac7cf10d525ee9653b3a Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 12:49:19 +0200
Subject: [PATCH 12/45] [data] read_zarr: fix latent bugs surfaced by py3.10
 test coverage

Running test_zarrv2 on py3.10 (now that zarr installs there) exercised two
pre-existing bugs the previously-skipped/gated lanes never hit:

- _read_chunk is declared `-> np.ndarray` but returned zarr's widened
  `Array | Group` indexing result; wrap in `np.asarray` so the return type
  holds. pyrefly only flags this once zarr's types are visible on py3.10.
- test_custom_codec_succeeds_with_worker_setup_hook passed the codec
  registration as a *code string* to `worker_process_setup_hook`, which
  expects a callable or dotted import path -- Ray imported the blob as a
  module (ModuleNotFoundError). Pass a local callable (cloud-pickled to
  workers) instead, matching Ray's documented hook contract.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py |  4 +-
 .../ray/data/tests/datasource/test_zarrv2.py  | 66 +++++++++----------
 2 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 9d180913ab66..230c6106e3f6 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -259,7 +259,9 @@ def _read_chunk(
     """
     indexer = tuple(slice(s, e) for s, e in chunk_slices)
     arr = root if array_name == "" else root[array_name]
-    return arr[indexer]
+    # ``arr`` is a zarr Array here (the caller resolves a concrete array path),
+    # but zarr's types widen it to Array | Group; asarray pins the ndarray return.
+    return np.asarray(arr[indexer])
 
 
 @dataclass(frozen=True)
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 9a40df331aa8..f399a4f6759f 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -866,48 +866,42 @@ def test_read_zarr_integration_public_s3(ray_start_regular_shared):
 # ---------------------------------------------------------------------------
 
 
-# Hook string registers a custom (non-stdlib) codec in each worker process.
-# numcodecs.registry is process-local — built-in codecs (blosc, gzip, zstd)
-# register themselves at import time, but anything else (including
-# ``imagecodecs_jpegxl``) must be explicitly registered in every process
-# that decodes chunks. Ray workers are separate Python processes, so the
-# driver's registration does NOT propagate. The standard fix is to run
-# this registration in each worker via ``runtime_env``'s
-# ``worker_process_setup_hook``.
-_CUSTOM_CODEC_HOOK = """
-import numcodecs
-import numpy as np
-
-class _RayZarrTestCodec(numcodecs.abc.Codec):
-    codec_id = "ray_zarr_test_codec"
-
-    def encode(self, buf):
-        return bytes(buf)
+def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
+    """``worker_process_setup_hook`` runs once per worker, before any task,
+    registering a custom codec in the worker's process so chunk decode succeeds.
+
+    numcodecs' registry is process-local: built-in codecs (blosc, gzip, zstd)
+    self-register at import, but a custom codec must be registered in every
+    process that decodes chunks. Ray workers are separate processes, so the
+    driver's registration does not propagate -- ``worker_process_setup_hook``
+    runs the registration in each worker. The hook is passed as a *callable*
+    (cloud-pickled to the workers), not a code string; defining it locally keeps
+    the codec class out of the importable module surface.
+    """
+    import numcodecs
 
-    def decode(self, buf, out=None):
-        arr = np.frombuffer(buf, dtype=np.uint8)
-        if out is not None:
-            out[:] = arr.view(out.dtype)
-            return out
-        return arr.copy()
+    def _register_codec():
+        """Register the test codec in the current process (driver and workers)."""
+        import numcodecs
+        import numpy as np
 
-numcodecs.register_codec(_RayZarrTestCodec)
-"""
+        class _RayZarrTestCodec(numcodecs.abc.Codec):
+            codec_id = "ray_zarr_test_codec"
 
+            def encode(self, buf):
+                return bytes(buf)
 
-def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
-    """``worker_process_setup_hook`` runs once per worker, before any task,
-    registering the codec in the worker's process. Chunk decode succeeds.
+            def decode(self, buf, out=None):
+                arr = np.frombuffer(buf, dtype=np.uint8)
+                if out is not None:
+                    out[:] = arr.view(out.dtype)
+                    return out
+                return arr.copy()
 
-    Builds a tiny Zarr store compressed with a custom codec that numcodecs
-    doesn't auto-register. The driver registers the codec briefly to write
-    the store; Ray workers need their own registration to decode chunks,
-    which the ``worker_process_setup_hook`` arranges.
-    """
-    import numcodecs
+        numcodecs.register_codec(_RayZarrTestCodec)
 
     # Register driver-side so we can write the store.
-    exec(_CUSTOM_CODEC_HOOK, {})
+    _register_codec()
 
     store_path = tmp_path / "codec_test.zarr"
     arr = zarr.open(
@@ -927,7 +921,7 @@ def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
         num_cpus=1,
         logging_level=logging.ERROR,
         log_to_driver=False,
-        runtime_env={"worker_process_setup_hook": _CUSTOM_CODEC_HOOK},
+        runtime_env={"worker_process_setup_hook": _register_codec},
     )
     try:
         ds = ray.data.read_zarr(str(store_path))

From 0bfb881857fb3230abec374ed33121cf8da7ec20 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 13:22:35 +0200
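Subject: [PATCH 13/45] [data] test_zarrv2: revert import skip

zarr now installs on py3.10 as well, so import it unconditionally at module
level instead of skipping the whole test module via pytest.importorskip.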

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/tests/datasource/test_zarrv2.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index f399a4f6759f..7b2e90704c8f 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -9,16 +9,13 @@
 import pandas as pd
 import pyarrow.fs
 import pytest
+import zarr
 from pytest_lazy_fixtures import lf as lazy_fixture
 
 import ray
 from ray.data._internal.datasource import zarrv2_datasource
 from ray.data.tests.conftest import *  # noqa: F401, F403
 
-# zarr v2 requires Python 3.11+ (2.18.4+ dropped py3.10), so it isn't installed
-# on py3.10; skip the whole module there instead of hard-failing on import.
-zarr = pytest.importorskip("zarr")
-
 
 def _execute_read_tasks(tasks) -> pd.DataFrame:
     frames = [block for task in tasks for block in task()]

From 5b93d10956d6dc8db1aefd6c88bdffbe70a6aef2 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 13:49:17 +0200
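Subject: [PATCH 14/45] [data] read_zarr: fix review feedback

- ZarrArrayMeta.from_json: raise ValueError when the 'shape' and 'chunks'
  entries of a .zarray have different ranks.
- Datasource __init__: when an explicit ZipFileSystem is passed together with
  a ``.zip`` path, use the archive root as the store path.
- align_axis_0=True: raise ValueError for 0-D (scalar) arrays instead of
  treating their axis-0 length as 0.
- read_zarr docstring: show worker_process_setup_hook with a dotted import
  path rather than a string of code.
- Add tests for the rank-mismatch and scalar-array cases.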

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 49 ++++++++++++++-----
 python/ray/data/read_api.py                   | 11 +++--
 .../ray/data/tests/datasource/test_zarrv2.py  | 33 +++++++++++++
 3 files changed, 75 insertions(+), 18 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 230c6106e3f6..f5f8a1016018 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -69,11 +69,15 @@ def from_json(cls, raw_meta: dict[str, Any], array_path: str) -> ZarrArrayMeta:
                 f"Invalid .zarray metadata for array path {array_path!r}: "
                 f"missing required key(s) {missing}."
             )
-        return cls(
-            shape=tuple(int(x) for x in raw_meta["shape"]),
-            chunks=tuple(int(x) for x in raw_meta["chunks"]),
-            dtype=str(raw_meta["dtype"]),
-        )
+        shape = tuple(int(x) for x in raw_meta["shape"])
+        chunks = tuple(int(x) for x in raw_meta["chunks"])
+        if len(shape) != len(chunks):
+            raise ValueError(
+                f"Invalid .zarray metadata for array path {array_path!r}: "
+                f"'shape' has rank {len(shape)} but 'chunks' has rank "
+                f"{len(chunks)}; they must have the same number of dimensions."
+            )
+        return cls(shape=shape, chunks=chunks, dtype=str(raw_meta["dtype"]))
 
     @property
     def rank(self) -> int:
@@ -465,6 +469,8 @@ def __init__(
         #   2. ``.zip`` URL/path: auto-wrap with fsspec's ZipFileSystem.
         #   3. Otherwise delegate to Ray Data's standard URL to filesystem
         #      helper (the same one every other ``read_*`` API uses).
+        # "store path" is the path to the Zarr store, relative to the filesystem root.
+        # It is used to construct the Zarr root object.
         if filesystem is None and self.paths[0].endswith(".zip"):
             import fsspec
 
@@ -495,12 +501,21 @@ def __init__(
                     f"fsspec.spec.AbstractFileSystem, got "
                     f"{type(filesystem).__name__}"
                 )
-            # Strip any URI scheme (e.g. ``gs://`` / ``s3://``) so the path is
-            # backend-relative; pyarrow filesystems (wrapped in
-            # ``ArrowFSWrapper``) require this. Mirrors the ``filesystem is None``
-            # branch, which strips the scheme via ``_resolve_paths_and_filesystem``.
-            _, store_path = split_protocol(self.paths[0])
-            self._store_path = store_path.rstrip("/")
+            from fsspec.implementations.zip import ZipFileSystem
+
+            if isinstance(self._fs, ZipFileSystem) and self.paths[0].endswith(".zip"):
+                # An explicit archive filesystem: the store is the archive root,
+                # not a ``.zip``-named entry inside it. (A real sub-path within
+                # the archive is preserved by the scheme-strip below.)
+                self._store_path = ""
+            else:
+                # Strip any URI scheme (e.g. ``gs://`` / ``s3://``) so the path
+                # is backend-relative; pyarrow filesystems (wrapped in
+                # ``ArrowFSWrapper``) require this. Mirrors the
+                # ``filesystem is None`` branch, which strips the scheme via
+                # ``_resolve_paths_and_filesystem``.
+                _, store_path = split_protocol(self.paths[0])
+                self._store_path = store_path.rstrip("/")
 
         if chunk_shapes is not None and not isinstance(
             chunk_shapes, (tuple, list, dict)
@@ -551,9 +566,17 @@ def __init__(
         if not align_axis_0:
             self._aligned_array_names = None
         else:
+            scalar_arrays = sorted(
+                name for name, meta in self._metadata_by_path.items() if not meta.shape
+            )
+            if scalar_arrays:
+                raise ValueError(
+                    f"align_axis_0=True requires every selected array to have "
+                    f"at least one axis, but these are 0-D (scalar): "
+                    f"{scalar_arrays}. Drop them with array_paths=[...]."
+                )
             shape0_by_array = {
-                name: meta.shape[0] if meta.shape else 0
-                for name, meta in self._metadata_by_path.items()
+                name: meta.shape[0] for name, meta in self._metadata_by_path.items()
             }
             if len(set(shape0_by_array.values())) > 1:
                 raise ValueError(
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index c7e845608fdb..d8c94abaad02 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -1048,12 +1048,13 @@ def read_zarr(
         Zarr stores compressed with non-stdlib codecs (e.g.,
         ``imagecodecs_jpegxl`` for UMI camera arrays) require the codec
         package to be imported and registered in every Ray worker, not
-        just the driver. Use ``ray.init`` with a worker setup hook::
+        just the driver. Register them with a ``worker_process_setup_hook``
+        -- pass an importable callable or its dotted path (a string of code
+        is *not* accepted; a string is interpreted as an import path)::
 
-            ray.init(runtime_env={"worker_process_setup_hook": (
-                "import imagecodecs.numcodecs; "
-                "imagecodecs.numcodecs.register_codecs()"
-            )})
+            ray.init(runtime_env={
+                "worker_process_setup_hook": "imagecodecs.numcodecs.register_codecs"
+            })
 
         Driver-side ``.zmetadata`` parsing succeeds without this, but chunk
         decode in workers will fail with a ``numcodecs`` registry lookup
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 7b2e90704c8f..3c3b46d25315 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -961,6 +961,39 @@ def test_get_read_tasks_parallelism_zero(tmp_path):
     assert len(tasks) >= 1
 
 
+def test_rejects_shape_chunks_rank_mismatch():
+    """Malformed .zarray whose shape/chunks ranks differ must raise, not silently
+    plan reads over a dimension prefix (grid/slice zip to the shorter rank)."""
+    with pytest.raises(ValueError, match=r"'shape' has rank 2 but 'chunks' has rank 1"):
+        zarrv2_datasource.ZarrArrayMeta.from_json(
+            {"shape": [10, 10], "chunks": [5], "dtype": "<i4"}, "x"
+        )
+
+
+def test_align_axis_0_rejects_scalar_array(tmp_path):
+    """align_axis_0=True with a 0-D (scalar) array must raise a clear error
+    rather than an IndexError when reading the (empty) axis-0 chunk size."""
+    store_path = tmp_path / "scalar.zarr"
+    root = zarr.open_group(str(store_path), mode="w")
+    root.create_dataset("vec", data=np.arange(8, dtype="<i4"), chunks=(4,))
+    root.create_dataset("scalar", data=np.array(42, dtype="<i4"))  # 0-D
+    zarr.consolidate_metadata(str(store_path))
+
+    with pytest.raises(ValueError, match=r"0-D \(scalar\)"):
+        zarrv2_datasource.ZarrV2Datasource(str(store_path), align_axis_0=True)
+
+
+def test_reads_zarr_zip_with_explicit_zip_filesystem(zarr_zip_store):
+    """A .zip path read through an explicitly-passed fsspec ZipFileSystem must
+    resolve the store at the archive root (store path ``""``), not treat the
+    ``.zip`` name as an entry inside the archive."""
+    zip_fs = fsspec.filesystem("zip", fo=str(zarr_zip_store))
+    ds = zarrv2_datasource.ZarrV2Datasource(str(zarr_zip_store), filesystem=zip_fs)
+    assert ds._store_path == ""
+    df = _execute_read_tasks(ds.get_read_tasks(parallelism=2))
+    assert len(df) == 2
+
+
 if __name__ == "__main__":
     import sys
 

From 93a56c7eaae6360b93644cc0f2fda8570155ccb7 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 14:55:46 +0200
Subject: [PATCH 15/45] [data] read_zarr: address review (keyword-only
 options, reject non-positive chunks)

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py |  5 ++++
 python/ray/data/read_api.py                   | 21 +++++++-------
 .../ray/data/tests/datasource/test_zarrv2.py  | 28 +++++++++++++++++++
 3 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index f5f8a1016018..5f27b1e2c070 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -77,6 +77,11 @@ def from_json(cls, raw_meta: dict[str, Any], array_path: str) -> ZarrArrayMeta:
                 f"'shape' has rank {len(shape)} but 'chunks' has rank "
                 f"{len(chunks)}; they must have the same number of dimensions."
             )
+        if any(c <= 0 for c in chunks):
+            raise ValueError(
+                f"Invalid .zarray metadata for array path {array_path!r}: "
+                f"'chunks' must be positive, got {list(chunks)}."
+            )
         return cls(shape=shape, chunks=chunks, dtype=str(raw_meta["dtype"]))
 
     @property
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index d8c94abaad02..6da0f0db9d00 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -930,13 +930,13 @@ def read_videos(
 @PublicAPI(stability="alpha")
 def read_zarr(
     path: str,
+    *,
     filesystem: "pyarrow.fs.FileSystem | fsspec.spec.AbstractFileSystem | None" = None,
     chunk_shapes: dict[str, list] | list | None = None,
     array_paths: list[str] | None = None,
     allow_full_metadata_scan: bool = False,
     align_axis_0: bool = False,
     overlap: int = 0,
-    *,
     concurrency: Optional[int] = None,
     override_num_blocks: Optional[int] = None,
     num_cpus: Optional[float] = None,
@@ -963,6 +963,15 @@ def read_zarr(
     Arrays read in the same call need not share any dimension. Different
     ranks, shapes, dtypes, and native chunk sizes coexist as separate rows.
 
+    .. note::
+
+        The ``chunk`` column is a tensor, and tensors of different rank or
+        dtype can't be combined into one batch.
+        Consume long-form per array (filter on the ``array`` column first),
+        or, when the arrays are row-aligned (share ``shape[0]``), use
+        ``align_axis_0=True`` so each array is its own column -- which is
+        batch-safe.
+
     Aligned (wide-form, ``align_axis_0=True``) — one row per axis-0
     chunk, with one column per selected array. Columns:
 
@@ -976,16 +985,6 @@ def read_zarr(
     largest aligned subset. Use ``array_paths`` to pick which arrays to
     read — ``align_axis_0`` itself does not filter.
 
-    Which schema do I want? Stay on the default (long-form) when
-    reading one array, or when the arrays in the store don't all share
-    ``shape[0]`` (e.g., CMIP6 data variables alongside lat/lon coords,
-    anndata's ``X`` alongside ``var/*``, or OME-Zarr image+label arrays at
-    different resolutions). Switch to ``align_axis_0=True`` when you want
-    paired multi-array rows where each row is one "sample" or "timestep"
-    of every array at once — the canonical cases are supervised ML data
-    (paired ``images`` + ``labels``) and robotics imitation learning
-    (paired ``image`` + ``state`` + ``action`` at each timestep).
-
     Metadata discovery follows these rules:
 
     * If the store contains ``.zmetadata``, the datasource reads it and treats
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 3c3b46d25315..2073718501ac 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -994,6 +994,34 @@ def test_reads_zarr_zip_with_explicit_zip_filesystem(zarr_zip_store):
     assert len(df) == 2
 
 
+def test_align_axis_0_columns_unify_across_blocks(aligned_zarrv2_store):
+    """Wide-form gives each array its own column, so blocks combine cleanly
+    across the dataset even with trailing edge chunks of differing shape -- the
+    batch-safe schema for row-aligned arrays."""
+    from ray.data._internal.arrow_ops.transform_pyarrow import unify_schemas
+    from ray.data.block import BlockAccessor
+
+    ds = zarrv2_datasource.ZarrV2Datasource(
+        str(aligned_zarrv2_store), align_axis_0=True, chunk_shapes=[3]
+    )
+    blocks = [block for task in ds.get_read_tasks(parallelism=64) for block in task()]
+    assert len(blocks) > 1  # actually exercise cross-block unification
+    schemas = [BlockAccessor.for_block(b).to_arrow().schema for b in blocks]
+    unified = unify_schemas(schemas)  # must not raise
+    assert {"t_start", "t_stop", "img", "state", "label"}.issubset(set(unified.names))
+
+
+@pytest.mark.parametrize("bad_chunks", [[0], [10, 0], [-2]])
+def test_rejects_non_positive_chunks(bad_chunks):
+    """Zero chunk dims would divide-by-zero in grid_shape and negative dims would
+    silently drop the array; both must raise at metadata parse time."""
+    shape = [10] * len(bad_chunks)
+    with pytest.raises(ValueError, match="'chunks' must be positive"):
+        zarrv2_datasource.ZarrArrayMeta.from_json(
+            {"shape": shape, "chunks": bad_chunks, "dtype": "<i4"}, "x"
+        )
+
+
 if __name__ == "__main__":
     import sys
 

From 6496842880701bfbbf86e05a928f1b7d9621b4f6 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 15:56:13 +0200
Subject: [PATCH 16/45] ignore zarr and numcodecs under pyrefly

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 pyrefly.toml | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pyrefly.toml b/pyrefly.toml
index 17dae8f95385..24464f7e184d 100644
--- a/pyrefly.toml
+++ b/pyrefly.toml
@@ -65,6 +65,14 @@ ignore-missing-imports = [
     "rapidsmpf.*",
     "rmm.*",
     "confluent_kafka.*",
+]
+
+# zarr and numcodecs are installed in the data test images, so pyrefly resolves
+# their (strict) stubs -- but read_zarr targets their largely-untyped runtime
+# API, and the tests use it accordingly (e.g. str store paths, a custom codec).
+# Treat them as Any: ``ignore-missing-imports`` only applies to modules that
+# can't be found, not installed ones, so these belong here instead.
+replace-imports-with-any = [
     "zarr.*",
     "numcodecs.*",
 ]

From 79736f51cce46ee19f7780a80b1ff340e0af1928 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 5 Jun 2026 16:16:52 +0200
Subject: [PATCH 17/45] fix pyrefly: pass DirectoryStore in tests, treat only
 numcodecs as Any

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 pyrefly.toml                                    | 13 +++++++------
 python/ray/data/tests/datasource/test_zarrv2.py | 12 ++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/pyrefly.toml b/pyrefly.toml
index 24464f7e184d..873516f90193 100644
--- a/pyrefly.toml
+++ b/pyrefly.toml
@@ -65,14 +65,15 @@ ignore-missing-imports = [
     "rapidsmpf.*",
     "rmm.*",
     "confluent_kafka.*",
+    "zarr.*",
 ]
 
-# zarr and numcodecs are installed in the data test images, so pyrefly resolves
-# their (strict) stubs -- but read_zarr targets their largely-untyped runtime
-# API, and the tests use it accordingly (e.g. str store paths, a custom codec).
-# Treat them as Any: ``ignore-missing-imports`` only applies to modules that
-# can't be found, not installed ones, so these belong here instead.
+# numcodecs' ``abc.Codec`` ABC declares its abstract ``encode``/``decode`` with
+# empty bodies, so pyrefly infers their return type as ``None`` and flags any
+# real codec subclass (like the one in test_zarrv2) as an inconsistent override
+# -- a numcodecs typing defect we can't satisfy without lying about the return
+# type. So treat numcodecs as Any. (``ignore-missing-imports`` only covers
+# modules that can't be found, not installed ones, so it belongs here.)
 replace-imports-with-any = [
-    "zarr.*",
     "numcodecs.*",
 ]
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 2073718501ac..427839ff1aab 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -41,7 +41,7 @@ def _write_real_zarr_store(
     root = zarr.open_group(str(store_path), mode="w")
     for name, (data, chunks) in arrays.items():
         root.create_dataset(name, data=data, chunks=chunks, dtype=data.dtype)
-    zarr.consolidate_metadata(str(store_path))
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
     return store_path
 
 
@@ -69,7 +69,7 @@ def zarrv2_root_store(tmp_path) -> Path:
         dtype="<i4",
     )
     arr[:] = np.arange(20, dtype="<i4").reshape(5, 4)
-    zarr.consolidate_metadata(str(store_path))
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
     return store_path
 
 
@@ -108,7 +108,7 @@ def heterogeneous_zarrv2_store(tmp_path) -> Path:
         data=np.array([5, 12, 20], dtype="<i8"),
         chunks=(3,),
     )
-    zarr.consolidate_metadata(str(store_path))
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
     return store_path
 
 
@@ -153,7 +153,7 @@ def aligned_zarrv2_store(tmp_path) -> Path:
         data=np.arange(8, dtype="<i8"),
         chunks=(8,),
     )
-    zarr.consolidate_metadata(str(store_path))
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
     return store_path
 
 
@@ -910,7 +910,7 @@ def decode(self, buf, out=None):
         compressor=numcodecs.get_codec({"id": "ray_zarr_test_codec"}),
     )
     arr[:] = np.arange(8, dtype="u1")
-    zarr.consolidate_metadata(str(store_path))
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
 
     if ray.is_initialized():
         ray.shutdown()
@@ -977,7 +977,7 @@ def test_align_axis_0_rejects_scalar_array(tmp_path):
     root = zarr.open_group(str(store_path), mode="w")
     root.create_dataset("vec", data=np.arange(8, dtype="<i4"), chunks=(4,))
     root.create_dataset("scalar", data=np.array(42, dtype="<i4"))  # 0-D
-    zarr.consolidate_metadata(str(store_path))
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
 
     with pytest.raises(ValueError, match=r"0-D \(scalar\)"):
         zarrv2_datasource.ZarrV2Datasource(str(store_path), align_axis_0=True)

From 8dec88ca3ea7d7a827f437c9283b9b2be067e006 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Wed, 17 Jun 2026 14:24:22 +0200
Subject: [PATCH 18/45] [data] read_zarr: drop validation that zarr.open
 already performs

Build array metadata (shape/chunks/dtype) off the already-opened zarr root
instead of hand-parsing .zarray/.zmetadata JSON. zarr reads and validates the
store on open, so the datasource no longer re-checks missing metadata keys,
shape/chunks rank agreement, positive chunk sizes, or the consolidated
"metadata" key. Removes the 3 _load_metadata_* JSON helpers and
ZarrArrayMeta.from_json; discovery now uses consolidated metadata or zarr group
traversal.

Also removes the 5 tests that asserted zarr's own metadata validation (missing
key / rank mismatch / non-positive chunks / missing "metadata" / from_json) and
the test for the now-removed "Full-store scan ... found no .zarray files" error,
keeping the datasource's discovery, schema, chunking, and alignment tests.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 228 +++++-------------
 .../ray/data/tests/datasource/test_zarrv2.py  |  81 +------
 2 files changed, 63 insertions(+), 246 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 5f27b1e2c070..54afa70ea036 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -15,13 +15,12 @@
 
 from __future__ import annotations
 
-import json
 import logging
 import math
 from collections.abc import Callable, Iterable
 from dataclasses import dataclass
 from itertools import product
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
 import pandas as pd
@@ -44,45 +43,22 @@
     ZarrRoot = ZarrGroup | ZarrArray
 
 
-REQUIRED_ZARRAY_KEYS = ("shape", "chunks", "dtype")
-
-
 @dataclass(frozen=True)
 class ZarrArrayMeta:
-    """Validated ``.zarray`` metadata for a single Zarr v2 array."""
+    """``shape``/``chunks``/``dtype`` for a single Zarr v2 array."""
 
     shape: tuple[int, ...]
     chunks: tuple[int, ...]
     dtype: str
 
     @classmethod
-    def from_json(cls, raw_meta: dict[str, Any], array_path: str) -> ZarrArrayMeta:
-        """Validate and parse a ``.zarray`` JSON object into a ZarrArrayMeta.
-
-        Raises ``ValueError`` if any of ``shape``/``chunks``/``dtype`` is
-        missing. ``array_path`` is included in the error message so callers
-        don't have to thread context themselves.
-        """
-        missing = [k for k in REQUIRED_ZARRAY_KEYS if k not in raw_meta]
-        if missing:
-            raise ValueError(
-                f"Invalid .zarray metadata for array path {array_path!r}: "
-                f"missing required key(s) {missing}."
-            )
-        shape = tuple(int(x) for x in raw_meta["shape"])
-        chunks = tuple(int(x) for x in raw_meta["chunks"])
-        if len(shape) != len(chunks):
-            raise ValueError(
-                f"Invalid .zarray metadata for array path {array_path!r}: "
-                f"'shape' has rank {len(shape)} but 'chunks' has rank "
-                f"{len(chunks)}; they must have the same number of dimensions."
-            )
-        if any(c <= 0 for c in chunks):
-            raise ValueError(
-                f"Invalid .zarray metadata for array path {array_path!r}: "
-                f"'chunks' must be positive, got {list(chunks)}."
-            )
-        return cls(shape=shape, chunks=chunks, dtype=str(raw_meta["dtype"]))
+    def from_zarr_array(cls, arr: "ZarrArray") -> ZarrArrayMeta:
+        """Adapt an opened ``zarr.Array`` (already validated by zarr on open)."""
+        return cls(
+            shape=tuple(int(s) for s in arr.shape),
+            chunks=tuple(int(c) for c in arr.chunks),
+            dtype=str(arr.dtype),
+        )
 
     @property
     def rank(self) -> int:
@@ -157,93 +133,6 @@ def chunk_slices(
         )
 
 
-# ---------------------------------------------------------------------------
-# Metadata discovery
-# ---------------------------------------------------------------------------
-
-
-def _load_metadata_from_zmetadata_file(
-    fs, z_meta_path: str
-) -> dict[str, ZarrArrayMeta]:
-    """Load all arrays listed in a consolidated ``.zmetadata`` file."""
-    with fs.open(z_meta_path, "rb") as f:
-        consolidated = json.load(f)
-    if "metadata" not in consolidated:
-        raise ValueError(
-            f"Missing 'metadata' key in consolidated metadata at {z_meta_path}."
-        )
-    out: dict[str, ZarrArrayMeta] = {}
-    for key, value in consolidated["metadata"].items():
-        if not key.endswith(".zarray"):
-            continue
-        array_path = "" if key == ".zarray" else key[: -len("/.zarray")]
-        out[array_path] = ZarrArrayMeta.from_json(value, array_path)
-    return out
-
-
-def _load_metadata_from_array_paths(
-    fs, store_path: str, array_paths: Iterable[str]
-) -> dict[str, ZarrArrayMeta]:
-    """Load ``.zarray`` files for the user's explicit array paths.
-
-    Each path is normalized via :func:`zarr.util.normalize_storage_path`,
-    which strips surrounding slashes, collapses doubles, and rejects
-    ``.``/``..`` segments. Raises ``ValueError`` if a requested path has
-    no ``.zarray`` file at the expected location.
-    """
-    from zarr.util import normalize_storage_path
-
-    store_root = store_path.rstrip("/")
-    out: dict[str, ZarrArrayMeta] = {}
-    for raw in array_paths:
-        normalized = normalize_storage_path(raw)
-        zarray_path = (
-            f"{store_root}/{normalized}/.zarray"
-            if normalized
-            else f"{store_root}/.zarray"
-        )
-        try:
-            with fs.open(zarray_path, "rb") as f:
-                raw_meta = json.load(f)
-        except FileNotFoundError as e:
-            raise ValueError(
-                f"Array path {raw!r} not found: no .zarray file at {zarray_path}"
-            ) from e
-        out[normalized] = ZarrArrayMeta.from_json(raw_meta, normalized)
-    return out
-
-
-def _load_metadata_full_scan(fs, store_path: str) -> dict[str, ZarrArrayMeta]:
-    """Recursively walk ``store_path`` for ``.zarray`` files.
-
-    Each discovered relative path is canonicalized via
-    :func:`zarr.util.normalize_storage_path` so the output keys match the
-    format used by the other metadata-loading paths regardless of whether
-    the underlying ``fs.walk`` yields trailing slashes.
-    """
-    from zarr.util import normalize_storage_path
-
-    store_root = store_path.rstrip("/")
-    store_prefix = store_root + "/"
-    out: dict[str, ZarrArrayMeta] = {}
-    for dirpath, _, filenames in fs.walk(store_path):
-        if ".zarray" not in filenames:
-            continue
-        dirpath = dirpath.rstrip("/")
-        if dirpath == store_root:
-            array_path = ""
-        else:
-            array_path = normalize_storage_path(dirpath.removeprefix(store_prefix))
-        zarray_path = f"{dirpath}/.zarray"
-        try:
-            with fs.open(zarray_path, "rb") as f:
-                raw = json.load(f)
-        except FileNotFoundError:
-            continue
-        out[array_path] = ZarrArrayMeta.from_json(raw, array_path)
-    return out
-
-
 # ---------------------------------------------------------------------------
 # Chunk reading
 # ---------------------------------------------------------------------------
@@ -546,6 +435,17 @@ def __init__(
 
                 self.chunk_shapes = tuple(chunk_shapes)
 
+        # Open the store with zarr (consolidated metadata when available). zarr
+        # reads and validates `.zarray`/`.zmetadata` here, so the datasource does
+        # not re-check that metadata itself.
+        store = self._fs.get_mapper(self._store_path)
+        z_meta_path = f"{self._store_path.rstrip('/')}/.zmetadata"
+        self._consolidated = self._fs.exists(z_meta_path)
+        if self._consolidated:
+            self.root = zarr.open_consolidated(store, mode="r")
+        else:
+            self.root = zarr.open(store, mode="r")
+
         self._metadata_by_path = self._load_metadata(array_paths)
         if not self._metadata_by_path:
             raise ValueError(
@@ -631,8 +531,6 @@ def __init__(
                     f"arrays to the same axis-0 prefix) to re-tile them."
                 )
 
-        self.root = zarr.open(self._fs.get_mapper(self._store_path), mode="r")
-
     def estimate_inmemory_data_size(self) -> Optional[int]:
         """Total bytes = sum over selected arrays of ``prod(shape) * itemsize``."""
         return sum(
@@ -787,56 +685,54 @@ def _estimate_aligned_batch_mem_size(
         )
 
     def _load_metadata(self, array_paths) -> dict[str, ZarrArrayMeta]:
-        """Discover and load ``.zarray`` metadata for the selected arrays.
-
-        Discovery prefers consolidated ``.zmetadata`` when it exists. If the
-        store has no ``.zmetadata``, the datasource falls back to reading each
-        requested array's ``.zarray`` directly (when ``array_paths`` is given)
-        or to a recursive scan (when ``allow_full_metadata_scan`` is set).
-        If ``array_paths`` is given, the discovered set is filtered down to it;
-        any requested paths that aren't present in the store raise a
-        ``ValueError`` listing what is available.
+        """Read ``shape``/``chunks``/``dtype`` for the selected arrays off ``self.root``.
+
+        zarr validated the store's metadata when it was opened, so this only
+        adapts the resulting ``zarr.Array`` objects. Discovery uses consolidated
+        metadata when present, then explicit ``array_paths``, then an optional
+        full scan (``allow_full_metadata_scan``). If ``array_paths`` is given,
+        the discovered set is filtered down to it.
         """
-        fs, store_path = self._fs, self._store_path
-
-        z_meta_path = f"{store_path.rstrip('/')}/.zmetadata"
-        if fs.exists(z_meta_path):
-            logger.debug("Loading .zmetadata file")
-            all_arrays = _load_metadata_from_zmetadata_file(fs, z_meta_path)
-        elif array_paths:
-            logger.debug("No .zmetadata; reading requested .zarray files directly")
-            all_arrays = _load_metadata_from_array_paths(fs, store_path, array_paths)
-        elif self.allow_full_metadata_scan:
-            logger.info(
-                "No array_paths provided and no .zmetadata found; "
-                "executing full scan of Zarr store metadata"
-            )
-            all_arrays = _load_metadata_full_scan(fs, store_path)
-            if not all_arrays:
-                # ``fs.walk`` silently returns nothing on filesystems without
-                # directory-listing support (most commonly plain HTTP/HTTPS).
-                # That's distinct from "store exists but has no arrays", so
-                # surface the likely cause.
+        import zarr
+        from zarr.util import normalize_storage_path
+
+        root = self.root
+
+        if isinstance(root, zarr.Array):
+            return {"": ZarrArrayMeta.from_zarr_array(root)}
+
+        requested = (
+            {normalize_storage_path(p) for p in array_paths} if array_paths else None
+        )
+
+        if not self._consolidated and not self.allow_full_metadata_scan:
+            if requested is None:
                 raise ValueError(
-                    f"Full-store scan of {self.paths[0]!r} found no .zarray "
-                    "files. This can occur if the filesystem does not "
-                    "support recursive directory listing (e.g., plain "
-                    "HTTP/HTTPS without an object-store listing API). Pass "
-                    "array_paths=[...] with explicit array names to read "
-                    "from this kind of store."
+                    "No array_paths were provided and this Zarr store does not "
+                    "contain .zmetadata. Pass array_paths=[...] or set "
+                    "allow_full_metadata_scan=True."
                 )
-        else:
-            raise ValueError(
-                "No array_paths were provided and this Zarr store does not "
-                "contain .zmetadata. Pass array_paths=[...] or set "
-                "allow_full_metadata_scan=True."
-            )
+            out: dict[str, ZarrArrayMeta] = {}
+            for raw in array_paths:
+                name = normalize_storage_path(raw)
+                try:
+                    arr = root[name]
+                except KeyError as e:
+                    raise ValueError(
+                        f"Array path {raw!r} not found in Zarr store."
+                    ) from e
+                out[name] = ZarrArrayMeta.from_zarr_array(arr)
+            return out
+
+        all_arrays: dict[str, ZarrArrayMeta] = {}
 
-        if array_paths:
-            from zarr.util import normalize_storage_path
+        def _collect(name: str, obj) -> None:
+            if isinstance(obj, zarr.Array):
+                all_arrays[name] = ZarrArrayMeta.from_zarr_array(obj)
 
-            requested = {normalize_storage_path(p) for p in array_paths}
+        root.visititems(_collect)
 
+        if requested is not None:
             missing = sorted(requested - all_arrays.keys())
             if missing:
                 raise ValueError(
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 427839ff1aab..c1bbf14e56dd 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -1,4 +1,3 @@
-import json
 import logging
 import os
 from pathlib import Path
@@ -211,27 +210,6 @@ def test_rejects_missing_array_paths(zarrv2_group_store):
         )
 
 
-def test_requires_consolidated_metadata(tmp_path):
-    store_path = tmp_path / "broken.zarr"
-    store_path.mkdir()
-    (store_path / ".zmetadata").write_text(json.dumps({}))
-
-    with pytest.raises(ValueError, match="Missing 'metadata'"):
-        zarrv2_datasource.ZarrV2Datasource(str(store_path))
-
-
-def test_rejects_empty_full_scan_with_actionable_error(tmp_path):
-    empty_store = tmp_path / "empty.zarr"
-    empty_store.mkdir()  # no .zmetadata, no .zarray files anywhere
-
-    with pytest.raises(
-        ValueError, match=r"Full-store scan of .* found no \.zarray files.*"
-    ):
-        zarrv2_datasource.ZarrV2Datasource(
-            str(empty_store), allow_full_metadata_scan=True
-        )
-
-
 def test_loads_per_array_zarray_without_zmetadata(unconsolidated_zarrv2_store):
     datasource = zarrv2_datasource.ZarrV2Datasource(
         str(unconsolidated_zarrv2_store),
@@ -266,7 +244,7 @@ def test_array_paths_missing_zarray_file_raises_value_error(
 ):
     with pytest.raises(
         ValueError,
-        match=r"Array path 'missing' not found: no \.zarray file at",
+        match=r"Array path 'missing' not found",
     ):
         zarrv2_datasource.ZarrV2Datasource(
             str(unconsolidated_zarrv2_store),
@@ -274,43 +252,6 @@ def test_array_paths_missing_zarray_file_raises_value_error(
         )
 
 
-def test_rejects_zmetadata_with_malformed_zarray_entry(tmp_path):
-    store_path = tmp_path / "malformed.zarr"
-    store_path.mkdir()
-    (store_path / ".zmetadata").write_text(
-        json.dumps(
-            {
-                "metadata": {
-                    "broken/.zarray": {"shape": [5], "chunks": [2]},  # no dtype
-                }
-            }
-        )
-    )
-
-    with pytest.raises(
-        ValueError,
-        match=r"missing required key\(s\) \['dtype'\]",
-    ):
-        zarrv2_datasource.ZarrV2Datasource(str(store_path))
-
-
-# ---------------------------------------------------------------------------
-# ZarrArrayMeta
-# ---------------------------------------------------------------------------
-
-
-def test_zarr_array_meta_from_json_parses_required_fields():
-    meta = zarrv2_datasource.ZarrArrayMeta.from_json(
-        {"shape": [5, 3], "chunks": [2, 3], "dtype": "<f8", "extra": "ignored"},
-        "some/path",
-    )
-    assert meta.shape == (5, 3)
-    assert meta.chunks == (2, 3)
-    assert meta.dtype == "<f8"
-    assert meta.rank == 2
-    assert meta.itemsize == 8
-
-
 # ---------------------------------------------------------------------------
 # chunk_shapes validation
 # ---------------------------------------------------------------------------
@@ -961,15 +902,6 @@ def test_get_read_tasks_parallelism_zero(tmp_path):
     assert len(tasks) >= 1
 
 
-def test_rejects_shape_chunks_rank_mismatch():
-    """Malformed .zarray whose shape/chunks ranks differ must raise, not silently
-    plan reads over a dimension prefix (grid/slice zip to the shorter rank)."""
-    with pytest.raises(ValueError, match=r"'shape' has rank 2 but 'chunks' has rank 1"):
-        zarrv2_datasource.ZarrArrayMeta.from_json(
-            {"shape": [10, 10], "chunks": [5], "dtype": "<i4"}, "x"
-        )
-
-
 def test_align_axis_0_rejects_scalar_array(tmp_path):
     """align_axis_0=True with a 0-D (scalar) array must raise a clear error
     rather than an IndexError when reading the (empty) axis-0 chunk size."""
@@ -1011,17 +943,6 @@ def test_align_axis_0_columns_unify_across_blocks(aligned_zarrv2_store):
     assert {"t_start", "t_stop", "img", "state", "label"}.issubset(set(unified.names))
 
 
-@pytest.mark.parametrize("bad_chunks", [[0], [10, 0], [-2]])
-def test_rejects_non_positive_chunks(bad_chunks):
-    """Zero chunk dims would divide-by-zero in grid_shape and negative dims would
-    silently drop the array; both must raise at metadata parse time."""
-    shape = [10] * len(bad_chunks)
-    with pytest.raises(ValueError, match="'chunks' must be positive"):
-        zarrv2_datasource.ZarrArrayMeta.from_json(
-            {"shape": shape, "chunks": bad_chunks, "dtype": "<i4"}, "x"
-        )
-
-
 if __name__ == "__main__":
     import sys
 

From ed57c2b52ffddb47fbb8434da7dbdd74fd76ea1a Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Wed, 17 Jun 2026 21:49:19 +0200
Subject: [PATCH 19/45] [data] read_zarr: trim cruft (docstrings/comments) and
 consolidate validation tests

Cruft trims, no functional or performance change:
- Trim the ZarrV2Datasource class docstring (read_zarr already documents the
  row schemas/columns) and two verbose internal docstrings (effective_chunks
  worked example, _AlignedChunkDescriptor).
- Dedupe the chunk_shapes dict-value validation (two checks -> one; identical
  accept/reject behavior).

Tests: consolidate the five chunk_shapes rejection tests into one parametrized
test, preserving every case and error match.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 88 ++++--------------
 .../ray/data/tests/datasource/test_zarrv2.py  | 90 ++++++-------------
 2 files changed, 45 insertions(+), 133 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 54afa70ea036..4c97ff488c2d 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -76,28 +76,11 @@ def effective_chunks(
     ) -> tuple[int, ...]:
         """Resolve the user's ``chunk_shapes`` override(s) against this array's chunks.
 
-        When ``user_chunk_shape`` is a single sequence, it is treated as a
-        prefix that overrides the leading axes; trailing axes keep the
-        array's native chunk values. This lets a single
-        ``chunk_shapes=[16]`` apply meaningfully across arrays of different
-        ranks (e.g., 4-D images alongside 2-D poses).
-
-        When ``user_chunk_shape`` is a dict, it is interpreted as a
-        per-array mapping from array path to that array's override prefix.
-        Arrays omitted from the mapping keep their native chunks.
-
-        - ``None`` → use native chunks unchanged.
-        - shorter than rank → override leading axes, keep native for the rest.
-        - same length as rank → use as-is.
-        - longer than rank → ``ValueError``.
-
-        Example with array shape ``(200, 28, 28)``, native chunks ``(50, 28, 28)``:
-
-            user=None              → (50, 28, 28)
-            user=(16,)             → (16, 28, 28)
-            user=(16, 14)          → (16, 14, 28)
-            user=(16, 14, 14)      → (16, 14, 14)
-            user=(16, 14, 14, 1)   → ValueError
+        A single sequence overrides the leading axes (trailing axes keep the
+        native chunks), so one ``chunk_shapes=[16]`` applies across arrays of
+        different ranks. A dict maps array path → that array's override prefix;
+        arrays absent from it keep native chunks. ``None`` keeps native chunks;
+        an override longer than the array's rank raises ``ValueError``.
         """
         if user_chunk_shape is None:
             return self.chunks
@@ -173,14 +156,10 @@ class _ChunkDescriptor:
 
 @dataclass(frozen=True)
 class _AlignedChunkDescriptor:
-    """One wide-row's worth of read work: a global axis-0 range across N aligned arrays.
-
-    The row "owns" the range ``[t_start, t_stop)`` and reports those as
-    columns. When ``overlap > 0``, the row's actual data extends to
-    ``t_stop_data`` (which is ``min(t_stop + overlap, shape[0])``); the
-    trailing slice is the lookahead from the next row's owned range so
-    sliding windows that start in this row's owned range can reach their
-    full tail without crossing a Ray Data row boundary.
+    """One wide row: a global axis-0 range ``[t_start, t_stop)`` across the
+    aligned arrays. With ``overlap > 0`` the row's data extends to
+    ``t_stop_data = min(t_stop + overlap, shape[0])`` (lookahead so windows
+    starting in this row reach their tail without crossing a row boundary).
     """
 
     chunk_index: int
@@ -262,15 +241,14 @@ def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]
                 f"got key {k!r} of type {type(k).__name__}"
             )
 
-        if not isinstance(v, (tuple, list)) or not v:
-            raise ValueError(
-                f"chunk_shapes[{k!r}] must be non-empty sequence of "
-                f"positive integers (list or tuple), got {v!r}"
-            )
-        if any(isinstance(x, bool) or not isinstance(x, int) or x <= 0 for x in v):
+        if (
+            not isinstance(v, (tuple, list))
+            or not v
+            or any(isinstance(x, bool) or not isinstance(x, int) or x <= 0 for x in v)
+        ):
             raise ValueError(
-                f"chunk_shapes[{k!r}] must be a non-empty sequence of "
-                f"positive integers (list or tuple), got {v!r}"
+                f"chunk_shapes[{k!r}] must be a non-empty sequence of positive "
+                f"integers (list or tuple), got {v!r}"
             )
 
         normalized_key = normalize_storage_path(k)
@@ -295,37 +273,9 @@ def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]
 class ZarrV2Datasource(Datasource):
     """Reads one or more Zarr v2 arrays into a Ray Data ``Dataset``.
 
-    Two output schemas, selected at the call site via ``align_axis_0``:
-
-    Long-form (default, ``align_axis_0=False``) — one row per chunk per
-    array. Columns:
-
-    * ``array``: the source array's path within the store
-      (e.g., ``"data/camera0_rgb"``, or ``""`` for a root-level array).
-    * ``chunk_index``: the N-D position of this chunk in the array's chunk
-      grid, as a tuple of ints.
-    * ``chunk_slices``: per-axis ``(start, stop)`` of this chunk in the
-      source array's coordinate space.
-    * ``chunk``: the chunk's data as an ``ndarray`` at its natural shape
-      (possibly shorter at trailing boundaries — no padding).
-
-    Arrays in the same call need not share any dimension; they coexist as
-    separate rows distinguished by ``array``.
-
-    Wide-form (opt-in, ``align_axis_0=True``) — one row per axis-0
-    chunk, with one column per selected array. Columns:
-
-    * ``t_start`` / ``t_stop``: global axis-0 range of this row.
-    * ``<array_name>``: that array's ``[t_start:t_stop, ...]`` slice
-      (one column per selected array).
-
-    All selected arrays must share ``shape[0]`` and must end up with the
-    same axis-0 chunk size after :paramref:`chunk_shapes` resolution; if
-    they don't, ``__init__`` raises ``ValueError`` with a hint pointing at
-    the largest aligned subset. Use :paramref:`array_paths` to pick which
-    arrays to read — ``align_axis_0`` itself does not filter.
-
-    See :func:`ray.data.read_zarr` for the public API.
+    Emits long-form rows (one per chunk per array) or, with
+    ``align_axis_0=True``, wide rows (one per axis-0 chunk, one column per
+    array). See :func:`ray.data.read_zarr` for the row schemas and full API.
     """
 
     def __init__(
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index c1bbf14e56dd..eb3be312a263 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -258,18 +258,34 @@ def test_array_paths_missing_zarray_file_raises_value_error(
 
 
 @pytest.mark.parametrize(
-    "chunk_shapes",
-    ["invalid", 42, b"bytes", {1, 2}],
+    "chunk_shapes, match",
+    [
+        # Wrong container type (not list/tuple/dict).
+        ("invalid", "chunk_shapes must be a non-empty sequence of positive integers"),
+        (42, "chunk_shapes must be a non-empty sequence of positive integers"),
+        (b"bytes", "chunk_shapes must be a non-empty sequence of positive integers"),
+        ({1, 2}, "chunk_shapes must be a non-empty sequence of positive integers"),
+        # Bad dict values.
+        ({"images": 1}, r"chunk_shapes\['images'\] must be .*positive integers"),
+        ({"images": None}, r"chunk_shapes\['images'\] must be .*positive integers"),
+        ({"images": []}, r"chunk_shapes\['images'\] must be .*positive integers"),
+        ({"images": [0]}, r"chunk_shapes\['images'\] must be .*positive integers"),
+        ({"images": [1.5]}, r"chunk_shapes\['images'\] must be .*positive integers"),
+        # Bad dict keys.
+        (cast(Any, {1: [2]}), "chunk_shapes dict keys must be array-path strings"),
+        # Duplicate keys after normalization.
+        (
+            {"images": [2], "/images/": [3]},
+            "duplicate array paths after normalization",
+        ),
+        # Unknown array path.
+        ({"does_not_exist": [2]}, r"Unknown array path\(s\) in chunk_shapes"),
+    ],
 )
-def test_rejects_invalid_chunk_shapes(zarrv2_group_store, chunk_shapes):
-    """Non-list/non-tuple/non-dict inputs are rejected at construction time."""
-    with pytest.raises(
-        ValueError,
-        match="chunk_shapes must be a non-empty sequence of positive integers",
-    ):
+def test_rejects_invalid_chunk_shapes(zarrv2_group_store, chunk_shapes, match):
+    with pytest.raises(ValueError, match=match):
         zarrv2_datasource.ZarrV2Datasource(
-            str(zarrv2_group_store),
-            chunk_shapes=chunk_shapes,
+            str(zarrv2_group_store), chunk_shapes=chunk_shapes
         )
 
 
@@ -335,60 +351,6 @@ def test_chunk_shapes_resolution_across_mixed_rank(
     assert datasource._array_chunks == expected
 
 
-@pytest.mark.parametrize(
-    "chunk_shapes",
-    [
-        {"images": 1},
-        {"images": None},
-        {"images": []},
-        {"images": [0]},
-        {"images": [1.5]},
-    ],
-)
-def test_rejects_invalid_chunk_shapes_dict_values(zarrv2_group_store, chunk_shapes):
-    with pytest.raises(
-        ValueError,
-        match=r"chunk_shapes\['images'\] must be .*positive integers",
-    ):
-        zarrv2_datasource.ZarrV2Datasource(
-            str(zarrv2_group_store),
-            chunk_shapes=chunk_shapes,
-        )
-
-
-def test_rejects_invalid_chunk_shapes_dict_keys(zarrv2_group_store):
-    with pytest.raises(
-        ValueError,
-        match="chunk_shapes dict keys must be array-path strings",
-    ):
-        zarrv2_datasource.ZarrV2Datasource(
-            str(zarrv2_group_store),
-            chunk_shapes=cast(Any, {1: [2]}),
-        )
-
-
-def test_rejects_duplicate_normalized_chunk_shapes_keys(zarrv2_group_store):
-    with pytest.raises(
-        ValueError,
-        match="duplicate array paths after normalization",
-    ):
-        zarrv2_datasource.ZarrV2Datasource(
-            str(zarrv2_group_store),
-            chunk_shapes={"images": [2], "/images/": [3]},
-        )
-
-
-def test_rejects_unknown_chunk_shapes_keys(zarrv2_group_store):
-    with pytest.raises(
-        ValueError,
-        match="Unknown array path\\(s\\) in chunk_shapes",
-    ):
-        zarrv2_datasource.ZarrV2Datasource(
-            str(zarrv2_group_store),
-            chunk_shapes={"does_not_exist": [2]},
-        )
-
-
 # ---------------------------------------------------------------------------
 # align_axis_0 (wide-form mode)
 # ---------------------------------------------------------------------------

From 22b414d5c6304bedd9269da6cddc7ced091e04b1 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 16:22:04 +0200
Subject: [PATCH 20/45] [data] Add "Working with Zarr" guide; trim read_zarr
 docstring

Move the read_zarr prose and examples (output schemas, metadata
discovery, chunk re-tiling, aligned/sliding-window reads, custom codecs,
cloud storage) out of the API docstring into a dedicated narrative
guide, and reduce the docstring to a concise API reference that links to
the guide. This addresses review feedback that read_zarr was too verbose.

- New doc/source/data/working-with-zarr.md (MyST), the narrative guide.
- loading-data.rst: add a Zarr tab to the file-format tab-set.
- user-guide.rst: wire the guide into the user-guide toctree.
- read_api.py: trim the read_zarr docstring (~115 lines), keeping the
  Args/Returns API reference and pointing at the guide via :ref:.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 doc/source/data/loading-data.rst     |  13 ++
 doc/source/data/user-guide.rst       |   1 +
 doc/source/data/working-with-zarr.md | 179 +++++++++++++++++++++++++++
 python/ray/data/read_api.py          | 141 ++-------------------
 4 files changed, 206 insertions(+), 128 deletions(-)
 create mode 100644 doc/source/data/working-with-zarr.md

diff --git a/doc/source/data/loading-data.rst b/doc/source/data/loading-data.rst
index bb159433f7fb..71049f239a20 100644
--- a/doc/source/data/loading-data.rst
+++ b/doc/source/data/loading-data.rst
@@ -150,6 +150,19 @@ To view the full list of supported file formats, see the
             petal.width   float
             sepal.length  float
 
+    .. tab-item:: Zarr
+
+        To read a Zarr v2 store, call :func:`~ray.data.read_zarr`. By default Ray Data
+        emits one row per array chunk; with ``align_axis_0=True`` it emits one row per
+        axis-0 chunk across row-aligned arrays. See
+        :ref:`Working with Zarr <working_with_zarr>` for details.
+
+        .. code-block:: python
+
+            import ray
+
+            ds = ray.data.read_zarr("s3://anonymous@ray-example-data/mnist-tiny.zarr")
+
 
 Reading files from local disk
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/data/user-guide.rst b/doc/source/data/user-guide.rst
index a1cfac597a56..983f7662c755 100644
--- a/doc/source/data/user-guide.rst
+++ b/doc/source/data/user-guide.rst
@@ -24,6 +24,7 @@ shows you how to achieve several tasks.
     working-with-images
     working-with-text
     working-with-tensors
+    working-with-zarr
     working-with-pytorch
     working-with-llms
     monitoring-your-workload
diff --git a/doc/source/data/working-with-zarr.md b/doc/source/data/working-with-zarr.md
new file mode 100644
index 000000000000..81f1b0665b08
--- /dev/null
+++ b/doc/source/data/working-with-zarr.md
@@ -0,0 +1,179 @@
+(working_with_zarr)=
+
+# Working with Zarr
+
+Ray Data reads [Zarr v2](https://zarr.readthedocs.io/) stores — chunked, compressed,
+N-dimensional arrays on local disk or cloud object storage — with
+{func}`ray.data.read_zarr` (zarr-python 2.x / Zarr v2 stores).
+
+This guide covers:
+
+- [The two output schemas](#output-schemas) — long-form (default) and aligned wide-form
+- [Selecting arrays and metadata discovery](#selecting-arrays-and-metadata-discovery)
+- [Controlling chunk size](#controlling-chunk-size)
+- [Reading row-aligned arrays](#reading-row-aligned-arrays)
+- [Custom codecs](#custom-codecs)
+- [Cloud storage and credentials](#cloud-storage-and-credentials)
+
+For the full parameter reference, see {func}`ray.data.read_zarr`.
+
+## Output schemas
+
+`read_zarr` produces one of two schemas, selected by `align_axis_0`.
+
+### Long-form (default)
+
+By default each output row is **one chunk of one array**, with columns:
+
+- `array` — the array's path in the store (for example `"data/camera0_rgb"`, or `""` for a root-level array).
+- `chunk_index` — the N-D index of the chunk in its array's chunk grid.
+- `chunk_slices` — per-axis `(start, stop)` of the chunk in the array's coordinate space.
+- `chunk` — the chunk's data at its natural shape (trailing-edge chunks may be shorter; no padding).
+
+Arrays read in the same call need not share any dimension — different ranks, shapes,
+dtypes, and native chunk sizes coexist as separate rows.
+
+```python
+import ray
+
+ds = ray.data.read_zarr("s3://anonymous@ray-example-data/mnist-tiny.zarr")
+```
+
+```{note}
+The `chunk` column is a tensor, and tensors of different rank or dtype can't be
+combined into one batch. Consume long-form **per array** (filter on the `array`
+column first), or — when arrays are row-aligned (share `shape[0]`) — use
+`align_axis_0=True` so each array becomes its own column, which is batch-safe.
+```
+
+### Aligned wide-form (`align_axis_0=True`)
+
+With `align_axis_0=True` each row is **one axis-0 chunk shared across the selected
+arrays**, with columns:
+
+- `t_start`, `t_stop` — the global axis-0 range of the row.
+- one column per selected array, holding that array's `[t_start:t_stop, ...]` slice.
+
+All selected arrays must share `shape[0]` and resolve to the same axis-0 chunk size
+(after any `chunk_shapes` override); otherwise `read_zarr` raises `ValueError` pointing
+at the largest aligned subset. Use `array_paths` to choose which arrays participate —
+`align_axis_0` itself doesn't filter.
+
+```python
+ds = ray.data.read_zarr(
+    "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+    align_axis_0=True,
+    chunk_shapes=[50],
+)
+```
+
+## Selecting arrays and metadata discovery
+
+By default `read_zarr` reads every array it discovers. Pass `array_paths` to read a
+subset:
+
+```python
+ds = ray.data.read_zarr(store_uri, array_paths=["images", "labels"])
+```
+
+Discovery follows these rules:
+
+- If the store has consolidated `.zmetadata`, it's the canonical array list (filtered by
+  `array_paths` if given). This is the fast path.
+- Otherwise, if `array_paths` is given, each requested array's metadata is read directly
+  — no `.zmetadata` required.
+- Otherwise, if `allow_full_metadata_scan=True`, the store is recursively scanned for
+  arrays. This can be slow or costly on large remote stores, so it's off by default;
+  prefer consolidating metadata with `zarr.consolidate_metadata` ahead of time.
+- Otherwise, `read_zarr` raises `ValueError`.
+
+## Controlling chunk size
+
+Zarr stores are often chunked finely (for example one image per chunk). Read at native
+chunking and you get one Ray Data block per chunk — potentially a very large number of
+tiny blocks, which hurts throughput. `chunk_shapes` re-tiles the leading axes **at read
+time** to coarsen (or refine) block granularity:
+
+- A **sequence** applies as a shared prefix across all selected arrays, overriding the
+  leading axes and keeping trailing axes native. `chunk_shapes=[16]` turns native chunks
+  `(1, 224, 224, 3)` into `(16, 224, 224, 3)` and `(50,)` into `(16,)`.
+- A **dict** overrides per array; arrays absent from it keep native chunks.
+
+```python
+# Coarsen every array's axis 0 to 16-element chunks.
+ds = ray.data.read_zarr(store_uri, chunk_shapes=[16])
+
+# Different overrides per array.
+ds = ray.data.read_zarr(store_uri, chunk_shapes={"images": [16], "labels": [64]})
+```
+
+A shared override may not be longer than the smallest selected array's rank; a per-array
+override may not exceed its target array's rank.
+
+## Reading row-aligned arrays
+
+When arrays share an axis-0 (for example a timestep axis), `align_axis_0=True`
+co-iterates them as the [wide-form schema](#output-schemas) above — one row per axis-0
+chunk, one column per array.
+
+For sliding-window pipelines, `overlap` extends each row's per-array data forward by `N`
+timesteps from the next row's range (clipped at the end of the store). With
+`overlap=K-1`, any window of length `K` that starts in a row's owned `[t_start, t_stop)`
+fits entirely within that row's slice, so a downstream `flat_map` needs no cross-row
+state. Row ownership (the `t_start`/`t_stop` columns) is unchanged; only each per-array
+column's `shape[0]` grows by up to `overlap`. `overlap` requires `align_axis_0=True`.
+
+```python
+ds = ray.data.read_zarr(
+    store_uri,
+    align_axis_0=True,
+    chunk_shapes=[50],
+    overlap=9,  # length-10 windows fit within a row
+)
+```
+
+## Custom codecs
+
+Stores compressed with non-stdlib codecs (for example `imagecodecs` JPEG-XL) need the
+codec package imported and registered **in every Ray worker**, not just the driver.
+Register it with a `worker_process_setup_hook` — pass an importable callable or its
+dotted path (a string of code isn't accepted; a string is interpreted as an import
+path):
+
+```python
+import ray
+
+ray.init(runtime_env={
+    "worker_process_setup_hook": "imagecodecs.numcodecs.register_codecs",
+})
+```
+
+Driver-side `.zmetadata` parsing succeeds without this, but chunk decode in the workers
+fails with a `numcodecs` registry lookup error.
+
+## Cloud storage and credentials
+
+For public S3 data, use the anonymous convention `s3://anonymous@<bucket>/<key>`. GCS
+has no such idiom — pass an explicit anonymous filesystem instead:
+
+```python
+import pyarrow.fs
+
+ds = ray.data.read_zarr(
+    "gs://<bucket>/store.zarr",
+    filesystem=pyarrow.fs.GcsFileSystem(anonymous=True),
+)
+```
+
+For private buckets or custom credentials, pass a configured `filesystem` — either a
+`pyarrow.fs.FileSystem` or an `fsspec` `AbstractFileSystem`. Transient-error retries
+(throttling, 5xx, timeouts) are handled by that filesystem, so configure retry behavior
+there (for example the botocore `retries` config on an `s3fs.S3FileSystem`, or
+`retry_strategy` on a `pyarrow.fs.S3FileSystem`).
+
+```{note}
+`read_zarr` doesn't surface each array's `.zattrs` (Zarr user attributes) in the row
+schema — they're invariant per array, so repeating them on every row would just bloat
+the output. Read them separately (for example with the `zarr` package) if your job
+needs them.
+```
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 6da0f0db9d00..9358555623cc 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -946,131 +946,23 @@ def read_zarr(
 ):
     """Creates a :class:`~ray.data.Dataset` from a Zarr v2 store.
 
-    Two output schemas, selected by ``align_axis_0``:
+    By default each row is one chunk of one array (long-form), with columns
+    ``array``, ``chunk_index``, ``chunk_slices``, and ``chunk``. With
+    ``align_axis_0=True``, each row is one axis-0 chunk with ``t_start``,
+    ``t_stop``, and one column per selected array (wide-form), for arrays that
+    share ``shape[0]``.
 
-    Default (long-form, ``align_axis_0=False``) — one row per chunk of
-    one array. Columns:
-
-    * ``array``: the source array's path (e.g., ``"data/camera0_rgb"``, or
-      ``""`` for a root-level array).
-    * ``chunk_index``: the N-D index of this chunk in its array's chunk grid.
-    * ``chunk_slices``: per-axis ``(start, stop)`` of this chunk in the
-      source array's coordinate space — useful for mapping a chunk back
-      to its global position without recomputing from the chunk shape.
-    * ``chunk``: the chunk's data at its natural shape
-      (possibly shorter at trailing boundaries — no padding is applied).
-
-    Arrays read in the same call need not share any dimension. Different
-    ranks, shapes, dtypes, and native chunk sizes coexist as separate rows.
+    For the output schemas, chunk re-tiling, aligned and sliding-window reads,
+    metadata discovery, custom codecs, and cloud-storage setup, see
+    :ref:`Working with Zarr <working_with_zarr>`.
 
     .. note::
 
-        The ``chunk`` column is a tensor, and tensors of different rank or
-        dtype can't be combined into one batch.
-        Consume long-form per array (filter on the ``array`` column first),
-        or, when the arrays are row-aligned (share ``shape[0]``), use
-        ``align_axis_0=True`` so each array is its own column -- which is
-        batch-safe.
-
-    Aligned (wide-form, ``align_axis_0=True``) — one row per axis-0
-    chunk, with one column per selected array. Columns:
-
-    * ``t_start``, ``t_stop``: global axis-0 range of this row.
-    * ``<array_name>``: that array's ``[t_start:t_stop, ...]`` slice as
-      one column per selected array.
-
-    All selected arrays must share ``shape[0]`` and must end up with the
-    same axis-0 chunk size after ``chunk_shapes`` resolution; if they
-    don't, ``read_zarr`` raises ``ValueError`` with a hint pointing at the
-    largest aligned subset. Use ``array_paths`` to pick which arrays to
-    read — ``align_axis_0`` itself does not filter.
-
-    Metadata discovery follows these rules:
-
-    * If the store contains ``.zmetadata``, the datasource reads it and treats
-      it as the canonical list of arrays. If ``array_paths`` is provided, the
-      discovered set is filtered down to those paths.
-    * Otherwise, if ``array_paths`` is provided, the datasource reads each
-      requested array's ``.zarray`` file directly. The store doesn't need a
-      ``.zmetadata`` in this case.
-    * Otherwise, if ``allow_full_metadata_scan=True``, the datasource
-      recursively scans the store for ``.zarray`` files. This can be slow or
-      expensive for large remote stores, so it's disabled by default.
-      Before setting ``allow_full_metadata_scan=True``, consider consolidating
-      metdata with ``zarr.consolidate_metadata``.
-    * Otherwise, the datasource raises a :class:`ValueError`.
-
-    Each array's ``.zarray`` metadata must include the keys ``"shape"``,
-    ``"chunks"``, and ``"dtype"``. Reads fail if any discovered array metadata
-    is missing one or more of these required fields.
-
-    ``filesystem`` accepts either a :class:`pyarrow.fs.FileSystem` (as the rest
-    of Ray Data does) or an :class:`fsspec.spec.AbstractFileSystem` (as Zarr's
-    own ecosystem does). pyarrow filesystems are wrapped internally into fsspec
-    via :class:`fsspec.implementations.arrow.ArrowFSWrapper` because Zarr's
-    storage layer requires fsspec. For non-local stores, passing an explicit
-    filesystem is recommended so authentication and backend settings are
-    explicit. If ``filesystem`` is omitted, the datasource infers it from
-    ``path``.
-
-    Examples:
-        Read every array in a store with each array's native chunking
-        (long-form, 4 ``images`` chunks + 1 ``labels`` chunk).
-
-        >>> import ray
-        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
-        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
-        ... )
-        >>> ds.count()  # doctest: +SKIP
-        5
-
-        Aligned read: paired ``(images, labels)`` per row. ``align_axis_0``
-        validates that all selected arrays share ``shape[0]``.
-
-        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
-        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
-        ...     align_axis_0=True,
-        ...     chunk_shapes=[50],
-        ... )
-        >>> ds.count()  # doctest: +SKIP
-        4
-
-        Per-array overrides: retile only selected arrays while leaving
-        others at their native chunking.
-
-        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
-        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
-        ...     chunk_shapes={"images": [50], "labels": [50]},
-        ... )
-
-    Custom codecs:
-        Zarr stores compressed with non-stdlib codecs (e.g.,
-        ``imagecodecs_jpegxl`` for UMI camera arrays) require the codec
-        package to be imported and registered in every Ray worker, not
-        just the driver. Register them with a ``worker_process_setup_hook``
-        -- pass an importable callable or its dotted path (a string of code
-        is *not* accepted; a string is interpreted as an import path)::
-
-            ray.init(runtime_env={
-                "worker_process_setup_hook": "imagecodecs.numcodecs.register_codecs"
-            })
-
-        Driver-side ``.zmetadata`` parsing succeeds without this, but chunk
-        decode in workers will fail with a ``numcodecs`` registry lookup
-        error.
-
-    Anonymous cloud buckets:
-        S3 anonymous reads use the standard URL convention
-        ``s3://anonymous@<bucket>/<key>``. GCS does not have this idiom;
-        instead, pass ``filesystem=pyarrow.fs.GcsFileSystem(anonymous=True)``
-        explicitly.
-
-    Array attributes (``.zattrs``):
-        ``read_zarr`` does not surface each array's ``.zattrs`` (the
-        user-attribute store from the Zarr v2 spec) in the row schema —
-        attrs are invariant per array, so duplicating them on every row
-        would just bloat the output. Read them once (for example with the
-        ``zarr`` python package) if you require them in your job.
+        In long-form the ``chunk`` column is a tensor, and tensors of different
+        rank or dtype can't be combined into one batch. Consume long-form per
+        array (filter on the ``array`` column first), or, when arrays are
+        row-aligned (share ``shape[0]``), use ``align_axis_0=True`` so each
+        array is its own column -- which is batch-safe.
 
     Args:
         path: Path to the Zarr v2 store.
@@ -1100,13 +992,6 @@ def read_zarr(
               different arrays should use different leading-axis chunks.
               Arrays omitted from the dict keep their native chunks.
 
-            ``chunk_shapes=[16]`` re-tiles a 4-D array with native chunks
-            ``(1, 224, 224, 3)`` into ``(16, 224, 224, 3)`` and a 1-D
-            array with native chunks ``(50,)`` into ``(16,)``.
-            ``chunk_shapes={"images": [16], "labels": [64]}`` applies
-            different axis-0 overrides to different arrays in the same
-            read.
-
             A shared list/tuple override may not be longer than the
             smallest selected array's rank. Each per-array dict override
             may not be longer than its target array's rank. If ``None``

From 9b0111297196d607be36b5361d847fa3f6df9fb3 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 18:44:45 +0200
Subject: [PATCH 21/45] polish docs

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 doc/source/data/working-with-zarr.md | 45 +++++++---------------------
 1 file changed, 10 insertions(+), 35 deletions(-)

diff --git a/doc/source/data/working-with-zarr.md b/doc/source/data/working-with-zarr.md
index 81f1b0665b08..c3e344ad2bd1 100644
--- a/doc/source/data/working-with-zarr.md
+++ b/doc/source/data/working-with-zarr.md
@@ -55,8 +55,7 @@ arrays**, with columns:
 - one column per selected array, holding that array's `[t_start:t_stop, ...]` slice.
 
 All selected arrays must share `shape[0]` and resolve to the same axis-0 chunk size
-(after any `chunk_shapes` override); otherwise `read_zarr` raises `ValueError` pointing
-at the largest aligned subset. Use `array_paths` to choose which arrays participate —
+(after any `chunk_shapes` override). Use `array_paths` to choose which arrays participate —
 `align_axis_0` itself doesn't filter.
 
 ```python
@@ -85,14 +84,14 @@ Discovery follows these rules:
 - Otherwise, if `allow_full_metadata_scan=True`, the store is recursively scanned for
   arrays. This can be slow or costly on large remote stores, so it's off by default;
   prefer consolidating metadata with `zarr.consolidate_metadata` ahead of time.
-- Otherwise, `read_zarr` raises `ValueError`.
 
 ## Controlling chunk size
 
-Zarr stores are often chunked finely (for example one image per chunk). Read at native
-chunking and you get one Ray Data block per chunk — potentially a very large number of
-tiny blocks, which hurts throughput. `chunk_shapes` re-tiles the leading axes **at read
-time** to coarsen (or refine) block granularity:
+Zarr stores are often chunked finely (for example one image per chunk).
+Use `chunk_shapes` to re-tile the leading axes **at read time**, coarsening (or
+refining) the granularity at which reads happen. This setting is internal to the
+read operation and doesn't affect downstream batch sizes. Reading at very fine
+chunk granularity can hurt performance.
 
 - A **sequence** applies as a shared prefix across all selected arrays, overriding the
   leading axes and keeping trailing axes native. `chunk_shapes=[16]` turns native chunks
@@ -107,9 +106,6 @@ ds = ray.data.read_zarr(store_uri, chunk_shapes=[16])
 ds = ray.data.read_zarr(store_uri, chunk_shapes={"images": [16], "labels": [64]})
 ```
 
-A shared override may not be longer than the smallest selected array's rank; a per-array
-override may not exceed its target array's rank.
-
 ## Reading row-aligned arrays
 
 When arrays share an axis-0 (for example a timestep axis), `align_axis_0=True`
@@ -119,9 +115,7 @@ chunk, one column per array.
 For sliding-window pipelines, `overlap` extends each row's per-array data forward by `N`
 timesteps from the next row's range (clipped at the end of the store). With
 `overlap=K-1`, any window of length `K` that starts in a row's owned `[t_start, t_stop)`
-fits entirely within that row's slice, so a downstream `flat_map` needs no cross-row
-state. Row ownership (the `t_start`/`t_stop` columns) is unchanged; only each per-array
-column's `shape[0]` grows by up to `overlap`. `overlap` requires `align_axis_0=True`.
+fits entirely within that row's slice.
 
 ```python
 ds = ray.data.read_zarr(
@@ -148,32 +142,13 @@ ray.init(runtime_env={
 })
 ```
 
-Driver-side `.zmetadata` parsing succeeds without this, but chunk decode in the workers
-fails with a `numcodecs` registry lookup error.
-
-## Cloud storage and credentials
+This is a requirement of the underlying Zarr library: each worker process needs the codec registered before it can decode chunks.
 
-For public S3 data, use the anonymous convention `s3://anonymous@<bucket>/<key>`. GCS
-has no such idiom — pass an explicit anonymous filesystem instead:
 
-```python
-import pyarrow.fs
+## Zarr's .zattrs
 
-ds = ray.data.read_zarr(
-    "gs://<bucket>/store.zarr",
-    filesystem=pyarrow.fs.GcsFileSystem(anonymous=True),
-)
-```
-
-For private buckets or custom credentials, pass a configured `filesystem` — either a
-`pyarrow.fs.FileSystem` or an `fsspec` `AbstractFileSystem`. Transient-error retries
-(throttling, 5xx, timeouts) are handled by that filesystem, so configure retry behavior
-there (for example the botocore `retries` config on an `s3fs.S3FileSystem`, or
-`retry_strategy` on a `pyarrow.fs.S3FileSystem`).
-
-```{note}
 `read_zarr` doesn't surface each array's `.zattrs` (Zarr user attributes) in the row
 schema — they're invariant per array, so repeating them on every row would just bloat
 the output. Read them separately (for example with the `zarr` package) if your job
 needs them.
-```
+

From e2095d8ff3f138a9f73d7c3c2b20edfa2f672c90 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 18:48:41 +0200
Subject: [PATCH 22/45] [data] read_zarr: condense the filesystem argument docs

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/read_api.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 9358555623cc..09d81c6b5788 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -966,20 +966,16 @@ def read_zarr(
 
     Args:
         path: Path to the Zarr v2 store.
-        filesystem: Optional preconfigured filesystem. Accepts either a
-            :class:`pyarrow.fs.FileSystem` or an :class:`fsspec.spec.AbstractFileSystem`.
+        filesystem: The filesystem
+            implementation to read from. PyArrow filesystems are specified in the
+            `pyarrow docs <https://arrow.apache.org/docs/python/api/\
+            filesystems.html#filesystem-implementations>`_. Specify this parameter if
+            you need to provide specific configurations to the filesystem. By default,
+            the filesystem is automatically selected based on the scheme of the paths.
+            For example, if the path begins with ``s3://``, the `S3FileSystem` is used.
+            Also accepts an :class:`fsspec.spec.AbstractFileSystem`.
             pyarrow filesystems are wrapped internally with
-            :class:`fsspec.implementations.arrow.ArrowFSWrapper` because
-            Zarr's storage layer requires fsspec. Use this for private
-            buckets, custom credentials, anonymous/public cloud access, or
-            any storage backend configuration that shouldn't be inferred
-            internally. Recommended for non-local Zarr stores; for local
-            paths it's usually fine to omit. If omitted, the datasource
-            infers the filesystem from ``path``. Transient-error retries
-            (throttling, 5xx, timeouts) are handled by this filesystem, so
-            configure retry behavior here -- e.g. the botocore ``retries``
-            config on an ``s3fs.S3FileSystem`` or ``retry_strategy`` on a
-            ``pyarrow.fs.S3FileSystem``.
+            :class:`fsspec.implementations.arrow.ArrowFSWrapper`
         chunk_shapes: Optional override(s) for chunk geometry along the
             leading axes. Accepts either:
 

From c91e586f924e972f2bd81ebf6dbf7d7954213ae8 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 19:45:58 +0200
Subject: [PATCH 23/45] [data] read_zarr: trim docs and docstrings

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 doc/source/data/loading-data.rst              |  5 +--
 .../_internal/datasource/zarrv2_datasource.py | 15 -------
 python/ray/data/read_api.py                   | 39 ++++++-------------
 3 files changed, 13 insertions(+), 46 deletions(-)

diff --git a/doc/source/data/loading-data.rst b/doc/source/data/loading-data.rst
index 1ad2ffe5ccfe..772c9ab9cb40 100644
--- a/doc/source/data/loading-data.rst
+++ b/doc/source/data/loading-data.rst
@@ -152,10 +152,7 @@ To view the full list of supported file formats, see the
 
     .. tab-item:: Zarr
 
-        To read a Zarr v2 store, call :func:`~ray.data.read_zarr`. By default Ray Data
-        emits one row per array chunk; with ``align_axis_0=True`` it emits one row per
-        axis-0 chunk across row-aligned arrays. See
-        :ref:`Working with Zarr <working_with_zarr>` for details.
+        To read a Zarr v2 store, call :func:`~ray.data.read_zarr`.
 
         .. code-block:: python
 
diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 4c97ff488c2d..01d1507c0a4e 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -1,18 +1,3 @@
-"""Zarr v2 datasource for Ray Data.
-
-Two output schemas, selected at the call site:
-
-* Long-form (default). Each output row corresponds to one chunk of one
-  array. Arrays in the same call need not share any dimension; they coexist
-  as separate rows distinguished by an ``array`` column.
-* Wide-form (``align_axis_0=True``). Each output row is one axis-0 chunk
-  shared across all selected arrays; the row carries one column per array
-  plus ``t_start`` / ``t_stop`` for the global range.
-
-See :class:`ZarrV2Datasource` for the row schemas and
-:func:`ray.data.read_zarr` for the public API.
-"""
-
 from __future__ import annotations
 
 import logging
diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 09d81c6b5788..d20dbf4ff950 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -976,22 +976,12 @@ def read_zarr(
             Also accepts an :class:`fsspec.spec.AbstractFileSystem`.
             pyarrow filesystems are wrapped internally with
             :class:`fsspec.implementations.arrow.ArrowFSWrapper`
-        chunk_shapes: Optional override(s) for chunk geometry along the
-            leading axes. Accepts either:
-
-            * A sequence of positive integers (list or tuple), applied as
-              a shared prefix to every selected array, overriding the
-              leading axes and keeping trailing axes at each array's
-              native chunking.
-            * A dict mapping array paths to per-array prefix overrides,
-              for cases where only some arrays should be re-tiled or
-              different arrays should use different leading-axis chunks.
-              Arrays omitted from the dict keep their native chunks.
-
-            A shared list/tuple override may not be longer than the
-            smallest selected array's rank. Each per-array dict override
-            may not be longer than its target array's rank. If ``None``
-            (the default), every array keeps its native chunks.
+        chunk_shapes: Optional re-tiling of the leading chunk axes at read
+            time (see :ref:`Working with Zarr <working_with_zarr>`). Either a
+            sequence applied as a shared prefix across all selected arrays
+            (trailing axes keep native chunks), or a dict of per-array
+            prefixes (arrays absent from it keep native chunks). An override
+            may not exceed its target array's rank. Defaults to native chunks.
         array_paths: Optional list of array paths within the Zarr store to
             read. If unspecified, all arrays discovered in the store are
             included.
@@ -1006,17 +996,12 @@ def read_zarr(
             ``shape[0]`` and must end up with the same effective axis-0
             chunk size after ``chunk_shapes`` resolution. The
             default (``False``) uses the long-form chunk-per-row schema.
-        overlap: When set with ``align_axis_0``, extends each row's per-array
-            data forward by ``overlap`` timesteps from the next row's owned
-            range (clipped at the end of the store). Used for sliding-window
-            pipelines: with ``overlap=K-1``, any window of length ``K``
-            starting in this row's owned ``[t_start, t_stop)`` fits
-            entirely within the row's per-array slice, so a downstream
-            ``flat_map`` doesn't need cross-row state. The row's ownership
-            (the ``t_start``/``t_stop`` columns) is unchanged; only
-            ``chunk.shape[0]`` of each per-array column grows by up to
-            ``overlap``. Requires ``align_axis_0=True``. Defaults to ``0`` —
-            no overlap, each row's data exactly covers its owned range.
+        overlap: With ``align_axis_0``, extend each row's per-array data
+            forward by ``overlap`` timesteps (clipped at the store end) for
+            sliding-window pipelines; see
+            :ref:`Working with Zarr <working_with_zarr>`. Row ownership
+            (the ``t_start``/``t_stop`` columns) is unchanged. Requires
+            ``align_axis_0=True``. Defaults to ``0``.
         concurrency: The maximum number of Ray tasks to run concurrently. Set this
             to control number of tasks to run concurrently. This doesn't change the
             total number of tasks run or the total number of output blocks. By default,

From 025c759ab192d79ee141f454839a270224752dfe Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 19:51:49 +0200
Subject: [PATCH 24/45] [data] read_zarr: tighten align_axis_0 and overlap docstrings

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/read_api.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index d20dbf4ff950..30dc4e50fdfc 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -989,19 +989,16 @@ def read_zarr(
             ``.zarray`` files when ``array_paths`` is unspecified and
             ``.zmetadata`` is missing. This may be slow or expensive for large
             remote stores, so it is disabled by default.
-        align_axis_0: Opt-in switch to the wide-form schema. Pass ``True``
-            to emit one row per axis-0 chunk with one column per selected
-            array, plus ``t_start`` and ``t_stop`` columns naming the
-            global axis-0 range. All selected arrays must share
-            ``shape[0]`` and must end up with the same effective axis-0
-            chunk size after ``chunk_shapes`` resolution. The
-            default (``False``) uses the long-form chunk-per-row schema.
-        overlap: With ``align_axis_0``, extend each row's per-array data
-            forward by ``overlap`` timesteps (clipped at the store end) for
-            sliding-window pipelines; see
-            :ref:`Working with Zarr <working_with_zarr>`. Row ownership
-            (the ``t_start``/``t_stop`` columns) is unchanged. Requires
-            ``align_axis_0=True``. Defaults to ``0``.
+        align_axis_0: If ``True``, emit the wide-form schema: one row per
+            axis-0 chunk with one column per selected array, plus ``t_start``
+            and ``t_stop`` columns naming the global axis-0 range. All selected
+            arrays must share ``shape[0]`` and resolve to the same effective
+            axis-0 chunk size after ``chunk_shapes`` resolution. Defaults to
+            ``False`` (long-form, one chunk per row).
+        overlap: The number of additional axis-0 timesteps to extend each
+            row's per-array data forward by, clipped at the store end, for
+            sliding-window pipelines. Only valid with ``align_axis_0=True``.
+            Defaults to ``0``. See :ref:`Working with Zarr <working_with_zarr>`.
         concurrency: The maximum number of Ray tasks to run concurrently. Set this
             to control number of tasks to run concurrently. This doesn't change the
             total number of tasks run or the total number of output blocks. By default,

From 4b430cc54d10b11a4ec32a5934934e3b1f71e47c Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 20:11:20 +0200
Subject: [PATCH 25/45] [data] read_zarr: drop chunk_shapes key checks and docstrings

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 40 ++-----------------
 .../ray/data/tests/datasource/test_zarrv2.py  |  7 ----
 2 files changed, 4 insertions(+), 43 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 01d1507c0a4e..2c05e5518e87 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -111,22 +111,8 @@ def _read_chunk(
     array_name: str,
     chunk_slices: tuple[tuple[int, int], ...],
 ) -> np.ndarray:
-    """Read ``array[chunk_slices]`` from a Zarr root.
-
-    ``chunk_slices`` is an N-tuple of ``(start, stop)`` pairs, one per axis.
-    For a 0-D (scalar) array it is the empty tuple ``()``, which reads the
-    single element.
-
-    Transient I/O errors (throttling, 5xx, connection resets, timeouts) are
-    retried by the underlying filesystem/storage backend, which owns the retry
-    policy: ``s3fs``/botocore and ``pyarrow.fs.S3FileSystem`` retry by default
-    and are tunable on the ``filesystem`` passed to ``read_zarr`` (e.g. botocore
-    ``retries`` config or pyarrow ``retry_strategy``).
-    """
     indexer = tuple(slice(s, e) for s, e in chunk_slices)
     arr = root if array_name == "" else root[array_name]
-    # ``arr`` is a zarr Array here (the caller resolves a concrete array path),
-    # but zarr's types widen it to Array | Group; asarray pins the ndarray return.
     return np.asarray(arr[indexer])
 
 
@@ -214,18 +200,11 @@ def read_fn() -> Iterable[pd.DataFrame]:
 
 
 def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]]:
+    """Normalize chunk_shapes keys to store paths and validate their values."""
     from zarr.util import normalize_storage_path
 
-    normalized_chunk_shapes: dict[str, tuple[int, ...]] = {}
-    original_keys_by_normalized: dict[str, str] = {}
-
+    normalized: dict[str, tuple[int, ...]] = {}
     for k, v in chunk_shapes.items():
-        if not isinstance(k, str):
-            raise ValueError(
-                "chunk_shapes dict keys must be array-path strings, "
-                f"got key {k!r} of type {type(k).__name__}"
-            )
-
         if (
             not isinstance(v, (tuple, list))
             or not v
@@ -235,19 +214,8 @@ def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]
                 f"chunk_shapes[{k!r}] must be a non-empty sequence of positive "
                 f"integers (list or tuple), got {v!r}"
             )
-
-        normalized_key = normalize_storage_path(k)
-
-        if normalized_key in original_keys_by_normalized:
-            prev_key = original_keys_by_normalized[normalized_key]
-            raise ValueError(
-                "chunk_shapes contains duplicate array paths after normalization: "
-                f"{prev_key!r} and {k!r} both normalize to {normalized_key!r}"
-            )
-
-        original_keys_by_normalized[normalized_key] = k
-        normalized_chunk_shapes[normalized_key] = tuple(v)
-    return normalized_chunk_shapes
+        normalized[normalize_storage_path(k)] = tuple(v)
+    return normalized
 
 
 # ---------------------------------------------------------------------------
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index eb3be312a263..431e1b6b85e8 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -271,13 +271,6 @@ def test_array_paths_missing_zarray_file_raises_value_error(
         ({"images": []}, r"chunk_shapes\['images'\] must be .*positive integers"),
         ({"images": [0]}, r"chunk_shapes\['images'\] must be .*positive integers"),
         ({"images": [1.5]}, r"chunk_shapes\['images'\] must be .*positive integers"),
-        # Bad dict keys.
-        (cast(Any, {1: [2]}), "chunk_shapes dict keys must be array-path strings"),
-        # Duplicate keys after normalization.
-        (
-            {"images": [2], "/images/": [3]},
-            "duplicate array paths after normalization",
-        ),
         # Unknown array path.
         ({"does_not_exist": [2]}, r"Unknown array path\(s\) in chunk_shapes"),
     ],

From f43448d4783757fc9be05e4d9efeae5ece478326 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 20:18:41 +0200
Subject: [PATCH 26/45] [data] read_zarr: prune comments in ZarrV2Datasource.__init__

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../data/_internal/datasource/zarrv2_datasource.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 2c05e5518e87..c3a48d6fe5a7 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -243,12 +243,6 @@ def __init__(
     ) -> None:
         super().__init__()
         _check_import(self, module="zarr", package="zarr")
-
-        # This datasource targets Zarr v2 stores via zarr-python 2.x APIs
-        # (``zarr.util.normalize_storage_path``, ``.zarray`` metadata,
-        # ``zarr.open(fs.get_mapper(...))``) that were removed/reworked in
-        # zarr-python 3.x. Fail fast with an actionable message rather than a
-        # cryptic ImportError mid-read if an incompatible version is installed.
         import zarr
 
         if int(zarr.__version__.split(".")[0]) >= 3:
@@ -302,15 +296,9 @@ def __init__(
 
             if isinstance(self._fs, ZipFileSystem) and self.paths[0].endswith(".zip"):
                 # An explicit archive filesystem: the store is the archive root,
-                # not a ``.zip``-named entry inside it. (A real sub-path within
-                # the archive is preserved by the scheme-strip below.)
+                # not a ``.zip``-named entry inside it.
                 self._store_path = ""
             else:
-                # Strip any URI scheme (e.g. ``gs://`` / ``s3://``) so the path
-                # is backend-relative; pyarrow filesystems (wrapped in
-                # ``ArrowFSWrapper``) require this. Mirrors the
-                # ``filesystem is None`` branch, which strips the scheme via
-                # ``_resolve_paths_and_filesystem``.
                 _, store_path = split_protocol(self.paths[0])
                 self._store_path = store_path.rstrip("/")
 

From b4e990327323c49b34ffbadcf26e08788d9f3e82 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 20:59:31 +0200
Subject: [PATCH 27/45] [data] working-with-zarr: fix vale documentation_style
 lint errors

Resolves the 5 errors from the `lint: documentation_style` CI job (all in
this guide):
- backtick the `zarr-python` package name (Vale.Terms)
- "does not" -> "doesn't" (Google.Contractions)
- "batchsizes" -> "batch sizes" (Vale.Spelling)
- drop "above" (Google.WordList)
- backtick `JPEG-XL` (Google.Acronyms)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 doc/source/data/working-with-zarr.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/source/data/working-with-zarr.md b/doc/source/data/working-with-zarr.md
index c3e344ad2bd1..64eaa5e03333 100644
--- a/doc/source/data/working-with-zarr.md
+++ b/doc/source/data/working-with-zarr.md
@@ -4,7 +4,7 @@
 
 Ray Data reads [Zarr v2](https://zarr.readthedocs.io/) stores — chunked, compressed,
 N-dimensional arrays on local disk or cloud object storage — with
-{func}`ray.data.read_zarr` (zarr-python 2.x / Zarr v2 stores).
+{func}`ray.data.read_zarr` (`zarr-python` 2.x / Zarr v2 stores).
 
 This guide covers:
 
@@ -87,10 +87,10 @@ Discovery follows these rules:
 
 ## Controlling chunk size
 
-Zarr stores are often chunked finely (for example one image per chunk). 
+Zarr stores are often chunked finely (for example one image per chunk).
 You can use `chunk_shapes` to chunk the leading axes **at read
 time** to coarsen (or refine) the granularity at which reading happens.
-Note that this does not affect downstream batchsizes and is internal to the reading operation.
+Note that this doesn't affect downstream batch sizes and is internal to the reading operation.
 Finely chunked reading can hurt performance.
 
 - A **sequence** applies as a shared prefix across all selected arrays, overriding the
@@ -109,7 +109,7 @@ ds = ray.data.read_zarr(store_uri, chunk_shapes={"images": [16], "labels": [64]}
 ## Reading row-aligned arrays
 
 When arrays share an axis-0 (for example a timestep axis), `align_axis_0=True`
-co-iterates them as the [wide-form schema](#output-schemas) above — one row per axis-0
+co-iterates them as the [wide-form schema](#output-schemas) — one row per axis-0
 chunk, one column per array.
 
 For sliding-window pipelines, `overlap` extends each row's per-array data forward by `N`
@@ -128,7 +128,7 @@ ds = ray.data.read_zarr(
 
 ## Custom codecs
 
-Stores compressed with non-stdlib codecs (for example `imagecodecs` JPEG-XL) need the
+Stores compressed with non-stdlib codecs (for example `imagecodecs` `JPEG-XL`) need the
 codec package imported and registered **in every Ray worker**, not just the driver.
 Register it with a `worker_process_setup_hook` — pass an importable callable or its
 dotted path (a string of code isn't accepted; a string is interpreted as an import

From 69ac9d94b4c4dfec2a788c76bb88f4465553633a Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 21:01:51 +0200
Subject: [PATCH 28/45] Add back example docstring

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/read_api.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
index 30dc4e50fdfc..7e248d0e9ea8 100644
--- a/python/ray/data/read_api.py
+++ b/python/ray/data/read_api.py
@@ -964,6 +964,30 @@ def read_zarr(
         row-aligned (share ``shape[0]``), use ``align_axis_0=True`` so each
         array is its own column -- which is batch-safe.
 
+    Examples:
+        Read every array at its native chunking (long-form, one row per chunk):
+
+        >>> import ray
+        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
+        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+        ... )
+
+        Aligned read -- paired ``(images, labels)`` per row; ``align_axis_0``
+        requires all selected arrays to share ``shape[0]``:
+
+        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
+        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+        ...     align_axis_0=True,
+        ...     chunk_shapes=[50],
+        ... )
+
+        Per-array chunk overrides -- re-tile only the selected arrays:
+
+        >>> ds = ray.data.read_zarr(  # doctest: +SKIP
+        ...     "s3://anonymous@ray-example-data/mnist-tiny.zarr",
+        ...     chunk_shapes={"images": [50], "labels": [50]},
+        ... )
+
     Args:
         path: Path to the Zarr v2 store.
         filesystem: The filesystem

From f1f68b511114b5e2757aea1e36724f39a4f3e8bb Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 21:50:24 +0200
Subject: [PATCH 29/45] [data] read_zarr: harden metadata discovery and
 local-path reads

Address four edge-case review findings in ZarrV2Datasource:

1. Pin local:// stores to the driver node: set supports_distributed_reads
   from the path scheme (like FileBasedDatasource) so read tasks aren't
   scheduled on workers that can't see the driver's local disk.
2. Detect consolidated metadata by trying open_consolidated rather than a
   separately-built exists() probe. The probe could disagree with the
   mapper's key lookup (e.g. archive/root stores with an empty store path)
   and wrongly treat a consolidated store as unconsolidated.
3. Reject a group path passed via array_paths on an unconsolidated store
   with a clear "is a group, not an array" error instead of a confusing
   AttributeError later. (The consolidated and full-scan paths already
   filter to arrays.)
4. Validate array_paths for single root-level array stores so a bad path
   errors instead of silently returning the root array.

Add a test for each.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 42 +++++++++++++-----
 .../ray/data/tests/datasource/test_zarrv2.py  | 43 +++++++++++++++++++
 2 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index c3a48d6fe5a7..82998658aa21 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -12,7 +12,7 @@
 from fsspec.core import split_protocol
 from fsspec.spec import AbstractFileSystem
 
-from ray.data._internal.util import _check_import
+from ray.data._internal.util import _check_import, _is_local_scheme
 from ray.data.block import BlockMetadata
 from ray.data.datasource.datasource import Datasource, ReadTask
 
@@ -254,6 +254,10 @@ def __init__(
 
         self.allow_full_metadata_scan = allow_full_metadata_scan
         self.paths = [str(path)]
+        # ``local://`` stores live on the driver's local disk, so pin reads to
+        # the driver node (workers on other nodes can't see those files).
+        # Mirrors FileBasedDatasource. Non-local/cloud stores read distributed.
+        self._supports_distributed_reads = not _is_local_scheme(self.paths)
 
         # Resolve filesystem + store path. The order of precedence:
         #   1. Explicit ``filesystem=`` always wins.
@@ -328,14 +332,18 @@ def __init__(
 
         # Open the store with zarr (consolidated metadata when available). zarr
         # reads and validates `.zarray`/`.zmetadata` here, so the datasource does
-        # not re-check that metadata itself.
+        # not re-check that metadata itself. Detect consolidation by *trying*
+        # ``open_consolidated`` rather than a separately-constructed ``exists``
+        # probe: the probe can disagree with the mapper's own key lookup (e.g.
+        # archive/root stores whose store path is empty) and wrongly treat a
+        # consolidated store as unconsolidated.
         store = self._fs.get_mapper(self._store_path)
-        z_meta_path = f"{self._store_path.rstrip('/')}/.zmetadata"
-        self._consolidated = self._fs.exists(z_meta_path)
-        if self._consolidated:
+        try:
             self.root = zarr.open_consolidated(store, mode="r")
-        else:
+            self._consolidated = True
+        except KeyError:
             self.root = zarr.open(store, mode="r")
+            self._consolidated = False
 
         self._metadata_by_path = self._load_metadata(array_paths)
         if not self._metadata_by_path:
@@ -422,6 +430,10 @@ def __init__(
                     f"arrays to the same axis-0 prefix) to re-tile them."
                 )
 
+    @property
+    def supports_distributed_reads(self) -> bool:
+        return self._supports_distributed_reads
+
     def estimate_inmemory_data_size(self) -> Optional[int]:
         """Total bytes = sum over selected arrays of ``prod(shape) * itemsize``."""
         return sum(
@@ -588,14 +600,22 @@ def _load_metadata(self, array_paths) -> dict[str, ZarrArrayMeta]:
         from zarr.util import normalize_storage_path
 
         root = self.root
-
-        if isinstance(root, zarr.Array):
-            return {"": ZarrArrayMeta.from_zarr_array(root)}
-
         requested = (
             {normalize_storage_path(p) for p in array_paths} if array_paths else None
         )
 
+        if isinstance(root, zarr.Array):
+            # A store that is itself an array exposes exactly one path: "" (root).
+            # Reject any requested path that isn't the root so a bad ``array_paths``
+            # fails loudly here instead of silently returning the root array.
+            if requested is not None and requested != {""}:
+                raise ValueError(
+                    f"This Zarr store is a single root-level array (path ''), "
+                    f"but array_paths={array_paths!r} requested other path(s). "
+                    f"Pass array_paths=[''] or omit it."
+                )
+            return {"": ZarrArrayMeta.from_zarr_array(root)}
+
         if not self._consolidated and not self.allow_full_metadata_scan:
             if requested is None:
                 raise ValueError(
@@ -612,6 +632,8 @@ def _load_metadata(self, array_paths) -> dict[str, ZarrArrayMeta]:
                     raise ValueError(
                         f"Array path {raw!r} not found in Zarr store."
                     ) from e
+                if not isinstance(arr, zarr.Array):
+                    raise ValueError(f"Array path {raw!r} is a group, not an array.")
                 out[name] = ZarrArrayMeta.from_zarr_array(arr)
             return out
 
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 431e1b6b85e8..7b51d7f30c6a 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -252,6 +252,49 @@ def test_array_paths_missing_zarray_file_raises_value_error(
         )
 
 
+def test_local_scheme_pins_reads_to_driver_node(zarrv2_group_store):
+    """``local://`` stores can't be read distributed; plain/cloud paths can."""
+    local = zarrv2_datasource.ZarrV2Datasource("local://" + str(zarrv2_group_store))
+    assert local.supports_distributed_reads is False
+
+    plain = zarrv2_datasource.ZarrV2Datasource(str(zarrv2_group_store))
+    assert plain.supports_distributed_reads is True
+
+
+def test_consolidation_detected_via_open_consolidated(
+    zarrv2_group_store, unconsolidated_zarrv2_store
+):
+    """``_consolidated`` reflects whether ``.zmetadata`` actually opened."""
+    consolidated = zarrv2_datasource.ZarrV2Datasource(
+        str(zarrv2_group_store), array_paths=["images"]
+    )
+    assert consolidated._consolidated is True
+
+    unconsolidated = zarrv2_datasource.ZarrV2Datasource(
+        str(unconsolidated_zarrv2_store), array_paths=["images"]
+    )
+    assert unconsolidated._consolidated is False
+
+
+def test_array_paths_rejects_group_path(tmp_path):
+    """Requesting a group path (not an array) on an unconsolidated store errors."""
+    store_path = tmp_path / "withgroup.zarr"
+    root = zarr.open_group(str(store_path), mode="w")
+    grp = root.create_group("grp")
+    grp.create_dataset("inner", data=np.arange(4, dtype="<i4"), chunks=(2,))
+    # Not consolidated -> the per-array ``.zarray`` lookup path.
+    with pytest.raises(ValueError, match="is a group, not an array"):
+        zarrv2_datasource.ZarrV2Datasource(str(store_path), array_paths=["grp"])
+
+
+def test_root_array_rejects_non_root_array_paths(zarrv2_root_store):
+    """A single root-level array rejects array_paths that aren't the root ''."""
+    with pytest.raises(ValueError, match="single root-level array"):
+        zarrv2_datasource.ZarrV2Datasource(
+            str(zarrv2_root_store), array_paths=["missing"]
+        )
+
+
 # ---------------------------------------------------------------------------
 # chunk_shapes validation
 # ---------------------------------------------------------------------------

From 472b5b9c2d8b904ee1f4ca59d9de44a809e4ddaa Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 22:41:08 +0200
Subject: [PATCH 30/45] [data] working-with-zarr: fix dangling TOC link
 breaking the RTD build

The "This guide covers" list linked to #cloud-storage-and-credentials, but
that section was renamed to "Zarr's .zattrs". Sphinx emits a
myst.xref_missing warning, which ReadTheDocs (fail_on_warning: true) turns
into a build failure -- though Buildkite's doc build tolerates it.

Repoint the bullet to the .zattrs section via an explicit `(zarr-zattrs)=`
target so the link doesn't depend on the auto-generated heading slug.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 doc/source/data/working-with-zarr.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/data/working-with-zarr.md b/doc/source/data/working-with-zarr.md
index 64eaa5e03333..e32c3626f64a 100644
--- a/doc/source/data/working-with-zarr.md
+++ b/doc/source/data/working-with-zarr.md
@@ -13,7 +13,7 @@ This guide covers:
 - [Controlling chunk size](#controlling-chunk-size)
 - [Reading row-aligned arrays](#reading-row-aligned-arrays)
 - [Custom codecs](#custom-codecs)
-- [Cloud storage and credentials](#cloud-storage-and-credentials)
+- [Zarr's .zattrs](#zarr-zattrs)
 
 For the full parameter reference, see {func}`ray.data.read_zarr`.
 
@@ -145,6 +145,7 @@ ray.init(runtime_env={
 This is a particularity of the underlying Zarr library.
 
 
+(zarr-zattrs)=
 ## Zarr's .zattrs
 
 `read_zarr` doesn't surface each array's `.zattrs` (Zarr user attributes) in the row

From d75d6fae18e1f4537fcce014c2882cedb89a7987 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 23:23:42 +0200
Subject: [PATCH 31/45] [data] read_zarr: accept NumPy integer chunk_shapes

chunk_shapes validation used isinstance(x, int), which rejected NumPy
scalar integers (numpy.int64, etc.) even when positive -- a common case
since chunk sizes are often derived from array metadata. Accept any
numbers.Integral (excluding bool) via a shared _is_positive_int helper,
and normalize stored values to plain ints. Adds a test.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py   | 17 +++++++++++------
 python/ray/data/tests/datasource/test_zarrv2.py | 16 ++++++++++++++++
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 82998658aa21..068c475786bf 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -2,6 +2,7 @@
 
 import logging
 import math
+import numbers
 from collections.abc import Callable, Iterable
 from dataclasses import dataclass
 from itertools import product
@@ -199,6 +200,11 @@ def read_fn() -> Iterable[pd.DataFrame]:
     return read_fn
 
 
+def _is_positive_int(x) -> bool:
+    """True for a positive integer, including NumPy integers; False for bool."""
+    return not isinstance(x, bool) and isinstance(x, numbers.Integral) and x > 0
+
+
 def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]]:
     """Normalize chunk_shapes keys to store paths and validate their values."""
     from zarr.util import normalize_storage_path
@@ -208,13 +214,13 @@ def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]
         if (
             not isinstance(v, (tuple, list))
             or not v
-            or any(isinstance(x, bool) or not isinstance(x, int) or x <= 0 for x in v)
+            or not all(_is_positive_int(x) for x in v)
         ):
             raise ValueError(
                 f"chunk_shapes[{k!r}] must be a non-empty sequence of positive "
                 f"integers (list or tuple), got {v!r}"
             )
-        normalized[normalize_storage_path(k)] = tuple(v)
+        normalized[normalize_storage_path(k)] = tuple(int(x) for x in v)
     return normalized
 
 
@@ -319,16 +325,15 @@ def __init__(
             if isinstance(chunk_shapes, dict):
                 self.chunk_shapes = _validate_chunk_shapes_dict(chunk_shapes)
             else:
-                if not chunk_shapes or any(
-                    isinstance(x, bool) or not isinstance(x, int) or x <= 0
-                    for x in chunk_shapes
+                if not chunk_shapes or not all(
+                    _is_positive_int(x) for x in chunk_shapes
                 ):
                     raise ValueError(
                         "chunk_shapes must be a non-empty sequence of positive integers "
                         f"(list or tuple), got {chunk_shapes!r}"
                     )
 
-                self.chunk_shapes = tuple(chunk_shapes)
+                self.chunk_shapes = tuple(int(x) for x in chunk_shapes)
 
         # Open the store with zarr (consolidated metadata when available). zarr
         # reads and validates `.zarray`/`.zmetadata` here, so the datasource does
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 7b51d7f30c6a..df50b7222b22 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -325,6 +325,22 @@ def test_rejects_invalid_chunk_shapes(zarrv2_group_store, chunk_shapes, match):
         )
 
 
+def test_chunk_shapes_accepts_numpy_ints(zarrv2_group_store):
+    """NumPy integer chunk sizes (common from array metadata) are accepted and
+    normalized to plain ints."""
+    seq = zarrv2_datasource.ZarrV2Datasource(
+        str(zarrv2_group_store), chunk_shapes=[np.int64(2)]
+    )
+    assert seq.chunk_shapes == (2,)
+    assert all(type(x) is int for x in seq.chunk_shapes)
+
+    per_array = zarrv2_datasource.ZarrV2Datasource(
+        str(zarrv2_group_store), chunk_shapes={"images": [np.int64(2)]}
+    )
+    assert per_array.chunk_shapes == {"images": (2,)}
+    assert all(type(x) is int for v in per_array.chunk_shapes.values() for x in v)
+
+
 @pytest.mark.parametrize(
     "chunk_shapes,array_paths,expected",
     [

From d4b97ecc4fa0c2ed2104bb2bb8c7e72da24b0ffd Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Thu, 18 Jun 2026 23:24:26 +0200
Subject: [PATCH 32/45] [data] read_zarr: delete NumPy-int chunk_shapes test

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/tests/datasource/test_zarrv2.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index df50b7222b22..7b51d7f30c6a 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -325,22 +325,6 @@ def test_rejects_invalid_chunk_shapes(zarrv2_group_store, chunk_shapes, match):
         )
 
 
-def test_chunk_shapes_accepts_numpy_ints(zarrv2_group_store):
-    """NumPy integer chunk sizes (common from array metadata) are accepted and
-    normalized to plain ints."""
-    seq = zarrv2_datasource.ZarrV2Datasource(
-        str(zarrv2_group_store), chunk_shapes=[np.int64(2)]
-    )
-    assert seq.chunk_shapes == (2,)
-    assert all(type(x) is int for x in seq.chunk_shapes)
-
-    per_array = zarrv2_datasource.ZarrV2Datasource(
-        str(zarrv2_group_store), chunk_shapes={"images": [np.int64(2)]}
-    )
-    assert per_array.chunk_shapes == {"images": (2,)}
-    assert all(type(x) is int for v in per_array.chunk_shapes.values() for x in v)
-
-
 @pytest.mark.parametrize(
     "chunk_shapes,array_paths,expected",
     [

From cb7e075a662764d99402b947996e7e12fc23257f Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 09:57:57 +0200
Subject: [PATCH 33/45] [data] read_zarr: yield Arrow blocks via
 DelegatingBlockBuilder

Build read-task output with DelegatingBlockBuilder (-> ArrowBlockBuilder)
instead of hand-constructing pandas DataFrames, matching the tensor/per-row
datasources (image, audio, video, torch). Blocks are now pyarrow Tables and
the Arrow tensor extension handles the variable-shaped `chunk` column
(shorter trailing-edge chunks) automatically. Drops the pandas dependency
in the datasource.

Test updates:
- _execute_read_tasks converts each (now-Arrow) block to pandas.
- _reconstruct_array sorts by a tuple key, since chunk_index/chunk_slices
  round-trip as Arrow lists, not Python tuples.
- Drop ray_start_regular_shared from the two auto-init tests: building an
  Arrow block auto-inits Ray, which conflicted with the fixture's unguarded
  ray.init() (the rest of the module already relies on auto-init).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 59 +++++++++----------
 .../ray/data/tests/datasource/test_zarrv2.py  | 13 ++--
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 068c475786bf..639cd73e125c 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -9,12 +9,12 @@
 from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
-import pandas as pd
 from fsspec.core import split_protocol
 from fsspec.spec import AbstractFileSystem
 
+from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
 from ray.data._internal.util import _check_import, _is_local_scheme
-from ray.data.block import BlockMetadata
+from ray.data.block import Block, BlockMetadata
 from ray.data.datasource.datasource import Datasource, ReadTask
 
 logger = logging.getLogger(__name__)
@@ -143,32 +143,33 @@ class _AlignedChunkDescriptor:
 def _create_read_fn(
     batch: list[_ChunkDescriptor],
     root: ZarrRoot,
-) -> Callable[[], Iterable[pd.DataFrame]]:
-    """Build a read-task callable that materializes one DataFrame for one batch.
+) -> Callable[[], Iterable[Block]]:
+    """Build a read-task callable that materializes one block for one batch.
 
-    Each output row carries ``(array, chunk_index, chunk)``. ``chunk`` is
-    the data at its natural shape — possibly shorter than the nominal chunk
-    shape at trailing boundaries.
+    Each output row carries ``(array, chunk_index, chunk_slices, chunk)``.
+    ``chunk`` is the data at its natural shape — possibly shorter than the
+    nominal chunk shape at trailing boundaries.
 
     The caller is expected to pass batches whose chunks all come from one
     array. Arrow's tensor extension requires all tensor elements in a
     column to share rank, so mixing 4-D image chunks with 1-D label chunks
-    in one block would fail at conversion time.
+    in one block would fail at build time.
     :meth:`ZarrV2Datasource.get_read_tasks` enforces this by allocating one
     batch per array.
     """
 
-    def read_fn() -> Iterable[pd.DataFrame]:
-        yield pd.DataFrame(
-            {
-                "array": [d.array_name for d in batch],
-                "chunk_index": [d.chunk_index for d in batch],
-                "chunk_slices": [d.chunk_slices for d in batch],
-                "chunk": [
-                    _read_chunk(root, d.array_name, d.chunk_slices) for d in batch
-                ],
-            }
-        )
+    def read_fn() -> Iterable[Block]:
+        builder = DelegatingBlockBuilder()
+        for d in batch:
+            builder.add(
+                {
+                    "array": d.array_name,
+                    "chunk_index": d.chunk_index,
+                    "chunk_slices": d.chunk_slices,
+                    "chunk": _read_chunk(root, d.array_name, d.chunk_slices),
+                }
+            )
+        yield builder.build()
 
     return read_fn
 
@@ -177,7 +178,7 @@ def _create_aligned_read_fn(
     batch: list[_AlignedChunkDescriptor],
     aligned_array_names: list[str],
     root: ZarrRoot,
-) -> Callable[[], Iterable[pd.DataFrame]]:
+) -> Callable[[], Iterable[Block]]:
     """Build a read-task callable for aligned (wide-row) reads.
 
     Each output row carries ``t_start``, ``t_stop``, and one column per
@@ -186,16 +187,14 @@ def _create_aligned_read_fn(
     share the same axis-0 range.
     """
 
-    def read_fn() -> Iterable[pd.DataFrame]:
-        cols: dict[str, list] = {
-            "t_start": [d.t_start for d in batch],
-            "t_stop": [d.t_stop for d in batch],
-        }
-        for name in aligned_array_names:
-            cols[name] = [
-                _read_chunk(root, name, ((d.t_start, d.t_stop_data),)) for d in batch
-            ]
-        yield pd.DataFrame(cols)
+    def read_fn() -> Iterable[Block]:
+        builder = DelegatingBlockBuilder()
+        for d in batch:
+            row = {"t_start": d.t_start, "t_stop": d.t_stop}
+            for name in aligned_array_names:
+                row[name] = _read_chunk(root, name, ((d.t_start, d.t_stop_data),))
+            builder.add(row)
+        yield builder.build()
 
     return read_fn
 
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 7b51d7f30c6a..facca885254c 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -13,11 +13,14 @@
 
 import ray
 from ray.data._internal.datasource import zarrv2_datasource
+from ray.data.block import BlockAccessor
 from ray.data.tests.conftest import *  # noqa: F401, F403
 
 
 def _execute_read_tasks(tasks) -> pd.DataFrame:
-    frames = [block for task in tasks for block in task()]
+    frames = [
+        BlockAccessor.for_block(block).to_pandas() for task in tasks for block in task()
+    ]
     return pd.concat(frames, ignore_index=True)
 
 
@@ -28,7 +31,9 @@ def _reconstruct_array(df: pd.DataFrame, array_name: str) -> np.ndarray:
     higher-dim arrays, use ``_reconstruct_nd`` (which orders chunks by
     ``chunk_index`` and concatenates axis 0 first).
     """
-    sub = df[df["array"] == array_name].sort_values("chunk_index")
+    sub = df[df["array"] == array_name].sort_values(
+        "chunk_index", key=lambda col: col.map(tuple)
+    )
     return np.concatenate(list(sub["chunk"]), axis=0)
 
 
@@ -742,7 +747,7 @@ def test_estimate_inmemory_data_size(tmp_path):
         lazy_fixture("local_fsspec_fs"),  # native fsspec
     ],
 )
-def test_read_zarr_basic_across_filesystems(ray_start_regular_shared, fs, local_path):
+def test_read_zarr_basic_across_filesystems(fs, local_path):
     """Round-trip a real Zarr store through read_zarr for each filesystem flavor.
 
     Mirrors the parametrized read-path coverage other Ray Data datasources use
@@ -774,7 +779,7 @@ def test_read_zarr_basic_across_filesystems(ray_start_regular_shared, fs, local_
 # ---------------------------------------------------------------------------
 
 
-def test_read_zarr_integration_public_s3(ray_start_regular_shared):
+def test_read_zarr_integration_public_s3():
     """End-to-end read against a real Zarr store in a public S3 bucket.
 
     Uses ``s3://anonymous@ray-example-data/mnist-tiny.zarr`` — a 200-sample

From 5cf44f2c972ca5fa1f882980a5d2139d061295e8 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 11:13:26 +0200
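Subject: [PATCH 34/45] [data] read_zarr: datasource polish

Trim docstrings and comments, and move the fsspec imports out of module
scope (AbstractFileSystem under TYPE_CHECKING, split_protocol imported where
it is used). Also drop the align_axis_0 bool type check and the explicit
bool rejection in the overlap validation.
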
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 62 ++++---------------
 1 file changed, 13 insertions(+), 49 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 639cd73e125c..a7755d88e445 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -9,8 +9,6 @@
 from typing import TYPE_CHECKING, List, Optional
 
 import numpy as np
-from fsspec.core import split_protocol
-from fsspec.spec import AbstractFileSystem
 
 from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
 from ray.data._internal.util import _check_import, _is_local_scheme
@@ -20,6 +18,7 @@
 logger = logging.getLogger(__name__)
 
 if TYPE_CHECKING:
+    from fsspec.spec import AbstractFileSystem
     from pyarrow import fs as pyarrow_fs
     from zarr import Array as ZarrArray
     from zarr.hierarchy import Group as ZarrGroup
@@ -39,7 +38,6 @@ class ZarrArrayMeta:
 
     @classmethod
     def from_zarr_array(cls, arr: "ZarrArray") -> ZarrArrayMeta:
-        """Adapt an opened ``zarr.Array`` (already validated by zarr on open)."""
         return cls(
             shape=tuple(int(s) for s in arr.shape),
             chunks=tuple(int(c) for c in arr.chunks),
@@ -52,7 +50,6 @@ def rank(self) -> int:
 
     @property
     def itemsize(self) -> int:
-        """Bytes per element."""
         return np.dtype(self.dtype).itemsize
 
     def effective_chunks(
@@ -144,18 +141,9 @@ def _create_read_fn(
     batch: list[_ChunkDescriptor],
     root: ZarrRoot,
 ) -> Callable[[], Iterable[Block]]:
-    """Build a read-task callable that materializes one block for one batch.
-
-    Each output row carries ``(array, chunk_index, chunk_slices, chunk)``.
-    ``chunk`` is the data at its natural shape — possibly shorter than the
-    nominal chunk shape at trailing boundaries.
-
-    The caller is expected to pass batches whose chunks all come from one
-    array. Arrow's tensor extension requires all tensor elements in a
-    column to share rank, so mixing 4-D image chunks with 1-D label chunks
-    in one block would fail at build time.
-    :meth:`ZarrV2Datasource.get_read_tasks` enforces this by allocating one
-    batch per array.
+    """Build a callable that materializes one block per batch.
+
+    This is the case where arrays are not aligned.
     """
 
     def read_fn() -> Iterable[Block]:
@@ -179,12 +167,14 @@ def _create_aligned_read_fn(
     aligned_array_names: list[str],
     root: ZarrRoot,
 ) -> Callable[[], Iterable[Block]]:
-    """Build a read-task callable for aligned (wide-row) reads.
+    """Build a callable for aligned (wide-row) reads.
 
     Each output row carries ``t_start``, ``t_stop``, and one column per
     aligned array holding that array's ``[t_start:t_stop, ...]`` slice at
     its natural shape (edge rows may be shorter). All arrays in one row
     share the same axis-0 range.
+
+    This is the case where arrays are aligned on axis 0.
     """
 
     def read_fn() -> Iterable[Block]:
@@ -261,7 +251,6 @@ def __init__(
         self.paths = [str(path)]
         # ``local://`` stores live on the driver's local disk, so pin reads to
         # the driver node (workers on other nodes can't see those files).
-        # Mirrors FileBasedDatasource. Non-local/cloud stores read distributed.
         self._supports_distributed_reads = not _is_local_scheme(self.paths)
 
         # Resolve filesystem + store path. The order of precedence:
@@ -308,6 +297,8 @@ def __init__(
                 # not a ``.zip``-named entry inside it.
                 self._store_path = ""
             else:
+                from fsspec.core import split_protocol
+
                 _, store_path = split_protocol(self.paths[0])
                 self._store_path = store_path.rstrip("/")
 
@@ -334,13 +325,8 @@ def __init__(
 
                 self.chunk_shapes = tuple(int(x) for x in chunk_shapes)
 
-        # Open the store with zarr (consolidated metadata when available). zarr
-        # reads and validates `.zarray`/`.zmetadata` here, so the datasource does
-        # not re-check that metadata itself. Detect consolidation by *trying*
-        # ``open_consolidated`` rather than a separately-constructed ``exists``
-        # probe: the probe can disagree with the mapper's own key lookup (e.g.
-        # archive/root stores whose store path is empty) and wrongly treat a
-        # consolidated store as unconsolidated.
+        # Open the store with zarr (consolidated metadata when available).
+        # Detect consolidation by *trying* ``open_consolidated``.
         store = self._fs.get_mapper(self._store_path)
         try:
             self.root = zarr.open_consolidated(store, mode="r")
@@ -366,11 +352,6 @@ def __init__(
                     f"Unknown array path(s) in chunk_shapes: {unknown_chunk_shape_keys}"
                 )
 
-        if not isinstance(align_axis_0, bool):
-            raise TypeError(
-                f"align_axis_0 must be a bool, got {type(align_axis_0).__name__}"
-            )
-
         if not align_axis_0:
             self._aligned_array_names = None
         else:
@@ -397,7 +378,7 @@ def __init__(
         # Validate overlap. Only meaningful when arrays are co-iterated as
         # wide rows, since the trailing lookahead is exposed via the
         # per-array column being longer than ``t_stop - t_start``.
-        if isinstance(overlap, bool) or not isinstance(overlap, int) or overlap < 0:
+        if not isinstance(overlap, int) or overlap < 0:
             raise ValueError(f"overlap must be a non-negative integer, got {overlap!r}")
         if overlap and self._aligned_array_names is None:
             raise ValueError(
@@ -451,23 +432,7 @@ def get_read_tasks(
         per_task_row_limit: Optional[int] = None,
         data_context: Optional["DataContext"] = None,
     ) -> List[ReadTask]:
-        """Enumerate every chunk and wrap it (or batches of chunks) in ReadTasks.
-
-        Long-form mode (default): one task per per-array chunk batch.
-        Per-array batching keeps each block's ``chunk`` column rank-uniform
-        (Arrow's tensor extension requires this). ``parallelism`` is
-        treated as a per-array budget — each array's chunks are split into
-        ``min(parallelism, n_chunks_for_array)`` tasks.
-
-        Aligned mode (``align_axis_0=True``): one task per batch of
-        aligned axis-0 chunks. Each yielded row carries ``t_start``,
-        ``t_stop``, and one column per selected array containing that
-        array's slice for the row's axis-0 range.
-        """
-        # ``data_context`` is part of the Datasource ABC; this datasource
-        # doesn't read anything off it today (no context-aware behavior).
-        # Threaded through to the helpers so they keep the same signature
-        # in case a future change needs it.
+        """Enumerate every chunk and wrap it (or batches of chunks) in ReadTasks."""
         if self._aligned_array_names is not None:
             return self._get_aligned_read_tasks(
                 parallelism, per_task_row_limit, data_context
@@ -482,7 +447,6 @@ def _get_long_form_read_tasks(
         per_task_row_limit: Optional[int] = None,
         data_context: Optional["DataContext"] = None,
     ) -> List[ReadTask]:
-        """Long-form read tasks. See :meth:`get_read_tasks` for semantics."""
         read_tasks: List[ReadTask] = []
         for name, meta in self._metadata_by_path.items():
             chunks = self._array_chunks[name]

From a7ebe8fdc7de5c51368754f8e0da5a269c5f5ff3 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 11:21:08 +0200
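Subject: [PATCH 35/45] [data] read_zarr: import fsspec at runtime; drop
 stale align_axis_0 test

ZarrV2Datasource.__init__ now checks that fsspec is installed and imports
AbstractFileSystem at runtime. Remove test_align_axis_0_rejects_non_bool,
whose TypeError check is no longer in the datasource, along with the
now-unused typing.cast import.
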
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../data/_internal/datasource/zarrv2_datasource.py    |  3 +++
 python/ray/data/tests/datasource/test_zarrv2.py       | 11 +----------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index a7755d88e445..3521856fbc76 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -240,6 +240,9 @@ def __init__(
         _check_import(self, module="zarr", package="zarr")
         import zarr
 
+        _check_import(self, module="fsspec", package="fsspec")
+        from fsspec.spec import AbstractFileSystem
+
         if int(zarr.__version__.split(".")[0]) >= 3:
             raise ImportError(
                 f"read_zarr supports zarr-python 2.x (Zarr v2 stores), but found "
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index facca885254c..ecf7e21a25b2 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -1,7 +1,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import Any, cast
+from typing import Any
 
 import fsspec
 import numpy as np
@@ -467,15 +467,6 @@ def test_align_axis_0_rejects_misaligned_shape0(heterogeneous_zarrv2_store):
         )
 
 
-def test_align_axis_0_rejects_non_bool(aligned_zarrv2_store):
-    """``align_axis_0`` must be a bool — no list form."""
-    with pytest.raises(TypeError, match=r"align_axis_0 must be a bool"):
-        zarrv2_datasource.ZarrV2Datasource(
-            str(aligned_zarrv2_store),
-            align_axis_0=cast(Any, ["img", "state"]),
-        )
-
-
 def test_align_axis_0_rejects_divergent_axis_0_chunks(aligned_zarrv2_store):
     """If aligned arrays end up with different axis-0 chunks, error clearly.
 

From 56dc9072e5ea6d5dd7084f467e66e4232f78235a Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 11:51:39 +0200
Subject: [PATCH 36/45] [data] read_zarr: drop redundant zarr datasource tests

Remove three tests whose coverage is fully subsumed elsewhere:

- test_align_axis_0_accepts_per_array_chunk_shapes: dict chunk_shapes
  resolution is covered by test_chunk_shapes_resolution_across_mixed_rank
  (asserts _array_chunks directly), and aligned wide-row output by
  test_align_axis_0_emits_wide_rows; the aligned path consumes the resolved
  chunks regardless of dict vs sequence, so the combination adds no path.
- test_overlap_enables_windowing_without_cross_row_loss: its assertion is
  pure arithmetic on the per-row data extents already asserted by
  test_overlap_extends_chunk_data; it exercises no new datasource behavior.
- test_align_axis_0_column_set: the no-array_paths case duplicated the column
  assertion in test_align_axis_0_emits_wide_rows; de-parametrized to keep only
  the array_paths-filtering case, which is its unique coverage.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../ray/data/tests/datasource/test_zarrv2.py  | 52 ++-----------------
 1 file changed, 5 insertions(+), 47 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index ecf7e21a25b2..3473c2b89561 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -422,36 +422,17 @@ def test_align_axis_0_emits_wide_rows(aligned_zarrv2_store):
     assert stops == [4, 8]
 
 
-@pytest.mark.parametrize(
-    "array_paths,extra_cols",
-    [
-        # No filter: all discovered arrays end up aligned.
-        (None, {"img", "state", "label"}),
-        # array_paths selects which arrays to read; align_axis_0 just
-        # asserts that the selected set is mutually aligned.
-        (["img", "state"], {"img", "state"}),
-    ],
-)
-def test_align_axis_0_column_set(aligned_zarrv2_store, array_paths, extra_cols):
+def test_align_axis_0_column_set(aligned_zarrv2_store):
+    """array_paths selects which arrays are read; aligned mode emits one column
+    per selected array (plus t_start/t_stop)."""
     datasource = zarrv2_datasource.ZarrV2Datasource(
         str(aligned_zarrv2_store),
-        array_paths=array_paths,
+        array_paths=["img", "state"],
         align_axis_0=True,
         chunk_shapes=[4],
     )
     df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
-    assert set(df.columns) == {"t_start", "t_stop"} | extra_cols
-
-
-def test_align_axis_0_accepts_per_array_chunk_shapes(aligned_zarrv2_store):
-    datasource = zarrv2_datasource.ZarrV2Datasource(
-        str(aligned_zarrv2_store),
-        align_axis_0=True,
-        chunk_shapes={"img": [4], "state": [4], "label": [4]},
-    )
-    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
-    assert len(df) == 2
-    assert sorted(zip(df["t_start"], df["t_stop"])) == [(0, 4), (4, 8)]
+    assert set(df.columns) == {"t_start", "t_stop", "img", "state"}
 
 
 def test_align_axis_0_rejects_misaligned_shape0(heterogeneous_zarrv2_store):
@@ -534,29 +515,6 @@ def test_overlap_rejects_negative_and_non_int(aligned_zarrv2_store):
             )
 
 
-def test_overlap_enables_windowing_without_cross_row_loss(aligned_zarrv2_store):
-    window_len = 3
-    datasource = zarrv2_datasource.ZarrV2Datasource(
-        str(aligned_zarrv2_store),
-        align_axis_0=True,
-        chunk_shapes=[4],
-        overlap=window_len - 1,
-    )
-    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=4))
-    starts = []
-    for _, row in df.iterrows():
-        t_start, t_stop = row["t_start"], row["t_stop"]
-        img = row["img"]
-        for local in range(t_stop - t_start):
-            if local + window_len > img.shape[0]:
-                continue  # only triggers at very end of store
-            starts.append(t_start + local)
-    # 8 timesteps, window_len=3 -> valid global starts are [0,6) = 6 windows.
-    # Without overlap we would have lost ~33%. With overlap=2 we should
-    # capture all 6.
-    assert sorted(starts) == [0, 1, 2, 3, 4, 5]
-
-
 def test_chunk_shapes_rejected_when_longer_than_smallest_array(
     heterogeneous_zarrv2_store,
 ):

From 27a9fd97bc22ed79346d76dc7bb5a23e83ff2f36 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 12:12:14 +0200
Subject: [PATCH 37/45] [data] read_zarr: honor per_task_row_limit and
 retried_io_errors

Fix #2 (limit pushdown): the read fns now slice their batch to
per_task_row_limit, so with a downstream limit(K) each read task fetches only
~K chunks rather than every chunk in its batch. Previously ReadTask only
truncated the already-built block (_iter_sliced_blocks), so every chunk in the
batch was still fetched.

Fix #4 (retries): chunk reads are wrapped in
iterate_with_retry(match=DataContext.retried_io_errors) -- the same mechanism
FileBasedDatasource uses -- so zarr reads now honor Ray Data's retry config.
The underlying filesystem's own retry still applies underneath.

Tests: per_task_row_limit caps the number of _read_chunk calls (not just the
output row count); _read_chunk retries a transient error then succeeds.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 83 +++++++++++++++----
 .../ray/data/tests/datasource/test_zarrv2.py  | 51 ++++++++++++
 2 files changed, 117 insertions(+), 17 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 3521856fbc76..89e41db7c2a3 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -6,12 +6,16 @@
 from collections.abc import Callable, Iterable
 from dataclasses import dataclass
 from itertools import product
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional
 
 import numpy as np
 
 from ray.data._internal.delegating_block_builder import DelegatingBlockBuilder
-from ray.data._internal.util import _check_import, _is_local_scheme
+from ray.data._internal.util import (
+    _check_import,
+    _is_local_scheme,
+    iterate_with_retry,
+)
 from ray.data.block import Block, BlockMetadata
 from ray.data.datasource.datasource import Datasource, ReadTask
 
@@ -104,14 +108,37 @@ def chunk_slices(
 # ---------------------------------------------------------------------------
 
 
+def _call_with_retry(
+    fn: Callable[[], Any],
+    description: str,
+    match: Optional[List[str]],
+) -> Any:
+    if not match:
+        return fn()
+    return next(
+        iterate_with_retry(lambda: [fn()], description=description, match=match)
+    )
+
+
 def _read_chunk(
     root: ZarrRoot,
     array_name: str,
     chunk_slices: tuple[tuple[int, int], ...],
+    retry_match: Optional[List[str]] = None,
 ) -> np.ndarray:
-    indexer = tuple(slice(s, e) for s, e in chunk_slices)
-    arr = root if array_name == "" else root[array_name]
-    return np.asarray(arr[indexer])
+    """Read ``array[chunk_slices]`` as an ndarray.
+
+    Transient I/O errors matching ``retry_match`` (Ray Data's
+    ``DataContext.retried_io_errors``) are retried; the underlying filesystem's
+    own retry policy still applies underneath.
+    """
+
+    def _read() -> np.ndarray:
+        indexer = tuple(slice(s, e) for s, e in chunk_slices)
+        arr = root if array_name == "" else root[array_name]
+        return np.asarray(arr[indexer])
+
+    return _call_with_retry(_read, "read a Zarr chunk", retry_match)
 
 
 @dataclass(frozen=True)
@@ -140,11 +167,16 @@ class _AlignedChunkDescriptor:
 def _create_read_fn(
     batch: list[_ChunkDescriptor],
     root: ZarrRoot,
+    per_task_row_limit: Optional[int],
+    retry_match: Optional[List[str]],
 ) -> Callable[[], Iterable[Block]]:
     """Build a callable that materializes one block per batch.
 
-    This is the case where arrays are not aligned.
+    This is the case where arrays are not aligned. ``per_task_row_limit`` caps
+    how many chunks this task reads so a downstream ``limit`` reads only what it
+    needs instead of the whole batch (``None`` reads the whole batch).
     """
+    batch = batch[:per_task_row_limit]
 
     def read_fn() -> Iterable[Block]:
         builder = DelegatingBlockBuilder()
@@ -154,7 +186,9 @@ def read_fn() -> Iterable[Block]:
                     "array": d.array_name,
                     "chunk_index": d.chunk_index,
                     "chunk_slices": d.chunk_slices,
-                    "chunk": _read_chunk(root, d.array_name, d.chunk_slices),
+                    "chunk": _read_chunk(
+                        root, d.array_name, d.chunk_slices, retry_match
+                    ),
                 }
             )
         yield builder.build()
@@ -166,6 +200,8 @@ def _create_aligned_read_fn(
     batch: list[_AlignedChunkDescriptor],
     aligned_array_names: list[str],
     root: ZarrRoot,
+    per_task_row_limit: Optional[int],
+    retry_match: Optional[List[str]],
 ) -> Callable[[], Iterable[Block]]:
     """Build a callable for aligned (wide-row) reads.
 
@@ -174,15 +210,19 @@ def _create_aligned_read_fn(
     its natural shape (edge rows may be shorter). All arrays in one row
     share the same axis-0 range.
 
-    This is the case where arrays are aligned on axis 0.
+    This is the case where arrays are aligned on axis 0. ``per_task_row_limit``
+    caps how many rows this task reads (``None`` reads the whole batch).
     """
+    batch = batch[:per_task_row_limit]
 
     def read_fn() -> Iterable[Block]:
         builder = DelegatingBlockBuilder()
         for d in batch:
             row = {"t_start": d.t_start, "t_stop": d.t_stop}
             for name in aligned_array_names:
-                row[name] = _read_chunk(root, name, ((d.t_start, d.t_stop_data),))
+                row[name] = _read_chunk(
+                    root, name, ((d.t_start, d.t_stop_data),), retry_match
+                )
             builder.add(row)
         yield builder.build()
 
@@ -436,19 +476,22 @@ def get_read_tasks(
         data_context: Optional["DataContext"] = None,
     ) -> List[ReadTask]:
         """Enumerate every chunk and wrap it (or batches of chunks) in ReadTasks."""
+        from ray.data.context import DataContext
+
+        retry_match = (data_context or DataContext.get_current()).retried_io_errors
         if self._aligned_array_names is not None:
             return self._get_aligned_read_tasks(
-                parallelism, per_task_row_limit, data_context
+                parallelism, per_task_row_limit, retry_match
             )
         return self._get_long_form_read_tasks(
-            parallelism, per_task_row_limit, data_context
+            parallelism, per_task_row_limit, retry_match
         )
 
     def _get_long_form_read_tasks(
         self,
         parallelism: int,
-        per_task_row_limit: Optional[int] = None,
-        data_context: Optional["DataContext"] = None,
+        per_task_row_limit: Optional[int],
+        retry_match: Optional[List[str]],
     ) -> List[ReadTask]:
         read_tasks: List[ReadTask] = []
         for name, meta in self._metadata_by_path.items():
@@ -470,7 +513,9 @@ def _get_long_form_read_tasks(
                 batch = descriptors[start : start + batch_size]
                 read_tasks.append(
                     ReadTask(
-                        _create_read_fn(batch, self.root),
+                        _create_read_fn(
+                            batch, self.root, per_task_row_limit, retry_match
+                        ),
                         BlockMetadata(
                             num_rows=len(batch),
                             size_bytes=self._estimate_long_form_batch_mem_size(batch),
@@ -493,8 +538,8 @@ def _estimate_long_form_batch_mem_size(self, batch: list[_ChunkDescriptor]) -> i
     def _get_aligned_read_tasks(
         self,
         parallelism: int,
-        per_task_row_limit: Optional[int] = None,
-        data_context: Optional["DataContext"] = None,
+        per_task_row_limit: Optional[int],
+        retry_match: Optional[List[str]],
     ) -> List[ReadTask]:
         """Aligned read tasks. See :meth:`get_read_tasks` for semantics."""
         assert self._aligned_array_names is not None
@@ -525,7 +570,11 @@ def _get_aligned_read_tasks(
             read_tasks.append(
                 ReadTask(
                     _create_aligned_read_fn(
-                        batch, self._aligned_array_names, self.root
+                        batch,
+                        self._aligned_array_names,
+                        self.root,
+                        per_task_row_limit,
+                        retry_match,
                     ),
                     BlockMetadata(
                         num_rows=len(batch),
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index 3473c2b89561..f40750f06c0d 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -595,6 +595,57 @@ def test_get_read_tasks_batches_chunks_by_parallelism(tmp_path):
     assert all(task.metadata.input_files == (str(store_path),) for task in read_tasks)
 
 
+def test_per_task_row_limit_caps_chunks_read(tmp_path, monkeypatch):
+    """per_task_row_limit bounds how many chunks a task actually reads, so a
+    downstream ``limit`` doesn't pull the whole batch's I/O."""
+    store_path = tmp_path / "limit.zarr"
+    _write_real_zarr_store(store_path, {"data": (np.arange(10, dtype="<i4"), (1,))})
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(store_path))
+
+    reads = []
+    real_read_chunk = zarrv2_datasource._read_chunk
+
+    def _spy(*args, **kwargs):
+        reads.append(1)
+        return real_read_chunk(*args, **kwargs)
+
+    monkeypatch.setattr(zarrv2_datasource, "_read_chunk", _spy)
+
+    # parallelism=1 -> one task batching all 10 chunks; cap it at 3.
+    tasks = datasource.get_read_tasks(parallelism=1, per_task_row_limit=3)
+    blocks = [block for task in tasks for block in task()]
+
+    total_rows = sum(BlockAccessor.for_block(b).num_rows() for b in blocks)
+    assert total_rows == 3
+    # The fix: only 3 chunks were actually read (not all 10, then truncated).
+    assert len(reads) == 3
+
+
+def test_read_chunk_retries_transient_io(monkeypatch):
+    """_read_chunk retries reads whose error matches retry_match (Ray Data's
+    DataContext.retried_io_errors), then succeeds."""
+    monkeypatch.setattr("time.sleep", lambda *_: None)  # no backoff in the test
+
+    class _FlakyArray:
+        attempts = 0
+
+        def __getitem__(self, _idx):
+            type(self).attempts += 1
+            if self.attempts < 3:
+                raise OSError("Connection reset by peer")
+            return np.arange(4, dtype="<i4")
+
+    class _Root:
+        def __getitem__(self, _name):
+            return _FlakyArray()
+
+    out = zarrv2_datasource._read_chunk(
+        _Root(), "x", ((0, 4),), retry_match=["Connection reset"]
+    )
+    np.testing.assert_array_equal(out, np.arange(4, dtype="<i4"))
+    assert _FlakyArray.attempts == 3  # failed twice, then succeeded
+
+
 def test_long_form_schema_and_materialization(tmp_path):
     """End-to-end: long-form rows are emitted with the expected columns and data."""
     store_path = tmp_path / "aligned.zarr"

From 7eb47f3cc356e95aee7fb6799c81602f8e462c13 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 13:48:14 +0200
Subject: [PATCH 38/45] [data] read_zarr: lazy grid-range read-task planning
 (O(parallelism))

_get_long_form_read_tasks no longer materializes a per-chunk descriptor list on
the driver -- the product(grid) enumeration was O(total chunks) and ran even for
take(1)/limit (e.g. ~64,800 descriptors for one MUR SST array). Read tasks now
describe a contiguous flat range of the chunk grid; the read fn unravels each
flat index to an N-D chunk_index lazily on the worker. Planning is O(n_tasks)
per array, independent of chunk count.

- New _ChunkRange (replaces per-chunk _ChunkDescriptor) + _unravel (row-major,
  preserving the previous itertools.product ordering).
- size_bytes is now an O(1) upper-bound estimate (full-size chunk per index)
  instead of an O(chunks) exact sum.
- per_task_row_limit now caps the flat-index range instead of slicing a list;
  the aligned path is unchanged (already O(output rows)).

Adds a test asserting chunk_index order is identical to grid enumeration.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 107 +++++++++++-------
 .../ray/data/tests/datasource/test_zarrv2.py  |  17 +++
 2 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 89e41db7c2a3..306767364464 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -5,7 +5,6 @@
 import numbers
 from collections.abc import Callable, Iterable
 from dataclasses import dataclass
-from itertools import product
 from typing import TYPE_CHECKING, Any, List, Optional
 
 import numpy as np
@@ -115,6 +114,7 @@ def _call_with_retry(
 ) -> Any:
     if not match:
         return fn()
+    # TODO(Artur): This would be more elegant with a general retry helper for non-iterables.
     return next(
         iterate_with_retry(lambda: [fn()], description=description, match=match)
     )
@@ -142,12 +142,35 @@ def _read() -> np.ndarray:
 
 
 @dataclass(frozen=True)
-class _ChunkDescriptor:
-    """One long-form row's worth of read work: which chunk of which array."""
+class _ChunkRange:
+    """A contiguous slice ``[flat_start, flat_stop)`` of an array's chunk grid.
+
+    The flat indices address the row-major flattening of the chunk grid; the
+    read fn unravels each to an N-D ``chunk_index`` lazily on the worker. Keeping
+    a range (not a materialized per-chunk list) makes read-task planning
+    O(parallelism) rather than O(total chunks) -- important for stores with very
+    many chunks.
+    """
 
     array_name: str
-    chunk_index: tuple[int, ...]
-    chunk_slices: tuple[tuple[int, int], ...]
+    meta: ZarrArrayMeta
+    chunks: tuple[int, ...]
+    grid: tuple[int, ...]
+    flat_start: int
+    flat_stop: int
+
+
+def _unravel(flat_index: int, grid: tuple[int, ...]) -> tuple[int, ...]:
+    """Row-major (C-order) flat index -> N-D chunk index.
+
+    Matches ``itertools.product(*(range(n) for n in grid))`` ordering, so the
+    emitted ``chunk_index`` sequence is identical to enumerating the grid.
+    """
+    idx = []
+    for n in reversed(grid):
+        idx.append(flat_index % n)
+        flat_index //= n
+    return tuple(reversed(idx))
 
 
 @dataclass(frozen=True)
@@ -165,29 +188,35 @@ class _AlignedChunkDescriptor:
 
 
 def _create_read_fn(
-    batch: list[_ChunkDescriptor],
+    chunk_range: _ChunkRange,
     root: ZarrRoot,
     per_task_row_limit: Optional[int],
     retry_match: Optional[List[str]],
 ) -> Callable[[], Iterable[Block]]:
-    """Build a callable that materializes one block per batch.
+    """Build a callable that materializes one block for a chunk-grid range.
 
-    This is the case where arrays are not aligned. ``per_task_row_limit`` caps
-    how many chunks this task reads so a downstream ``limit`` reads only what it
-    needs instead of the whole batch (``None`` reads the whole batch).
+    This is the case where arrays are not aligned. Chunks are enumerated lazily
+    (on the worker) from ``chunk_range``. ``per_task_row_limit`` caps how many
+    chunks this task reads so a downstream ``limit`` reads only what it needs
+    (``None`` reads the whole range).
     """
-    batch = batch[:per_task_row_limit]
+    cr = chunk_range
+    stop = cr.flat_stop
+    if per_task_row_limit is not None:
+        stop = min(stop, cr.flat_start + per_task_row_limit)
 
     def read_fn() -> Iterable[Block]:
         builder = DelegatingBlockBuilder()
-        for d in batch:
+        for flat_index in range(cr.flat_start, stop):
+            chunk_index = _unravel(flat_index, cr.grid)
+            chunk_slices = cr.meta.chunk_slices(chunk_index, cr.chunks)
             builder.add(
                 {
-                    "array": d.array_name,
-                    "chunk_index": d.chunk_index,
-                    "chunk_slices": d.chunk_slices,
+                    "array": cr.array_name,
+                    "chunk_index": chunk_index,
+                    "chunk_slices": chunk_slices,
                     "chunk": _read_chunk(
-                        root, d.array_name, d.chunk_slices, retry_match
+                        root, cr.array_name, chunk_slices, retry_match
                     ),
                 }
             )
@@ -497,28 +526,27 @@ def _get_long_form_read_tasks(
         for name, meta in self._metadata_by_path.items():
             chunks = self._array_chunks[name]
             grid = self._array_grids[name]
-            descriptors = [
-                _ChunkDescriptor(
-                    array_name=name,
-                    chunk_index=chunk_index,
-                    chunk_slices=meta.chunk_slices(chunk_index, chunks),
-                )
-                for chunk_index in product(*(range(n) for n in grid))
-            ]
-            if not descriptors:
+            n_chunks = math.prod(grid)
+            if n_chunks == 0:
                 continue
-            n_tasks = max(1, min(parallelism, len(descriptors)))
-            batch_size = math.ceil(len(descriptors) / n_tasks)
-            for start in range(0, len(descriptors), batch_size):
-                batch = descriptors[start : start + batch_size]
+            # Split the chunk grid into contiguous flat-index ranges. This is
+            # O(n_tasks), not O(n_chunks): we never materialize a per-chunk list
+            # on the driver -- the read fn unravels chunks lazily on the worker.
+            n_tasks = max(1, min(parallelism, n_chunks))
+            batch_size = math.ceil(n_chunks / n_tasks)
+            for flat_start in range(0, n_chunks, batch_size):
+                flat_stop = min(flat_start + batch_size, n_chunks)
+                chunk_range = _ChunkRange(
+                    name, meta, chunks, grid, flat_start, flat_stop
+                )
                 read_tasks.append(
                     ReadTask(
                         _create_read_fn(
-                            batch, self.root, per_task_row_limit, retry_match
+                            chunk_range, self.root, per_task_row_limit, retry_match
                         ),
                         BlockMetadata(
-                            num_rows=len(batch),
-                            size_bytes=self._estimate_long_form_batch_mem_size(batch),
+                            num_rows=flat_stop - flat_start,
+                            size_bytes=self._estimate_range_mem_size(chunk_range),
                             input_files=(self.paths[0],),
                             exec_stats=None,
                         ),
@@ -527,13 +555,14 @@ def _get_long_form_read_tasks(
                 )
         return read_tasks
 
-    def _estimate_long_form_batch_mem_size(self, batch: list[_ChunkDescriptor]) -> int:
-        """Sum in-memory bytes across all chunks in one long-form batch."""
-        return sum(
-            math.prod(stop - start for start, stop in desc.chunk_slices)
-            * self._metadata_by_path[desc.array_name].itemsize
-            for desc in batch
-        )
+    def _estimate_range_mem_size(self, chunk_range: _ChunkRange) -> int:
+        """Upper-bound in-memory bytes for a chunk-grid range.
+
+        Assumes a full-size chunk per index; trailing-edge chunks are smaller,
+        so this slightly over-estimates. O(1) -- it does not enumerate the range.
+        """
+        n = chunk_range.flat_stop - chunk_range.flat_start
+        return n * math.prod(chunk_range.chunks) * chunk_range.meta.itemsize
 
     def _get_aligned_read_tasks(
         self,
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index f40750f06c0d..e1f483a52ea4 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -595,6 +595,23 @@ def test_get_read_tasks_batches_chunks_by_parallelism(tmp_path):
     assert all(task.metadata.input_files == (str(store_path),) for task in read_tasks)
 
 
+def test_long_form_chunk_index_order_matches_grid(tmp_path):
+    """Lazy grid-range tasks emit chunk_index in the same row-major order as a
+    full grid enumeration (regression guard for the lazy-unravel refactor)."""
+    from itertools import product
+
+    store_path = tmp_path / "order.zarr"
+    # shape (6, 4), chunks (2, 2) -> grid (3, 2) = 6 chunks.
+    _write_real_zarr_store(
+        store_path, {"a": (np.arange(6 * 4, dtype="<i4").reshape(6, 4), (2, 2))}
+    )
+    datasource = zarrv2_datasource.ZarrV2Datasource(str(store_path))
+    # parallelism=2 -> two flat-index ranges; concatenated they must be in order.
+    df = _execute_read_tasks(datasource.get_read_tasks(parallelism=2))
+    got = [tuple(int(x) for x in ci) for ci in df["chunk_index"]]
+    assert got == list(product(range(3), range(2)))
+
+
 def test_per_task_row_limit_caps_chunks_read(tmp_path, monkeypatch):
     """per_task_row_limit bounds how many chunks a task actually reads, so a
     downstream ``limit`` doesn't pull the whole batch's I/O."""

From ac80b47cd16e5aa366a40a32635b79c90cedca2c Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 14:25:07 +0200
Subject: [PATCH 39/45] [data] read_zarr: use np.unravel_index for chunk index

Replace the hand-rolled flat-index -> N-D unravel helper with numpy's
np.unravel_index (the recognized primitive for exactly this). Its default
C-order matches the previous ordering, so the emitted chunk_index sequence is
unchanged (the ordering test still passes); int() keeps the indices as Python
ints. Drops the _unravel helper.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py     | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 306767364464..dba6437a5c03 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -160,19 +160,6 @@ class _ChunkRange:
     flat_stop: int
 
 
-def _unravel(flat_index: int, grid: tuple[int, ...]) -> tuple[int, ...]:
-    """Row-major (C-order) flat index -> N-D chunk index.
-
-    Matches ``itertools.product(*(range(n) for n in grid))`` ordering, so the
-    emitted ``chunk_index`` sequence is identical to enumerating the grid.
-    """
-    idx = []
-    for n in reversed(grid):
-        idx.append(flat_index % n)
-        flat_index //= n
-    return tuple(reversed(idx))
-
-
 @dataclass(frozen=True)
 class _AlignedChunkDescriptor:
     """One wide row: a global axis-0 range ``[t_start, t_stop)`` across the
@@ -208,7 +195,7 @@ def _create_read_fn(
     def read_fn() -> Iterable[Block]:
         builder = DelegatingBlockBuilder()
         for flat_index in range(cr.flat_start, stop):
-            chunk_index = _unravel(flat_index, cr.grid)
+            chunk_index = tuple(int(i) for i in np.unravel_index(flat_index, cr.grid))
             chunk_slices = cr.meta.chunk_slices(chunk_index, cr.chunks)
             builder.add(
                 {

From 78e424276054a629446aab718e9f57e21219c5f8 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 14:47:23 +0200
Subject: [PATCH 40/45] polish tests

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../ray/data/tests/datasource/test_zarrv2.py  | 23 ++++---------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index e1f483a52ea4..ee8aaae97630 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -25,12 +25,7 @@ def _execute_read_tasks(tasks) -> pd.DataFrame:
 
 
 def _reconstruct_array(df: pd.DataFrame, array_name: str) -> np.ndarray:
-    """Concatenate all chunks of one array from a long-form result frame.
-
-    Assumes the array is 1-D along its chunked axis 0. For tests with
-    higher-dim arrays, use ``_reconstruct_nd`` (which orders chunks by
-    ``chunk_index`` and concatenates axis 0 first).
-    """
+    """Concatenate all chunks of one array from a long-form result frame."""
     sub = df[df["array"] == array_name].sort_values(
         "chunk_index", key=lambda col: col.map(tuple)
     )
@@ -308,19 +303,9 @@ def test_root_array_rejects_non_root_array_paths(zarrv2_root_store):
 @pytest.mark.parametrize(
     "chunk_shapes, match",
     [
-        # Wrong container type (not list/tuple/dict).
-        ("invalid", "chunk_shapes must be a non-empty sequence of positive integers"),
-        (42, "chunk_shapes must be a non-empty sequence of positive integers"),
-        (b"bytes", "chunk_shapes must be a non-empty sequence of positive integers"),
-        ({1, 2}, "chunk_shapes must be a non-empty sequence of positive integers"),
-        # Bad dict values.
-        ({"images": 1}, r"chunk_shapes\['images'\] must be .*positive integers"),
-        ({"images": None}, r"chunk_shapes\['images'\] must be .*positive integers"),
-        ({"images": []}, r"chunk_shapes\['images'\] must be .*positive integers"),
-        ({"images": [0]}, r"chunk_shapes\['images'\] must be .*positive integers"),
-        ({"images": [1.5]}, r"chunk_shapes\['images'\] must be .*positive integers"),
-        # Unknown array path.
-        ({"does_not_exist": [2]}, r"Unknown array path\(s\) in chunk_shapes"),
+        ("invalid", "positive integers"),
+        ({"images": 1}, "positive integers"),
+        ({"does_not_exist": [2]}, "Unknown array path"),
     ],
 )
 def test_rejects_invalid_chunk_shapes(zarrv2_group_store, chunk_shapes, match):

From 4145c7456bcba69794787aee6dbef8bae33907e1 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 15:30:24 +0200
Subject: [PATCH 41/45] [data] read_zarr: satisfy pyrefly type check

pyrefly (CI lint) flagged 3 type errors in the new files. None are runtime bugs
(the suite passes); each fix either clarifies intent or suppresses the error
for an intentional test fake:
- _create_aligned_read_fn: annotate `row: dict[str, Any]` so assigning a chunk
  ndarray isn't checked against the all-int TypedDict pyrefly inferred from the
  t_start/t_stop literals.
- _is_positive_int: `int(x) > 0` (pyrefly can't type `>` on numbers.Integral).
- test_read_chunk_retries_transient_io: `# pyrefly: ignore[bad-argument-type]`
  for the deliberate fake _Root() passed to _read_chunk (repo convention).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 python/ray/data/_internal/datasource/zarrv2_datasource.py | 4 ++--
 python/ray/data/tests/datasource/test_zarrv2.py           | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index dba6437a5c03..2e922e3d5c84 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -234,7 +234,7 @@ def _create_aligned_read_fn(
     def read_fn() -> Iterable[Block]:
         builder = DelegatingBlockBuilder()
         for d in batch:
-            row = {"t_start": d.t_start, "t_stop": d.t_stop}
+            row: dict[str, Any] = {"t_start": d.t_start, "t_stop": d.t_stop}
             for name in aligned_array_names:
                 row[name] = _read_chunk(
                     root, name, ((d.t_start, d.t_stop_data),), retry_match
@@ -247,7 +247,7 @@ def read_fn() -> Iterable[Block]:
 
 def _is_positive_int(x) -> bool:
     """True for a positive integer, including NumPy integers; False for bool."""
-    return not isinstance(x, bool) and isinstance(x, numbers.Integral) and x > 0
+    return not isinstance(x, bool) and isinstance(x, numbers.Integral) and int(x) > 0
 
 
 def _validate_chunk_shapes_dict(chunk_shapes: dict) -> dict[str, tuple[int, ...]]:
diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index ee8aaae97630..dd43cc413a07 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -642,7 +642,10 @@ def __getitem__(self, _name):
             return _FlakyArray()
 
     out = zarrv2_datasource._read_chunk(
-        _Root(), "x", ((0, 4),), retry_match=["Connection reset"]
+        _Root(),  # pyrefly: ignore[bad-argument-type]
+        "x",
+        ((0, 4),),
+        retry_match=["Connection reset"],
     )
     np.testing.assert_array_equal(out, np.arange(4, dtype="<i4"))
     assert _FlakyArray.attempts == 3  # failed twice, then succeeded

From fa16bd3e38b0c7849d342da0f98d6bb372d22d20 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Fri, 19 Jun 2026 16:03:00 +0200
Subject: [PATCH 42/45] simplify retries

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../_internal/datasource/zarrv2_datasource.py | 26 +++++++------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/python/ray/data/_internal/datasource/zarrv2_datasource.py b/python/ray/data/_internal/datasource/zarrv2_datasource.py
index 2e922e3d5c84..e83b3d4b4030 100644
--- a/python/ray/data/_internal/datasource/zarrv2_datasource.py
+++ b/python/ray/data/_internal/datasource/zarrv2_datasource.py
@@ -107,19 +107,6 @@ def chunk_slices(
 # ---------------------------------------------------------------------------
 
 
-def _call_with_retry(
-    fn: Callable[[], Any],
-    description: str,
-    match: Optional[List[str]],
-) -> Any:
-    if not match:
-        return fn()
-    # TODO(Artur): This would be more elegant with a general retry helper for non-iterables.
-    return next(
-        iterate_with_retry(lambda: [fn()], description=description, match=match)
-    )
-
-
 def _read_chunk(
     root: ZarrRoot,
     array_name: str,
@@ -128,9 +115,7 @@ def _read_chunk(
 ) -> np.ndarray:
     """Read ``array[chunk_slices]`` as an ndarray.
 
-    Transient I/O errors matching ``retry_match`` (Ray Data's
-    ``DataContext.retried_io_errors``) are retried; the underlying filesystem's
-    own retry policy still applies underneath.
+    Errors matching ``retry_match`` are retried on top of the filesystem's own retries.
     """
 
     def _read() -> np.ndarray:
@@ -138,7 +123,14 @@ def _read() -> np.ndarray:
         arr = root if array_name == "" else root[array_name]
         return np.asarray(arr[indexer])
 
-    return _call_with_retry(_read, "read a Zarr chunk", retry_match)
+    if not retry_match:
+        return _read()
+    # TODO(Artur): This would be more elegant with a general retry helper for non-iterables.
+    return next(
+        iterate_with_retry(
+            lambda: [_read()], description="read a Zarr chunk", match=retry_match
+        )
+    )
 
 
 @dataclass(frozen=True)

From 5d29c90c36e62886e32f64bca60ea04a417550d0 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Sun, 21 Jun 2026 23:03:12 +0200
Subject: [PATCH 43/45] Richard's comment

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../ray/data/tests/datasource/test_zarrv2.py  | 191 ++++++++++--------
 1 file changed, 106 insertions(+), 85 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index dd43cc413a07..cdda751d8b09 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -382,7 +382,7 @@ def test_chunk_shapes_resolution_across_mixed_rank(
 # ---------------------------------------------------------------------------
 
 
-def test_align_axis_0_emits_wide_rows(aligned_zarrv2_store):
+def test_align_axis_0_emits_wide_rows(ray_start_regular_shared, aligned_zarrv2_store):
     """Wide-row schema: ``t_start``, ``t_stop``, one column per selected array."""
     datasource = zarrv2_datasource.ZarrV2Datasource(
         str(aligned_zarrv2_store),
@@ -407,7 +407,7 @@ def test_align_axis_0_emits_wide_rows(aligned_zarrv2_store):
     assert stops == [4, 8]
 
 
-def test_align_axis_0_column_set(aligned_zarrv2_store):
+def test_align_axis_0_column_set(ray_start_regular_shared, aligned_zarrv2_store):
     """array_paths selects which arrays are read; aligned mode emits one column
     per selected array (plus t_start/t_stop)."""
     datasource = zarrv2_datasource.ZarrV2Datasource(
@@ -454,7 +454,7 @@ def test_align_axis_0_rejects_divergent_axis_0_chunks(aligned_zarrv2_store):
 # ---------------------------------------------------------------------------
 
 
-def test_overlap_extends_chunk_data(aligned_zarrv2_store):
+def test_overlap_extends_chunk_data(ray_start_regular_shared, aligned_zarrv2_store):
     """``overlap=N`` makes each row's per-array slice cover ``N`` extra timesteps.
 
     Aligned store has shape[0]=8, ``chunk_shapes=[4]`` -> rows own [0,4) and [4,8).
@@ -548,7 +548,7 @@ def test_rejects_unsupported_filesystem_type():
 # ---------------------------------------------------------------------------
 
 
-def test_reads_zarr_zip_local_path(zarr_zip_store):
+def test_reads_zarr_zip_local_path(ray_start_regular_shared, zarr_zip_store):
     """A local ``.zarr.zip`` path auto-wires fsspec's ZipFileSystem."""
     datasource = zarrv2_datasource.ZarrV2Datasource(str(zarr_zip_store))
     # The store has one array "data" of shape (6, 2) chunks (3, 2) -> 2 chunks.
@@ -580,7 +580,7 @@ def test_get_read_tasks_batches_chunks_by_parallelism(tmp_path):
     assert all(task.metadata.input_files == (str(store_path),) for task in read_tasks)
 
 
-def test_long_form_chunk_index_order_matches_grid(tmp_path):
+def test_long_form_chunk_index_order_matches_grid(ray_start_regular_shared, tmp_path):
     """Lazy grid-range tasks emit chunk_index in the same row-major order as a
     full grid enumeration (regression guard for the lazy-unravel refactor)."""
     from itertools import product
@@ -597,7 +597,9 @@ def test_long_form_chunk_index_order_matches_grid(tmp_path):
     assert got == list(product(range(3), range(2)))
 
 
-def test_per_task_row_limit_caps_chunks_read(tmp_path, monkeypatch):
+def test_per_task_row_limit_caps_chunks_read(
+    ray_start_regular_shared, tmp_path, monkeypatch
+):
     """per_task_row_limit bounds how many chunks a task actually reads, so a
     downstream ``limit`` doesn't pull the whole batch's I/O."""
     store_path = tmp_path / "limit.zarr"
@@ -651,7 +653,7 @@ def __getitem__(self, _name):
     assert _FlakyArray.attempts == 3  # failed twice, then succeeded
 
 
-def test_long_form_schema_and_materialization(tmp_path):
+def test_long_form_schema_and_materialization(ray_start_regular_shared, tmp_path):
     """End-to-end: long-form rows are emitted with the expected columns and data."""
     store_path = tmp_path / "aligned.zarr"
     images_src = np.arange(20, dtype="<i4").reshape(5, 4)
@@ -691,7 +693,7 @@ def test_long_form_schema_and_materialization(tmp_path):
             )
 
 
-def test_chunk_shapes_override_changes_grid(tmp_path):
+def test_chunk_shapes_override_changes_grid(ray_start_regular_shared, tmp_path):
     """User-supplied chunk_shapes controls the chunk grid and row count."""
     store_path = tmp_path / "tile.zarr"
     src = np.arange(10, dtype="<i4")
@@ -702,7 +704,9 @@ def test_chunk_shapes_override_changes_grid(tmp_path):
     assert sorted(chunk.shape[0] for chunk in df["chunk"]) == [5, 5]
 
 
-def test_heterogeneous_store_emits_one_row_per_chunk(heterogeneous_zarrv2_store):
+def test_heterogeneous_store_emits_one_row_per_chunk(
+    ray_start_regular_shared, heterogeneous_zarrv2_store
+):
     """Mixed-rank/shape/dtype arrays each contribute their chunk count to the output."""
     datasource = zarrv2_datasource.ZarrV2Datasource(str(heterogeneous_zarrv2_store))
     df = _execute_read_tasks(datasource.get_read_tasks(parallelism=16))
@@ -752,7 +756,7 @@ def test_estimate_inmemory_data_size(tmp_path):
         lazy_fixture("local_fsspec_fs"),  # native fsspec
     ],
 )
-def test_read_zarr_basic_across_filesystems(fs, local_path):
+def test_read_zarr_basic_across_filesystems(ray_start_regular_shared, fs, local_path):
     """Round-trip a real Zarr store through read_zarr for each filesystem flavor.
 
     Mirrors the parametrized read-path coverage other Ray Data datasources use
@@ -784,7 +788,7 @@ def test_read_zarr_basic_across_filesystems(fs, local_path):
 # ---------------------------------------------------------------------------
 
 
-def test_read_zarr_integration_public_s3():
+def test_read_zarr_integration_public_s3(ray_start_regular_shared):
     """End-to-end read against a real Zarr store in a public S3 bucket.
 
     Uses ``s3://anonymous@ray-example-data/mnist-tiny.zarr`` — a 200-sample
@@ -807,77 +811,6 @@ def test_read_zarr_integration_public_s3():
     assert all(c.dtype == np.uint8 for c in label_rows["chunk"])
 
 
-# ---------------------------------------------------------------------------
-# Custom codec registration in Ray workers
-# ---------------------------------------------------------------------------
-
-
-def test_custom_codec_succeeds_with_worker_setup_hook(tmp_path):
-    """``worker_process_setup_hook`` runs once per worker, before any task,
-    registering a custom codec in the worker's process so chunk decode succeeds.
-
-    numcodecs' registry is process-local: built-in codecs (blosc, gzip, zstd)
-    self-register at import, but a custom codec must be registered in every
-    process that decodes chunks. Ray workers are separate processes, so the
-    driver's registration does not propagate -- ``worker_process_setup_hook``
-    runs the registration in each worker. The hook is passed as a *callable*
-    (cloud-pickled to the workers), not a code string; defining it locally keeps
-    the codec class out of the importable module surface.
-    """
-    import numcodecs
-
-    def _register_codec():
-        """Register the test codec in the current process (driver and workers)."""
-        import numcodecs
-        import numpy as np
-
-        class _RayZarrTestCodec(numcodecs.abc.Codec):
-            codec_id = "ray_zarr_test_codec"
-
-            def encode(self, buf):
-                return bytes(buf)
-
-            def decode(self, buf, out=None):
-                arr = np.frombuffer(buf, dtype=np.uint8)
-                if out is not None:
-                    out[:] = arr.view(out.dtype)
-                    return out
-                return arr.copy()
-
-        numcodecs.register_codec(_RayZarrTestCodec)
-
-    # Register driver-side so we can write the store.
-    _register_codec()
-
-    store_path = tmp_path / "codec_test.zarr"
-    arr = zarr.open(
-        str(store_path),
-        mode="w",
-        shape=(8,),
-        chunks=(4,),
-        dtype="u1",
-        compressor=numcodecs.get_codec({"id": "ray_zarr_test_codec"}),
-    )
-    arr[:] = np.arange(8, dtype="u1")
-    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
-
-    if ray.is_initialized():
-        ray.shutdown()
-    ray.init(
-        num_cpus=1,
-        logging_level=logging.ERROR,
-        log_to_driver=False,
-        runtime_env={"worker_process_setup_hook": _register_codec},
-    )
-    try:
-        ds = ray.data.read_zarr(str(store_path))
-        rows = sorted(ds.take_all(), key=lambda r: tuple(r["chunk_index"]))
-        recon = np.concatenate([r["chunk"] for r in rows])
-        np.testing.assert_array_equal(recon, np.arange(8, dtype="u1"))
-    finally:
-        ray.shutdown()
-
-
 def test_rejects_zarr_v3(tmp_path, monkeypatch):
     """read_zarr targets zarr-python 2.x; an incompatible v3 install must raise a
     clear, actionable error at construction, not a cryptic ImportError mid-read."""
@@ -886,7 +819,7 @@ def test_rejects_zarr_v3(tmp_path, monkeypatch):
         zarrv2_datasource.ZarrV2Datasource(str(tmp_path))
 
 
-def test_explicit_filesystem_strips_uri_scheme(tmp_path):
+def test_explicit_filesystem_strips_uri_scheme(ray_start_regular_shared, tmp_path):
     """An explicit ``filesystem=`` plus a scheme-prefixed path must strip the
     scheme so the store path is backend-relative. Regression: pyarrow
     filesystems can't resolve a ``file://`` / ``gs://`` prefix in the path."""
@@ -923,7 +856,9 @@ def test_align_axis_0_rejects_scalar_array(tmp_path):
         zarrv2_datasource.ZarrV2Datasource(str(store_path), align_axis_0=True)
 
 
-def test_reads_zarr_zip_with_explicit_zip_filesystem(zarr_zip_store):
+def test_reads_zarr_zip_with_explicit_zip_filesystem(
+    ray_start_regular_shared, zarr_zip_store
+):
     """A .zip path read through an explicitly-passed fsspec ZipFileSystem must
     resolve the store at the archive root (store path ``""``), not treat the
     ``.zip`` name as an entry inside the archive."""
@@ -934,7 +869,9 @@ def test_reads_zarr_zip_with_explicit_zip_filesystem(zarr_zip_store):
     assert len(df) == 2
 
 
-def test_align_axis_0_columns_unify_across_blocks(aligned_zarrv2_store):
+def test_align_axis_0_columns_unify_across_blocks(
+    ray_start_regular_shared, aligned_zarrv2_store
+):
     """Wide-form gives each array its own column, so blocks combine cleanly
     across the dataset even with trailing edge chunks of differing shape -- the
     batch-safe schema for row-aligned arrays."""
@@ -955,3 +892,87 @@ def test_align_axis_0_columns_unify_across_blocks(aligned_zarrv2_store):
     import sys
 
     sys.exit(pytest.main(["-v", __file__]))
+
+
+# ---------------------------------------------------------------------------
+# Custom codec registration in Ray workers
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def fresh_ray():
+    """A clean Ray for a test that needs its own ``ray.init`` (e.g. a custom
+    ``runtime_env``). Unlike ``shutdown_only`` (teardown only), it also shuts
+    down any pre-existing cluster, so isolation doesn't depend on test order.
+    """
+    if ray.is_initialized():
+        ray.shutdown()
+    yield
+    if ray.is_initialized():
+        ray.shutdown()
+
+
+def test_custom_codec_succeeds_with_worker_setup_hook(fresh_ray, tmp_path):
+    """``worker_process_setup_hook`` runs once per worker, before any task,
+    registering a custom codec in the worker's process so chunk decode succeeds.
+
+    numcodecs' registry is process-local: built-in codecs (blosc, gzip, zstd)
+    self-register at import, but a custom codec must be registered in every
+    process that decodes chunks. Ray workers are separate processes, so the
+    driver's registration does not propagate -- ``worker_process_setup_hook``
+    runs the registration in each worker. The hook is passed as a *callable*
+    (cloud-pickled to the workers), not a code string; defining it locally keeps
+    the codec class out of the importable module surface.
+
+    The worker hook must be set at cluster start, so this needs its own
+    ``ray.init`` rather than the shared ``ray_start_regular_shared`` cluster.
+    The ``fresh_ray`` fixture guarantees a clean Ray before and after, so the
+    test is isolated regardless of where it runs in the suite.
+    """
+    import numcodecs
+
+    def _register_codec():
+        """Register the test codec in the current process (driver and workers)."""
+        import numcodecs
+        import numpy as np
+
+        class _RayZarrTestCodec(numcodecs.abc.Codec):
+            codec_id = "ray_zarr_test_codec"
+
+            def encode(self, buf):
+                return bytes(buf)
+
+            def decode(self, buf, out=None):
+                arr = np.frombuffer(buf, dtype=np.uint8)
+                if out is not None:
+                    out[:] = arr.view(out.dtype)
+                    return out
+                return arr.copy()
+
+        numcodecs.register_codec(_RayZarrTestCodec)
+
+    # Register driver-side so we can write the store.
+    _register_codec()
+
+    store_path = tmp_path / "codec_test.zarr"
+    arr = zarr.open(
+        str(store_path),
+        mode="w",
+        shape=(8,),
+        chunks=(4,),
+        dtype="u1",
+        compressor=numcodecs.get_codec({"id": "ray_zarr_test_codec"}),
+    )
+    arr[:] = np.arange(8, dtype="u1")
+    zarr.consolidate_metadata(zarr.DirectoryStore(str(store_path)))
+
+    ray.init(
+        num_cpus=1,
+        logging_level=logging.ERROR,
+        log_to_driver=False,
+        runtime_env={"worker_process_setup_hook": _register_codec},
+    )
+    ds = ray.data.read_zarr(str(store_path))
+    rows = sorted(ds.take_all(), key=lambda r: tuple(r["chunk_index"]))
+    recon = np.concatenate([r["chunk"] for r in rows])
+    np.testing.assert_array_equal(recon, np.arange(8, dtype="u1"))

From e869a761bf121891d60f470abf53ee4eb06d9868 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Sun, 21 Jun 2026 23:05:46 +0200
Subject: [PATCH 44/45] polish

Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../ray/data/tests/datasource/test_zarrv2.py  | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index cdda751d8b09..ecbab51de52c 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -913,26 +913,13 @@ def fresh_ray():
 
 
 def test_custom_codec_succeeds_with_worker_setup_hook(fresh_ray, tmp_path):
-    """``worker_process_setup_hook`` runs once per worker, before any task,
-    registering a custom codec in the worker's process so chunk decode succeeds.
-
-    numcodecs' registry is process-local: built-in codecs (blosc, gzip, zstd)
-    self-register at import, but a custom codec must be registered in every
-    process that decodes chunks. Ray workers are separate processes, so the
-    driver's registration does not propagate -- ``worker_process_setup_hook``
-    runs the registration in each worker. The hook is passed as a *callable*
-    (cloud-pickled to the workers), not a code string; defining it locally keeps
-    the codec class out of the importable module surface.
-
-    The worker hook must be set at cluster start, so this needs its own
-    ``ray.init`` rather than the shared ``ray_start_regular_shared`` cluster.
-    The ``fresh_ray`` fixture guarantees a clean Ray before and after, so the
-    test is isolated regardless of where it runs in the suite.
+    """Test that we successfully register a custom codec.
+
+    numcodecs' registry is process-local, so a hook registers the codec in each worker.
     """
     import numcodecs
 
     def _register_codec():
-        """Register the test codec in the current process (driver and workers)."""
         import numcodecs
         import numpy as np
 

From 7bfebfd4254d37cf474e9b3f4990344090ca7c82 Mon Sep 17 00:00:00 2001
From: Artur Niederfahrenhorst <artur@anyscale.com>
Date: Mon, 22 Jun 2026 18:18:38 +0200
Subject: [PATCH 45/45] [data] read_zarr: drop the live-S3 integration test

The datasource is backend-agnostic: path/filesystem resolution is delegated to
pyarrow/fsspec (shared Ray Data machinery), so a live remote read exercises
generic pyarrow/fsspec, not datasource logic. Filesystem handling is already
covered hermetically by test_read_zarr_basic_across_filesystems (parametrized
over fs flavors on local paths), so the unit-test file stays network-free.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Artur Niederfahrenhorst <artur@anyscale.com>
---
 .../ray/data/tests/datasource/test_zarrv2.py  | 28 -------------------
 1 file changed, 28 deletions(-)

diff --git a/python/ray/data/tests/datasource/test_zarrv2.py b/python/ray/data/tests/datasource/test_zarrv2.py
index ecbab51de52c..96da15d2ed8b 100644
--- a/python/ray/data/tests/datasource/test_zarrv2.py
+++ b/python/ray/data/tests/datasource/test_zarrv2.py
@@ -783,34 +783,6 @@ def test_read_zarr_basic_across_filesystems(ray_start_regular_shared, fs, local_
     np.testing.assert_array_equal(_reconstruct_array(df, "labels"), labels_src)
 
 
-# ---------------------------------------------------------------------------
-# Public-bucket integration test
-# ---------------------------------------------------------------------------
-
-
-def test_read_zarr_integration_public_s3(ray_start_regular_shared):
-    """End-to-end read against a real Zarr store in a public S3 bucket.
-
-    Uses ``s3://anonymous@ray-example-data/mnist-tiny.zarr`` — a 200-sample
-    MNIST subset with two arrays:
-      * ``images``  shape (200, 28, 28), chunks (50, 28, 28)  → 4 chunks
-      * ``labels``  shape (200,),        chunks (200,)        → 1 chunk
-
-    Under the chunk-per-row schema the total row count is 4 + 1 = 5.
-    """
-    ds = ray.data.read_zarr("s3://anonymous@ray-example-data/mnist-tiny.zarr")
-
-    assert ds.count() == 5
-    df = pd.DataFrame(ds.take_all())
-    assert set(df["array"]) == {"images", "labels"}
-    image_rows = df[df["array"] == "images"]
-    label_rows = df[df["array"] == "labels"]
-    assert {c.shape for c in image_rows["chunk"]} == {(50, 28, 28)}
-    assert {c.shape for c in label_rows["chunk"]} == {(200,)}
-    assert all(c.dtype == np.uint8 for c in image_rows["chunk"])
-    assert all(c.dtype == np.uint8 for c in label_rows["chunk"])
-
-
 def test_rejects_zarr_v3(tmp_path, monkeypatch):
     """read_zarr targets zarr-python 2.x; an incompatible v3 install must raise a
     clear, actionable error at construction, not a cryptic ImportError mid-read."""