Skip to content

Commit

Permalink
Allow non-memory zarr stores in to_zarr with distributed (#…
Browse files Browse the repository at this point in the history
…10422)

In `dask.array.core.to_zarr` I swapped the check for distributed scheduler + MutableMapping to distributed scheduler + zarr.storage.MemoryStore, which seems to be the only zarr.storage type that is backed by memory. Though I'm unsure about the [LMDB store](https://zarr.readthedocs.io/en/stable/api/storage.html#zarr.storage.LMDBStore).

I also removed the import of MutableMapping since this was the only place in all of `dask.array.core` to reference it.
  • Loading branch information
GFleishman committed May 11, 2024
1 parent 6f5821d commit d5a81bb
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 9 deletions.
11 changes: 2 additions & 9 deletions dask/array/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,7 @@
import uuid
import warnings
from bisect import bisect
from collections.abc import (
Collection,
Iterable,
Iterator,
Mapping,
MutableMapping,
Sequence,
)
from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
from functools import partial, reduce, wraps
from itertools import product, zip_longest
from numbers import Integral, Number
Expand Down Expand Up @@ -3672,7 +3665,7 @@ def to_zarr(

if isinstance(url, zarr.Array):
z = url
if isinstance(z.store, (dict, MutableMapping)):
if isinstance(z.store, (dict, zarr.storage.MemoryStore, zarr.storage.KVStore)):
try:
from distributed import default_client

Expand Down
41 changes: 41 additions & 0 deletions dask/tests/test_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,47 @@ def test_zarr_distributed_roundtrip(c):
assert a2.chunks == a.chunks


def test_zarr_distributed_with_explicit_directory_store(c):
pytest.importorskip("numpy")
da = pytest.importorskip("dask.array")
zarr = pytest.importorskip("zarr")

with tmpdir() as d:
chunks = (1, 1)
a = da.zeros((3, 3), chunks=chunks)
s = zarr.storage.DirectoryStore(d)
z = zarr.creation.open_array(
shape=a.shape,
chunks=chunks,
dtype=a.dtype,
store=s,
mode="a",
)
a.to_zarr(z)
a2 = da.from_zarr(d)
da.assert_eq(a, a2, scheduler=c)
assert a2.chunks == a.chunks


def test_zarr_distributed_with_explicit_memory_store(c):
pytest.importorskip("numpy")
da = pytest.importorskip("dask.array")
zarr = pytest.importorskip("zarr")

chunks = (1, 1)
a = da.zeros((3, 3), chunks=chunks)
s = zarr.storage.MemoryStore()
z = zarr.creation.open_array(
shape=a.shape,
chunks=chunks,
dtype=a.dtype,
store=s,
mode="a",
)
with pytest.raises(RuntimeError, match="distributed scheduler"):
a.to_zarr(z)


def test_zarr_in_memory_distributed_err(c):
pytest.importorskip("numpy")
da = pytest.importorskip("dask.array")
Expand Down

0 comments on commit d5a81bb

Please sign in to comment.