Skip to content

Commit e3bee0b

Browse files
committed
Add properties to get chunk and shard slices
1 parent c8d8e64 commit e3bee0b

File tree

4 files changed

+97
-0
lines changed

4 files changed

+97
-0
lines changed

changes/xxxx.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added the new ``Array.chunk_slices`` and ``Array.shard_slices`` properties, which yield slices aligned with the array's chunks and shards, respectively.

docs/user-guide/arrays.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,26 @@ In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is
566566
This means that `10*10` chunks are stored in each shard, and there are `10*10` shards in total.
567567
Without the `shards` argument, there would be 10,000 chunks stored as individual files.
568568

569+
## Accessing chunks and shards
570+
571+
Arrays provide the `chunk_slices` and `shard_slices` properties for accessing data aligned to chunks and shards.
572+
The slices they yield can be used, for example, to write to shards in parallel or to read from chunks in parallel.
573+
574+
```python exec="true" session="arrays" source="above" result="ansi"
575+
a = zarr.create_array(
    store={},
    shape=(100, 50),
    shards=(50, 40),
    chunks=(25, 20),
    dtype='uint8',
)

# Every item yielded by ``shard_slices`` is a tuple of slices that can be
# used to index the array directly.
print("All shard slices:")
for shard_slice in a.shard_slices:
    print(shard_slice)
    # shard_data = a[shard_slice]

# ``chunk_slices`` works the same way, one item per chunk.
print("All chunk slices:")
for chunk_slice in a.chunk_slices:
    print(chunk_slice)
    # chunk_data = a[chunk_slice]
586+
```
587+
588+
569589
## Missing features in 3.0
570590

571591
The following features have not been ported to 3.0 yet.

src/zarr/core/array.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import (
1111
TYPE_CHECKING,
1212
Any,
13+
Generator,
1314
Generic,
1415
Literal,
1516
TypeAlias,
@@ -1381,6 +1382,32 @@ async def example():
13811382
async def nbytes_stored(self) -> int:
13821383
return await self.store_path.store.getsize_prefix(self.store_path.path)
13831384

1385+
@property
def chunk_slices(self) -> Generator[tuple[slice, ...], None, None]:
    """
    Iterator over the slices that select each chunk of this array.

    This can be used to loop through and index every chunk of an array.
    Every access of this property creates a new generator that delegates
    to ``self._iter_chunk_regions()``.

    Yields
    ------
    chunk_slice : tuple[slice, ...]
        Slice for each chunk in this array.
    """
    # The send/return types are spelled out because a single-argument
    # ``typing.Generator[...]`` is only accepted at runtime from Python 3.13.
    yield from self._iter_chunk_regions()
1396+
1397+
@property
def shard_slices(self) -> Generator[tuple[slice, ...], None, None]:
    """
    Iterator over the slices that select each shard of this array.

    This can be used to loop through and index every shard of an array.
    Every access of this property creates a new generator that delegates
    to ``self._iter_shard_regions()``.

    Yields
    ------
    shard_slice : tuple[slice, ...]
        Slice for each shard in this array.
    """
    # The send/return types are spelled out because a single-argument
    # ``typing.Generator[...]`` is only accepted at runtime from Python 3.13.
    yield from self._iter_shard_regions()
1410+
13841411
def _iter_chunk_coords(
13851412
self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
13861413
) -> Iterator[tuple[int, ...]]:
@@ -2355,6 +2382,34 @@ def shards(self) -> tuple[int, ...] | None:
23552382
"""
23562383
return self._async_array.shards
23572384

2385+
@property
def chunk_slices(self) -> Generator[tuple[slice, ...], None, None]:
    """
    Iterator over the slices that select each chunk of this array.

    This can be used to loop through and index every chunk of an array.
    The values are taken from the ``chunk_slices`` property of the wrapped
    asynchronous array (``self._async_array``).

    Yields
    ------
    chunk_slice : tuple[slice, ...]
        Slice for each chunk in this array.
    """
    # The send/return types are spelled out because a single-argument
    # ``typing.Generator[...]`` is only accepted at runtime from Python 3.13.
    yield from self._async_array.chunk_slices
2398+
2399+
@property
def shard_slices(self) -> Generator[tuple[slice, ...], None, None]:
    """
    Iterator over the slices that select each shard of this array.

    This can be used to loop through and index every shard of an array.
    The values are taken from the ``shard_slices`` property of the wrapped
    asynchronous array (``self._async_array``).

    Yields
    ------
    shard_slice : tuple[slice, ...]
        Slice for each shard in this array.
    """
    # The send/return types are spelled out because a single-argument
    # ``typing.Generator[...]`` is only accepted at runtime from Python 3.13.
    yield from self._async_array.shard_slices
2412+
23582413
@property
23592414
def size(self) -> int:
23602415
"""Returns the total number of elements in the array.

tests/test_array.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,3 +2153,24 @@ def test_create_array_with_data_num_gets(
21532153
# one get for the metadata and one per shard.
21542154
# Note: we don't actually need one get per shard, but this is the current behavior
21552155
assert store.counter["get"] == 1 + num_shards
2156+
2157+
2158+
@pytest.mark.parametrize("shards", [None, (4, 6)])
def test_chunk_slices(shards: None | tuple[int, ...]) -> None:
    """Check ``chunk_slices`` yields the same chunk regions with and without sharding."""
    arr = zarr.create_array(
        store={},
        shape=(4, 8),
        dtype="uint8",
        chunks=(2, 3),
        shards=shards,
    )
    # Per-axis chunk boundaries; the last column chunk stops at the array edge (8).
    row_slices = (slice(0, 2, 1), slice(2, 4, 1))
    col_slices = (slice(0, 3, 1), slice(3, 6, 1), slice(6, 8, 1))
    # Rows vary slowest, columns fastest, matching the order of the expected regions.
    expected = [(rows, cols) for rows in row_slices for cols in col_slices]
    assert list(arr.chunk_slices) == expected
2169+
2170+
2171+
def test_shard_slices() -> None:
    """Check ``shard_slices`` yields one region per shard of a sharded array."""
    arr = zarr.create_array(
        store={},
        shape=(4, 8),
        dtype="uint8",
        chunks=(2, 3),
        shards=(4, 6),
    )
    # One shard covers every row; the second column shard stops at the array edge (8).
    all_rows = slice(0, 4, 1)
    col_slices = (slice(0, 6, 1), slice(6, 8, 1))
    expected = [(all_rows, cols) for cols in col_slices]
    assert list(arr.shard_slices) == expected

0 commit comments

Comments
 (0)