From 7bfcf6f164f57831889de2c31ace6a83753a77c5 Mon Sep 17 00:00:00 2001
From: ilan-gold
Date: Fri, 24 Oct 2025 18:50:22 +0200
Subject: [PATCH] feat: make `async_array` public

---
 src/zarr/core/array.py | 106 ++++++++++++++++++++++-------------------
 tests/test_api.py      |   2 +-
 tests/test_array.py    |  34 ++++++-------
 tests/test_v2.py       |   6 +--
 4 files changed, 79 insertions(+), 69 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 8bd8be40b2..6ceb5cd921 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -2039,6 +2039,16 @@ class Array:
 
     _async_array: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]
 
+    @property
+    def async_array(self) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
+        """The underlying asynchronous array. Useful for batching requests.
+
+        Returns
+        -------
+        The AsyncArray underlying this synchronous Array.
+        """
+        return self._async_array
+
     @classmethod
     @deprecated("Use zarr.create_array instead.", category=ZarrDeprecationWarning)
     def create(
@@ -2296,7 +2306,7 @@ def open(
 
     @property
     def store(self) -> Store:
-        return self._async_array.store
+        return self.async_array.store
 
     @property
     def ndim(self) -> int:
@@ -2307,7 +2317,7 @@ def ndim(self) -> int:
         int
             The number of dimensions in the array.
         """
-        return self._async_array.ndim
+        return self.async_array.ndim
 
     @property
     def shape(self) -> tuple[int, ...]:
@@ -2318,7 +2328,7 @@ def shape(self) -> tuple[int, ...]:
         tuple[int, ...]
             The shape of the array.
         """
-        return self._async_array.shape
+        return self.async_array.shape
 
     @shape.setter
     def shape(self, value: tuple[int, ...]) -> None:
@@ -2338,7 +2348,7 @@ def chunks(self) -> tuple[int, ...]:
         tuple
             A tuple of integers representing the length of each dimension of a chunk.
         """
-        return self._async_array.chunks
+        return self.async_array.chunks
 
     @property
     def shards(self) -> tuple[int, ...] | None:
@@ -2353,7 +2363,7 @@ def shards(self) -> tuple[int, ...] | None:
         tuple | None
             A tuple of integers representing the length of each dimension of a shard or None if sharding is not used.
         """
-        return self._async_array.shards
+        return self.async_array.shards
 
     @property
     def size(self) -> int:
@@ -2364,7 +2374,7 @@ def size(self) -> int:
         int
             Total number of elements in the array.
         """
-        return self._async_array.size
+        return self.async_array.size
 
     @property
     def dtype(self) -> np.dtype[Any]:
@@ -2375,7 +2385,7 @@ def dtype(self) -> np.dtype[Any]:
         np.dtype
             The NumPy data type.
""" - return self._async_array.dtype + return self.async_array.dtype @property def attrs(self) -> Attributes: @@ -2395,33 +2405,33 @@ def attrs(self) -> Attributes: @property def path(self) -> str: """Storage path.""" - return self._async_array.path + return self.async_array.path @property def name(self) -> str: """Array name following h5py convention.""" - return self._async_array.name + return self.async_array.name @property def basename(self) -> str: """Final component of name.""" - return self._async_array.basename + return self.async_array.basename @property def metadata(self) -> ArrayMetadata: - return self._async_array.metadata + return self.async_array.metadata @property def store_path(self) -> StorePath: - return self._async_array.store_path + return self.async_array.store_path @property def order(self) -> MemoryOrder: - return self._async_array.order + return self.async_array.order @property def read_only(self) -> bool: - return self._async_array.read_only + return self.async_array.read_only @property def fill_value(self) -> Any: @@ -2433,14 +2443,14 @@ def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]: Filters that are applied to each chunk of the array, in order, before serializing that chunk to bytes. """ - return self._async_array.filters + return self.async_array.filters @property def serializer(self) -> None | ArrayBytesCodec: """ Array-to-bytes codec to use for serializing the chunks into bytes. """ - return self._async_array.serializer + return self.async_array.serializer @property @deprecated("Use Array.compressors instead.", category=ZarrDeprecationWarning) @@ -2452,7 +2462,7 @@ def compressor(self) -> Numcodec | None: `array.compressor` is deprecated since v3.0.0 and will be removed in a future release. Use [`array.compressors`][zarr.Array.compressors] instead. """ - return self._async_array.compressor + return self.async_array.compressor @property def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: @@ -2460,28 +2470,28 @@ def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. """ - return self._async_array.compressors + return self.async_array.compressors @property def cdata_shape(self) -> tuple[int, ...]: """ The shape of the chunk grid for this array. """ - return self._async_array._chunk_grid_shape + return self.async_array._chunk_grid_shape @property def _chunk_grid_shape(self) -> tuple[int, ...]: """ The shape of the chunk grid for this array. """ - return self._async_array._chunk_grid_shape + return self.async_array._chunk_grid_shape @property def _shard_grid_shape(self) -> tuple[int, ...]: """ The shape of the shard grid for this array. """ - return self._async_array._shard_grid_shape + return self.async_array._shard_grid_shape @property def nchunks(self) -> int: @@ -2491,14 +2501,14 @@ def nchunks(self) -> int: Note that if a sharding codec is used, then the number of chunks may exceed the number of stored objects supporting this array. """ - return self._async_array.nchunks + return self.async_array.nchunks @property def _nshards(self) -> int: """ The number of shards in the stored representation of this array. """ - return self._async_array._nshards + return self.async_array._nshards @property def nbytes(self) -> int: @@ -2513,7 +2523,7 @@ def nbytes(self) -> int: dtypes. 
         It is not possible to determine the size of an array with variable-length elements from the shape and dtype alone.
         """
-        return self._async_array.nbytes
+        return self.async_array.nbytes
 
     @property
     def nchunks_initialized(self) -> int:
@@ -2539,7 +2549,7 @@ def nchunks_initialized(self) -> int:
         >>> arr.nchunks_initialized
         6
         """
-        return sync(self._async_array.nchunks_initialized())
+        return sync(self.async_array.nchunks_initialized())
 
     @property
     def _nshards_initialized(self) -> int:
@@ -2561,7 +2571,7 @@ def _nshards_initialized(self) -> int:
         >>> arr._nshard_initialized
         3
         """
-        return sync(self._async_array._nshards_initialized())
+        return sync(self.async_array._nshards_initialized())
 
     def nbytes_stored(self) -> int:
         """
@@ -2571,7 +2581,7 @@ def nbytes_stored(self) -> int:
         -------
         size : int
         """
-        return sync(self._async_array.nbytes_stored())
+        return sync(self.async_array.nbytes_stored())
 
     def _iter_shard_keys(
         self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
@@ -2592,7 +2602,7 @@ def _iter_shard_keys(
         str
             The storage key of each shard in the selection.
         """
-        return self._async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape)
+        return self.async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape)
 
     def _iter_chunk_coords(
         self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
@@ -2618,7 +2628,7 @@ def _iter_chunk_coords(
         tuple[int, ...]
             The coordinates of each chunk in the selection.
         """
-        return self._async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape)
+        return self.async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape)
 
     def _iter_shard_coords(
         self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
@@ -2644,7 +2654,7 @@ def _iter_shard_coords(
         tuple[int, ...]
             The coordinates of each shard in the selection.
         """
-        return self._async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape)
+        return self.async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape)
 
     def _iter_chunk_regions(
         self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
@@ -2664,7 +2674,7 @@ def _iter_chunk_regions(
         tuple[slice, ...]
             A tuple of slice objects representing the region spanned by each chunk in the selection.
         """
-        return self._async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape)
+        return self.async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape)
 
     def _iter_shard_regions(
         self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
@@ -2684,7 +2694,7 @@ def _iter_shard_regions(
         tuple[slice, ...]
             A tuple of slice objects representing the region spanned by each chunk in the selection.
""" - return self._async_array._iter_shard_regions(origin=origin, selection_shape=selection_shape) + return self.async_array._iter_shard_regions(origin=origin, selection_shape=selection_shape) def __array__( self, dtype: npt.DTypeLike | None = None, copy: bool | None = None @@ -3088,7 +3098,7 @@ def get_basic_selection( if prototype is None: prototype = default_buffer_prototype() return sync( - self._async_array._get_selection( + self.async_array._get_selection( BasicIndexer(selection, self.shape, self.metadata.chunk_grid), out=out, fields=fields, @@ -3197,7 +3207,7 @@ def set_basic_selection( if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) - sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_orthogonal_selection( self, @@ -3326,7 +3336,7 @@ def get_orthogonal_selection( prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( - self._async_array._get_selection( + self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) @@ -3445,7 +3455,7 @@ def set_orthogonal_selection( prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( - self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype) + self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) ) def get_mask_selection( @@ -3533,7 +3543,7 @@ def get_mask_selection( prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) return sync( - self._async_array._get_selection( + self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) @@ -3622,7 +3632,7 @@ def set_mask_selection( if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) - sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) + sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_coordinate_selection( self, @@ -3711,7 +3721,7 @@ def get_coordinate_selection( prototype = default_buffer_prototype() indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) out_array = sync( - self._async_array._get_selection( + self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) @@ -3824,7 +3834,7 @@ def set_coordinate_selection( f"elements with an array of {value.shape[0]} elements." 
                 )
 
-        sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype))
+        sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype))
 
     def get_block_selection(
         self,
@@ -3926,7 +3936,7 @@ def get_block_selection(
            prototype = default_buffer_prototype()
        indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid)
        return sync(
-            self._async_array._get_selection(
+            self.async_array._get_selection(
                indexer=indexer, out=out, fields=fields, prototype=prototype
            )
        )
@@ -4026,7 +4036,7 @@ def set_block_selection(
        if prototype is None:
            prototype = default_buffer_prototype()
        indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid)
-        sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype))
+        sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype))
 
     @property
     def vindex(self) -> VIndex:
@@ -4090,7 +4100,7 @@ def resize(self, new_shape: ShapeLike) -> None:
        #>(50, 50)
        ```
        """
-        sync(self._async_array.resize(new_shape))
+        sync(self.async_array.resize(new_shape))
 
     def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]:
        """Append `data` to `axis`.
@@ -4126,7 +4136,7 @@ def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]:
        >>> z.shape
        (20000, 2000)
        """
-        return sync(self._async_array.append(data, axis=axis))
+        return sync(self.async_array.append(data, axis=axis))
 
     def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
        """
@@ -4154,7 +4164,7 @@ def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
        overwritten by the new values.
        """
        # TODO: remove this cast when type inference improves
-        new_array = sync(self._async_array.update_attributes(new_attributes))
+        new_array = sync(self.async_array.update_attributes(new_attributes))
        # TODO: remove this cast when type inference improves
        _new_array = cast("AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]", new_array)
        return type(self)(_new_array)
@@ -4191,7 +4201,7 @@ def info(self) -> Any:
        Codecs : [BytesCodec(endian=)]
        No. bytes : 40
        """
-        return self._async_array.info
+        return self.async_array.info
 
     def info_complete(self) -> Any:
        """
@@ -4211,7 +4221,7 @@ def info_complete(self) -> Any:
        -------
        [zarr.Array.info][] - The statically known subset of metadata about an array.
        """
-        return sync(self._async_array.info_complete())
+        return sync(self.async_array.info_complete())
 
 
 async def _shards_initialized(
@@ -4523,7 +4533,7 @@ async def from_array(
     async def _copy_array_region(
        chunk_coords: tuple[int, ...] | slice, _data: Array
    ) -> None:
-        arr = await _data._async_array.getitem(chunk_coords)
+        arr = await _data.async_array.getitem(chunk_coords)
        await result.setitem(chunk_coords, arr)
 
    # Stream data from the source array to the new array

diff --git a/tests/test_api.py b/tests/test_api.py
index 30f648a815..816de784ca 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -231,7 +231,7 @@ def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat)
 
     arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True})
     assert isinstance(arr2, zarr.Array)
-    assert arr2._async_array._config.write_empty_chunks is True
+    assert arr2.async_array._config.write_empty_chunks is True
     arr2[0:5] = np.zeros(5)
     assert arr2.nchunks_initialized == 1
 
diff --git a/tests/test_array.py b/tests/test_array.py
index 5219616739..f0fbafc886 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -376,7 +376,7 @@ def test_nchunks(test_cls: type[Array] | type[AsyncArray[Any]], nchunks: int) ->
     if test_cls == Array:
         observed = arr.nchunks
     else:
-        observed = arr._async_array.nchunks
+        observed = arr.async_array.nchunks
     assert observed == expected
 
 
@@ -410,8 +410,8 @@ async def test_nchunks_initialized(
         observed = arr._nshards_initialized
         assert observed == arr.nchunks_initialized // chunks_per_shard
     else:
-        observed = await arr._async_array._nshards_initialized()
-        assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard
+        observed = await arr.async_array._nshards_initialized()
+        assert observed == await arr.async_array.nchunks_initialized() // chunks_per_shard
     assert observed == expected
 
     # delete chunks
@@ -421,8 +421,8 @@ async def test_nchunks_initialized(
            observed = arr._nshards_initialized
            assert observed == arr.nchunks_initialized // chunks_per_shard
        else:
-            observed = await arr._async_array._nshards_initialized()
-            assert observed == await arr._async_array.nchunks_initialized() // chunks_per_shard
+            observed = await arr.async_array._nshards_initialized()
+            assert observed == await arr.async_array.nchunks_initialized() // chunks_per_shard
        expected = arr._nshards - idx - 1
        assert observed == expected
 
@@ -448,7 +448,7 @@ async def test_chunks_initialized(
     )
     for keys, region in zip(chunks_accumulated, arr._iter_shard_regions(), strict=False):
         arr[region] = 1
-        observed = sorted(await _shards_initialized(arr._async_array))
+        observed = sorted(await _shards_initialized(arr.async_array))
         expected = sorted(keys)
         assert observed == expected
 
@@ -500,7 +500,7 @@ def test_info_v2(self, chunks: tuple[int, int], shards: tuple[int, int] | None)
         result = arr.info
         expected = ArrayInfo(
             _zarr_format=2,
-            _data_type=arr._async_array._zdtype,
+            _data_type=arr.async_array._zdtype,
             _fill_value=arr.fill_value,
             _shape=(8, 8),
             _chunk_shape=chunks,
@@ -518,7 +518,7 @@ def test_info_v3(self, chunks: tuple[int, int], shards: tuple[int, int] | None)
         result = arr.info
         expected = ArrayInfo(
             _zarr_format=3,
-            _data_type=arr._async_array._zdtype,
+            _data_type=arr.async_array._zdtype,
             _fill_value=arr.fill_value,
             _shape=(8, 8),
             _chunk_shape=chunks,
@@ -544,7 +544,7 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] |
         result = arr.info_complete()
         expected = ArrayInfo(
             _zarr_format=3,
-            _data_type=arr._async_array._zdtype,
+            _data_type=arr.async_array._zdtype,
             _fill_value=arr.fill_value,
             _shape=(8, 8),
             _chunk_shape=chunks,
@@ -889,7 +889,7 @@ def test_write_empty_chunks_behavior(
         config={"write_empty_chunks": write_empty_chunks},
     )
 
-    assert arr._async_array._config.write_empty_chunks == write_empty_chunks
+    assert arr.async_array._config.write_empty_chunks == write_empty_chunks
 
     # initialize the store with some non-fill value chunks
     arr[:] = fill_value + 1
@@ -960,7 +960,7 @@ async def test_nbytes(
     store = MemoryStore()
     arr = zarr.create_array(store=store, shape=shape, dtype=dtype, fill_value=0)
     if array_type == "async":
-        assert arr._async_array.nbytes == np.prod(arr.shape) * arr.dtype.itemsize
+        assert arr.async_array.nbytes == np.prod(arr.shape) * arr.dtype.itemsize
     else:
         assert arr.nbytes == np.prod(arr.shape) * arr.dtype.itemsize
 
@@ -1963,7 +1963,7 @@ def test_chunk_grid_shape(
     shard_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, _shard_shape, strict=True))
     assert arr._chunk_grid_shape == chunk_grid_shape
     assert arr.cdata_shape == chunk_grid_shape
-    assert arr._async_array.cdata_shape == chunk_grid_shape
+    assert arr.async_array.cdata_shape == chunk_grid_shape
     assert arr._shard_grid_shape == shard_grid_shape
     assert arr._nshards == np.prod(shard_grid_shape)
 
@@ -1994,7 +1994,7 @@ def test_iter_chunk_coords(
     observed = tuple(_iter_chunk_coords(arr))
     assert observed == expected
     assert observed == tuple(arr._iter_chunk_coords())
-    assert observed == tuple(arr._async_array._iter_chunk_coords())
+    assert observed == tuple(arr.async_array._iter_chunk_coords())
 
 
 @pytest.mark.parametrize(
@@ -2027,7 +2027,7 @@ def test_iter_shard_coords(
     observed = tuple(_iter_shard_coords(arr))
     assert observed == expected
     assert observed == tuple(arr._iter_shard_coords())
-    assert observed == tuple(arr._async_array._iter_shard_coords())
+    assert observed == tuple(arr.async_array._iter_shard_coords())
 
 
 @pytest.mark.parametrize(
@@ -2062,7 +2062,7 @@ def test_iter_shard_keys(
     observed = tuple(_iter_shard_keys(arr))
     assert observed == expected
     assert observed == tuple(arr._iter_shard_keys())
-    assert observed == tuple(arr._async_array._iter_shard_keys())
+    assert observed == tuple(arr.async_array._iter_shard_keys())
 
 
 @pytest.mark.parametrize(
@@ -2098,7 +2098,7 @@ def test_iter_shard_regions(
     observed = tuple(_iter_shard_regions(arr))
     assert observed == expected
     assert observed == tuple(arr._iter_shard_regions())
-    assert observed == tuple(arr._async_array._iter_shard_regions())
+    assert observed == tuple(arr.async_array._iter_shard_regions())
 
 
 @pytest.mark.parametrize(
@@ -2127,7 +2127,7 @@ def test_iter_chunk_regions(
     observed = tuple(_iter_chunk_regions(arr))
     assert observed == expected
     assert observed == tuple(arr._iter_chunk_regions())
-    assert observed == tuple(arr._async_array._iter_chunk_regions())
+    assert observed == tuple(arr.async_array._iter_chunk_regions())
 
 
 @pytest.mark.parametrize("num_shards", [1, 3])
diff --git a/tests/test_v2.py b/tests/test_v2.py
index b223e022c6..cb990f6159 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -143,13 +143,13 @@ def test_create_array_defaults(store: Store) -> None:
     g = zarr.open(store, mode="w", zarr_format=2)
     assert isinstance(g, Group)
     arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None)
-    assert arr._async_array.compressor is None
+    assert arr.async_array.compressor is None
     assert not (arr.filters)
     arr = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,))
-    assert arr._async_array.compressor is not None
+    assert arr.async_array.compressor is not None
     assert not (arr.filters)
     arr = g.create_array("three", dtype="i8", shape=(1,), chunks=(1,), compressor=Zstd())
-    assert arr._async_array.compressor is not None
+    assert arr.async_array.compressor is not None
     assert not (arr.filters)
     with pytest.raises(ValueError):
         g.create_array(
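A minimal usage sketch (not part of the patch itself) of what the newly public `async_array` property enables: batching several chunk reads concurrently with `asyncio.gather`. It assumes zarr v3's `zarr.create_array` and an in-memory store; the array shape, chunking, and helper function below are illustrative only, and for event-loop-bound remote stores you would typically stay entirely within the async API rather than mixing it with the synchronous `Array` as done here.

```python
import asyncio

import numpy as np
import zarr
from zarr.storage import MemoryStore

# Create a small chunked array through the synchronous API.
store = MemoryStore()
arr = zarr.create_array(store=store, shape=(100,), chunks=(10,), dtype="int32")
arr[:] = np.arange(100)


async def read_batched() -> list:
    # `Array.async_array` exposes the underlying AsyncArray, so several
    # chunk-aligned reads can be awaited concurrently instead of serially.
    async_arr = arr.async_array
    selections = [slice(i, i + 10) for i in range(0, 100, 10)]
    return list(await asyncio.gather(*(async_arr.getitem(sel) for sel in selections)))


chunks = asyncio.run(read_batched())
print([int(chunk[0]) for chunk in chunks])  # first element of each chunk: 0, 10, ..., 90
```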