From fe84ad673e08be460c7bc5517c7e0fba38a3ac03 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 17 Jun 2025 15:45:19 +0700 Subject: [PATCH 1/9] un-xfail some big endian tests --- virtualizarr/tests/test_parsers/test_fits.py | 3 --- virtualizarr/tests/test_parsers/test_hdf/test_hdf.py | 1 - virtualizarr/tests/test_parsers/test_netcdf3.py | 6 ------ 3 files changed, 10 deletions(-) diff --git a/virtualizarr/tests/test_parsers/test_fits.py b/virtualizarr/tests/test_parsers/test_fits.py index 3b4d86b1..9e079bef 100644 --- a/virtualizarr/tests/test_parsers/test_fits.py +++ b/virtualizarr/tests/test_parsers/test_fits.py @@ -11,9 +11,6 @@ @requires_kerchunk @requires_network -@pytest.mark.xfail( - reason="Big endian not yet supported by zarr-python 3.0" -) # https://github.com/zarr-developers/zarr-python/issues/2324 def test_open_hubble_data(): # data from https://registry.opendata.aws/hst/ file_url = "s3://stpubdata/hst/public/f05i/f05i0201m/f05i0201m_a1f.fits" diff --git a/virtualizarr/tests/test_parsers/test_hdf/test_hdf.py b/virtualizarr/tests/test_parsers/test_hdf/test_hdf.py index 4a48572d..a2399aac 100644 --- a/virtualizarr/tests/test_parsers/test_hdf/test_hdf.py +++ b/virtualizarr/tests/test_parsers/test_hdf/test_hdf.py @@ -221,7 +221,6 @@ def test_coord_names( ) as vds: assert set(vds.coords) == {"lat", "lon"} - @pytest.mark.xfail(reason="Requires Zarr v3 big endian dtype support") def test_big_endian( self, big_endian_dtype_hdf5_file, diff --git a/virtualizarr/tests/test_parsers/test_netcdf3.py b/virtualizarr/tests/test_parsers/test_netcdf3.py index 1b7b25fb..7706173f 100644 --- a/virtualizarr/tests/test_parsers/test_netcdf3.py +++ b/virtualizarr/tests/test_parsers/test_netcdf3.py @@ -10,9 +10,6 @@ @requires_scipy -@pytest.mark.xfail( - reason="Big endian not yet supported by zarr-python 3.0" -) # https://github.com/zarr-developers/zarr-python/issues/2324 def test_read_netcdf3(netcdf3_file, array_v3_metadata): filepath = str(netcdf3_file) store = obstore_local(file_url=filepath) @@ -37,9 +34,6 @@ def test_read_netcdf3(netcdf3_file, array_v3_metadata): @requires_network -@pytest.mark.xfail( - reason="Big endian not yet supported by zarr-python 3.0" -) # https://github.com/zarr-developers/zarr-python/issues/2324 def test_read_http_netcdf3(array_v3_metadata): file_url = "https://github.com/pydata/xarray-data/raw/master/air_temperature.nc" store = obstore_http(file_url=file_url) From 21986fa7942a1f0c280f745940e8cdca987b181f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Jun 2025 08:49:55 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/tests/test_parsers/test_netcdf3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/virtualizarr/tests/test_parsers/test_netcdf3.py b/virtualizarr/tests/test_parsers/test_netcdf3.py index 7706173f..fa467720 100644 --- a/virtualizarr/tests/test_parsers/test_netcdf3.py +++ b/virtualizarr/tests/test_parsers/test_netcdf3.py @@ -1,4 +1,3 @@ -import pytest import xarray as xr import xarray.testing as xrt From 7a2e4f05601056609f566f97345253f1eb2b4b1f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 17 Jun 2025 15:51:16 +0700 Subject: [PATCH 3/9] update minimum require zarr version to the as-yet-unreleased zarr 3.1.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a8740a0f..99d38c0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "numcodecs>=0.15.1", "ujson", "packaging", - "zarr>=3.0.8", + "zarr>=3.1.0", "obstore>=0.5.1", ] From ed36be655effb3459db33080821290060f5807db Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 17 Jun 2025 15:58:57 +0700 Subject: [PATCH 4/9] Revert "update minimum require zarr version to the as-yet-unreleased zarr 3.1.0" This reverts commit 7a2e4f05601056609f566f97345253f1eb2b4b1f. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 99d38c0e..a8740a0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "numcodecs>=0.15.1", "ujson", "packaging", - "zarr>=3.1.0", + "zarr>=3.0.8", "obstore>=0.5.1", ] From 5aab8c4d3de7ff1425bf6c5350d05a856ba0a261 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Sun, 29 Jun 2025 00:29:59 -0400 Subject: [PATCH 5/9] remove need for vendored zarr metadata code --- virtualizarr/manifests/store.py | 21 +++++++-------- virtualizarr/vendor/zarr/core/metadata.py | 32 ----------------------- 2 files changed, 9 insertions(+), 44 deletions(-) delete mode 100644 virtualizarr/vendor/zarr/core/metadata.py diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index 7dd6c5a4..98bc1b82 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -17,7 +17,6 @@ from zarr.core.common import BytesLike from virtualizarr.manifests.group import ManifestGroup -from virtualizarr.vendor.zarr.core.metadata import dict_to_buffer if TYPE_CHECKING: from obstore.store import ( @@ -59,13 +58,7 @@ def get_store_prefix(url: str) -> str: def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer: """ - Generate the expected Zarr V3 metadata from a virtual dataset. - - Group metadata is returned for all Datasets and Array metadata - is returned for all DataArrays. - - Combines the ManifestArray metadata with the attrs from the DataArray - and adds `dimension_names` for all arrays if not already provided. + Extract the expected Zarr V3 metadata from a ManifestGroup and convert to spec-compliant format. Parameters ---------- @@ -76,15 +69,13 @@ def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer: ------- Buffer """ - # If requesting the root metadata, return the standard group metadata with additional dataset specific attributes - if key == "zarr.json": metadata = manifest_group.metadata.to_dict() - return dict_to_buffer(metadata, prototype=default_buffer_prototype()) else: var, _ = key.split("/") metadata = manifest_group.arrays[var].metadata.to_dict() - return dict_to_buffer(metadata, prototype=default_buffer_prototype()) + + return metadata.to_buffer_dict(prototype=default_buffer_prototype()) def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]: @@ -260,8 +251,10 @@ async def get( byte_range: ByteRequest | None = None, ) -> Buffer | None: # docstring inherited + if key.endswith("zarr.json"): return get_zarr_metadata(self._group, key) + var = key.split("/")[0] marr = self._group.arrays[var] manifest = marr.manifest @@ -272,22 +265,26 @@ async def get( path = manifest._paths[*chunk_indexes] offset = manifest._offsets[*chunk_indexes] length = manifest._lengths[*chunk_indexes] + # Get the configured object store instance that matches the path store = self._store_registry.get_store(path) if not store: raise ValueError( f"Could not find a store to use for {path} in the store registry" ) + # Truncate path to match Obstore expectations key = urlparse(path).path if hasattr(store, "prefix") and store.prefix: # strip the prefix from key key = key.removeprefix(str(store.prefix)) + # Transform the input byte range to account for the chunk location in the file chunk_end_exclusive = offset + length byte_range = _transform_byte_range( byte_range, chunk_start=offset, chunk_end_exclusive=chunk_end_exclusive ) + # Actually get the bytes try: bytes = await store.get_range_async( diff --git a/virtualizarr/vendor/zarr/core/metadata.py b/virtualizarr/vendor/zarr/core/metadata.py deleted file mode 100644 index bc53de84..00000000 --- a/virtualizarr/vendor/zarr/core/metadata.py +++ /dev/null @@ -1,32 +0,0 @@ -import json -from typing import Any - -import numpy as np -from zarr.core.buffer import Buffer, BufferPrototype -from zarr.core.metadata.v3 import V3JsonEncoder - - -def _replace_special_floats(obj: object) -> Any: - """Helper function to replace NaN/Inf/-Inf values with special strings - - Note: this cannot be done in the V3JsonEncoder because Python's `json.dumps` optimistically - converts NaN/Inf values to special types outside of the encoding step. - """ - if isinstance(obj, float): - if np.isnan(obj): - return "NaN" - elif np.isinf(obj): - return "Infinity" if obj > 0 else "-Infinity" - elif isinstance(obj, dict): - # Recursively replace in dictionaries - return {k: _replace_special_floats(v) for k, v in obj.items()} - elif isinstance(obj, list): - # Recursively replace in lists - return [_replace_special_floats(item) for item in obj] - return obj - - -def dict_to_buffer(input: dict, prototype: BufferPrototype) -> Buffer: - # modified from ArrayV3Metadata.to_buffer_dict - d = _replace_special_floats(input) - return prototype.buffer.from_bytes(json.dumps(d, cls=V3JsonEncoder).encode()) From 93ebf747717d10dd55720f1cf08df7dcfe0bd54d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 29 Jun 2025 04:30:09 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/manifests/store.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index 98bc1b82..084f280c 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -74,7 +74,7 @@ def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer: else: var, _ = key.split("/") metadata = manifest_group.arrays[var].metadata.to_dict() - + return metadata.to_buffer_dict(prototype=default_buffer_prototype()) @@ -254,7 +254,7 @@ async def get( if key.endswith("zarr.json"): return get_zarr_metadata(self._group, key) - + var = key.split("/")[0] marr = self._group.arrays[var] manifest = marr.manifest @@ -272,7 +272,7 @@ async def get( raise ValueError( f"Could not find a store to use for {path} in the store registry" ) - + # Truncate path to match Obstore expectations key = urlparse(path).path if hasattr(store, "prefix") and store.prefix: From 3c6ca366ff1ade640bda680e3195adace6efae6f Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 11 Jul 2025 16:35:42 -0400 Subject: [PATCH 7/9] Handle metadata get requests --- virtualizarr/manifests/store.py | 37 +++++++++++---------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index 084f280c..ce3d81f0 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -56,28 +56,6 @@ def get_store_prefix(url: str) -> str: return "" if scheme in {"", "file"} else f"{scheme}://{netloc}" -def get_zarr_metadata(manifest_group: ManifestGroup, key: str) -> Buffer: - """ - Extract the expected Zarr V3 metadata from a ManifestGroup and convert to spec-compliant format. - - Parameters - ---------- - manifest_group : ManifestGroup - key : str - - Returns - ------- - Buffer - """ - if key == "zarr.json": - metadata = manifest_group.metadata.to_dict() - else: - var, _ = key.split("/") - metadata = manifest_group.arrays[var].metadata.to_dict() - - return metadata.to_buffer_dict(prototype=default_buffer_prototype()) - - def parse_manifest_index(key: str, chunk_key_encoding: str = ".") -> tuple[int, ...]: """ Splits `key` provided to a zarr store into the variable indicated @@ -252,9 +230,18 @@ async def get( ) -> Buffer | None: # docstring inherited - if key.endswith("zarr.json"): - return get_zarr_metadata(self._group, key) - + if key == "zarr.json": + # Return group metadata + return self._group.metadata.to_buffer_dict( + prototype=default_buffer_prototype() + )["zarr.json"] + elif key.endswith("zarr.json"): + # Return array metadata + # TODO: Handle nested groups + var, _ = key.split("/") + return self._group.arrays[var].metadata.to_buffer_dict( + prototype=default_buffer_prototype() + )["zarr.json"] var = key.split("/")[0] marr = self._group.arrays[var] manifest = marr.manifest From bccccd15b1b785afaa02cb266e70dc632790b975 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Fri, 11 Jul 2025 16:40:59 -0400 Subject: [PATCH 8/9] Change expected dtype from i4" + assert var.dtype == "int32" From 45c7c741e0d544a13bbb8ec74ee8fc26773bc675 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:30:05 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/manifests/store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index f9acf1c3..716a4929 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -232,13 +232,13 @@ async def get( chunk_indexes = parse_manifest_index( key, marr.metadata.chunk_key_encoding.separator ) - + path = manifest._paths[chunk_indexes] if path == "": return None offset = manifest._offsets[chunk_indexes] length = manifest._lengths[chunk_indexes] - + # Get the configured object store instance that matches the path store = self._store_registry.get_store(path) if not store: