From 0797c15e3a61a9ab28892c29b4c3b9b7a85f52a1 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 10 Jul 2025 21:45:27 +0200
Subject: [PATCH 1/9] refactor default chunk encoding to skip config. add tests
 for deprecated config keys

---
 src/zarr/core/array.py  | 109 +++++++++++++++++++++++++++++++---------
 src/zarr/core/config.py |  41 ++++++++-------
 tests/test_config.py    |  80 ++++++++++-------------------
 tests/test_v2.py        |  57 ++++++++++-----------
 4 files changed, 159 insertions(+), 128 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 312dc0bc4d..185f72e41a 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -30,6 +30,8 @@
 from zarr.abc.store import Store, set_or_delete
 from zarr.codecs._v2 import V2Codec
 from zarr.codecs.bytes import BytesCodec
+from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
+from zarr.codecs.zstd import ZstdCodec
 from zarr.core._info import ArrayInfo
 from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config
 from zarr.core.attributes import Attributes
@@ -72,7 +74,7 @@
     ZDTypeLike,
     parse_data_type,
 )
-from zarr.core.dtype.common import HasEndianness, HasItemSize
+from zarr.core.dtype.common import HasEndianness, HasItemSize, HasObjectCodec
 from zarr.core.indexing import (
     BasicIndexer,
     BasicSelection,
@@ -710,7 +712,10 @@ def _create_metadata_v3(
 
         shape = parse_shapelike(shape)
         if codecs is None:
-            filters, serializer, compressors = _get_default_chunk_encoding_v3(dtype)
+            filters = default_filters_v3(dtype)
+            serializer = default_serializer_v3(dtype)
+            compressors = default_compressors_v3(dtype)
+
             codecs_parsed = (*filters, serializer, *compressors)
         else:
             codecs_parsed = tuple(codecs)
@@ -850,10 +855,9 @@ async def _create_v2(
         else:
             await ensure_no_existing_node(store_path, zarr_format=2)
 
-        default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)
         compressor_parsed: CompressorLikev2
         if compressor == "auto":
-            compressor_parsed = default_compressor
+            compressor_parsed = default_compressor_v2(dtype)
         elif isinstance(compressor, BytesBytesCodec):
             raise ValueError(
                 "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
@@ -863,7 +867,7 @@ async def _create_v2(
             compressor_parsed = compressor
 
         if filters is None:
-            filters = default_filters
+            filters = default_filters_v2(dtype)
 
         metadata = cls._create_metadata_v2(
             shape=shape,
@@ -4654,19 +4658,80 @@ def _get_default_chunk_encoding_v3(
     )
 
 
-def _get_default_chunk_encoding_v2(
-    dtype: ZDType[TBaseDType, TBaseScalar],
-) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
+def default_filters_v3(dtype: ZDType[Any, Any]) -> tuple[ArrayArrayCodec, ...]:
     """
-    Get the default chunk encoding for Zarr format 2 arrays, given a dtype
+    Given a data type, return the default filters for that data type.
+
+    This is an empty tuple. No data types have default filters.
     """
-    dtype_category = categorize_data_type(dtype)
-    filters = zarr_config.get("array.v2_default_filters").get(dtype_category)
-    compressor = zarr_config.get("array.v2_default_compressor").get(dtype_category)
-    if filters is not None:
-        filters = tuple(numcodecs.get_codec(f) for f in filters)
+    return ()
 
-    return filters, numcodecs.get_codec(compressor)
+
+def default_compressors_v3(dtype: ZDType[Any, Any]) -> tuple[BytesBytesCodec, ...]:
+    """
+    Given a data type, return the default compressors for that data type.
+
+    This is a tuple containing a single ``ZstdCodec``.
+    """
+    return (ZstdCodec(),)
+
+
+def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec:
+    """
+    Given a data type, return the default serializer for that data type.
+
+    The default serializer for most data types is the ``BytesCodec``, which may or may not be
+    parameterized with an endianness, depending on whether the data type has endianness. Variable
+    length strings and variable length bytes have hard-coded serializers -- ``VLenUTF8Codec`` and
+    ``VLenBytesCodec``, respectively.
+
+    """
+    serializer: ArrayBytesCodec = BytesCodec()
+
+    if isinstance(dtype, HasEndianness):
+        serializer = BytesCodec(endian="little")
+    elif isinstance(dtype, HasObjectCodec):
+        if dtype.object_codec_id == "vlen-bytes":
+            serializer = VLenBytesCodec()
+        elif dtype.object_codec_id == "vlen-utf8":
+            serializer = VLenUTF8Codec()
+        else:
+            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id}"
+            raise ValueError(msg)
+    return serializer
+
+
+def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] | None:
+    """
+    Given a data type, return the default filters for that data type.
+
+    For data types that require an object codec, namely variable length data types,
+    this is a tuple containing the object codec. Otherwise it's ``None``.
+    """
+    if isinstance(dtype, HasObjectCodec):
+        if dtype.object_codec_id == "vlen-bytes":
+            from numcodecs import VLenBytes
+
+            return (VLenBytes(),)
+        elif dtype.object_codec_id == "vlen-utf8":
+            from numcodecs import VLenUTF8
+
+            return (VLenUTF8(),)
+        else:
+            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id}"
+            raise ValueError(msg)
+    return None
+
+
+def default_compressor_v2(dtype: ZDType[Any, Any]) -> numcodecs.abc.Codec:
+    """
+    Given a data type, return the default compressor for that data type.
+
+    This is just the numcodecs ``Zstd`` codec.
+    """
+    from numcodecs import Zstd
+
+    return Zstd(level=0, checksum=False)
 
 
 def _parse_chunk_encoding_v2(
@@ -4678,14 +4743,13 @@ def _parse_chunk_encoding_v2(
     """
     Generate chunk encoding classes for Zarr format 2 arrays with optional defaults.
     """
-    default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)
     _filters: tuple[numcodecs.abc.Codec, ...] | None
     _compressor: numcodecs.abc.Codec | None
 
     if compressor is None or compressor == ():
         _compressor = None
     elif compressor == "auto":
-        _compressor = default_compressor
+        _compressor = default_compressor_v2(dtype)
     elif isinstance(compressor, tuple | list) and len(compressor) == 1:
         _compressor = parse_compressor(compressor[0])
     else:
@@ -4697,7 +4761,7 @@ def _parse_chunk_encoding_v2(
     if filters is None:
         _filters = None
     elif filters == "auto":
-        _filters = default_filters
+        _filters = default_filters_v2(dtype)
     else:
         if isinstance(filters, Iterable):
             for idx, f in enumerate(filters):
@@ -4722,14 +4786,11 @@ def _parse_chunk_encoding_v3(
     """
     Generate chunk encoding classes for v3 arrays with optional defaults.
     """
-    default_array_array, default_array_bytes, default_bytes_bytes = _get_default_chunk_encoding_v3(
-        dtype
-    )
 
     if filters is None:
         out_array_array: tuple[ArrayArrayCodec, ...] = ()
     elif filters == "auto":
-        out_array_array = default_array_array
+        out_array_array = default_filters_v3(dtype)
     else:
         maybe_array_array: Iterable[Codec | dict[str, JSON]]
         if isinstance(filters, dict | Codec):
@@ -4739,7 +4800,7 @@ def _parse_chunk_encoding_v3(
         out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array)
 
     if serializer == "auto":
-        out_array_bytes = default_array_bytes
+        out_array_bytes = default_serializer_v3(dtype)
     else:
         # TODO: ensure that the serializer is compatible with the ndarray produced by the
         # array-array codecs. For example, if a sequence of array-array codecs produces an
@@ -4749,7 +4810,7 @@ def _parse_chunk_encoding_v3(
     if compressors is None:
         out_bytes_bytes: tuple[BytesBytesCodec, ...] = ()
     elif compressors == "auto":
-        out_bytes_bytes = default_bytes_bytes
+        out_bytes_bytes = default_compressors_v3(dtype)
     else:
         maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
         if isinstance(compressors, dict | Codec):
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 05d048ef74..cc3c33cd17 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -78,6 +78,25 @@ def enable_gpu(self) -> ConfigSet:
         )
 
 
+# these keys were removed from the config as part of the 3.1.0 release.
+# these deprecations should be removed in 3.1.1 or thereabouts.
+deprecations = {
+    "array.v2_default_compressor.numeric": None,
+    "array.v2_default_compressor.string": None,
+    "array.v2_default_compressor.bytes": None,
+    "array.v2_default_filters.string": None,
+    "array.v2_default_filters.bytes": None,
+    "array.v3_default_filters.numeric": None,
+    "array.v3_default_filters.raw": None,
+    "array.v3_default_filters.bytes": None,
+    "array.v3_default_serializer.numeric": None,
+    "array.v3_default_serializer.string": None,
+    "array.v3_default_serializer.bytes": None,
+    "array.v3_default_compressors.string": None,
+    "array.v3_default_compressors.bytes": None,
+    "array.v3_default_compressors": None,
+}
+
 # The default configuration for zarr
 config = Config(
     "zarr",
@@ -87,27 +106,6 @@ def enable_gpu(self) -> ConfigSet:
             "array": {
                 "order": "C",
                 "write_empty_chunks": False,
-                "v2_default_compressor": {
-                    "default": {"id": "zstd", "level": 0, "checksum": False},
-                    "variable-length-string": {"id": "zstd", "level": 0, "checksum": False},
-                },
-                "v2_default_filters": {
-                    "default": None,
-                    "variable-length-string": [{"id": "vlen-utf8"}],
-                },
-                "v3_default_filters": {"default": [], "variable-length-string": []},
-                "v3_default_serializer": {
-                    "default": {"name": "bytes", "configuration": {"endian": "little"}},
-                    "variable-length-string": {"name": "vlen-utf8"},
-                },
-                "v3_default_compressors": {
-                    "default": [
-                        {"name": "zstd", "configuration": {"level": 0, "checksum": False}},
-                    ],
-                    "variable-length-string": [
-                        {"name": "zstd", "configuration": {"level": 0, "checksum": False}}
-                    ],
-                },
             },
             "async": {"concurrency": 10, "timeout": None},
             "threading": {"max_workers": None},
@@ -132,6 +130,7 @@ def enable_gpu(self) -> ConfigSet:
             "ndbuffer": "zarr.buffer.cpu.NDBuffer",
         }
     ],
+    deprecations=deprecations,
 )
 
 
diff --git a/tests/test_config.py b/tests/test_config.py
index e267601272..c59e721c49 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,6 +1,6 @@
 import os
 from collections.abc import Iterable
-from typing import TYPE_CHECKING, Any
+from typing import Any
 from unittest import mock
 from unittest.mock import Mock
 
@@ -16,16 +16,13 @@
     BloscCodec,
     BytesCodec,
     Crc32cCodec,
-    GzipCodec,
     ShardingCodec,
 )
-from zarr.core.array import create_array
 from zarr.core.array_spec import ArraySpec
 from zarr.core.buffer import NDBuffer
 from zarr.core.buffer.core import Buffer
 from zarr.core.codec_pipeline import BatchedCodecPipeline
 from zarr.core.config import BadConfigError, config
-from zarr.core.dtype import Int8, VariableLengthUTF8
 from zarr.core.indexing import SelectorTuple
 from zarr.registry import (
     fully_qualified_name,
@@ -38,7 +35,6 @@
     register_ndbuffer,
     register_pipeline,
 )
-from zarr.storage import MemoryStore
 from zarr.testing.buffer import (
     NDBufferUsingTestNDArrayLike,
     StoreExpectingTestBuffer,
@@ -46,9 +42,6 @@
     TestNDArrayLike,
 )
 
-if TYPE_CHECKING:
-    from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
-
 
 def test_config_defaults_set() -> None:
     # regression test for available defaults
@@ -60,27 +53,6 @@ def test_config_defaults_set() -> None:
                 "array": {
                     "order": "C",
                     "write_empty_chunks": False,
-                    "v2_default_compressor": {
-                        "default": {"id": "zstd", "level": 0, "checksum": False},
-                        "variable-length-string": {"id": "zstd", "level": 0, "checksum": False},
-                    },
-                    "v2_default_filters": {
-                        "default": None,
-                        "variable-length-string": [{"id": "vlen-utf8"}],
-                    },
-                    "v3_default_filters": {"default": [], "variable-length-string": []},
-                    "v3_default_serializer": {
-                        "default": {"name": "bytes", "configuration": {"endian": "little"}},
-                        "variable-length-string": {"name": "vlen-utf8"},
-                    },
-                    "v3_default_compressors": {
-                        "default": [
-                            {"name": "zstd", "configuration": {"level": 0, "checksum": False}},
-                        ],
-                        "variable-length-string": [
-                            {"name": "zstd", "configuration": {"level": 0, "checksum": False}}
-                        ],
-                    },
                 },
                 "async": {"concurrency": 10, "timeout": None},
                 "threading": {"max_workers": None},
@@ -323,29 +295,31 @@ class NewCodec2(BytesCodec):
         get_codec_class("new_codec")
 
 
-@pytest.mark.parametrize("dtype_category", ["variable-length-string", "default"])
-@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
-async def test_default_codecs(dtype_category: str) -> None:
+@pytest.mark.parametrize(
+    "key",
+    [
+        "array.v2_default_compressor.numeric",
+        "array.v2_default_compressor.string",
+        "array.v2_default_compressor.bytes",
+        "array.v2_default_filters.string",
+        "array.v2_default_filters.bytes",
+        "array.v3_default_filters.numeric",
+        "array.v3_default_filters.raw",
+        "array.v3_default_filters.bytes",
+        "array.v3_default_serializer.numeric",
+        "array.v3_default_serializer.string",
+        "array.v3_default_serializer.bytes",
+        "array.v3_default_compressors.string",
+        "array.v3_default_compressors.bytes",
+        "array.v3_default_compressors",
+    ],
+)
+def test_deprecated_config(key: str) -> None:
     """
-    Test that the default compressors are sensitive to the current setting of the config.
+    Test that a ValueError is raised when setting a deprecated config key for the default chunk
+    encoding of a data type category.
     """
-    zdtype: ZDType[TBaseDType, TBaseScalar]
-    if dtype_category == "variable-length-string":
-        zdtype = VariableLengthUTF8()  # type: ignore[assignment]
-    else:
-        zdtype = Int8()
-    expected_compressors = (GzipCodec(),)
-    new_conf = {
-        f"array.v3_default_compressors.{dtype_category}": [
-            c.to_dict() for c in expected_compressors
-        ]
-    }
-    with config.set(new_conf):
-        arr = await create_array(
-            shape=(100,),
-            chunks=(100,),
-            dtype=zdtype,
-            zarr_format=3,
-            store=MemoryStore(),
-        )
-        assert arr.compressors == expected_compressors
+
+    with pytest.raises(ValueError):
+        with zarr.config.set({key: "foo"}):
+            pass
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 29f031663f..4d17305995 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -73,37 +73,34 @@ def test_codec_pipeline() -> None:
 async def test_v2_encode_decode(
     dtype: str, expected_dtype: str, fill_value: bytes, fill_value_json: str
 ) -> None:
-    with config.set(
-        {
-            "array.v2_default_filters.bytes": [{"id": "vlen-bytes"}],
-            "array.v2_default_compressor.bytes": None,
-        }
-    ):
-        store = zarr.storage.MemoryStore()
-        g = zarr.group(store=store, zarr_format=2)
-        g.create_array(
-            name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=fill_value, compressor=None
-        )
+    store = zarr.storage.MemoryStore()
+    g = zarr.group(store=store, zarr_format=2)
+    g.create_array(
+        name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=fill_value, compressor=None
+    )
 
-        result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
-        assert result is not None
-
-        serialized = json.loads(result.to_bytes())
-        expected = {
-            "chunks": [3],
-            "compressor": None,
-            "dtype": expected_dtype,
-            "fill_value": fill_value_json,
-            "filters": None,
-            "order": "C",
-            "shape": [3],
-            "zarr_format": 2,
-            "dimension_separator": ".",
-        }
-        assert serialized == expected
-
-        data = zarr.open_array(store=store, path="foo")[:]
-        np.testing.assert_equal(data, np.full((3,), b"X", dtype=dtype))
+    result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
+    assert result is not None
+
+    serialized = json.loads(result.to_bytes())
+    expected = {
+        "chunks": [3],
+        "compressor": None,
+        "dtype": expected_dtype,
+        "fill_value": fill_value_json,
+        "filters": None,
+        "order": "C",
+        "shape": [3],
+        "zarr_format": 2,
+        "dimension_separator": ".",
+    }
+    assert serialized == expected
+
+    data = zarr.open_array(store=store, path="foo")[:]
+    np.testing.assert_equal(data, np.full((3,), b"X", dtype=dtype))
+
+    data = zarr.open_array(store=store, path="foo")[:]
+    np.testing.assert_equal(data, np.full((3,), b"X", dtype=dtype))
 
 
 @pytest.mark.parametrize(

From 855901ca485279effbe86824f236edfb33fcd064 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 10 Jul 2025 22:03:14 +0200
Subject: [PATCH 2/9] remove chunk encoding configuration from docs

---
 docs/user-guide/arrays.rst | 10 ----------
 docs/user-guide/config.rst | 20 +-------------------
 2 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst
index f45dfbebe8..67b134d442 100644
--- a/docs/user-guide/arrays.rst
+++ b/docs/user-guide/arrays.rst
@@ -246,16 +246,6 @@ built-in delta filter::
    >>> z.compressors
    (LZMA(codec_name='numcodecs.lzma', codec_config={'filters': [{'id': 3, 'dist': 4}, {'id': 33, 'preset': 1}]}),)
 
-The default compressor can be changed by setting the value of the using Zarr's
-:ref:`user-guide-config`, e.g.::
-
-   >>> with zarr.config.set({'array.v2_default_compressor.default': {'id': 'blosc'}}):
-   ...     z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2)
-   >>> z.filters
-   ()
-   >>> z.compressors
-   (Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0),)
-
 To disable compression, set ``compressors=None`` when creating an array, e.g.::
 
    >>> z = zarr.create_array(store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None)
diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst
index 5a9d26f2b9..0ae8017ca9 100644
--- a/docs/user-guide/config.rst
+++ b/docs/user-guide/config.rst
@@ -43,25 +43,7 @@ This is the current default configuration::
 
    >>> zarr.config.pprint()
    {'array': {'order': 'C',
-            'v2_default_compressor': {'default': {'checksum': False,
-                                                   'id': 'zstd',
-                                                   'level': 0},
-                                       'variable-length-string': {'checksum': False,
-                                                                  'id': 'zstd',
-                                                                  'level': 0}},
-            'v2_default_filters': {'default': None,
-                                    'variable-length-string': [{'id': 'vlen-utf8'}]},
-            'v3_default_compressors': {'default': [{'configuration': {'checksum': False,
-                                                                        'level': 0},
-                                                      'name': 'zstd'}],
-                                       'variable-length-string': [{'configuration': {'checksum': False,
-                                                                                       'level': 0},
-                                                                     'name': 'zstd'}]},
-            'v3_default_filters': {'default': [], 'variable-length-string': []},
-            'v3_default_serializer': {'default': {'configuration': {'endian': 'little'},
-                                                   'name': 'bytes'},
-                                       'variable-length-string': {'name': 'vlen-utf8'}},
-            'write_empty_chunks': False},
+              'write_empty_chunks': False},
    'async': {'concurrency': 10, 'timeout': None},
    'buffer': 'zarr.buffer.cpu.Buffer',
    'codec_pipeline': {'batch_size': 1,

From 7447805588a8689f2bc43cdd5507acf12c071eea Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 10 Jul 2025 23:33:35 +0200
Subject: [PATCH 3/9] don't create invalid string dtype arrays in test

---
 tests/test_array.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_array.py b/tests/test_array.py
index 0bca860e84..da9aa7aa30 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -1335,6 +1335,8 @@ async def test_invalid_v3_arguments(
     async def test_v2_chunk_encoding(
         store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str
     ) -> None:
+        if dtype == "str" and filters != "auto":
+            pytest.skip("Only the auto filters are compatible with str dtype in this test.")
         arr = await create_array(
             store=store,
             dtype=dtype,

From 173766d1bd248ab5440d2ceaf33588c7e98971d7 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 10 Jul 2025 23:34:39 +0200
Subject: [PATCH 4/9] add v2-style error when creating a vlen dtype without the
 right codec

---
 src/zarr/core/array.py | 43 ++++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 185f72e41a..825741f855 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -70,6 +70,8 @@
 from zarr.core.config import categorize_data_type
 from zarr.core.config import config as zarr_config
 from zarr.core.dtype import (
+    VariableLengthBytes,
+    VariableLengthUTF8,
     ZDType,
     ZDTypeLike,
     parse_data_type,
@@ -111,6 +113,7 @@
 )
 from zarr.core.metadata.v2 import (
     CompressorLikev2,
+    get_object_codec_id,
     parse_compressor,
     parse_filters,
 )
@@ -4686,7 +4689,7 @@ def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec:
     ``VLenBytesCodec``, respectively.
 
     """
-    serializer: ArrayBytesCodec = BytesCodec()
+    serializer: ArrayBytesCodec = BytesCodec(endian=None)
 
     if isinstance(dtype, HasEndianness):
         serializer = BytesCodec(endian="little")
@@ -4772,7 +4775,33 @@ def _parse_chunk_encoding_v2(
                     )
                     raise TypeError(msg)
         _filters = parse_filters(filters)
-
+    if isinstance(dtype, HasObjectCodec):
+        # check the filters and the compressor for the object codec required for this data type
+        if _filters is None:
+            if _compressor is None:
+                object_codec_id = None
+            else:
+                object_codec_id = get_object_codec_id((_compressor.get_config(),))
+        else:
+            object_codec_id = get_object_codec_id(
+                (
+                    *[f.get_config() for f in _filters],
+                    _compressor.get_config() if _compressor is not None else None,
+                )
+            )
+        if object_codec_id is None:
+            if isinstance(dtype, VariableLengthUTF8):
+                codec_name = "the numcodecs.VLenUTF8 codec"
+            elif isinstance(dtype, VariableLengthBytes):
+                codec_name = "the numcodecs.VLenBytes codec"
+            else:
+                codec_name = "an unknown object codec"
+            msg = (
+                f"Data type {dtype} requires {codec_name}, "
+                "but no such codec was specified in the filters or compressor parameters for "
+                "this array. "
+            )
+            raise ValueError(msg)
     return _filters, _compressor
 
 
@@ -4820,17 +4849,11 @@ def _parse_chunk_encoding_v3(
 
         out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes)
 
-    # specialize codecs as needed given the dtype
-
-    # TODO: refactor so that the config only contains the name of the codec, and we use the dtype
-    # to create the codec instance, instead of storing a dict representation of a full codec.
-
     # TODO: ensure that the serializer is compatible with the ndarray produced by the
     # array-array codecs. For example, if a sequence of array-array codecs produces an
     # array with a single-byte data type, then the serializer should not specify endiannesss.
-    if isinstance(out_array_bytes, BytesCodec) and not isinstance(dtype, HasEndianness):
-        # The default endianness in the bytescodec might not be None, so we need to replace it
-        out_array_bytes = replace(out_array_bytes, endian=None)
+
+    # TODO: add checks to ensure that the right serializer is used for vlen data types
     return out_array_array, out_array_bytes, out_bytes_bytes
 
 

From f4d31a28875b1511efd356c8b773fe1b2b03812e Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 10 Jul 2025 23:46:39 +0200
Subject: [PATCH 5/9] test for v2-style error when creating an object array
 without an object codec

---
 src/zarr/core/array.py |  4 +--
 tests/test_array.py    | 61 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 825741f855..fc77567088 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -4721,7 +4721,7 @@ def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] |
 
             return (VLenUTF8(),)
         else:
-            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id}"
+            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}"
             raise ValueError(msg)
     return None
 
@@ -4795,7 +4795,7 @@ def _parse_chunk_encoding_v2(
             elif isinstance(dtype, VariableLengthBytes):
                 codec_name = "the numcodecs.VLenBytes codec"
             else:
-                codec_name = "an unknown object codec"
+                codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}"
             msg = (
                 f"Data type {dtype} requires {codec_name}, "
                 "but no such codec was specified in the filters or compressor parameters for "
diff --git a/tests/test_array.py b/tests/test_array.py
index da9aa7aa30..285df21dd5 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -41,17 +41,22 @@
 from zarr.core.chunk_grids import _auto_partition
 from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
 from zarr.core.common import JSON, MemoryOrder, ZarrFormat
-from zarr.core.dtype import parse_data_type
-from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr
-from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str
-from zarr.core.dtype.npy.float import Float32, Float64
-from zarr.core.dtype.npy.int import Int16, UInt8
-from zarr.core.dtype.npy.string import VariableLengthUTF8
-from zarr.core.dtype.npy.structured import (
+from zarr.core.dtype import (
+    DateTime64,
+    Float32,
+    Float64,
+    Int16,
     Structured,
+    TimeDelta64,
+    UInt8,
+    VariableLengthBytes,
+    VariableLengthUTF8,
+    ZDType,
+    parse_data_type,
 )
-from zarr.core.dtype.npy.time import DateTime64, TimeDelta64
-from zarr.core.dtype.wrapper import ZDType
+from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr
+from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str
+from zarr.core.dtype.npy.string import UTF8Base
 from zarr.core.group import AsyncGroup
 from zarr.core.indexing import BasicIndexer, ceildiv
 from zarr.core.metadata.v2 import ArrayV2Metadata
@@ -1850,3 +1855,41 @@ def test_array_repr(store: Store) -> None:
     dtype = "uint8"
     arr = zarr.create_array(store, shape=shape, dtype=dtype)
     assert str(arr) == f"<Array {store} shape={shape} dtype={dtype}>"
+
+
+class UnknownObjectDtype(UTF8Base[np.dtypes.ObjectDType]):
+    object_codec_id = "unknown"  # type: ignore[assignment]
+
+    def to_native_dtype(self) -> np.dtypes.ObjectDType:
+        """
+        Create a NumPy object dtype from this UnknownObjectDtype ZDType.
+
+        Returns
+        -------
+        np.dtypes.ObjectDType
+            The NumPy object dtype.
+        """
+        return np.dtype("o")
+
+
+@pytest.mark.parametrize(
+    "dtype", [VariableLengthUTF8(), VariableLengthBytes(), UnknownObjectDtype()]
+)
+def test_chunk_encoding_no_object_codec_errors(dtype: ZDType[Any, Any]) -> None:
+    """
+    Test that a ValueError is raised when checking the chunk encoding for a v2 array with a
+    data type that requires an object codec, but where no object codec is specified.
+    """
+    if isinstance(dtype, VariableLengthUTF8):
+        codec_name = "the numcodecs.VLenUTF8 codec"
+    elif isinstance(dtype, VariableLengthBytes):
+        codec_name = "the numcodecs.VLenBytes codec"
+    else:
+        codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}"
+    msg = (
+        f"Data type {dtype} requires {codec_name}, "
+        "but no such codec was specified in the filters or compressor parameters for "
+        "this array. "
+    )
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        _parse_chunk_encoding_v2(filters=None, compressor=None, dtype=dtype)

From 6bbd4dfc31201d509704c4f8cc0a2841c81dbd06 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Thu, 10 Jul 2025 23:50:52 +0200
Subject: [PATCH 6/9] lint

---
 src/zarr/core/array.py | 8 ++++----
 tests/test_array.py    | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index fc77567088..608843b861 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -4790,10 +4790,10 @@ def _parse_chunk_encoding_v2(
                 )
             )
         if object_codec_id is None:
-            if isinstance(dtype, VariableLengthUTF8):
-                codec_name = "the numcodecs.VLenUTF8 codec"
-            elif isinstance(dtype, VariableLengthBytes):
-                codec_name = "the numcodecs.VLenBytes codec"
+            if isinstance(dtype, VariableLengthUTF8):  # type: ignore[unreachable]
+                codec_name = "the numcodecs.VLenUTF8 codec"  # type: ignore[unreachable]
+            elif isinstance(dtype, VariableLengthBytes):  # type: ignore[unreachable]
+                codec_name = "the numcodecs.VLenBytes codec"  # type: ignore[unreachable]
             else:
                 codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}"
             msg = (
diff --git a/tests/test_array.py b/tests/test_array.py
index 285df21dd5..1aca9ffb7a 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -1869,7 +1869,7 @@ def to_native_dtype(self) -> np.dtypes.ObjectDType:
         np.dtypes.ObjectDType
             The NumPy object dtype.
         """
-        return np.dtype("o")
+        return np.dtype("o")  # type: ignore[return-value]
 
 
 @pytest.mark.parametrize(
@@ -1885,7 +1885,7 @@ def test_chunk_encoding_no_object_codec_errors(dtype: ZDType[Any, Any]) -> None:
     elif isinstance(dtype, VariableLengthBytes):
         codec_name = "the numcodecs.VLenBytes codec"
     else:
-        codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}"
+        codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}"  # type: ignore[attr-defined]
     msg = (
         f"Data type {dtype} requires {codec_name}, "
         "but no such codec was specified in the filters or compressor parameters for "

From 0d7f83b129774cfd18460e0c8a056b6a25d01f18 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Fri, 11 Jul 2025 00:03:34 +0200
Subject: [PATCH 7/9] changelog

---
 changes/3228.removal.rst | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 changes/3228.removal.rst

diff --git a/changes/3228.removal.rst b/changes/3228.removal.rst
new file mode 100644
index 0000000000..495d09f44a
--- /dev/null
+++ b/changes/3228.removal.rst
@@ -0,0 +1,16 @@
+Removes default chunk encoding settings (filters, serializer, compressors) from the global
+configuration object.
+
+This removal is justified on the basis that storing chunk encoding settings in the config required
+a brittle, confusing, and inaccurate categorization of array data types, which was particularly
+unsuitable after the recent addition of new data types that didn't fit naturally into the
+pre-existing categories.
+
+The default chunk encoding is the same (Zstandard compression, and the required object codecs for
+variable length data types), but the chunk encoding is now generated by functions that cannot be
+reconfigured at runtime. Users who relied on setting the default chunk encoding via the global configuration object should
+instead specify the desired chunk encoding explicitly when creating an array.
+
+This change also adds an extra validation step to the creation of Zarr V2 arrays, which ensures that
+arrays with a ``VariableLengthUTF8`` or ``VariableLengthBytes`` cannot be created without the
+correct "object codec".
\ No newline at end of file

From 52c1be1628695f52209224d9dae4282e850131cb Mon Sep 17 00:00:00 2001
From: Davis Bennett <davis.v.bennett@gmail.com>
Date: Fri, 11 Jul 2025 10:25:29 +0200
Subject: [PATCH 8/9] Update 3228.removal.rst

---
 changes/3228.removal.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changes/3228.removal.rst b/changes/3228.removal.rst
index 495d09f44a..0463897755 100644
--- a/changes/3228.removal.rst
+++ b/changes/3228.removal.rst
@@ -12,5 +12,5 @@ reconfigured at runtime. Users who relied on setting the default chunk encoding
 instead specify the desired chunk encoding explicitly when creating an array.
 
 This change also adds an extra validation step to the creation of Zarr V2 arrays, which ensures that
-arrays with a ``VariableLengthUTF8`` or ``VariableLengthBytes`` cannot be created without the
-correct "object codec".
\ No newline at end of file
+arrays with a ``VariableLengthUTF8`` or ``VariableLengthBytes`` data type cannot be created without the
+correct "object codec".

From d61637f69f51c61a4d1ab886031345c7dc95af46 Mon Sep 17 00:00:00 2001
From: Davis Vann Bennett <davis.v.bennett@gmail.com>
Date: Fri, 11 Jul 2025 12:21:57 +0200
Subject: [PATCH 9/9] test coverage

---
 src/zarr/core/array.py |  4 ++--
 tests/test_array.py    | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 608843b861..a4f7fc086a 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -4699,7 +4699,7 @@ def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec:
         elif dtype.object_codec_id == "vlen-utf8":
             serializer = VLenUTF8Codec()
         else:
-            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id}"
+            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
             raise ValueError(msg)
     return serializer
 
@@ -4721,7 +4721,7 @@ def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[numcodecs.abc.Codec] |
 
             return (VLenUTF8(),)
         else:
-            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}"
+            msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
             raise ValueError(msg)
     return None
 
diff --git a/tests/test_array.py b/tests/test_array.py
index 1aca9ffb7a..4783bca05c 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -35,6 +35,8 @@
     _parse_chunk_encoding_v3,
     chunks_initialized,
     create_array,
+    default_filters_v2,
+    default_serializer_v3,
 )
 from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype
 from zarr.core.buffer.cpu import NDBuffer
@@ -1893,3 +1895,25 @@ def test_chunk_encoding_no_object_codec_errors(dtype: ZDType[Any, Any]) -> None:
     )
     with pytest.raises(ValueError, match=re.escape(msg)):
         _parse_chunk_encoding_v2(filters=None, compressor=None, dtype=dtype)
+
+
+def test_unknown_object_codec_default_serializer_v3() -> None:
+    """
+    Test that we get a ValueError when trying to create the default serializer for a data type
+    that requires an unknown object codec
+    """
+    dtype = UnknownObjectDtype()
+    msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        default_serializer_v3(dtype)
+
+
+def test_unknown_object_codec_default_filters_v2() -> None:
+    """
+    Test that we get a ValueError when trying to create the default v2 filters for a data type
+    that requires an unknown object codec
+    """
+    dtype = UnknownObjectDtype()
+    msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}."
+    with pytest.raises(ValueError, match=re.escape(msg)):
+        default_filters_v2(dtype)