From a98451c29412b50a5aca224f2007eb1992b53ecc Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Fri, 22 May 2026 15:24:25 -0400 Subject: [PATCH 1/4] Run doctests via pytest and fix broken doctests. Fixes #3498 --- pyproject.toml | 4 +- src/zarr/abc/store.py | 47 ++- src/zarr/api/synchronous.py | 46 ++- src/zarr/codecs/numcodecs/_codecs.py | 16 +- src/zarr/core/array.py | 578 ++++++++++++++------------- src/zarr/core/group.py | 132 +++--- src/zarr/storage/_local.py | 54 ++- src/zarr/storage/_memory.py | 53 ++- 8 files changed, 501 insertions(+), 429 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 837f2f24ab..5f7a21bc3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -414,7 +414,7 @@ ignore_errors = true [tool.pytest.ini_options] minversion = "7" -testpaths = ["tests", "docs/user-guide"] +testpaths = ["src", "tests", "docs/user-guide"] log_cli_level = "INFO" log_level = "INFO" xfail_strict = true @@ -430,6 +430,8 @@ addopts = [ "--benchmark-disable", # benchmark routines run as tests without benchmarking instrumentation "--durations", "10", "-ra", "--strict-config", "--strict-markers", + "--doctest-modules", + "--ignore=tests/test_regression/scripts", ] filterwarnings = [ "error", diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 3247649f10..ab58acf59f 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -256,12 +256,20 @@ async def _get_bytes( Examples -------- - >>> store = await MemoryStore.open() - >>> await store.set("data", Buffer.from_bytes(b"hello world")) - >>> data = await store.get_bytes("data", prototype=default_buffer_prototype()) - >>> print(data) + >>> async def example(): + ... from zarr.core.buffer.cpu import Buffer + ... from zarr.storage import MemoryStore + ... + ... store = await MemoryStore.open() + ... await store.set("data", Buffer.from_bytes(b"hello world")) + ... # No need to specify prototype for MemoryStore + ... 
return await store._get_bytes("data") + + >>> import asyncio + >>> asyncio.run(example()) b'hello world' """ + buffer = await self.get(key, prototype, byte_range) if buffer is None: raise FileNotFoundError(key) @@ -309,10 +317,11 @@ def _get_bytes_sync( Examples -------- + >>> from zarr.core.buffer.cpu import Buffer + >>> from zarr.storage import MemoryStore >>> store = MemoryStore() - >>> await store.set("data", Buffer.from_bytes(b"hello world")) - >>> data = store.get_bytes_sync("data", prototype=default_buffer_prototype()) - >>> print(data) + >>> store.set_sync("data", Buffer.from_bytes(b"hello world")) + >>> store._get_bytes_sync("data") # No need to specify prototype for MemoryStore b'hello world' """ @@ -358,11 +367,18 @@ async def _get_json( Examples -------- - >>> store = await MemoryStore.open() - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> data = await store.get_json("zarr.json", prototype=default_buffer_prototype()) - >>> print(data) + >>> async def example(): + ... from zarr.core.buffer.cpu import Buffer + ... from zarr.storage import MemoryStore + ... + ... store = await MemoryStore.open() + ... metadata = {"zarr_format": 3, "node_type": "array"} + ... await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) + ... # No need to specify prototype for MemoryStore + ... 
return await store._get_json("zarr.json") + + >>> import asyncio + >>> asyncio.run(example()) {'zarr_format': 3, 'node_type': 'array'} """ @@ -414,11 +430,12 @@ def _get_json_sync( Examples -------- + >>> from zarr.core.buffer.cpu import Buffer + >>> from zarr.storage import MemoryStore >>> store = MemoryStore() >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> data = store.get_json_sync("zarr.json", prototype=default_buffer_prototype()) - >>> print(data) + >>> store.set_sync("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) + >>> store._get_json_sync("zarr.json") # No need to specify prototype for MemoryStore {'zarr_format': 3, 'node_type': 'array'} """ diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 97d460a183..7916e74e59 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -1126,31 +1126,32 @@ def from_array( -------- Create an array from an existing Array: - ```python - import zarr - store = zarr.storage.MemoryStore() - store2 = zarr.storage.LocalStore('example_from_array.zarr') - arr = zarr.create_array( - store=store, - shape=(100,100), - chunks=(10,10), - dtype='int32', - fill_value=0) - arr2 = zarr.from_array(store2, data=arr, overwrite=True) - # - ``` + >>> import asyncio + >>> import zarr + >>> store = zarr.storage.MemoryStore() + >>> store2 = zarr.storage.LocalStore("example_from_array.zarr") + >>> arr = zarr.create_array( + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype="int32", + ... fill_value=0 + ... 
) + >>> arr2 = zarr.from_array(store2, data=arr, overwrite=True) + >>> arr2 + + >>> asyncio.run(store2.clear()) # Remove files generated by test Create an array from an existing NumPy array: - ```python - import zarr - import numpy as np - arr3 = zarr.from_array( - zarr.storage.MemoryStore(), - data=np.arange(10000, dtype='i4').reshape(100, 100), - ) - # - ``` + >>> import zarr + >>> import numpy as np + >>> arr3 = zarr.from_array( + ... zarr.storage.MemoryStore(), + ... data=np.arange(10000, dtype="i4").reshape(100, 100), + ... ) + >>> arr3 + Create an array from any array-like object: @@ -1183,6 +1184,7 @@ def from_array( # array([[0, 0],[0, 0]]) ``` """ + return Array( sync( zarr.core.array.from_array( diff --git a/src/zarr/codecs/numcodecs/_codecs.py b/src/zarr/codecs/numcodecs/_codecs.py index 06c085ad2a..1be1381a08 100644 --- a/src/zarr/codecs/numcodecs/_codecs.py +++ b/src/zarr/codecs/numcodecs/_codecs.py @@ -8,14 +8,16 @@ import zarr import zarr.codecs.numcodecs as numcodecs +store = zarr.storage.MemoryStore() array = zarr.create_array( - store="data_numcodecs.zarr", - shape=(1024, 1024), - chunks=(64, 64), - dtype="uint32", - filters=[numcodecs.Delta(dtype="uint32")], - compressors=[numcodecs.BZ2(level=5)], - overwrite=True) + store=store, + shape=(1024, 1024), + chunks=(64, 64), + dtype="uint32", + filters=[numcodecs.Delta(dtype="uint32")], + compressors=[numcodecs.BZ2(level=5)], + overwrite=True +) array[:] = np.arange(np.prod(array.shape), dtype=array.dtype).reshape(*array.shape) ``` diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4e9bd6e12f..fdc96e7ff0 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -847,10 +847,13 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- - >>> arr = zarr.create_array(store, shape=(100, 80), chunks=(30, 40)) + >>> import zarr.storage + >>> store = zarr.storage.MemoryStore() + >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> 
arr.read_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ + from zarr.codecs.sharding import ShardingCodec codecs: tuple[Codec, ...] = getattr(self.metadata, "codecs", ()) @@ -876,10 +879,13 @@ def write_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- - >>> arr = zarr.create_array(store, shape=(100, 80), chunks=(30, 40)) + >>> import zarr.storage + >>> store = zarr.storage.MemoryStore() + >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.write_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ + return self._chunk_grid.chunk_sizes @property @@ -1453,26 +1459,25 @@ async def getitem( Examples -------- - ```python - import asyncio - import zarr.api.asynchronous - - async def example(): - store = zarr.storage.MemoryStore() - async_arr = await zarr.api.asynchronous.create_array( - store=store, - shape=(100,100), - chunks=(10,10), - dtype='i4', - fill_value=0) - result = await async_arr.getitem((0,1)) - print(result) - #> 0 - return result - - value = asyncio.run(example()) - ``` + >>> async def example(): + ... import zarr.api.asynchronous + ... import zarr.storage + ... + ... store = zarr.storage.MemoryStore() + ... async_arr = await zarr.api.asynchronous.create_array( + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype="i4", + ... fill_value=0, + ... ) + ... return await async_arr.getitem((0,1)) + + >>> import asyncio + >>> asyncio.run(example()) + np.int32(0) """ + return await _getitem( self.store_path, self.metadata, @@ -1730,20 +1735,25 @@ def info(self) -> Any: Examples -------- - - >>> arr = await zarr.api.asynchronous.create( - ... path="array", shape=(3, 4, 5), chunks=(2, 2, 2)) + >>> import asyncio + >>> arr = asyncio.run( + ... zarr.api.asynchronous.create( + ... path="array", shape=(3, 4, 5), chunks=(2, 2, 2) + ... ) ... 
) >>> arr.info Type : Array Zarr format : 3 - Data type : DataType.float64 + Data type : Float64(endianness='little') + Fill value : 0.0 Shape : (3, 4, 5) Chunk shape : (2, 2, 2) Order : C Read-only : False Store type : MemoryStore - Codecs : [{'endian': <Endian.little: 'little'>}] + Filters : () + Serializer : BytesCodec(endian=<Endian.little: 'little'>) + Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 480 """ return self._info() @@ -2002,7 +2012,9 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- - >>> arr = zarr.open_array(store) + >>> import zarr + >>> store = zarr.storage.MemoryStore() + >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.read_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ @@ -2025,7 +2037,9 @@ def write_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- - >>> arr = zarr.open_array(store) + >>> import zarr + >>> store = zarr.storage.MemoryStore() + >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.write_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ @@ -2255,7 +2269,7 @@ def nchunks_initialized(self) -> int: Examples -------- - >>> arr = zarr.create_array(store={}, shape=(10,), chunks=(1,), shards=(2,)) + >>> arr = zarr.create_array(store={}, dtype="i1", shape=(10,), chunks=(1,), shards=(2,)) >>> arr.nchunks_initialized 0 >>> arr[:5] = 1 @@ -2277,11 +2291,11 @@ def _nshards_initialized(self) -> int: Examples -------- - >>> arr = await zarr.create(shape=(10,), chunks=(2,)) + >>> arr = zarr.create(shape=(10,), chunks=(2,)) >>> arr._nshards_initialized 0 >>> arr[:5] = 1 - >>> arr._nshard_initialized + >>> arr._nshards_initialized 3 """ return sync(self.async_array._nshards_initialized()) @@ -2449,66 +2463,66 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: Examples -------- - Setup a 1-dimensional array:: + Setup a 1-dimensional array: >>> import zarr >>> import numpy as np >>> data = np.arange(100, dtype="uint16") >>> z = 
zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=(10,), - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=(10,), + ... dtype=data.dtype, + ... ) >>> z[:] = data - Retrieve a single item:: + Retrieve a single item: >>> z[5] - 5 + array(5, dtype=uint16) - Retrieve a region via slicing:: + Retrieve a region via slicing: >>> z[:5] - array([0, 1, 2, 3, 4]) + array([0, 1, 2, 3, 4], dtype=uint16) >>> z[-5:] - array([95, 96, 97, 98, 99]) + array([95, 96, 97, 98, 99], dtype=uint16) >>> z[5:10] - array([5, 6, 7, 8, 9]) + array([5, 6, 7, 8, 9], dtype=uint16) >>> z[5:10:2] - array([5, 7, 9]) + array([5, 7, 9], dtype=uint16) >>> z[::2] - array([ 0, 2, 4, ..., 94, 96, 98]) + array([ 0, 2, 4, ..., 94, 96, 98], dtype=uint16) - Load the entire array into memory:: + Load the entire array into memory: >>> z[...] - array([ 0, 1, 2, ..., 97, 98, 99]) + array([ 0, 1, 2, ..., 97, 98, 99], dtype=uint16) - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> data = np.arange(100, dtype="uint16").reshape(10, 10) >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=(10, 10), - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=(10, 10), + ... dtype=data.dtype, + ... 
) >>> z[:] = data - Retrieve an item:: + Retrieve an item: >>> z[2, 2] - 22 + array(22, dtype=uint16) - Retrieve a region via slicing:: + Retrieve a region via slicing: >>> z[1:3, 1:3] array([[11, 12], - [21, 22]]) + [21, 22]], dtype=uint16) >>> z[1:3, :] array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]], dtype=uint16) >>> z[:, 1:3] array([[ 1, 2], [11, 12], @@ -2519,19 +2533,19 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: [61, 62], [71, 72], [81, 82], - [91, 92]]) + [91, 92]], dtype=uint16) >>> z[0:5:2, 0:5:2] array([[ 0, 2, 4], [20, 22, 24], - [40, 42, 44]]) + [40, 42, 44]], dtype=uint16) >>> z[::2, ::2] array([[ 0, 2, 4, 6, 8], [20, 22, 24, 26, 28], [40, 42, 44, 46, 48], [60, 62, 64, 66, 68], - [80, 82, 84, 86, 88]]) + [80, 82, 84, 86, 88]], dtype=uint16) - Load the entire array into memory:: + Load the entire array into memory: >>> z[...] array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], @@ -2543,7 +2557,7 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], - [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) + [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]], dtype=uint16) Notes ----- @@ -2576,8 +2590,8 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__setitem__][zarr.Array.__setitem__] - """ + fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): return self.vindex[cast("CoordinateSelection | MaskSelection", selection)] @@ -2599,43 
+2613,43 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: Examples -------- - Setup a 1-dimensional array:: + Setup a 1-dimensional array: >>> import zarr >>> z = zarr.zeros( - >>> shape=(100,), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(5,), - >>> dtype="i4", - >>> ) + ... shape=(100,), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(5,), + ... dtype="i4", + ... ) - Set all array elements to the same scalar value:: + Set all array elements to the same scalar value: >>> z[...] = 42 >>> z[...] - array([42, 42, 42, ..., 42, 42, 42]) + array([42, 42, 42, ..., 42, 42, 42], dtype=int32) - Set a portion of the array:: + Set a portion of the array: >>> z[:10] = np.arange(10) >>> z[-10:] = np.arange(10)[::-1] >>> z[...] - array([ 0, 1, 2, ..., 2, 1, 0]) + array([ 0, 1, 2, ..., 2, 1, 0], dtype=int32) - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> z = zarr.zeros( - >>> shape=(5, 5), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(5, 5), - >>> dtype="i4", - >>> ) + ... shape=(5, 5), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(5, 5), + ... dtype="i4", + ... ) - Set all array elements to the same scalar value:: + Set all array elements to the same scalar value: >>> z[...] = 42 - Set a portion of the array:: + Set a portion of the array: >>> z[0, :] = np.arange(z.shape[1]) >>> z[:, 0] = np.arange(z.shape[0]) @@ -2644,7 +2658,7 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: [ 1, 42, 42, 42, 42], [ 2, 42, 42, 42, 42], [ 3, 42, 42, 42, 42], - [ 4, 42, 42, 42, 42]]) + [ 4, 42, 42, 42, 42]], dtype=int32) Notes ----- @@ -2711,8 +2725,8 @@ def get_basic_selection( Parameters ---------- - selection : tuple - A tuple specifying the requested item or region for each dimension of the + selection : BasicSelection + A selection specifying the requested item or region for each dimension of the array. 
May be any combination of int and/or slice or ellipsis for multidimensional arrays. out : NDBuffer, optional If given, load the selected data directly into this buffer. @@ -2729,67 +2743,67 @@ def get_basic_selection( Examples -------- - Setup a 1-dimensional array:: + Setup a 1-dimensional array: >>> import zarr >>> import numpy as np >>> data = np.arange(100, dtype="uint16") >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=(3,), - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=(3,), + ... dtype=data.dtype, + ... ) >>> z[:] = data - Retrieve a single item:: + Retrieve a single item: >>> z.get_basic_selection(5) - 5 + np.uint16(5) - Retrieve a region via slicing:: + Retrieve a region via slicing: >>> z.get_basic_selection(slice(5)) - array([0, 1, 2, 3, 4]) + array([0, 1, 2, 3, 4], dtype=uint16) >>> z.get_basic_selection(slice(-5, None)) - array([95, 96, 97, 98, 99]) + array([95, 96, 97, 98, 99], dtype=uint16) >>> z.get_basic_selection(slice(5, 10)) - array([5, 6, 7, 8, 9]) + array([5, 6, 7, 8, 9], dtype=uint16) >>> z.get_basic_selection(slice(5, 10, 2)) - array([5, 7, 9]) + array([5, 7, 9], dtype=uint16) >>> z.get_basic_selection(slice(None, None, 2)) - array([ 0, 2, 4, ..., 94, 96, 98]) + array([ 0, 2, 4, ..., 94, 96, 98], dtype=uint16) - Setup a 3-dimensional array:: + Setup a 3-dimensional array: >>> data = np.arange(1000).reshape(10, 10, 10) >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=(5, 5, 5), - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=(5, 5, 5), + ... dtype=data.dtype, + ... 
) >>> z[:] = data - Retrieve an item:: + Retrieve an item: >>> z.get_basic_selection((1, 2, 3)) - 123 + np.int64(123) - Retrieve a region via slicing and Ellipsis:: + Retrieve a region via slicing and Ellipsis: >>> z.get_basic_selection((slice(1, 3), slice(1, 3), 0)) array([[110, 120], [210, 220]]) - >>> z.get_basic_selection(0, (slice(1, 3), slice(None))) + >>> z.get_basic_selection((0, slice(1, 3), slice(None))) array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) - >>> z.get_basic_selection((..., 5)) - array([[ 2 12 22 32 42 52 62 72 82 92] - [102 112 122 132 142 152 162 172 182 192] + >>> z.get_basic_selection((..., 2)) + array([[ 2, 12, 22, 32, 42, 52, 62, 72, 82, 92], + [102, 112, 122, 132, 142, 152, 162, 172, 182, 192], ... - [802 812 822 832 842 852 862 872 882 892] - [902 912 922 932 942 952 962 972 982 992]] + [802, 812, 822, 832, 842, 852, 862, 872, 882, 892], + [902, 912, 922, 932, 942, 952, 962, 972, 982, 992]]) Notes ----- @@ -2856,43 +2870,43 @@ def set_basic_selection( Examples -------- - Setup a 1-dimensional array:: + Setup a 1-dimensional array: >>> import zarr >>> z = zarr.zeros( - >>> shape=(100,), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(100,), - >>> dtype="i4", - >>> ) + ... shape=(100,), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(100,), + ... dtype="i4", + ... ) - Set all array elements to the same scalar value:: + Set all array elements to the same scalar value: >>> z.set_basic_selection(..., 42) >>> z[...] - array([42, 42, 42, ..., 42, 42, 42]) + array([42, 42, 42, ..., 42, 42, 42], dtype=int32) - Set a portion of the array:: + Set a portion of the array: >>> z.set_basic_selection(slice(10), np.arange(10)) >>> z.set_basic_selection(slice(-10, None), np.arange(10)[::-1]) >>> z[...] 
- array([ 0, 1, 2, ..., 2, 1, 0]) + array([ 0, 1, 2, ..., 2, 1, 0], dtype=int32) - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> z = zarr.zeros( - >>> shape=(5, 5), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(5, 5), - >>> dtype="i4", - >>> ) + ... shape=(5, 5), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(5, 5), + ... dtype="i4", + ... ) - Set all array elements to the same scalar value:: + Set all array elements to the same scalar value: >>> z.set_basic_selection(..., 42) - Set a portion of the array:: + Set a portion of the array: >>> z.set_basic_selection((0, slice(None)), np.arange(z.shape[1])) >>> z.set_basic_selection((slice(None), 0), np.arange(z.shape[0])) @@ -2901,7 +2915,7 @@ def set_basic_selection( [ 1, 42, 42, 42, 42], [ 2, 42, 42, 42, 42], [ 3, 42, 42, 42, 42], - [ 4, 42, 42, 42, 42]]) + [ 4, 42, 42, 42, 42]], dtype=int32) Notes ----- @@ -2967,21 +2981,21 @@ def get_orthogonal_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> import numpy as np >>> data = np.arange(100).reshape(10, 10) >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=data.shape, - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=data.shape, + ... dtype=data.dtype, + ... 
) >>> z[:] = data Retrieve rows and columns via any combination of int, slice, integer array and/or - Boolean array:: + Boolean array: >>> z.get_orthogonal_selection(([1, 4], slice(None))) array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], @@ -3008,7 +3022,7 @@ def get_orthogonal_selection( [41, 44]]) For convenience, the orthogonal selection functionality is also available via the - `oindex` property, e.g.:: + `oindex` property, e.g.: >>> z.oindex[[1, 4], :] array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], @@ -3091,18 +3105,18 @@ def set_orthogonal_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> z = zarr.zeros( - >>> shape=(5, 5), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(5, 5), - >>> dtype="i4", - >>> ) + ... shape=(5, 5), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(5, 5), + ... dtype="i4", + ... ) - Set data for a selection of rows:: + Set data for a selection of rows: >>> z.set_orthogonal_selection(([1, 4], slice(None)), 1) >>> z[...] @@ -3110,9 +3124,9 @@ def set_orthogonal_selection( [1, 1, 1, 1, 1], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], - [1, 1, 1, 1, 1]]) + [1, 1, 1, 1, 1]], dtype=int32) - Set data for a selection of columns:: + Set data for a selection of columns: >>> z.set_orthogonal_selection((slice(None), [1, 4]), 2) >>> z[...] @@ -3120,9 +3134,9 @@ def set_orthogonal_selection( [1, 2, 1, 1, 2], [0, 2, 0, 0, 2], [0, 2, 0, 0, 2], - [1, 2, 1, 1, 2]]) + [1, 2, 1, 1, 2]], dtype=int32) - Set data for a selection of rows and columns:: + Set data for a selection of rows and columns: >>> z.set_orthogonal_selection(([1, 4], [1, 4]), 3) >>> z[...] 
@@ -3130,9 +3144,9 @@ def set_orthogonal_selection( [1, 3, 1, 1, 3], [0, 2, 0, 0, 2], [0, 2, 0, 0, 2], - [1, 3, 1, 1, 3]]) + [1, 3, 1, 1, 3]], dtype=int32) - Set data from a 2D array:: + Set data from a 2D array: >>> values = np.arange(10).reshape(2, 5) >>> z.set_orthogonal_selection(([0, 3], ...), values) @@ -3141,10 +3155,9 @@ def set_orthogonal_selection( [1, 3, 1, 1, 3], [0, 2, 0, 0, 2], [5, 6, 7, 8, 9], - [1, 3, 1, 1, 3]]) + [1, 3, 1, 1, 3]], dtype=int32) - For convenience, this functionality is also available via the `oindex` property. - E.g.:: + For convenience, this functionality is also available via the `oindex` property: >>> z.oindex[[1, 4], [1, 4]] = 4 >>> z[...] @@ -3152,7 +3165,7 @@ def set_orthogonal_selection( [1, 4, 1, 1, 4], [0, 2, 0, 0, 2], [5, 6, 7, 8, 9], - [1, 4, 1, 1, 4]]) + [1, 4, 1, 1, 4]], dtype=int32) Notes ----- @@ -3214,20 +3227,20 @@ def get_mask_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> import numpy as np >>> data = np.arange(100).reshape(10, 10) >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=data.shape, - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=data.shape, + ... dtype=data.dtype, + ... ) >>> z[:] = data - Retrieve items by specifying a mask:: + Retrieve items by specifying a mask: >>> sel = np.zeros_like(z, dtype=bool) >>> sel[1, 1] = True @@ -3236,7 +3249,7 @@ def get_mask_selection( array([11, 44]) For convenience, the mask selection functionality is also available via the - `vindex` property, e.g.:: + `vindex` property: >>> z.vindex[sel] array([11, 44]) @@ -3297,17 +3310,17 @@ def set_mask_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> z = zarr.zeros( - >>> shape=(5, 5), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(5, 5), - >>> dtype="i4", - >>> ) + ... 
shape=(5, 5), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(5, 5), + ... dtype="i4", + ... ) - Set data for a selection of items:: + Set data for a selection of items: >>> sel = np.zeros_like(z, dtype=bool) >>> sel[1, 1] = True @@ -3318,10 +3331,9 @@ def set_mask_selection( [0, 1, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 1]]) + [0, 0, 0, 0, 1]], dtype=int32) - For convenience, this functionality is also available via the `vindex` property. - E.g.:: + For convenience, this functionality is also available via the `vindex` property: >>> z.vindex[sel] = 2 >>> z[...] @@ -3329,7 +3341,7 @@ def set_mask_selection( [0, 2, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 2]]) + [0, 0, 0, 0, 2]], dtype=int32) Notes ----- @@ -3388,29 +3400,29 @@ def get_coordinate_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> import numpy as np >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=(3, 3), - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=(3, 3), + ... dtype=data.dtype, + ... ) >>> z[:] = data - Retrieve items by specifying their coordinates:: + Retrieve items by specifying their coordinates: >>> z.get_coordinate_selection(([1, 4], [1, 4])) - array([11, 44]) + array([11, 44], dtype=uint16) For convenience, the coordinate selection functionality is also available via the - `vindex` property, e.g.:: + `vindex` property: >>> z.vindex[[1, 4], [1, 4]] - array([11, 44]) + array([11, 44], dtype=uint16) Notes ----- @@ -3478,17 +3490,17 @@ def set_coordinate_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> z = zarr.zeros( - >>> shape=(5, 5), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(5, 5), - >>> dtype="i4", - >>> ) + ... 
shape=(5, 5), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(5, 5), + ... dtype="i4", + ... ) - Set data for a selection of items:: + Set data for a selection of items: >>> z.set_coordinate_selection(([1, 4], [1, 4]), 1) >>> z[...] @@ -3496,10 +3508,9 @@ def set_coordinate_selection( [0, 1, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 1]]) + [0, 0, 0, 0, 1]], dtype=int32) - For convenience, this functionality is also available via the `vindex` property. - E.g.:: + For convenience, this functionality is also available via the `vindex` property: >>> z.vindex[[1, 4], [1, 4]] = 2 >>> z[...] @@ -3507,7 +3518,7 @@ def set_coordinate_selection( [0, 2, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], - [0, 0, 0, 0, 2]]) + [0, 0, 0, 0, 2]], dtype=int32) Notes ----- @@ -3590,40 +3601,40 @@ def get_block_selection( Examples -------- - Setup a 2-dimensional array:: + Setup a 2-dimensional array: >>> import zarr >>> import numpy as np >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) >>> z = zarr.create_array( - >>> StorePath(MemoryStore(mode="w")), - >>> shape=data.shape, - >>> chunks=(3, 3), - >>> dtype=data.dtype, - >>> ) + ... zarr.storage.MemoryStore(), + ... shape=data.shape, + ... chunks=(3, 3), + ... dtype=data.dtype, + ... 
) >>> z[:] = data - Retrieve items by specifying their block coordinates:: + Retrieve items by specifying their block coordinates: >>> z.get_block_selection((1, slice(None))) array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]], dtype=uint16) - Which is equivalent to:: + Which is equivalent to: >>> z[3:6, :] array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]], dtype=uint16) For convenience, the block selection functionality is also available via the - `blocks` property, e.g.:: + `blocks` property: >>> z.blocks[1] array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], - [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) + [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]], dtype=uint16) Notes ----- @@ -3633,13 +3644,12 @@ def get_block_selection( Slices are supported. However, only with a step size of one. - Block index arrays may be multidimensional to index multidimensional arrays. - For example:: + Block index arrays may be multidimensional to index multidimensional arrays: >>> z.blocks[0, 1:3] array([[ 3, 4, 5, 6, 7, 8], [13, 14, 15, 16, 17, 18], - [23, 24, 25, 26, 27, 28]]) + [23, 24, 25, 26, 27, 28]], dtype=uint16) Related ------- @@ -3691,17 +3701,17 @@ def set_block_selection( Examples -------- - Set up a 2-dimensional array:: + Set up a 2-dimensional array: >>> import zarr >>> z = zarr.zeros( - >>> shape=(6, 6), - >>> store=StorePath(MemoryStore(mode="w")), - >>> chunk_shape=(2, 2), - >>> dtype="i4", - >>> ) + ... shape=(6, 6), + ... store=zarr.storage.MemoryStore(), + ... chunk_shape=(2, 2), + ... dtype="i4", + ... ) - Set data for a selection of items:: + Set data for a selection of items: >>> z.set_block_selection((1, 0), 1) >>> z[...] 
@@ -3710,10 +3720,9 @@ def set_block_selection( [1, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0]]) + [0, 0, 0, 0, 0, 0]], dtype=int32) - For convenience, this functionality is also available via the `blocks` property. - E.g.:: + For convenience, this functionality is also available via the `blocks` property: >>> z.blocks[2, 1] = 4 >>> z[...] @@ -3722,7 +3731,7 @@ def set_block_selection( [1, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [0, 0, 4, 4, 0, 0], - [0, 0, 4, 4, 0, 0]]) + [0, 0, 4, 4, 0, 0]], dtype=int32) >>> z.blocks[:, 2] = 7 >>> z[...] @@ -3731,7 +3740,7 @@ def set_block_selection( [1, 1, 0, 0, 7, 7], [1, 1, 0, 0, 7, 7], [0, 0, 4, 4, 7, 7], - [0, 0, 4, 4, 7, 7]]) + [0, 0, 4, 4, 7, 7]], dtype=int32) Notes ----- @@ -3849,7 +3858,7 @@ def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: -------- >>> import numpy as np >>> import zarr - >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000) + >>> a = np.arange(10000000, dtype="i4").reshape(10000, 1000) >>> z = zarr.array(a, chunks=(1000, 100)) >>> z.shape (10000, 1000) @@ -3913,13 +3922,16 @@ def info(self) -> Any: >>> arr.info Type : Array Zarr format : 3 - Data type : DataType.float32 + Data type : Float32(endianness='little') + Fill value : 0.0 Shape : (10,) Chunk shape : (2,) Order : C Read-only : False Store type : MemoryStore - Codecs : [BytesCodec(endian=)] + Filters : () + Serializer : BytesCodec(endian=) + Compressors : (ZstdCodec(level=0, checksum=False),) No. 
bytes : 40 """ return self.async_array.info @@ -4161,48 +4173,64 @@ async def from_array( Examples -------- - Create an array from an existing Array:: + Create an array from an existing Array: + >>> import asyncio >>> import zarr >>> store = zarr.storage.MemoryStore() - >>> store2 = zarr.storage.LocalStore('example.zarr') + >>> store2 = zarr.storage.LocalStore("example.zarr") >>> arr = zarr.create_array( - >>> store=store, - >>> shape=(100,100), - >>> chunks=(10,10), - >>> dtype='int32', - >>> fill_value=0) - >>> arr2 = await zarr.api.asynchronous.from_array(store2, data=arr) + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype="int32", + ... fill_value=0, + ... ) + + >>> arr2 = asyncio.run(from_array(store2, data=arr, overwrite=True)) + >>> arr2 + >>> asyncio.run(store2.clear()) # Remove files generated by test + + Create an array from an existing NumPy array: + + >>> arr3 = asyncio.run( + ... from_array( + ... zarr.storage.MemoryStore(), + ... data=np.arange(10000, dtype="i4").reshape(100, 100), + ... ) + ... ) + >>> arr3 + + + Create an array from any array-like object: - Create an array from an existing NumPy array:: - - >>> arr3 = await zarr.api.asynchronous.from_array( - >>> zarr.storage.MemoryStore(), - >>> data=np.arange(10000, dtype='i4').reshape(100, 100), - >>> ) - - - Create an array from any array-like object:: - - >>> arr4 = await zarr.api.asynchronous.from_array( - >>> zarr.storage.MemoryStore(), - >>> data=[[1, 2], [3, 4]], - >>> ) - - >>> await arr4.getitem(...) - array([[1, 2],[3, 4]]) - - Create an array from an existing Array without copying the data:: - - >>> arr5 = await zarr.api.asynchronous.from_array( - >>> zarr.storage.MemoryStore(), - >>> data=Array(arr4), - >>> write_data=False, - >>> ) - - >>> await arr5.getitem(...) - array([[0, 0],[0, 0]]) + >>> arr4 = asyncio.run( + ... from_array( + ... zarr.storage.MemoryStore(), + ... data=[[1, 2], [3, 4]], + ... ) + ... 
) + >>> arr4 + + >>> asyncio.run(arr4.getitem(...)) + array([[1, 2], + [3, 4]]) + + Create an array from an existing Array without copying the data: + + >>> arr5 = asyncio.run( + ... from_array( + ... zarr.storage.MemoryStore(), + ... data=Array(arr4), + ... write_data=False, + ... ) + ... ) + >>> arr5 + + >>> asyncio.run(arr5.getitem(...)) + array([[0, 0], + [0, 0]]) """ mode: Literal["a"] = "a" config_parsed = parse_array_config(config) @@ -4645,15 +4673,19 @@ async def create_array( Examples -------- + >>> import asyncio >>> import zarr - >>> store = zarr.storage.MemoryStore(mode='w') - >>> async_arr = await zarr.api.asynchronous.create_array( - >>> store=store, - >>> shape=(100,100), - >>> chunks=(10,10), - >>> dtype='i4', - >>> fill_value=0) - + >>> store = zarr.storage.MemoryStore() + >>> asyncio.run( + ... zarr.api.asynchronous.create_array( + ... store=store, + ... shape=(100,100), + ... chunks=(10,10), + ... dtype="i4", + ... fill_value=0 + ... ) + ... ) + """ data_parsed, shape_parsed, dtype_parsed = _parse_data_params( data=data, shape=shape, dtype=dtype @@ -5067,7 +5099,7 @@ def _parse_data_params( shape_out = shape if dtype is None: msg = ( - "The data parameter was set to None, but dtype was not specified." + "The data parameter was set to None, but dtype was not specified. " "Either provide an array-like value for data, or specify dtype." 
) raise ValueError(msg) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 213b7fb607..de8c8e9a68 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -1898,12 +1898,13 @@ def __delitem__(self, key: str) -> None: Examples -------- >>> import zarr - >>> group = Group.from_store(zarr.storage.MemoryStore() - >>> group.create_array(name="subarray", shape=(10,), chunks=(10,)) + >>> group = Group.from_store(zarr.storage.MemoryStore()) + >>> a = group.create_array(name="subarray", dtype="i1", shape=(10,), chunks=(10,)) >>> del group["subarray"] >>> "subarray" in group False """ + self._sync(self._async_group.delitem(key)) def __iter__(self) -> Iterator[str]: @@ -1914,14 +1915,10 @@ def __iter__(self) -> Iterator[str]: >>> g1 = zarr.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_array('baz', shape=(10,), chunks=(10,)) - >>> d2 = g1.create_array('quux', shape=(10,), chunks=(10,)) - >>> for name in g1: - ... print(name) - baz - bar - foo - quux + >>> d1 = g1.create_array('baz', dtype="i1", shape=(10,), chunks=(10,)) + >>> d2 = g1.create_array('quux', dtype="i1", shape=(10,), chunks=(10,)) + >>> sorted(g1) + ['bar', 'baz', 'foo', 'quux'] """ yield from self.keys() @@ -1944,11 +1941,12 @@ def __setitem__(self, key: str, value: Any) -> None: Examples -------- + >>> import numpy as np >>> import zarr >>> group = zarr.group() - >>> group["foo"] = zarr.zeros((10,)) + >>> group["foo"] = np.array(zarr.zeros((10,))) >>> group["foo"] - + """ self._sync(self._async_group.setitem(key, value)) @@ -1960,10 +1958,15 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group Examples -------- - >>> import zarr - >>> group = zarr.group() - >>> await group.update_attributes_async({"foo": "bar"}) - >>> group.attrs.asdict() + >>> async def example(): + ... import zarr + ... + ... group = zarr.group() + ... new_group = await group.update_attributes_async({"foo": "bar"}) + ... 
return new_group.attrs.asdict() + + >>> import asyncio + >>> asyncio.run(example()) {'foo': 'bar'} """ new_metadata = replace(self.metadata, attributes=new_attributes) @@ -2062,8 +2065,7 @@ def update_attributes(self, new_attributes: dict[str, Any]) -> Group: Examples -------- >>> import zarr - >>> group = zarr.group() - >>> group.update_attributes({"foo": "bar"}) + >>> group = zarr.group().update_attributes({"foo": "bar"}) >>> group.attrs.asdict() {'foo': 'bar'} """ @@ -2169,19 +2171,17 @@ def create_hierarchy( >>> import zarr >>> from zarr.core.group import GroupMetadata >>> root = zarr.create_group(store={}) - >>> for key, val in root.create_hierarchy({'a/b/c': GroupMetadata()}): - ... print(key, val) - ... - - - + >>> sorted(root.create_hierarchy({'a/b/c': GroupMetadata()})) + [('a', ), + ('a/b', ), + ('a/b/c', )] """ for key, node in self._sync_iter( self._async_group.create_hierarchy(nodes, overwrite=overwrite) ): yield (key, _parse_async_node(node)) - def keys(self) -> Generator[str, None]: + def keys(self) -> Generator[str]: """Return an iterator over group member names. Examples @@ -2190,14 +2190,10 @@ def keys(self) -> Generator[str, None]: >>> g1 = zarr.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') - >>> d1 = g1.create_array('baz', shape=(10,), chunks=(10,)) - >>> d2 = g1.create_array('quux', shape=(10,), chunks=(10,)) - >>> for name in g1.keys(): - ... 
print(name) - baz - bar - foo - quux + >>> d1 = g1.create_array('baz', dtype="i1", shape=(10,), chunks=(10,)) + >>> d2 = g1.create_array('quux', dtype="i1", shape=(10,), chunks=(10,)) + >>> sorted(g1.keys()) + ['bar', 'baz', 'foo', 'quux'] """ yield from self._sync_iter(self._async_group.keys()) @@ -2209,14 +2205,13 @@ def __contains__(self, member: str) -> bool: >>> import zarr >>> g1 = zarr.group() >>> g2 = g1.create_group('foo') - >>> d1 = g1.create_array('bar', shape=(10,), chunks=(10,)) + >>> d1 = g1.create_array('bar', dtype="i1", shape=(10,), chunks=(10,)) >>> 'foo' in g1 True >>> 'bar' in g1 True >>> 'baz' in g1 False - """ return self._sync(self._async_group.contains(member)) @@ -2227,10 +2222,9 @@ def groups(self) -> Generator[tuple[str, Group], None]: -------- >>> import zarr >>> group = zarr.group() - >>> group.create_group("subgroup") - >>> for name, subgroup in group.groups(): - ... print(name, subgroup) - subgroup + >>> subgroup = group.create_group("subgroup") + >>> list(group.groups()) + [('subgroup', )] """ for name, async_group in self._sync_iter(self._async_group.groups()): yield name, Group(async_group) @@ -2242,10 +2236,9 @@ def group_keys(self) -> Generator[str, None]: -------- >>> import zarr >>> group = zarr.group() - >>> group.create_group("subgroup") - >>> for name in group.group_keys(): - ... print(name) - subgroup + >>> subgroup = group.create_group("subgroup") + >>> list(group.group_keys()) + ['subgroup'] """ for name, _ in self.groups(): yield name @@ -2257,10 +2250,9 @@ def group_values(self) -> Generator[Group, None]: -------- >>> import zarr >>> group = zarr.group() - >>> group.create_group("subgroup") - >>> for subgroup in group.group_values(): - ... 
print(subgroup) - + >>> subgroup = group.create_group("subgroup") + >>> list(group.group_values()) + [] """ for _, group in self.groups(): yield group @@ -2272,10 +2264,9 @@ def arrays(self) -> Generator[tuple[str, AnyArray], None]: -------- >>> import zarr >>> group = zarr.group() - >>> group.create_array("subarray", shape=(10,), chunks=(10,)) - >>> for name, subarray in group.arrays(): - ... print(name, subarray) - subarray + >>> subarray = group.create_array("subarray", dtype="i1", shape=(10,), chunks=(10,)) + >>> list(group.arrays()) + [('subarray', )] """ for name, async_array in self._sync_iter(self._async_group.arrays()): yield name, Array(async_array) @@ -2287,10 +2278,9 @@ def array_keys(self) -> Generator[str, None]: -------- >>> import zarr >>> group = zarr.group() - >>> group.create_array("subarray", shape=(10,), chunks=(10,)) - >>> for name in group.array_keys(): - ... print(name) - subarray + >>> subarray = group.create_array("subarray", dtype="i1", shape=(10,), chunks=(10,)) + >>> list(group.array_keys()) + ['subarray'] """ for name, _ in self.arrays(): @@ -2303,10 +2293,9 @@ def array_values(self) -> Generator[AnyArray, None]: -------- >>> import zarr >>> group = zarr.group() - >>> group.create_array("subarray", shape=(10,), chunks=(10,)) - >>> for subarray in group.array_values(): - ... print(subarray) - + >>> subarray = group.create_array("subarray", dtype="i1", shape=(10,), chunks=(10,)) + >>> list(group.array_values()) + [] """ for _, array in self.arrays(): yield array @@ -2363,7 +2352,7 @@ def create_group(self, name: str, **kwargs: Any) -> Group: >>> group = zarr.group() >>> subgroup = group.create_group("subgroup") >>> subgroup - + """ return Group(self._sync(self._async_group.create_group(name, **kwargs))) @@ -2929,21 +2918,24 @@ async def create_hierarchy( Yields ------ tuple[str, AsyncGroup | AsyncArray] - This function yields (path, node) pairs, in the order the nodes were created. 
+ Yields (path, node) pairs, in the order the nodes were created. Examples -------- - >>> from zarr.api.asynchronous import create_hierarchy - >>> from zarr.storage import MemoryStore - >>> from zarr.core.group import GroupMetadata + >>> async def example(): + ... from zarr.api.asynchronous import create_hierarchy + ... from zarr.core.group import GroupMetadata + ... from zarr.storage import MemoryStore + ... + ... store = MemoryStore() + ... nodes = {'a': GroupMetadata(attributes={'name': 'leaf'})} + ... return sorted([x async for x in create_hierarchy(store=store, nodes=nodes)]) + >>> import asyncio - >>> store = MemoryStore() - >>> nodes = {'a': GroupMetadata(attributes={'name': 'leaf'})} - >>> async def run(): - ... print(dict([x async for x in create_hierarchy(store=store, nodes=nodes)])) - >>> asyncio.run(run()) - # {'a': , '': } + >>> asyncio.run(example()) + [('', ), ('a', )] """ + # normalize the keys to be valid paths nodes_normed_keys = _normalize_path_keys(nodes) diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py index 96f1e61746..038de4fef8 100644 --- a/src/zarr/storage/_local.py +++ b/src/zarr/storage/_local.py @@ -397,13 +397,20 @@ async def _get_bytes( Examples -------- - >>> store = await LocalStore.open("data") - >>> await store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for LocalStore - >>> data = await store.get_bytes("data") - >>> print(data) + >>> async def example(): + ... import json + ... from zarr.core.buffer.cpu import Buffer + ... + ... store = await LocalStore.open("data") + ... await store.set("data", Buffer.from_bytes(b"hello")) + ... # No need to specify prototype for LocalStore + ... 
return await store._get_bytes("data") + + >>> import asyncio + >>> asyncio.run(example()) b'hello' """ + if prototype is None: prototype = default_buffer_prototype() return await super()._get_bytes(key, prototype=prototype, byte_range=byte_range) @@ -453,13 +460,14 @@ def _get_bytes_sync( Examples -------- + >>> from zarr.core.buffer.cpu import Buffer >>> store = LocalStore("data") - >>> store.set("data", Buffer.from_bytes(b"hello")) + >>> store.set_sync("data", Buffer.from_bytes(b"hello")) >>> # No need to specify prototype for LocalStore - >>> data = store.get_bytes("data") - >>> print(data) + >>> store._get_bytes_sync("data") b'hello' """ + if prototype is None: prototype = default_buffer_prototype() return super()._get_bytes_sync(key, prototype=prototype, byte_range=byte_range) @@ -510,15 +518,21 @@ async def _get_json( Examples -------- - >>> store = await LocalStore.open("data") - >>> import json - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for LocalStore - >>> data = await store.get_json("zarr.json") - >>> print(data) + >>> async def example(): + ... import json + ... from zarr.core.buffer.cpu import Buffer + ... + ... store = await LocalStore.open("data") + ... metadata = {"zarr_format": 3, "node_type": "array"} + ... await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) + ... # No need to specify prototype for LocalStore + ... 
return await store._get_json("zarr.json") + + >>> import asyncio + >>> asyncio.run(example()) {'zarr_format': 3, 'node_type': 'array'} """ + if prototype is None: prototype = default_buffer_prototype() return await super()._get_json(key, prototype=prototype, byte_range=byte_range) @@ -573,15 +587,15 @@ def _get_json_sync( Examples -------- - >>> store = LocalStore("data") >>> import json + >>> from zarr.core.buffer.cpu import Buffer + >>> store = LocalStore("data") >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for LocalStore - >>> data = store.get_json("zarr.json") - >>> print(data) + >>> store.set_sync("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) + >>> store._get_json_sync("zarr.json") # No need to specify prototype for LocalStore {'zarr_format': 3, 'node_type': 'array'} """ + if prototype is None: prototype = default_buffer_prototype() return super()._get_json_sync(key, prototype=prototype, byte_range=byte_range) diff --git a/src/zarr/storage/_memory.py b/src/zarr/storage/_memory.py index bd91029732..121fcdab7f 100644 --- a/src/zarr/storage/_memory.py +++ b/src/zarr/storage/_memory.py @@ -266,13 +266,19 @@ async def _get_bytes( Examples -------- - >>> store = await MemoryStore.open() - >>> await store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for MemoryStore - >>> data = await store.get_bytes("data") - >>> print(data) + >>> async def example(): + ... from zarr.core.buffer.cpu import Buffer + ... + ... store = await MemoryStore.open() + ... await store.set("data", Buffer.from_bytes(b"hello")) + ... # No need to specify prototype for MemoryStore + ... 
return await store._get_bytes("data") + + >>> import asyncio + >>> asyncio.run(example()) b'hello' """ + if prototype is None: prototype = default_buffer_prototype() return await super()._get_bytes(key, prototype=prototype, byte_range=byte_range) @@ -322,11 +328,10 @@ def _get_bytes_sync( Examples -------- + >>> from zarr.core.buffer.cpu import Buffer >>> store = MemoryStore() - >>> store.set("data", Buffer.from_bytes(b"hello")) - >>> # No need to specify prototype for MemoryStore - >>> data = store.get_bytes("data") - >>> print(data) + >>> store.set_sync("data", Buffer.from_bytes(b"hello")) + >>> store._get_bytes_sync("data") # No need to specify prototype for MemoryStore b'hello' """ if prototype is None: @@ -379,15 +384,21 @@ async def _get_json( Examples -------- - >>> store = await MemoryStore.open() - >>> import json - >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for MemoryStore - >>> data = await store.get_json("zarr.json") - >>> print(data) + >>> async def example(): + ... import json + ... from zarr.core.buffer.cpu import Buffer + ... + ... store = await MemoryStore.open() + ... metadata = {"zarr_format": 3, "node_type": "array"} + ... await store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) + ... # No need to specify prototype for MemoryStore + ... 
return await store._get_json("zarr.json") + + >>> import asyncio + >>> asyncio.run(example()) {'zarr_format': 3, 'node_type': 'array'} """ + if prototype is None: prototype = default_buffer_prototype() return await super()._get_json(key, prototype=prototype, byte_range=byte_range) @@ -442,15 +453,15 @@ def _get_json_sync( Examples -------- - >>> store = MemoryStore() >>> import json + >>> from zarr.core.buffer.cpu import Buffer + >>> store = MemoryStore() >>> metadata = {"zarr_format": 3, "node_type": "array"} - >>> store.set("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) - >>> # No need to specify prototype for MemoryStore - >>> data = store.get_json("zarr.json") - >>> print(data) + >>> store.set_sync("zarr.json", Buffer.from_bytes(json.dumps(metadata).encode())) + >>> store._get_json_sync("zarr.json") # No need to specify prototype for MemoryStore {'zarr_format': 3, 'node_type': 'array'} """ + if prototype is None: prototype = default_buffer_prototype() return super()._get_json_sync(key, prototype=prototype, byte_range=byte_range) From 176a239739cbcbe26bd0d2c642ae8e7d5a440a55 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Fri, 22 May 2026 15:28:12 -0400 Subject: [PATCH 2/4] Add changes file for PR --- changes/4000.misc.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/4000.misc.md diff --git a/changes/4000.misc.md b/changes/4000.misc.md new file mode 100644 index 0000000000..17d48d3016 --- /dev/null +++ b/changes/4000.misc.md @@ -0,0 +1 @@ +Run all doctests via pytest and fix all broken doctests. 
From 098db89b3d9434837d19988966f4da44552efdf6 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Fri, 22 May 2026 15:37:45 -0400 Subject: [PATCH 3/4] Ignore cli sources for doctests --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5f7a21bc3e..e342e8305c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -432,6 +432,7 @@ addopts = [ "-ra", "--strict-config", "--strict-markers", "--doctest-modules", "--ignore=tests/test_regression/scripts", + "--ignore=src/zarr/_cli", ] filterwarnings = [ "error", From ec9f2e24a16c3d8cf8889c1878ac4ea466fcd458 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Tue, 26 May 2026 15:12:53 -0400 Subject: [PATCH 4/4] Prefer store={} to MemoryStore in doctests --- src/zarr/api/synchronous.py | 52 +++++++---------------- src/zarr/core/array.py | 83 ++++++++++++++----------------------- 2 files changed, 47 insertions(+), 88 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 7916e74e59..8386427b3f 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -1128,61 +1128,41 @@ def from_array( >>> import asyncio >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> store2 = zarr.storage.LocalStore("example_from_array.zarr") + >>> store = zarr.storage.LocalStore("example_from_array.zarr") >>> arr = zarr.create_array( - ... store=store, + ... store={}, ... shape=(100,100), ... chunks=(10,10), ... dtype="int32", ... fill_value=0 ... ) - >>> arr2 = zarr.from_array(store2, data=arr, overwrite=True) + >>> arr2 = zarr.from_array(store, data=arr, overwrite=True) >>> arr2 - >>> asyncio.run(store2.clear()) # Remove files generated by test + >>> asyncio.run(store.clear()) # Remove files generated by test Create an array from an existing NumPy array: - >>> import zarr >>> import numpy as np - >>> arr3 = zarr.from_array( - ... zarr.storage.MemoryStore(), - ... data=np.arange(10000, dtype="i4").reshape(100, 100), - ... 
) - >>> arr3 + >>> zarr.from_array({}, data=np.arange(10000, dtype="i4").reshape(100, 100)) Create an array from any array-like object: - ```python - import zarr - arr4 = zarr.from_array( - zarr.storage.MemoryStore(), - data=[[1, 2], [3, 4]], - ) - # - arr4[...] - # array([[1, 2],[3, 4]]) - ``` + >>> arr3 = zarr.from_array({}, data=[[1, 2], [3, 4]]) + >>> arr3 + + >>> arr3[...] + array([[1, 2], [3, 4]]) Create an array from an existing Array without copying the data: - ```python - import zarr - arr4 = zarr.from_array( - zarr.storage.MemoryStore(), - data=[[1, 2], [3, 4]], - ) - arr5 = zarr.from_array( - zarr.storage.MemoryStore(), - data=arr4, - write_data=False, - ) - # - arr5[...] - # array([[0, 0],[0, 0]]) - ``` + >>> arr4 = zarr.from_array({}, data=[[1, 2], [3, 4]]) + >>> arr5 = zarr.from_array({}, data=arr4, write_data=False) + >>> arr5 + + >>> arr5[...] + array([[0, 0], [0, 0]]) """ return Array( diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index fdc96e7ff0..366c19bb0c 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -847,9 +847,7 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- - >>> import zarr.storage - >>> store = zarr.storage.MemoryStore() - >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) + >>> arr = zarr.create_array({}, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.read_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ @@ -880,8 +878,7 @@ def write_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- >>> import zarr.storage - >>> store = zarr.storage.MemoryStore() - >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) + >>> arr = zarr.create_array({}, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.write_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ @@ -1463,15 +1460,15 @@ async def getitem( ... import zarr.api.asynchronous ... import zarr.storage ... - ... store = zarr.storage.MemoryStore() ... 
async_arr = await zarr.api.asynchronous.create_array( - ... store=store, + ... store={}, ... shape=(100,100), ... chunks=(10,10), ... dtype="i4", ... fill_value=0, ... ) - ... return await async_arr.getitem((0,1)) + ... + ... return await async_arr.getitem((0, 1)) >>> import asyncio >>> asyncio.run(example()) @@ -2013,8 +2010,7 @@ def read_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) + >>> arr = zarr.create_array({}, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.read_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ @@ -2038,8 +2034,7 @@ def write_chunk_sizes(self) -> tuple[tuple[int, ...], ...]: Examples -------- >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> arr = zarr.create_array(store, dtype="i1", shape=(100, 80), chunks=(30, 40)) + >>> arr = zarr.create_array({}, dtype="i1", shape=(100, 80), chunks=(30, 40)) >>> arr.write_chunk_sizes ((30, 30, 30, 10), (40, 40)) """ @@ -2469,7 +2464,7 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: >>> import numpy as np >>> data = np.arange(100, dtype="uint16") >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=(10,), ... dtype=data.dtype, @@ -2503,7 +2498,7 @@ def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: >>> data = np.arange(100, dtype="uint16").reshape(10, 10) >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=(10, 10), ... dtype=data.dtype, @@ -2618,7 +2613,7 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: >>> import zarr >>> z = zarr.zeros( ... shape=(100,), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(5,), ... dtype="i4", ... 
) @@ -2640,7 +2635,7 @@ def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: >>> z = zarr.zeros( ... shape=(5, 5), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(5, 5), ... dtype="i4", ... ) @@ -2749,7 +2744,7 @@ def get_basic_selection( >>> import numpy as np >>> data = np.arange(100, dtype="uint16") >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=(3,), ... dtype=data.dtype, @@ -2778,7 +2773,7 @@ def get_basic_selection( >>> data = np.arange(1000).reshape(10, 10, 10) >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=(5, 5, 5), ... dtype=data.dtype, @@ -2875,7 +2870,7 @@ def set_basic_selection( >>> import zarr >>> z = zarr.zeros( ... shape=(100,), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(100,), ... dtype="i4", ... ) @@ -2897,7 +2892,7 @@ def set_basic_selection( >>> z = zarr.zeros( ... shape=(5, 5), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(5, 5), ... dtype="i4", ... ) @@ -2987,7 +2982,7 @@ def get_orthogonal_selection( >>> import numpy as np >>> data = np.arange(100).reshape(10, 10) >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=data.shape, ... dtype=data.dtype, @@ -3110,7 +3105,7 @@ def set_orthogonal_selection( >>> import zarr >>> z = zarr.zeros( ... shape=(5, 5), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(5, 5), ... dtype="i4", ... ) @@ -3233,7 +3228,7 @@ def get_mask_selection( >>> import numpy as np >>> data = np.arange(100).reshape(10, 10) >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=data.shape, ... dtype=data.dtype, @@ -3315,7 +3310,7 @@ def set_mask_selection( >>> import zarr >>> z = zarr.zeros( ... shape=(5, 5), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(5, 5), ... 
dtype="i4", ... ) @@ -3406,7 +3401,7 @@ def get_coordinate_selection( >>> import numpy as np >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=(3, 3), ... dtype=data.dtype, @@ -3495,7 +3490,7 @@ def set_coordinate_selection( >>> import zarr >>> z = zarr.zeros( ... shape=(5, 5), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(5, 5), ... dtype="i4", ... ) @@ -3607,7 +3602,7 @@ def get_block_selection( >>> import numpy as np >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) >>> z = zarr.create_array( - ... zarr.storage.MemoryStore(), + ... {}, ... shape=data.shape, ... chunks=(3, 3), ... dtype=data.dtype, @@ -3706,7 +3701,7 @@ def set_block_selection( >>> import zarr >>> z = zarr.zeros( ... shape=(6, 6), - ... store=zarr.storage.MemoryStore(), + ... store={}, ... chunk_shape=(2, 2), ... dtype="i4", ... ) @@ -4177,40 +4172,31 @@ async def from_array( >>> import asyncio >>> import zarr - >>> store = zarr.storage.MemoryStore() - >>> store2 = zarr.storage.LocalStore("example.zarr") + >>> store = zarr.storage.LocalStore("example.zarr") >>> arr = zarr.create_array( - ... store=store, + ... store={}, ... shape=(100,100), ... chunks=(10,10), ... dtype="int32", ... fill_value=0, ... ) - >>> arr2 = asyncio.run(from_array(store2, data=arr, overwrite=True)) + >>> arr2 = asyncio.run(from_array(store, data=arr, overwrite=True)) >>> arr2 - >>> asyncio.run(store2.clear()) # Remove files generated by test + >>> asyncio.run(store.clear()) # Remove files generated by test Create an array from an existing NumPy array: >>> arr3 = asyncio.run( - ... from_array( - ... zarr.storage.MemoryStore(), - ... data=np.arange(10000, dtype="i4").reshape(100, 100), - ... ) + ... from_array({}, data=np.arange(10000, dtype="i4").reshape(100, 100)) ... ) >>> arr3 Create an array from any array-like object: - >>> arr4 = asyncio.run( - ... 
from_array( - ... zarr.storage.MemoryStore(), - ... data=[[1, 2], [3, 4]], - ... ) - ... ) + >>> arr4 = asyncio.run(from_array({}, data=[[1, 2], [3, 4]])) >>> arr4 >>> asyncio.run(arr4.getitem(...)) @@ -4219,13 +4205,7 @@ async def from_array( Create an array from an existing Array without copying the data: - >>> arr5 = asyncio.run( - ... from_array( - ... zarr.storage.MemoryStore(), - ... data=Array(arr4), - ... write_data=False, - ... ) - ... ) + >>> arr5 = asyncio.run(from_array({}, data=Array(arr4), write_data=False)) >>> arr5 >>> asyncio.run(arr5.getitem(...)) @@ -4675,10 +4655,9 @@ async def create_array( -------- >>> import asyncio >>> import zarr - >>> store = zarr.storage.MemoryStore() >>> asyncio.run( ... zarr.api.asynchronous.create_array( - ... store=store, + ... store={}, ... shape=(100,100), ... chunks=(10,10), ... dtype="i4",