From 7bc1871b6c79b9b7466d0543548f565928603242 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:21:50 -0400 Subject: [PATCH 01/10] read only store spec --- .../2026-06-25-read-only-array-design.md | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 dev-docs/specs/2026-06-25-read-only-array-design.md diff --git a/dev-docs/specs/2026-06-25-read-only-array-design.md b/dev-docs/specs/2026-06-25-read-only-array-design.md new file mode 100644 index 0000000..149e780 --- /dev/null +++ b/dev-docs/specs/2026-06-25-read-only-array-design.md @@ -0,0 +1,131 @@ +# Read-only array via a `ReadOnly` storage adapter + +**Date:** 2026-06-25 +**Status:** Approved, ready for implementation plan + +## Problem + +`PyArray` wraps `Array` — the maximal +zarrs storage trait — and all write methods (`store_chunk`, `store_encoded_chunk`, +`erase_chunk`, `erase_metadata`, `compact_chunk`) live directly on it. + +We want a `read_only()` method that returns an array which **raises at runtime** if +the user attempts to mutate it. The motivation is guarding against *accidental* +mutation of an array to which the user actually has write access. + +zarrs' built-in `Array::readable()` downgrades to `Array`, +a strictly less-capable *type* that `PyArray::new` will not accept (compile error: +`expected trait ReadableWritableListableStorageTraits, found trait +ReadableStorageTraits`). A compile-time-distinct read-only type is also the wrong +model for our future needs: stores like read-only S3 (via `ObjectStore`) or future +Python-protocol-backed stores only know they are read-only **at runtime**, not at +compile time. + +## Decision + +Introduce a `ReadOnly` storage **adapter** that wraps a readable + listable store +and **satisfies the maximal `ReadableWritableListableStorageTraits`** by implementing +its write methods as runtime errors. This keeps the entire existing stack (one +`PyArray` type over one storage trait) unchanged — no new pyclass, no type surgery — +while making writes fail at runtime instead of compile time. + +This works because zarrs provides a blanket impl: any `T: Readable + Writable + +Listable + 'static` automatically implements `ReadableWritableListableStorageTraits` +(`zarrs_storage/src/storage_sync.rs:254`). So an adapter that *delegates* reads and +lists and *errors* on writes transparently fills the maximal-trait slot. + +Rejected alternative — a separate `ReadOnlyArray` pyclass over +`Array`: gives compile-time guarantees but (a) requires +duplicating/factoring all read + metadata methods into shared macros for a second +type, and (b) cannot represent stores whose read-only-ness is only known at runtime, +which is the more important real-world case. + +## Components + +### 1. `ReadOnly` — sync adapter + +New file: `src/storage/read_only.rs`. + +```rust +pub struct ReadOnly(Arc); + +impl ReadOnly { + pub fn new(inner: Arc) -> Self { Self(inner) } +} +``` + +Trait impls (over `T: ?Sized + ReadableListableStorageTraits` as appropriate): + +- `ReadableStorageTraits` — delegate every method to `self.0` + (`get_partial_many`, `size_key`, `supports_get_partial`). +- `ListableStorageTraits` — delegate every method to `self.0` + (`list`, `list_prefix`, `list_dir`, `size_prefix`). +- `WritableStorageTraits` — **every** method returns + `Err(StorageError::ReadOnly)`: + - `set` → `Err(StorageError::ReadOnly)` + - `set_partial_many` → `Err(StorageError::ReadOnly)` + - `erase` → `Err(StorageError::ReadOnly)` + - `erase_prefix` → `Err(StorageError::ReadOnly)` + - `supports_set_partial` → `false` + +`StorageError::ReadOnly` already exists with the message "a write operation was +attempted on a read only store" (`zarrs_storage/src/lib.rs:170`). It flows through +the existing `ZarristaError` conversion and surfaces as a Python exception. + +The blanket impl then yields `ReadableWritableListableStorageTraits` for free. + +### 2. `AsyncReadOnly` — async adapter + +Same file (or `src/storage/read_only.rs` shared). Mirrors `ReadOnly` against the +async traits (`AsyncReadableStorageTraits`, `AsyncListableStorageTraits`, +`AsyncWritableStorageTraits`), whose method set is identical with `async fn`. Write +methods return `Err(StorageError::ReadOnly)`. The async blanket impl +(`storage_async.rs`) yields `AsyncReadableWritableListableStorageTraits`. + +### 3. `PyArray::read_only` (sync) + +Replace the current non-compiling body in `src/array/sync.rs`: + +```rust +fn read_only(&self) -> Self { + let inner = self.inner.storage().readable_listable(); // RWL -> RL + let storage = Arc::new(ReadOnly::new(inner)); // RL -> faked RWL + Self::new(Arc::new(self.inner.with_storage(storage))) +} +``` + +- `Array::storage()` (`zarrs/src/array.rs:710`) → `Arc`. +- `.readable_listable()` (`storage_sync.rs:248`) downgrades to `Arc`. +- `Array::with_storage(storage)` (`zarrs/src/array.rs:420`) rebuilds the `Array` + with the same metadata over the new storage. + +### 4. `PyAsyncArray::read_only` (async) + +Same shape in `src/array/async.rs`, using `AsyncReadOnly` and the async +`readable_listable()`. + +## Data flow + +- Reads (`__getitem__`, `retrieve_array_subset`, `retrieve_chunk`, + `retrieve_encoded_chunk`) and metadata accessors: pass through the adapter + untouched. +- Writes (`store_chunk`, `store_encoded_chunk`, `erase_chunk`, `erase_metadata`, + `compact_chunk`): hit the adapter's erroring write methods and raise + `StorageError::ReadOnly` → Python exception. + +## Out of scope (YAGNI for this pass) + +- A `read_only` boolean / introspection property on the array. Future direction: + expose `array.store`, and put `is_read_only` on that store object instead. +- Wrapping genuinely read-only stores at the `PySyncStorage`/`PyAsyncStorage` + boundary (S3, Python-protocol stores). The `ReadOnly` adapter built here is the + reusable mechanism that will enable it; the boundary wiring is a separate change. + +## Testing + +- `read_only()` returns an array that still reads correctly (round-trip a subset + read equals the source array's read). +- Each write method on a read-only array raises (Python-level assertion that the + expected exception type is raised) for both sync and async. +- A normal (non-read-only) array still writes successfully — no regression. +``` \ No newline at end of file From 69210884624d80aef77a0fb406603e3b4f948614 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:28:35 -0400 Subject: [PATCH 02/10] feat: add Array.read_only via a read-only storage adapter Co-Authored-By: Claude Opus 4.8 (1M context) --- src/array/sync.rs | 25 +++++++++++ src/storage/mod.rs | 2 + src/storage/read_only.rs | 97 ++++++++++++++++++++++++++++++++++++++++ tests/test_read_only.py | 58 ++++++++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 src/storage/read_only.rs create mode 100644 tests/test_read_only.py diff --git a/src/array/sync.rs b/src/array/sync.rs index 38bcc96..9a5f4dc 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -9,6 +9,7 @@ use crate::array_bytes::PyArrayBytes; use crate::codec::PyCodecOptions; use crate::decoded_array::DecodedArray; use crate::error::ZarristaResult; +use crate::metadata::PyArrayMetadata; use crate::node::PyNodePath; use crate::storage::PySyncStorage; use pyo3::prelude::*; @@ -50,6 +51,24 @@ impl PyArray { ) } + /// Use the provided metadata to open a new array at `path` in `store`. + /// + /// This does **not** write to the store, use `store_metadata` to write metadata to storage. + #[staticmethod] + #[pyo3( + signature = (metadata, store, path = PyNodePath::root()), + text_signature = "(metadata, store, path='/')" + )] + fn from_metadata( + metadata: PyArrayMetadata, + store: PySyncStorage, + path: PyNodePath, + ) -> ZarristaResult { + let inner = + Array::new_with_metadata(store.into_inner(), path.as_str(), metadata.into_inner())?; + Ok(Self::new(Arc::new(inner))) + } + /// Open the array stored at `path` in `store`. #[staticmethod] #[pyo3( @@ -85,6 +104,12 @@ impl PyArray { Ok(()) } + fn read_only(&self) -> Self { + let inner = self.inner.storage().readable_listable(); + let storage = Arc::new(crate::storage::ReadOnly::new(inner)); + Self::new(Arc::new(self.inner.with_storage(storage))) + } + /// Read a region of the array, using numpy-style basic indexing. /// /// Returns one of the decoded result classes (`Tensor`, `VariableArray`, diff --git a/src/storage/mod.rs b/src/storage/mod.rs index ad51b68..9f0d5fa 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,6 +1,8 @@ mod r#async; +mod read_only; mod sync; +pub(crate) use read_only::ReadOnly; pub use r#async::PyAsyncStorage; pub(crate) use sync::PySyncStorage; pub use sync::{PyFilesystemStore, PyMemoryStore}; diff --git a/src/storage/read_only.rs b/src/storage/read_only.rs new file mode 100644 index 0000000..49f4180 --- /dev/null +++ b/src/storage/read_only.rs @@ -0,0 +1,97 @@ +//! A storage adapter that reads and lists transparently but rejects all writes +//! at runtime. It satisfies the maximal `ReadableWritableListableStorageTraits` +//! (via zarrs' blanket impl) so a read-only array stays the same `Array` type as +//! a writable one; mutation attempts surface as `StorageError::ReadOnly`. + +use std::sync::Arc; + +use zarrs::storage::byte_range::{ByteRange, ByteRangeIterator}; +use zarrs::storage::{ + Bytes, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, + ReadableListableStorageTraits, ReadableStorageTraits, StorageError, StoreKey, StoreKeys, + StoreKeysPrefixes, StorePrefix, WritableStorageTraits, +}; + +/// Wraps a readable + listable store, rejecting every write with +/// [`StorageError::ReadOnly`]. +pub struct ReadOnly(Arc); + +impl ReadOnly { + pub fn new(inner: Arc) -> Self { + Self(inner) + } +} + +impl ReadableStorageTraits for ReadOnly { + fn get(&self, key: &StoreKey) -> Result { + self.0.get(key) + } + + fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result, StorageError> { + self.0.get_partial_many(key, byte_ranges) + } + + fn get_partial( + &self, + key: &StoreKey, + byte_range: ByteRange, + ) -> Result { + self.0.get_partial(key, byte_range) + } + + fn size_key(&self, key: &StoreKey) -> Result, StorageError> { + self.0.size_key(key) + } + + fn supports_get_partial(&self) -> bool { + self.0.supports_get_partial() + } +} + +impl ListableStorageTraits for ReadOnly { + fn list(&self) -> Result { + self.0.list() + } + + fn list_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.list_prefix(prefix) + } + + fn list_dir(&self, prefix: &StorePrefix) -> Result { + self.0.list_dir(prefix) + } + + fn size_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.size_prefix(prefix) + } +} + +impl WritableStorageTraits for ReadOnly { + fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn set_partial_many( + &self, + _key: &StoreKey, + _offset_values: OffsetBytesIterator, + ) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn erase(&self, _key: &StoreKey) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn erase_prefix(&self, _prefix: &StorePrefix) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn supports_set_partial(&self) -> bool { + false + } +} diff --git a/tests/test_read_only.py b/tests/test_read_only.py new file mode 100644 index 0000000..9eb7160 --- /dev/null +++ b/tests/test_read_only.py @@ -0,0 +1,58 @@ +"""`Array.read_only()` returns an array that reads normally but raises on writes.""" + +import numpy as np +import pytest + +from zarrista import ArrayBytes, ArrayBuilder, ChunkGrid, DataType, FillValue, MemoryStore +from zarrista.exceptions import ZarristaError + + +def _writable_array(): + """A 4x4 int8 array (single 4x4 chunk, fill 0) created in a MemoryStore.""" + return ( + ArrayBuilder( + ChunkGrid.regular([4, 4], [4, 4]), + DataType.from_string("int8"), + FillValue(b"\x00"), + ) + .create(MemoryStore(), "/a") + ) + + +def test_read_only_still_reads(): + array = _writable_array() + array.store_chunk([0, 0], ArrayBytes(np.arange(16, dtype="int8").tobytes())) + + ro = array.read_only() + + np.testing.assert_array_equal( + ro.retrieve_chunk([0, 0]).to_numpy(), + np.arange(16, dtype="int8").reshape(4, 4), + ) + assert ro.shape == [4, 4] + + +def test_read_only_store_chunk_raises(): + # The underlying StorageError::ReadOnly may be wrapped by the Array layer + # (e.g. into an ArrayError); assert the read-only failure surfaces, not the + # exact wrapper class. + ro = _writable_array().read_only() + with pytest.raises(ZarristaError, match="read only"): + ro.store_chunk([0, 0], ArrayBytes(np.zeros(16, dtype="int8").tobytes())) + + +def test_read_only_erase_metadata_raises(): + # `ArrayBuilder.create` already stored metadata, so erasing it is a real write. + ro = _writable_array().read_only() + with pytest.raises(ZarristaError, match="read only"): + ro.erase_metadata() + + +def test_writable_array_still_writes(): + """The original array (and any non-read-only array) writes without error.""" + array = _writable_array() + array.store_chunk([0, 0], ArrayBytes(np.ones(16, dtype="int8").tobytes())) + np.testing.assert_array_equal( + array.retrieve_chunk([0, 0]).to_numpy(), + np.ones((4, 4), dtype="int8"), + ) From 1f6e69fd331c496a3835637f6c20dd83f1ad4134 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:32:15 -0400 Subject: [PATCH 03/10] feat: add AsyncArray.read_only via an async read-only storage adapter Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 1 + Cargo.toml | 1 + src/array/async.rs | 7 ++++ src/storage/mod.rs | 2 +- src/storage/read_only.rs | 86 ++++++++++++++++++++++++++++++++++++++-- tests/test_read_only.py | 32 +++++++++++++++ 6 files changed, 125 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1cd5d9..6c0b3c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5247,6 +5247,7 @@ dependencies = [ "arrow-array", "arrow-buffer", "arrow-schema", + "async-trait", "bytes", "dlpark", "half", diff --git a/Cargo.toml b/Cargo.toml index 97081e7..f5f4093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ abi3-py311 = ["pyo3/abi3-py311"] [dependencies] arrow-array = "59" +async-trait = "0.1" arrow-buffer = "59" arrow-schema = "59" bytes = "1.12.0" diff --git a/src/array/async.rs b/src/array/async.rs index ba418f2..fe60cad 100644 --- a/src/array/async.rs +++ b/src/array/async.rs @@ -112,6 +112,13 @@ impl PyAsyncArray { }) } + /// Return a read-only view of this array; writes raise at runtime. + fn read_only(&self) -> Self { + let inner = self.inner.storage().readable_listable(); + let storage = Arc::new(crate::storage::AsyncReadOnly::new(inner)); + Self::new(Arc::new(self.inner.with_storage(storage))) + } + fn erase_metadata<'py>(&self, py: Python<'py>) -> PyResult> { let inner = self.inner.clone(); future_into_py(py, async move { diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 9f0d5fa..a9ec061 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -2,7 +2,7 @@ mod r#async; mod read_only; mod sync; -pub(crate) use read_only::ReadOnly; pub use r#async::PyAsyncStorage; +pub(crate) use read_only::{AsyncReadOnly, ReadOnly}; pub(crate) use sync::PySyncStorage; pub use sync::{PyFilesystemStore, PyMemoryStore}; diff --git a/src/storage/read_only.rs b/src/storage/read_only.rs index 49f4180..b83329f 100644 --- a/src/storage/read_only.rs +++ b/src/storage/read_only.rs @@ -7,9 +7,11 @@ use std::sync::Arc; use zarrs::storage::byte_range::{ByteRange, ByteRangeIterator}; use zarrs::storage::{ - Bytes, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, - ReadableListableStorageTraits, ReadableStorageTraits, StorageError, StoreKey, StoreKeys, - StoreKeysPrefixes, StorePrefix, WritableStorageTraits, + AsyncListableStorageTraits, AsyncMaybeBytesIterator, AsyncReadableListableStorageTraits, + AsyncReadableStorageTraits, AsyncWritableStorageTraits, Bytes, ListableStorageTraits, + MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, ReadableListableStorageTraits, + ReadableStorageTraits, StorageError, StoreKey, StoreKeys, StoreKeysPrefixes, StorePrefix, + WritableStorageTraits, }; /// Wraps a readable + listable store, rejecting every write with @@ -95,3 +97,81 @@ impl WritableStorageTraits for ReadOnly { false } } + +/// Async twin of [`ReadOnly`]: reads and lists transparently, rejects writes. +pub struct AsyncReadOnly(Arc); + +impl AsyncReadOnly { + pub fn new(inner: Arc) -> Self { + Self(inner) + } +} + +#[async_trait::async_trait] +impl AsyncReadableStorageTraits for AsyncReadOnly { + async fn get(&self, key: &StoreKey) -> Result { + self.0.get(key).await + } + + async fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result, StorageError> { + self.0.get_partial_many(key, byte_ranges).await + } + + async fn size_key(&self, key: &StoreKey) -> Result, StorageError> { + self.0.size_key(key).await + } + + fn supports_get_partial(&self) -> bool { + self.0.supports_get_partial() + } +} + +#[async_trait::async_trait] +impl AsyncListableStorageTraits for AsyncReadOnly { + async fn list(&self) -> Result { + self.0.list().await + } + + async fn list_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.list_prefix(prefix).await + } + + async fn list_dir(&self, prefix: &StorePrefix) -> Result { + self.0.list_dir(prefix).await + } + + async fn size_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.size_prefix(prefix).await + } +} + +#[async_trait::async_trait] +impl AsyncWritableStorageTraits for AsyncReadOnly { + async fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + async fn set_partial_many<'a>( + &'a self, + _key: &StoreKey, + _offset_values: OffsetBytesIterator<'a>, + ) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + async fn erase(&self, _key: &StoreKey) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + async fn erase_prefix(&self, _prefix: &StorePrefix) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn supports_set_partial(&self) -> bool { + false + } +} diff --git a/tests/test_read_only.py b/tests/test_read_only.py index 9eb7160..53f11c5 100644 --- a/tests/test_read_only.py +++ b/tests/test_read_only.py @@ -56,3 +56,35 @@ def test_writable_array_still_writes(): array.retrieve_chunk([0, 0]).to_numpy(), np.ones((4, 4), dtype="int8"), ) + + +# --- async --------------------------------------------------------------- + +import zarr # noqa: E402 +from obstore.store import LocalStore # noqa: E402 + +from zarrista import AsyncArray # noqa: E402 + + +async def _async_writable_array(tmp_path): + """A 4x4 int8 array written with zarr-python, opened async over obstore.""" + path = tmp_path / "a.zarr" + z = zarr.create_array(store=str(path), shape=(4, 4), chunks=(4, 4), dtype="int8") + z[:] = np.arange(16, dtype="int8").reshape(4, 4) + return await AsyncArray.open_async(LocalStore(str(path))) + + +async def test_async_read_only_still_reads(tmp_path): + arr = await _async_writable_array(tmp_path) + ro = arr.read_only() + chunk = await ro.retrieve_chunk([0, 0]) + np.testing.assert_array_equal( + chunk.to_numpy(), np.arange(16, dtype="int8").reshape(4, 4) + ) + + +async def test_async_read_only_store_chunk_raises(tmp_path): + arr = await _async_writable_array(tmp_path) + ro = arr.read_only() + with pytest.raises(ZarristaError, match="read only"): + await ro.store_chunk([0, 0], ArrayBytes(np.zeros(16, dtype="int8").tobytes())) From 32bc842e963053e93391d199a25f40b3cd29328d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:38:17 -0400 Subject: [PATCH 04/10] refactor where read only array lives --- src/storage/async.rs | 85 +++++++++++++++++++ src/storage/mod.rs | 6 +- src/storage/read_only.rs | 177 --------------------------------------- src/storage/sync.rs | 92 +++++++++++++++++++- 4 files changed, 177 insertions(+), 183 deletions(-) delete mode 100644 src/storage/read_only.rs diff --git a/src/storage/async.rs b/src/storage/async.rs index 25d3b90..bad7ee4 100644 --- a/src/storage/async.rs +++ b/src/storage/async.rs @@ -1,12 +1,19 @@ use std::sync::Arc; +use async_trait::async_trait; use object_store::ObjectStore; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; use pyo3_bytes::PyBytes; use pyo3_object_store::AnyObjectStore; +use zarrs::storage::byte_range::ByteRangeIterator; use zarrs::storage::AsyncReadableWritableListableStorageTraits; +use zarrs::storage::{ + AsyncListableStorageTraits, AsyncMaybeBytesIterator, AsyncReadableListableStorageTraits, + AsyncReadableStorageTraits, AsyncWritableStorageTraits, Bytes, MaybeBytes, OffsetBytesIterator, + StorageError, StoreKey, StoreKeys, StoreKeysPrefixes, StorePrefix, +}; use zarrs_icechunk::AsyncIcechunkStore; use zarrs_object_store::AsyncObjectStore; @@ -123,3 +130,81 @@ impl FromPyObject<'_, '_> for PyAsyncIcechunkStore { Ok(Self(AsyncIcechunkStore::new(session))) } } + +/// An async storage adapter that reads and lists transparently but rejects all writes at runtime. +pub struct AsyncReadOnly(Arc); + +impl AsyncReadOnly { + pub fn new(inner: Arc) -> Self { + Self(inner) + } +} + +#[async_trait] +impl AsyncReadableStorageTraits for AsyncReadOnly { + async fn get(&self, key: &StoreKey) -> Result { + self.0.get(key).await + } + + async fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result, StorageError> { + self.0.get_partial_many(key, byte_ranges).await + } + + async fn size_key(&self, key: &StoreKey) -> Result, StorageError> { + self.0.size_key(key).await + } + + fn supports_get_partial(&self) -> bool { + self.0.supports_get_partial() + } +} + +#[async_trait] +impl AsyncListableStorageTraits for AsyncReadOnly { + async fn list(&self) -> Result { + self.0.list().await + } + + async fn list_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.list_prefix(prefix).await + } + + async fn list_dir(&self, prefix: &StorePrefix) -> Result { + self.0.list_dir(prefix).await + } + + async fn size_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.size_prefix(prefix).await + } +} + +#[async_trait] +impl AsyncWritableStorageTraits for AsyncReadOnly { + async fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + async fn set_partial_many<'a>( + &'a self, + _key: &StoreKey, + _offset_values: OffsetBytesIterator<'a>, + ) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + async fn erase(&self, _key: &StoreKey) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + async fn erase_prefix(&self, _prefix: &StorePrefix) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn supports_set_partial(&self) -> bool { + false + } +} diff --git a/src/storage/mod.rs b/src/storage/mod.rs index a9ec061..e564e3d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,8 +1,6 @@ mod r#async; -mod read_only; mod sync; -pub use r#async::PyAsyncStorage; -pub(crate) use read_only::{AsyncReadOnly, ReadOnly}; +pub use r#async::{AsyncReadOnly, PyAsyncStorage}; pub(crate) use sync::PySyncStorage; -pub use sync::{PyFilesystemStore, PyMemoryStore}; +pub use sync::{PyFilesystemStore, PyMemoryStore, ReadOnly}; diff --git a/src/storage/read_only.rs b/src/storage/read_only.rs deleted file mode 100644 index b83329f..0000000 --- a/src/storage/read_only.rs +++ /dev/null @@ -1,177 +0,0 @@ -//! A storage adapter that reads and lists transparently but rejects all writes -//! at runtime. It satisfies the maximal `ReadableWritableListableStorageTraits` -//! (via zarrs' blanket impl) so a read-only array stays the same `Array` type as -//! a writable one; mutation attempts surface as `StorageError::ReadOnly`. - -use std::sync::Arc; - -use zarrs::storage::byte_range::{ByteRange, ByteRangeIterator}; -use zarrs::storage::{ - AsyncListableStorageTraits, AsyncMaybeBytesIterator, AsyncReadableListableStorageTraits, - AsyncReadableStorageTraits, AsyncWritableStorageTraits, Bytes, ListableStorageTraits, - MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, ReadableListableStorageTraits, - ReadableStorageTraits, StorageError, StoreKey, StoreKeys, StoreKeysPrefixes, StorePrefix, - WritableStorageTraits, -}; - -/// Wraps a readable + listable store, rejecting every write with -/// [`StorageError::ReadOnly`]. -pub struct ReadOnly(Arc); - -impl ReadOnly { - pub fn new(inner: Arc) -> Self { - Self(inner) - } -} - -impl ReadableStorageTraits for ReadOnly { - fn get(&self, key: &StoreKey) -> Result { - self.0.get(key) - } - - fn get_partial_many<'a>( - &'a self, - key: &StoreKey, - byte_ranges: ByteRangeIterator<'a>, - ) -> Result, StorageError> { - self.0.get_partial_many(key, byte_ranges) - } - - fn get_partial( - &self, - key: &StoreKey, - byte_range: ByteRange, - ) -> Result { - self.0.get_partial(key, byte_range) - } - - fn size_key(&self, key: &StoreKey) -> Result, StorageError> { - self.0.size_key(key) - } - - fn supports_get_partial(&self) -> bool { - self.0.supports_get_partial() - } -} - -impl ListableStorageTraits for ReadOnly { - fn list(&self) -> Result { - self.0.list() - } - - fn list_prefix(&self, prefix: &StorePrefix) -> Result { - self.0.list_prefix(prefix) - } - - fn list_dir(&self, prefix: &StorePrefix) -> Result { - self.0.list_dir(prefix) - } - - fn size_prefix(&self, prefix: &StorePrefix) -> Result { - self.0.size_prefix(prefix) - } -} - -impl WritableStorageTraits for ReadOnly { - fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - fn set_partial_many( - &self, - _key: &StoreKey, - _offset_values: OffsetBytesIterator, - ) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - fn erase(&self, _key: &StoreKey) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - fn erase_prefix(&self, _prefix: &StorePrefix) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - fn supports_set_partial(&self) -> bool { - false - } -} - -/// Async twin of [`ReadOnly`]: reads and lists transparently, rejects writes. -pub struct AsyncReadOnly(Arc); - -impl AsyncReadOnly { - pub fn new(inner: Arc) -> Self { - Self(inner) - } -} - -#[async_trait::async_trait] -impl AsyncReadableStorageTraits for AsyncReadOnly { - async fn get(&self, key: &StoreKey) -> Result { - self.0.get(key).await - } - - async fn get_partial_many<'a>( - &'a self, - key: &StoreKey, - byte_ranges: ByteRangeIterator<'a>, - ) -> Result, StorageError> { - self.0.get_partial_many(key, byte_ranges).await - } - - async fn size_key(&self, key: &StoreKey) -> Result, StorageError> { - self.0.size_key(key).await - } - - fn supports_get_partial(&self) -> bool { - self.0.supports_get_partial() - } -} - -#[async_trait::async_trait] -impl AsyncListableStorageTraits for AsyncReadOnly { - async fn list(&self) -> Result { - self.0.list().await - } - - async fn list_prefix(&self, prefix: &StorePrefix) -> Result { - self.0.list_prefix(prefix).await - } - - async fn list_dir(&self, prefix: &StorePrefix) -> Result { - self.0.list_dir(prefix).await - } - - async fn size_prefix(&self, prefix: &StorePrefix) -> Result { - self.0.size_prefix(prefix).await - } -} - -#[async_trait::async_trait] -impl AsyncWritableStorageTraits for AsyncReadOnly { - async fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - async fn set_partial_many<'a>( - &'a self, - _key: &StoreKey, - _offset_values: OffsetBytesIterator<'a>, - ) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - async fn erase(&self, _key: &StoreKey) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - async fn erase_prefix(&self, _prefix: &StorePrefix) -> Result<(), StorageError> { - Err(StorageError::ReadOnly) - } - - fn supports_set_partial(&self) -> bool { - false - } -} diff --git a/src/storage/sync.rs b/src/storage/sync.rs index 6d6afe3..250bafa 100644 --- a/src/storage/sync.rs +++ b/src/storage/sync.rs @@ -4,9 +4,13 @@ use pyo3::prelude::*; use std::path::PathBuf; use std::sync::Arc; use zarrs::filesystem::FilesystemStore; +use zarrs::storage::byte_range::{ByteRange, ByteRangeIterator}; use zarrs::storage::store::MemoryStore; -use zarrs::storage::ReadableWritableListableStorageTraits; - +use zarrs::storage::{ + Bytes, ListableStorageTraits, MaybeBytes, MaybeBytesIterator, OffsetBytesIterator, + ReadableListableStorageTraits, ReadableStorageTraits, ReadableWritableListableStorageTraits, + StorageError, StoreKey, StoreKeys, StoreKeysPrefixes, StorePrefix, WritableStorageTraits, +}; /// A zarrista sync store object adapted to the maximal `zarrs` storage trait. pub struct PySyncStorage(Arc); @@ -79,3 +83,87 @@ impl PyMemoryStore { "MemoryStore()".to_string() } } + +/// Wraps a readable + listable store, rejecting every write with +/// [`StorageError::ReadOnly`]. +pub struct ReadOnly(Arc); + +impl ReadOnly { + pub fn new(inner: Arc) -> Self { + Self(inner) + } +} + +impl ReadableStorageTraits for ReadOnly { + fn get(&self, key: &StoreKey) -> Result { + self.0.get(key) + } + + fn get_partial_many<'a>( + &'a self, + key: &StoreKey, + byte_ranges: ByteRangeIterator<'a>, + ) -> Result, StorageError> { + self.0.get_partial_many(key, byte_ranges) + } + + fn get_partial( + &self, + key: &StoreKey, + byte_range: ByteRange, + ) -> Result { + self.0.get_partial(key, byte_range) + } + + fn size_key(&self, key: &StoreKey) -> Result, StorageError> { + self.0.size_key(key) + } + + fn supports_get_partial(&self) -> bool { + self.0.supports_get_partial() + } +} + +impl ListableStorageTraits for ReadOnly { + fn list(&self) -> Result { + self.0.list() + } + + fn list_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.list_prefix(prefix) + } + + fn list_dir(&self, prefix: &StorePrefix) -> Result { + self.0.list_dir(prefix) + } + + fn size_prefix(&self, prefix: &StorePrefix) -> Result { + self.0.size_prefix(prefix) + } +} + +impl WritableStorageTraits for ReadOnly { + fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn set_partial_many( + &self, + _key: &StoreKey, + _offset_values: OffsetBytesIterator, + ) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn erase(&self, _key: &StoreKey) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn erase_prefix(&self, _prefix: &StorePrefix) -> Result<(), StorageError> { + Err(StorageError::ReadOnly) + } + + fn supports_set_partial(&self) -> bool { + false + } +} From 82408abaaafb8cba99dbd1a5b586bbc76cf42499 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:42:08 -0400 Subject: [PATCH 05/10] add async array from metadata --- src/array/async.rs | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/array/async.rs b/src/array/async.rs index fe60cad..74976b9 100644 --- a/src/array/async.rs +++ b/src/array/async.rs @@ -8,9 +8,10 @@ use crate::array::PyChunkIndices; use crate::array_bytes::PyArrayBytes; use crate::codec::PyCodecOptions; use crate::decoded_array::DecodedArray; -use crate::error::ZarristaError; +use crate::error::{ZarristaError, ZarristaResult}; +use crate::metadata::PyArrayMetadata; use crate::node::PyNodePath; -use crate::storage::PyAsyncStorage; +use crate::storage::{AsyncReadOnlyStorageAdapter, PyAsyncStorage}; use pyo3::prelude::*; use pyo3_async_runtimes::tokio::future_into_py; use pyo3_bytes::PyBytes; @@ -56,6 +57,24 @@ impl PyAsyncArray { ) } + /// Use the provided metadata to open a new array at `path` in `store`. + /// + /// This does **not** write to the store, use `store_metadata` to write metadata to storage. + #[staticmethod] + #[pyo3( + signature = (metadata, store, path = PyNodePath::root()), + text_signature = "(metadata, store, path='/')" + )] + fn from_metadata( + metadata: PyArrayMetadata, + store: PyAsyncStorage, + path: PyNodePath, + ) -> ZarristaResult { + let inner = + Array::new_with_metadata(store.into_inner(), path.as_str(), metadata.into_inner())?; + Ok(Self::new(Arc::new(inner))) + } + /// Open the array stored at `path` in `store`. #[staticmethod] #[pyo3( @@ -114,8 +133,8 @@ impl PyAsyncArray { /// Return a read-only view of this array; writes raise at runtime. fn read_only(&self) -> Self { - let inner = self.inner.storage().readable_listable(); - let storage = Arc::new(crate::storage::AsyncReadOnly::new(inner)); + let read_list_storage = self.inner.storage().readable_listable(); + let storage = Arc::new(AsyncReadOnlyStorageAdapter::new(read_list_storage)); Self::new(Arc::new(self.inner.with_storage(storage))) } From 5ae6987dfc4e22d693dadaf866880cbe49124391 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:42:20 -0400 Subject: [PATCH 06/10] rename storage adapter --- src/array/sync.rs | 6 +++--- src/storage/async.rs | 10 +++++----- src/storage/mod.rs | 4 ++-- src/storage/sync.rs | 13 ++++++------- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/array/sync.rs b/src/array/sync.rs index 9a5f4dc..be20628 100644 --- a/src/array/sync.rs +++ b/src/array/sync.rs @@ -11,7 +11,7 @@ use crate::decoded_array::DecodedArray; use crate::error::ZarristaResult; use crate::metadata::PyArrayMetadata; use crate::node::PyNodePath; -use crate::storage::PySyncStorage; +use crate::storage::{PySyncStorage, ReadOnlyStorageAdapter}; use pyo3::prelude::*; use pyo3_bytes::PyBytes; use zarrs::array::Array; @@ -105,8 +105,8 @@ impl PyArray { } fn read_only(&self) -> Self { - let inner = self.inner.storage().readable_listable(); - let storage = Arc::new(crate::storage::ReadOnly::new(inner)); + let read_list_storage = self.inner.storage().readable_listable(); + let storage = Arc::new(ReadOnlyStorageAdapter::new(read_list_storage)); Self::new(Arc::new(self.inner.with_storage(storage))) } diff --git a/src/storage/async.rs b/src/storage/async.rs index bad7ee4..01a59c9 100644 --- a/src/storage/async.rs +++ b/src/storage/async.rs @@ -132,16 +132,16 @@ impl FromPyObject<'_, '_> for PyAsyncIcechunkStore { } /// An async storage adapter that reads and lists transparently but rejects all writes at runtime. -pub struct AsyncReadOnly(Arc); +pub struct AsyncReadOnlyStorageAdapter(Arc); -impl AsyncReadOnly { +impl AsyncReadOnlyStorageAdapter { pub fn new(inner: Arc) -> Self { Self(inner) } } #[async_trait] -impl AsyncReadableStorageTraits for AsyncReadOnly { +impl AsyncReadableStorageTraits for AsyncReadOnlyStorageAdapter { async fn get(&self, key: &StoreKey) -> Result { self.0.get(key).await } @@ -164,7 +164,7 @@ impl AsyncReadableStorageTraits for AsyncReadOnly { } #[async_trait] -impl AsyncListableStorageTraits for AsyncReadOnly { +impl AsyncListableStorageTraits for AsyncReadOnlyStorageAdapter { async fn list(&self) -> Result { self.0.list().await } @@ -183,7 +183,7 @@ impl AsyncListableStorageTraits for AsyncReadOnly { } #[async_trait] -impl AsyncWritableStorageTraits for AsyncReadOnly { +impl AsyncWritableStorageTraits for AsyncReadOnlyStorageAdapter { async fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { Err(StorageError::ReadOnly) } diff --git a/src/storage/mod.rs b/src/storage/mod.rs index e564e3d..abefccb 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,6 +1,6 @@ mod r#async; mod sync; -pub use r#async::{AsyncReadOnly, PyAsyncStorage}; +pub use r#async::{AsyncReadOnlyStorageAdapter, PyAsyncStorage}; pub(crate) use sync::PySyncStorage; -pub use sync::{PyFilesystemStore, PyMemoryStore, ReadOnly}; +pub use sync::{PyFilesystemStore, PyMemoryStore, ReadOnlyStorageAdapter}; diff --git a/src/storage/sync.rs b/src/storage/sync.rs index 250bafa..1fec4d6 100644 --- a/src/storage/sync.rs +++ b/src/storage/sync.rs @@ -84,17 +84,16 @@ impl PyMemoryStore { } } -/// Wraps a readable + listable store, rejecting every write with -/// [`StorageError::ReadOnly`]. -pub struct ReadOnly(Arc); +/// A storage adapter that reads and lists transparently but rejects all writes at runtime. +pub struct ReadOnlyStorageAdapter(Arc); -impl ReadOnly { +impl ReadOnlyStorageAdapter { pub fn new(inner: Arc) -> Self { Self(inner) } } -impl ReadableStorageTraits for ReadOnly { +impl ReadableStorageTraits for ReadOnlyStorageAdapter { fn get(&self, key: &StoreKey) -> Result { self.0.get(key) } @@ -124,7 +123,7 @@ impl ReadableStorageTraits for ReadOnly { } } -impl ListableStorageTraits for ReadOnly { +impl ListableStorageTraits for ReadOnlyStorageAdapter { fn list(&self) -> Result { self.0.list() } @@ -142,7 +141,7 @@ impl ListableStorageTraits for ReadOnly { } } -impl WritableStorageTraits for ReadOnly { +impl WritableStorageTraits for ReadOnlyStorageAdapter { fn set(&self, _key: &StoreKey, _value: Bytes) -> Result<(), StorageError> { Err(StorageError::ReadOnly) } From a857347125929a960efc41beb50c4bec70e895c6 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:43:03 -0400 Subject: [PATCH 07/10] sort --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f5f4093..14f3569 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,9 +21,9 @@ abi3-py311 = ["pyo3/abi3-py311"] [dependencies] arrow-array = "59" -async-trait = "0.1" arrow-buffer = "59" arrow-schema = "59" +async-trait = "0.1" bytes = "1.12.0" dlpark = { git = "https://github.com/kylebarron/dlpark", rev = "31c6f49c064e634326c97172d39a00acecd854b6", features = [ "pyo3", From 1eba0fe91985bea03f207e88e3c5d38b6de3a867 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:44:46 -0400 Subject: [PATCH 08/10] update type stub and move test file --- python/zarrista/_array.pyi | 32 +++++++++++++++++++++++++++++ tests/{ => array}/test_read_only.py | 0 2 files changed, 32 insertions(+) rename tests/{ => array}/test_read_only.py (100%) diff --git a/python/zarrista/_array.pyi b/python/zarrista/_array.pyi index 25257a4..01f2af2 100644 --- a/python/zarrista/_array.pyi +++ b/python/zarrista/_array.pyi @@ -33,6 +33,16 @@ class Array: @staticmethod def open(store: FilesystemStore | MemoryStore, path: str = "/") -> Array: """Open the array stored at `path` in `store`.""" + @staticmethod + def from_metadata( + metadata: ArrayMetadataV3, + store: FilesystemStore | MemoryStore, + path: str = "/", + ) -> Array: + """Use the provided metadata to open a new array at `path` in `store`. + + This does **not** write the metadata to the store. + """ @property def attrs(self) -> dict[str, JSONValue]: """The array's user attributes as a dict.""" @@ -129,6 +139,12 @@ class Array: """ def erase_metadata(self) -> None: """Delete the array's metadata from the store.""" + def read_only(self) -> Array: + """Return a read-only view of this array. + + Reads behave identically, but any write (`store_chunk`, `erase_chunk`, + `erase_metadata`, ...) raises at runtime. + """ @property def shape(self) -> list[int]: """The array shape.""" @@ -147,6 +163,16 @@ class AsyncArray: `store` may be an obstore `ObjectStore` or an icechunk `Session`. """ + @staticmethod + def from_metadata( + metadata: ArrayMetadataV3, + store: AsyncStore, + path: str = "/", + ) -> AsyncArray: + """Use the provided metadata to open a new array at `path` in `store`. + + This does **not** write the metadata to the store. + """ @property def attrs(self) -> dict[str, JSONValue]: """The array's user attributes as a dict.""" @@ -243,6 +269,12 @@ class AsyncArray: """ async def erase_metadata(self) -> None: """Delete the array's metadata from the store.""" + def read_only(self) -> AsyncArray: + """Return a read-only view of this array. + + Reads behave identically, but any write (`store_chunk`, `erase_chunk`, + `erase_metadata`, ...) raises at runtime. + """ @property def shape(self) -> list[int]: """The array shape.""" diff --git a/tests/test_read_only.py b/tests/array/test_read_only.py similarity index 100% rename from tests/test_read_only.py rename to tests/array/test_read_only.py From b68431be5f2dbe6c5f1d52a8a1dd6f554a15cf9d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:46:39 -0400 Subject: [PATCH 09/10] lint --- tests/array/test_read_only.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/array/test_read_only.py b/tests/array/test_read_only.py index 53f11c5..08d4685 100644 --- a/tests/array/test_read_only.py +++ b/tests/array/test_read_only.py @@ -3,20 +3,25 @@ import numpy as np import pytest -from zarrista import ArrayBytes, ArrayBuilder, ChunkGrid, DataType, FillValue, MemoryStore +from zarrista import ( + Array, + ArrayBuilder, + ArrayBytes, + ChunkGrid, + DataType, + FillValue, + MemoryStore, +) from zarrista.exceptions import ZarristaError -def _writable_array(): +def _writable_array() -> Array: """A 4x4 int8 array (single 4x4 chunk, fill 0) created in a MemoryStore.""" - return ( - ArrayBuilder( - ChunkGrid.regular([4, 4], [4, 4]), - DataType.from_string("int8"), - FillValue(b"\x00"), - ) - .create(MemoryStore(), "/a") - ) + return ArrayBuilder( + ChunkGrid.regular([4, 4], [4, 4]), + DataType.from_string("int8"), + FillValue(b"\x00"), + ).create(MemoryStore(), "/a") def test_read_only_still_reads(): @@ -79,7 +84,8 @@ async def test_async_read_only_still_reads(tmp_path): ro = arr.read_only() chunk = await ro.retrieve_chunk([0, 0]) np.testing.assert_array_equal( - chunk.to_numpy(), np.arange(16, dtype="int8").reshape(4, 4) + chunk.to_numpy(), + np.arange(16, dtype="int8").reshape(4, 4), ) From d7f28986eea05de124f3c0b393fa2b6c1d7ddc3a Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 25 Jun 2026 15:46:57 -0400 Subject: [PATCH 10/10] lint --- tests/array/test_read_only.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/array/test_read_only.py b/tests/array/test_read_only.py index 08d4685..3073522 100644 --- a/tests/array/test_read_only.py +++ b/tests/array/test_read_only.py @@ -71,7 +71,7 @@ def test_writable_array_still_writes(): from zarrista import AsyncArray # noqa: E402 -async def _async_writable_array(tmp_path): +async def _async_writable_array(tmp_path) -> AsyncArray: """A 4x4 int8 array written with zarr-python, opened async over obstore.""" path = tmp_path / "a.zarr" z = zarr.create_array(store=str(path), shape=(4, 4), chunks=(4, 4), dtype="int8")