diff --git a/.gitignore b/.gitignore
index 65d7fa78..28e560cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,8 @@ templates/
 .env.registry
 docs/guides/miden-dashboard/operators.json
 docs/guides/miden-dashboard/guardian-dashboard/
+docs/guides/horizontal-scaling/operators.json
+docs/guides/horizontal-scaling/docker-compose.override.yml
 
 .cursor/
 .claude/
diff --git a/Cargo.lock b/Cargo.lock
index 052f46eb..b9ea5554 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3263,6 +3263,7 @@ dependencies = [
  "tokio",
  "tokio-postgres",
  "tokio-postgres-rustls",
+ "tokio-util",
  "tonic",
  "tonic-prost",
  "tonic-prost-build",
@@ -8053,6 +8054,7 @@ dependencies = [
  "bytes",
  "futures-core",
  "futures-sink",
+ "futures-util",
  "pin-project-lite",
  "tokio",
 ]
diff --git a/crates/server/Cargo.toml b/crates/server/Cargo.toml
index d2c108a9..fd28ea33 100644
--- a/crates/server/Cargo.toml
+++ b/crates/server/Cargo.toml
@@ -41,6 +41,7 @@ url = "2.5"
 zeroize = { version = "1.7", features = ["derive"] }
 serde_json = { workspace = true }
 tokio = { workspace = true, features = ["full"] }
+tokio-util = { version = "0.7", features = ["rt"] }
 tonic = { workspace = true }
 tonic-prost = { workspace = true }
 tonic-reflection = "0.14"
diff --git a/crates/server/migrations/2026-06-23-000001_auth_sessions/down.sql b/crates/server/migrations/2026-06-23-000001_auth_sessions/down.sql
new file mode 100644
index 00000000..874930ac
--- /dev/null
+++ b/crates/server/migrations/2026-06-23-000001_auth_sessions/down.sql
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS auth_sessions_realm_expires_idx;
+DROP INDEX IF EXISTS auth_sessions_expires_idx;
+DROP TABLE IF EXISTS auth_sessions;
diff --git a/crates/server/migrations/2026-06-23-000001_auth_sessions/up.sql b/crates/server/migrations/2026-06-23-000001_auth_sessions/up.sql
new file mode 100644
index 00000000..0b26a593
--- /dev/null
+++ b/crates/server/migrations/2026-06-23-000001_auth_sessions/up.sql
@@ -0,0 +1,21 @@
+-- Shared operator/EVM session store for horizontal scaling (issue #242).
+-- Sessions move out of per-process memory so a session issued on one replica
+-- is honored on every replica. Keyed by the SHA-256 digest of the session
+-- token (the plaintext token is never stored). The primary key is composite on
+-- (realm, token_digest) so operator and EVM sessions share one table with the
+-- realm boundary enforced by the database, not merely by token randomness.
+
+CREATE TABLE auth_sessions (
+    realm        TEXT        NOT NULL,
+    token_digest BYTEA       NOT NULL,
+    subject      JSONB       NOT NULL,
+    issued_at    TIMESTAMPTZ NOT NULL,
+    expires_at   TIMESTAMPTZ NOT NULL,
+    -- Set on logout; the row is kept until natural expiry so the revocation
+    -- is honored fleet-wide for as long as the token would have been valid.
+    revoked_at   TIMESTAMPTZ NULL,
+    PRIMARY KEY (realm, token_digest)
+);
+
+CREATE INDEX auth_sessions_expires_idx ON auth_sessions (expires_at);
+CREATE INDEX auth_sessions_realm_expires_idx ON auth_sessions (realm, expires_at);
diff --git a/crates/server/migrations/2026-06-23-000002_auth_challenges/down.sql b/crates/server/migrations/2026-06-23-000002_auth_challenges/down.sql
new file mode 100644
index 00000000..16f2effa
--- /dev/null
+++ b/crates/server/migrations/2026-06-23-000002_auth_challenges/down.sql
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS auth_challenges_expires_idx;
+DROP INDEX IF EXISTS auth_challenges_realm_principal_idx;
+DROP TABLE IF EXISTS auth_challenges;
diff --git a/crates/server/migrations/2026-06-23-000002_auth_challenges/up.sql b/crates/server/migrations/2026-06-23-000002_auth_challenges/up.sql
new file mode 100644
index 00000000..be2494ed
--- /dev/null
+++ b/crates/server/migrations/2026-06-23-000002_auth_challenges/up.sql
@@ -0,0 +1,21 @@
+-- Shared operator/EVM login-challenge store for horizontal scaling (issue #242).
+-- A challenge issued on one replica must be verifiable on another. Realm-aware
+-- so the two verification models coexist: `challenge_key` is the operator
+-- signing-digest hex or the EVM nonce, and `payload` carries the realm-specific
+-- fields needed to match/recover at verify time. Matching runs in Rust (Falcon
+-- verify / ECDSA recover); the store provides the candidates and the single-use
+-- claim.
+
+CREATE TABLE auth_challenges (
+    realm         TEXT        NOT NULL,
+    challenge_key TEXT        NOT NULL,
+    principal     TEXT        NOT NULL,
+    payload       JSONB       NOT NULL,
+    issued_at     TIMESTAMPTZ NOT NULL,
+    expires_at    TIMESTAMPTZ NOT NULL,
+    consumed_at   TIMESTAMPTZ NULL,
+    PRIMARY KEY (realm, challenge_key)
+);
+
+CREATE INDEX auth_challenges_realm_principal_idx ON auth_challenges (realm, principal);
+CREATE INDEX auth_challenges_expires_idx ON auth_challenges (expires_at);
diff --git a/crates/server/migrations/2026-06-24-000001_worker_leases/down.sql b/crates/server/migrations/2026-06-24-000001_worker_leases/down.sql
new file mode 100644
index 00000000..f6742949
--- /dev/null
+++ b/crates/server/migrations/2026-06-24-000001_worker_leases/down.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS worker_leases;
diff --git a/crates/server/migrations/2026-06-24-000001_worker_leases/up.sql b/crates/server/migrations/2026-06-24-000001_worker_leases/up.sql
new file mode 100644
index 00000000..c225178e
--- /dev/null
+++ b/crates/server/migrations/2026-06-24-000001_worker_leases/up.sql
@@ -0,0 +1,14 @@
+-- Single-owner coordination for background workers under horizontal scaling
+-- (issue #242, subsumes #190). At most one replica holds a named lease at a
+-- time; the holder renews on a heartbeat and a stale lease can be reclaimed by
+-- another replica once it expires. `fence_token` increments only on a change of
+-- holder (steal), so a superseded holder can be detected at its write boundary.
+
+CREATE TABLE worker_leases (
+    lease_name  TEXT        PRIMARY KEY,
+    holder_id   TEXT        NOT NULL,
+    acquired_at TIMESTAMPTZ NOT NULL,
+    renewed_at  TIMESTAMPTZ NOT NULL,
+    expires_at  TIMESTAMPTZ NOT NULL,
+    fence_token BIGINT      NOT NULL DEFAULT 0
+);
diff --git a/crates/server/src/ack/mod.rs b/crates/server/src/ack/mod.rs
index 4fecb69f..caf2a511 100644
--- a/crates/server/src/ack/mod.rs
+++ b/crates/server/src/ack/mod.rs
@@ -11,7 +11,7 @@ pub mod miden_falcon_rpo;
 mod secrets_manager;
 
 use crate::delta_object::DeltaObject;
-use crate::error::{GuardianError, Result};
+use crate::error::Result;
 use guardian_shared::SignatureScheme;
 use miden_protocol::crypto::dsa::ecdsa_k256_keccak::SigningKey as EcdsaSecretKey;
 use std::path::{Path, PathBuf};
@@ -25,9 +25,6 @@ pub(crate) use miden_ecdsa::{
 };
 pub use miden_falcon_rpo::MidenFalconRpoSigner;
 
-const ENV_GUARDIAN_ENV: &str = "GUARDIAN_ENV";
-const PROD_ENV: &str = "prod";
-
 /// The ECDSA signer is abstracted over [`EcdsaSignerBackend`] so its key can live
 /// in a hosted backend (e.g. AWS KMS); Falcon stays concrete because hosted
 /// backends only support the secp256k1 ECDSA scheme.
@@ -40,7 +37,7 @@ pub struct AckRegistry {
 impl AckRegistry {
     pub async fn new(keystore_path: PathBuf) -> Result<Self> {
         let ecdsa_backend = EcdsaBackendKind::from_env()?;
-        if is_prod_environment()? {
+        if crate::config::stage::is_prod()? {
             let provider = AwsSecretsManagerProvider::from_env().await?;
             Self::from_provider(keystore_path, ecdsa_backend, Some(&provider)).await
         } else {
@@ -135,19 +132,10 @@ async fn build_ecdsa_signer<P: AckSecretProvider>(
     Ok(MidenEcdsaSigner::new(backend))
 }
 
-fn is_prod_environment() -> Result<bool> {
-    match std::env::var(ENV_GUARDIAN_ENV) {
-        Ok(value) => Ok(value.eq_ignore_ascii_case(PROD_ENV)),
-        Err(std::env::VarError::NotPresent) => Ok(false),
-        Err(std::env::VarError::NotUnicode(_)) => Err(GuardianError::ConfigurationError(format!(
-            "{ENV_GUARDIAN_ENV} must contain valid UTF-8"
-        ))),
-    }
-}
-
 #[cfg(all(test, not(any(feature = "integration", feature = "e2e"))))]
 mod tests {
     use super::*;
+    use crate::error::GuardianError;
     use async_trait::async_trait;
     use miden_keystore::{EcdsaKeyStore, FilesystemEcdsaKeyStore, FilesystemKeyStore, KeyStore};
     use miden_protocol::crypto::dsa::falcon512_poseidon2::SecretKey as FalconSecretKey;
diff --git a/crates/server/src/api/dashboard.rs b/crates/server/src/api/dashboard.rs
index 556a5d7c..5c8a5532 100644
--- a/crates/server/src/api/dashboard.rs
+++ b/crates/server/src/api/dashboard.rs
@@ -173,23 +173,31 @@ pub async fn verify_operator_login(
     security(("operator_session" = [])),
     responses(
         (status = 200, description = "Session invalidated", body = LogoutOperatorResponse),
+        (status = 500, description = "Session revocation failed", body = crate::openapi::ApiErrorResponse),
     )
 )]
 pub async fn logout_operator(
     State(state): State<AppState>,
     headers: HeaderMap,
-) -> impl IntoResponse {
+) -> Result<(
+    StatusCode,
+    [(header::HeaderName, String); 1],
+    Json<LogoutOperatorResponse>,
+)> {
     let token = extract_cookie(&headers, state.dashboard.cookie_name());
+    // Fail closed: if the shared session store cannot revoke (e.g. Postgres is
+    // unavailable), surface the error so the caller can retry instead of being
+    // told a logout succeeded that did not take effect fleet-wide.
     state
         .dashboard
         .logout(token.as_deref(), state.clock.now())
-        .await;
+        .await?;
 
-    (
+    Ok((
         StatusCode::OK,
         [(header::SET_COOKIE, state.dashboard.clear_cookie_header())],
         Json(LogoutOperatorResponse { success: true }),
-    )
+    ))
 }
 
 /// Paginated list of accounts visible to the operator. Requires the
diff --git a/crates/server/src/api/evm.rs b/crates/server/src/api/evm.rs
index 87b4e233..d0e032e2 100644
--- a/crates/server/src/api/evm.rs
+++ b/crates/server/src/api/evm.rs
@@ -173,6 +173,7 @@ pub async fn verify_evm_session(
     security(("evm_session" = [])),
     responses(
         (status = 200, description = "Session invalidated", body = LogoutResponse),
+        (status = 500, description = "Session revocation failed", body = crate::openapi::ApiErrorResponse),
     )
 )]
 pub async fn logout_evm_session(
@@ -180,11 +181,13 @@ pub async fn logout_evm_session(
     headers: HeaderMap,
 ) -> Result<([(header::HeaderName, String); 1], Json<LogoutResponse>)> {
     let token = extract_cookie(&headers, state.evm.sessions.cookie_name());
+    // Fail closed: a revoke failure (e.g. shared store outage) is surfaced so the
+    // caller can retry rather than believing the session was invalidated.
     state
         .evm
         .sessions
         .logout(token.as_deref(), state.clock.now())
-        .await;
+        .await?;
     Ok((
         [(header::SET_COOKIE, state.evm.sessions.clear_cookie_header())],
         Json(LogoutResponse { success: true }),
diff --git a/crates/server/src/builder/handle.rs b/crates/server/src/builder/handle.rs
index 1ad15636..70c7c654 100644
--- a/crates/server/src/builder/handle.rs
+++ b/crates/server/src/builder/handle.rs
@@ -42,6 +42,7 @@ use crate::state::AppState;
 /// Provides methods to run the server with the configured settings.
 pub struct ServerHandle {
     pub(crate) app_state: AppState,
+    pub(crate) leader: std::sync::Arc<dyn crate::coordination::LeaderElector>,
     pub(crate) startup_info: StartupInfo,
     pub(crate) cors_layer: Option<CorsLayer>,
     pub(crate) rate_limit_config: Option<RateLimitConfig>,
@@ -127,13 +128,15 @@ impl ServerHandle {
         // Start background jobs based on canonicalization config
         if self.app_state.canonicalization.is_some() {
             tracing::info!("Starting canonicalization worker");
-            start_canonicalization_worker(self.app_state.clone());
+            start_canonicalization_worker(self.app_state.clone(), self.leader.clone());
         } else {
             tracing::info!(
                 "Running in optimistic mode - deltas accepted without on-chain verification"
             );
         }
 
+        start_session_sweep_worker(self.app_state.clone());
+
         // Start HTTP server if enabled
         if self.http_enabled {
             let state = self.app_state.clone();
@@ -368,3 +371,38 @@ impl ServerHandle {
         }
     }
 }
+
+const SESSION_SWEEP_INTERVAL_SECS: u64 = 60;
+
+/// Periodically reclaim expired operator (and, with the `evm` feature, EVM)
+/// sessions/challenges from the coordination store. Expiry is enforced on read
+/// regardless; this only frees rows (Postgres) or memory (in-memory).
+fn start_session_sweep_worker(state: AppState) {
+    tokio::spawn(async move {
+        let mut ticker =
+            tokio::time::interval(std::time::Duration::from_secs(SESSION_SWEEP_INTERVAL_SECS));
+        // A sweep that overruns the interval (e.g. a slow or unavailable store)
+        // must not be followed by a burst of catch-up sweeps; resume the normal
+        // cadence from the late tick instead of the default `Burst` behavior.
+        ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+        loop {
+            // The first tick completes immediately, so one sweep runs at startup.
+            ticker.tick().await;
+            if let Err(error) = state.dashboard.sweep_expired(state.clock.now()).await {
+                tracing::warn!(
+                    target: "dashboard.session_sweep",
+                    %error,
+                    "operator session/challenge sweep failed",
+                );
+            }
+            #[cfg(feature = "evm")]
+            if let Err(error) = state.evm.sessions.sweep_expired(state.clock.now()).await {
+                tracing::warn!(
+                    target: "evm.session_sweep",
+                    %error,
+                    "EVM session/challenge sweep failed",
+                );
+            }
+        }
+    });
+}
diff --git a/crates/server/src/builder/mod.rs b/crates/server/src/builder/mod.rs
index f71d3030..9ccc659c 100644
--- a/crates/server/src/builder/mod.rs
+++ b/crates/server/src/builder/mod.rs
@@ -41,6 +41,7 @@ pub struct ServerBuilder {
     ack: Option<AckRegistry>,
     canonicalization: Option<CanonicalizationConfig>,
     dashboard: Option<Arc<DashboardState>>,
+    coordination: Option<crate::coordination::CoordinationHandles>,
     logging_config: Option<LoggingConfig>,
     cors_layer: Option<tower_http::cors::CorsLayer>,
     rate_limit_config: Option<RateLimitConfig>,
@@ -63,6 +64,7 @@ impl ServerBuilder {
             ack: None,
             canonicalization: Some(CanonicalizationConfig::default()),
             dashboard: None,
+            coordination: None,
             logging_config: None,
             cors_layer: None,
             rate_limit_config: None,
@@ -181,6 +183,14 @@ impl ServerBuilder {
         self
     }
 
+    /// Coordination store handles selected by the storage backend (Postgres =>
+    /// shared, filesystem => in-memory). Injected into the realm-scoped consumers
+    /// when their state is built from the environment.
+    pub fn coordination(mut self, handles: crate::coordination::CoordinationHandles) -> Self {
+        self.coordination = Some(handles);
+        self
+    }
+
     /// Configure canonicalization mode
     ///
     /// # Arguments
@@ -442,12 +452,59 @@ impl ServerBuilder {
             .ok_or("Auditor not set. Use .auditor(...) — typically populated by StorageMetadataBuilder::build()")?;
 
         let ack = self.ack.ok_or("AckRegistry not set. Use .ack(...)")?;
+        let coordination = self.coordination;
+        // Fail closed before anything else: the Postgres backend must never fall
+        // back to per-process coordination (AlwaysLeader + in-memory sessions),
+        // which would let every replica run canonicalization and split auth
+        // state. Checking here (not only on the dashboard==None path) catches a
+        // manual builder that supplies a custom dashboard but skips coordination.
+        if coordination.is_none() && storage.kind() == crate::storage::StorageType::Postgres {
+            return Err("Postgres storage requires coordination handles for shared \
+                 sessions/challenges and canonicalization leadership; call \
+                 .coordination(...) (populated by StorageMetadataBuilder::build())"
+                .to_string());
+        }
+        let coordination_mode = coordination
+            .as_ref()
+            .map(|handles| handles.mode)
+            .unwrap_or(crate::coordination::CoordinationMode::SingleProcess);
+        let leader: Arc<dyn crate::coordination::LeaderElector> = coordination
+            .as_ref()
+            .map(|handles| handles.leader.clone())
+            .unwrap_or_else(|| {
+                Arc::new(crate::coordination::AlwaysLeader::new(
+                    crate::coordination::CANONICALIZATION_LEASE,
+                    "single-process",
+                ))
+            });
         let dashboard = match self.dashboard {
             Some(dashboard) => dashboard,
-            None => Arc::new(DashboardState::from_env_for_network(network_type).await?),
+            None => match coordination.as_ref() {
+                Some(handles) => Arc::new(
+                    DashboardState::from_env_for_network_with_stores(
+                        network_type,
+                        handles.operator_sessions.clone(),
+                        handles.operator_challenges.clone(),
+                    )
+                    .await?,
+                ),
+                // The Postgres-without-coordination case already failed closed
+                // above, so reaching here with no handles means a non-Postgres
+                // (filesystem/dev) backend using per-process dashboard state.
+                None => Arc::new(DashboardState::from_env_for_network(network_type).await?),
+            },
         };
         #[cfg(feature = "evm")]
-        let evm = Arc::new(EvmAppState::from_env().await?);
+        let evm = {
+            let sessions = match coordination.as_ref() {
+                Some(handles) => crate::evm::EvmSessionState::new(
+                    handles.evm_sessions.clone(),
+                    handles.evm_challenges.clone(),
+                ),
+                None => crate::evm::EvmSessionState::default(),
+            };
+            Arc::new(EvmAppState::from_env_with_sessions(sessions).await?)
+        };
 
         let network_client = MidenNetworkClient::from_network(network_type)
             .await
@@ -456,6 +513,7 @@ impl ServerBuilder {
         let startup_info = startup::StartupInfo::new(
             network_type,
             storage.kind(),
+            coordination_mode.as_str(),
             ack.ecdsa_backend_id(),
             ack.commitment(&SignatureScheme::Falcon),
             ack.commitment(&SignatureScheme::Ecdsa),
@@ -467,6 +525,29 @@ impl ServerBuilder {
             metrics_config.enabled.then_some(metrics_config.bind_addr),
         );
 
+        // Prod fail-fast: an enabled rate limit that partitions to 0 per replica
+        // (global limit below GUARDIAN_MAX_REPLICAS) silently throttles all
+        // traffic on every replica. Mirror the filesystem-backend prod guard and
+        // refuse to start rather than serve a fleet that denies every request.
+        // By contrast, a missing cursor secret only warns and still boots; it
+        // is not a prod guard. In non-prod stages the zero-limit case keeps the
+        // warning emitted by RateLimitConfig::from_env and startup continues.
+        let rate_limit_config = self
+            .rate_limit_config
+            .unwrap_or_else(RateLimitConfig::from_env);
+        if crate::config::stage::is_prod().map_err(|error| error.to_string())?
+            && rate_limit_config.enabled
+            && (rate_limit_config.burst_per_sec == 0 || rate_limit_config.per_min == 0)
+        {
+            return Err(
+                "rate limiting partitions to 0 requests per replica in the prod stage \
+                 (GUARDIAN_ENV=prod): a global GUARDIAN_RATE_BURST_PER_SEC/GUARDIAN_RATE_PER_MIN \
+                 below GUARDIAN_MAX_REPLICAS makes every replica throttle all traffic. Raise the \
+                 global rate limit or lower GUARDIAN_MAX_REPLICAS."
+                    .to_string(),
+            );
+        }
+
         let app_state = AppState {
             storage,
             metadata,
@@ -482,9 +563,10 @@ impl ServerBuilder {
 
         Ok(ServerHandle {
             app_state,
+            leader,
             startup_info,
             cors_layer: self.cors_layer,
-            rate_limit_config: self.rate_limit_config,
+            rate_limit_config: Some(rate_limit_config),
             body_limit_config: self.body_limit_config,
             metrics_config,
             http_enabled: self.http_enabled,
diff --git a/crates/server/src/builder/startup.rs b/crates/server/src/builder/startup.rs
index 52c01b37..cd29e0d8 100644
--- a/crates/server/src/builder/startup.rs
+++ b/crates/server/src/builder/startup.rs
@@ -19,6 +19,7 @@ use std::net::SocketAddr;
 pub(crate) struct StartupInfo {
     network: NetworkType,
     storage: StorageType,
+    coordination_mode: &'static str,
     ecdsa_backend: &'static str,
     falcon_commitment: String,
     ecdsa_commitment: String,
@@ -35,6 +36,7 @@ impl StartupInfo {
     pub(crate) fn new(
         network: NetworkType,
         storage: StorageType,
+        coordination_mode: &'static str,
         ecdsa_backend: &'static str,
         falcon_commitment: String,
         ecdsa_commitment: String,
@@ -48,6 +50,7 @@ impl StartupInfo {
         Self {
             network,
             storage,
+            coordination_mode,
             ecdsa_backend,
             falcon_commitment,
             ecdsa_commitment,
@@ -74,6 +77,22 @@ impl StartupInfo {
             "network"
         );
         tracing::info!(storage = %self.storage, "storage backend");
+        tracing::info!(
+            mode = self.coordination_mode,
+            backend = backend_label(&self.storage),
+            stage = if crate::config::stage::is_prod().unwrap_or(false) {
+                "prod"
+            } else {
+                "non-prod"
+            },
+            max_replicas = %std::env::var("GUARDIAN_MAX_REPLICAS").unwrap_or_else(|_| "1".to_string()),
+            cursor_secret = if self.cursor_secret_configured {
+                "configured"
+            } else {
+                "ephemeral"
+            },
+            "coordination",
+        );
         tracing::info!(
             falcon = "enabled",
             falcon_commitment = %self.falcon_commitment,
@@ -115,6 +134,13 @@ impl StartupInfo {
     }
 }
 
+fn backend_label(storage: &StorageType) -> &'static str {
+    match storage {
+        StorageType::Postgres => "postgres",
+        StorageType::Filesystem => "filesystem",
+    }
+}
+
 fn port_label(port: Option<u16>) -> String {
     match port {
         Some(port) => port.to_string(),
@@ -142,6 +168,7 @@ mod tests {
         let info = StartupInfo::new(
             NetworkType::MidenDevnet,
             StorageType::Postgres,
+            "shared",
             "aws-kms",
             "0xfalcon".to_string(),
             "0xecdsa".to_string(),
@@ -159,6 +186,7 @@ mod tests {
 
         assert_eq!(info.network, NetworkType::MidenDevnet);
         assert_eq!(info.storage, StorageType::Postgres);
+        assert_eq!(info.coordination_mode, "shared");
         assert_eq!(info.ecdsa_backend, "aws-kms");
         assert_eq!(info.falcon_commitment, "0xfalcon");
         assert_eq!(info.ecdsa_commitment, "0xecdsa");
@@ -178,6 +206,7 @@ mod tests {
         let info = StartupInfo::new(
             NetworkType::MidenLocal,
             StorageType::Filesystem,
+            "single-process",
             "in-memory",
             "0xfalcon".to_string(),
             "0xecdsa".to_string(),
@@ -198,6 +227,33 @@ mod tests {
         assert_eq!(info.grpc_port, None);
     }
 
+    #[test]
+    fn backend_label_maps_storage_type() {
+        assert_eq!(backend_label(&StorageType::Postgres), "postgres");
+        assert_eq!(backend_label(&StorageType::Filesystem), "filesystem");
+    }
+
+    #[test]
+    fn coordination_mode_label_is_logged_as_resolved() {
+        let info = StartupInfo::new(
+            NetworkType::MidenDevnet,
+            StorageType::Postgres,
+            "single-process",
+            "in-memory",
+            "0xfalcon".to_string(),
+            "0xecdsa".to_string(),
+            None,
+            0,
+            false,
+            None,
+            None,
+            None,
+        );
+        // Mode reflects the resolved coordination backing passed in, not the
+        // storage type — so it cannot claim "shared" while actually in-memory.
+        assert_eq!(info.coordination_mode, "single-process");
+    }
+
     #[test]
     fn port_label_renders_number_or_disabled() {
         assert_eq!(port_label(Some(3000)), "3000");
diff --git a/crates/server/src/builder/storage.rs b/crates/server/src/builder/storage.rs
index 50a7c57e..73a65fb5 100644
--- a/crates/server/src/builder/storage.rs
+++ b/crates/server/src/builder/storage.rs
@@ -100,6 +100,7 @@ impl StorageMetadataBuilder {
             Arc<dyn StorageBackend>,
             Arc<dyn MetadataStore>,
             SharedAuditor,
+            crate::coordination::CoordinationHandles,
         ),
         String,
     > {
@@ -128,11 +129,20 @@ impl StorageMetadataBuilder {
             let auditor: SharedAuditor = Arc::new(PostgresAuditor::new(metadata.pool_handle()));
 
             let storage = wrap_with_encryption(storage).await?;
-            Ok((storage, Arc::new(metadata), auditor))
+            let holder_id = format!("{}-{:016x}", std::process::id(), rand::random::<u64>());
+            let coordination = crate::coordination::CoordinationHandles::postgres(
+                metadata.pool_handle(),
+                holder_id,
+            );
+
+            Ok((storage, Arc::new(metadata), auditor, coordination))
         }
 
         #[cfg(not(feature = "postgres"))]
         {
+            reject_filesystem_in_prod(
+                crate::config::stage::is_prod().map_err(|error| error.to_string())?,
+            )?;
             let storage_path = self
                 .storage_path
                 .ok_or_else(|| "GUARDIAN_STORAGE_PATH is required".to_string())?;
@@ -153,11 +163,29 @@ impl StorageMetadataBuilder {
             let auditor: SharedAuditor = Arc::new(LogAuditor::new());
 
             let storage = wrap_with_encryption(storage).await?;
-            Ok((storage, Arc::new(metadata), auditor))
+            let coordination = crate::coordination::CoordinationHandles::in_memory();
+
+            Ok((storage, Arc::new(metadata), auditor, coordination))
         }
     }
 }
 
+/// The filesystem backend is single-instance only: its state cannot be shared
+/// across replicas, so it is refused in the prod stage. It remains the default
+/// for local development and tests.
+#[cfg(not(feature = "postgres"))]
+fn reject_filesystem_in_prod(is_prod: bool) -> Result<(), String> {
+    if is_prod {
+        return Err(
+            "the filesystem storage backend is not supported in the prod stage \
+                    (GUARDIAN_ENV=prod): it is single-instance only and cannot be shared across \
+                    replicas. Use the Postgres image and set DATABASE_URL."
+                .to_string(),
+        );
+    }
+    Ok(())
+}
+
 async fn wrap_with_encryption<S>(storage: S) -> Result<Arc<dyn StorageBackend>, String>
 where
     S: StorageBackend + MarkerStore + 'static,
@@ -433,6 +461,19 @@ mod tests {
         }
     }
 
+    #[cfg(not(feature = "postgres"))]
+    #[test]
+    fn filesystem_rejected_in_prod_stage() {
+        assert!(
+            reject_filesystem_in_prod(true).is_err(),
+            "prod stage must refuse the filesystem backend"
+        );
+        assert!(
+            reject_filesystem_in_prod(false).is_ok(),
+            "non-prod tolerates the filesystem backend"
+        );
+    }
+
     #[cfg(not(feature = "postgres"))]
     #[tokio::test]
     async fn test_build_without_storage_path_fails() {
diff --git a/crates/server/src/config/mod.rs b/crates/server/src/config/mod.rs
new file mode 100644
index 00000000..12ef045f
--- /dev/null
+++ b/crates/server/src/config/mod.rs
@@ -0,0 +1 @@
+pub mod stage;
diff --git a/crates/server/src/config/stage.rs b/crates/server/src/config/stage.rs
new file mode 100644
index 00000000..c6b2ef12
--- /dev/null
+++ b/crates/server/src/config/stage.rs
@@ -0,0 +1,16 @@
+use crate::error::{GuardianError, Result};
+
+const ENV_GUARDIAN_ENV: &str = "GUARDIAN_ENV";
+const PROD_ENV: &str = "prod";
+
+/// True when the deployment stage is production (`GUARDIAN_ENV=prod`,
+/// case-insensitive). Gates production-only startup guards.
+pub fn is_prod() -> Result<bool> {
+    match std::env::var(ENV_GUARDIAN_ENV) {
+        Ok(value) => Ok(value.trim().eq_ignore_ascii_case(PROD_ENV)),
+        Err(std::env::VarError::NotPresent) => Ok(false),
+        Err(std::env::VarError::NotUnicode(_)) => Err(GuardianError::ConfigurationError(format!(
+            "{ENV_GUARDIAN_ENV} must contain valid UTF-8"
+        ))),
+    }
+}
diff --git a/crates/server/src/coordination/challenge_store.rs b/crates/server/src/coordination/challenge_store.rs
new file mode 100644
index 00000000..b4571315
--- /dev/null
+++ b/crates/server/src/coordination/challenge_store.rs
@@ -0,0 +1,297 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use guardian_shared::hex::{FromHex, IntoHex};
+use miden_protocol::Word;
+use tokio::sync::Mutex;
+
+use crate::error::{GuardianError, Result};
+
+/// Realm-specific data needed to match a submitted credential against a pending
+/// challenge at verify time. Operator verification re-runs a Falcon signature
+/// check over the signing digest; EVM verification recovers the signer from the
+/// full original challenge fields.
+#[derive(Clone, Debug)]
+pub enum ChallengePayload {
+    /// Signing digest of an operator challenge; `to_value` writes it as hex
+    /// under the `signing_digest` key.
+    OperatorDigest(Word),
+    /// The fields of an EVM challenge; `to_value` writes each one under a key of
+    /// the same name.
+    EvmChallenge {
+        address: String,
+        nonce: String,
+        // Both timestamps are written as RFC 3339 strings by `to_value` and
+        // parsed back (normalised to UTC) by `from_value`.
+        issued_at: DateTime<Utc>,
+        expires_at: DateTime<Utc>,
+    },
+}
+
+impl ChallengePayload {
+    /// Serializes the payload into the JSON object persisted in
+    /// `auth_challenges.payload` (JSONB). A `kind` tag records the variant.
+    /// `Word` is not directly serializable, so the operator digest is written as
+    /// canonical hex; EVM timestamps are written as RFC 3339 strings.
+    pub fn to_value(&self) -> serde_json::Value {
+        match self {
+            Self::OperatorDigest(word) => {
+                let signing_digest = (*word).into_hex();
+                serde_json::json!({
+                    "kind": "operator_digest",
+                    "signing_digest": signing_digest,
+                })
+            }
+            Self::EvmChallenge {
+                address,
+                nonce,
+                issued_at,
+                expires_at,
+            } => {
+                let issued = issued_at.to_rfc3339();
+                let expires = expires_at.to_rfc3339();
+                serde_json::json!({
+                    "kind": "evm_challenge",
+                    "address": address,
+                    "nonce": nonce,
+                    "issued_at": issued,
+                    "expires_at": expires,
+                })
+            }
+        }
+    }
+
+    /// Rebuilds a payload from the JSON produced by [`ChallengePayload::to_value`].
+    ///
+    /// Returns `GuardianError::StorageError` when `kind` is missing or not a
+    /// string, when `kind` is not a known tag, or when a required field is
+    /// missing or fails to parse.
+    pub fn from_value(value: &serde_json::Value) -> Result<Self> {
+        let Some(kind) = value.get("kind").and_then(serde_json::Value::as_str) else {
+            return Err(GuardianError::StorageError(
+                "challenge payload missing kind".to_string(),
+            ));
+        };
+
+        match kind {
+            "operator_digest" => {
+                let digest_hex = string_field(value, "signing_digest")?;
+                Word::from_hex(&digest_hex)
+                    .map(Self::OperatorDigest)
+                    .map_err(GuardianError::StorageError)
+            }
+            "evm_challenge" => {
+                // Fields are read in declaration order so the first missing one
+                // is the one reported.
+                let address = string_field(value, "address")?;
+                let nonce = string_field(value, "nonce")?;
+                let issued_at = time_field(value, "issued_at")?;
+                let expires_at = time_field(value, "expires_at")?;
+                Ok(Self::EvmChallenge {
+                    address,
+                    nonce,
+                    issued_at,
+                    expires_at,
+                })
+            }
+            unknown => Err(GuardianError::StorageError(format!(
+                "unknown challenge payload kind: {unknown}"
+            ))),
+        }
+    }
+}
+
+/// Reads `key` from `value` as an owned string. A missing key or a non-string
+/// value yields a `StorageError` naming the key.
+fn string_field(value: &serde_json::Value, key: &str) -> Result<String> {
+    match value.get(key).and_then(serde_json::Value::as_str) {
+        Some(text) => Ok(text.to_string()),
+        None => Err(GuardianError::StorageError(format!(
+            "challenge payload missing {key}"
+        ))),
+    }
+}
+
+/// Reads `key` from `value` as an RFC 3339 timestamp and converts it to UTC.
+/// A missing field or an unparsable timestamp yields a `StorageError`.
+fn time_field(value: &serde_json::Value, key: &str) -> Result<DateTime<Utc>> {
+    let text = string_field(value, key)?;
+    match DateTime::parse_from_rfc3339(&text) {
+        Ok(parsed) => Ok(parsed.with_timezone(&Utc)),
+        Err(error) => Err(GuardianError::StorageError(format!(
+            "challenge payload {key} invalid: {error}"
+        ))),
+    }
+}
+
+/// A pending challenge as held by a [`ChallengeStore`].
+#[derive(Clone, Debug)]
+pub struct StoredChallenge {
+    /// Identifier passed to [`ChallengeStore::consume`] to claim this challenge.
+    pub key: String,
+    /// Realm-specific data used to match a submitted credential.
+    pub payload: ChallengePayload,
+    /// Issue time; the stores drop the oldest `issued_at` first when capping.
+    pub issued_at: DateTime<Utc>,
+    /// The stores only treat the challenge as active while this is in the future.
+    pub expires_at: DateTime<Utc>,
+}
+
+/// A realm-scoped store of pending login challenges grouped by principal
+/// (operator commitment or EVM address). Verification matches a returned
+/// credential against the active challenges in Rust, then claims the matched one
+/// via [`ChallengeStore::consume`], which is single-use across replicas.
+#[async_trait]
+pub trait ChallengeStore: Send + Sync {
+    /// Records `challenge` for `principal`, then keeps at most `max_outstanding`
+    /// challenges for that principal, dropping those with the oldest `issued_at`
+    /// first. `now` is the caller's clock; an implementation may ignore it in
+    /// favour of its own clock.
+    async fn issue(
+        &self,
+        principal: &str,
+        challenge: StoredChallenge,
+        max_outstanding: usize,
+        now: DateTime<Utc>,
+    ) -> Result<()>;
+    /// Returns the challenges for `principal` that have not expired.
+    async fn active_for(&self, principal: &str, now: DateTime<Utc>)
+    -> Result<Vec<StoredChallenge>>;
+    /// Claims the challenge identified by `key` for `principal`. Returns
+    /// `Ok(true)` only when an unexpired challenge was claimed by this call.
+    async fn consume(&self, principal: &str, key: &str, now: DateTime<Utc>) -> Result<bool>;
+    /// Removes expired challenges and returns how many were removed.
+    async fn sweep_expired(&self, now: DateTime<Utc>) -> Result<u64>;
+}
+
+/// In-process [`ChallengeStore`]. Clones share the same underlying map because
+/// the map lives behind an `Arc`.
+#[derive(Clone, Default)]
+pub struct InMemoryChallengeStore {
+    // principal -> that principal's pending challenges, guarded by one async mutex.
+    challenges: Arc<Mutex<HashMap<String, Vec<StoredChallenge>>>>,
+}
+
+impl InMemoryChallengeStore {
+    /// Creates an empty store (same as `Default`).
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+#[async_trait]
+impl ChallengeStore for InMemoryChallengeStore {
+    /// Drops the principal's expired challenges, appends `challenge`, and if the
+    /// list now exceeds `max_outstanding` removes the entries with the oldest
+    /// `issued_at` until it fits.
+    async fn issue(
+        &self,
+        principal: &str,
+        challenge: StoredChallenge,
+        max_outstanding: usize,
+        now: DateTime<Utc>,
+    ) -> Result<()> {
+        let mut guard = self.challenges.lock().await;
+        let queue = guard.entry(principal.to_string()).or_default();
+        queue.retain(|entry| entry.expires_at > now);
+        queue.push(challenge);
+
+        let overflow = queue.len().saturating_sub(max_outstanding);
+        if overflow > 0 {
+            // Oldest first, so the front of the list is what gets evicted.
+            queue.sort_by_key(|entry| entry.issued_at);
+            queue.drain(..overflow);
+        }
+        Ok(())
+    }
+
+    /// Returns clones of the principal's challenges whose `expires_at` is still
+    /// after `now`; an unknown principal yields an empty list.
+    async fn active_for(
+        &self,
+        principal: &str,
+        now: DateTime<Utc>,
+    ) -> Result<Vec<StoredChallenge>> {
+        let guard = self.challenges.lock().await;
+        let Some(queue) = guard.get(principal) else {
+            return Ok(Vec::new());
+        };
+        let live = queue
+            .iter()
+            .filter(|entry| entry.expires_at > now)
+            .cloned()
+            .collect();
+        Ok(live)
+    }
+
+    /// Removes the first unexpired challenge with `key` for `principal` and
+    /// returns `true`; returns `false` when there is no such challenge. The
+    /// principal's map entry is removed once its list becomes empty.
+    async fn consume(&self, principal: &str, key: &str, now: DateTime<Utc>) -> Result<bool> {
+        let mut guard = self.challenges.lock().await;
+        let Some(queue) = guard.get_mut(principal) else {
+            return Ok(false);
+        };
+        let found = queue
+            .iter()
+            .position(|entry| entry.key == key && entry.expires_at > now);
+        match found {
+            None => Ok(false),
+            Some(index) => {
+                queue.remove(index);
+                if queue.is_empty() {
+                    guard.remove(principal);
+                }
+                Ok(true)
+            }
+        }
+    }
+
+    /// Drops every challenge whose `expires_at` is not after `now`, removes
+    /// principals left with no challenges, and returns the number dropped.
+    async fn sweep_expired(&self, now: DateTime<Utc>) -> Result<u64> {
+        let mut guard = self.challenges.lock().await;
+        let mut removed: usize = 0;
+        for queue in guard.values_mut() {
+            let before = queue.len();
+            queue.retain(|entry| entry.expires_at > now);
+            removed += before - queue.len();
+        }
+        guard.retain(|_, queue| !queue.is_empty());
+        Ok(removed as u64)
+    }
+}
+
+#[cfg(all(test, not(any(feature = "integration", feature = "e2e"))))]
+mod tests {
+    use super::*;
+    use chrono::Duration;
+
+    /// Builds an EVM challenge keyed by `key`, issued at `now` and expiring
+    /// `ttl_secs` seconds later.
+    fn challenge(key: &str, now: DateTime<Utc>, ttl_secs: i64) -> StoredChallenge {
+        StoredChallenge {
+            key: key.to_string(),
+            payload: ChallengePayload::EvmChallenge {
+                address: "0x1".to_string(),
+                nonce: key.to_string(),
+                issued_at: now,
+                expires_at: now + Duration::seconds(ttl_secs),
+            },
+            issued_at: now,
+            expires_at: now + Duration::seconds(ttl_secs),
+        }
+    }
+
+    /// A second consume of the same key must report `false`.
+    #[tokio::test]
+    async fn consume_is_single_use() {
+        let store = InMemoryChallengeStore::new();
+        let now = Utc::now();
+        store
+            .issue("0xp", challenge("k1", now, 60), 8, now)
+            .await
+            .unwrap();
+
+        assert!(store.consume("0xp", "k1", now).await.unwrap());
+        assert!(!store.consume("0xp", "k1", now).await.unwrap());
+    }
+
+    /// A challenge is listed before its expiry and hidden after it.
+    #[tokio::test]
+    async fn active_for_hides_expired() {
+        let store = InMemoryChallengeStore::new();
+        let now = Utc::now();
+        store
+            .issue("0xp", challenge("k1", now, 10), 8, now)
+            .await
+            .unwrap();
+
+        assert_eq!(store.active_for("0xp", now).await.unwrap().len(), 1);
+        assert!(
+            store
+                .active_for("0xp", now + Duration::seconds(11))
+                .await
+                .unwrap()
+                .is_empty()
+        );
+    }
+
+    /// Issuing five challenges with a cap of three keeps only the three newest.
+    #[tokio::test]
+    async fn issue_caps_outstanding_dropping_oldest() {
+        let store = InMemoryChallengeStore::new();
+        let now = Utc::now();
+        for i in 0..5 {
+            let issued = now + Duration::seconds(i);
+            let mut c = challenge(&format!("k{i}"), now, 600);
+            c.issued_at = issued;
+            store.issue("0xp", c, 3, now).await.unwrap();
+        }
+        let active = store.active_for("0xp", now).await.unwrap();
+        assert_eq!(active.len(), 3);
+        assert!(active.iter().all(|c| c.key != "k0" && c.key != "k1"));
+    }
+
+    /// Consuming for a principal that never issued anything is `false`, not an error.
+    #[tokio::test]
+    async fn consume_unknown_principal_is_false() {
+        let store = InMemoryChallengeStore::new();
+        assert!(!store.consume("0xnope", "k1", Utc::now()).await.unwrap());
+    }
+
+    /// Every EVM field, timestamps included, must survive `to_value` followed by
+    /// `from_value`.
+    #[test]
+    fn evm_payload_round_trips_through_json() {
+        let issued: DateTime<Utc> = "2026-06-23T00:00:00+00:00".parse().unwrap();
+        let expires: DateTime<Utc> = "2026-06-23T00:05:00+00:00".parse().unwrap();
+        let payload = ChallengePayload::EvmChallenge {
+            address: "0xabc".to_string(),
+            nonce: "0xdead".to_string(),
+            issued_at: issued,
+            expires_at: expires,
+        };
+        let value = payload.to_value();
+        let restored = ChallengePayload::from_value(&value).unwrap();
+        match restored {
+            ChallengePayload::EvmChallenge {
+                address,
+                nonce,
+                issued_at,
+                expires_at,
+            } => {
+                assert_eq!(address, "0xabc");
+                assert_eq!(nonce, "0xdead");
+                // The timestamps are part of the stored challenge too; without
+                // these checks a broken timestamp encoding would go unnoticed.
+                assert_eq!(issued_at, issued);
+                assert_eq!(expires_at, expires);
+            }
+            _ => panic!("expected evm challenge"),
+        }
+    }
+
+    /// An unrecognised `kind` tag is an error rather than a silent default.
+    #[test]
+    fn unknown_payload_kind_is_rejected() {
+        let value = serde_json::json!({ "kind": "nope" });
+        assert!(ChallengePayload::from_value(&value).is_err());
+    }
+}
diff --git a/crates/server/src/coordination/leader.rs b/crates/server/src/coordination/leader.rs
new file mode 100644
index 00000000..1301cd09
--- /dev/null
+++ b/crates/server/src/coordination/leader.rs
@@ -0,0 +1,94 @@
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use std::time::Duration;
+
+use crate::error::Result;
+
+/// A held leadership lease. `fence_token` strictly increases on every
+/// (re)acquisition so a superseded holder can be detected at the write boundary.
+///
+/// NOTE(review): `AlwaysLeader` always reports `fence_token: 0`, and the
+/// `PgLeaseElector` docs say the token only advances when ownership changes.
+/// Confirm which wording is intended and align the three.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Lease {
+    /// Name of the lease this value refers to.
+    pub name: String,
+    /// Identifier of the holder this lease was handed to.
+    pub holder_id: String,
+    pub fence_token: i64,
+    pub expires_at: DateTime<Utc>,
+}
+
+/// Coordinates single-owner background work across replicas. `renew` runs on its
+/// own timer concurrent with the protected work; a `false` return means the lease
+/// was lost. `verify_held` is the mandatory fence check the holder runs
+/// immediately before any state-mutating write.
+#[async_trait]
+pub trait LeaderElector: Send + Sync {
+    /// Attempts to take the lease for `ttl`. `Ok(Some(lease))` means it was
+    /// acquired; `Ok(None)` presumably means it is held elsewhere — confirm
+    /// against the implementations.
+    async fn try_acquire(&self, ttl: Duration) -> Result<Option<Lease>>;
+    /// Extends `lease` by `ttl`; `Ok(false)` means the lease was lost.
+    async fn renew(&self, lease: &Lease, ttl: Duration) -> Result<bool>;
+    /// Fence check: reports whether `lease` is still held.
+    async fn verify_held(&self, lease: &Lease) -> Result<bool>;
+    /// Gives up `lease`, taking it by value.
+    async fn release(&self, lease: Lease) -> Result<()>;
+}
+
+/// Single-process elector: the only replica is always the leader. Used on the
+/// filesystem backend, where no shared coordination store exists.
+pub struct AlwaysLeader {
+    // Both values are copied verbatim into every lease this elector hands out.
+    name: String,
+    holder_id: String,
+}
+
+impl AlwaysLeader {
+    /// Builds an elector whose leases carry the given `name` and `holder_id`.
+    pub fn new(name: impl Into<String>, holder_id: impl Into<String>) -> Self {
+        let name = name.into();
+        let holder_id = holder_id.into();
+        Self { name, holder_id }
+    }
+
+    /// Produces a lease with fence token `0` that expires at
+    /// `DateTime::<Utc>::MAX_UTC`.
+    fn lease(&self) -> Lease {
+        let Self { name, holder_id } = self;
+        Lease {
+            name: name.clone(),
+            holder_id: holder_id.clone(),
+            fence_token: 0,
+            expires_at: DateTime::<Utc>::MAX_UTC,
+        }
+    }
+}
+
+/// Every operation succeeds unconditionally; the TTL and the passed lease are
+/// ignored.
+#[async_trait]
+impl LeaderElector for AlwaysLeader {
+    /// Always returns a lease.
+    async fn try_acquire(&self, _ttl: Duration) -> Result<Option<Lease>> {
+        Ok(Some(self.lease()))
+    }
+
+    /// Always reports the lease as renewed.
+    async fn renew(&self, _lease: &Lease, _ttl: Duration) -> Result<bool> {
+        Ok(true)
+    }
+
+    /// Always reports the lease as held.
+    async fn verify_held(&self, _lease: &Lease) -> Result<bool> {
+        Ok(true)
+    }
+
+    /// No-op: there is nothing to release.
+    async fn release(&self, _lease: Lease) -> Result<()> {
+        Ok(())
+    }
+}
+
+#[cfg(all(test, not(any(feature = "integration", feature = "e2e"))))]
+mod tests {
+    use super::*;
+
+    /// The single-process elector acquires, renews and verifies without fail.
+    #[tokio::test]
+    async fn always_leader_acquires_renews_and_verifies() {
+        let ttl = Duration::from_secs(30);
+        let elector = AlwaysLeader::new("canonicalization", "single-process");
+
+        let acquired = elector.try_acquire(ttl).await.unwrap();
+        let lease = acquired.expect("always leader acquires");
+        assert_eq!(lease.holder_id, "single-process");
+
+        let renewed = elector.renew(&lease, ttl).await.unwrap();
+        assert!(renewed);
+
+        let held = elector.verify_held(&lease).await.unwrap();
+        assert!(held);
+    }
+}
diff --git a/crates/server/src/coordination/mod.rs b/crates/server/src/coordination/mod.rs
new file mode 100644
index 00000000..8a69d797
--- /dev/null
+++ b/crates/server/src/coordination/mod.rs
@@ -0,0 +1,103 @@
+pub mod challenge_store;
+pub mod leader;
+#[cfg(feature = "postgres")]
+pub mod postgres;
+pub mod session_store;
+
+pub use challenge_store::{
+    ChallengePayload, ChallengeStore, InMemoryChallengeStore, StoredChallenge,
+};
+pub use leader::{AlwaysLeader, LeaderElector, Lease};
+pub use session_store::{
+    InMemorySessionStore, SessionKey, SessionStore, SessionSubject, StoredSession,
+};
+
+use std::sync::Arc;
+
+/// Whether coordination is backed by the shared external store (replica-safe) or
+/// is single-process in-memory. Carried on the handles so the startup log and
+/// guards reflect the **actual** resolved backing, not an inference.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum CoordinationMode {
+    /// Set by `CoordinationHandles::postgres`.
+    Shared,
+    /// Set by `CoordinationHandles::in_memory`.
+    SingleProcess,
+}
+
+impl CoordinationMode {
+    /// Stable lowercase label for the mode: `"shared"` or `"single-process"`.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Shared => "shared",
+            Self::SingleProcess => "single-process",
+        }
+    }
+}
+
+/// Lease name for the single-owner canonicalization worker.
+pub const CANONICALIZATION_LEASE: &str = "canonicalization";
+
+/// Coordination store handles selected by the storage backend, threaded from the
+/// storage builder (where the Postgres pool is available) into the realm-scoped
+/// consumers.
+#[derive(Clone)]
+pub struct CoordinationHandles {
+    /// Which backing the constructor that built these handles selected.
+    pub mode: CoordinationMode,
+    pub operator_sessions: Arc<dyn SessionStore>,
+    pub operator_challenges: Arc<dyn ChallengeStore>,
+    /// Elector created for the `CANONICALIZATION_LEASE` lease.
+    pub leader: Arc<dyn LeaderElector>,
+    // The EVM stores only exist when the `evm` feature is compiled in.
+    #[cfg(feature = "evm")]
+    pub evm_sessions: Arc<dyn SessionStore>,
+    #[cfg(feature = "evm")]
+    pub evm_challenges: Arc<dyn ChallengeStore>,
+}
+
+impl CoordinationHandles {
+    /// Single-process handles: every store is a fresh in-memory instance (the
+    /// operator and EVM realms do not share one) and the elector is
+    /// `AlwaysLeader` with holder id `"single-process"`.
+    pub fn in_memory() -> Self {
+        Self {
+            mode: CoordinationMode::SingleProcess,
+            operator_sessions: Arc::new(InMemorySessionStore::new()),
+            operator_challenges: Arc::new(InMemoryChallengeStore::new()),
+            leader: Arc::new(AlwaysLeader::new(CANONICALIZATION_LEASE, "single-process")),
+            #[cfg(feature = "evm")]
+            evm_sessions: Arc::new(InMemorySessionStore::new()),
+            #[cfg(feature = "evm")]
+            evm_challenges: Arc::new(InMemoryChallengeStore::new()),
+        }
+    }
+
+    /// Shared handles backed by `pool`. Each store gets a clone of the pool and
+    /// is bound to its realm; the lease elector is created for
+    /// `CANONICALIZATION_LEASE` with `holder_id`.
+    #[cfg(feature = "postgres")]
+    pub fn postgres(
+        pool: diesel_async::pooled_connection::deadpool::Pool<diesel_async::AsyncPgConnection>,
+        holder_id: String,
+    ) -> Self {
+        use postgres::{PgChallengeStore, PgLeaseElector, PgSessionStore};
+        Self {
+            mode: CoordinationMode::Shared,
+            operator_sessions: Arc::new(PgSessionStore::new(pool.clone(), Realm::Operator)),
+            operator_challenges: Arc::new(PgChallengeStore::new(pool.clone(), Realm::Operator)),
+            leader: Arc::new(PgLeaseElector::new(
+                pool.clone(),
+                CANONICALIZATION_LEASE,
+                holder_id,
+            )),
+            #[cfg(feature = "evm")]
+            evm_sessions: Arc::new(PgSessionStore::new(pool.clone(), Realm::Evm)),
+            // The last (cfg-gated) user takes `pool` by value instead of cloning.
+            #[cfg(feature = "evm")]
+            evm_challenges: Arc::new(PgChallengeStore::new(pool, Realm::Evm)),
+        }
+    }
+}
+
+/// Authentication realm a coordination store is bound to. The Postgres stores
+/// use [`Realm::as_str`] as the value of the `realm` column.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Realm {
+    Operator,
+    Evm,
+}
+
+impl Realm {
+    /// Lowercase label for the realm: `"operator"` or `"evm"`.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Operator => "operator",
+            Self::Evm => "evm",
+        }
+    }
+}
diff --git a/crates/server/src/coordination/postgres/challenge_store.rs b/crates/server/src/coordination/postgres/challenge_store.rs
new file mode 100644
index 00000000..2f24dbd7
--- /dev/null
+++ b/crates/server/src/coordination/postgres/challenge_store.rs
@@ -0,0 +1,279 @@
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use diesel::prelude::*;
+use diesel::sql_types::{BigInt, Double, Jsonb, Text};
+use diesel_async::pooled_connection::deadpool::Pool;
+use diesel_async::scoped_futures::ScopedFutureExt;
+use diesel_async::{AsyncConnection, AsyncPgConnection, RunQueryDsl};
+
+use crate::coordination::Realm;
+use crate::coordination::challenge_store::{ChallengePayload, ChallengeStore, StoredChallenge};
+use crate::error::{GuardianError, Result};
+use crate::schema::auth_challenges;
+
+/// One row of `auth_challenges` as loaded by `PgChallengeStore::active_for`.
+#[derive(Queryable, Selectable)]
+#[diesel(table_name = auth_challenges)]
+#[diesel(check_for_backend(diesel::pg::Pg))]
+// `realm`, `principal` and `consumed_at` are selected but not read by
+// `into_stored`, hence the lint allowance.
+#[allow(dead_code)]
+struct AuthChallengeRow {
+    realm: String,
+    challenge_key: String,
+    principal: String,
+    // JSONB payload, decoded with `ChallengePayload::from_value`.
+    payload: serde_json::Value,
+    issued_at: DateTime<Utc>,
+    expires_at: DateTime<Utc>,
+    consumed_at: Option<DateTime<Utc>>,
+}
+
+impl AuthChallengeRow {
+    /// Converts the row into a [`StoredChallenge`], decoding the JSONB payload.
+    /// Fails with the decoder's error when the payload is malformed; `realm`,
+    /// `principal` and `consumed_at` are discarded.
+    fn into_stored(self) -> Result<StoredChallenge> {
+        let Self {
+            challenge_key,
+            payload,
+            issued_at,
+            expires_at,
+            ..
+        } = self;
+        let payload = ChallengePayload::from_value(&payload)?;
+        Ok(StoredChallenge {
+            key: challenge_key,
+            payload,
+            issued_at,
+            expires_at,
+        })
+    }
+}
+
+/// Postgres-backed [`ChallengeStore`] bound to one realm. Verification matches
+/// in Rust over [`ChallengeStore::active_for`]; [`ChallengeStore::consume`] is an
+/// atomic single-use claim keyed by `(realm, challenge_key)`.
+pub struct PgChallengeStore {
+    pool: Pool<AsyncPgConnection>,
+    // Every query this store issues is filtered or keyed by this realm.
+    realm: Realm,
+}
+
+impl PgChallengeStore {
+    /// Binds a store to `realm` on top of `pool`; no connection is opened here.
+    pub fn new(pool: Pool<AsyncPgConnection>, realm: Realm) -> Self {
+        Self { pool, realm }
+    }
+}
+
+#[async_trait]
+impl ChallengeStore for PgChallengeStore {
+    /// Upserts `challenge` for `principal`, then prunes that principal's rows in
+    /// the same transaction: expired rows are deleted and only the
+    /// `max_outstanding` newest *unconsumed* rows (by `issued_at`) are kept.
+    ///
+    /// `_now` is ignored: `issued_at`/`expires_at` are anchored on the database
+    /// clock, with the TTL taken from `challenge.expires_at - challenge.issued_at`
+    /// (whole seconds, clamped at zero). Any database failure is returned as
+    /// `GuardianError::StorageError`.
+    async fn issue(
+        &self,
+        principal: &str,
+        challenge: StoredChallenge,
+        max_outstanding: usize,
+        _now: DateTime<Utc>,
+    ) -> Result<()> {
+        let mut conn = super::checkout(&self.pool, "challenge").await?;
+
+        let realm = self.realm.as_str().to_string();
+        let principal = principal.to_string();
+        let challenge_key = challenge.key;
+        let payload = challenge.payload.to_value();
+        // The duration is clock-independent (both ends app-computed); anchoring on
+        // the DB clock for the stored row keeps expiry/capping consistent across
+        // replicas regardless of per-process clock skew.
+        let ttl_secs = (challenge.expires_at - challenge.issued_at)
+            .num_seconds()
+            .max(0) as f64;
+        // Saturate rather than wrap: an `as` cast of a very large `usize` would
+        // turn negative, and Postgres rejects a negative OFFSET.
+        let max = i64::try_from(max_outstanding).unwrap_or(i64::MAX);
+        let lock_key = format!("{realm}|{principal}");
+
+        conn.transaction::<(), diesel::result::Error, _>(|conn| {
+            async move {
+                // Serialize concurrent issuance for this (realm, principal) so the
+                // insert + cap-trim below sees a consistent row set; without it two
+                // racing issues can each trim to `max` independently and leave
+                // `max + 1` outstanding. The xact lock auto-releases at commit and
+                // only contends per principal, not across the table.
+                diesel::sql_query("SELECT pg_advisory_xact_lock(hashtextextended($1, 0))")
+                    .bind::<Text, _>(&lock_key)
+                    .execute(conn)
+                    .await?;
+
+                // ON CONFLICT refreshes a re-issued challenge (latest wins,
+                // re-arming consumed_at) rather than aborting the transaction on
+                // a duplicate `(realm, challenge_key)`. Keys are random nonces /
+                // unique digests so a collision is practically a re-issue; this
+                // matches InMemoryChallengeStore, which tolerates re-issue.
+                diesel::sql_query(
+                    "INSERT INTO auth_challenges \
+                     (realm, challenge_key, principal, payload, issued_at, expires_at) \
+                     VALUES ($1, $2, $3, $4, now(), now() + make_interval(secs => $5)) \
+                     ON CONFLICT (realm, challenge_key) DO UPDATE SET \
+                         principal = EXCLUDED.principal, \
+                         payload = EXCLUDED.payload, \
+                         issued_at = EXCLUDED.issued_at, \
+                         expires_at = EXCLUDED.expires_at, \
+                         consumed_at = NULL",
+                )
+                .bind::<Text, _>(&realm)
+                .bind::<Text, _>(&challenge_key)
+                .bind::<Text, _>(&principal)
+                .bind::<Jsonb, _>(&payload)
+                .bind::<Double, _>(ttl_secs)
+                .execute(conn)
+                .await?;
+
+                // Drop this principal's expired rows before counting towards the cap.
+                diesel::sql_query(
+                    "DELETE FROM auth_challenges \
+                     WHERE realm = $1 AND principal = $2 AND expires_at < now()",
+                )
+                .bind::<Text, _>(&realm)
+                .bind::<Text, _>(&principal)
+                .execute(conn)
+                .await?;
+
+                // Cap trim: keep the `max` newest pending rows and delete the rest.
+                // Consumed rows are already spent, so they are excluded here —
+                // otherwise recent successful logins would occupy cap slots and
+                // evict older challenges that are still pending. This matches
+                // InMemoryChallengeStore, which removes a challenge on consume.
+                // Consumed rows are left for the expiry deletes to clean up.
+                diesel::sql_query(
+                    "DELETE FROM auth_challenges WHERE ctid IN (\
+                     SELECT ctid FROM auth_challenges \
+                     WHERE realm = $1 AND principal = $2 AND consumed_at IS NULL \
+                     ORDER BY issued_at DESC OFFSET $3)",
+                )
+                .bind::<Text, _>(&realm)
+                .bind::<Text, _>(&principal)
+                .bind::<BigInt, _>(max)
+                .execute(conn)
+                .await?;
+
+                Ok(())
+            }
+            .scope_boxed()
+        })
+        .await
+        .map_err(|error| GuardianError::StorageError(format!("challenge issue: {error}")))?;
+
+        Ok(())
+    }
+
+    /// Loads this realm's rows for `principal` that are unconsumed and not yet
+    /// expired by the database clock (`_now` is ignored), decoding each payload.
+    /// A load failure or a malformed payload is returned as an error.
+    async fn active_for(
+        &self,
+        principal: &str,
+        _now: DateTime<Utc>,
+    ) -> Result<Vec<StoredChallenge>> {
+        let mut conn = super::checkout(&self.pool, "challenge").await?;
+        let rows = auth_challenges::table
+            .filter(auth_challenges::realm.eq(self.realm.as_str()))
+            .filter(auth_challenges::principal.eq(principal))
+            .filter(auth_challenges::consumed_at.is_null())
+            .filter(auth_challenges::expires_at.gt(diesel::dsl::now))
+            .select(AuthChallengeRow::as_select())
+            .load(&mut conn)
+            .await
+            .map_err(|error| GuardianError::StorageError(format!("challenge load: {error}")))?;
+        rows.into_iter()
+            .map(AuthChallengeRow::into_stored)
+            .collect()
+    }
+
+    /// Marks the matching unconsumed, unexpired row as consumed in one UPDATE
+    /// and returns `true` only when exactly one row was affected. `_now` is
+    /// ignored in favour of the database clock.
+    async fn consume(&self, principal: &str, key: &str, _now: DateTime<Utc>) -> Result<bool> {
+        let mut conn = super::checkout(&self.pool, "challenge").await?;
+        // `principal` is part of the predicate (not just `(realm, key)`) so the
+        // Postgres and in-memory impls agree that a wrong-principal consume fails.
+        let affected = diesel::update(auth_challenges::table)
+            .filter(auth_challenges::realm.eq(self.realm.as_str()))
+            .filter(auth_challenges::principal.eq(principal))
+            .filter(auth_challenges::challenge_key.eq(key))
+            .filter(auth_challenges::consumed_at.is_null())
+            .filter(auth_challenges::expires_at.gt(diesel::dsl::now))
+            .set(auth_challenges::consumed_at.eq(diesel::dsl::now))
+            .execute(&mut conn)
+            .await
+            .map_err(|error| GuardianError::StorageError(format!("challenge consume: {error}")))?;
+        Ok(affected == 1)
+    }
+
+    /// Deletes this realm's rows that are expired by the database clock
+    /// (consumed or not) and returns the number deleted. `_now` is ignored.
+    async fn sweep_expired(&self, _now: DateTime<Utc>) -> Result<u64> {
+        let mut conn = super::checkout(&self.pool, "challenge").await?;
+        let deleted = diesel::delete(auth_challenges::table)
+            .filter(auth_challenges::realm.eq(self.realm.as_str()))
+            .filter(auth_challenges::expires_at.lt(diesel::dsl::now))
+            .execute(&mut conn)
+            .await
+            .map_err(|error| GuardianError::StorageError(format!("challenge sweep: {error}")))?;
+        Ok(deleted as u64)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::storage::postgres::{build_postgres_pool_lazy, run_migrations};
+    use chrono::Duration;
+
+    /// `DATABASE_URL` if it is set and not blank.
+    fn database_url() -> Option<String> {
+        std::env::var("DATABASE_URL")
+            .ok()
+            .filter(|url| !url.trim().is_empty())
+    }
+
+    /// Runs without a database: the pool points at an address where nothing is
+    /// expected to listen, so the lookup must surface an error instead of an
+    /// empty list.
+    #[tokio::test]
+    async fn active_for_fails_closed_when_store_unreachable() {
+        let pool = build_postgres_pool_lazy("postgresql://127.0.0.1:1/__guardian_coord_fault__", 1)
+            .expect("lazy pool builds even with an unreachable address");
+        let store = PgChallengeStore::new(pool, Realm::Operator);
+        assert!(
+            store.active_for("0xprincipal", Utc::now()).await.is_err(),
+            "challenge lookup must fail closed when the store is unreachable",
+        );
+    }
+
+    /// Two stores with separate pools stand in for two replicas: a challenge
+    /// issued through one is visible and consumable through the other, and a
+    /// second consume loses.
+    #[tokio::test]
+    #[ignore = "requires DATABASE_URL with migrations applied"]
+    async fn challenge_is_single_use_across_replicas() {
+        let url = database_url().expect("DATABASE_URL must be set for this #[ignore] test");
+        run_migrations(&url).await.expect("migrations apply");
+        let replica_a = PgChallengeStore::new(
+            build_postgres_pool_lazy(&url, 2).expect("pool a"),
+            Realm::Evm,
+        );
+        let replica_b = PgChallengeStore::new(
+            build_postgres_pool_lazy(&url, 2).expect("pool b"),
+            Realm::Evm,
+        );
+        let now = Utc::now();
+        // A microsecond timestamp keeps principal/key distinct between runs
+        // against the same database.
+        let stamp = now.timestamp_micros();
+        let principal = format!("0xprincipal-{stamp}");
+        let key = format!("nonce-{stamp}");
+
+        replica_a
+            .issue(
+                &principal,
+                StoredChallenge {
+                    key: key.clone(),
+                    payload: ChallengePayload::EvmChallenge {
+                        address: principal.clone(),
+                        nonce: key.clone(),
+                        issued_at: now,
+                        expires_at: now + Duration::minutes(5),
+                    },
+                    issued_at: now,
+                    expires_at: now + Duration::minutes(5),
+                },
+                8,
+                now,
+            )
+            .await
+            .expect("issue on replica A");
+
+        assert!(
+            replica_b
+                .active_for(&principal, now)
+                .await
+                .expect("active_for on B")
+                .iter()
+                .any(|challenge| challenge.key == key),
+            "a challenge issued on A must be visible on B",
+        );
+
+        assert!(
+            replica_b
+                .consume(&principal, &key, now)
+                .await
+                .expect("consume on B"),
+            "first consume wins on replica B",
+        );
+        assert!(
+            !replica_a
+                .consume(&principal, &key, now)
+                .await
+                .expect("replay consume on A"),
+            "single-use: a replay on replica A must lose",
+        );
+    }
+}
diff --git a/crates/server/src/coordination/postgres/lease.rs b/crates/server/src/coordination/postgres/lease.rs
new file mode 100644
index 00000000..a86f7840
--- /dev/null
+++ b/crates/server/src/coordination/postgres/lease.rs
@@ -0,0 +1,213 @@
+use std::time::Duration;
+
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use diesel::OptionalExtension;
+use diesel::sql_types::{BigInt, Double, Integer, Text, Timestamptz};
+use diesel_async::pooled_connection::deadpool::Pool;
+use diesel_async::{AsyncPgConnection, RunQueryDsl};
+
+use crate::coordination::leader::{LeaderElector, Lease};
+use crate::error::{GuardianError, Result};
+
+#[derive(diesel::QueryableByName)]
+struct AcquireRow { // Row returned by the acquire upsert's `RETURNING fence_token, expires_at`.
+    #[diesel(sql_type = BigInt)]
+    fence_token: i64, // Copied into `Lease::fence_token` by `try_acquire`.
+    #[diesel(sql_type = Timestamptz)]
+    expires_at: DateTime<Utc>, // Copied into `Lease::expires_at` by `try_acquire`.
+}
+
+#[derive(diesel::QueryableByName)]
+struct HeldRow { // Row shape of the `SELECT 1 AS held` probe issued by `verify_held`.
+    #[diesel(sql_type = Integer)]
+    #[allow(dead_code)] // Never read: `verify_held` only checks whether a row came back.
+    held: i32,
+}
+
+/// Postgres lease elector backed by one `worker_leases` row. All timing uses the
+/// database clock so replicas agree. `fence_token` only advances when ownership
+/// changes (a steal), so a holder can detect supersession at its write boundary.
+pub struct PgLeaseElector {
+    pool: Pool<AsyncPgConnection>, // A connection is checked out per call.
+    lease_name: String, // Bound to `worker_leases.lease_name` on acquire.
+    holder_id: String, // Bound to `worker_leases.holder_id` on acquire.
+}
+
+impl PgLeaseElector {
+    /// Builds an elector that competes for `lease_name` under the identity `holder_id`.
+    pub fn new(
+        pool: Pool<AsyncPgConnection>,
+        lease_name: impl Into<String>,
+        holder_id: impl Into<String>,
+    ) -> Self {
+        // Take ownership of both identifiers before assembling the struct.
+        let lease_name: String = lease_name.into();
+        let holder_id: String = holder_id.into();
+        Self { pool, lease_name, holder_id }
+    }
+}
+
+#[async_trait]
+impl LeaderElector for PgLeaseElector {
+    async fn try_acquire(&self, ttl: Duration) -> Result<Option<Lease>> { // `Ok(None)`: the row belongs to another holder and has not expired.
+        let mut conn = super::checkout(&self.pool, "lease").await?;
+        let row = diesel::sql_query( // NOTE(review): at `expires_at = now()` the row is neither stealable here nor renewable below — confirm intended.
+            "INSERT INTO worker_leases \
+             (lease_name, holder_id, acquired_at, renewed_at, expires_at, fence_token) \
+             VALUES ($1, $2, now(), now(), now() + make_interval(secs => $3), 0) \
+             ON CONFLICT (lease_name) DO UPDATE SET \
+                 holder_id = EXCLUDED.holder_id, \
+                 acquired_at = CASE WHEN worker_leases.holder_id = EXCLUDED.holder_id \
+                     THEN worker_leases.acquired_at ELSE now() END, \
+                 renewed_at = now(), \
+                 expires_at = now() + make_interval(secs => $3), \
+                 fence_token = CASE WHEN worker_leases.holder_id = EXCLUDED.holder_id \
+                     THEN worker_leases.fence_token ELSE worker_leases.fence_token + 1 END \
+             WHERE worker_leases.expires_at < now() \
+                OR worker_leases.holder_id = EXCLUDED.holder_id \
+             RETURNING fence_token, expires_at",
+        )
+        .bind::<Text, _>(&self.lease_name) // $1
+        .bind::<Text, _>(&self.holder_id) // $2
+        .bind::<Double, _>(ttl.as_secs_f64()) // $3: TTL in fractional seconds, used by both `make_interval` calls.
+        .get_result::<AcquireRow>(&mut conn)
+        .await
+        .optional() // A statement that returns no row (the WHERE rejected the update) becomes `None`.
+        .map_err(|error| GuardianError::StorageError(format!("lease acquire: {error}")))?;
+
+        Ok(row.map(|row| Lease {
+            name: self.lease_name.clone(),
+            holder_id: self.holder_id.clone(),
+            fence_token: row.fence_token,
+            expires_at: row.expires_at,
+        }))
+    }
+
+    async fn renew(&self, lease: &Lease, ttl: Duration) -> Result<bool> { // Extends expiry only for the exact (name, holder, fence token) that is still unexpired.
+        let mut conn = super::checkout(&self.pool, "lease").await?;
+        let affected = diesel::sql_query(
+            "UPDATE worker_leases SET renewed_at = now(), expires_at = now() + make_interval(secs => $1) \
+             WHERE lease_name = $2 AND holder_id = $3 AND fence_token = $4 AND now() < expires_at",
+        )
+        .bind::<Double, _>(ttl.as_secs_f64())
+        .bind::<Text, _>(&lease.name)
+        .bind::<Text, _>(&lease.holder_id)
+        .bind::<BigInt, _>(lease.fence_token)
+        .execute(&mut conn)
+        .await
+        .map_err(|error| GuardianError::StorageError(format!("lease renew: {error}")))?;
+        Ok(affected == 1) // `true` only when exactly one row was updated.
+    }
+
+    async fn verify_held(&self, lease: &Lease) -> Result<bool> { // Read-only probe using the same predicate as `renew`.
+        let mut conn = super::checkout(&self.pool, "lease").await?;
+        let row = diesel::sql_query(
+            "SELECT 1 AS held FROM worker_leases \
+             WHERE lease_name = $1 AND holder_id = $2 AND fence_token = $3 AND now() < expires_at",
+        )
+        .bind::<Text, _>(&lease.name)
+        .bind::<Text, _>(&lease.holder_id)
+        .bind::<BigInt, _>(lease.fence_token)
+        .get_result::<HeldRow>(&mut conn)
+        .await
+        .optional()
+        .map_err(|error| GuardianError::StorageError(format!("lease verify: {error}")))?;
+        Ok(row.is_some()) // Held iff a matching, unexpired row exists.
+    }
+
+    async fn release(&self, lease: Lease) -> Result<()> {
+        let mut conn = super::checkout(&self.pool, "lease").await?;
+        // Expire the lease in place instead of deleting the row, so `fence_token`
+        // survives and keeps advancing monotonically on the next steal. A DELETE
+        // would let a fresh acquire re-INSERT `fence_token = 0`, after which a
+        // stale `Lease { fence_token: 0 }` from a long-gone holder could pass
+        // `verify_held` again.
+        diesel::sql_query(
+            "UPDATE worker_leases SET expires_at = now() \
+             WHERE lease_name = $1 AND holder_id = $2 AND fence_token = $3",
+        )
+        .bind::<Text, _>(&lease.name)
+        .bind::<Text, _>(&lease.holder_id)
+        .bind::<BigInt, _>(lease.fence_token)
+        .execute(&mut conn)
+        .await
+        .map_err(|error| GuardianError::StorageError(format!("lease release: {error}")))?;
+        Ok(()) // The affected-row count is not inspected: a non-matching lease is not reported as an error.
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::storage::postgres::{build_postgres_pool_lazy, run_migrations};
+
+    fn database_url() -> Option<String> { // `None` when DATABASE_URL is unset or blank.
+        std::env::var("DATABASE_URL")
+            .ok()
+            .filter(|url| !url.trim().is_empty())
+    }
+
+    #[tokio::test]
+    async fn try_acquire_fails_closed_when_unreachable() { // Needs no database: the pool points at 127.0.0.1:1.
+        let pool = build_postgres_pool_lazy("postgresql://127.0.0.1:1/__guardian_lease_fault__", 1)
+            .expect("lazy pool builds even with an unreachable address");
+        let elector = PgLeaseElector::new(pool, "canonicalization", "replica-a");
+        assert!(
+            elector.try_acquire(Duration::from_secs(30)).await.is_err(),
+            "lease acquire must surface an error (not a false None) when unreachable",
+        );
+    }
+
+    #[tokio::test]
+    #[ignore = "requires DATABASE_URL with migrations applied"]
+    async fn single_owner_failover_fences_the_old_holder() {
+        let url = database_url().expect("DATABASE_URL must be set for this #[ignore] test");
+        run_migrations(&url).await.expect("migrations apply");
+        let name = format!("canon-test-{}", Utc::now().timestamp_micros()); // Timestamped so each run uses its own lease row.
+        let short_ttl = Duration::from_secs(1); // A's TTL; the 1200 ms sleep below outlasts it.
+        let ttl = Duration::from_secs(60);
+        let a = PgLeaseElector::new(
+            build_postgres_pool_lazy(&url, 2).unwrap(),
+            &name,
+            "replica-a",
+        );
+        let b = PgLeaseElector::new(
+            build_postgres_pool_lazy(&url, 2).unwrap(),
+            &name,
+            "replica-b",
+        );
+
+        let lease_a = a
+            .try_acquire(short_ttl)
+            .await
+            .expect("acquire A")
+            .expect("A becomes the single owner");
+        // While A holds an unexpired lease, B cannot acquire.
+        assert!(
+            b.try_acquire(ttl).await.expect("B attempt").is_none(),
+            "only one replica may hold the lease",
+        );
+
+        // A crashes (stops renewing); after the TTL elapses B steals the expired
+        // lease, which advances the fence token (change of holder).
+        tokio::time::sleep(Duration::from_millis(1200)).await;
+        let lease_b = b
+            .try_acquire(ttl)
+            .await
+            .expect("acquire B")
+            .expect("B takes over the expired lease");
+        assert!(
+            lease_b.fence_token > lease_a.fence_token,
+            "a steal must advance the fence token",
+        );
+
+        // The superseded holder A can neither renew nor pass its fence check;
+        // only the current holder B is verified.
+        assert!(!a.renew(&lease_a, ttl).await.expect("A stale renew"));
+        assert!(!a.verify_held(&lease_a).await.expect("A stale verify"));
+        assert!(b.verify_held(&lease_b).await.expect("B verify"));
+
+        b.release(lease_b).await.expect("cleanup"); // Expires the row in place; `release` does not delete it.
+    }
+}
diff --git a/crates/server/src/coordination/postgres/mod.rs b/crates/server/src/coordination/postgres/mod.rs
new file mode 100644
index 00000000..abc007d9
--- /dev/null
+++ b/crates/server/src/coordination/postgres/mod.rs
@@ -0,0 +1,23 @@
+pub mod challenge_store;
+pub mod lease;
+pub mod session_store;
+
+pub use challenge_store::PgChallengeStore;
+pub use lease::PgLeaseElector;
+pub use session_store::PgSessionStore;
+
+use diesel_async::AsyncPgConnection;
+use diesel_async::pooled_connection::deadpool::{Object, Pool};
+
+use crate::error::{GuardianError, Result};
+
+/// Checks out a pooled connection; failure becomes a `StorageError` prefixed by `context`.
+async fn checkout(
+    pool: &Pool<AsyncPgConnection>,
+    context: &str,
+) -> Result<Object<AsyncPgConnection>> {
+    let conn = pool.get().await.map_err(|error| {
+        GuardianError::StorageError(format!("{context} pool: {error}"))
+    })?;
+    Ok(conn)
+}
diff --git a/crates/server/src/coordination/postgres/session_store.rs b/crates/server/src/coordination/postgres/session_store.rs
new file mode 100644
index 00000000..c7b0dac9
--- /dev/null
+++ b/crates/server/src/coordination/postgres/session_store.rs
@@ -0,0 +1,216 @@
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use diesel::prelude::*;
+use diesel_async::pooled_connection::deadpool::Pool;
+use diesel_async::{AsyncPgConnection, RunQueryDsl};
+
+use crate::coordination::Realm;
+use crate::coordination::session_store::{SessionKey, SessionStore, SessionSubject, StoredSession};
+use crate::error::{GuardianError, Result};
+use crate::schema::auth_sessions;
+
+#[derive(Insertable)]
+#[diesel(table_name = auth_sessions)]
+struct NewAuthSession { // Insert payload for `auth_sessions`; built in `PgSessionStore::insert`.
+    token_digest: Vec<u8>, // The `SessionKey` bytes, copied with `key.to_vec()`.
+    realm: String, // `Realm::as_str()` of the store performing the insert.
+    subject: serde_json::Value, // `SessionSubject` encoded with `serde_json::to_value`.
+    issued_at: DateTime<Utc>,
+    expires_at: DateTime<Utc>,
+}
+
+#[derive(Queryable, Selectable)]
+#[diesel(table_name = auth_sessions)]
+#[diesel(check_for_backend(diesel::pg::Pg))]
+#[allow(dead_code)] // `into_stored` reads only `subject`, `issued_at` and `expires_at`.
+struct AuthSessionRow {
+    token_digest: Vec<u8>,
+    realm: String,
+    subject: serde_json::Value, // Decoded into `SessionSubject` by `into_stored`.
+    issued_at: DateTime<Utc>,
+    expires_at: DateTime<Utc>,
+    revoked_at: Option<DateTime<Utc>>, // Set by `revoke`; `get` only selects rows where this is NULL.
+}
+
+impl AuthSessionRow {
+    /// Decodes the JSON `subject` column and carries the two timestamps over unchanged.
+    fn into_stored(self) -> Result<StoredSession> {
+        let Self { subject, issued_at, expires_at, .. } = self;
+        match serde_json::from_value::<SessionSubject>(subject) {
+            Ok(subject) => Ok(StoredSession { subject, issued_at, expires_at }),
+            Err(error) => Err(GuardianError::StorageError(format!(
+                "session subject decode: {error}"
+            ))),
+        }
+    }
+}
+
+/// Postgres-backed [`SessionStore`] bound to one realm. Expiry and revocation
+/// use the database clock so every replica agrees. Any DB error surfaces as a
+/// `StorageError`, which the auth path treats as fail-closed.
+pub struct PgSessionStore {
+    pool: Pool<AsyncPgConnection>, // One connection is checked out per operation.
+    realm: Realm, // Every query filters on, or writes, `auth_sessions.realm` with this value.
+}
+
+impl PgSessionStore {
+    pub fn new(pool: Pool<AsyncPgConnection>, realm: Realm) -> Self { // Binds the store to a single realm; no query is issued here.
+        Self { pool, realm }
+    }
+}
+
+#[async_trait]
+impl SessionStore for PgSessionStore {
+    async fn insert(&self, key: SessionKey, session: StoredSession) -> Result<()> { // Upserts one row keyed by (realm, token digest).
+        let subject = serde_json::to_value(&session.subject).map_err(|error| {
+            GuardianError::StorageError(format!("session subject encode: {error}"))
+        })?; // Encoded before checkout so an unencodable subject never occupies a pooled connection.
+        let mut conn = super::checkout(&self.pool, "session").await?;
+        let row = NewAuthSession {
+            token_digest: key.to_vec(),
+            realm: self.realm.as_str().to_string(),
+            subject,
+            issued_at: session.issued_at,
+            expires_at: session.expires_at,
+        };
+        // Upsert: a digest collision (astronomically unlikely) or a re-insert
+        // over an unswept revoked row replaces it with the fresh, unrevoked
+        // session rather than erroring (note `revoked_at` is reset to NULL).
+        diesel::insert_into(auth_sessions::table)
+            .values(&row)
+            .on_conflict((auth_sessions::realm, auth_sessions::token_digest))
+            .do_update()
+            .set((
+                auth_sessions::realm.eq(self.realm.as_str()),
+                auth_sessions::subject.eq(&row.subject),
+                auth_sessions::issued_at.eq(session.issued_at),
+                auth_sessions::expires_at.eq(session.expires_at),
+                auth_sessions::revoked_at.eq(None::<DateTime<Utc>>),
+            ))
+            .execute(&mut conn)
+            .await
+            .map_err(|error| GuardianError::StorageError(format!("session insert: {error}")))?;
+        Ok(())
+    }
+
+    async fn get(&self, key: &SessionKey, _now: DateTime<Utc>) -> Result<Option<StoredSession>> {
+        let mut conn = super::checkout(&self.pool, "session").await?;
+        let row = auth_sessions::table
+            .filter(auth_sessions::token_digest.eq(key.to_vec()))
+            .filter(auth_sessions::realm.eq(self.realm.as_str()))
+            .filter(auth_sessions::revoked_at.is_null()) // Revoked rows are never returned.
+            .filter(auth_sessions::expires_at.gt(diesel::dsl::now)) // Database clock; the `_now` argument is not consulted.
+            .select(AuthSessionRow::as_select())
+            .first(&mut conn)
+            .await
+            .optional() // No matching row becomes `Ok(None)` rather than an error.
+            .map_err(|error| GuardianError::StorageError(format!("session lookup: {error}")))?;
+        row.map(AuthSessionRow::into_stored).transpose()
+    }
+
+    async fn revoke(&self, key: &SessionKey) -> Result<Option<StoredSession>> {
+        let mut conn = super::checkout(&self.pool, "session").await?;
+        let row = diesel::update(auth_sessions::table)
+            .filter(auth_sessions::token_digest.eq(key.to_vec()))
+            .filter(auth_sessions::realm.eq(self.realm.as_str()))
+            .filter(auth_sessions::revoked_at.is_null()) // Already-revoked rows do not match, so a repeat revoke yields `None`.
+            .set(auth_sessions::revoked_at.eq(diesel::dsl::now)) // The row is marked, not deleted.
+            .returning(AuthSessionRow::as_returning())
+            .get_result(&mut conn)
+            .await
+            .optional()
+            .map_err(|error| GuardianError::StorageError(format!("session revoke: {error}")))?;
+        row.map(AuthSessionRow::into_stored).transpose()
+    }
+
+    async fn sweep_expired(&self, _now: DateTime<Utc>) -> Result<u64> { // Returns the number of rows deleted for this realm.
+        let mut conn = super::checkout(&self.pool, "session").await?;
+        let deleted = diesel::delete(auth_sessions::table)
+            .filter(auth_sessions::realm.eq(self.realm.as_str()))
+            .filter(auth_sessions::expires_at.le(diesel::dsl::now)) // `<=` is the exact complement of `get`'s `expires_at > now`.
+            .execute(&mut conn)
+            .await
+            .map_err(|error| GuardianError::StorageError(format!("session sweep: {error}")))?;
+        Ok(deleted as u64)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::storage::postgres::{build_postgres_pool_lazy, run_migrations};
+    use chrono::Duration;
+
+    fn database_url() -> Option<String> { // `None` when DATABASE_URL is unset or blank.
+        std::env::var("DATABASE_URL")
+            .ok()
+            .filter(|url| !url.trim().is_empty())
+    }
+
+    fn unique_key(now: DateTime<Utc>) -> SessionKey { // First 16 bytes: the microsecond timestamp (little-endian) twice; the rest stay zero.
+        let mut key = [0u8; 32];
+        key[..16].copy_from_slice(&now.timestamp_micros().to_le_bytes().repeat(2)[..16]);
+        key
+    }
+
+    #[tokio::test]
+    async fn get_fails_closed_when_store_unreachable() { // Needs no database: the pool points at 127.0.0.1:1.
+        let pool = build_postgres_pool_lazy("postgresql://127.0.0.1:1/__guardian_coord_fault__", 1)
+            .expect("lazy pool builds even with an unreachable address");
+        let store = PgSessionStore::new(pool, Realm::Operator);
+        assert!(
+            store.get(&[7u8; 32], Utc::now()).await.is_err(),
+            "session lookup must fail closed when the store is unreachable",
+        );
+    }
+
+    #[tokio::test]
+    #[ignore = "requires DATABASE_URL with migrations applied"]
+    async fn session_visible_across_replicas_and_revoke_propagates() {
+        let url = database_url().expect("DATABASE_URL must be set for this #[ignore] test");
+        run_migrations(&url).await.expect("migrations apply");
+        let replica_a = PgSessionStore::new( // Two stores with separate pools stand in for two replicas.
+            build_postgres_pool_lazy(&url, 2).expect("pool a"),
+            Realm::Operator,
+        );
+        let replica_b = PgSessionStore::new(
+            build_postgres_pool_lazy(&url, 2).expect("pool b"),
+            Realm::Operator,
+        );
+        let now = Utc::now();
+        let key = unique_key(now);
+
+        replica_a
+            .insert(
+                key,
+                StoredSession {
+                    subject: SessionSubject::Operator {
+                        operator_id: "op-x".to_string(),
+                        commitment: "0xc".to_string(),
+                    },
+                    issued_at: now,
+                    expires_at: now + Duration::hours(1),
+                },
+            )
+            .await
+            .expect("insert on replica A");
+
+        assert!(
+            replica_b.get(&key, now).await.expect("get on B").is_some(),
+            "a session written by replica A must be visible on replica B",
+        );
+
+        assert!(
+            replica_a.revoke(&key).await.expect("revoke on A").is_some(),
+            "revoke returns the prior session",
+        );
+        assert!(
+            replica_b
+                .get(&key, now)
+                .await
+                .expect("get on B after revoke")
+                .is_none(),
+            "revocation on A must be honored on B",
+        );
+    }
+}
diff --git a/crates/server/src/coordination/session_store.rs b/crates/server/src/coordination/session_store.rs
new file mode 100644
index 00000000..d517c6e4
--- /dev/null
+++ b/crates/server/src/coordination/session_store.rs
@@ -0,0 +1,167 @@
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use tokio::sync::Mutex;
+
+use crate::error::Result;
+
+pub type SessionKey = [u8; 32];
+
+/// Realm-specific authenticated identity persisted with a session. Operator
+/// permissions are intentionally absent: they are re-resolved from the live
+/// allowlist on each request, so only the stable identity is stored.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "realm", rename_all = "snake_case")] // Internally tagged: the encoded value carries a `realm` field naming the variant in snake_case.
+pub enum SessionSubject {
+    Operator {
+        operator_id: String,
+        commitment: String, // The dashboard verify path stores its normalized commitment here.
+    },
+    Evm {
+        address: String,
+    },
+}
+
+#[derive(Clone, Debug)]
+pub struct StoredSession { // The value held by a `SessionStore` for one session key.
+    pub subject: SessionSubject,
+    pub issued_at: DateTime<Utc>,
+    pub expires_at: DateTime<Utc>, // `InMemorySessionStore::get` returns the session only while this is after `now`.
+}
+
+/// A store of authenticated sessions keyed by the SHA-256 digest of the session
+/// token. Each instance is bound to a single realm at construction (the Postgres
+/// implementation scopes its rows by that realm; the in-memory implementation is
+/// instance-scoped). Implementations expose only unexpired, unrevoked sessions;
+/// reclamation of expired rows is the job of [`SessionStore::sweep_expired`].
+#[async_trait]
+pub trait SessionStore: Send + Sync {
+    async fn insert(&self, key: SessionKey, session: StoredSession) -> Result<()>; // Stores `session` under `key`; both implementations in this crate replace an existing entry.
+    async fn get(&self, key: &SessionKey, now: DateTime<Utc>) -> Result<Option<StoredSession>>; // `Ok(None)` when the session is absent, expired or revoked.
+    /// Revoke a session (logout), returning the prior session if present for
+    /// logout-side logging. The cross-replica contract: once revoked, `get` MUST
+    /// reject it on every replica until its natural expiry. The Postgres
+    /// implementation marks `revoked_at` and keeps the row until expiry; the
+    /// in-memory implementation removes it.
+    async fn revoke(&self, key: &SessionKey) -> Result<Option<StoredSession>>;
+    async fn sweep_expired(&self, now: DateTime<Utc>) -> Result<u64>; // Returns how many expired sessions were removed.
+}
+
+#[derive(Clone, Default)]
+pub struct InMemorySessionStore { // Process-local `SessionStore` backed by a mutex-guarded `HashMap`.
+    sessions: Arc<Mutex<HashMap<SessionKey, StoredSession>>>, // Behind an `Arc`, so clones of the store share one map.
+}
+
+impl InMemorySessionStore {
+    pub fn new() -> Self { // Same as `Default`: starts with an empty map.
+        Self::default()
+    }
+}
+
+#[async_trait]
+impl SessionStore for InMemorySessionStore {
+    async fn insert(&self, key: SessionKey, session: StoredSession) -> Result<()> {
+        let mut table = self.sessions.lock().await;
+        table.insert(key, session); // Replaces any entry already stored under `key`.
+        Ok(())
+    }
+
+    async fn get(&self, key: &SessionKey, now: DateTime<Utc>) -> Result<Option<StoredSession>> {
+        let table = self.sessions.lock().await;
+        let live = table.get(key).filter(|entry| now < entry.expires_at);
+        Ok(live.cloned())
+    }
+
+    /// Drops the entry outright and hands it back; nothing is kept for a later sweep.
+    async fn revoke(&self, key: &SessionKey) -> Result<Option<StoredSession>> {
+        let removed = self.sessions.lock().await.remove(key);
+        Ok(removed)
+    }
+
+    async fn sweep_expired(&self, now: DateTime<Utc>) -> Result<u64> {
+        let mut table = self.sessions.lock().await;
+        let initial = table.len();
+        table.retain(|_, entry| now < entry.expires_at); // Keep only sessions still live at `now`.
+        let reclaimed = initial - table.len();
+        Ok(reclaimed as u64)
+    }
+}
+
+#[cfg(all(test, not(any(feature = "integration", feature = "e2e"))))]
+mod tests {
+    use super::*;
+    use chrono::Duration;
+
+    fn operator_session(now: DateTime<Utc>, ttl_secs: i64) -> StoredSession { // Fixture: an operator session expiring `ttl_secs` after `now`.
+        StoredSession {
+            subject: SessionSubject::Operator {
+                operator_id: "op-1".to_string(),
+                commitment: "0xabc".to_string(),
+            },
+            issued_at: now,
+            expires_at: now + Duration::seconds(ttl_secs),
+        }
+    }
+
+    #[tokio::test]
+    async fn get_returns_unexpired_and_hides_expired() {
+        let store = InMemorySessionStore::new();
+        let now = Utc::now();
+        store
+            .insert([1u8; 32], operator_session(now, 60))
+            .await
+            .unwrap();
+
+        assert!(store.get(&[1u8; 32], now).await.unwrap().is_some());
+        assert!(
+            store
+                .get(&[1u8; 32], now + Duration::seconds(61)) // One second past the 60 s TTL.
+                .await
+                .unwrap()
+                .is_none()
+        );
+    }
+
+    #[tokio::test]
+    async fn revoke_returns_record_then_absent() {
+        let store = InMemorySessionStore::new();
+        let now = Utc::now();
+        store
+            .insert([2u8; 32], operator_session(now, 60))
+            .await
+            .unwrap();
+
+        assert!(store.revoke(&[2u8; 32]).await.unwrap().is_some());
+        assert!(store.get(&[2u8; 32], now).await.unwrap().is_none()); // Still within the TTL, so only the revoke can explain the miss.
+    }
+
+    #[tokio::test]
+    async fn sweep_reclaims_only_expired() {
+        let store = InMemorySessionStore::new();
+        let now = Utc::now();
+        store
+            .insert([3u8; 32], operator_session(now, 10)) // Expired by the time of the sweep below.
+            .await
+            .unwrap();
+        store
+            .insert([4u8; 32], operator_session(now, 600)) // Still live at the time of the sweep below.
+            .await
+            .unwrap();
+
+        let swept = store
+            .sweep_expired(now + Duration::seconds(60))
+            .await
+            .unwrap();
+        assert_eq!(swept, 1);
+        assert!(
+            store
+                .get(&[4u8; 32], now + Duration::seconds(60))
+                .await
+                .unwrap()
+                .is_some()
+        );
+    }
+}
diff --git a/crates/server/src/dashboard/state.rs b/crates/server/src/dashboard/state.rs
index a49ab312..578c6e54 100644
--- a/crates/server/src/dashboard/state.rs
+++ b/crates/server/src/dashboard/state.rs
@@ -1,10 +1,9 @@
-use std::collections::HashMap;
 use std::sync::Arc;
 
 use chrono::{DateTime, Utc};
 use guardian_shared::hex::{FromHex, IntoHex};
 use miden_protocol::crypto::dsa::falcon512_poseidon2::Signature;
-use tokio::sync::{Mutex, RwLock};
+use tokio::sync::RwLock;
 
 use super::allowlist::{
     AllowlistSource, OperatorAllowlist, OperatorAllowlistEntryInput, normalize_commitment,
@@ -13,35 +12,71 @@ use super::config::DashboardConfig;
 use super::cursor::CursorSecret;
 use super::types::{
     AuthenticatedOperator, IssuedOperatorSession, OperatorChallenge, OperatorChallengePayload,
-    OperatorSessionRecord, PendingChallenge,
 };
 use super::util::{cookie_date, correlation_id, random_hex, rate_limit_error};
+use crate::coordination::{
+    ChallengePayload, ChallengeStore, InMemoryChallengeStore, InMemorySessionStore, SessionStore,
+    SessionSubject, StoredChallenge, StoredSession,
+};
 use crate::error::{GuardianError, Result};
 use crate::middleware::rate_limit::RateLimitStore;
 use crate::network::NetworkType;
 use crate::secret::session_digest;
 
-#[derive(Clone, Debug)]
+#[derive(Clone)]
 pub struct DashboardState {
     config: DashboardConfig,
     allowlist_source: AllowlistSource,
     allowlist: Arc<RwLock<OperatorAllowlist>>,
-    challenges: Arc<Mutex<HashMap<String, Vec<PendingChallenge>>>>,
-    sessions: Arc<Mutex<HashMap<[u8; 32], OperatorSessionRecord>>>,
+    session_store: Arc<dyn SessionStore>,
+    challenge_store: Arc<dyn ChallengeStore>,
     commitment_rate_limits: RateLimitStore,
     cursor_secret: CursorSecret,
     cursor_secret_configured: bool,
     started_at: DateTime<Utc>,
 }
 
+impl std::fmt::Debug for DashboardState {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("DashboardState")
+            .field("config", &self.config)
+            .field("cursor_secret_configured", &self.cursor_secret_configured)
+            .field("started_at", &self.started_at)
+            .finish_non_exhaustive()
+    }
+}
+
 impl DashboardState {
     pub async fn from_env_for_network(
         network_type: NetworkType,
+    ) -> std::result::Result<Self, String> {
+        Self::from_env_for_network_with_stores(
+            network_type,
+            Arc::new(InMemorySessionStore::new()),
+            Arc::new(InMemoryChallengeStore::new()),
+        )
+        .await
+    }
+
+    /// Same as [`DashboardState::from_env_for_network`] but with explicit,
+    /// realm-bound coordination stores. The server builder passes shared
+    /// (Postgres) stores here on the Postgres backend; the default path uses
+    /// in-memory stores (single-process / dev).
+    pub async fn from_env_for_network_with_stores(
+        network_type: NetworkType,
+        session_store: Arc<dyn SessionStore>,
+        challenge_store: Arc<dyn ChallengeStore>,
     ) -> std::result::Result<Self, String> {
         let config = DashboardConfig::from_env_for_network(network_type)?;
         let allowlist_source = AllowlistSource::from_env().await?;
         let allowlist = allowlist_source.load().await?;
-        Self::from_allowlist_source(allowlist_source, allowlist, config)
+        Self::from_allowlist_source(
+            allowlist_source,
+            allowlist,
+            config,
+            session_store,
+            challenge_store,
+        )
     }
 
     pub fn for_tests(entries: Vec<(String, String)>) -> Self {
@@ -58,6 +93,8 @@ impl DashboardState {
             AllowlistSource::Static,
             allowlist,
             DashboardConfig::for_tests(),
+            Arc::new(InMemorySessionStore::new()),
+            Arc::new(InMemoryChallengeStore::new()),
         )
         .expect("dashboard test configuration should be valid")
     }
@@ -76,6 +113,8 @@ impl DashboardState {
             AllowlistSource::Static,
             allowlist,
             DashboardConfig::for_tests(),
+            Arc::new(InMemorySessionStore::new()),
+            Arc::new(InMemoryChallengeStore::new()),
         )
         .expect("dashboard test configuration should be valid")
     }
@@ -132,19 +171,20 @@ impl DashboardState {
             .is_some()
         {
             let expires_at = now + self.config.nonce_ttl;
-            let mut challenges = self.challenges.lock().await;
-            let pending = challenges.entry(normalized_commitment.clone()).or_default();
-            pending.retain(|challenge| challenge.expires_at > now);
-            pending.push(PendingChallenge {
-                signing_digest,
+            let challenge = StoredChallenge {
+                key: signing_digest.into_hex(),
+                payload: ChallengePayload::OperatorDigest(signing_digest),
                 issued_at: now,
                 expires_at,
-            });
-            if pending.len() > self.config.max_outstanding_challenges {
-                pending.sort_by_key(|challenge| challenge.issued_at);
-                let drain_len = pending.len() - self.config.max_outstanding_challenges;
-                pending.drain(0..drain_len);
-            }
+            };
+            self.challenge_store
+                .issue(
+                    &normalized_commitment,
+                    challenge,
+                    self.config.max_outstanding_challenges,
+                    now,
+                )
+                .await?;
 
             tracing::info!(
                 auth_event = "challenge_issued",
@@ -221,18 +261,16 @@ impl DashboardState {
             ));
         }
 
-        let mut challenges = self.challenges.lock().await;
-        let pending = challenges.entry(normalized_commitment.clone()).or_default();
-        pending.retain(|challenge| challenge.expires_at > now);
-
-        let matched_index = pending
-            .iter()
-            .position(|challenge| public_key.verify(challenge.signing_digest, &signature));
+        let active = self
+            .challenge_store
+            .active_for(&normalized_commitment, now)
+            .await?;
+        let matched = active.iter().find(|challenge| match &challenge.payload {
+            ChallengePayload::OperatorDigest(digest) => public_key.verify(*digest, &signature),
+            _ => false,
+        });
 
-        let Some(matched_index) = matched_index else {
-            if pending.is_empty() {
-                challenges.remove(&normalized_commitment);
-            }
+        let Some(matched) = matched else {
             tracing::warn!(
                 auth_event = "verify_failed",
                 correlation_id = %correlation_id,
@@ -244,34 +282,42 @@ impl DashboardState {
             ));
         };
 
-        pending.remove(matched_index);
-        if pending.is_empty() {
-            challenges.remove(&normalized_commitment);
+        if !self
+            .challenge_store
+            .consume(&normalized_commitment, &matched.key, now)
+            .await?
+        {
+            tracing::warn!(
+                auth_event = "verify_failed",
+                correlation_id = %correlation_id,
+                operator_id = %operator.operator_id,
+                "Operator verify rejected because the matched challenge was already consumed"
+            );
+            return Err(GuardianError::AuthenticationFailed(
+                "Invalid operator credentials".to_string(),
+            ));
         }
-        drop(challenges);
 
         let issued_at = now;
         let expires_at = now + self.config.session_ttl;
-        // Stash the freshly-resolved principal (identity + current
-        // permissions) into the session record. `authenticate_session`
-        // re-resolves permissions per request from the live allowlist
-        // anyway, so the copy held here is just a fallback used for
-        // logout-side logging.
         let operator_identity = operator.clone();
         let token = random_hex::<32>();
         let cookie_header = self.session_cookie_header(&token, issued_at, expires_at);
         let session_key = session_digest(&token);
 
-        let mut sessions = self.sessions.lock().await;
-        sessions.retain(|_, session| session.expires_at > now);
-        sessions.insert(
-            session_key,
-            OperatorSessionRecord {
-                operator: operator_identity.clone(),
-                issued_at,
-                expires_at,
-            },
-        );
+        self.session_store
+            .insert(
+                session_key,
+                StoredSession {
+                    subject: SessionSubject::Operator {
+                        operator_id: operator_identity.operator_id.clone(),
+                        commitment: normalized_commitment.clone(),
+                    },
+                    issued_at,
+                    expires_at,
+                },
+            )
+            .await?;
 
         tracing::info!(
             auth_event = "verify_success",
@@ -293,33 +339,39 @@ impl DashboardState {
         now: DateTime<Utc>,
     ) -> Result<AuthenticatedOperator> {
         self.refresh_allowlist().await?;
-        let mut sessions = self.sessions.lock().await;
-        sessions.retain(|_, session| session.expires_at > now);
 
         let session_key = session_digest(token);
-        let session = sessions.get(&session_key).cloned().ok_or_else(|| {
-            tracing::warn!(
-                auth_event = "session_rejected",
-                reason = "missing_or_expired",
-                "Operator session rejected"
-            );
-            GuardianError::AuthenticationFailed("Invalid operator session".to_string())
-        })?;
+        let session = self
+            .session_store
+            .get(&session_key, now)
+            .await?
+            .ok_or_else(|| {
+                tracing::warn!(
+                    auth_event = "session_rejected",
+                    reason = "missing_or_expired",
+                    "Operator session rejected"
+                );
+                GuardianError::AuthenticationFailed("Invalid operator session".to_string())
+            })?;
 
-        // Re-resolve the principal from the **live** allowlist snapshot
-        // rather than returning the (potentially stale) copy carried in
-        // the session record. This is the load-bearing wiring for
-        // feature 006-operator-authz FR-008 / SC-004: a permission
-        // grant or revocation written to the allowlist source takes
-        // effect on the next authenticated request without re-login.
-        let Some(live_operator) = self
-            .lookup_allowlisted_operator(&session.operator.commitment)
-            .await
+        let SessionSubject::Operator {
+            operator_id,
+            commitment,
+        } = &session.subject
         else {
-            sessions.remove(&session_key);
+            return Err(GuardianError::AuthenticationFailed(
+                "Invalid operator session".to_string(),
+            ));
+        };
+
+        // Re-resolve the principal from the **live** allowlist snapshot rather
+        // than the identity carried in the session record, so a permission grant
+        // or revocation takes effect on the next request without re-login.
+        let Some(live_operator) = self.lookup_allowlisted_operator(commitment).await else {
+            self.session_store.revoke(&session_key).await?;
             tracing::warn!(
                 auth_event = "session_rejected",
-                operator_id = %session.operator.operator_id,
+                operator_id = %operator_id,
                 reason = "revoked",
                 "Operator session rejected because the operator is no longer allowlisted"
             );
@@ -331,25 +383,27 @@ impl DashboardState {
         Ok(live_operator)
     }
 
-    pub async fn logout(&self, token: Option<&str>, now: DateTime<Utc>) {
-        let mut sessions = self.sessions.lock().await;
-        sessions.retain(|_, session| session.expires_at > now);
+    pub async fn logout(&self, token: Option<&str>, _now: DateTime<Utc>) -> Result<()> {
         if let Some(token) = token
-            && let Some(session) = sessions.remove(&session_digest(token))
+            && let Some(session) = self.session_store.revoke(&session_digest(token)).await?
+            && let SessionSubject::Operator { operator_id, .. } = &session.subject
         {
             tracing::info!(
                 auth_event = "logout",
-                operator_id = %session.operator.operator_id,
+                operator_id = %operator_id,
                 issued_at = %session.issued_at.to_rfc3339(),
                 "Operator session cleared"
             );
         }
+        Ok(())
     }
 
     fn from_allowlist_source(
         allowlist_source: AllowlistSource,
         allowlist: OperatorAllowlist,
         mut config: DashboardConfig,
+        session_store: Arc<dyn SessionStore>,
+        challenge_store: Arc<dyn ChallengeStore>,
     ) -> std::result::Result<Self, String> {
         tracing::info!(
             auth_event = "allowlist_loaded",
@@ -358,29 +412,42 @@ impl DashboardState {
         );
         let configured_cursor_secret = config.take_cursor_secret();
         let cursor_secret_configured = configured_cursor_secret.is_some();
-        let cursor_secret = configured_cursor_secret.unwrap_or_else(|| {
-            if !cfg!(test) {
-                tracing::warn!(
-                    "dashboard cursor secret not configured; generating ephemeral per-process \
-                     secret. Multi-replica deployments must set \
-                     GUARDIAN_DASHBOARD_CURSOR_SECRET to a stable shared 32-byte hex value."
-                );
+        let cursor_secret = match configured_cursor_secret {
+            Some(secret) => secret,
+            None => {
+                if !cfg!(test) {
+                    tracing::warn!(
+                        "dashboard cursor secret not configured; generating ephemeral per-process \
+                         secret. This degrades only dashboard pagination: a multi-replica \
+                         deployment must set GUARDIAN_DASHBOARD_CURSOR_SECRET to a stable shared \
+                         64-hex (32-byte) value, or a cursor minted on one replica fails on another."
+                    );
+                }
+                CursorSecret::generate()
             }
-            CursorSecret::generate()
-        });
+        };
         Ok(Self {
             commitment_rate_limits: RateLimitStore::new(config.commitment_rate_limit.clone()),
             config,
             allowlist_source,
             allowlist: Arc::new(RwLock::new(allowlist)),
-            challenges: Arc::new(Mutex::new(HashMap::new())),
-            sessions: Arc::new(Mutex::new(HashMap::new())),
+            session_store,
+            challenge_store,
             cursor_secret,
             cursor_secret_configured,
             started_at: Utc::now(),
         })
     }
 
+    /// Reclaim operator sessions and challenges that expired as of `now`. Run by the
+    /// server's periodic background sweep; expiry is also enforced on read, so this is
+    /// housekeeping (for the in-memory backend it only frees memory).
+    pub async fn sweep_expired(&self, now: DateTime<Utc>) -> Result<()> {
+        self.session_store.sweep_expired(now).await?; // on error, challenges are not swept
+        self.challenge_store.sweep_expired(now).await?;
+        Ok(())
+    }
+
     /// Server-side signing secret for opaque pagination cursors. See
     /// `crate::dashboard::cursor`. Generated once per server startup.
     pub fn cursor_secret(&self) -> &CursorSecret {
@@ -794,7 +861,7 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn session_map_never_holds_plaintext_token() {
+    async fn session_is_keyed_by_digest_not_plaintext_token() {
         let operator = TestSigner::new();
         let state = DashboardState::for_tests(vec![(
             "operator-1".to_string(),
@@ -813,12 +880,49 @@ mod tests {
             .expect("verify");
         let token = parse_token_from_cookie(&session.cookie_header);
 
-        let sessions = state.sessions.lock().await;
-        let only_key = sessions.keys().next().expect("session exists");
+        // The store only ever receives `session_digest(token)`, never the
+        // plaintext token: the digest differs from the token bytes, and
+        // authentication round-trips on the real token (proving the mapping).
         assert_ne!(
-            only_key.as_slice(),
+            crate::secret::session_digest(&token).as_slice(),
             token.as_bytes(),
-            "map key must be a digest, not the plaintext token"
+            "session key must be a digest, not the plaintext token"
+        );
+        state
+            .authenticate_session(&token, now)
+            .await
+            .expect("session lookup round-trips on the real token");
+    }
+
+    #[tokio::test]
+    async fn unset_cursor_secret_boots_in_every_stage() {
+        let _env_lock = ENV_LOCK.lock().await;
+        let _no_secret = EnvVarGuard::remove("GUARDIAN_DASHBOARD_CURSOR_SECRET");
+
+        let prod = EnvVarGuard::set("GUARDIAN_ENV", "prod");
+        let prod_result = DashboardState::from_allowlist_source(
+            super::AllowlistSource::Static,
+            super::OperatorAllowlist::from_entries(Vec::new()).expect("empty allowlist is valid"),
+            DashboardConfig::for_tests(),
+            std::sync::Arc::new(crate::coordination::InMemorySessionStore::new()),
+            std::sync::Arc::new(crate::coordination::InMemoryChallengeStore::new()),
+        );
+        assert!(
+            prod_result.is_ok(),
+            "prod stage tolerates an unset cursor secret (warns, ephemeral fallback)"
+        );
+        drop(prod);
+
+        let non_prod_result = DashboardState::from_allowlist_source(
+            super::AllowlistSource::Static,
+            super::OperatorAllowlist::from_entries(Vec::new()).expect("empty allowlist is valid"),
+            DashboardConfig::for_tests(),
+            std::sync::Arc::new(crate::coordination::InMemorySessionStore::new()),
+            std::sync::Arc::new(crate::coordination::InMemoryChallengeStore::new()),
+        );
+        assert!(
+            non_prod_result.is_ok(),
+            "non-prod tolerates an unset cursor secret (ephemeral fallback)"
         );
     }
 }
diff --git a/crates/server/src/dashboard/types.rs b/crates/server/src/dashboard/types.rs
index dc66265f..847229ad 100644
--- a/crates/server/src/dashboard/types.rs
+++ b/crates/server/src/dashboard/types.rs
@@ -1,7 +1,6 @@
 use std::collections::BTreeSet;
 use std::sync::Arc;
 
-use chrono::{DateTime, Utc};
 use guardian_shared::auth_request_payload::AuthRequestPayload;
 use miden_protocol::Word;
 use serde::{Deserialize, Serialize};
@@ -57,17 +56,3 @@ pub struct IssuedOperatorSession {
     pub expires_at: String,
     pub cookie_header: String,
 }
-
-#[derive(Clone, Debug)]
-pub(crate) struct PendingChallenge {
-    pub(crate) signing_digest: Word,
-    pub(crate) issued_at: DateTime<Utc>,
-    pub(crate) expires_at: DateTime<Utc>,
-}
-
-#[derive(Clone, Debug)]
-pub(crate) struct OperatorSessionRecord {
-    pub(crate) operator: AuthenticatedOperator,
-    pub(crate) issued_at: DateTime<Utc>,
-    pub(crate) expires_at: DateTime<Utc>,
-}
diff --git a/crates/server/src/evm/mod.rs b/crates/server/src/evm/mod.rs
index 332f5111..396e4a1d 100644
--- a/crates/server/src/evm/mod.rs
+++ b/crates/server/src/evm/mod.rs
@@ -21,10 +21,17 @@ pub struct EvmAppState {
 
 impl EvmAppState {
     pub async fn from_env() -> Result<Self, String> {
-        let chains = Arc::new(EvmChainRegistry::from_env()?);
-        let sessions = Arc::new(EvmSessionState::default());
+        Self::from_env_with_sessions(EvmSessionState::default()).await
+    }
 
-        Ok(Self { chains, sessions })
+    /// Build EVM state from explicit (evm-realm) session state; the server builder passes
+    /// shared (Postgres) stores on that backend. Fails if `EvmChainRegistry::from_env` does.
+    pub async fn from_env_with_sessions(sessions: EvmSessionState) -> Result<Self, String> {
+        let chains = Arc::new(EvmChainRegistry::from_env()?);
+        Ok(Self {
+            chains,
+            sessions: Arc::new(sessions),
+        })
     }
 
     pub fn for_tests() -> Self {
diff --git a/crates/server/src/evm/session.rs b/crates/server/src/evm/session.rs
index fb4d2ce4..749017c7 100644
--- a/crates/server/src/evm/session.rs
+++ b/crates/server/src/evm/session.rs
@@ -1,10 +1,12 @@
-use std::collections::HashMap;
 use std::sync::Arc;
 
 use chrono::{DateTime, Duration, Utc};
 use rand::RngCore;
-use tokio::sync::Mutex;
 
+use crate::coordination::{
+    ChallengePayload, ChallengeStore, InMemoryChallengeStore, InMemorySessionStore, SessionStore,
+    SessionSubject, StoredChallenge, StoredSession,
+};
 use crate::error::{GuardianError, Result};
 use crate::metadata::network::normalize_evm_address;
 use crate::secret::session_digest;
@@ -16,8 +18,8 @@ const MAX_OUTSTANDING_CHALLENGES: usize = 8;
 
 #[derive(Clone)]
 pub struct EvmSessionState {
-    challenges: Arc<Mutex<HashMap<String, Vec<PendingEvmChallenge>>>>,
-    sessions: Arc<Mutex<HashMap<[u8; 32], EvmSessionRecord>>>,
+    session_store: Arc<dyn SessionStore>,
+    challenge_store: Arc<dyn ChallengeStore>,
 }
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -40,27 +42,29 @@ pub struct AuthenticatedEvmSession {
     pub address: String,
 }
 
-#[derive(Clone)]
-struct PendingEvmChallenge {
-    challenge: EvmChallenge,
-}
-
-#[derive(Clone)]
-struct EvmSessionRecord {
-    address: String,
-    expires_at: DateTime<Utc>,
-}
-
 impl Default for EvmSessionState {
     fn default() -> Self {
-        Self {
-            challenges: Arc::new(Mutex::new(HashMap::new())),
-            sessions: Arc::new(Mutex::new(HashMap::new())),
-        }
+        Self::new(
+            Arc::new(InMemorySessionStore::new()),
+            Arc::new(InMemoryChallengeStore::new()),
+        )
     }
 }
 
 impl EvmSessionState {
+    /// Build EVM session state over explicit, evm-realm coordination stores. The
+    /// server builder passes shared (Postgres) stores on the Postgres backend;
+    /// the default uses in-memory stores (single-process / dev).
+    pub fn new(
+        session_store: Arc<dyn SessionStore>,
+        challenge_store: Arc<dyn ChallengeStore>,
+    ) -> Self {
+        Self {
+            session_store,
+            challenge_store,
+        }
+    }
+
     pub fn cookie_name(&self) -> &'static str {
         COOKIE_NAME
     }
@@ -82,16 +86,20 @@ impl EvmSessionState {
             expires_at: now + Duration::seconds(CHALLENGE_TTL_SECS),
         };
 
-        let mut challenges = self.challenges.lock().await;
-        let pending = challenges.entry(address).or_default();
-        pending.retain(|challenge| challenge.challenge.expires_at > now);
-        pending.push(PendingEvmChallenge {
-            challenge: challenge.clone(),
-        });
-        if pending.len() > MAX_OUTSTANDING_CHALLENGES {
-            let drain_len = pending.len() - MAX_OUTSTANDING_CHALLENGES;
-            pending.drain(0..drain_len);
-        }
+        let stored = StoredChallenge {
+            key: challenge.nonce.clone(),
+            payload: ChallengePayload::EvmChallenge {
+                address: challenge.address.clone(),
+                nonce: challenge.nonce.clone(),
+                issued_at: challenge.issued_at,
+                expires_at: challenge.expires_at,
+            },
+            issued_at: challenge.issued_at,
+            expires_at: challenge.expires_at,
+        };
+        self.challenge_store
+            .issue(&address, stored, MAX_OUTSTANDING_CHALLENGES, now)
+            .await?;
 
         Ok(challenge)
     }
@@ -105,20 +113,32 @@ impl EvmSessionState {
     ) -> Result<VerifiedEvmSession> {
         let address = normalize_evm_address(address).map_err(GuardianError::InvalidInput)?;
         let signature = crate::evm::proposal::normalize_signature(signature)?;
-        let mut challenges = self.challenges.lock().await;
-        let pending = challenges.entry(address.clone()).or_default();
-        pending.retain(|challenge| challenge.challenge.expires_at > now);
-
-        let Some(index) = pending
-            .iter()
-            .position(|pending| pending.challenge.nonce.eq_ignore_ascii_case(nonce))
-        else {
+
+        let active = self.challenge_store.active_for(&address, now).await?;
+        let matched = active.iter().find_map(|stored| match &stored.payload {
+            ChallengePayload::EvmChallenge {
+                address: challenge_address,
+                nonce: challenge_nonce,
+                issued_at,
+                expires_at,
+            } if challenge_nonce.eq_ignore_ascii_case(nonce) => Some((
+                stored.key.clone(),
+                EvmChallenge {
+                    address: challenge_address.clone(),
+                    nonce: challenge_nonce.clone(),
+                    issued_at: *issued_at,
+                    expires_at: *expires_at,
+                },
+            )),
+            _ => None,
+        });
+
+        let Some((key, challenge)) = matched else {
             return Err(GuardianError::AuthenticationFailed(
                 "No active EVM challenge matched the nonce".to_string(),
             ));
         };
 
-        let challenge = pending[index].challenge.clone();
         let recovered = crate::evm::contracts::recover_session_address(&challenge, &signature)?;
         if recovered != address {
             return Err(GuardianError::AuthenticationFailed(
@@ -126,25 +146,28 @@ impl EvmSessionState {
             ));
         }
 
-        pending.remove(index);
-        if pending.is_empty() {
-            challenges.remove(&address);
+        if !self.challenge_store.consume(&address, &key, now).await? {
+            return Err(GuardianError::AuthenticationFailed(
+                "No active EVM challenge matched the nonce".to_string(),
+            ));
         }
-        drop(challenges);
 
         let token = random_hex_32();
         let expires_at = now + Duration::seconds(SESSION_TTL_SECS);
         let cookie_header = self.session_cookie_header(&token, expires_at);
         let session_key = session_digest(&token);
-        let mut sessions = self.sessions.lock().await;
-        sessions.retain(|_, session| session.expires_at > now);
-        sessions.insert(
-            session_key,
-            EvmSessionRecord {
-                address: address.clone(),
-                expires_at,
-            },
-        );
+        self.session_store
+            .insert(
+                session_key,
+                StoredSession {
+                    subject: SessionSubject::Evm {
+                        address: address.clone(),
+                    },
+                    issued_at: now,
+                    expires_at,
+                },
+            )
+            .await?;
 
         Ok(VerifiedEvmSession {
             address,
@@ -158,25 +181,34 @@ impl EvmSessionState {
         token: &str,
         now: DateTime<Utc>,
     ) -> Result<AuthenticatedEvmSession> {
-        let mut sessions = self.sessions.lock().await;
-        sessions.retain(|_, session| session.expires_at > now);
-        let session = sessions
-            .get(&session_digest(token))
-            .cloned()
+        let session = self
+            .session_store
+            .get(&session_digest(token), now)
+            .await?
             .ok_or_else(|| {
                 GuardianError::AuthenticationFailed("Invalid EVM session".to_string())
             })?;
-        Ok(AuthenticatedEvmSession {
-            address: session.address,
-        })
+        let SessionSubject::Evm { address } = session.subject else {
+            return Err(GuardianError::AuthenticationFailed(
+                "Invalid EVM session".to_string(),
+            ));
+        };
+        Ok(AuthenticatedEvmSession { address })
     }
 
-    pub async fn logout(&self, token: Option<&str>, now: DateTime<Utc>) {
-        let mut sessions = self.sessions.lock().await;
-        sessions.retain(|_, session| session.expires_at > now);
+    pub async fn logout(&self, token: Option<&str>, _now: DateTime<Utc>) -> Result<()> {
         if let Some(token) = token {
-            sessions.remove(&session_digest(token));
+            self.session_store.revoke(&session_digest(token)).await?;
         }
+        Ok(())
+    }
+
+    /// Reclaim EVM sessions and challenges that expired as of `now` (housekeeping;
+    /// expiry is also enforced on read). Sessions are swept first and errors return early.
+    pub async fn sweep_expired(&self, now: DateTime<Utc>) -> Result<()> {
+        self.session_store.sweep_expired(now).await?;
+        self.challenge_store.sweep_expired(now).await?;
+        Ok(())
     }
 
     fn session_cookie_header(&self, token: &str, expires_at: DateTime<Utc>) -> String {
@@ -202,7 +234,7 @@ mod tests {
     use super::*;
 
     #[tokio::test]
-    async fn challenge_is_single_use_after_manual_removal() {
+    async fn challenge_is_single_use_via_consume() {
         let state = EvmSessionState::default();
         let now = Utc::now();
         let challenge = state
@@ -210,13 +242,27 @@ mod tests {
             .await
             .expect("challenge");
 
-        let mut challenges = state.challenges.lock().await;
-        let pending = challenges
-            .get_mut(&challenge.address)
-            .expect("pending challenge");
-        assert_eq!(pending.len(), 1);
-        pending.remove(0);
-        assert!(pending.is_empty());
+        let active = state
+            .challenge_store
+            .active_for(&challenge.address, now)
+            .await
+            .expect("active challenges");
+        assert_eq!(active.len(), 1);
+
+        assert!(
+            state
+                .challenge_store
+                .consume(&challenge.address, &challenge.nonce, now)
+                .await
+                .expect("consume")
+        );
+        assert!(
+            !state
+                .challenge_store
+                .consume(&challenge.address, &challenge.nonce, now)
+                .await
+                .expect("replay consume")
+        );
     }
 
     #[test]
diff --git a/crates/server/src/jobs/canonicalization/processor.rs b/crates/server/src/jobs/canonicalization/processor.rs
index d906312e..a21480e7 100644
--- a/crates/server/src/jobs/canonicalization/processor.rs
+++ b/crates/server/src/jobs/canonicalization/processor.rs
@@ -1,10 +1,39 @@
+use std::sync::Arc;
+
 use crate::canonicalization::CanonicalizationConfig;
+use crate::coordination::{AlwaysLeader, CANONICALIZATION_LEASE, LeaderElector, Lease};
 use crate::delta_object::{DeltaObject, DeltaStatus};
 use crate::error::{GuardianError, Result};
 use crate::state::AppState;
 use crate::state_object::StateObject;
 use async_trait::async_trait;
 use chrono::{DateTime, Utc};
+use tokio_util::sync::CancellationToken;
+
+/// A leader handle for a single canonicalization pass: who we are, the fence we
+/// hold, and a cancellation signal tripped when the lease is lost mid-pass.
+struct PassLease {
+    leader: Arc<dyn LeaderElector>, // asked via `verify_held` before each fenced write
+    lease: Lease, // the lease handed to `verify_held`
+    cancel: CancellationToken, // polled before each account and each candidate
+}
+
+impl PassLease {
+    /// Single-process default (filesystem / tests): an `AlwaysLeader` elector, a
+    /// synthetic lease with the latest possible expiry, and a token never cancelled.
+    fn single_process() -> Self {
+        Self {
+            leader: Arc::new(AlwaysLeader::new(CANONICALIZATION_LEASE, "single-process")),
+            lease: Lease {
+                name: CANONICALIZATION_LEASE.to_string(),
+                holder_id: "single-process".to_string(), // same holder id given to AlwaysLeader
+                fence_token: 0, // NOTE(review): fixed value; presumably unused by AlwaysLeader
+                expires_at: DateTime::<Utc>::MAX_UTC, // latest representable instant
+            },
+            cancel: CancellationToken::new(),
+        }
+    }
+}
 
 #[async_trait]
 pub trait Processor: Send + Sync {
@@ -36,11 +65,35 @@ fn get_candidates(deltas: &[DeltaObject]) -> Vec<DeltaObject> {
 
 struct DeltasProcessorBase {
     state: AppState,
+    pass: PassLease,
     max_retries: u32,
     submission_grace_period_seconds: u64,
 }
 
 impl DeltasProcessorBase {
+    /// Fence check run before every custody-state write: `submit_state`, `submit_delta`,
+    /// the `update_auth` cosigner-key sync, the discard `delete_delta`, and the retry
+    /// `update_delta_status`. Returns `Ok(())` while `verify_held` confirms the lease;
+    /// otherwise logs a warning and returns `GuardianError::StorageError` so the caller
+    /// skips the write (a `verify_held` error is propagated via `?`). Best-effort cleanup
+    /// that trails a fenced write — clearing `has_pending_candidate` and deleting a
+    /// finalized proposal — is intentionally unfenced: both are idempotent and
+    /// non-custodial, so a brief two-leader overlap can at most repeat them harmlessly.
+    /// NOTE(review): check and write are separate awaits; confirm storage also fences.
+    async fn ensure_lease_held(&self, delta: &DeltaObject) -> Result<()> {
+        if self.pass.leader.verify_held(&self.pass.lease).await? {
+            return Ok(());
+        }
+        tracing::warn!(
+            account_id = %delta.account_id,
+            nonce = delta.nonce,
+            "Canonicalization lease lost; refusing canonical write"
+        );
+        Err(GuardianError::StorageError(
+            "canonicalization lease lost; aborting write".to_string(),
+        ))
+    }
+
     fn candidate_age_seconds(&self, delta: &DeltaObject, now: DateTime<Utc>) -> Option<u64> {
         let DeltaStatus::Candidate { timestamp, .. } = &delta.status else {
             return None;
@@ -65,6 +118,12 @@ impl DeltasProcessorBase {
         );
 
         for account_id in account_ids {
+            if self.pass.cancel.is_cancelled() {
+                tracing::warn!(
+                    "Canonicalization pass cancelled (lease lost); stopping before next account"
+                );
+                break;
+            }
             if let Err(e) = self.process_account(&account_id).await {
                 tracing::error!(
                     account_id = %account_id,
@@ -109,6 +168,13 @@ impl DeltasProcessorBase {
         );
 
         for delta in candidates {
+            if self.pass.cancel.is_cancelled() {
+                tracing::warn!(
+                    account_id = %account_id,
+                    "Canonicalization pass cancelled (lease lost); stopping before next candidate"
+                );
+                break;
+            }
             let nonce = delta.nonce;
             if let Err(e) = self.process_candidate(delta).await {
                 tracing::error!(
@@ -203,6 +269,7 @@ impl DeltasProcessorBase {
                         "Delta verification failed after max retries, discarding"
                     );
 
+                    self.ensure_lease_held(&delta).await?;
                     storage_backend
                         .delete_delta(&delta.account_id, delta.nonce)
                         .await
@@ -282,6 +349,7 @@ impl DeltasProcessorBase {
 
                     let new_status = delta.status.with_incremented_retry(now);
 
+                    self.ensure_lease_held(&delta).await?;
                     storage_backend
                         .update_delta_status(&delta.account_id, delta.nonce, new_status)
                         .await
@@ -340,6 +408,7 @@ impl DeltasProcessorBase {
             auth_scheme: String::new(),
         };
 
+        self.ensure_lease_held(&delta).await?;
         storage_backend
             .submit_state(&updated_state)
             .await
@@ -363,6 +432,7 @@ impl DeltasProcessorBase {
                 "Syncing cosigner public keys from on-chain storage"
             );
 
+            self.ensure_lease_held(&delta).await?;
             self.state
                 .metadata
                 .update_auth(&delta.account_id, new_auth, &now)
@@ -380,6 +450,7 @@ impl DeltasProcessorBase {
         let mut canonical_delta = delta.clone();
         canonical_delta.status = DeltaStatus::canonical(now.clone());
 
+        self.ensure_lease_held(&delta).await?;
         storage_backend
             .submit_delta(&canonical_delta)
             .await
@@ -452,10 +523,31 @@ pub struct DeltasProcessor {
 }
 
 impl DeltasProcessor {
+    /// Single-process processor (filesystem / tests): always the leader, never
+    /// fenced out. Behavior is identical to the pre-lease worker.
+    #[allow(dead_code)]
     pub fn new(state: AppState, config: CanonicalizationConfig) -> Self {
+        let pass = PassLease::single_process();
+        Self::with_lease(state, config, pass.leader, pass.lease, pass.cancel)
+    }
+
+    /// Lease-bound processor used by the multi-replica worker: writes are fenced
+    /// by `leader`/`lease` and the pass aborts when `cancel` is tripped.
+    pub fn with_lease(
+        state: AppState,
+        config: CanonicalizationConfig,
+        leader: Arc<dyn LeaderElector>,
+        lease: Lease,
+        cancel: CancellationToken,
+    ) -> Self {
         Self {
             base: DeltasProcessorBase {
                 state,
+                pass: PassLease {
+                    leader,
+                    lease,
+                    cancel,
+                },
                 max_retries: config.max_retries,
                 submission_grace_period_seconds: config.submission_grace_period_seconds,
             },
@@ -483,6 +575,7 @@ impl TestDeltasProcessor {
         Self {
             base: DeltasProcessorBase {
                 state,
+                pass: PassLease::single_process(),
                 max_retries: u32::MAX, // Test processor doesn't discard on retries
                 submission_grace_period_seconds: 0,
             },
diff --git a/crates/server/src/jobs/canonicalization/worker.rs b/crates/server/src/jobs/canonicalization/worker.rs
index 2346d902..a79052aa 100644
--- a/crates/server/src/jobs/canonicalization/worker.rs
+++ b/crates/server/src/jobs/canonicalization/worker.rs
@@ -1,16 +1,22 @@
+use std::sync::Arc;
+use std::time::Duration;
+
+use tokio::time::interval;
+use tokio_util::sync::CancellationToken;
+
+use crate::coordination::{LeaderElector, Lease};
 use crate::error::Result;
 use crate::state::AppState;
-use tokio::time::interval;
 
 use super::processor::{DeltasProcessor, Processor, TestDeltasProcessor};
 
-pub fn start_worker(state: AppState) {
+pub fn start_worker(state: AppState, leader: Arc<dyn LeaderElector>) {
     tokio::spawn(async move {
-        run_worker(state).await;
+        run_worker(state, leader).await;
     });
 }
 
-async fn run_worker(state: AppState) {
+async fn run_worker(state: AppState, leader: Arc<dyn LeaderElector>) {
     let config = match &state.canonicalization {
         Some(config) => config.clone(),
         None => {
@@ -21,12 +27,43 @@ async fn run_worker(state: AppState) {
         }
     };
 
-    let processor = DeltasProcessor::new(state.clone(), config.clone());
-    let mut interval_timer = interval(config.check_interval());
+    let check_interval = config.check_interval();
+    // TTL outlives several renew cycles so a healthy holder never loses the lease
+    // mid-pass and the lease survives the idle gap between ticks; failover after a
+    // crash happens within one TTL.
+    let lease_ttl = check_interval * 3;
+    let renew_interval = check_interval;
+    let mut interval_timer = interval(check_interval);
 
     loop {
         interval_timer.tick().await;
 
+        let lease = match leader.try_acquire(lease_ttl).await {
+            Ok(Some(lease)) => lease,
+            Ok(None) => continue,
+            Err(error) => {
+                tracing::warn!(error = %error, "Failed to acquire canonicalization lease");
+                continue;
+            }
+        };
+
+        let cancel = CancellationToken::new();
+        let renewal = spawn_renewal(
+            leader.clone(),
+            lease.clone(),
+            lease_ttl,
+            renew_interval,
+            cancel.clone(),
+        );
+
+        let processor = DeltasProcessor::with_lease(
+            state.clone(),
+            config.clone(),
+            leader.clone(),
+            lease,
+            cancel.clone(),
+        );
+
         let started = std::time::Instant::now();
         let result = processor.process_all_accounts().await;
         metrics::histogram!(crate::metrics::names::CANONICALIZATION_RUN_DURATION_SECONDS)
@@ -38,12 +75,49 @@ async fn run_worker(state: AppState) {
         )
         .increment(1);
 
+        cancel.cancel();
+        let _ = renewal.await;
+
         if let Err(e) = result {
             tracing::error!(error = %e, "Canonicalization worker error");
         }
     }
 }
 
+/// Renew the lease on its own timer, concurrent with the pass. On a lost lease
+/// (stolen, expired, or store error) it trips `cancel` so the pass aborts at its
+/// next checkpoint; the fence check still guards any in-flight write.
+fn spawn_renewal(
+    leader: Arc<dyn LeaderElector>,
+    lease: Lease,
+    ttl: Duration,
+    renew_interval: Duration,
+    cancel: CancellationToken,
+) -> tokio::task::JoinHandle<()> {
+    tokio::spawn(async move {
+        let mut ticker = interval(renew_interval);
+        ticker.tick().await; // consume the initial tick (immediate for a tokio interval) so the first renew is one interval out
+        loop {
+            tokio::select! {
+                _ = cancel.cancelled() => break, // `cancel` tripped elsewhere (run_worker cancels after the pass): stop renewing
+                _ = ticker.tick() => match leader.renew(&lease, ttl).await {
+                    Ok(true) => {} // renewed; keep looping
+                    Ok(false) => {
+                        tracing::warn!("Canonicalization lease lost during pass; cancelling");
+                        cancel.cancel();
+                        break;
+                    }
+                    Err(error) => {
+                        tracing::warn!(error = %error, "Canonicalization lease renew failed; cancelling pass");
+                        cancel.cancel();
+                        break;
+                    }
+                },
+            }
+        }
+    })
+}
+
 pub async fn process_all_accounts_now(state: &AppState) -> Result<()> {
     let processor = TestDeltasProcessor::new(state.clone());
     processor.process_all_accounts().await
diff --git a/crates/server/src/lib.rs b/crates/server/src/lib.rs
index fee40fdb..688d41e1 100644
--- a/crates/server/src/lib.rs
+++ b/crates/server/src/lib.rs
@@ -5,6 +5,8 @@ pub mod api;
 pub mod audit;
 pub mod build_info;
 pub mod builder;
+pub mod config;
+pub mod coordination;
 pub mod dashboard;
 pub mod middleware;
 
diff --git a/crates/server/src/main.rs b/crates/server/src/main.rs
index 1af43301..06b5b6a3 100644
--- a/crates/server/src/main.rs
+++ b/crates/server/src/main.rs
@@ -17,7 +17,7 @@ async fn main() {
         .unwrap_or_else(|_| "/var/guardian/keystore".to_string())
         .into();
 
-    let (storage_backend, metadata, auditor) = StorageMetadataBuilder::from_env()
+    let (storage_backend, metadata, auditor, coordination) = StorageMetadataBuilder::from_env()
         .build()
         .await
         .expect("Failed to initialize storage backends");
@@ -44,6 +44,7 @@ async fn main() {
         .storage(storage_backend)
         .metadata(metadata)
         .auditor(auditor)
+        .coordination(coordination)
         .ack(ack)
         .http(true, 3000)
         .grpc(true, 50051)
diff --git a/crates/server/src/middleware/rate_limit.rs b/crates/server/src/middleware/rate_limit.rs
index 3b87548a..22174c85 100644
--- a/crates/server/src/middleware/rate_limit.rs
+++ b/crates/server/src/middleware/rate_limit.rs
@@ -27,6 +27,10 @@ const DEFAULT_BURST_PER_SEC: u32 = 10;
 const DEFAULT_PER_MIN: u32 = 60;
 /// Environment variable for enabling or disabling rate limiting
 const ENV_RATE_LIMIT_ENABLED: &str = "GUARDIAN_RATE_LIMIT_ENABLED";
+/// Deployment's maximum replica capacity; the configured global limits are
+/// divided by it so per-process enforcement keeps the fleet aggregate at or
+/// below the global limit (issue #242). Drives rate limiting only.
+const ENV_MAX_REPLICAS: &str = "GUARDIAN_MAX_REPLICAS";
 /// Cleanup interval for stale entries
 const CLEANUP_INTERVAL_SECS: u64 = 60;
 
@@ -45,15 +49,35 @@ impl RateLimitConfig {
     /// Load configuration from environment variables
     pub fn from_env() -> Self {
         let enabled = env_flag(ENV_RATE_LIMIT_ENABLED, true);
-        let burst_per_sec = env::var("GUARDIAN_RATE_BURST_PER_SEC")
+        let max_replicas = env::var(ENV_MAX_REPLICAS)
             .ok()
-            .and_then(|v| v.parse().ok())
-            .unwrap_or(DEFAULT_BURST_PER_SEC);
+            .and_then(|v| v.parse::<u32>().ok())
+            .unwrap_or(1);
+        let burst_per_sec = partition_limit(
+            env::var("GUARDIAN_RATE_BURST_PER_SEC")
+                .ok()
+                .and_then(|v| v.parse().ok())
+                .unwrap_or(DEFAULT_BURST_PER_SEC),
+            max_replicas,
+        );
+        let per_min = partition_limit(
+            env::var("GUARDIAN_RATE_PER_MIN")
+                .ok()
+                .and_then(|v| v.parse().ok())
+                .unwrap_or(DEFAULT_PER_MIN),
+            max_replicas,
+        );
 
-        let per_min = env::var("GUARDIAN_RATE_PER_MIN")
-            .ok()
-            .and_then(|v| v.parse().ok())
-            .unwrap_or(DEFAULT_PER_MIN);
+        if enabled && (burst_per_sec == 0 || per_min == 0) {
+            tracing::warn!(
+                max_replicas,
+                burst_per_sec,
+                per_min,
+                "rate limit partitions to 0 per replica (global limit is below GUARDIAN_MAX_REPLICAS); \
+                 this replica will throttle all traffic. Raise the global rate limit or lower \
+                 GUARDIAN_MAX_REPLICAS."
+            );
+        }
 
         Self {
             enabled,
@@ -82,6 +106,16 @@ impl Default for RateLimitConfig {
     }
 }
 
+/// Per-replica share of a global limit: `global / max_replicas` (floor), with
+/// `max_replicas` clamped to ≥ 1. The floor — not a round-up or a ≥1 clamp —
+/// guarantees the fleet aggregate (`max_replicas × share`) never exceeds the
+/// global limit (FR-009). A share of `0` means this replica denies all requests;
+/// that only happens when the global limit is below the replica count (an
+/// extreme misconfiguration), and it still never exceeds the global limit.
+fn partition_limit(global_limit: u32, max_replicas: u32) -> u32 {
+    global_limit / max_replicas.max(1)
+}
+
 /// Parse a boolean env flag: unset → `default_value`; `0`/`false`/
 /// `no`/`off` (case-insensitive) → false; anything else → true.
 /// Shared by env-driven configs (rate limiting, metrics).
@@ -426,6 +460,10 @@ mod tests {
     use axum::http::header::HeaderValue;
     use std::net::{IpAddr, SocketAddr};
 
+    /// Serializes the env-mutating `from_env` tests so they don't race the
+    /// shared process environment under the multi-threaded test runner.
+    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
     fn request_with_peer_ip(peer_ip: IpAddr) -> Request<Body> {
         let mut req = Request::builder().uri("/test").body(Body::empty()).unwrap();
         req.extensions_mut()
@@ -433,6 +471,18 @@ mod tests {
         req
     }
 
+    #[test]
+    fn partition_divides_global_limit_by_max_replicas() {
+        assert_eq!(partition_limit(600, 6), 100);
+        assert_eq!(partition_limit(600, 1), 600);
+        assert_eq!(partition_limit(600, 0), 600, "zero replicas treated as one");
+        // global < max_replicas: floor is 0 (deny) so the fleet aggregate
+        // (6 x 0 = 0) never exceeds the global limit (FR-009).
+        assert_eq!(partition_limit(5, 6), 0);
+        // 6 x 100 = 600 == global; never exceeds.
+        assert!(partition_limit(600, 6) * 6 <= 600);
+    }
+
     #[test]
     fn test_rate_limit_config_default() {
         let config = RateLimitConfig::default();
@@ -451,12 +501,13 @@ mod tests {
 
     #[test]
     fn test_rate_limit_config_from_env_defaults() {
-        // Clear any existing env vars
-        // SAFETY: This test runs single-threaded and these env vars are test-specific
+        let _guard = ENV_LOCK.lock().unwrap_or_else(|poison| poison.into_inner());
+        // SAFETY: serialized by ENV_LOCK; vars are test-specific.
         unsafe {
             env::remove_var(ENV_RATE_LIMIT_ENABLED);
             env::remove_var("GUARDIAN_RATE_BURST_PER_SEC");
             env::remove_var("GUARDIAN_RATE_PER_MIN");
+            env::remove_var(ENV_MAX_REPLICAS);
         }
 
         let config = RateLimitConfig::from_env();
@@ -467,7 +518,8 @@ mod tests {
 
     #[test]
     fn test_rate_limit_config_from_env_disabled() {
-        // SAFETY: This test runs single-threaded and these env vars are test-specific
+        let _guard = ENV_LOCK.lock().unwrap_or_else(|poison| poison.into_inner());
+        // SAFETY: serialized by ENV_LOCK; vars are test-specific.
         unsafe {
             env::set_var(ENV_RATE_LIMIT_ENABLED, "false");
         }
@@ -475,12 +527,35 @@ mod tests {
         let config = RateLimitConfig::from_env();
         assert!(!config.enabled);
 
-        // SAFETY: This test runs single-threaded and these env vars are test-specific
+        // SAFETY: serialized by ENV_LOCK; vars are test-specific.
         unsafe {
             env::remove_var(ENV_RATE_LIMIT_ENABLED);
         }
     }
 
+    #[test]
+    fn from_env_partitions_limits_by_max_replicas() {
+        let _guard = ENV_LOCK.lock().unwrap_or_else(|poison| poison.into_inner());
+        // SAFETY: serialized by ENV_LOCK; vars are test-specific.
+        unsafe {
+            env::set_var("GUARDIAN_RATE_BURST_PER_SEC", "600");
+            env::set_var("GUARDIAN_RATE_PER_MIN", "6000");
+            env::set_var(ENV_MAX_REPLICAS, "6");
+        }
+
+        let config = RateLimitConfig::from_env();
+
+        // SAFETY: serialized by ENV_LOCK; vars are test-specific.
+        unsafe {
+            env::remove_var("GUARDIAN_RATE_BURST_PER_SEC");
+            env::remove_var("GUARDIAN_RATE_PER_MIN");
+            env::remove_var(ENV_MAX_REPLICAS);
+        }
+
+        assert_eq!(config.burst_per_sec, 100);
+        assert_eq!(config.per_min, 1000);
+    }
+
     #[test]
     fn test_rate_limit_store_allows_under_limit() {
         let config = RateLimitConfig::new(5, 10);
@@ -623,7 +698,8 @@ mod tests {
 
     #[test]
     fn test_rate_limit_layer_from_env() {
-        // SAFETY: This test runs single-threaded and these env vars are test-specific
+        let _guard = ENV_LOCK.lock().unwrap_or_else(|poison| poison.into_inner());
+        // SAFETY: serialized by ENV_LOCK; vars are test-specific.
         unsafe {
             env::remove_var(ENV_RATE_LIMIT_ENABLED);
             env::remove_var("GUARDIAN_RATE_BURST_PER_SEC");
diff --git a/crates/server/src/schema.rs b/crates/server/src/schema.rs
index 0edce19b..07461052 100644
--- a/crates/server/src/schema.rs
+++ b/crates/server/src/schema.rs
@@ -108,6 +108,51 @@ diesel::table! {
     }
 }
 
+diesel::table! {
+    /// Representation of the `auth_sessions` table.
+    ///
+    /// Shared operator/EVM session store for horizontal scaling (issue #242).
+    auth_sessions (realm, token_digest) {
+        realm -> Text,
+        token_digest -> Bytea,
+        subject -> Jsonb,
+        issued_at -> Timestamptz,
+        expires_at -> Timestamptz,
+        revoked_at -> Nullable<Timestamptz>,
+    }
+}
+
+diesel::table! {
+    /// Representation of the `auth_challenges` table.
+    ///
+    /// Shared operator/EVM login-challenge store for horizontal scaling
+    /// (issue #242). Composite key `(realm, challenge_key)`.
+    auth_challenges (realm, challenge_key) {
+        realm -> Text,
+        challenge_key -> Text,
+        principal -> Text,
+        payload -> Jsonb,
+        issued_at -> Timestamptz,
+        expires_at -> Timestamptz,
+        consumed_at -> Nullable<Timestamptz>,
+    }
+}
+
+diesel::table! {
+    /// Representation of the `worker_leases` table.
+    ///
+    /// Single-owner background-worker coordination for horizontal scaling
+    /// (issue #242).
+    worker_leases (lease_name) {
+        lease_name -> Text,
+        holder_id -> Text,
+        acquired_at -> Timestamptz,
+        renewed_at -> Timestamptz,
+        expires_at -> Timestamptz,
+        fence_token -> Int8,
+    }
+}
+
 diesel::table! {
     /// Single-row store-level encryption marker. Its presence indicates the
     /// store is encrypted.
diff --git a/crates/server/src/storage/postgres.rs b/crates/server/src/storage/postgres.rs
index 7e1a28c6..f425278a 100644
--- a/crates/server/src/storage/postgres.rs
+++ b/crates/server/src/storage/postgres.rs
@@ -29,17 +29,63 @@ use url::Url;
 
 pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations");
 
+const MIGRATION_ADVISORY_LOCK_KEY: i64 = 0x4755_4152_4449_414E; // ASCII bytes of "GUARDIAN"
+const MIGRATION_LOCK_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
+const MIGRATION_LOCK_POLL: std::time::Duration = std::time::Duration::from_millis(500);
+
+#[derive(diesel::QueryableByName)]
+struct AdvisoryLockAcquired {
+    #[diesel(sql_type = diesel::sql_types::Bool)]
+    acquired: bool,
+}
+
 /// Run database migrations. Call once at application startup.
+///
+/// Migrations run under a session advisory lock so that replicas booting
+/// simultaneously serialize: the first holder migrates while the rest poll for
+/// the lock (giving up after `MIGRATION_LOCK_TIMEOUT`), then find nothing
+/// pending. The lock is released explicitly and, as a backstop, on connection drop.
 pub async fn run_migrations(database_url: &str) -> Result<(), String> {
     let url = database_url.to_string();
     tokio::task::spawn_blocking(move || {
         let mut conn = PgConnection::establish(&url)
             .map_err(|e| format!("Failed to connect for migrations: {e}"))?;
 
-        conn.run_pending_migrations(MIGRATIONS)
-            .map_err(|e| format!("Failed to run migrations: {e}"))?;
+        let deadline = std::time::Instant::now() + MIGRATION_LOCK_TIMEOUT;
+        loop {
+            let attempt = diesel::RunQueryDsl::get_result::<AdvisoryLockAcquired>(
+                diesel::sql_query(format!(
+                    "SELECT pg_try_advisory_lock({MIGRATION_ADVISORY_LOCK_KEY}) AS acquired"
+                )),
+                &mut conn,
+            )
+            .map_err(|e| format!("Failed to attempt migration advisory lock: {e}"))?;
+            if attempt.acquired {
+                break;
+            }
+            if std::time::Instant::now() >= deadline {
+                return Err(format!(
+                    "Timed out after {}s waiting for the migration advisory lock; \
+                     another replica may be stuck mid-migration",
+                    MIGRATION_LOCK_TIMEOUT.as_secs()
+                ));
+            }
+            std::thread::sleep(MIGRATION_LOCK_POLL);
+        }
+
+        let result = conn
+            .run_pending_migrations(MIGRATIONS)
+            .map(|_| ())
+            .map_err(|e| format!("Failed to run migrations: {e}"));
+
+        let _ = diesel::RunQueryDsl::execute(
+            diesel::sql_query(format!(
+                "SELECT pg_advisory_unlock({MIGRATION_ADVISORY_LOCK_KEY})"
+            )),
+            &mut conn,
+        );
 
-        Ok::<(), String>(())
+        result
     })
     .await
     .map_err(|e| format!("Migration task failed: {e}"))??;
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index 8acfcd26..6d2cd4cc 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -176,6 +176,7 @@ multi-stack deployments get scoped IDs.
 | `GUARDIAN_RATE_LIMIT_ENABLED` | `true` | Master kill-switch for HTTP rate limiting. Set `false` only in test environments. |
 | `GUARDIAN_RATE_BURST_PER_SEC` | `10` (code default); `200` set by the prod Terraform profile | Token-bucket burst. |
 | `GUARDIAN_RATE_PER_MIN` | `60` (code default); `5000` set by the prod Terraform profile | Sustained rate. |
+| `GUARDIAN_MAX_REPLICAS` | `1` (code default); autoscaling **max** capacity set by the prod Terraform profile | Per-replica rate-limit divisor: each replica enforces `global / GUARDIAN_MAX_REPLICAS` so the fleet aggregate stays at or below the global limit. Use the autoscaling **max**, not the running count. Drives rate-limiting only — coordination mode is backend-derived. Running below max over-throttles (accepted); an override is clamped up to the autoscaling max by Terraform. See [`runbooks/horizontal-scaling.md`](./runbooks/horizontal-scaling.md). |
 | `GUARDIAN_MAX_REQUEST_BYTES` | `1048576` (1 MB) | Reject request bodies larger than this. |
 | `GUARDIAN_MAX_PENDING_PROPOSALS_PER_ACCOUNT` | `20` | Account-level cap; hitting it returns `pending_proposals_limit`. |
 | `GUARDIAN_CORS_ALLOWED_ORIGINS` | _unset_ | Comma-separated explicit origins. **Unset → permissive `Any` origin / `Any` methods / `Any` headers, credentials disabled** (suitable for local dev). **Set → strict allowlist with `allow_credentials(true)`** (required for production browser clients). |
@@ -206,11 +207,29 @@ one-command Grafana dashboard stack.
 |---|---|---|
 | `GUARDIAN_OPERATOR_PUBLIC_KEYS_SECRET_ID` | _unset_ | AWS Secrets Manager secret name/ARN holding the operator allowlist JSON. Hot-reloaded on every challenge and authenticated `/dashboard/*` request. |
 | `GUARDIAN_OPERATOR_PUBLIC_KEYS_FILE` | _unset_ | Local JSON path for the same payload. Local dev only. |
-| `GUARDIAN_DASHBOARD_CURSOR_SECRET` | random per process | 32-byte hex HMAC key for dashboard pagination cursors. Pin a shared value when running ≥2 ECS tasks so cursors validate across replicas. |
+| `GUARDIAN_DASHBOARD_CURSOR_SECRET` | random per process if unset | 32-byte HMAC key, hex-encoded (64 hex characters), for dashboard pagination cursors. Pin a shared value across replicas so cursors validate everywhere. If unset, the server **warns**, generates an ephemeral per-process key, and still boots (in every stage): an ephemeral key only breaks dashboard pagination across replicas — nothing else — so it is not enforced by a startup guard. |
 
 `GET /dashboard/info.environment` is derived from `GUARDIAN_NETWORK_TYPE`
 (`testnet`, `devnet`, or `local`) rather than configured separately.
 
+### Prod-stage startup guards & HA behavior
+
+When `GUARDIAN_ENV=prod`, the server fails fast on misconfigurations that would
+otherwise break silently across replicas:
+
+- the **filesystem** storage backend is refused (single-instance only — use the
+  Postgres image with `DATABASE_URL`);
+- a rate limit that partitions to **0 requests per replica** is refused — i.e.
+  the global `GUARDIAN_RATE_BURST_PER_SEC`/`GUARDIAN_RATE_PER_MIN` is below
+  `GUARDIAN_MAX_REPLICAS`, which would make every replica throttle all traffic.
+  Raise the global limit or lower `GUARDIAN_MAX_REPLICAS`. (Non-prod only warns.)
+
+On the Postgres backend, operator/EVM sessions, login challenges, and the
+canonicalization lease are shared across replicas (backend-derived — no tunable
+disables this). If the database is briefly unavailable, authentication **fails
+closed** (rejected, never bypassed) and recovers automatically. See the
+[horizontal-scaling runbook](./runbooks/horizontal-scaling.md).
+
 Allowlist payload shapes and enrollment flow:
 [`docs/DASHBOARD.md`](./DASHBOARD.md).
 
@@ -305,5 +324,5 @@ this saves you from grepping:
 | EVM support locally | `GUARDIAN_EVM_RPC_URLS` (allowed chain set derives from its keys) + build with `--features evm` |
 | Use Secrets Manager for ACK keys | `GUARDIAN_ENV=prod` + `AWS_REGION=<region>` + secrets pre-created |
 | Run the dashboard locally | `GUARDIAN_OPERATOR_PUBLIC_KEYS_FILE=/path/to/allowlist.json` |
-| Multi-replica dashboard | `GUARDIAN_DASHBOARD_CURSOR_SECRET=<32-byte hex>` pinned across tasks |
+| Multi-replica (HA) | Postgres backend + `GUARDIAN_DASHBOARD_CURSOR_SECRET=<64 hex>` pinned across tasks + `GUARDIAN_MAX_REPLICAS=<autoscaling max>` (all set by the prod Terraform profile) |
 | Higher throughput in prod | `GUARDIAN_RATE_BURST_PER_SEC`, `GUARDIAN_RATE_PER_MIN`, `GUARDIAN_DB_POOL_MAX_SIZE` |
diff --git a/docs/SERVER_AWS_DEPLOY.md b/docs/SERVER_AWS_DEPLOY.md
index 19289227..cc3f280c 100644
--- a/docs/SERVER_AWS_DEPLOY.md
+++ b/docs/SERVER_AWS_DEPLOY.md
@@ -547,6 +547,22 @@ aws ecr delete-repository --repository-name guardian-server --force --region us-
 - RDS Proxy between ECS and RDS
 - higher Guardian runtime rate-limit and DB-pool defaults for benchmark traffic
 
+#### Horizontal scaling (multiple replicas)
+
+The prod profile runs 2–6 tasks behind the ALB with `GUARDIAN_ENV=prod`. Because
+it uses the Postgres backend, the server runs **shared coordination** (sessions,
+login challenges, and the canonicalization lease live in Postgres) — so any
+replica can serve any request and canonicalization runs on exactly one replica at
+a time. Terraform also sets `GUARDIAN_MAX_REPLICAS` from
+`effective_guardian_max_replicas` (derived from the autoscaling max capacity,
+prod `max(desired, 6)`) so rate limits are partitioned across the fleet. Set
+`GUARDIAN_DASHBOARD_CURSOR_SECRET` to a stable shared value so dashboard
+pagination works across replicas — leaving it unset only degrades pagination (a
+startup warning, not a failure) and it is not wired by Terraform today.
+Watch the per-replica `GUARDIAN_DB_POOL_MAX_SIZE` against Postgres
+`max_connections` (RDS Proxy absorbs most of this). Full operator guidance:
+[`runbooks/horizontal-scaling.md`](./runbooks/horizontal-scaling.md).
+
 ## HTTPS And gRPC
 
 HTTPS is enabled when `acm_certificate_arn` is set. DNS can be managed through Cloudflare, Route 53, or both depending on which variables are provided.
diff --git a/docs/guides/README.md b/docs/guides/README.md
index 4968bd02..b66d100f 100644
--- a/docs/guides/README.md
+++ b/docs/guides/README.md
@@ -24,6 +24,7 @@ storage, or network), not after Compose.
 | [AWS-managed ACK signers](./aws-signers/README.md) | Self-hosted Compose: Postgres + Secrets Manager (Falcon) + KMS (ECDSA) |
 | [Miden Dashboard UI](./miden-dashboard/README.md) | Self-hosted Compose: Postgres + Guardian server + the Miden Dashboard operator UI |
 | [Observability](./observability/README.md) | Local Compose: server + Prometheus + pre-provisioned Grafana dashboard |
+| [Horizontal scaling](./horizontal-scaling/README.md) | Local Compose: two replicas + round-robin proxy + shared Postgres (sessions, lease failover, fail-closed auth) |
 
 ## Adding a guide
 
diff --git a/docs/guides/horizontal-scaling/.env.example b/docs/guides/horizontal-scaling/.env.example
new file mode 100644
index 00000000..0139d38f
--- /dev/null
+++ b/docs/guides/horizontal-scaling/.env.example
@@ -0,0 +1,19 @@
+# Copy to .env and fill in. See ../../CONFIGURATION.md for the authoritative
+# meaning of each variable.
+
+# Published image tag to run (e.g. a release tag, or `latest`).
+GUARDIAN_VERSION=latest
+
+# Postgres password for the shared state/coordination database.
+POSTGRES_PASSWORD=guardian
+
+# 64-hex (32-byte) key signing dashboard pagination cursors. MUST be identical
+# on every replica. Generate with: openssl rand -hex 32
+GUARDIAN_DASHBOARD_CURSOR_SECRET=
+
+# Replica capacity the rate limiter divides global limits by. Match the number
+# of `server-*` services in docker-compose.yml (2 here).
+GUARDIAN_MAX_REPLICAS=2
+
+# Miden network the replicas target.
+GUARDIAN_NETWORK_TYPE=MidenDevnet
diff --git a/docs/guides/horizontal-scaling/Caddyfile b/docs/guides/horizontal-scaling/Caddyfile
new file mode 100644
index 00000000..c367a151
--- /dev/null
+++ b/docs/guides/horizontal-scaling/Caddyfile
@@ -0,0 +1,33 @@
+# Round-robin reverse proxy across the two Guardian replicas — the stand-in for
+# the ALB in front of the prod ECS tasks. Caddy's reverse_proxy automatically
+# sets X-Forwarded-For, which the Guardian server reads to key rate limiting by
+# the real client IP rather than the proxy's address.
+:8080 {
+	reverse_proxy server-a:3000 server-b:3000 {
+		lb_policy round_robin
+
+		# Like an ALB target group: actively poll each replica and pull an
+		# unhealthy one out of rotation, re-adding it when it recovers. Without
+		# this, round-robin keeps sending half the traffic to a dead replica
+		# (connection refused -> 502).
+		#
+		# Health probes hit `/`, which passes through the server's per-IP rate
+		# limiter keyed on this proxy's address. Keep the probe rate comfortably
+		# below the per-replica refill rate (global / GUARDIAN_MAX_REPLICAS, so
+		# ~0.5 req/s here); otherwise a probe can itself be rate-limited (429)
+		# and flap a healthy replica down.
+		health_uri /
+		health_interval 5s
+		health_timeout 2s
+
+		# Belt-and-suspenders: if a request races a replica that died between
+		# health polls, retry the other replica within this window instead of
+		# returning 502 to the client.
+		lb_try_duration 5s
+		lb_try_interval 250ms
+
+		# Passively mark an upstream down on a failed dial too.
+		fail_duration 10s
+		max_fails 1
+	}
+}
diff --git a/docs/guides/horizontal-scaling/README.md b/docs/guides/horizontal-scaling/README.md
new file mode 100644
index 00000000..f09c6d35
--- /dev/null
+++ b/docs/guides/horizontal-scaling/README.md
@@ -0,0 +1,249 @@
+# Horizontal scaling: two replicas behind a proxy
+
+Run two Guardian replicas behind a round-robin proxy, sharing one Postgres, and
+watch the coordination layer (issue #242) work end to end on your laptop. This
+mirrors the prod topology — 2–6 ECS tasks behind a load balancer — in miniature.
+
+```text
+                         ┌─────────────┐
+   client ──▶  :8080 ──▶ │ proxy/Caddy │ ──round-robin──┬──▶ server-a :3000
+                         └─────────────┘                └──▶ server-b :3010
+                                                              │         │
+                                                              └────┬────┘
+                                                                   ▼
+                                                            postgres :5432
+                                            (sessions · challenges · worker lease)
+```
+
+Everything that must be shared for a correct multi-replica deployment lives in
+Postgres, so a session minted on one replica is honored on the other, and only
+one replica ever canonicalizes. The variable meanings live in
+[`../../CONFIGURATION.md`](../../CONFIGURATION.md); the operational contract is in
+[`../../runbooks/horizontal-scaling.md`](../../runbooks/horizontal-scaling.md).
+
+## Prerequisites
+
+- Docker with Compose v2 (`docker compose`).
+- That's it — no AWS. ACK signing keys are generated locally per replica, which
+  is fine here because this guide exercises operator/EVM auth and coordination,
+  not the multisig co-signing flow (which is the only thing that needs one
+  shared ACK key — see [From this demo to production](#from-this-demo-to-production)).
+
+## Configure and run
+
+```sh
+cp .env.example .env
+# Set a real cursor secret — it MUST be identical on every replica:
+#   openssl rand -hex 32   →   paste into GUARDIAN_DASHBOARD_CURSOR_SECRET
+cp operators.example.json operators.json   # empty allowlist `[]`; add a key for the login walkthrough (step 7)
+```
+
+> **Building an unreleased version?** The published `latest` image does not yet
+> contain these coordination changes (issue #242). Drop in the local-build
+> override so the replicas build from the repo-root `Dockerfile` instead of
+> pulling from the registry — Compose auto-merges it, so no extra flags are
+> needed on any command:
+>
+> ```sh
+> cp docker-compose.override.yml.example docker-compose.override.yml
+> ```
+>
+> Once the change ships in a published image, delete
+> `docker-compose.override.yml` to go back to the registry image.
+
+```sh
+docker compose up -d --build
+```
+
+The proxy is at <http://localhost:8080>. Each replica is also exposed directly —
+`server-a` on `:3000`, `server-b` on `:3010` — so you can target a specific
+replica during the walkthrough. Postgres is on `:5432`.
+
+## What is shared (and why)
+
+| Shared in Postgres | Table | Effect across replicas |
+|---|---|---|
+| Operator/EVM sessions | `auth_sessions` | Log in on A, your cookie works on B; logout is honored fleet-wide. |
+| Login challenges | `auth_challenges` | A challenge is single-use even if issued on A and verified on B. |
+| Canonicalization lease | `worker_leases` | Exactly one replica promotes candidates; the others stand by. |
+
+Coordination is **backend-derived**: it is on because the backend is Postgres.
+No environment variable enables or disables it.
+
+## Validation walkthrough
+
+### 1. Both replicas report shared coordination
+
+```sh
+docker compose logs server-a server-b | grep -i "coordination mode"
+```
+
+Each replica prints one line; both must read `mode=shared backend=postgres`. If
+you ever see `mode=single-process backend=filesystem`, that replica is **not**
+safe to run alongside others.
+
+### 2. Exactly one canonicalization lease holder
+
+```sh
+docker compose exec postgres \
+  psql -U guardian -d guardian \
+  -c "select lease_name, holder_id, fence_token from worker_leases;"
+```
+
+You get a single `canonicalization` row with one `holder_id` (formatted
+`{pid}-{random}`) — never two. Both replicas run the worker loop, but only the
+lease holder does work; the other keeps trying to acquire and backs off.
+
+### 3. Lease failover with a fencing-token bump
+
+Stop the current holder and watch a different replica take over within the lease
+TTL (~30s, i.e. 3× the 10s canonicalization interval):
+
+```sh
+docker compose stop server-a       # if A wasn't the holder, stop server-b instead
+watch -n2 'docker compose exec -T postgres \
+  psql -U guardian -d guardian \
+  -c "select holder_id, fence_token, expires_at from worker_leases;"'
+```
+
+`holder_id` changes to the surviving replica and `fence_token` **increments** —
+the increment is the steal signal a superseded holder uses to fence itself off
+at its next write. Bring the replica back with `docker compose start server-a`;
+the lease does not bounce back (the current holder keeps renewing).
+
+### 4. Proxy request failover
+
+The lease failover above is server-side; the proxy also has to stop routing
+*client* requests to a dead replica. That is what the `health_uri` / `lb_*`
+directives in the [`Caddyfile`](./Caddyfile) do — a bare `round_robin` (no health
+checks) keeps sending half the traffic to the dead replica and returns `502`.
+Kill a replica and hit the proxy:
+
+```sh
+docker compose stop server-b
+for i in $(seq 1 4); do curl -s -o /dev/null -w "%{http_code} " \
+  http://localhost:8080/pubkey; done; echo
+```
+
+Every response stays `200` — Caddy health-checks each replica and routes only to
+the survivor. Bring it back with `docker compose start server-b`; Caddy re-adds
+it within one health interval (~5s). (Strip the health directives from the
+`Caddyfile` and the same loop returns alternating `502`s.)
+
+### 5. Auth fails closed when the shared store is down
+
+Pause Postgres and watch the holder step down rather than barrel ahead:
+
+```sh
+docker compose pause postgres
+docker compose logs -f server-a server-b   # Ctrl-C after a few seconds
+```
+
+You will see lease renew/acquire failures and storage errors — the worker
+**cancels its pass** instead of canonicalizing blind. If you have completed the
+login walkthrough below, an authenticated request fails rather than silently
+succeeding: authentication is **fail-closed**.
+
+> `docker compose pause` freezes Postgres mid-connection (SIGSTOP), so an
+> in-flight request *hangs until it times out* rather than getting a prompt
+> `5xx`. Either way it never succeeds. To see a fast `5xx` instead (socket
+> closed → connection refused), use `docker compose stop postgres` and
+> `docker compose start postgres` to recover.
+
+Recover with:
+
+```sh
+docker compose unpause postgres
+```
+
+Coordination resumes automatically; no manual intervention.
+
+### 6. Rate-limit partitioning and `X-Forwarded-For`
+
+Each replica enforces `global / GUARDIAN_MAX_REPLICAS`. With the default global
+burst of 10 and `GUARDIAN_MAX_REPLICAS=2`, a single replica caps at ~5 req/s.
+Hammer one replica directly (the challenge endpoint is unauthenticated and
+rate-limited):
+
+```sh
+for i in $(seq 1 12); do
+  curl -s -o /dev/null -w "%{http_code} " \
+    "http://localhost:3000/auth/challenge?commitment=0xdemo"
+done; echo
+```
+
+After the per-replica burst is spent you see `429`s. Through the proxy
+(`:8080`), Caddy sets `X-Forwarded-For`, so the server keys the limit on your
+real client IP rather than the proxy address — confirm by repeating the loop
+against `http://localhost:8080/...` and seeing the same per-IP behavior.
+
+### 7. (End-to-end) An operator session survives losing its replica
+
+This is the headline, and it needs a real operator key to sign the challenge.
+Use the [`examples/operator-smoke-web`](../../../examples/operator-smoke-web)
+harness (or the operator client) pointed at the **proxy** URL
+`http://localhost:8080`:
+
+1. Generate a Falcon operator key with the harness and add its public key to
+   `operators.json` (replacing the empty `[]`); the allowlist hot-reloads, so no
+   restart is needed:
+
+   ```json
+   [{ "public_key": "0x<falcon-operator-pubkey>", "permissions": ["dashboard:read"] }]
+   ```
+2. Complete the login (`GET /auth/challenge` → sign → `POST
+   /auth/verify`). The proxy round-robins, so this may land on either
+   replica; the session row is written to `auth_sessions`.
+3. Make an authenticated request (e.g. `GET /dashboard/accounts`) a few times —
+   each may be served by a different replica, and all succeed: the cookie is
+   validated against the shared store, not per-process memory.
+4. Now `docker compose stop` the replica that handled your login and repeat —
+   **your session still works** on the survivor. Then `POST
+   /auth/logout`; the revocation is honored on every replica.
+
+## Cleanup
+
+```sh
+docker compose down -v        # -v also drops the Postgres + keystore volumes
+```
+
+## From this demo to production
+
+This guide stays AWS-free to be runnable; a real prod deployment differs in two
+ways that do not change the coordination behavior shown above:
+
+- **`GUARDIAN_ENV=prod`** activates the prod-stage startup guards — a filesystem
+  storage backend and a rate limit that partitions to 0 req/replica are each
+  refused at startup. (An unset `GUARDIAN_DASHBOARD_CURSOR_SECRET` only *warns* —
+  it degrades cross-replica dashboard pagination, not custody, so a
+  single-replica prod server still boots.) Note these guards live behind the ACK
+  registry init, which in prod requires AWS first: set `GUARDIAN_ENV=prod`
+  without `AWS_REGION` and the server refuses to start with `AWS_REGION is
+  required when GUARDIAN_ENV=prod` before it ever reaches the storage or
+  rate-limit checks — so observing those two specifically needs AWS configured.
+- **One shared ACK signing key.** Each replica here auto-generates its own
+  guardian ACK key into its local keystore — and in non-prod it does so on
+  *every* startup, so the identity is not even stable across a single replica's
+  restart. That is fine for auth + coordination, which is all this guide
+  exercises. It is **not** enough for the multisig co-signing flow: every replica
+  must present the *same* guardian identity, because each account pins the
+  guardian's `/pubkey` commitment into its `openzeppelin::guardian::public_key`
+  slot at configure time. Route a multisig flow through the round-robin proxy and
+  the replica that did not configure the account rejects it with
+  `invalid GUARDIAN public key binding`. So prod pins one ACK key via AWS Secrets
+  Manager — see the [aws-signers guide](../aws-signers/README.md). Per-account
+  state already lives in Postgres and needs nothing extra.
+
+> **Smoke-testing multisig against this demo?** Until a stable non-AWS identity
+> lands ([issue #289](https://github.com/OpenZeppelin/guardian/issues/289) — a
+> local file/env signer key, so every replica can share one identity without
+> AWS), point your client at a **single replica directly**
+> (`http://localhost:3000`), never the proxy (`:8080`). Also make the client's
+> Miden RPC network match the server's `GUARDIAN_NETWORK_TYPE` (e.g. devnet RPC
+> ↔ `MidenDevnet`), or canonicalization will loop on an `on_chain=0x00…0`
+> commitment because the account was deployed to a different network than the
+> guardian verifies against.
+
+The managed path (published Postgres image + the prod Terraform profile) sets
+all of this for you; see [`../../SERVER_AWS_DEPLOY.md`](../../SERVER_AWS_DEPLOY.md)
+and the [horizontal-scaling runbook](../../runbooks/horizontal-scaling.md).
diff --git a/docs/guides/horizontal-scaling/docker-compose.override.yml.example b/docs/guides/horizontal-scaling/docker-compose.override.yml.example
new file mode 100644
index 00000000..ce405ae3
--- /dev/null
+++ b/docs/guides/horizontal-scaling/docker-compose.override.yml.example
@@ -0,0 +1,28 @@
+# Local-build override for testing UNRELEASED changes — e.g. a feature branch the
+# published `ghcr.io/openzeppelin/guardian` image does not yet contain. Builds
+# both replicas from the repo-root Dockerfile (the `postgres` feature, like the
+# published image) instead of pulling from the registry.
+#
+# Copy it into place; Compose then auto-merges it onto docker-compose.yml with no
+# -f flags to remember (so every command in README.md works unchanged):
+#   cp docker-compose.override.yml.example docker-compose.override.yml
+#   docker compose up -d --build
+#
+# Once the changes ship in a published image, delete docker-compose.override.yml
+# and `docker compose up -d` uses the registry image again.
+
+x-local-build: &local-build
+  image: guardian-server:local
+  pull_policy: build
+  build:
+    context: ../../..
+    dockerfile: Dockerfile
+    target: server-runner
+    args:
+      GUARDIAN_SERVER_FEATURES: postgres
+
+services:
+  server-a:
+    <<: *local-build
+  server-b:
+    <<: *local-build
diff --git a/docs/guides/horizontal-scaling/docker-compose.yml b/docs/guides/horizontal-scaling/docker-compose.yml
new file mode 100644
index 00000000..b08e32fc
--- /dev/null
+++ b/docs/guides/horizontal-scaling/docker-compose.yml
@@ -0,0 +1,89 @@
+# Horizontal scaling: two Guardian replicas behind a round-robin proxy, sharing
+# one Postgres. Demonstrates the coordination layer (issue #242) end to end and
+# locally: shared operator/EVM sessions and login challenges, the single-owner
+# canonicalization lease with failover, per-replica rate-limit partitioning, and
+# fail-closed auth when the shared store is unavailable.
+#
+# Run from this directory:
+#   cp .env.example .env                       # then fill in the values
+#   cp operators.example.json operators.json   # for the operator-login walkthrough
+#   docker compose up
+#
+# Compose auto-loads .env for both ${VAR} interpolation and each server's runtime
+# config. Walkthrough: ./README.md   Full config reference: ../../CONFIGURATION.md
+#
+# Coordination is backend-derived: because both replicas use the Postgres backend
+# they share sessions/challenges/lease automatically — no flag turns it on. ACK
+# signing keys are auto-generated per replica here, which is fine for this demo
+# (it exercises auth + coordination, not multisig co-signing); a real prod
+# deployment pins one shared ACK key via Secrets Manager (GUARDIAN_ENV=prod, see
+# the aws-signers guide).
+
+x-guardian-server: &guardian-server
+  image: ghcr.io/openzeppelin/guardian:${GUARDIAN_VERSION:-latest}
+  pull_policy: always
+  depends_on:
+    postgres:
+      condition: service_healthy
+  environment:
+    RUST_LOG: info
+    GUARDIAN_NETWORK_TYPE: ${GUARDIAN_NETWORK_TYPE:-MidenDevnet}
+    DATABASE_URL: postgres://guardian:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in .env}@postgres:5432/guardian
+    GUARDIAN_KEYSTORE_PATH: /var/guardian/keystore
+    GUARDIAN_OPERATOR_PUBLIC_KEYS_FILE: /etc/guardian/operators.json
+    # Pinned and identical on every replica: pagination cursors are signed with
+    # this key, so a cursor minted on one replica must validate on another.
+    GUARDIAN_DASHBOARD_CURSOR_SECRET: ${GUARDIAN_DASHBOARD_CURSOR_SECRET:?set GUARDIAN_DASHBOARD_CURSOR_SECRET in .env (openssl rand -hex 32)}
+    # Rate-limit partition divisor: each replica enforces global/GUARDIAN_MAX_REPLICAS
+    # so the fleet aggregate stays at or below the global limit.
+    GUARDIAN_MAX_REPLICAS: ${GUARDIAN_MAX_REPLICAS:-2}
+
+services:
+  postgres:
+    image: postgres:16-alpine
+    volumes:
+      - guardian-postgres:/var/lib/postgresql/data
+    environment:
+      POSTGRES_USER: guardian
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in .env}
+      POSTGRES_DB: guardian
+    ports:
+      - "5432:5432" # exposed so you can inspect worker_leases / auth_sessions with psql
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U guardian -d guardian"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+  server-a:
+    <<: *guardian-server
+    hostname: server-a
+    volumes:
+      - guardian-keystore-a:/var/guardian/keystore
+      - ./operators.json:/etc/guardian/operators.json:ro
+    ports:
+      - "3000:3000" # target replica A directly (bypassing the proxy)
+
+  server-b:
+    <<: *guardian-server
+    hostname: server-b
+    volumes:
+      - guardian-keystore-b:/var/guardian/keystore
+      - ./operators.json:/etc/guardian/operators.json:ro
+    ports:
+      - "3010:3000" # target replica B directly (bypassing the proxy)
+
+  proxy:
+    image: caddy:2-alpine
+    depends_on:
+      - server-a
+      - server-b
+    volumes:
+      - ./Caddyfile:/etc/caddy/Caddyfile:ro
+    ports:
+      - "8080:8080" # round-robin entrypoint; this is what a client/LB would hit
+
+volumes:
+  guardian-postgres:
+  guardian-keystore-a:
+  guardian-keystore-b:
diff --git a/docs/guides/horizontal-scaling/operators.example.json b/docs/guides/horizontal-scaling/operators.example.json
new file mode 100644
index 00000000..fe51488c
--- /dev/null
+++ b/docs/guides/horizontal-scaling/operators.example.json
@@ -0,0 +1 @@
+[]
diff --git a/docs/openapi-dashboard.json b/docs/openapi-dashboard.json
index ca4d5bf0..0a96804d 100644
--- a/docs/openapi-dashboard.json
+++ b/docs/openapi-dashboard.json
@@ -108,6 +108,16 @@
                 }
               }
             }
+          },
+          "500": {
+            "description": "Session revocation failed",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ApiErrorResponse"
+                }
+              }
+            }
           }
         },
         "security": [
diff --git a/docs/openapi-evm.json b/docs/openapi-evm.json
index 98ff4d3e..fdb377dd 100644
--- a/docs/openapi-evm.json
+++ b/docs/openapi-evm.json
@@ -194,6 +194,16 @@
                 }
               }
             }
+          },
+          "500": {
+            "description": "Session revocation failed",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ApiErrorResponse"
+                }
+              }
+            }
           }
         },
         "security": [
diff --git a/docs/openapi.json b/docs/openapi.json
index b5b1b04d..b3874347 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -108,6 +108,16 @@
                 }
               }
             }
+          },
+          "500": {
+            "description": "Session revocation failed",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ApiErrorResponse"
+                }
+              }
+            }
           }
         },
         "security": [
@@ -2190,6 +2200,16 @@
                 }
               }
             }
+          },
+          "500": {
+            "description": "Session revocation failed",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ApiErrorResponse"
+                }
+              }
+            }
           }
         },
         "security": [
diff --git a/docs/runbooks/horizontal-scaling.md b/docs/runbooks/horizontal-scaling.md
new file mode 100644
index 00000000..8d793c61
--- /dev/null
+++ b/docs/runbooks/horizontal-scaling.md
@@ -0,0 +1,101 @@
+# Runbook: Horizontal Scaling (multiple Guardian replicas)
+
+Guardian runs as 2–6 ECS tasks behind a round-robin load balancer in the prod
+profile. This runbook covers what an operator must configure for a correct
+high-availability (HA) deployment and how the server behaves across replicas.
+Tracking: issue #242.
+
+## TL;DR — required for a correct multi-replica deployment
+
+| Setting | Why it matters across replicas |
+|---|---|
+| **Postgres backend** (`DATABASE_URL`) | Sessions, login challenges, and the canonicalization lease live in Postgres so they are shared. The filesystem backend is **dev-only** and is refused at startup in the prod stage. |
+| **`GUARDIAN_DASHBOARD_CURSOR_SECRET`** (64 hex chars) | Pagination cursors are signed with this key. If it differs per replica, a cursor minted on one replica fails on another. Unset → the server **warns** and generates an ephemeral per-process secret (boots fine in every stage); pin a shared value so multi-replica dashboard pagination works. Degrades only pagination, not custody. |
+| **`GUARDIAN_ENV=prod`** | Activates the prod-stage startup guards (filesystem-backend refusal, 0-req/replica rate-limit refusal). Set by Terraform from `var.deployment_stage`. |
+| **`GUARDIAN_MAX_REPLICAS`** | Rate-limit partitioning divisor (see below). Defaults from the autoscaling max capacity via Terraform. |
+
+With the published Postgres image + the prod Terraform profile, all of these are
+set for you. The rest of this doc is for understanding and for non-default
+deployments.
+
+## Coordination is backend-derived (not a tunable)
+
+The coordination mode is determined by the **storage backend alone**:
+
+- **Postgres backend → shared coordination** (replica-safe). Always. No tunable
+  can turn this off — a missing or wrong env var can never silently revert a
+  Postgres deployment to per-process state.
+- **Filesystem backend → in-memory, single-process** coordination (dev only).
+
+The startup log emits one line reflecting the resolved state, e.g.:
+
+```text
+coordination mode=shared backend=postgres stage=prod max_replicas=6 cursor_secret=configured
+```
+
+If you ever see `mode=single-process backend=filesystem` on a deployment you
+believe is multi-replica, that deployment is **not** safe to run with more than
+one task.
+
+If the server is auto-built with the Postgres backend but coordination handles
+were not wired (only possible via a manual/embedded builder), it **fails to
+start** rather than falling back to per-process state.
+
+## Behavior across replicas
+
+- **Operator & EVM login**: a challenge issued on one replica verifies on any
+  other; an established session is honored everywhere; logout and expiry are
+  effective fleet-wide.
+- **Canonicalization** runs on exactly one replica at a time via a Postgres
+  lease (`worker_leases`). Leadership transfers automatically to another replica
+  within one lease TTL (≈ 3× the canonicalization check interval) if the holder
+  crashes. A superseded holder cannot commit a canonical write (a fencing token
+  is checked before every state/delta write).
+- **Rate limiting** is per-process but partitioned (see below).
+
+## Failure modes (by design)
+
+- **Shared store (Postgres) briefly unavailable → auth fails closed.** Login and
+  authenticated requests are rejected (never bypassed) until Postgres returns.
+  The canonicalization leader steps down rather than risk double-processing, and
+  resumes automatically. This is a deliberate change from the old
+  always-available in-memory behavior.
+- **DB connection budget**: each replica opens up to `GUARDIAN_DB_POOL_MAX_SIZE`
+  (default 32 in prod) connections, plus the metadata pool, plus per-request
+  session lookups. With N replicas the total can approach
+  `N × (pools × size)`; keep it under Postgres `max_connections`. Prod routes
+  through RDS Proxy by default, which pools server-side and absorbs much of this.
+
+## `GUARDIAN_MAX_REPLICAS` and rate limiting
+
+The configured global limits (`GUARDIAN_RATE_BURST_PER_SEC`,
+`GUARDIAN_RATE_PER_MIN`) are divided by `GUARDIAN_MAX_REPLICAS` so each replica
+enforces `global / GUARDIAN_MAX_REPLICAS`. With round-robin distribution the
+fleet aggregate stays at or below the global limit.
+
+- Default = the deployment's **autoscaling max capacity** (Terraform). It must be
+  the *max*, not the count you happen to run now — partitioning by max is
+  conservative.
+- **Drives rate-limiting only.** It has no effect on coordination mode.
+- **Tolerance band**: when fewer than max replicas are running, the fleet
+  over-throttles (stricter than the global limit) — accepted. HTTP keep-alive can
+  also pin a client to one replica, throttling it at `global / max` (e.g. 1/6 of
+  the global limit) — also accepted; it is fail-closed (never too loose).
+- **Override** (`var.guardian_max_replicas`): an explicit value is clamped **up**
+  to the autoscaling max, so it can never drop below real capacity (which would
+  let the aggregate exceed the global limit). Setting it higher only
+  over-throttles.
+
+## Validate the coordination behavior locally
+
+To see this contract in action before deploying — shared sessions, single-owner
+lease with failover, fail-closed auth, rate-limit partitioning — run the
+[horizontal-scaling guide](../guides/horizontal-scaling/README.md): two replicas
+behind a round-robin proxy sharing one Postgres, all on Docker Compose.
+
+## Filesystem backend is dev-only
+
+The filesystem backend keeps state local to one task (and does not persist audit
+events). In the prod stage the server **refuses to start** on the filesystem
+backend with an actionable error. Use it only for local, single-process
+development.
diff --git a/examples/operator-smoke-web/package-lock.json b/examples/operator-smoke-web/package-lock.json
index 9065d832..40d3bb21 100644
--- a/examples/operator-smoke-web/package-lock.json
+++ b/examples/operator-smoke-web/package-lock.json
@@ -27,7 +27,7 @@
     },
     "../../packages/guardian-operator-client": {
       "name": "@openzeppelin/guardian-operator-client",
-      "version": "0.14.7",
+      "version": "0.15.0",
       "license": "MIT",
       "devDependencies": {
         "typescript": "^5.7.2",
diff --git a/infra/data.tf b/infra/data.tf
index bb7963e2..aa00dc45 100644
--- a/infra/data.tf
+++ b/infra/data.tf
@@ -78,56 +78,60 @@ locals {
     for subnet_id in local.effective_rds_proxy_subnet_ids : data.aws_subnet.rds_proxy_candidate[subnet_id].availability_zone_id
   ])
 
-  cluster_name                                 = var.cluster_name != "" ? var.cluster_name : "${var.stack_name}-cluster"
-  server_service_name                          = var.server_service_name != "" ? var.server_service_name : "${var.stack_name}-server"
-  alb_name                                     = var.alb_name != "" ? var.alb_name : "${var.stack_name}-alb"
-  target_group_name                            = var.target_group_name != "" ? var.target_group_name : "${var.stack_name}-server-tg"
-  grpc_target_group_name                       = "${var.stack_name}-grpc-tg"
-  alb_security_group_name                      = var.alb_security_group_name != "" ? var.alb_security_group_name : "${var.stack_name}-alb-sg"
-  server_security_group_name                   = var.server_security_group_name != "" ? var.server_security_group_name : "${var.stack_name}-server-sg"
-  postgres_security_group_name                 = var.postgres_security_group_name != "" ? var.postgres_security_group_name : "${var.stack_name}-postgres-sg"
-  task_execution_role_name                     = var.task_execution_role_name != "" ? var.task_execution_role_name : "${var.stack_name}-ecs-task-execution"
-  task_role_name                               = var.task_role_name != "" ? var.task_role_name : "${var.stack_name}-ecs-task"
-  server_task_family                           = var.server_task_family != "" ? var.server_task_family : "${var.stack_name}-server"
-  server_container_name                        = var.server_container_name != "" ? var.server_container_name : "${var.stack_name}-server"
-  server_log_group_name                        = var.server_log_group_name != "" ? var.server_log_group_name : "/ecs/${local.server_service_name}"
-  cluster_log_group_name                       = "/aws/ecs/${local.cluster_name}/cluster"
-  postgres_identifier_seed                     = lower(replace(var.stack_name, "/[^0-9A-Za-z]/", ""))
-  postgres_identifier_base                     = local.postgres_identifier_seed != "" ? local.postgres_identifier_seed : "guardian"
-  postgres_identifier_default                  = substr(can(regex("^[a-z]", local.postgres_identifier_base)) ? local.postgres_identifier_base : "g${local.postgres_identifier_base}", 0, 63)
-  postgres_db                                  = var.postgres_db != "" ? var.postgres_db : local.postgres_identifier_default
-  postgres_user                                = var.postgres_user != "" ? var.postgres_user : local.postgres_identifier_default
-  postgres_password                            = var.postgres_password != "" ? var.postgres_password : "${var.stack_name}_dev_password"
-  postgres_port                                = 5432
-  rds_instance_identifier                      = "${var.stack_name}-postgres"
-  rds_subnet_group_name                        = "${var.stack_name}-postgres-subnets"
-  database_secret_name                         = "${var.stack_name}/server/database-url"
-  database_credentials_secret_name             = "${var.stack_name}/server/database-credentials"
-  operator_public_keys_secret_name             = "${var.stack_name}/server/operator-public-keys"
-  evm_allowed_chain_ids_secret_name            = "${var.stack_name}/server/evm-allowed-chain-ids"
-  evm_rpc_urls_secret_name                     = "${var.stack_name}/server/evm-rpc-urls"
-  ack_falcon_secret_name                       = var.guardian_ack_falcon_secret_name != "" ? var.guardian_ack_falcon_secret_name : "${var.stack_name}/server/ack-falcon-secret-key"
-  ack_ecdsa_secret_name                        = var.guardian_ack_ecdsa_secret_name != "" ? var.guardian_ack_ecdsa_secret_name : "${var.stack_name}/server/ack-ecdsa-secret-key"
-  managed_storage_encryption_enabled           = local.is_prod && var.guardian_storage_encryption_secret_name != ""
-  storage_encryption_secret_name               = local.managed_storage_encryption_enabled ? var.guardian_storage_encryption_secret_name : ""
-  rds_proxy_name                               = "${var.stack_name}-postgres-proxy"
-  rds_proxy_role_name                          = "${var.stack_name}-rds-proxy"
-  rds_proxy_security_group_name                = "${var.stack_name}-rds-proxy-sg"
-  rds_master_password                          = var.postgres_password != "" ? var.postgres_password : random_password.postgres[0].result
-  effective_rds_instance_class                 = var.rds_instance_class != "" ? var.rds_instance_class : (local.is_prod ? "db.r6g.large" : "db.t3.micro")
-  effective_rds_allocated_storage              = var.rds_allocated_storage != null ? var.rds_allocated_storage : (local.is_prod ? 50 : 20)
-  effective_server_desired_count               = var.server_desired_count != null ? var.server_desired_count : (local.is_prod ? 2 : 1)
-  effective_server_autoscaling_enabled         = var.server_autoscaling_enabled != null ? var.server_autoscaling_enabled : local.is_prod
-  effective_server_autoscaling_min_capacity    = var.server_autoscaling_min_capacity != null ? var.server_autoscaling_min_capacity : local.effective_server_desired_count
-  effective_server_autoscaling_max_capacity    = var.server_autoscaling_max_capacity != null ? var.server_autoscaling_max_capacity : (local.is_prod ? max(local.effective_server_desired_count, 6) : local.effective_server_desired_count)
-  effective_server_autoscaling_cpu_target      = var.server_autoscaling_cpu_target != null ? var.server_autoscaling_cpu_target : 65
-  effective_server_autoscaling_memory_target   = var.server_autoscaling_memory_target != null ? var.server_autoscaling_memory_target : 75
-  effective_rds_proxy_enabled                  = var.rds_proxy_enabled != null ? var.rds_proxy_enabled : local.is_prod
-  effective_rds_proxy_route_database_url       = local.effective_rds_proxy_enabled && (var.rds_proxy_route_database_url != null ? var.rds_proxy_route_database_url : true)
-  effective_rds_max_allocated_storage          = var.rds_max_allocated_storage != null ? var.rds_max_allocated_storage : (local.is_prod ? max(local.effective_rds_allocated_storage, 200) : null)
-  effective_guardian_rate_limit_enabled        = var.guardian_rate_limit_enabled != null ? var.guardian_rate_limit_enabled : true
-  effective_guardian_rate_burst_per_sec        = var.guardian_rate_burst_per_sec != null ? var.guardian_rate_burst_per_sec : (local.is_prod ? 200 : 10)
-  effective_guardian_rate_per_min              = var.guardian_rate_per_min != null ? var.guardian_rate_per_min : (local.is_prod ? 5000 : 60)
+  cluster_name                               = var.cluster_name != "" ? var.cluster_name : "${var.stack_name}-cluster"
+  server_service_name                        = var.server_service_name != "" ? var.server_service_name : "${var.stack_name}-server"
+  alb_name                                   = var.alb_name != "" ? var.alb_name : "${var.stack_name}-alb"
+  target_group_name                          = var.target_group_name != "" ? var.target_group_name : "${var.stack_name}-server-tg"
+  grpc_target_group_name                     = "${var.stack_name}-grpc-tg"
+  alb_security_group_name                    = var.alb_security_group_name != "" ? var.alb_security_group_name : "${var.stack_name}-alb-sg"
+  server_security_group_name                 = var.server_security_group_name != "" ? var.server_security_group_name : "${var.stack_name}-server-sg"
+  postgres_security_group_name               = var.postgres_security_group_name != "" ? var.postgres_security_group_name : "${var.stack_name}-postgres-sg"
+  task_execution_role_name                   = var.task_execution_role_name != "" ? var.task_execution_role_name : "${var.stack_name}-ecs-task-execution"
+  task_role_name                             = var.task_role_name != "" ? var.task_role_name : "${var.stack_name}-ecs-task"
+  server_task_family                         = var.server_task_family != "" ? var.server_task_family : "${var.stack_name}-server"
+  server_container_name                      = var.server_container_name != "" ? var.server_container_name : "${var.stack_name}-server"
+  server_log_group_name                      = var.server_log_group_name != "" ? var.server_log_group_name : "/ecs/${local.server_service_name}"
+  cluster_log_group_name                     = "/aws/ecs/${local.cluster_name}/cluster"
+  postgres_identifier_seed                   = lower(replace(var.stack_name, "/[^0-9A-Za-z]/", ""))
+  postgres_identifier_base                   = local.postgres_identifier_seed != "" ? local.postgres_identifier_seed : "guardian"
+  postgres_identifier_default                = substr(can(regex("^[a-z]", local.postgres_identifier_base)) ? local.postgres_identifier_base : "g${local.postgres_identifier_base}", 0, 63)
+  postgres_db                                = var.postgres_db != "" ? var.postgres_db : local.postgres_identifier_default
+  postgres_user                              = var.postgres_user != "" ? var.postgres_user : local.postgres_identifier_default
+  postgres_password                          = var.postgres_password != "" ? var.postgres_password : "${var.stack_name}_dev_password"
+  postgres_port                              = 5432
+  rds_instance_identifier                    = "${var.stack_name}-postgres"
+  rds_subnet_group_name                      = "${var.stack_name}-postgres-subnets"
+  database_secret_name                       = "${var.stack_name}/server/database-url"
+  database_credentials_secret_name           = "${var.stack_name}/server/database-credentials"
+  operator_public_keys_secret_name           = "${var.stack_name}/server/operator-public-keys"
+  evm_allowed_chain_ids_secret_name          = "${var.stack_name}/server/evm-allowed-chain-ids"
+  evm_rpc_urls_secret_name                   = "${var.stack_name}/server/evm-rpc-urls"
+  ack_falcon_secret_name                     = var.guardian_ack_falcon_secret_name != "" ? var.guardian_ack_falcon_secret_name : "${var.stack_name}/server/ack-falcon-secret-key"
+  ack_ecdsa_secret_name                      = var.guardian_ack_ecdsa_secret_name != "" ? var.guardian_ack_ecdsa_secret_name : "${var.stack_name}/server/ack-ecdsa-secret-key"
+  managed_storage_encryption_enabled         = local.is_prod && var.guardian_storage_encryption_secret_name != ""
+  storage_encryption_secret_name             = local.managed_storage_encryption_enabled ? var.guardian_storage_encryption_secret_name : ""
+  rds_proxy_name                             = "${var.stack_name}-postgres-proxy"
+  rds_proxy_role_name                        = "${var.stack_name}-rds-proxy"
+  rds_proxy_security_group_name              = "${var.stack_name}-rds-proxy-sg"
+  rds_master_password                        = var.postgres_password != "" ? var.postgres_password : random_password.postgres[0].result
+  effective_rds_instance_class               = var.rds_instance_class != "" ? var.rds_instance_class : (local.is_prod ? "db.r6g.large" : "db.t3.micro")
+  effective_rds_allocated_storage            = var.rds_allocated_storage != null ? var.rds_allocated_storage : (local.is_prod ? 50 : 20)
+  effective_server_desired_count             = var.server_desired_count != null ? var.server_desired_count : (local.is_prod ? 2 : 1)
+  effective_server_autoscaling_enabled       = var.server_autoscaling_enabled != null ? var.server_autoscaling_enabled : local.is_prod
+  effective_server_autoscaling_min_capacity  = var.server_autoscaling_min_capacity != null ? var.server_autoscaling_min_capacity : local.effective_server_desired_count
+  effective_server_autoscaling_max_capacity  = var.server_autoscaling_max_capacity != null ? var.server_autoscaling_max_capacity : (local.is_prod ? max(local.effective_server_desired_count, 6) : local.effective_server_desired_count)
+  effective_server_autoscaling_cpu_target    = var.server_autoscaling_cpu_target != null ? var.server_autoscaling_cpu_target : 65
+  effective_server_autoscaling_memory_target = var.server_autoscaling_memory_target != null ? var.server_autoscaling_memory_target : 75
+  effective_rds_proxy_enabled                = var.rds_proxy_enabled != null ? var.rds_proxy_enabled : local.is_prod
+  effective_rds_proxy_route_database_url     = local.effective_rds_proxy_enabled && (var.rds_proxy_route_database_url != null ? var.rds_proxy_route_database_url : true)
+  effective_rds_max_allocated_storage        = var.rds_max_allocated_storage != null ? var.rds_max_allocated_storage : (local.is_prod ? max(local.effective_rds_allocated_storage, 200) : null)
+  effective_guardian_rate_limit_enabled      = var.guardian_rate_limit_enabled != null ? var.guardian_rate_limit_enabled : true
+  effective_guardian_rate_burst_per_sec      = var.guardian_rate_burst_per_sec != null ? var.guardian_rate_burst_per_sec : (local.is_prod ? 200 : 10)
+  effective_guardian_rate_per_min            = var.guardian_rate_per_min != null ? var.guardian_rate_per_min : (local.is_prod ? 5000 : 60)
+  # GUARDIAN_MAX_REPLICAS defaults to the autoscaling max capacity. An explicit
+  # override is clamped UP to that max so it can never drop below real capacity
+  # (which would let the fleet aggregate exceed the global rate limit).
+  effective_guardian_max_replicas              = var.guardian_max_replicas != null ? max(var.guardian_max_replicas, local.effective_server_autoscaling_max_capacity) : local.effective_server_autoscaling_max_capacity
   effective_guardian_db_pool_max_size          = var.guardian_db_pool_max_size != null ? var.guardian_db_pool_max_size : (local.is_prod ? 32 : 16)
   effective_guardian_metadata_db_pool_max_size = var.guardian_metadata_db_pool_max_size != null ? var.guardian_metadata_db_pool_max_size : local.effective_guardian_db_pool_max_size
   managed_evm_allowed_chain_ids_secret_enabled = var.guardian_evm_allowed_chain_ids_secret_arn == "" && var.guardian_evm_allowed_chain_ids != ""
diff --git a/infra/ecs.tf b/infra/ecs.tf
index a6987c7f..d0b6daee 100644
--- a/infra/ecs.tf
+++ b/infra/ecs.tf
@@ -144,6 +144,10 @@ resource "aws_ecs_task_definition" "server" {
             name  = "GUARDIAN_RATE_PER_MIN"
             value = tostring(local.effective_guardian_rate_per_min)
           },
+          {
+            name  = "GUARDIAN_MAX_REPLICAS"
+            value = tostring(local.effective_guardian_max_replicas)
+          },
           {
             name  = "GUARDIAN_DB_POOL_MAX_SIZE"
             value = tostring(local.effective_guardian_db_pool_max_size)
diff --git a/infra/variables.tf b/infra/variables.tf
index a65cffe3..a0be5978 100644
--- a/infra/variables.tf
+++ b/infra/variables.tf
@@ -302,6 +302,27 @@ variable "guardian_rate_per_min" {
   default     = null
 }
 
+variable "guardian_max_replicas" {
+  description = <<-EOT
+    Optional override for GUARDIAN_MAX_REPLICAS, the maximum replica capacity the
+    server divides global rate limits by. Defaults to the effective autoscaling
+    max capacity. Drives rate-limit partitioning only (coordination mode is
+    backend-derived). A value below the real max would let the aggregate exceed
+    the global limit, so an explicit override is clamped up to the autoscaling
+    max in data.tf and can only ever raise the divisor, never lower it.
+  EOT
+  type        = number
+  default     = null
+
+  validation { # conditional (not ||) so the numeric checks are never evaluated against the null default
+    condition = (
+      var.guardian_max_replicas == null ? true :
+      (var.guardian_max_replicas >= 1 && floor(var.guardian_max_replicas) == var.guardian_max_replicas)
+    )
+    error_message = "guardian_max_replicas must be an integer >= 1 when set."
+  }
+}
+
 variable "guardian_rate_limit_enabled" {
   description = "Optional override to enable or disable Guardian HTTP rate limiting"
   type        = bool
diff --git a/packages/miden-multisig-client/.gitignore b/packages/miden-multisig-client/.gitignore
index 04c01ba7..11059a0b 100644
--- a/packages/miden-multisig-client/.gitignore
+++ b/packages/miden-multisig-client/.gitignore
@@ -1,2 +1,3 @@
 node_modules/
-dist/
\ No newline at end of file
+dist/
+test-results/
\ No newline at end of file
diff --git a/speckit/features/010-horizontal-scaling/contracts/config-contract.md b/speckit/features/010-horizontal-scaling/contracts/config-contract.md
new file mode 100644
index 00000000..9d0fb972
--- /dev/null
+++ b/speckit/features/010-horizontal-scaling/contracts/config-contract.md
@@ -0,0 +1,102 @@
+# Contract: Configuration & Startup Guards
+
+**Feature**: 010-horizontal-scaling
+
+Operator-facing configuration contract. These are the only externally visible
+behavior changes (no client wire contract changes).
+
+## Environment variables
+
+| Variable | Status | Behavior |
+|---|---|---|
+| `GUARDIAN_ENV` | **reused** | Stage signal. `prod` (case-insensitive) activates HA fail-fast guards. Already set from Terraform `var.deployment_stage` (`infra/ecs.tf:128-129`). Currently only gates ACK secrets (`ack/mod.rs:139-145`); `is_prod_environment()` is promoted to a shared `config/stage.rs` helper. |
+| `GUARDIAN_DASHBOARD_CURSOR_SECRET` | **enforcement changed** | 64-hex (32-byte) shared secret. Optional in every stage: if unset, warn and fall back to an ephemeral per-process secret (boots, never fails startup). A missing shared secret degrades only dashboard pagination across replicas, so it is not a startup guard (`dashboard/state.rs`). |
+| `GUARDIAN_MAX_REPLICAS` | **new** | Positive integer = the deployment's autoscaling **max** capacity. Drives **rate limiting only**: divides `GUARDIAN_RATE_BURST_PER_SEC`/`GUARDIAN_RATE_PER_MIN` per replica (`global / GUARDIAN_MAX_REPLICAS`) so aggregate stays at or below the global limit (over-throttles while the fleet runs below max capacity). Defaults from Terraform `effective_server_autoscaling_max_capacity` (see below); overridable, but a value **below** the real max makes per-replica caps too high so the aggregate can exceed the global limit (too loose) — so Terraform clamps an explicit override **up** to the effective max (see below). A value above the real max over-throttles. Unset or `1` => current per-process rate-limit behavior. **Does NOT affect coordination mode** — that is backend-derived (FR-020). |
+| `DATABASE_URL` | unchanged | Required for the Postgres backend (which the prod image uses). |
+| `GUARDIAN_DB_POOL_MAX_SIZE` / `GUARDIAN_METADATA_DB_POOL_MAX_SIZE` | unchanged | Per-replica pool sizes; runbook adds guidance: total ≈ size x replicas x pools must stay under Postgres `max_connections`. |
+| `GUARDIAN_RATE_LIMIT_ENABLED` / `GUARDIAN_RATE_BURST_PER_SEC` / `GUARDIAN_RATE_PER_MIN` | unchanged | Now interpreted as global limits when `GUARDIAN_MAX_REPLICAS > 1`. |
+
+Optional (implementation may add, with documented defaults): lease TTL / renew
+interval overrides (e.g. `GUARDIAN_CANON_LEASE_TTL_SECS`,
+`GUARDIAN_CANON_LEASE_RENEW_SECS`). Default to safe values if absent. The lease
+TTL is sized for renew/failover only and is independent of the canonicalization
+`submission_grace_period_seconds`.
+
+## Terraform wiring (default ships from infra)
+
+`GUARDIAN_MAX_REPLICAS` MUST default from the deployment's autoscaling max
+capacity rather than a manually maintained value:
+
+- `infra/data.tf` already computes
+  `local.effective_server_autoscaling_max_capacity` (prod = `max(desired, 6)`).
+- `infra/ecs.tf` already injects the server env block (after
+  `GUARDIAN_RATE_PER_MIN`). Add:
+  ```hcl
+  {
+    name  = "GUARDIAN_MAX_REPLICAS"
+    value = tostring(local.effective_guardian_max_replicas)
+  }
+  ```
+  where `local.effective_guardian_max_replicas = var.guardian_max_replicas != null ? max(var.guardian_max_replicas, local.effective_server_autoscaling_max_capacity) : local.effective_server_autoscaling_max_capacity`
+  (new `var.guardian_max_replicas` defaults to `null`, i.e. derive from max
+  capacity; an explicit override is clamped **up** to the autoscaling max so it
+  can only ever raise the divisor, never lower it below the real fleet size).
+
+This keeps the default correct on every deploy with no operator action; the
+runbook documents the override, not a required value.
+
+## Startup guards (fail-fast, prod only)
+
+The server MUST refuse to start, with a clear actionable error naming the
+variable and remedy, when `GUARDIAN_ENV=prod` and any of:
+
+1. The active storage backend is the **filesystem** backend (US5/FR-012). Remedy:
+   build/run with the Postgres backend and set `DATABASE_URL`.
+2. An enabled global rate limit partitions to **zero** requests per replica
+   (`GUARDIAN_RATE_BURST_PER_SEC`/`GUARDIAN_RATE_PER_MIN` below
+   `GUARDIAN_MAX_REPLICAS`, FR-013). Remedy: raise the global limit or lower
+   `GUARDIAN_MAX_REPLICAS`.
+
+In non-prod, condition (1) is allowed (dev default) and (2) warns but starts. A
+missing `GUARDIAN_DASHBOARD_CURSOR_SECRET` is NOT a startup guard in any stage:
+it warns and boots with an ephemeral per-process secret (FR-008), because it
+degrades pagination only.
+
+## Error message contract
+
+Each guard error MUST: name the offending variable/backend, state the
+consequence under multiple replicas, and give the exact remedy. Errors are
+startup/config errors (process exits non-zero), not request-path errors — no
+change to HTTP/gRPC boundary error shapes.
+
+## Startup mode log line (FR-019)
+
+On startup the server logs exactly one coordination-mode line reflecting the
+**resolved** state (never operator intent):
+
+```text
+coordination mode=shared backend=postgres stage=prod max_replicas=6 cursor_secret=configured
+coordination mode=single-process backend=filesystem stage=dev max_replicas=1 cursor_secret=ephemeral
+```
+
+`mode=shared` iff coordination is backed by the external store (Postgres);
+`mode=single-process` for the in-memory impls (filesystem). This is the
+discoverable signal that replaces an explicit `DISTRIBUTED_MODE` toggle —
+coordination is determined by the resolved storage backend alone, not a flag and
+not a tunable, so the line cannot disagree with reality. (`max_replicas` is shown
+for the rate-limit context; it does not affect the mode.)
+
+## Documentation surface (US6)
+
+- `docs/runbooks/horizontal-scaling.md` (new) — required env vars, state-store
+  dependency (shared Postgres), pool sizing vs `max_connections`,
+  `GUARDIAN_MAX_REPLICAS` guidance (rate-limit partitioning only;
+  over-throttling/keep-alive tolerance below max capacity; a too-low override can
+  let aggregate limits exceed the global limit, a too-high override over-throttles),
+  coordination mode is backend-derived (Postgres = shared),
+  filesystem = dev-only, failover behavior of the canonicalization lease.
+- `docs/CONFIGURATION.md` — add `GUARDIAN_MAX_REPLICAS` (default from autoscaling
+  max capacity; rate-limiting effect only — does not change coordination mode),
+  document the prod guards.
+- `docs/SERVER_AWS_DEPLOY.md` — HA notes referencing the existing prod profile
+  (`infra/data.tf` desired 2 / max 6) and `GUARDIAN_MAX_REPLICAS` sourced from it.
diff --git a/speckit/features/010-horizontal-scaling/contracts/coordination-traits.md b/speckit/features/010-horizontal-scaling/contracts/coordination-traits.md
new file mode 100644
index 00000000..45ba4887
--- /dev/null
+++ b/speckit/features/010-horizontal-scaling/contracts/coordination-traits.md
@@ -0,0 +1,178 @@
+# Contract: Coordination Traits
+
+**Feature**: 010-horizontal-scaling
+
+These are the **internal** server traits introduced by this feature. They are not
+part of any client (HTTP/gRPC) wire contract — no proto, payload, status enum, or
+error surface changes. The traits exist so that shared coordination has two
+interchangeable implementations selected with the storage backend:
+
+| Trait | In-memory impl (filesystem/dev) | Postgres impl (prod) | Backs table |
+|---|---|---|---|
+| `SessionStore` | `InMemorySessionStore` | `PgSessionStore` | `auth_sessions` |
+| `ChallengeStore` | `InMemoryChallengeStore` | `PgChallengeStore` | `auth_challenges` |
+| `LeaderElector` | `AlwaysLeader` | `PgLeaseElector` | `worker_leases` |
+
+All methods are `async` and return the crate's existing error type; auth-facing
+errors MUST map to the **same** boundary errors operators/clients see today
+(Constitution IV — no error-surface drift).
+
+## `SessionStore`
+
+Each store instance is **realm-bound at construction** (operator vs evm), so the
+methods carry no realm. `StoredSession { subject: SessionSubject, issued_at,
+expires_at }`; `SessionSubject` is `Operator { operator_id, commitment }` |
+`Evm { address }` (no permissions — re-resolved per request).
+
+```text
+trait SessionStore {
+    async fn insert(&self, key: [u8;32], session: StoredSession) -> Result<()>;
+    async fn get(&self, key: &[u8;32], now) -> Result<Option<StoredSession>>;
+    async fn revoke(&self, key: &[u8;32]) -> Result<Option<StoredSession>>; // logout; returns prior for logging
+    async fn sweep_expired(&self, now) -> Result<u64>;
+}
+```
+
+Behavioral contract:
+- `get` returns `Some` only when the session is unrevoked and `now < expires_at`.
+- Validity is evaluated against the store's clock (DB clock for Postgres).
+- `revoke` returns the prior session (for logout logging) and, once revoked, `get`
+  MUST reject it on every replica until natural expiry. The Postgres impl marks
+  `revoked_at` and keeps the row until expiry; the in-memory impl removes it.
+- Replaces the `DashboardState` and `EvmSessionState` session maps without
+  changing the outcome of `authenticate_session` (permissions still re-resolved
+  from the live allowlist at call time).
+
+## `ChallengeStore`
+
+Each store instance is **realm-scoped** at construction (operator vs evm), so the
+trait methods don't take a realm. The stored challenge carries a realm-appropriate
+`key` and `payload`:
+
+```text
+trait ChallengeStore {
+    async fn issue(&self, principal: &str, challenge: StoredChallenge, max_outstanding: usize, now) -> Result<()>;
+    async fn active_for(&self, principal: &str, now) -> Result<Vec<StoredChallenge>>;
+    async fn consume(&self, principal: &str, key: &str, now) -> Result<bool>; // true => this caller won the single-use claim
+    async fn sweep_expired(&self, now) -> Result<u64>;
+}
+struct StoredChallenge { key: String, payload: ChallengePayload, issued_at, expires_at }
+enum ChallengePayload { OperatorDigest(Word), EvmChallenge { address, nonce, issued_at, expires_at } }
+```
+
+**Why match-in-Rust, not match-in-store**: the two realms verify differently and
+neither check is expressible in SQL — operator does a Falcon
+`public_key.verify(signing_digest, sig)` (`dashboard/state.rs:228-230`), EVM does
+a nonce compare then `recover_session_address(challenge, sig)`
+(`evm/session.rs:112-127`). So the store returns candidate payloads
+(`active_for`), the caller matches one, then `consume(principal, key)` atomically
+claims it. `key` is the signing-digest hex (operator) or the nonce (EVM); see
+data-model.md table `auth_challenges` `(realm, challenge_key)`.
+
+Behavioral contract:
+- `consume(principal, key)` atomically sets `consumed_at` and returns `true` only
+  if the challenge was unconsumed and unexpired — a replay (or a lost race) on any
+  replica returns `false` (FR-003).
+- Issue-on-replica-A / match+consume-on-replica-B succeeds (FR-001).
+
+## `LeaderElector`
+
+```text
+trait LeaderElector {
+    async fn try_acquire(&self, lease: &str, holder_id: &str, ttl: Duration) -> Result<Option<Lease>>;
+    async fn renew(&self, lease: &Lease) -> Result<bool>;     // false => lease lost
+    async fn verify_held(&self, lease: &Lease) -> Result<bool>; // fence-checked ownership at submission boundary
+    async fn release(&self, lease: Lease) -> Result<()>;       // graceful shutdown
+}
+struct Lease { name, holder_id, fence_token, expires_at }
+```
+
+Behavioral contract:
+- At most one holder satisfies `renew` at any instant (atomic conditional write +
+  DB-clock TTL).
+- `AlwaysLeader` always returns a lease, always renews `true`, and `verify_held`
+  always returns `true` (single replica).
+
+**Renewal concurrency (resolves the long-pass split-brain)**: lease renewal MUST
+run on its **own timer** (`renew_interval`, e.g. 5s) in a task **concurrent with**
+the canonicalization pass — NOT at tick boundaries. A pass may run longer than the
+check interval; renewal at tick boundaries would let the lease expire mid-pass
+while a renewal was still pending, allowing another replica to claim it. The
+worker therefore becomes: one renewal task + the pass, sharing a cancellation
+signal (e.g. `tokio_util::sync::CancellationToken` or a `watch` channel).
+
+**Cooperative cancellation (makes "abort the current pass" mechanical)**: today
+`process_all_accounts()` is a single awaited call with no cancellation hook
+(`worker.rs:27-44`). This feature adds a cancellation check that the processor
+polls **between accounts** (and before each on-chain submission). When `renew`
+returns `false`, the renewal task trips the cancellation signal and the pass stops
+at the next checkpoint. "Abort the current pass" is thus a concrete mechanism, not
+just a requirement.
+
+**Fence enforcement at the submission boundary (MUST, not may)**: `fence_token`
+advances on every (re)acquisition, a steal included. Because cancellation is cooperative
+there is a window between losing the lease and the pass actually stopping, so the
+processor MUST call `verify_held` (fence/ownership re-check) immediately before
+**every** state-mutating write — canonical `submit_state`/`submit_delta` **and**
+the retry/discard writes — and MUST skip the write if it returns `false`.
+
+The fence is **advisory**, not atomic: `verify_held` is a separate round-trip, so
+in principle the lease could be stolen between the check and the write (TOCTOU).
+This is acceptable because the writes are **idempotent** — canonical promotion is
+a deterministic upsert (same delta → identical bytes) and retry/discard are
+idempotent per candidate — so a brief two-leader overlap can at most re-apply the
+same transition, never corrupt state. The fence + idempotency + cooperative
+cancellation together strongly mitigate split-brain; TTL + voluntary abort alone
+is not relied on.
+
+## Selection rule (wiring)
+
+`builder/storage.rs` already chooses the storage backend. The same decision point
+selects the coordination family, keying on the **storage backend alone**:
+
+- `feature = "postgres"` + `DATABASE_URL` set => Postgres impls (share the
+  storage/metadata pool or a dedicated small pool — decided at implementation).
+- filesystem backend => in-memory impls + `AlwaysLeader`.
+
+Coordination is **not** gated on `GUARDIAN_MAX_REPLICAS` or any other tunable:
+a Postgres deployment always uses shared coordination. This is deliberate and
+default-safe — a missing/mis-set tunable must never silently revert a multi-replica
+deployment to per-process state (the #242 bug). The single-instance
+session-lookup optimization is deferred to a future explicit, guarded opt-in.
+
+Coordination availability therefore can never diverge from where shared state
+lives. Because the session/challenge stores are realm-bound, they are owned by
+their realm's consumer, not shared on `AppState`: the builder constructs an
+operator-realm `SessionStore`+`ChallengeStore` pair injected into `DashboardState`
+and an evm-realm pair injected into `EvmSessionState`. `AppState`
+(`builder/state.rs`) carries only `Arc<dyn LeaderElector>` (used by the
+canonicalization worker).
+
+## Availability & performance trade-offs (explicit behavior changes)
+
+These are deliberate consequences of moving auth state into Postgres. They are
+behavior changes from today's always-available in-memory maps and are stated here
+so they are not surprises.
+
+**Shared-store outage => auth fails closed**: with the Postgres impls, a `get`,
+`consume`, `issue`, or `insert` that errors because Postgres is briefly unavailable
+results in the authenticated request / login being **rejected** (fail-closed), not
+allowed through. This is the safe choice for a custody system: a DB blip must
+never grant access. It is a change from today, where the in-memory map is always
+available and never fails for store reasons. The boundary error returned MUST stay
+within the existing auth/transient-error surface (no new error shape); operators
+see auth failures during a DB outage, which is expected and documented in the
+runbook. The canonicalization lease likewise fails closed: a renewal that errors
+is treated as a lost lease (the holder steps down), so an outage stalls
+canonicalization rather than risking double-processing — it resumes when the DB
+returns.
+
+**Per-request DB lookup is a deliberate trade-off vs. caching**: FR-003 requires
+logout/expiry to be honored on **every** replica **immediately**, which rules out
+a local per-replica session cache (a cache would serve revoked sessions until its
+TTL). The accepted consequence is that every authenticated request performs one
+indexed Postgres `SELECT` (by `token_digest` PK) where today it is an in-memory
+map hit. Immediate revocation is chosen over lower per-request latency. This adds
+per-request DB load and reinforces the connection-pool sizing concern (see the
+horizontal-scaling runbook). Challenges are touched only during login (low
+volume); the per-request cost is the session lookup.
diff --git a/speckit/features/010-horizontal-scaling/contracts/db-schema.md b/speckit/features/010-horizontal-scaling/contracts/db-schema.md
new file mode 100644
index 00000000..87b87ded
--- /dev/null
+++ b/speckit/features/010-horizontal-scaling/contracts/db-schema.md
@@ -0,0 +1,69 @@
+# Contract: Database Schema (new migrations)
+
+**Feature**: 010-horizontal-scaling
+
+Three new Diesel migrations under `crates/server/migrations/`, embedded and run at
+startup. Postgres backend only. Column details and lifecycle rules are in
+[data-model.md](../data-model.md); this file fixes the migration contract.
+
+## Migration: `<date>_auth_sessions`
+
+`up.sql` creates `auth_sessions` (composite PK `(realm TEXT, token_digest
+BYTEA)` so operator and EVM sessions are namespaced rather than relying on token
+randomness, `subject JSONB`, `issued_at`, `expires_at`, `revoked_at` nullable) +
+index on `expires_at` and `(realm, expires_at)`. `down.sql` drops it.
+
+## Migration: `<date>_auth_challenges`
+
+`up.sql` creates `auth_challenges` with composite PK `(realm TEXT, challenge_key
+TEXT)` — `challenge_key` is the operator signing-digest hex or the EVM nonce —
+plus `principal TEXT`, `payload JSONB` (realm-specific match/recover fields, see
+data-model.md), `issued_at`, `expires_at`, `consumed_at` nullable + index on
+`(realm, principal)` and `expires_at`. `down.sql` drops it.
+
+## Migration: `<date>_worker_leases`
+
+`up.sql` creates `worker_leases` (PK `lease_name TEXT`, `holder_id TEXT`,
+`acquired_at`, `renewed_at`, `expires_at`, `fence_token BIGINT NOT NULL DEFAULT
+0`). `down.sql` drops it.
+
+## Migration execution under concurrent replica startup — REQUIRED
+
+All replicas run the embedded migrations against one Postgres at boot. The runner
+(`storage/postgres.rs`) MUST wrap `run_pending_migrations` in a Postgres
+**session-level advisory lock** on a fixed key, acquired with a **bounded wait**:
+poll `SELECT pg_try_advisory_lock($key)` until it succeeds or a timeout elapses ->
+migrate -> `SELECT pg_advisory_unlock($key)`. One replica migrates; the rest poll,
+then find nothing pending. The bounded wait (vs. an unbounded `pg_advisory_lock`)
+means a replica stuck mid-migration fails the others fast rather than wedging the
+fleet on boot. Without the lock, simultaneous first-deploy boots can race/deadlock
+on identical migrations. (A session-level advisory lock is acceptable here because the
+critical section is short and single-connection — unlike the canonicalization lease, which spans pool churn and uses a lease row instead.)
+
+## Constraints on the migration set
+
+- **Additive only**: no changes to existing tables (`states`, `deltas`,
+  `delta_proposals`, `account_metadata`, `admin_actions`). No FKs to custody
+  tables (keeps append-only delta lineage isolated — Constitution III).
+- **Reversible**: every `up.sql` has a matching `down.sql`.
+- **No data migration / backfill**: these tables start empty. Sessions and
+  challenges are ephemeral, so any that are in flight at deploy time simply
+  require a re-login (acceptable, documented in the runbook).
+- **Filesystem backend creates none of these** — it uses in-memory stores.
+
+## Atomic operations the impls rely on
+
+- Challenge single-use consume: conditional `UPDATE ... SET consumed_at = now()
+  WHERE realm = $1 AND challenge_key = $2 AND consumed_at IS NULL AND now() <
+  expires_at` — affected-row-count `1` => this caller won the claim, `0` =>
+  already consumed/expired.
+- Lease acquire/steal: `INSERT ... ON CONFLICT (lease_name) DO UPDATE ... WHERE
+  worker_leases.expires_at < now() OR worker_leases.holder_id = excluded.holder_id`.
+- Lease renew: `UPDATE ... WHERE lease_name = $1 AND holder_id = $2 AND now() <
+  expires_at`.
+- Lease fence verify (submission boundary, mandatory): `SELECT 1 FROM
+  worker_leases WHERE lease_name = $1 AND holder_id = $2 AND fence_token = $3 AND
+  now() < expires_at`.
+
+These must be single round-trip statements (no read-modify-write races across
+replicas).
diff --git a/speckit/features/010-horizontal-scaling/data-model.md b/speckit/features/010-horizontal-scaling/data-model.md
new file mode 100644
index 00000000..c6b68c73
--- /dev/null
+++ b/speckit/features/010-horizontal-scaling/data-model.md
@@ -0,0 +1,199 @@
+# Phase 1 Data Model: Horizontal Scaling Correctness
+
+**Feature**: 010-horizontal-scaling | **Date**: 2026-06-20
+
+Three new Postgres tables, added as Diesel migrations under
+`crates/server/migrations/` (embedded via `embed_migrations!`, run at startup —
+`crates/server/src/storage/postgres.rs:29-47`). All three exist **only** in the
+Postgres backend; the filesystem/dev backend uses in-memory equivalents and
+creates no tables.
+
+All timestamps are `TIMESTAMPTZ` and all expiry comparisons use the database
+clock (`now()`), giving a single authoritative clock across replicas.
+
+## Migration concurrency (multi-replica startup) — REQUIRED
+
+With 2-6 replicas booting simultaneously (ECS rolling deploy or cold start),
+every replica runs `embed_migrations!` against the **one** shared Postgres at the
+same time. Diesel's embedded runner does not serialize concurrent runners safely
+by default, so first-deploy startup can race or deadlock applying the same
+migration. This feature MUST guard migration with a **Postgres session-level
+advisory lock**:
+
+```text
+run_migrations(conn):
+    until pg_try_advisory_lock(<fixed_migration_key>) or deadline:  -- bounded wait, polls; on deadline, fail startup without migrating
+        sleep(poll_interval)
+    run_pending_migrations(conn);                      -- no-op for replicas that lose the race
+    SELECT pg_advisory_unlock(<fixed_migration_key>);
+```
+
+The first replica to grab the lock migrates; the others poll `pg_try_advisory_lock`
+until it frees, then find no pending migrations and proceed. The wait is **bounded
+by a timeout** (rather than `pg_advisory_lock`'s unbounded block) so a replica
+stuck mid-migration fails the others fast instead of wedging the whole fleet on
+boot; the holder still releases on unlock and on connection drop. This is a short,
+single-connection critical section — not across request/pool churn (unlike the
+canonicalization lease). This change lives in `storage/postgres.rs`
+(`run_migrations`). See the matching edge case in spec.md and the db-schema.md
+contract.
+
+---
+
+## Entity: Auth Session  → table `auth_sessions`
+
+Replaces the per-process `Arc<Mutex<HashMap<[u8;32], OperatorSessionRecord>>>`
+(`dashboard/state.rs:30`) and the EVM equivalent (`evm/session.rs`).
+
+| Column | Type | Notes |
+|---|---|---|
+| `realm` | `TEXT` NOT NULL | `operator` \| `evm` (discriminator); part of the composite PK |
+| `token_digest` | `BYTEA` (32) NOT NULL | SHA-256 of the session token (never store plaintext; matches current `[u8;32]` keying); part of the composite PK |
+| `subject` | `JSONB` NOT NULL | Realm-specific identity: operator `AuthenticatedOperator` or EVM `address`. Permissions are re-resolved from the live allowlist at use time (preserves `authenticate_session` behavior, `dashboard/state.rs:290-332`) |
+| `issued_at` | `TIMESTAMPTZ` NOT NULL | |
+| `expires_at` | `TIMESTAMPTZ` NOT NULL | indexed for TTL sweep |
+| `revoked_at` | `TIMESTAMPTZ` NULL | set on logout; a non-null value => session rejected on every replica (FR-003) |
+
+**Indexes**: composite PK on `(realm, token_digest)`; index on `expires_at`
+(sweep); index on `(realm, expires_at)`.
+
+**Lifecycle / validation**:
+- Created on successful `verify`.
+- Valid iff `revoked_at IS NULL AND now() < expires_at`.
+- Logout sets `revoked_at = now()` (idempotent).
+- A revoked row is **kept until its original `expires_at`** so the revocation is
+  honored across every replica for as long as the token would otherwise have been
+  valid; setting `revoked_at` (not deleting) is what makes logout effective
+  fleet-wide. The sweep then deletes any row where `expires_at < now()` (covers both
+  naturally expired and revoked-then-expired rows). There is no separate
+  "revocation grace" — a revoked token is rejected immediately via `revoked_at`
+  and the row is reclaimed at natural expiry.
+- **Invariant**: stored subject identity is authoritative, but authorization
+  (permissions) is always recomputed from the current allowlist — no stale
+  permission capture.
+
+---
+
+## Entity: Auth Challenge  → table `auth_challenges`
+
+Replaces per-process `Arc<Mutex<HashMap<String, Vec<PendingChallenge>>>>`
+(`dashboard/state.rs:29`) and the EVM equivalent. Supports issue-on-A /
+verify-on-B (FR-001).
+
+| Column | Type | Notes |
+|---|---|---|
+| `realm` | `TEXT` NOT NULL | `operator` \| `evm` (part of PK) |
+| `challenge_key` | `TEXT` NOT NULL | per-challenge unique key **within a realm** (part of PK). Operator: the signing digest hex. EVM: the challenge nonce. This is what `consume` targets. |
+| `principal` | `TEXT` NOT NULL | operator commitment (`dashboard/state.rs:108-168`) or EVM address; indexed for `active_for` lookup |
+| `payload` | `JSONB` NOT NULL | realm-specific fields needed to match/recover at verify time (see below) |
+| `issued_at` | `TIMESTAMPTZ` NOT NULL | |
+| `expires_at` | `TIMESTAMPTZ` NOT NULL | indexed |
+| `consumed_at` | `TIMESTAMPTZ` NULL | set when `verify` succeeds; single-use across replicas |
+
+**Realm-aware payload (resolves the EVM modeling gap)**: the two realms verify
+differently, so a single `signing_digest` column does not model both:
+- **Operator** matches by Falcon-verifying the stored signing digest (a `Word`)
+  against the submitted signature (`dashboard/state.rs:228-230`). `payload` =
+  `{ "signing_digest": "<hex>" }`; `challenge_key` = that hex.
+- **EVM** matches by **nonce**, then recovers the signer from the **full original
+  challenge** (`address`, `nonce`, `issued_at`, `expires_at`) via
+  `recover_session_address` (`evm/session.rs:112-127`). `payload` =
+  `{ "address", "nonce", "issued_at", "expires_at" }`; `challenge_key` = the nonce.
+
+Verification matching (Falcon verify / nonce compare + ECDSA recover) runs in
+Rust, not SQL: `active_for(principal)` returns the unexpired, unconsumed payloads;
+the caller matches one; then `consume(principal, challenge_key)` atomically claims it.
+
+**Primary key**: `(realm, challenge_key)`. **Indexes**: PK; index on
+`(realm, principal)` for `active_for`; index on `expires_at` for the sweep.
+
+**Lifecycle / validation**:
+- Created by `issue_challenge` (per-principal cap via `max_outstanding`, oldest
+  pruned, matching today's `Vec` cap).
+- Consumable iff `consumed_at IS NULL AND now() < expires_at`; `consume`
+  conditionally sets `consumed_at = now()` and reports whether it won the race
+  (single-use; a replay on any replica fails — FR-003, US1 scenario 3).
+- Multiple pending challenges per principal allowed; the `(realm, principal)`
+  index supports `active_for`.
+
+---
+
+## Entity: Worker Lease  → table `worker_leases`
+
+Backs single-owner canonicalization (FR-004/005/006, US2). Generic enough for
+future background workers.
+
+| Column | Type | Notes |
+|---|---|---|
+| `lease_name` | `TEXT` PRIMARY KEY | e.g. `canonicalization` |
+| `holder_id` | `TEXT` NOT NULL | replica identity (e.g. hostname/task-id + random suffix, generated at boot) |
+| `acquired_at` | `TIMESTAMPTZ` NOT NULL | when current holder first took the lease |
+| `renewed_at` | `TIMESTAMPTZ` NOT NULL | last heartbeat |
+| `expires_at` | `TIMESTAMPTZ` NOT NULL | `renewed_at + ttl`; another replica may claim only when `now() > expires_at` (the acquire predicate is `expires_at < now()`) |
+| `fence_token` | `BIGINT` NOT NULL | monotonically incremented on each (re)acquisition; guards against a stale holder acting after losing the lease |
+
+**Acquire / renew (single atomic statement)**:
+- Acquire/steal: `INSERT ... ON CONFLICT (lease_name) DO UPDATE SET holder_id =
+  excluded.holder_id, acquired_at = now(), renewed_at = now(), expires_at = now()
+  + ttl, fence_token = worker_leases.fence_token + 1 WHERE worker_leases.expires_at
+  < now() OR worker_leases.holder_id = excluded.holder_id` (claim only if expired
+  or already mine).
+- Renew: `UPDATE ... SET renewed_at = now(), expires_at = now() + ttl WHERE
+  lease_name = $1 AND holder_id = $2 AND now() < expires_at` — runs on its **own
+  timer concurrent with the pass** (not at tick boundaries); a failed renew (0
+  rows) means the lease was lost; the renewal task trips the pass's cancellation
+  signal (see coordination-traits.md "Renewal concurrency").
+- Verify-held (fence check at submission boundary): `SELECT 1 FROM worker_leases
+  WHERE lease_name = $1 AND holder_id = $2 AND fence_token = $3 AND now() <
+  expires_at` — the processor MUST run this immediately before every state-mutating write
+  (on-chain submission / canonical promotion, and the retry/discard writes) and skip the write if it returns no row.
+
+**Timing constraints**:
+- `renew_interval << ttl` (e.g. renew every 5s, ttl 30s).
+- The TTL is sized **solely** for renew/failover: it must comfortably exceed one
+  renew interval (a healthy holder never loses its lease) and it sets the failover
+  bound — another replica may claim only after `ttl` elapses without a renew, so
+  failover (SC-003) happens within `ttl` of the holder dying.
+- The TTL is **independent of** the canonicalization `submission_grace_period`
+  (600s default) and the check interval (10s); those govern delta promotion
+  timing, not lease ownership. Do not couple them.
+
+**Invariant (no split-brain)**: at most one `holder_id` can satisfy the renew
+predicate at a time because acquisition is a single atomic conditional write
+against the DB clock. The cooperative-cancellation abort path has a small window
+between lease loss and the pass stopping; the **mandatory** fence check
+(`verify_held`) immediately before every state-mutating write strongly mitigates
+that window. Note the fence is **advisory** (a separate round-trip, TOCTOU): a
+lease could in principle be stolen between the check and the write. That residual
+window is benign here because the canonical writes are **idempotent deterministic
+upserts** — the same delta produces identical state/delta bytes regardless of
+which replica writes — and retry/discard writes are likewise idempotent for a
+given candidate. So a brief overlap cannot corrupt state; it can at most
+re-apply the same transition. TTL + voluntary abort alone is NOT relied on.
+
+---
+
+## In-memory equivalents (filesystem/dev backend)
+
+No tables. The `coordination` module provides:
+- `InMemorySessionStore` / `InMemoryChallengeStore` — the current
+  `Arc<Mutex<HashMap>>` behavior, byte-for-byte.
+- `AlwaysLeader` — `try_acquire`/`renew`/`verify_held` always succeed (single
+  replica is always the leader).
+
+Selected when the filesystem backend is active (backend-derived selection, R9 —
+**not** gated on `GUARDIAN_MAX_REPLICAS`). A Postgres deployment always uses the
+shared (table-backed) impls. This keeps single-replica/dev behavior identical to
+today and requires no database (FR-014; constitution dev-default invariant).
+
+---
+
+## Relationships & boundaries
+
+- `auth_sessions` / `auth_challenges` are independent of the custody record
+  tables (`states`, `deltas`, `delta_proposals`, `account_metadata`) — no FKs,
+  no impact on append-only delta lineage (Constitution III).
+- `worker_leases` is pure coordination metadata; it never participates in or
+  alters the pending->candidate->canonical/discarded transitions — it only gates
+  **which replica** executes them.
+- None of these tables are exposed on any client wire contract.
diff --git a/speckit/features/010-horizontal-scaling/spec.md b/speckit/features/010-horizontal-scaling/spec.md
new file mode 100644
index 00000000..65e3633e
--- /dev/null
+++ b/speckit/features/010-horizontal-scaling/spec.md
@@ -0,0 +1,480 @@
+# Feature Specification: Horizontal Scaling Correctness Across Multiple Guardian Instances
+
+**Feature Branch**: `010-horizontal-scaling`
+**Created**: 2026-06-20
+**Status**: Draft
+**Input**: User description: "Ensure horizontal scaling works correctly across multiple Guardian instances (issue #242)"
+**Tracking issue**: [#242](https://github.com/OpenZeppelin/guardian/issues/242)
+
+## Overview
+
+The production deployment runs the Guardian server as 2-6 ECS tasks behind a
+round-robin load balancer. Several subsystems were written under an implicit
+single-instance assumption, so a request that begins on one replica and
+continues on another can fail, and background work runs redundantly on every
+replica. This feature makes the server correct under horizontal scaling: any
+request may land on any replica, replicas may be added or removed at any time,
+and operators have a documented configuration for a highly-available (HA)
+deployment.
+
+The scope is **correctness and operability under multiple replicas**, not new
+end-user functionality. Each subsystem below is independently testable and
+independently shippable.
+
+## User Scenarios & Testing *(mandatory)*
+
+### User Story 1 - Operator login succeeds with multiple replicas (Priority: P1)
+
+An operator authenticates to the dashboard while the load balancer routes the
+challenge request and the verification request to different replicas. The login
+must complete successfully regardless of which replica handles each step, and an
+established session must be honored by every replica.
+
+**Why this priority**: Authentication is the entry point to all operator
+functionality. Today the auth challenge and the session record live in
+per-process memory, so a login or any subsequent authenticated call that lands
+on a different replica than the one that issued the challenge/session fails.
+This makes the dashboard effectively unusable with more than one replica - the
+highest-impact breakage in the issue.
+
+**Independent Test**: Run 2+ replicas behind the load balancer and complete the
+full challenge -> sign -> verify -> authenticated-request flow, forcing each step
+onto a different replica. Login completes and the session is accepted on every
+replica.
+
+**Acceptance Scenarios**:
+
+1. **Given** 2+ replicas behind the load balancer, **When** an operator requests
+   a login challenge from replica A and submits the signed response to replica B,
+   **Then** verification succeeds and a session is established.
+2. **Given** an established operator session, **When** an authenticated request
+   is routed to any replica, **Then** the session is recognized and the request
+   is authorized without re-login.
+3. **Given** a pending challenge issued by one replica, **When** the operator
+   never completes it, **Then** the challenge expires consistently and cannot be
+   replayed on any replica after expiry.
+4. **Given** an operator logs out on one replica, **When** a subsequent request
+   with the same session token reaches any other replica, **Then** the session
+   is rejected.
+
+---
+
+### User Story 2 - A delta is canonicalized exactly once (Priority: P1)
+
+The background canonicalization worker promotes pending candidate deltas to
+canonical state after verifying them against on-chain state. With multiple
+replicas, each pending candidate must be processed exactly once, regardless of
+how many replicas are running.
+
+**Why this priority**: The canonicalization worker currently runs on every
+replica with no leader election or shared lock, so every replica independently
+re-processes the same candidates. This causes duplicate work and races on state
+transitions (promote/discard/retry-budget), which can corrupt the proposal
+nonce sequence and lead to permanent state-commitment mismatches. Correctness of
+custody state is paramount.
+
+**Independent Test**: Run 2+ replicas, create pending candidates, and confirm
+each candidate transitions exactly once (one promotion or one discard), with no
+duplicate submissions or double-counted retries, across the full replica set.
+
+**Acceptance Scenarios**:
+
+1. **Given** N replicas running and a pending candidate delta, **When** the
+   canonicalization interval elapses, **Then** exactly one replica processes the
+   candidate and it is promoted or discarded exactly once.
+2. **Given** the replica currently performing canonicalization stops or crashes,
+   **When** the next interval elapses, **Then** another replica takes over
+   canonicalization with no manual intervention.
+3. **Given** a candidate's retry budget, **When** processing fails, **Then** the
+   retry count is incremented exactly once per interval across the whole fleet
+   (not once per replica).
+4. **Given** only a single replica is running, **When** canonicalization runs,
+   **Then** behavior is unchanged from today (no regression).
+
+---
+
+### User Story 3 - Pagination cursors are valid across all replicas (Priority: P2)
+
+An operator pages through dashboard list results (e.g. accounts, deltas) where
+successive page requests are routed to different replicas. Cursors returned by
+one replica remain valid on every other replica.
+
+**Why this priority**: Cursors are signed/verified with a secret that, when
+unset, is generated randomly per process. Across replicas this silently breaks
+pagination (a cursor from replica A fails verification on replica B). It is
+high-frequency operator pain but degrades to "start over" rather than corrupting
+state, so it ranks below auth and canonicalization.
+
+**Independent Test**: With 2+ replicas and a shared cursor secret configured,
+request page 1 from one replica and page 2 (using the returned cursor) from
+another; the second page returns the correct continuation. With the secret
+unset in a multi-replica configuration, the server warns about the
+misconfiguration at startup and still boots.
+
+**Acceptance Scenarios**:
+
+1. **Given** a shared cursor secret configured on all replicas, **When** a cursor
+   issued by one replica is submitted to another, **Then** it verifies and
+   returns the correct next page.
+2. **Given** a multi-replica configuration with no shared cursor secret, **When**
+   the server starts, **Then** the operator is clearly warned that pagination
+   will break across replicas and the server boots (in every stage) with an
+   ephemeral per-process secret, rather than silently proceeding without notice.
+3. **Given** a tampered or expired cursor, **When** it is submitted to any
+   replica, **Then** it is rejected consistently.
+
+---
+
+### User Story 4 - Rate limits are enforced consistently across replicas (Priority: P2)
+
+A client making requests that are spread across replicas by the load balancer is
+subject to rate limits that reflect total traffic, within a documented
+tolerance - not per-replica limits that multiply with replica count.
+
+**Why this priority**: The rate limiter is per-process, so the effective limit
+scales with replica count (e.g. 2 replicas ~ 2x the configured burst). This
+weakens an abuse-prevention control, but it fails open (more lenient) rather
+than blocking legitimate traffic, so it ranks below correctness-critical items.
+
+**Independent Test**: With 2+ replicas and `GUARDIAN_MAX_REPLICAS` set to the
+autoscaling max capacity, drive traffic exceeding the global limit through the
+load balancer and confirm the aggregate accepted rate stays at or below the
+global limit (stricter when running below max capacity), rather than scaling by
+replica count.
+
+**Acceptance Scenarios**:
+
+1. **Given** a configured global request limit and 2+ replicas, **When** a client
+   exceeds the limit across replicas, **Then** excess requests are throttled so
+   the aggregate accepted rate stays at or below the global limit, regardless of
+   how the load balancer distributes them.
+2. **Given** rate limiting is disabled by configuration, **When** running with
+   multiple replicas, **Then** no throttling occurs (no regression).
+3. **Given** `GUARDIAN_MAX_REPLICAS` is set to the autoscaling max capacity,
+   **When** fewer than that many replicas are running, **Then** aggregate
+   enforcement is stricter than the global limit (never looser), and no request
+   depends on an external coordination service.
+
+---
+
+### User Story 5 - Filesystem backend is refused in the prod stage (Priority: P3)
+
+When the server is configured for the production stage, it refuses to start with
+the filesystem storage backend, because filesystem storage is local to a single
+task and cannot be shared across replicas. The filesystem backend remains fully
+supported for local development.
+
+**Why this priority**: The filesystem backend cannot back a multi-replica
+deployment (each replica would have divergent local state and audit events are
+not persisted). Refusing it in prod prevents a silent, dangerous
+misconfiguration. It is a guardrail rather than core multi-replica plumbing, and
+the published prod image is already built with the Postgres backend, so it ranks
+P3.
+
+**Independent Test**: Start the server in the prod stage with the filesystem
+backend selected and confirm it fails fast with a clear, actionable error. Start
+the same configuration in a non-prod stage and confirm it starts (dev-only path
+preserved).
+
+**Acceptance Scenarios**:
+
+1. **Given** the server is configured for the prod stage, **When** it would use
+   the filesystem storage backend, **Then** startup fails with an error
+   identifying the misconfiguration and the required remedy (use a shared
+   database backend).
+2. **Given** the server is configured for a non-prod stage, **When** it uses the
+   filesystem backend, **Then** it starts normally (development workflow
+   unaffected).
+3. **Given** the prod stage with a shared database backend, **When** the server
+   starts, **Then** there is no filesystem-related failure.
+
+---
+
+### User Story 6 - Operators have an HA configuration runbook (Priority: P3)
+
+An operator deploying multiple replicas can follow a single runbook listing
+every environment variable and external state-store dependency required for a
+correct HA deployment, and understands what breaks if each is omitted.
+
+**Why this priority**: Several of the fixes above depend on operator
+configuration (shared secrets, shared state stores, stage selection). Without
+documentation the feature is not safely usable, but it depends on the other
+stories being defined first, so it is sequenced last.
+
+**Independent Test**: A reviewer follows only the runbook to configure a 2+
+replica deployment and all P1/P2 acceptance scenarios pass without consulting
+source code.
+
+**Acceptance Scenarios**:
+
+1. **Given** the operator runbook, **When** an operator configures an HA
+   deployment using only the runbook, **Then** all required environment
+   variables and state-store dependencies are covered.
+2. **Given** the runbook, **When** an operator reads it, **Then** each HA-related
+   setting documents the consequence of omitting it.
+3. **Given** the runbook, **When** an operator reviews stage guidance, **Then**
+   the dev-only status of the filesystem backend is clearly stated.
+
+### Edge Cases
+
+- **Replica added mid-session**: A newly started replica must immediately honor
+  existing sessions, challenges, cursors, and the elected canonicalization owner
+  without a restart of the fleet.
+- **Replica removed mid-flight**: When the replica that holds canonicalization
+  leadership disappears, leadership must transfer within a bounded time so
+  canonicalization is not stalled.
+- **Clock skew between replicas**: Challenge/session expiry and lease/lock
+  timing must remain correct (or fail safe) when replica clocks differ within a
+  reasonable bound.
+- **Concurrent migrations on simultaneous startup**: When 2-6 replicas boot at
+  once (rolling deploy / cold start) they all run schema migrations against the
+  one shared store at the same time. Applying migrations MUST be serialized (one
+  replica migrates, the rest wait then proceed) so first-deploy startup cannot
+  race or deadlock. See FR-017.
+- **Shared store outage**: If the shared coordination/state store is temporarily
+  unavailable, each affected subsystem MUST have a defined, documented behavior
+  rather than undefined behavior or a crash loop. Specifically: authenticated
+  requests and login **fail closed** (auth rejected, never bypassed) and the
+  canonicalization leader **steps down** (work stalls, never double-processes),
+  both recovering automatically when the store returns. See FR-018.
+- **Split brain during leadership handoff**: Two replicas must never both believe
+  they own canonicalization long enough to double-process a candidate.
+- **Mixed configuration across replicas**: When replicas are configured with
+  different shared secrets (e.g. one missing the cursor secret), the failure
+  mode must be detectable rather than silent.
+- **Single-replica deployments**: All changes must preserve current behavior when
+  exactly one replica runs (no new mandatory infrastructure for dev/local).
+
+## Requirements *(mandatory)*
+
+### Functional Requirements
+
+- **FR-001**: Operator dashboard auth challenges MUST be resolvable by any
+  replica, so a challenge issued by one replica can be verified by another.
+- **FR-002**: Operator (and EVM, where applicable) sessions MUST be recognized by
+  any replica, so an authenticated request succeeds on any replica without
+  re-login.
+- **FR-003**: Session and challenge lifecycle events (issuance, consumption,
+  expiry, logout/revocation) MUST be consistent across replicas; a logged-out or
+  expired session/challenge MUST be rejected on every replica.
+- **FR-004**: Canonicalization of any pending candidate MUST occur exactly once
+  across the entire fleet per processing interval, regardless of replica count.
+- **FR-005**: The system MUST elect, or otherwise coordinate, a single owner for
+  canonicalization at any given time, and MUST transfer ownership automatically
+  when the current owner becomes unavailable. Ownership renewal MUST run
+  concurrently with (not gated on) the canonicalization pass, the pass MUST be
+  cooperatively cancellable so a lost owner can stop promptly, and every
+  state-mutating write (canonical promotion **and** retry/discard) MUST be gated
+  by an advisory fencing check so a superseded owner is prevented from committing
+  during the cancellation window. (The fence is a pre-write ownership re-check;
+  combined with idempotent writes — same delta ⇒ identical bytes — a brief
+  two-leader overlap can at most re-apply the same transition, never corrupt
+  state.)
+- **FR-006**: Canonicalization retry budgets and state transitions
+  (promote/discard) MUST be counted once per interval across the fleet, never
+  once per replica.
+- **FR-007**: Pagination cursors MUST be issued and verified using a shared
+  secret so a cursor issued by one replica is valid on all replicas.
+- **FR-008**: When a shared cursor secret is not configured, the system MUST
+  surface the misconfiguration at startup with a warning in every stage, rather
+  than silently generating a per-process secret without notice. The server still
+  boots, using an ephemeral per-process secret; a missing cursor secret degrades
+  only dashboard pagination across replicas (a cursor minted on one replica is
+  rejected on another) and never affects correctness or auth, so it is not
+  enforced as a startup guard.
+- **FR-009**: The aggregate request rate enforced across all replicas MUST NOT
+  exceed the configured global limit. This is achieved by dividing the global
+  limit by the deployment's **maximum replica capacity** (`GUARDIAN_MAX_REPLICAS`),
+  so each replica enforces `global_limit / GUARDIAN_MAX_REPLICAS`. When fewer than
+  the maximum number of replicas are running, aggregate enforcement is stricter
+  than the global limit (never looser); the resulting tolerance band MUST be
+  documented. `GUARDIAN_MAX_REPLICAS` MUST default from the deployment's
+  autoscaling max capacity (set by infrastructure), not from a manually maintained
+  value, and MUST remain operator-overridable.
+- **FR-010**: Rate limiting MUST NOT introduce any external coordination
+  dependency on the request hot path; enforcement is per-process arithmetic over
+  the partitioned budget and therefore has no shared-store failure mode. Any
+  future shared/global limiter would have to define and document its
+  fail-open/fail-closed behavior; none is introduced by this feature.
+- **FR-011**: The system MUST provide a single configuration value that
+  identifies the deployment stage (at minimum distinguishing "prod" from
+  non-prod) usable by HA guardrails.
+- **FR-012**: In the prod stage, the system MUST refuse to start with a storage
+  backend that cannot be shared across replicas (the filesystem backend),
+  failing fast with an actionable error.
+- **FR-013**: In the prod stage, the system MUST fail fast when a setting is
+  missing or misconfigured in a way that would make every replica serve
+  incorrectly (e.g. a global rate limit that partitions to zero requests per
+  replica); in non-prod the same condition MUST warn but allow startup. A missing
+  shared cursor secret is explicitly NOT such a setting: it warns but boots in
+  every stage (FR-008), because it degrades pagination only, not correctness.
+- **FR-014**: All HA behaviors MUST preserve existing single-replica behavior;
+  running exactly one replica MUST NOT require new external infrastructure for
+  local/dev use.
+- **FR-015**: The Rust and TypeScript clients MUST observe no behavior drift as a
+  result of these changes; the wire contract for clients MUST remain unchanged
+  unless an explicit, documented contract change is made.
+- **FR-016**: Operator-facing documentation MUST enumerate every environment
+  variable and external state-store dependency required for a correct HA
+  deployment, including the consequence of omitting each, and MUST mark the
+  filesystem backend as dev-only.
+- **FR-017**: Schema migrations MUST be safe under concurrent execution by
+  multiple replicas starting simultaneously; migration application MUST be
+  serialized across the fleet so a first deploy cannot race or deadlock, with no
+  manual "migrate first, then start" operator step required.
+- **FR-018**: When the shared state store is briefly unavailable, authentication
+  (login and authenticated requests) MUST fail closed (rejected, never bypassed)
+  and the canonicalization owner MUST step down rather than risk double-processing;
+  both MUST recover automatically when the store returns. This fail-closed auth
+  behavior is an accepted, documented change from the previous always-available
+  in-memory behavior.
+- **FR-019**: At startup the server MUST emit a single, unambiguous log line
+  stating which coordination mode is active — "shared" (backed by the external
+  store, replica-safe) or "single-process" (in-memory, single-replica only) —
+  together with the effective HA-relevant settings it derives from configuration:
+  the storage backend, the deployment stage, the maximum replica capacity, and
+  whether the pagination cursor secret was supplied or generated. This makes the
+  active mode explicit and diagnosable without inferring it from other logs, and
+  is the discoverable signal that replaces an explicit mode toggle (coordination
+  capability is determined by resolved configuration, not a separate flag). The
+  line MUST reflect the actual resolved state, never operator intent.
+- **FR-020**: The coordination mode MUST be determined by the **storage backend
+  alone**: the Postgres backend MUST use shared coordination (sessions,
+  challenges, leadership) and the filesystem backend MUST use in-memory
+  coordination. Shared coordination MUST be the default whenever Postgres is
+  active and MUST NOT be disabled by any tunable — a missing, mis-overridden, or
+  low `GUARDIAN_MAX_REPLICAS` (or any other knob) MUST NEVER silently reintroduce
+  per-process auth/canonicalization state on a Postgres deployment. (Skipping the
+  per-request session lookup for a deployment known to be single-instance is a
+  possible future optimization behind an explicit, guarded opt-in; it is out of
+  scope here and MUST NOT be inferred from a rate-limit signal.)
+
+### Key Entities
+
+- **Auth Challenge**: A short-lived, one-time login challenge bound to an
+  operator identity; must be readable and consumable by any replica until it
+  expires or is consumed.
+- **Operator Session**: An authenticated session with an issue time, an expiry
+  time, and a revocation (logout) state; must be authoritative across replicas.
+- **Canonicalization Lease / Leadership**: The right, held by at most one replica
+  at a time, to run the canonicalization worker; has a holder identity, an
+  expiry/heartbeat so it can be reclaimed, and a fencing token (advancing on each
+  steal) re-checked before every state-mutating write so a superseded holder is
+  prevented from committing (advisory check, made safe by idempotent writes).
+- **Pagination Cursor**: An opaque, integrity-protected continuation token whose
+  validity depends on a secret shared by all replicas.
+- **Maximum Replica Capacity** (`GUARDIAN_MAX_REPLICAS`): The
+  infrastructure-derived signal for how many replicas the deployment can scale to.
+  It feeds **rate-limit partitioning only** (`global_limit / GUARDIAN_MAX_REPLICAS`).
+  It MUST NOT influence the coordination mode (which is backend-derived, FR-020).
+- **Effective Rate-Limit Budget**: The per-replica share of the global limit,
+  computed as `global_limit / GUARDIAN_MAX_REPLICAS`. Per-client burst/sustained
+  counters remain per-process; they are partitioned, not aggregated, so total
+  enforcement stays at or below the global limit.
+- **Deployment Stage**: A configuration value identifying the environment (prod
+  vs. non-prod) that gates HA guardrails.
+
+## Success Criteria *(mandatory)*
+
+### Measurable Outcomes
+
+- **SC-001**: With 2+ replicas behind the load balancer, an operator completes
+  the full login flow with a 100% success rate across 20 consecutive attempts,
+  including attempts where challenge and verification are forced onto different
+  replicas.
+- **SC-002**: With 2+ replicas, every pending candidate is canonicalized exactly
+  once - zero duplicate promotions, discards, or submissions - across a test of
+  at least 50 candidates.
+- **SC-003**: When the replica holding canonicalization leadership is terminated,
+  canonicalization resumes on another replica within the configured lease TTL
+  (the failover bound, independent of the delta submission grace period), with no
+  manual intervention.
+- **SC-004**: With 2+ replicas and a shared cursor secret, 100% of pagination
+  cursors issued by one replica are accepted by other replicas across a paging
+  test of at least 100 page transitions.
+- **SC-005**: With N replicas, the aggregate accepted request rate for a client
+  exceeding the configured limit stays at or below the configured global limit
+  (rather than ~ Nx the limit). The documented tolerance band MUST also state the
+  two sources of over-strictness: (a) running below the autoscaling max capacity
+  enforces an aggregate limit stricter than the global limit, and (b) HTTP
+  keep-alive can pin a single client to one replica, so that client may be
+  throttled at `global_limit / GUARDIAN_MAX_REPLICAS` (e.g. 1/6 of the global
+  limit with 6 max replicas) — an over-strict, fail-closed outcome for that client.
+  Both are accepted trade-offs of partitioning without shared hot-path state.
+- **SC-006**: A prod-stage server configured with the filesystem backend (or with
+  a global rate limit that partitions to zero requests per replica) fails to
+  start 100% of the time with an error that names the misconfiguration and the
+  remedy.
+- **SC-007**: A reviewer who has never seen the code can stand up a correct 2+
+  replica deployment using only the operator runbook, and all P1/P2 acceptance
+  scenarios pass.
+- **SC-008**: All existing single-replica test suites pass unchanged, confirming
+  no regression for dev/local deployments.
+- **SC-009**: On startup, the server logs exactly one coordination-mode line that
+  correctly reports "shared" when backed by the external store and
+  "single-process" otherwise, including the resolved backend, stage, max replica
+  capacity, and cursor-secret source; an operator can determine the active mode
+  from that single line alone (mode follows the storage backend).
+
+## Assumptions
+
+- The shipped production image is built with the Postgres storage backend, so a
+  shared relational database is available to replicas and is the natural shared
+  coordination/state store for sessions, challenges, and leadership. (Rate
+  limiting is partitioned per-process, not a shared counter — see FR-010.) No
+  new infrastructure component (e.g. a separate cache or
+  queue) is assumed to be mandatory; if one is proposed it will be justified in
+  planning.
+- "Prod stage" is represented by the existing `GUARDIAN_ENV=prod` signal (today
+  used only for ACK secret sourcing), extended to gate HA guardrails. Confirming
+  this versus introducing a dedicated stage variable is a planning decision.
+- The cursor secret environment variable already exists
+  (`GUARDIAN_DASHBOARD_CURSOR_SECRET`); this feature changes its enforcement, not
+  its format.
+- The load balancer does not provide sticky sessions; correctness must not depend
+  on session affinity.
+- Replica clocks are synchronized within a few seconds (standard for the ECS
+  environment); expiry/lease logic must tolerate small skew.
+- Rate limiting is partitioned conservatively against the autoscaling **max**
+  capacity (not the current replica count), so it is never silently looser than
+  the global limit during scale-out and over-throttles (conservatively stricter)
+  when running below max capacity. A documented tolerance band for this
+  over-throttling is acceptable, consistent with the issue's "within some
+  documented tolerance".
+- The infrastructure already computes the autoscaling max capacity
+  (`infra/data.tf` `effective_server_autoscaling_max_capacity`, prod =
+  `max(desired, 6)`); `GUARDIAN_MAX_REPLICAS` defaults from it via Terraform
+  rather than a manually maintained value. It drives **rate-limit partitioning
+  only**; the coordination mode is backend-derived (FR-020).
+
+## Dependencies
+
+- Issue [#190](https://github.com/OpenZeppelin/guardian/issues/190) (single
+  canonicalization owner / no leader election) is subsumed by User Story 2.
+- Existing configuration surface: `GUARDIAN_DASHBOARD_CURSOR_SECRET`,
+  `GUARDIAN_ENV`, `GUARDIAN_RATE_LIMIT_ENABLED`, `GUARDIAN_RATE_BURST_PER_SEC`,
+  `GUARDIAN_RATE_PER_MIN`, `DATABASE_URL`, `GUARDIAN_STORAGE_PATH`,
+  `GUARDIAN_METADATA_PATH`.
+- New configuration: `GUARDIAN_MAX_REPLICAS` (maximum replica capacity; drives
+  **rate-limit partitioning only**; defaults from
+  `effective_server_autoscaling_max_capacity`).
+- Infrastructure wiring (in scope): `infra/data.tf`
+  (`effective_server_autoscaling_max_capacity`) and `infra/ecs.tf` (server env
+  block) must set `GUARDIAN_MAX_REPLICAS` so the correct default ships without
+  operator action.
+- Operator documentation set (`docs/CONFIGURATION.md`, AWS deploy docs, runbooks)
+  must be updated per the contributor docs table.
+
+## Out of Scope
+
+- Autoscaling policy, ALB/ECS provisioning, or Terraform changes beyond the
+  `GUARDIAN_MAX_REPLICAS` env-var wiring (in scope above) and documenting required
+  configuration.
+- Skipping shared coordination for a known single-instance Postgres deployment
+  (a per-request-lookup optimization); if pursued later it MUST be an explicit,
+  guarded opt-in, never inferred from `GUARDIAN_MAX_REPLICAS` or another tunable.
+- Changing the storage backend selection from a compile-time feature to a runtime
+  switch.
+- Multi-region or active/active cross-region deployment.
+- End-user (custody client) facing feature changes; this work is server-side
+  correctness and operability only.