From 94d6dfd3fb1e64671be6d11baa21d8b0c70c0d73 Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Fri, 29 May 2026 14:59:27 +0100
Subject: [PATCH 1/9] Add jepsen infra

---
 deps.edn                                      |  12 +-
 dev/verify_bugs.clj                           | 334 +++++++++++++++
 docker/fdb.cluster                            |   2 +-
 improvements.md                               | 379 ++++++++++++++++++
 issues.md                                     | 218 ++++++++++
 test/intemporal/jepsen/README.md              | 199 +++++++++
 test/intemporal/jepsen/checker.clj            | 250 ++++++++++++
 test/intemporal/jepsen/client.clj             | 165 ++++++++
 test/intemporal/jepsen/db.clj                 | 130 ++++++
 test/intemporal/jepsen/nemesis.clj            | 138 +++++++
 test/intemporal/jepsen/runner.clj             | 251 ++++++++++++
 test/intemporal/jepsen/worker.clj             | 144 +++++++
 test/intemporal/jepsen/workflows.clj          | 138 +++++++
 .../20260529000001-jepsen-tables.down.sql     |   7 +
 .../20260529000001-jepsen-tables.up.sql       |  57 +++
 tests.edn                                     |  12 +-
 16 files changed, 2430 insertions(+), 6 deletions(-)
 create mode 100644 dev/verify_bugs.clj
 create mode 100644 improvements.md
 create mode 100644 issues.md
 create mode 100644 test/intemporal/jepsen/README.md
 create mode 100644 test/intemporal/jepsen/checker.clj
 create mode 100644 test/intemporal/jepsen/client.clj
 create mode 100644 test/intemporal/jepsen/db.clj
 create mode 100644 test/intemporal/jepsen/nemesis.clj
 create mode 100644 test/intemporal/jepsen/runner.clj
 create mode 100644 test/intemporal/jepsen/worker.clj
 create mode 100644 test/intemporal/jepsen/workflows.clj
 create mode 100644 test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.down.sql
 create mode 100644 test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.up.sql

diff --git a/deps.edn b/deps.edn
index ff0a562..d5a86bb 100644
--- a/deps.edn
+++ b/deps.edn
@@ -55,4 +55,14 @@
                       :ns-default  build}
 
               :test  {:jvm-opts  ["--enable-native-access=ALL-UNNAMED"]
-                      :main-opts ["-m" "kaocha.runner"]}}}
+                      :main-opts ["-m" "kaocha.runner"]}
+
+              ;; Run the chaos harness against a live Postgres instance:
+              ;;   clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run
+              :jepsen {:extra-paths ["test" "test/resources"]
+                       :jvm-opts    ["--enable-native-access=ALL-UNNAMED"]
+                       :main-opts   ["-m" "intemporal.jepsen.runner"]}
+
+              ;; Entry point for forked worker JVMs launched by db.clj/fork!
+              :jepsen-worker {:extra-paths ["test" "test/resources"]
+                              :jvm-opts    ["--enable-native-access=ALL-UNNAMED"]}}}
diff --git a/dev/verify_bugs.clj b/dev/verify_bugs.clj
new file mode 100644
index 0000000..4172e10
--- /dev/null
+++ b/dev/verify_bugs.clj
@@ -0,0 +1,334 @@
+(ns verify-bugs
+  "Standalone verification of the five structural bugs described in
+  improvements.md.  Runs each scenario against the JDBC (Postgres) store
+  and the FoundationDB store and prints a side-by-side report.
+
+  Usage:
+    clojure -X:dev:jdbc:fdb verify-bugs/run
+
+  Environment / files required:
+    Postgres    — POSTGRES_JDBC_URI  or  jdbc:postgresql://localhost:5432/root?user=root&password=root
+    FoundationDB — docker/fdb.cluster  (written by the docker-compose foundation service)"
+  (:require [intemporal.core     :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store    :as mem-store]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb  :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [clojure.string :as str]))
+
+;; ── helpers ──────────────────────────────────────────────────────────────────
+
+(def ^:private pg-url
+  (or (System/getenv "POSTGRES_JDBC_URI")
+      "jdbc:postgresql://localhost:5432/root?user=root&password=root"))
+
+(defn- open-fdb []
+  (let [fdb (cfdb/select-api-version 730)]
+    (cfdb/open fdb "docker/fdb.cluster")))
+
+(defn- timeout-ms [ms f]
+  (let [res (future (f))]
+    (deref res ms ::timeout)))
+
+(defn- workflow-completed? [store wf-id]
+  (let [status (p/get-workflow-status store wf-id)]
+    (contains? #{:completed :failed :cancelled} status)))
+
+(defn- print-banner [title]
+  (let [line (apply str (repeat 70 "-"))]
+    (println line)
+    (println (str "  " title))
+    (println line)))
+
+;; ── workflow shapes used in scenarios ────────────────────────────────────────
+
+(defn- wait-signal-wf
+  "Suspends on signal 'go' and returns :woke."
+  []
+  (intemporal/wait-for-signal "go")
+  :woke)
+
+(defn- cancel-sleep-wf
+  "Suspends on signal 'wake', which is never sent — relies on cancel."
+  []
+  (intemporal/wait-for-signal "wake")
+  :woke)
+
+(defn- counting-activity [counter]
+  (swap! counter inc)
+  @counter)
+
+(defn- chain-wf [counter n]
+  (let [act (intemporal/stub #'counting-activity)]
+    (dotimes [_ n]
+      (act counter))
+    :done))
+
+;; ── Bug scenarios ─────────────────────────────────────────────────────────────
+
+(defn- scenario-1-1
+  "Bug 1.1 — Signal sent via a SECOND store instance (simulating another pod)
+  is not delivered because the callback atom is in the first store's memory.
+
+  Two store instances against the same database: store-a starts the workflow
+  and registers the callback; store-b sends the signal.  The signal row lands
+  in the DB but no pod fires the callback."
+  [make-store-a make-store-b label]
+  (let [store-a  (make-store-a)
+        store-b  (make-store-b)
+        wf-id    (str "bug11-" (random-uuid))
+        result   (promise)
+        engine-a (intemporal/make-workflow-engine :store store-a :threads 2)]
+    (try
+      ;; Start workflow on store-a in background (it will suspend on signal)
+      (future
+        (try
+          (let [r (intemporal/start-workflow engine-a wait-signal-wf []
+                                            :workflow-id wf-id)]
+            (deliver result r))
+          (catch Exception e (deliver result {:error (str e)}))))
+      ;; Let it reach the wait-for-signal suspension
+      (Thread/sleep 400)
+      ;; Send signal via store-b (another "process" – empty callback atom)
+      (p/add-signal store-b wf-id "go" {:source :store-b})
+      ;; Wait up to 2 s for the workflow to wake
+      (let [r (deref result 2000 ::timeout)]
+        {:store    label
+         :bug?     (= ::timeout r)
+         :detail   (if (= ::timeout r)
+                     "Workflow stuck: signal row written to DB but callback only in store-a"
+                     (str "Workflow woke unexpectedly: " r))})
+      (finally
+        (intemporal/shutdown-engine engine-a)
+        (when (instance? java.io.Closeable store-a) (.close store-a))
+        (when (instance? java.io.Closeable store-b) (.close store-b))))))
+
+(defn- scenario-1-2
+  "Bug 1.2 — Concurrent save-events with the same (workflow-id, seq).
+
+  JDBC: ON CONFLICT DO UPDATE silently overwrites — one write is lost, both
+        futures return without exception (silent data loss).
+  FDB:  UUID-keyed writes produce DUPLICATE events at the same seq (history
+        has >1 row at seq=0, violating the 'one event per seq' invariant)."
+  [make-store label]
+  (let [store    (make-store)
+        wf-id    (str "bug12-" (random-uuid))
+        event-a  {:event-type :workflow-started :seq 0 :writer "thread-a"
+                  :timestamp (System/currentTimeMillis)}
+        event-b  {:event-type :workflow-started :seq 0 :writer "thread-b"
+                  :timestamp (System/currentTimeMillis)}
+        latch    (promise)
+        t-a      (future (deref latch) (try (p/save-events store wf-id [event-a]) :ok
+                                            (catch Exception e {:error (str e)})))
+        t-b      (future (deref latch) (try (p/save-events store wf-id [event-b]) :ok
+                                            (catch Exception e {:error (str e)})))]
+    (deliver latch :go)
+    (let [ra @t-a
+          rb @t-b
+          history  (p/load-history store wf-id)
+          seq0     (filter #(= 0 (:seq %)) history)
+          cnt      (count seq0)
+          writers  (set (keep :writer seq0))
+          ;; JDBC: both writes succeed but only 1 row survives → silent clobber
+          ;; FDB:  both writes succeed and 2 rows survive → duplicate seq
+          jdbc-silent-overwrite? (and (= :ok ra) (= :ok rb) (= 1 cnt))
+          fdb-duplicate-seq?     (> cnt 1)
+          result {:store      label
+                  :bug?       (or jdbc-silent-overwrite? fdb-duplicate-seq?)
+                  :detail     (cond
+                                jdbc-silent-overwrite?
+                                (str "Both writes returned :ok but seq=0 has 1 row (writer="
+                                     (:writer (first seq0))
+                                     ") — one write silently clobbered by ON CONFLICT DO UPDATE")
+                                fdb-duplicate-seq?
+                                (str "seq=0 has " cnt " rows (writers=" writers
+                                     ") — UUID-keyed inserts produce duplicate-seq history")
+                                :else
+                                (str "No corruption detected: writes=" [ra rb] " seq0-count=" cnt))
+                  :seq0-count cnt}]
+      (when (instance? java.io.Closeable store) (.close store))
+      result)))
+
+(defn- scenario-2-1
+  "Bug 2.1 — Register-then-consume signal race.
+
+  process-signal does: (1) consume-signal, (2) if nil → register-callback.
+  If a sender fires between (1) and (2) the signal is consumed but the
+  callback fires into nothing (or the signal is already gone by the time
+  the callback tries to re-consume).
+
+  We maximise the window by having the sender fire 200 ms after the workflow
+  starts (before it has committed to suspending).  A stuck workflow after a
+  sent signal indicates the race was hit."
+  [make-store label]
+  (let [store   (make-store)
+        wf-id   (str "bug21-" (random-uuid))
+        result  (promise)
+        engine  (intemporal/make-workflow-engine :store store :threads 2)]
+    (try
+      (future
+        (try
+          (let [r (intemporal/start-workflow engine wait-signal-wf []
+                                            :workflow-id wf-id)]
+            (deliver result r))
+          (catch Exception e (deliver result {:error (str e)}))))
+      ;; Send the signal after a short window — trying to land between
+      ;; consume-check (step 1) and register-callback (step 2).
+      (Thread/sleep 200)
+      (p/add-signal store wf-id "go" {:source :race-test})
+      ;; Wait up to 3 s for the workflow to wake.
+      (let [r (deref result 3000 ::timeout)]
+        {:store   label
+         :bug?    (= ::timeout r)
+         :detail  (if (= ::timeout r)
+                    "Workflow stuck: signal sent before callback was registered (race hit)"
+                    "Workflow woke normally (race window not hit this run — try more iterations)")})
+      (finally
+        (intemporal/shutdown-engine engine)
+        (when (instance? java.io.Closeable store) (.close store))))))
+
+(defn- scenario-2-3
+  "Bug 2.3 — Cancellation can't reach a sleeping workflow.
+
+  cancel-workflow sets cancelled=true in the store but does NOT call any
+  wake mechanism.  A workflow sleeping in wait-for-signal never re-enters
+  the execution loop and therefore never observes the flag."
+  [make-store label]
+  (let [store   (make-store)
+        wf-id   (str "bug23-" (random-uuid))
+        result  (promise)
+        engine  (intemporal/make-workflow-engine :store store :threads 2)]
+    (try
+      (future
+        (try
+          (let [r (intemporal/start-workflow engine cancel-sleep-wf []
+                                            :workflow-id wf-id)]
+            (deliver result r))
+          (catch Exception e (deliver result {:error (str e)}))))
+      ;; Wait for workflow to suspend
+      (Thread/sleep 400)
+      ;; Cancel the workflow (sets the DB flag but sends no wake signal)
+      (intemporal/cancel-workflow store wf-id)
+      ;; Wait up to 2 s for the workflow to observe the cancellation
+      (let [r (deref result 2000 ::timeout)]
+        {:store   label
+         :bug?    (= ::timeout r)
+         :detail  (if (= ::timeout r)
+                    "Workflow stuck: cancelled flag set but sleeper never re-entered loop"
+                    (str "Workflow woke after cancel (status=" (:status r) ")"))})
+      (finally
+        (intemporal/shutdown-engine engine)
+        (when (instance? java.io.Closeable store) (.close store))))))
+
+(defn- scenario-no-recovery-poller
+  "Bug 1.3 — No recovery poller: resume requires caller to know the function.
+
+  Simulates a pod restart by using TWO separate store instances (store-a for
+  engine-a, store-b for engine-b) pointing at the same backing database.
+  This mirrors a real restart: each JVM gets a fresh store object with an
+  empty callbacks atom.
+
+  After engine-a crashes, engine-b sends the signal via store-b.  The signal
+  row lands in the DB, but store-a's callback atom (holding the wake-fn) is
+  gone.  Engine-b has no recovery poller to detect the suspended workflow —
+  it must be resumed explicitly."
+  [make-store-a make-store-b label]
+  (let [store-a   (make-store-a)
+        wf-id     (str "bug13-" (random-uuid))
+        result    (promise)
+        engine-a  (intemporal/make-workflow-engine :store store-a :threads 2)]
+    (try
+      (future
+        (try
+          (let [r (intemporal/start-workflow engine-a wait-signal-wf []
+                                            :workflow-id wf-id)]
+            (deliver result r))
+          (catch Exception e (deliver result {:error (str e)}))))
+      ;; Let it suspend and register its callback
+      (Thread/sleep 500)
+      ;; "Crash" engine-a
+      (intemporal/shutdown-engine engine-a)
+      (when (instance? java.io.Closeable store-a) (.close store-a))
+      ;; Create engine-b with a FRESH store instance — simulates pod restart
+      (let [store-b  (make-store-b)
+            engine-b (intemporal/make-workflow-engine :store store-b :threads 2)]
+        ;; Send signal via store-b (empty callback atom — just like a new process)
+        (p/add-signal store-b wf-id "go" {:source :engine-b-restart})
+        ;; Wait: engine-b has no poller to pick up the workflow
+        (let [r (deref result 2000 ::timeout)]
+          (intemporal/shutdown-engine engine-b)
+          (when (instance? java.io.Closeable store-b) (.close store-b))
+          {:store   label
+           :bug?    (= ::timeout r)
+           :detail  (if (= ::timeout r)
+                      "Engine-b (fresh store) sent signal but workflow never woke — no recovery poller"
+                      "Workflow woke unexpectedly after engine restart")}))
+      (finally nil))))
+
+;; ── Store factories ───────────────────────────────────────────────────────────
+
+(defn- make-mem-store [] (mem-store/->InMemoryStore (atom {})))
+
+(defn- make-jdbc-store [] (jdbc-store/make-jdbc-store pg-url))
+
+(defn- make-fdb-store []
+  (let [db (open-fdb)]
+    (fdb-store/make-fdb-store db (str "verify-" (random-uuid)))))
+
+;; ── Report rendering ──────────────────────────────────────────────────────────
+
+(defn- fmt-result [{:keys [store bug? detail]}]
+  (let [icon (if bug? "FAIL ✗" "PASS ✓")]
+    (format "  %-10s %s\n    %s" store icon detail)))
+
+(defn- print-scenario [bug-id title results]
+  (print-banner (str bug-id " — " title))
+  (doseq [r results]
+    (println (fmt-result r)))
+  (println))
+
+;; ── Main entry point ─────────────────────────────────────────────────────────
+
+(defn run
+  "Entry point: clojure -X:dev:jdbc:fdb verify-bugs/run"
+  [_opts]
+  (println "\n╔══════════════════════════════════════════════════════════════════╗")
+  (println   "║   intemporal bug verification — JDBC (Postgres) + FoundationDB  ║")
+  (println   "╚══════════════════════════════════════════════════════════════════╝\n")
+
+  ;; ----------------------------------------------------------------------------
+  (print-scenario
+    "Bug 1.1" "Lost wake on signal across store instances"
+    [(scenario-1-1 make-jdbc-store make-jdbc-store "JDBC")
+     (scenario-1-1 make-fdb-store  make-fdb-store  "FDB")])
+
+  ;; ----------------------------------------------------------------------------
+  (print-scenario
+    "Bug 1.2" "Concurrent write corruption at the same seq"
+    [(scenario-1-2 make-jdbc-store "JDBC")
+     (scenario-1-2 make-fdb-store  "FDB")])
+
+  ;; ----------------------------------------------------------------------------
+  (print-scenario
+    "Bug 1.3" "No recovery poller — engine restart does not resume workflows"
+    [(scenario-no-recovery-poller make-jdbc-store make-jdbc-store "JDBC")
+     (scenario-no-recovery-poller make-fdb-store  make-fdb-store  "FDB")])
+
+  ;; ----------------------------------------------------------------------------
+  (print-scenario
+    "Bug 2.1" "Register-then-consume signal race (intermittent)"
+    [(scenario-2-1 make-jdbc-store "JDBC")
+     (scenario-2-1 make-fdb-store  "FDB")])
+
+  ;; ----------------------------------------------------------------------------
+  (print-scenario
+    "Bug 2.3" "Cancellation cannot reach a sleeping workflow"
+    [(scenario-2-3 make-jdbc-store "JDBC")
+     (scenario-2-3 make-fdb-store  "FDB")])
+
+  ;; ----------------------------------------------------------------------------
+  (println "\nNote: Bug 2.1 is a race; a single run may not always hit the window.")
+  (println "      Increase Thread/sleep in scenario-2-1 or run multiple times.\n")
+
+  (System/exit 0))
diff --git a/docker/fdb.cluster b/docker/fdb.cluster
index 1e658b5..0c1fa8d 100644
--- a/docker/fdb.cluster
+++ b/docker/fdb.cluster
@@ -1 +1 @@
-docker:docker@172.19.0.2:4500
+docker:docker@172.18.0.3:4500
diff --git a/improvements.md b/improvements.md
new file mode 100644
index 0000000..a49f5cf
--- /dev/null
+++ b/improvements.md
@@ -0,0 +1,379 @@
+# Intemporal Design Critique — Multi-Pod / k8s Replica Set Context
+
+## Context
+
+The user asked for a design analysis of the `intemporal` library, with explicit focus on:
+- Deadlocks and "lost workflow" failure modes
+- Other criticism
+- Behaviour under a Kubernetes deployment where multiple pods of a replica set run the engine concurrently and can scale up/down
+
+The library positions itself as "Temporal/Cadence-inspired" — an event-sourced workflow engine where workflow state is reconstructed from a persisted event log so that activities don't re-execute after a process crash.
+
+This document is an **analysis deliverable**, not an implementation plan. It is structured to be useful as input for an architecture decision (adopt / fork / replace / contribute fixes).
+
+---
+
+## TL;DR
+
+**`intemporal` is a single-process resilient workflow engine. It is unsafe to run more than one replica against the same store. The README's "not production-ready" disclaimer is accurate — and the gap to a true distributed orchestrator is structural, not cosmetic.**
+
+The two showstoppers for any k8s replica-set deployment:
+
+1. **No durable wake mechanism.** Signal callbacks and timers live in a process-local atom. When the pod that registered them dies, nothing in the system knows to wake the workflow again. The workflow is *persisted but orphaned*.
+2. **No ownership / leasing.** Two pods can race on the same `workflow-id` with no detection. The JDBC store actively masks the race with `ON CONFLICT … DO UPDATE`, silently corrupting the event log.
+
+A third structural issue: there is **no poller / recovery worker**. After a crash, no pod scans for workflows that should be running. Resume only happens if an external actor explicitly calls `resume-workflow` with the right function and args. Durability of state without durability of execution is illusion.
+
+---
+
+## Architecture Summary (what I'm critiquing)
+
+Verified from the source:
+
+- **Engine** (`src/intemporal/core.cljc:445`): a map of `{:store :executor :scheduler :registry :observer}`. No identity, no node id, no clustering primitives.
+- **start-workflow** (`src/intemporal/internal/fns/start_workflow.clj:8-78`): generates a UUID, writes `:workflow-started`, calls `run-workflow-internal` **synchronously on the calling thread**, then loops blocking on a local `promise` until the workflow completes or is interrupted.
+- **resume-workflow** (`src/intemporal/core.cljc:366-390`): the caller must supply `workflow-id`, `workflow-fn`, and `args`. The engine replays history and re-enters execution.
+- **Stores** implement `IStore` (`src/intemporal/protocol.cljc:8-21`). The protocol contains only: history read/write, signal add/consume, callback register/unregister, cancellation flag, and status. **No claim, no lease, no heartbeat, no "list running workflows".**
+- **JDBC store** (`src/intemporal/store/jdbc.clj`): events written under transactions; uses `ON CONFLICT (workflow_id, seq) DO UPDATE` (line 100-103); signal consumption uses `FOR UPDATE SKIP LOCKED` (line 137). Signal callbacks are kept in a **per-process atom** on the store record (line 72, 143-147).
+- **InMemoryStore** (`src/intemporal/store.cljc:8-78`): identical callback semantics — a single atom.
+- **DefaultScheduler** (`src/intemporal/internal/runtime.clj`): timers held in an in-memory `pending-timers` atom. Lost on process exit.
+
+---
+
+## Section 1 — Distributed-Deployment Showstoppers
+
+### 1.1 No wake mechanism survives a pod restart  *(severity: critical)*
+
+`register-signal-callback` and `schedule-timer` store their continuation **inside the process** (atom for signals, `ScheduledFuture` for timers). Concretely:
+
+- `JdbcStore` carries `callbacks` as `(atom {})` at construction (`src/intemporal/store/jdbc.clj:72`). Two pods sharing the same Postgres each have their own empty atom.
+- `add-signal` in JDBC (`src/intemporal/store/jdbc.clj:122-132`) writes the signal under a transaction, then does `(when-let [callback (get-in @callbacks [workflow-id signal-name])] (future (callback)))` — **only the pod that registered the callback can fire it**.
+
+Failure trace:
+
+1. Pod A executes workflow X up to `(wait-for-signal :go)`. It writes the suspension to history, registers callback in **pod-A-local atom**, blocks the calling thread on a promise.
+2. Pod A crashes (k8s scale-down, OOM, node failure).
+3. Pod B receives an HTTP request → calls `(send-signal store "X" :go {})`. Postgres now has the signal row. Pod B's local callbacks atom is empty for X → **no wake**.
+4. Workflow X is permanently stuck. Its history is intact, its signal is queued, and no process knows to re-enter execution.
+
+Timers have the same problem with worse blast radius: a 1-hour timer scheduled on pod A *vanishes* the moment pod A dies, even if nobody sends a signal. No row in any table, no scheduled job, no poller.
+
+**This is the "losing workflows" failure mode**, and it does not require any race: it happens on every routine k8s rolling restart.
+
+### 1.2 No ownership → silent concurrent execution  *(severity: critical)*
+
+`start-workflow` and `resume-workflow` do not claim anything. Two pods can run the same workflow id concurrently. Specifically:
+
+- `JdbcStore.save-events` uses `ON CONFLICT (workflow_id, seq) DO UPDATE` (`src/intemporal/store/jdbc.clj:100-103`). When two pods append the same seq, **the loser silently wins** (last writer overwrites). The race is masked; the event log is non-deterministic.
+- Activities are re-executed on each pod — at-least-once degrades to at-many-times.
+- The replay invariant ("same input → same event stream") is violated because two engines may emit different events at the same seq.
+
+This becomes very easy to trigger: `start-workflow` blocks the caller. A reverse-proxy retry on a slow `POST /workflows` will re-invoke `start-workflow`, and if the client supplies `:workflow-id` for idempotency the second call appends a duplicate `:workflow-started` event to history rather than rejecting (no uniqueness check; the `seq` for the first event likely overwrites if both pods reach `seq=0` simultaneously).
+
+A safer schema would use `ON CONFLICT DO NOTHING` (or reject) on event inserts, plus a `(workflow_id, owner_lease, lease_expires_at)` claim row.
+
+### 1.3 No recovery poller  *(severity: critical)*
+
+There is no background process anywhere in the codebase that scans for workflows requiring execution. `resume-workflow` is **on-demand only** and **requires the caller to know the workflow function and args**.
+
+This couples recovery to application code: every pod that starts up must explicitly enumerate "things that might be suspended" and call `resume-workflow` with the right vars in scope. The library provides no list-by-status query and no API for "given a workflow id, find the function and resume". For a multi-replica deployment, recovery is essentially a problem the user has to solve outside the library.
+
+Temporal solves this with task queues + workers that long-poll the server. Intemporal has neither concept.
+
+---
+
+## Section 2 — Deadlock & Lost-Workflow Scenarios
+
+### 2.1 Register-after-consume race  *(severity: high, even on a single pod)*
+
+`process-signal` (`src/intemporal/internal/execution.clj:223-255`) is described by the explore agent as:
+
+```
+1. consume-signal — if present, return
+2. otherwise register-signal-callback
+3. suspend
+```
+
+This is a classic TOCTOU. Between (1) returning nil and (2) writing the callback, another thread (or another pod) can call `add-signal`. The signal lands in the store; the callback fires nothing (it isn't registered yet); the registration completes after the signal write; the workflow suspends forever.
+
+The fix is the standard one: register the callback first, then check, then unregister + consume if a signal was already present.
+
+### 2.2 Signal sent to a workflow not yet started
+
+`send-signal` will happily write a signal for an unknown workflow id (in JDBC, line 124 inserts a workflow row via upsert). If the workflow is later started but the start path doesn't drain pre-existing signals before reaching `wait-for-signal`, the signal may or may not be picked up depending on ordering — worth a targeted test, since signals are addressed by name and the workflow expects FIFO semantics per name.
+
+### 2.3 Cancellation cannot reach a suspended workflow
+
+`cancel-workflow` (`src/intemporal/core.cljc:411-418`) sets a flag. Cancellation is **polled at sequence points** (per the explore agent: `check-cancelled!` before each operation, and at the top of the execution loop). A workflow that is suspended on `wait-for-signal` with no signal will:
+
+- never re-enter the execution loop on its own,
+- never poll the flag,
+- be invisible to cancellation.
+
+So `cancel-workflow` is **not reliable for any workflow that is currently waiting**. The flag is set in the DB but the workflow only sees it next time it wakes — which may be never (see 1.1).
+
+### 2.4 Long-lived `start-workflow` thread
+
+`start-workflow` blocks the calling thread until the workflow either completes or is interrupted (`src/intemporal/internal/fns/start_workflow.clj:67-75`). A workflow that waits 30 days for a signal holds the caller's thread for 30 days. This is incompatible with HTTP request/response in any normal web framework and leaks pod resources at scale. The "right" pattern (return a workflow id immediately; durable wake later) is precisely the pattern that doesn't exist (see 1.1, 1.3).
+
+### 2.5 `max-iterations` foot-gun
+
+Default 1000 replay iterations (`src/intemporal/internal/fns/start_workflow.clj:23`). A workflow with thousands of activities or a long signal-driven loop will silently fail at replay. No clear surface to detect this in production.
+
+### 2.6 In-flight activities at shutdown become "interrupted"
+
+Per the crash-test agent: activities crashed mid-execution are marked `:activity-interrupted` and re-run on resume. This is correct behaviour for at-least-once. **However**, combined with 1.1, the resume never happens automatically — so the activity neither completes nor restarts. Worth distinguishing in docs: "at-least-once if you remember to call resume-workflow".
+
+---
+
+## Section 3 — Other Criticism
+
+### 3.1 JDBC schema lacks the columns it needs
+
+Looking at `resources/migrations/postgres/20260215214002-initial-schema.up.sql` (referenced by the explore agent): three tables, no `status` column on `intemporal_workflows`, no `owner`, no `lease_expires_at`, no `last_heartbeat_at`, no index for "find running workflows". Status is derived by scanning `intemporal_history` and reading `last(event-type)` — O(history-length) per status query.
+
+`ON CONFLICT (workflow_id, seq) DO UPDATE` (line 100-103) is the wrong policy. Two correct writers should not be allowed to coexist; the conflict should be loud (`DO NOTHING` + check `affected`, or a `version` column with CAS).
+
+### 3.2 The IStore protocol is too thin
+
+For a multi-tenant durable orchestrator you'd expect at minimum:
+
+- `claim-workflow [store workflow-id worker-id lease-ttl]` → boolean
+- `renew-lease [store workflow-id worker-id]`
+- `release [store workflow-id worker-id]`
+- `list-runnable [store worker-id batch-size]` (signals arrived / timers due / leases expired)
+- Persistent timer rows (`{workflow_id, seq, fire_at}`)
+- Persistent "needs wake" markers
+
+None of these exist. Adding them is not a small patch; it touches the execution engine's assumption that wakes are local.
+
+### 3.3 Signal callbacks duplicated as in-process state across store impls
+
+Every store maintains its own callback atom (`InMemoryStore` via `:signal-callbacks` in the state map, `JdbcStore` via a separate `(atom {})` field). For the JDBC store this is conceptually wrong: the store is shared, but a process-local atom shadows it. A correct multi-pod implementation would use a notification mechanism the database already provides — Postgres `LISTEN/NOTIFY`, an explicit watch table, or an external pub/sub — and would remove `register-signal-callback` from `IStore` entirely (it isn't really a store concern).
+
+### 3.4 No separation between "orchestrator" and "worker"
+
+`IActivityExecutor` runs activities in the same process that runs the workflow. There is no way to dispatch activities to a separate worker pool (e.g., a "heavy I/O" replica set distinct from "orchestrator" replicas). Heavy activities consume the same thread budget that drives workflows.
+
+### 3.5 Recovery requires the caller to know the workflow function
+
+`resume-workflow` takes `workflow-fn` and `args`. The library has no registry that maps `workflow-id → function var`. Every pod that wants to recover must:
+
+1. Query the store for workflows in `:running` status (no such query exists).
+2. Look up the right function var (no such mapping exists).
+3. Recover the original args (they live in the `:workflow-started` event — accessible, but undocumented).
+
+In practice this means the user writes their own dispatch table and recovery loop. The library does not provide a working recovery story out of the box.
+
+### 3.6 Observer protocol is a good idea, slightly under-spec'd
+
+`IWorkflowObserver` (`src/intemporal/protocol.cljc:45-62`) is clean and gives the right hooks for tracing. Two gaps worth noting:
+
+- No `on-store-write` / `on-suspension-persisted` — useful for "did the durability write succeed before we acked the activity?"
+- No `on-replay-iteration` — useful for diagnosing slow replays.
+
+### 3.7 Documentation gap
+
+The README says "not production-ready" but doesn't enumerate **why**. A short "Operational Caveats" section listing 1.1, 1.2, 1.3 would prevent users from misjudging the library based on the Temporal-flavoured API surface.
+
+---
+
+## Section 4 — What a Multi-Pod-Safe Version Would Need
+
+Not a request to implement; a calibration of how far the library is from the goal.
+
+1. **Lease-based ownership.** Add `claim_workflow(worker_id, ttl)` + `renew` + `release`. Reject all writes from a worker whose lease has expired. Heartbeat from a background thread.
+2. **Persistent timers.** Add a `intemporal_timers (workflow_id, seq, fire_at, claimed_by, claimed_until)` table and a poller (`SELECT … WHERE fire_at <= now() AND claimed_until < now() FOR UPDATE SKIP LOCKED`).
+3. **Persistent wake markers.** When a signal arrives or a timer fires, write a row to `intemporal_runnable (workflow_id)`. Each pod polls this table (or `LISTEN`s on `NOTIFY`).
+4. **Durable workflow registry.** Map `workflow_id → workflow_function_symbol + args`. Store the symbol in the `:workflow-started` event; have every pod register the symbols it can resolve.
+5. **Reject concurrent writers.** Change `ON CONFLICT DO UPDATE` to `DO NOTHING` and fail the workflow run on conflict (lease violation).
+6. **Async `start-workflow`.** Return `{:workflow-id …}` immediately; let the worker loop pick up the new workflow from the runnable queue.
+7. **Fix the signal register-then-consume race** (2.1) — even single-process correctness depends on this.
+8. **Cancellation that wakes a sleeper.** Cancellation should write a runnable marker that forces the workflow to wake and observe the flag (currently it only sets the flag).
+
+Items 1–6 are essentially "build a real distributed workflow engine". Item 7 is a bug fix. Item 8 is a small targeted change.
+
+---
+
+## Section 5 — Pragmatic Recommendations (no code changes implied)
+
+For someone evaluating this library:
+
+- **Safe today**: single process, in-memory store, side-effects inside short-lived activities — i.e., as a structured way to write resumable in-memory orchestrations. Fine for tests, batch jobs, single-node tools.
+- **Risky**: any deployment with `>1` replica, even with the JDBC store. Will not lose data, but **will lose execution liveness** on every pod restart, and **will corrupt history** under concurrent retries.
+- **Don't**: rely on it as a Temporal replacement in k8s without writing significant infrastructure on top (leasing, polling, dispatch, signal fan-out).
+
+---
+
+## Verification (how to confirm the above claims yourself)
+
+Quick reproductions, each ~10–30 minutes:
+
+1. **Lost wake on signal across processes.** Start two REPLs with the same Postgres URL. REPL A: `start-workflow` a workflow that calls `wait-for-signal`. Kill REPL A (`System/exit`). REPL B: `send-signal` for that workflow id. Confirm the signal sits in `intemporal_signals` and nothing happens. Restart REPL A: the workflow only resumes if you explicitly call `resume-workflow`.
+2. **Concurrent start corrupts history.** Two REPLs call `start-workflow` with the same `:workflow-id` simultaneously. Inspect `intemporal_history` — observe duplicate `:workflow-started` rows or silently overwritten events at the same `seq`.
+3. **Lost timer.** Start a workflow that sleeps for 5 minutes. Kill the JVM within 30 seconds. Restart it without calling `resume-workflow`. Confirm the workflow never fires.
+4. **Cancellation cannot reach a sleeper.** Start a workflow that does `(wait-for-signal :go)` and immediately `cancel-workflow`. Observe the cancelled flag is set but the workflow never terminates (it never re-enters the loop to observe the flag).
+5. **Register-then-consume race.** A targeted test that interleaves `wait-for-signal` and `send-signal` on the same workflow id at the consume-then-register window. May require thread sleep instrumentation in `process-signal` to reproduce reliably.
+
+If any of these *don't* reproduce, the analysis is wrong on that point and the relevant section should be revised.
+
+---
+
+## Section 6 — Improvement Plan
+
+**Chosen scope: Phases A + B + C (full multi-pod safety).** Estimated ~6–8 weeks. Phases D and E are listed below for completeness but explicitly deferred.
+
+**Target backends:** Postgres, MySQL/MariaDB, FoundationDB, and InMemoryStore (the latter for test fixtures and single-process use; it will implement the new `IStore` operations in-memory so the same execution code path works everywhere).
+
+Each phase is independently shippable (the library keeps working after each one). Distributed safety is opt-in via store choice — Phase A and B do not require any schema change; Phase C does.
+
+Effort scale: **S** = ≤1 day, **M** = 2–5 days, **L** = 1–2 weeks, **XL** = >2 weeks. Estimates assume one contributor familiar with the codebase.
+
+### Guiding principles
+
+- **Don't break the existing API.** Add new functions; deprecate old ones with shims.
+- **Don't tax single-process users with distributed costs.** Lease checks, runnable polling, etc. are only meaningful when the store implementation cares.
+- **Push correctness into the schema.** A constraint that throws is better than a callback that silently fails.
+- **Make wake-up durable and centralised.** Today there are three independent wake paths (signal callback atom, in-process timer, blocking promise). Collapse them into one: "write a runnable marker; a worker picks it up."
+
+---
+
+### Phase A — Single-pod correctness fixes  *(unblocks correctness even without distribution)*
+
+| ID | Issue | Files | Effort |
+|----|-------|-------|--------|
+| A1 | Fix register-then-consume signal race (2.1) | `src/intemporal/internal/execution.clj` (process-signal, ~L223-255) | S |
+| A2 | Cancellation wakes sleepers (2.3) | `src/intemporal/core.cljc:411`, `src/intemporal/protocol.cljc`, both stores | S |
+| A3 | Reject duplicate concurrent event writes (1.2) | `src/intemporal/store/jdbc.clj:100-103` | S |
+| A4 | Loud `max-iterations` failure (2.5) | `src/intemporal/internal/fns/start_workflow.clj`, execution loop | S |
+
+**A1 sketch.** Change `process-signal` from `consume → register → suspend` to `register → consume → if-found(unregister + return) → suspend`. Add a stress test: two threads, one calling `wait-for-signal`, one calling `send-signal`, interleaved with a configurable delay; assert no orphaned suspension.
+
+**A2 sketch.** Add `IStore/wake-workflow [store wf-id]` that fires every registered callback for that workflow id (both signal callbacks and a new generic "wake" callback). `cancel-workflow` calls it after `mark-cancelled`. The workflow then re-enters the loop and observes the cancelled flag at `check-cancelled!` (`src/intemporal/internal/context.cljc:37-40`).
+
+**A3 sketch.** Change `ON CONFLICT (workflow_id, seq) DO UPDATE …` → `ON CONFLICT (workflow_id, seq) DO NOTHING`, capture row counts from `jdbc/execute!`, and throw `ConcurrentWriterException` if any insert returned 0 rows. Add a test that spins two threads racing on the same workflow id and asserts exactly one succeeds.
+
+**A4 sketch.** When the replay loop exceeds `max-iterations`, throw a typed exception (`ReplayBudgetExceeded`) carrying workflow id + last seq, instead of silently returning the partial result. Default budget could scale with `(count history) * 2` rather than a flat constant.
+
+---
+
+### Phase B — Operational hardening  *(makes the library production-shaped for single-pod)*
+
+| ID | Improvement | Files | Effort |
+|----|-------------|-------|--------|
+| B1 | Idempotent `start-workflow` with `:request-id` | `src/intemporal/internal/fns/start_workflow.clj`, JDBC schema | M |
+| B2 | `status` column on `intemporal_workflows` (O(1) status reads) | new migration, `src/intemporal/store/jdbc.clj` | S |
+| B3 | Workflow registry: `register-workflow!` + resolve-on-resume | new ns `src/intemporal/internal/workflow_registry.cljc`, core API | M |
+| B4 | Async `submit-workflow` returning `{:workflow-id …}` | `src/intemporal/core.cljc`, executor wiring | M |
+| B5 | Observer hooks for store writes / replay iterations (3.6) | `src/intemporal/protocol.cljc`, call sites | S |
+
+**B3 is load-bearing for Phase C.** Without a registry mapping `workflow-id → workflow-fn`, the worker loop can't resume a workflow it didn't start. Design: register by name; store the name (symbol) inside the `:workflow-started` event payload (alongside `args`); `resume-workflow` accepts `(engine, workflow-id)` and resolves both the function and the args from the first event.
+
+**B4 sketch.** New function `submit-workflow` that writes `:workflow-started`, enqueues to the engine's executor, and returns immediately. The blocking `start-workflow` becomes a thin wrapper: `submit-workflow` + `await-workflow`. This unblocks the "HTTP request returns workflow id" pattern without forcing every caller off the blocking API.
+
+---
+
+### Phase C — Distributed primitives  *(makes multi-pod safe; opt-in per store)*
+
+This is the meat of the work. Each item adds new `IStore` operations and a corresponding JDBC implementation. InMemoryStore can either no-op these (single-process semantics) or implement them in-memory for tests.
+
+| ID | Primitive | Files | Effort |
+|----|-----------|-------|--------|
+| C1 | Lease protocol: `claim` / `renew` / `release` / `expire-stale` | `src/intemporal/protocol.cljc`, JDBC schema (add `owner_id`, `lease_until`), all `save-events` paths | L |
+| C2 | Persistent timers table + poller | new migration, `src/intemporal/internal/runtime.clj` scheduler, new poller component | L |
+| C3 | Runnable markers table: signals/timers/cancellations write a marker | new migration, every wake path | M |
+| C4 | Worker loop: `start-worker` polls runnable markers, claims lease, resumes | new ns `src/intemporal/internal/worker.clj`, core API | L |
+| C5 | Remove in-process signal callbacks; all wakes via runnable markers | `src/intemporal/store/jdbc.clj`, execution.clj | M |
+
+**C1 design.** Add columns `owner_id TEXT, lease_until TIMESTAMPTZ` to `intemporal_workflows`. `claim-workflow` is a single `UPDATE … WHERE id = ? AND (owner_id IS NULL OR owner_id = ? OR lease_until < now())` returning rows-affected. Every `save-events` call validates `(owner_id = ? AND lease_until > now())` in the same transaction as the inserts; on mismatch, throws `LeaseLostException`. The worker catches and aborts the in-flight execution cleanly.
+
+**C2 design.** Schema: `intemporal_timers (workflow_id, seq, fire_at, claimed_until, PRIMARY KEY(workflow_id, seq))`. `schedule-timer` upserts a row instead of (or in addition to) the in-memory `ScheduledFuture`. A poller thread runs `SELECT … WHERE fire_at <= now() AND claimed_until < now() FOR UPDATE SKIP LOCKED LIMIT N`, writes a runnable marker for each, and updates `claimed_until = now() + INTERVAL '1m'` for fencing. In-memory `ScheduledFuture`s become a latency optimisation (avoid the poll), not the source of truth.
+
+**C3 design.** Schema: `intemporal_runnable (workflow_id PRIMARY KEY, reason TEXT, enqueued_at TIMESTAMPTZ, claimed_until TIMESTAMPTZ)`. Use `PRIMARY KEY` (not unique-by-reason) so duplicates collapse — one workflow is either runnable or it isn't. `add-signal`, timer-fire, and `cancel-workflow` all `INSERT … ON CONFLICT DO UPDATE SET enqueued_at = now()`.
+
+**C4 design.** A worker is `{:engine :worker-id :poll-interval :concurrency}`. Loop:
+```
+loop:
+  rows = SELECT workflow_id FROM intemporal_runnable
+         WHERE claimed_until < now()
+         FOR UPDATE SKIP LOCKED LIMIT batch
+  for each row in parallel (concurrency-limited):
+    if claim-workflow(wf_id, worker_id, ttl): 
+      try: resume-workflow(engine, wf_id)  // uses B3 registry
+           DELETE FROM intemporal_runnable WHERE workflow_id = ?
+      finally: release(wf_id, worker_id)
+  if rows empty: sleep poll-interval (or LISTEN for wake)
+```
+Optional Postgres optimisation: `NOTIFY intemporal_runnable` on insert; worker uses `LISTEN` for sub-second wake. Default poll interval (e.g., 500ms) is the safety net.
+
+**C5 design.** Once C3/C4 land, in-process callbacks are vestigial. Delete the `callbacks` atom on `JdbcStore`; `register-signal-callback` and `unregister-signal-callback` become no-ops (kept for protocol compatibility for one release, then removed). All wake is via runnable markers. Closes the cross-pod signal loss path (1.1).
+
+#### Per-backend implementation notes for Phase C
+
+**Postgres** (primary target, full featureset):
+- `FOR UPDATE SKIP LOCKED` for marker claim and lease claim (already used for signals).
+- `LISTEN/NOTIFY intemporal_runnable` for sub-second wake; poll loop as the safety net.
+- Standard timestamptz for `lease_until` and `claimed_until`.
+- JSONB payload columns (consistent with current schema).
+
+**MySQL/MariaDB** (full featureset, polling only):
+- `SELECT … FOR UPDATE SKIP LOCKED` is supported (MySQL 8.0+, MariaDB 10.6+); pin to those versions in docs.
+- No `LISTEN/NOTIFY` equivalent — workers poll at configurable interval (default 500ms is acceptable for most use cases; faster requires busy-polling tradeoff).
+- Add an index on `intemporal_runnable (claimed_until, enqueued_at)` for poller scans.
+- JSON column type instead of JSONB.
+
+**FoundationDB** (full featureset, native transactional model):
+- No SQL — operations are transactional key-range reads/writes against the directory layer (`store/fdb.clj` already uses subspaces).
+- Lease: key `["lease", workflow-id] → {owner-id, lease-until}`. Claim is a serializable read-modify-write transaction; FDB rejects conflicting commits automatically (no SKIP LOCKED needed — that's the wrong model for FDB).
+- Runnable markers: subspace `["runnable"]`, keys are `[enqueued-at, workflow-id]`. Workers pop with a watch on the subspace (FDB's `getRangeWatchable` or equivalent). FDB watches are the native equivalent of `LISTEN/NOTIFY` — no polling needed.
+- Persistent timers: subspace `["timers", fire-at, workflow-id, seq]`. Timer poller scans the prefix `["timers"]` with `streamingMode :want-all` up to `now()`.
+- Cross-process callbacks don't exist in FDB; the watch primitive replaces them entirely. Cleaner than the Postgres/MySQL design.
+
+**InMemoryStore** (test fixture + single-process use):
+- All new operations implemented in-memory using existing atom-based state.
+- Lease: a `{owner-id, lease-until}` entry per workflow in the state map. `claim-workflow` is an atomic `swap!` with CAS semantics.
+- Runnable markers: a vector in the state map; workers `swap!` to pop.
+- Persistent timers: same as today (in-memory `ScheduledFuture`), but exposed through the new IStore API so the execution code doesn't branch.
+- `register-signal-callback` continues to work in-process for single-pod users (no behavioural regression). Multi-pod users would never use InMemoryStore.
+
+**After Phase C the library is multi-pod safe**: every k8s pod runs `start-worker`, work is distributed via runnable markers + lease, crashes are recovered by lease expiry, signals reach the right pod via the database.
+
+---
+
+### Phase D & E — Deferred (out of scope for this milestone)
+
+Listed here for context; not part of the current improvement plan. Revisit after Phase C is stable in production.
+
+- **Phase D** — Worker/orchestrator separation (activity task queues with specialised worker pools). Only worth doing if there's a real need to scale activity execution independently of workflow orchestration.
+- **Phase E** — Strict shard ownership and token-based write fencing. Protects against split-brain scenarios (network-partitioned pod whose lease expires, another pod takes over, then the original reconnects). Phase C's lease check on every `save-events` is sufficient for correctness under normal operation; Phase E is the "production at scale" defence-in-depth tier.
+
+---
+
+### Suggested ordering & shippable milestones
+
+1. **v0.x+1 (bug-fix release):** Phase A only. Closes the worst single-process correctness bugs in ~1–2 days of work.
+2. **v0.x+2 (hardening release):** Phase B. The library becomes usable as a production single-pod orchestrator with sensible HTTP integration.
+3. **v0.y (multi-pod release):** Phase C. Headline feature: "now safe to run multiple replicas". Requires schema migration and a documented worker setup. **This is the big one.**
+4. **v0.y+1+ (advanced):** Phases D and E as needed by users.
+
+Phases A and B do not require schema-breaking migrations and can ship as patch releases. Phase C requires a minor-version bump and an "operator's guide" doc.
+
+### Testing strategy per phase
+
+- **A:** Race condition unit tests (A1, A3). Cancellation-during-wait test (A2). Replay-budget exhaustion test (A4).
+- **B:** Idempotent-start tests (B1). Registry resolution tests (B3). Async submission tests (B4).
+- **C:** Multi-process integration tests using `docker-compose` with Postgres + 2–3 worker containers. Tests:
+  - Crash a worker mid-workflow; verify another picks it up after lease expiry.
+  - Send signal from worker B to workflow on (dead) worker A; verify resumption on worker C.
+  - Schedule a long timer; restart all workers; verify it still fires.
+  - Concurrent `start-workflow` with same `:request-id`; verify single execution.
+
+The crash tests in `test/intemporal/tests/crash/` are the right template — extend them to multi-process scenarios.
+
+### Out of scope (for this plan)
+
+- Phase D and Phase E (deferred — see above).
+- Migrating to a different persistence model (e.g., RocksDB, distributed log).
+- Changing the workflow programming model (it's already good).
+- Cross-language SDKs.
+- Workflow versioning / determinism upgrades (Temporal's `GetVersion`) — relevant but separate.
diff --git a/issues.md b/issues.md
new file mode 100644
index 0000000..b7cebba
--- /dev/null
+++ b/issues.md
@@ -0,0 +1,218 @@
+# Intemporal Design Critique — Multi-Pod / k8s Replica Set Context
+
+## Context
+
+The user asked for a design analysis of the `intemporal` library, with explicit focus on:
+- Deadlocks and "lost workflow" failure modes
+- Other criticism
+- Behaviour under a Kubernetes deployment where multiple pods of a replica set run the engine concurrently and can scale up/down
+
+The library positions itself as "Temporal/Cadence-inspired" — an event-sourced workflow engine where workflow state is reconstructed from a persisted event log so that activities don't re-execute after a process crash.
+
+This document is an **analysis deliverable**, not an implementation plan. It is structured to be useful as input for an architecture decision (adopt / fork / replace / contribute fixes).
+
+---
+
+## TL;DR
+
+**`intemporal` is a single-process resilient workflow engine. It is unsafe to run more than one replica against the same store. The README's "not production-ready" disclaimer is accurate — and the gap to a true distributed orchestrator is structural, not cosmetic.**
+
+The two showstoppers for any k8s replica-set deployment:
+
+1. **No durable wake mechanism.** Signal callbacks and timers live in a process-local atom. When the pod that registered them dies, nothing in the system knows to wake the workflow again. The workflow is *persisted but orphaned*.
+2. **No ownership / leasing.** Two pods can race on the same `workflow-id` with no detection. The JDBC store actively masks the race with `ON CONFLICT … DO UPDATE`, silently corrupting the event log.
+
+A third structural issue: there is **no poller / recovery worker**. After a crash, no pod scans for workflows that should be running. Resume only happens if an external actor explicitly calls `resume-workflow` with the right function and args. Durability of state without durability of execution is illusion.
+
+---
+
+## Architecture Summary (what I'm critiquing)
+
+Verified from the source:
+
+- **Engine** (`src/intemporal/core.cljc:445`): a map of `{:store :executor :scheduler :registry :observer}`. No identity, no node id, no clustering primitives.
+- **start-workflow** (`src/intemporal/internal/fns/start_workflow.clj:8-78`): generates a UUID, writes `:workflow-started`, calls `run-workflow-internal` **synchronously on the calling thread**, then loops blocking on a local `promise` until the workflow completes or is interrupted.
+- **resume-workflow** (`src/intemporal/core.cljc:366-390`): the caller must supply `workflow-id`, `workflow-fn`, and `args`. The engine replays history and re-enters execution.
+- **Stores** implement `IStore` (`src/intemporal/protocol.cljc:8-21`). The protocol contains only: history read/write, signal add/consume, callback register/unregister, cancellation flag, and status. **No claim, no lease, no heartbeat, no "list running workflows".**
+- **JDBC store** (`src/intemporal/store/jdbc.clj`): events written under transactions; uses `ON CONFLICT (workflow_id, seq) DO UPDATE` (line 100-103); signal consumption uses `FOR UPDATE SKIP LOCKED` (line 137). Signal callbacks are kept in a **per-process atom** on the store record (line 72, 143-147).
+- **InMemoryStore** (`src/intemporal/store.cljc:8-78`): identical callback semantics — a single atom.
+- **DefaultScheduler** (`src/intemporal/internal/runtime.clj`): timers held in an in-memory `pending-timers` atom. Lost on process exit.
+
+---
+
+## Section 1 — Distributed-Deployment Showstoppers
+
+### 1.1 No wake mechanism survives a pod restart  *(severity: critical)*
+
+`register-signal-callback` and `schedule-timer` store their continuation **inside the process** (atom for signals, `ScheduledFuture` for timers). Concretely:
+
+- `JdbcStore` carries `callbacks` as `(atom {})` at construction (`src/intemporal/store/jdbc.clj:72`). Two pods sharing the same Postgres each have their own empty atom.
+- `add-signal` in JDBC (`src/intemporal/store/jdbc.clj:122-132`) writes the signal under a transaction, then does `(when-let [callback (get-in @callbacks [workflow-id signal-name])] (future (callback)))` — **only the pod that registered the callback can fire it**.
+
+Failure trace:
+
+1. Pod A executes workflow X up to `(wait-for-signal :go)`. It writes the suspension to history, registers callback in **pod-A-local atom**, blocks the calling thread on a promise.
+2. Pod A crashes (k8s scale-down, OOM, node failure).
+3. Pod B receives an HTTP request → calls `(send-signal store "X" :go {})`. Postgres now has the signal row. Pod B's local callbacks atom is empty for X → **no wake**.
+4. Workflow X is permanently stuck. Its history is intact, its signal is queued, and no process knows to re-enter execution.
+
+Timers have the same problem with worse blast radius: a 1-hour timer scheduled on pod A *vanishes* the moment pod A dies, even if nobody sends a signal. No row in any table, no scheduled job, no poller.
+
+**This is the "losing workflows" failure mode**, and it does not require any race: it happens on every routine k8s rolling restart.
+
+### 1.2 No ownership → silent concurrent execution  *(severity: critical)*
+
+`start-workflow` and `resume-workflow` do not claim anything. Two pods can run the same workflow id concurrently. Specifically:
+
+- `JdbcStore.save-events` uses `ON CONFLICT (workflow_id, seq) DO UPDATE` (`src/intemporal/store/jdbc.clj:100-103`). When two pods append the same seq, **the loser silently wins** (last writer overwrites). The race is masked; the event log is non-deterministic.
+- Activities are re-executed on each pod — at-least-once degrades to at-many-times.
+- The replay invariant ("same input → same event stream") is violated because two engines may emit different events at the same seq.
+
+This becomes very easy to trigger: `start-workflow` blocks the caller. A reverse-proxy retry on a slow `POST /workflows` will re-invoke `start-workflow`, and if the client supplies `:workflow-id` for idempotency the second call appends a duplicate `:workflow-started` event to history rather than rejecting (no uniqueness check; the `seq` for the first event likely overwrites if both pods reach `seq=0` simultaneously).
+
+A safer schema would use `ON CONFLICT DO NOTHING` (or reject) on event inserts, plus a `(workflow_id, owner_lease, lease_expires_at)` claim row.
+
+### 1.3 No recovery poller  *(severity: critical)*
+
+There is no background process anywhere in the codebase that scans for workflows requiring execution. `resume-workflow` is **on-demand only** and **requires the caller to know the workflow function and args**.
+
+This couples recovery to application code: every pod that starts up must explicitly enumerate "things that might be suspended" and call `resume-workflow` with the right vars in scope. The library provides no list-by-status query and no API for "given a workflow id, find the function and resume". For a multi-replica deployment, recovery is essentially a problem the user has to solve outside the library.
+
+Temporal solves this with task queues + workers that long-poll the server. Intemporal has neither concept.
+
+---
+
+## Section 2 — Deadlock & Lost-Workflow Scenarios
+
+### 2.1 Register-after-consume race  *(severity: high, even on a single pod)*
+
+`process-signal` (`src/intemporal/internal/execution.clj:223-255`) is described by the explore agent as:
+
+```
+1. consume-signal — if present, return
+2. otherwise register-signal-callback
+3. suspend
+```
+
+This is a classic TOCTOU. Between (1) returning nil and (2) writing the callback, another thread (or another pod) can call `add-signal`. The signal lands in the store; the callback fires nothing (it isn't registered yet); the registration completes after the signal write; the workflow suspends forever.
+
+The fix is the standard one: register the callback first, then check, then unregister + consume if a signal was already present.
+
+### 2.2 Signal sent to a workflow not yet started
+
+`send-signal` will happily write a signal for an unknown workflow id (in JDBC, line 124 inserts a workflow row via upsert). If the workflow is later started but the start path doesn't drain pre-existing signals before reaching `wait-for-signal`, the signal may or may not be picked up depending on ordering — worth a targeted test, since signals are addressed by name and the workflow expects FIFO semantics per name.
+
+### 2.3 Cancellation cannot reach a suspended workflow
+
+`cancel-workflow` (`src/intemporal/core.cljc:411-418`) sets a flag. Cancellation is **polled at sequence points** (per the explore agent: `check-cancelled!` before each operation, and at the top of the execution loop). A workflow that is suspended on `wait-for-signal` with no signal will:
+
+- never re-enter the execution loop on its own,
+- never poll the flag,
+- be invisible to cancellation.
+
+So `cancel-workflow` is **not reliable for any workflow that is currently waiting**. The flag is set in the DB but the workflow only sees it next time it wakes — which may be never (see 1.1).
+
+### 2.4 Long-lived `start-workflow` thread
+
+`start-workflow` blocks the calling thread until the workflow either completes or is interrupted (`src/intemporal/internal/fns/start_workflow.clj:67-75`). A workflow that waits 30 days for a signal holds the caller's thread for 30 days. This is incompatible with HTTP request/response in any normal web framework and leaks pod resources at scale. The "right" pattern (return a workflow id immediately; durable wake later) is precisely the pattern that doesn't exist (see 1.1, 1.3).
+
+### 2.5 `max-iterations` foot-gun
+
+Default 1000 replay iterations (`src/intemporal/internal/fns/start_workflow.clj:23`). A workflow with thousands of activities or a long signal-driven loop will silently fail at replay. No clear surface to detect this in production.
+
+### 2.6 In-flight activities at shutdown become "interrupted"
+
+Per the crash-test agent: activities crashed mid-execution are marked `:activity-interrupted` and re-run on resume. This is correct behaviour for at-least-once. **However**, combined with 1.1, the resume never happens automatically — so the activity neither completes nor restarts. Worth distinguishing in docs: "at-least-once if you remember to call resume-workflow".
+
+---
+
+## Section 3 — Other Criticism
+
+### 3.1 JDBC schema lacks the columns it needs
+
+Looking at `resources/migrations/postgres/20260215214002-initial-schema.up.sql` (referenced by the explore agent): three tables, no `status` column on `intemporal_workflows`, no `owner`, no `lease_expires_at`, no `last_heartbeat_at`, no index for "find running workflows". Status is derived by scanning `intemporal_history` and reading `last(event-type)` — O(history-length) per status query.
+
+`ON CONFLICT (workflow_id, seq) DO UPDATE` (line 100-103) is the wrong policy. Two correct writers should not be allowed to coexist; the conflict should be loud (`DO NOTHING` + check `affected`, or a `version` column with CAS).
+
+### 3.2 The IStore protocol is too thin
+
+For a multi-tenant durable orchestrator you'd expect at minimum:
+
+- `claim-workflow [store workflow-id worker-id lease-ttl]` → boolean
+- `renew-lease [store workflow-id worker-id]`
+- `release [store workflow-id worker-id]`
+- `list-runnable [store worker-id batch-size]` (signals arrived / timers due / leases expired)
+- Persistent timer rows (`{workflow_id, seq, fire_at}`)
+- Persistent "needs wake" markers
+
+None of these exist. Adding them is not a small patch; it touches the execution engine's assumption that wakes are local.
+
+### 3.3 Signal callbacks duplicated as in-process state across store impls
+
+Every store maintains its own callback atom (`InMemoryStore` via `:signal-callbacks` in the state map, `JdbcStore` via a separate `(atom {})` field). For the JDBC store this is conceptually wrong: the store is shared, but a process-local atom shadows it. A correct multi-pod implementation would use a notification mechanism the database already provides — Postgres `LISTEN/NOTIFY`, an explicit watch table, or an external pub/sub — and would remove `register-signal-callback` from `IStore` entirely (it isn't really a store concern).
+
+### 3.4 No separation between "orchestrator" and "worker"
+
+`IActivityExecutor` runs activities in the same process that runs the workflow. There is no way to dispatch activities to a separate worker pool (e.g., a "heavy I/O" replica set distinct from "orchestrator" replicas). Heavy activities consume the same thread budget that drives workflows.
+
+### 3.5 Recovery requires the caller to know the workflow function
+
+`resume-workflow` takes `workflow-fn` and `args`. The library has no registry that maps `workflow-id → function var`. Every pod that wants to recover must:
+
+1. Query the store for workflows in `:running` status (no such query exists).
+2. Look up the right function var (no such mapping exists).
+3. Recover the original args (they live in the `:workflow-started` event — accessible, but undocumented).
+
+In practice this means the user writes their own dispatch table and recovery loop. The library does not provide a working recovery story out of the box.
+
+### 3.6 Observer protocol is a good idea, slightly under-spec'd
+
+`IWorkflowObserver` (`src/intemporal/protocol.cljc:45-62`) is clean and gives the right hooks for tracing. Two gaps worth noting:
+
+- No `on-store-write` / `on-suspension-persisted` — useful for "did the durability write succeed before we acked the activity?"
+- No `on-replay-iteration` — useful for diagnosing slow replays.
+
+### 3.7 Documentation gap
+
+The README says "not production-ready" but doesn't enumerate **why**. A short "Operational Caveats" section listing 1.1, 1.2, 1.3 would prevent users from misjudging the library based on the Temporal-flavoured API surface.
+
+---
+
+## Section 4 — What a Multi-Pod-Safe Version Would Need
+
+Not a request to implement; a calibration of how far the library is from the goal.
+
+1. **Lease-based ownership.** Add `claim_workflow(worker_id, ttl)` + `renew` + `release`. Reject all writes from a worker whose lease has expired. Heartbeat from a background thread.
+2. **Persistent timers.** Add a `intemporal_timers (workflow_id, seq, fire_at, claimed_by, claimed_until)` table and a poller (`SELECT … WHERE fire_at <= now() AND claimed_until < now() FOR UPDATE SKIP LOCKED`).
+3. **Persistent wake markers.** When a signal arrives or a timer fires, write a row to `intemporal_runnable (workflow_id)`. Each pod polls this table (or `LISTEN`s on `NOTIFY`).
+4. **Durable workflow registry.** Map `workflow_id → workflow_function_symbol + args`. Store the symbol in the `:workflow-started` event; have every pod register the symbols it can resolve.
+5. **Reject concurrent writers.** Change `ON CONFLICT DO UPDATE` to `DO NOTHING` and fail the workflow run on conflict (lease violation).
+6. **Async `start-workflow`.** Return `{:workflow-id …}` immediately; let the worker loop pick up the new workflow from the runnable queue.
+7. **Fix the signal register-then-consume race** (2.1) — even single-process correctness depends on this.
+8. **Cancellation that wakes a sleeper.** Cancellation should write a runnable marker that forces the workflow to wake and observe the flag (currently it only sets the flag).
+
+Items 1–6 are essentially "build a real distributed workflow engine". Item 7 is a bug fix. Item 8 is a small targeted change.
+
+---
+
+## Section 5 — Pragmatic Recommendations (no code changes implied)
+
+For someone evaluating this library:
+
+- **Safe today**: single process, in-memory store, side-effects inside short-lived activities — i.e., as a structured way to write resumable in-memory orchestrations. Fine for tests, batch jobs, single-node tools.
+- **Risky**: any deployment with `>1` replica, even with the JDBC store. Will not lose data, but **will lose execution liveness** on every pod restart, and **will corrupt history** under concurrent retries.
+- **Don't**: rely on it as a Temporal replacement in k8s without writing significant infrastructure on top (leasing, polling, dispatch, signal fan-out).
+
+---
+
+## Verification (how to confirm the above claims yourself)
+
+Quick reproductions, each ~10–30 minutes:
+
+1. **Lost wake on signal across processes.** Start two REPLs with the same Postgres URL. REPL A: `start-workflow` a workflow that calls `wait-for-signal`. Kill REPL A (`System/exit`). REPL B: `send-signal` for that workflow id. Confirm the signal sits in `intemporal_signals` and nothing happens. Restart REPL A: the workflow only resumes if you explicitly call `resume-workflow`.
+2. **Concurrent start corrupts history.** Two REPLs call `start-workflow` with the same `:workflow-id` simultaneously. Inspect `intemporal_history` — observe duplicate `:workflow-started` rows or silently overwritten events at the same `seq`.
+3. **Lost timer.** Start a workflow that sleeps for 5 minutes. Kill the JVM within 30 seconds. Restart it without calling `resume-workflow`. Confirm the workflow never fires.
+4. **Cancellation cannot reach a sleeper.** Start a workflow that does `(wait-for-signal :go)` and immediately `cancel-workflow`. Observe the cancelled flag is set but the workflow never terminates (it never re-enters the loop to observe the flag).
+5. **Register-then-consume race.** A targeted test that interleaves `wait-for-signal` and `send-signal` on the same workflow id at the consume-then-register window. May require thread sleep instrumentation in `process-signal` to reproduce reliably.
+
+If any of these *don't* reproduce, the analysis is wrong on that point and the relevant section should be revised.
diff --git a/test/intemporal/jepsen/README.md b/test/intemporal/jepsen/README.md
new file mode 100644
index 0000000..8bb1f16
--- /dev/null
+++ b/test/intemporal/jepsen/README.md
@@ -0,0 +1,199 @@
+# Jepsen Chaos Test for `intemporal`
+
+## Context
+
+**What this tests.** `intemporal` is a Clojure workflow engine inspired by Temporal/Cadence.
+This chaos harness runs multiple worker JVMs against a shared Postgres store, injects
+SIGKILL/SIGTERM faults, and checks four correctness invariants after a quiesce period.
+
+**Why a chaos test.** The library's event-sourcing design gives strong single-process
+resilience, but several structural bugs make it unsafe under multi-process deployment
+(see `improvements.md`). Existing unit tests and crash tests cover the happy path;
+this harness exercises the failure path by combining real process kills with
+concurrent access to the same Postgres schema.
+
+**Scope.** Local-only / on-demand — not in CI. Run with `clojure -X:dev:jdbc:jepsen`.
+Each "node" is a forked JVM, not a Docker/SSH container. We use our own orchestrator
+rather than the `jepsen/jepsen` library (same rationale as the ablauf Jepsen tests:
+the safety properties are DB-mediated, not OS-mediated).
+
+---
+
+## Bugs under test
+
+| Bug | improvements.md ref | Description |
+|-----|---------------------|-------------|
+| 1.1 | §1.1 | No wake mechanism survives pod restart. Signal callbacks live in a process-local atom; a dead worker's callbacks are gone forever. |
+| 1.2 | §1.2 | No ownership / silent concurrent execution. `ON CONFLICT DO UPDATE` masks concurrent writes to `intemporal_history`. |
+| 1.3 | §1.3 | No recovery poller. Restarting a worker does not resume the workflows it was running. |
+| 2.1 | §2.1 | Register-then-consume signal race. Between the consume-check and register-callback call, a concurrent sender's signal is dropped. |
+| 2.3 | §2.3 | Cancellation cannot reach a sleeping workflow. The cancelled flag is set but never observed by a workflow blocked in `wait-for-signal`. |
+
+---
+
+## Architecture
+
+```
+             ┌─────────────────────────────────────┐
+             │   runner.clj (host JVM)              │
+             │   - generator  (submit/cancel/signal) │
+             │   - nemesis    (kill/restart/signal)  │
+             │   - checker    (4 invariants)         │
+             └──────────┬──────────────────────────┘
+                        │ writes jepsen_work_queue
+                        │ reads  intemporal_* tables
+        ┌───────────────┼─────────────────────┐
+        │               │                     │
+    ┌───▼───┐       ┌───▼───┐     ...    ┌───▼───┐
+    │worker0│       │worker1│            │workerN│
+    │JVM    │       │JVM    │            │JVM    │
+    └───┬───┘       └───┬───┘            └───┬───┘
+        └───────────────┴────────────────────┘
+                        ▼
+               Postgres (docker or local)
+               ┌──────────────────────────────┐
+               │ intemporal_workflows          │
+               │ intemporal_history            │
+               │ intemporal_signals            │
+               │ jepsen_work_queue             │ ← test coordination
+               │ jepsen_invocations            │ ← side-channel
+               │ jepsen_signals_sent           │
+               │ jepsen_cancels_sent           │
+               └──────────────────────────────┘
+```
+
+**Process model.** Each worker is a JVM forked by `ProcessBuilder` from the runner.
+`destroyForcibly()` (SIGKILL) skips the JVM shutdown hook, destroying the
+process-local `callbacks` atom in `JdbcStore` — reproducing bug 1.1.
+
+---
+
+## Files
+
+| File | Role |
+|------|------|
+| [runner.clj](runner.clj) | Orchestrator: phases 1–5, entry point |
+| [worker.clj](worker.clj) | Forked-JVM entry: engine, work-queue poll loop |
+| [db.clj](db.clj) | Subprocess registry: `fork!`, `kill!`, `alive?`, schema setup |
+| [client.clj](client.clj) | Test operations: submit, signal, cancel, observe, concurrent-start |
+| [nemesis.clj](nemesis.clj) | Fault injector: kill/restart workers, signal dead workflows |
+| [checker.clj](checker.clj) | Post-quiesce invariants (4 checkers) |
+| [workflows.clj](workflows.clj) | Workflow shapes W1–W4, side-channel activity |
+| [test/resources/migrations/jepsen/postgres/](../../../../resources/migrations/jepsen/postgres/) | Side-channel table migrations |
+
+---
+
+## Workflow shapes
+
+| Shape | Type | Bug probed |
+|-------|------|------------|
+| W1 `signal-wait-workflow` | Records `:before`, waits on signal `"go"`, records `:after` | **1.1** lost wake |
+| W2 `activity-chain-workflow` | Runs N activities in sequence | **1.3** no recovery poller |
+| W3 `cancel-sleep-workflow` | Records `:started`, waits on `"wake"` forever | **2.3** cancel can't reach sleeper |
+| W4 `rapid-signal-workflow` | Suspends immediately on `"immediate"` | **2.1** signal race |
+
+---
+
+## Checkers
+
+All checkers run after the quiesce phase. Each returns `{:valid? bool :violations [...] :stats {...}}`.
+
+**1. Liveness** (bugs 1.1, 1.3)
+Every submitted workflow must be in a terminal state (`workflow-completed`, `workflow-failed`).
+Workflows stuck as `:running` after quiesce + grace are violations.
+
+**2. Signal consumed** (bug 2.1)
+Every signal row written to `intemporal_signals` by the test must eventually be consumed.
+Orphaned rows after quiesce flag either the lost-callback (1.1) or the register-then-consume
+race (2.1) — the distinction is visible in the nemesis history (was the worker alive?).
+
+**3. History integrity** (bug 1.2)
+For workflows started via the `concurrent-start` op, `intemporal_history` must contain
+`seq=0` with `event_type = 'workflow-started'` only. If a concurrent writer's
+`ON CONFLICT DO UPDATE` clobbered it with a different event type, the violation is recorded.
+
+**4. Cancellation liveness** (bug 2.3)
+Workflows with `cancelled = TRUE` in `intemporal_workflows` must have a terminal last event.
+If the workflow is still `:running` (last event not `:workflow-completed/failed/cancelled`),
+the cancel flag was never observed — the workflow is stuck sleeping.
+
+---
+
+## Expected results with the current (unfixed) codebase
+
+| Checker | Expected result | Reason |
+|---------|-----------------|--------|
+| liveness | **FAIL** | Workers crash; no auto-resume; W1/W2 workflows stuck |
+| signal-consumed | **FAIL** (intermittent) | Signals sent to dead workers land in DB; callbacks gone |
+| history-integrity | **FAIL** (if concurrent-start runs) | `DO UPDATE` silently clobbers seq=0 |
+| cancellation-liveness | **FAIL** | `cancel-workflow` sets flag but never wakes sleeper |
+
+After the Phase A + B + C fixes from `improvements.md`, all four should **PASS**.
+
+**Smoke checks:**
+
+- Run with `:no-kill true` — all checkers should pass (no chaos, happy path).
+- Run normally — checkers should fail as documented above.
+- After implementing A1 (signal race fix) — checker 2 should pass.
+- After implementing A2 (cancellation wake) — checker 4 should pass.
+- After implementing Phase C (distributed primitives) — checkers 1 and 2 should pass.
+
+---
+
+## Side-channel
+
+`jepsen_invocations` records every activity invocation with `:begin`/`:end`/`:fail` phases
+using a **separate auto-commit Hikari pool** (`*side-ds*`), so rows survive a SIGKILL.
+`jepsen_signals_sent` and `jepsen_cancels_sent` track what the test issued, enabling the
+checker to cross-reference intent vs. outcome.
+
+Dynamic vars (`*side-ds*`, `*test-run*`, `*owner*`) are bound in `worker.clj` before
+calling `start-workflow` or `resume-workflow`, so activities can write to the side-channel
+without the workflow function carrying a non-serialisable connection reference.
+
+---
+
+## Running
+
+```bash
+# 1. Start Postgres (if not already running)
+docker run -d --name intemporal-pg \
+  -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root \
+  -p 5432:5432 postgres:16
+
+# 2. Run the chaos test (4 workers, 120s active, 90s grace)
+clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run \
+  :workers 4 :duration 120
+
+# 3. No-kill baseline (should pass all checkers)
+clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run \
+  :workers 4 :duration 60 :no-kill true
+
+# 4. Aggressive run (more workers, faster kills)
+clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run \
+  :workers 6 :duration 180 :nemesis-min-ms 1500 :nemesis-jitter-ms 3000 \
+  :min-alive 1 :grace-s 120
+```
+
+The JDBC URL defaults to `POSTGRES_JDBC_URI` env var or `localhost:5432/root`.
+
+---
+
+## Risks / limitations
+
+1. **No jepsen/jepsen library.** We implement our own orchestrator (same approach as
+   the ablauf Jepsen tests). The history format is compatible with jepsen.history for
+   future migration. Adding sshd/containers would mostly be a `db.clj` swap.
+
+2. **Worker classpath boot time.** Each `clojure -X:...` invocation takes 10–30s to
+   compile on first run due to AOT. Subsequent runs are faster if the dep cache is warm.
+   Increase `boot-timeout-ms` in `db/fork!` if workers time out during setup.
+
+3. **Bug 1.2 detection is approximate.** We inject a sentinel `event_type` from the
+   second concurrent writer. The real damage (last-writer-wins on seq=0) is masked by
+   `DO UPDATE` — a production incident would manifest as non-deterministic replay, not
+   a visible row. The checker catches the sentinel as a proxy for the real corruption.
+
+4. **Bug 2.1 is intermittent.** The register-then-consume race requires precise timing.
+   The `rapid-signal` workflow + 50ms signal loop creates high contention, but the race
+   window is narrow. Run multiple times or increase `:submit-rps` to improve hit rate.
diff --git a/test/intemporal/jepsen/checker.clj b/test/intemporal/jepsen/checker.clj
new file mode 100644
index 0000000..ff7a5a2
--- /dev/null
+++ b/test/intemporal/jepsen/checker.clj
@@ -0,0 +1,250 @@
+(ns intemporal.jepsen.checker
+  "Post-quiesce invariant checkers.  Each fn returns
+    {:valid? bool :violations [...] :stats {...}}.
+
+  All four checkers operate on DB state after the quiesce phase: the generator
+  has stopped, the nemesis is paused, all workers have been restarted (so each
+  one's startup ran), and a grace period has elapsed.
+
+  Checkers are mapped to specific bugs in improvements.md:
+
+    1. liveness               — bugs 1.1, 1.3: workflows never complete after
+                                the owning worker crashes
+    2. signal-consumed        — bug  2.1: register-then-consume race leaves
+                                orphaned signal rows
+    3. history-integrity      — bug  1.2: concurrent writers corrupt event log
+                                via ON CONFLICT DO UPDATE
+    4. cancellation-liveness  — bug  2.3: cancel-workflow can't wake a sleeper
+
+  Expected post-quiesce state for the CURRENT (buggy) codebase:
+    checker 1 (liveness)              -> FAIL  (workflows stuck without resume)
+    checker 2 (signal-consumed)       -> FAIL  (if race is hit; intermittent)
+    checker 3 (history-integrity)     -> FAIL  (if concurrent-start ran)
+    checker 4 (cancellation-liveness) -> FAIL  (cancelled sleepers never wake)"
+  (:require [next.jdbc :as jdbc]
+            [taoensso.telemere :as log]))
+
+(def ^:private jdbc-opts {:builder-fn next.jdbc.result-set/as-unqualified-maps})
+
+;; ---------------------------------------------------------------------------
+;; Helper: submitted workflow-ids from history
+
+(defn- submitted-ids
+  "Set of workflow-ids that the generator successfully submitted."
+  [history]
+  (->> @history
+       (filter #(and (= :submit (:f %)) (= :ok (:type %))))
+       (keep #(get-in % [:value :workflow-id]))
+       set))
+
+(defn- cancelled-ids
+  "Set of workflow-ids for which cancel ops succeeded."
+  [history]
+  (->> @history
+       (filter #(and (= :cancel (:f %)) (= :ok (:type %))))
+       (keep #(get-in % [:value :workflow-id]))
+       set))
+
+(defn- signalled-ids
+  "Set of workflow-ids for which a signal op succeeded."
+  [history]
+  (->> @history
+       (filter #(and (= :signal (:f %)) (= :ok (:type %))))
+       (keep #(get-in % [:value :workflow-id]))
+       set))
+
+(defn- concurrent-start-ids
+  "Set of workflow-ids from concurrent-start ops."
+  [history]
+  (->> @history
+       (filter #(and (= :concurrent-start (:f %)) (= :ok (:type %))))
+       (keep #(get-in % [:value :workflow-id]))
+       set))
+
+;; ---------------------------------------------------------------------------
+;; Checker 1: Liveness (bugs 1.1, 1.3)
+;;
+;; Every submitted workflow must reach a terminal state (:completed, :failed,
+;; :cancelled).  Workflows stuck in :running after the quiesce + grace period
+;; mean that no worker auto-resumed them after its crash.
+
+(defn liveness-checker
+  "1. Every submitted workflow is in a terminal state after quiesce."
+  [db-spec history]
+  (let [ids (submitted-ids history)]
+    (if (empty? ids)
+      {:valid? true :violations [] :stats {:submitted 0}}
+      (let [in-clause (clojure.string/join "," (repeat (count ids) "?"))
+            stuck (jdbc/execute! db-spec
+                    (into [(str "SELECT w.id,
+                                        w.cancelled,
+                                        h.event_type AS last_event
+                                 FROM intemporal_workflows w
+                                 LEFT JOIN LATERAL (
+                                   SELECT event_type
+                                   FROM intemporal_history
+                                   WHERE workflow_id = w.id
+                                   ORDER BY id DESC LIMIT 1
+                                 ) h ON TRUE
+                                 WHERE w.id IN (" in-clause ")
+                                   AND w.cancelled = FALSE
+                                   AND (h.event_type IS NULL
+                                        OR h.event_type NOT IN
+                                            ('workflow-completed','workflow-failed'))")]
+                          ids)
+                    jdbc-opts)]
+        {:valid?     (empty? stuck)
+         :violations (vec stuck)
+         :stats      {:submitted (count ids)
+                      :stuck     (count stuck)}}))))
+
+;; ---------------------------------------------------------------------------
+;; Checker 2: Signal consumed (bug 2.1)
+;;
+;; Every signal the test client wrote (via jepsen_signals_sent or the nemesis's
+;; signal-dead-workflows!) should eventually be consumed by the workflow.  An
+;; unconsumed row in intemporal_signals after quiesce + grace either means:
+;;   a) the owning worker died and its callback atom was empty (bug 1.1), or
+;;   b) the signal arrived between consume-check and register-callback (bug 2.1).
+;;
+;; This checker flags both; the distinction is visible in the nemesis history
+;; (was the worker alive when the signal was sent?).
+
+(defn signal-consumed-checker
+  "2. No orphaned signal rows remain after quiesce."
+  [db-spec test-run]
+  (let [orphans (jdbc/execute! db-spec
+                  ["SELECT s.workflow_id, s.signal_name
+                    FROM intemporal_signals s
+                    JOIN jepsen_signals_sent ss
+                      ON ss.workflow_id = s.workflow_id
+                     AND ss.signal_name  = s.signal_name
+                    WHERE ss.test_run = ?"
+                   test-run]
+                  jdbc-opts)
+        total-sent (or (:c (jdbc/execute-one! db-spec
+                             ["SELECT COUNT(*) AS c FROM jepsen_signals_sent WHERE test_run = ?"
+                              test-run]
+                             jdbc-opts))
+                       0)]
+    {:valid?     (empty? orphans)
+     :violations (vec orphans)
+     :stats      {:signals-sent     total-sent
+                  :orphaned-signals (count orphans)}}))
+
+;; ---------------------------------------------------------------------------
+;; Checker 3: History integrity (bug 1.2)
+;;
+;; Concurrent calls to start-workflow with the same workflow-id use
+;; ON CONFLICT (workflow_id, seq) DO UPDATE, silently overwriting events.
+;; Symptoms:
+;;   a) Multiple :workflow-started events at seq=0 (last writer wins silently).
+;;   b) Two workers produce different event_type at the same seq — detected by
+;;      comparing event_type vs the "canonical" value stored in the first write.
+;;
+;; We detect this by looking for workflows where seq 0 has a non-canonical
+;; event type, or where the history contains duplicate seq numbers that were
+;; overwritten (the DO UPDATE mask hides them, but if two writers raced and
+;; produced DIFFERENT event_types at the same seq, one version is lost).
+;;
+;; We approximate: for any workflow that had a concurrent-start op, check
+;; whether intemporal_history has a :workflow-started at seq=0.  If the
+;; second writer overwrote seq=0 with a different event_type (our sentinel
+;; "workflow-started-duplicate"), that row proves a race.
+
+(defn history-integrity-checker
+  "3. No concurrent-write corruption in intemporal_history."
+  [db-spec history]
+  (let [cs-ids (concurrent-start-ids history)]
+    (if (empty? cs-ids)
+      {:valid? true :violations [] :stats {:concurrent-start-workflows 0}}
+      (let [in-clause (clojure.string/join "," (repeat (count cs-ids) "?"))
+            ;; Look for evidence of the silent overwrite: seq=0 with the
+            ;; sentinel event_type means the second writer clobbered the first.
+            corrupted (jdbc/execute! db-spec
+                        (into [(str "SELECT workflow_id, event_type
+                                     FROM intemporal_history
+                                     WHERE workflow_id IN (" in-clause ")
+                                       AND seq = 0
+                                       AND event_type = 'workflow-started-duplicate'")]
+                              cs-ids)
+                        jdbc-opts)
+            ;; Also look for seq=0 that is NOT workflow-started (any other
+            ;; winner in the race is also corruption).
+            unexpected (jdbc/execute! db-spec
+                         (into [(str "SELECT workflow_id, event_type
+                                      FROM intemporal_history
+                                      WHERE workflow_id IN (" in-clause ")
+                                        AND seq = 0
+                                        AND event_type <> 'workflow-started'")]
+                               cs-ids)
+                         jdbc-opts)]
+        {:valid?     (and (empty? corrupted) (empty? unexpected))
+         :violations {:overwritten-by-duplicate (vec corrupted)
+                      :unexpected-seq0           (vec unexpected)}
+         :stats      {:concurrent-start-workflows (count cs-ids)
+                      :corrupted (+ (count corrupted) (count unexpected))}}))))
+
+;; ---------------------------------------------------------------------------
+;; Checker 4: Cancellation liveness (bug 2.3)
+;;
+;; After cancel-workflow is called on a workflow that is blocked in
+;; wait-for-signal, the cancelled flag is set in intemporal_workflows but the
+;; workflow never observes it (no re-entry to the execution loop).  The checker
+;; looks for workflows where:
+;;   - cancelled = TRUE in intemporal_workflows
+;;   - The last history event is NOT workflow-completed / workflow-failed /
+;;     workflow-cancelled
+;; These are workflows that are "cancelled on paper" but still stuck.
+
+(defn cancellation-liveness-checker
+  "4. All cancelled workflows have reached a terminal state."
+  [db-spec history]
+  (let [c-ids (cancelled-ids history)]
+    (if (empty? c-ids)
+      {:valid? true :violations [] :stats {:cancelled-submitted 0}}
+      (let [in-clause (clojure.string/join "," (repeat (count c-ids) "?"))
+            stuck (jdbc/execute! db-spec
+                    (into [(str "SELECT w.id,
+                                        h.event_type AS last_event
+                                 FROM intemporal_workflows w
+                                 LEFT JOIN LATERAL (
+                                   SELECT event_type
+                                   FROM intemporal_history
+                                   WHERE workflow_id = w.id
+                                   ORDER BY id DESC LIMIT 1
+                                 ) h ON TRUE
+                                 WHERE w.id IN (" in-clause ")
+                                   AND w.cancelled = TRUE
+                                   AND (h.event_type IS NULL
+                                        OR h.event_type NOT IN
+                                            ('workflow-completed',
+                                             'workflow-failed',
+                                             'workflow-cancelled'))")]
+                          c-ids)
+                    jdbc-opts)]
+        {:valid?     (empty? stuck)
+         :violations (vec stuck)
+         :stats      {:cancelled-submitted (count c-ids)
+                      :stuck               (count stuck)}}))))
+
+;; ---------------------------------------------------------------------------
+;; Compose
+
+(defn check-all
+  "Runs all four checkers and returns a composed result."
+  [{:keys [db-spec history test-run]}]
+  (log/log! :info "[checker] running post-quiesce invariants")
+  (let [c1 (liveness-checker            db-spec history)
+        c2 (signal-consumed-checker     db-spec test-run)
+        c3 (history-integrity-checker   db-spec history)
+        c4 (cancellation-liveness-checker db-spec history)
+        valid? (every? :valid? [c1 c2 c3 c4])]
+    (when-not valid?
+      (log/log! :warn "[checker] INVARIANT VIOLATION(S) DETECTED"))
+    {:valid? valid?
+     :checkers
+     {:liveness               c1
+      :signal-consumed        c2
+      :history-integrity      c3
+      :cancellation-liveness  c4}}))
diff --git a/test/intemporal/jepsen/client.clj b/test/intemporal/jepsen/client.clj
new file mode 100644
index 0000000..9d5d2d9
--- /dev/null
+++ b/test/intemporal/jepsen/client.clj
@@ -0,0 +1,165 @@
+(ns intemporal.jepsen.client
+  "Operations issued by the test orchestrator against the shared store.
+
+  All operations talk directly to Postgres via next.jdbc (no HTTP layer).
+  Workers pick up submitted workflows by polling jepsen_work_queue.
+
+  Op types:
+    :submit          — inserts a workflow spec into jepsen_work_queue
+    :signal          — calls add-signal directly on the JDBC store
+    :cancel          — calls mark-cancelled directly on the JDBC store
+    :observe         — reads workflow status from intemporal_workflows + history
+    :concurrent-start — inserts the same workflow-id twice (different wf types
+                        accepted by different workers) to trigger bug 1.2
+
+  History entries are plain EDN maps compatible with jepsen.history format:
+    {:process <int> :type (:ok|:fail|:info) :f <op> :value {...} :time <ms>}"
+  (:require [intemporal.protocol :as p]
+            [intemporal.store.jdbc :as jdbc-store]
+            [next.jdbc :as jdbc]
+            [taoensso.telemere :as log]))
+
+(defn now-ms [] (System/currentTimeMillis))
+
+(defn record-op!
+  "Appends an op to the atom-wrapped history vector."
+  [history op]
+  (swap! history conj (assoc op :time (now-ms)))
+  op)
+
+;; ---------------------------------------------------------------------------
+;; Helpers
+
+(defn- wf-status
+  "Reads workflow status directly from the DB without going through the store
+  object (avoids creating a new engine just to read status)."
+  [db-spec workflow-id]
+  (let [wf (jdbc/execute-one! db-spec
+              ["SELECT cancelled FROM intemporal_workflows WHERE id = ?"
+               workflow-id])
+        last-evt (jdbc/execute-one! db-spec
+                   ["SELECT event_type FROM intemporal_history
+                     WHERE workflow_id = ?
+                     ORDER BY id DESC LIMIT 1"
+                    workflow-id])]
+    (cond
+      (nil? wf)                                    :not-found
+      (:intemporal_workflows/cancelled wf)          :cancelled
+      (nil? last-evt)                               :not-found
+      (= "workflow-completed"
+         (:intemporal_history/event_type last-evt)) :completed
+      (= "workflow-failed"
+         (:intemporal_history/event_type last-evt)) :failed
+      :else                                         :running)))
+
+;; ---------------------------------------------------------------------------
+;; Client operations
+
+(defn invoke-submit
+  "Picks a random workflow type, inserts it into jepsen_work_queue, and
+  returns {:type :ok :value {:workflow-id ... :wf-type ...}}."
+  [db-spec test-run]
+  (let [wf-type   (rand-nth [:signal-wait :activity-chain :cancel-sleep :rapid-signal])
+        wf-id     (str (random-uuid))
+        nonce     (str (random-uuid))
+        steps-arg (when (= wf-type :activity-chain) {:steps 5})]
+    (try
+      (jdbc/execute! db-spec
+        ["INSERT INTO jepsen_work_queue
+            (test_run, workflow_id, wf_type, nonce, args)
+          VALUES (?,?,?,?,?::jsonb)"
+         test-run wf-id (name wf-type) nonce
+         (if steps-arg (pr-str steps-arg) "{}")])
+      {:type :ok :value {:workflow-id wf-id :wf-type wf-type :nonce nonce}}
+      (catch Throwable t
+        {:type :fail :error (str t)}))))
+
+(defn invoke-signal
+  "Writes a signal directly to the store.  Does NOT go through a worker — this
+  models a separate process (e.g. an HTTP endpoint) calling send-signal.
+
+  When the owning worker is alive, its callback atom fires and the workflow
+  wakes.  When the worker is dead, the signal row persists in intemporal_signals
+  but no callback fires (bug 1.1)."
+  [db-spec test-run workflow-id signal-name]
+  (try
+    (jdbc/execute! db-spec
+      ["INSERT INTO intemporal_signals (workflow_id, signal_name, payload)
+        VALUES (?,?,'{}'::jsonb)"
+       workflow-id signal-name])
+    (jdbc/execute! db-spec
+      ["INSERT INTO jepsen_signals_sent (test_run, workflow_id, signal_name)
+        VALUES (?,?,?)"
+       test-run workflow-id signal-name])
+    {:type :ok :value {:workflow-id workflow-id :signal signal-name}}
+    (catch Throwable t
+      {:type :fail :error (str t)})))
+
+(defn invoke-cancel
+  "Sets the cancelled flag on the workflow.  If the workflow is sleeping on
+  wait-for-signal the flag will be set but the workflow will never observe it
+  (bug 2.3)."
+  [db-spec test-run workflow-id]
+  (try
+    (jdbc/execute! db-spec
+      ["INSERT INTO intemporal_workflows (id, cancelled) VALUES (?,TRUE)
+        ON CONFLICT (id) DO UPDATE SET cancelled = TRUE"
+       workflow-id])
+    (jdbc/execute! db-spec
+      ["INSERT INTO jepsen_cancels_sent (test_run, workflow_id) VALUES (?,?)"
+       test-run workflow-id])
+    {:type :ok :value {:workflow-id workflow-id}}
+    (catch Throwable t
+      {:type :fail :error (str t)})))
+
+(defn invoke-observe
+  "Reads the workflow status for reporting in the history."
+  [db-spec workflow-id]
+  (try
+    (let [status (wf-status db-spec workflow-id)]
+      {:type :ok :value {:workflow-id workflow-id :status status}})
+    (catch Throwable t
+      {:type :fail :error (str t)})))
+
+(defn invoke-concurrent-start
+  "Inserts the same workflow-id into the queue TWICE so that two workers race
+  to run it concurrently.  The UNIQUE constraint on workflow_id in the queue
+  prevents a second claim via the normal path, so we bypass the queue and
+  directly write to intemporal_history from two threads to reproduce bug 1.2.
+
+  Returns a map of {:workflow-id ... :threads-launched 2}."
+  [db-spec test-run]
+  (let [wf-id  (str (random-uuid))
+        nonce  (str (random-uuid))
+        result (promise)
+        write! (fn [seq-num event-type]
+                 (try
+                   (jdbc/with-transaction [tx db-spec]
+                     (jdbc/execute! tx
+                       ["INSERT INTO intemporal_workflows (id) VALUES (?)
+                         ON CONFLICT (id) DO NOTHING"
+                        wf-id])
+                     (jdbc/execute! tx
+                       ["INSERT INTO intemporal_history
+                           (workflow_id, seq, event_type, data)
+                         VALUES (?,?,?,'{}'::jsonb)
+                         ON CONFLICT (workflow_id, seq) DO UPDATE
+                           SET event_type = EXCLUDED.event_type,
+                               data = EXCLUDED.data"
+                        wf-id seq-num event-type]))
+                   :ok
+                   (catch Throwable t (str "error: " t))))
+        ;; Fire two threads simultaneously.
+        t1 (Thread/startVirtualThread
+             (fn [] (deliver result (write! 0 "workflow-started"))))
+        t2 (Thread/startVirtualThread
+             (fn [] (write! 0 "workflow-started-duplicate")))]
+    (.join ^Thread t1 5000)
+    (.join ^Thread t2 5000)
+    (jdbc/execute! db-spec
+      ["INSERT INTO jepsen_work_queue
+          (test_run, workflow_id, wf_type, nonce, args, completed)
+        VALUES (?,?,?,?,'{}'::jsonb, TRUE)"
+       test-run wf-id "concurrent-start" nonce])
+    {:type :ok
+     :value {:workflow-id wf-id :nonce nonce :threads-launched 2}}))
diff --git a/test/intemporal/jepsen/db.clj b/test/intemporal/jepsen/db.clj
new file mode 100644
index 0000000..07d3ce7
--- /dev/null
+++ b/test/intemporal/jepsen/db.clj
@@ -0,0 +1,130 @@
+(ns intemporal.jepsen.db
+  "Subprocess lifecycle for forked worker JVMs.
+
+  Each 'node' (owner-id) maps to a forked Process whose classpath is set by
+  `-X:dev:jdbc:jepsen-worker`.  We use ProcessBuilder for real SIGKILL
+  semantics: destroyForcibly() skips the JVM shutdown hook, exactly modelling
+  a hard crash.  This destroys the process-local signal-callback atom,
+  reproducing bug 1.1.
+
+  Process model deviation: we don't use SSH/sshd containers (local-only).
+  The jepsen library is not required here; we implement our own lightweight
+  orchestrator."
+  (:require [clojure.java.io :as io]
+            [next.jdbc :as jdbc]
+            [migratus.core :as migratus]
+            [taoensso.telemere :as log])
+  (:import [java.io BufferedReader InputStreamReader]
+           [java.util.concurrent TimeUnit]))
+
+(def ^:private registry (atom {}))
+
+;; ---------------------------------------------------------------------------
+;; I/O pumps
+
+(defn- pump-stdout
+  "Forwards child stdout to logger line-by-line.  Delivers :ready on the
+  ready-promise the first time 'READY <owner>' appears."
+  [^Process p owner ready-prom]
+  (Thread/startVirtualThread
+    (fn []
+      (with-open [r (BufferedReader. (InputStreamReader. (.getInputStream p)))]
+        (loop []
+          (when-let [line (.readLine r)]
+            (log/log! :info (str "[worker:" owner "] " line))
+            (when (and (not (realized? ready-prom))
+                       (.startsWith ^String line (str "READY " owner)))
+              (deliver ready-prom :ready))
+            (recur)))))))
+
+(defn- pump-stderr [^Process p owner]
+  (Thread/startVirtualThread
+    (fn []
+      (with-open [r (BufferedReader. (InputStreamReader. (.getErrorStream p)))]
+        (loop []
+          (when-let [line (.readLine r)]
+            (log/log! :warn (str "[worker:" owner "/err] " line))
+            (recur)))))))
+
+;; ---------------------------------------------------------------------------
+;; Lifecycle
+
+(defn alive?
+  "True iff a worker process is registered and alive."
+  [owner]
+  (boolean (some-> @registry (get owner) :process (.isAlive))))
+
+(defn fork!
+  "Forks a worker JVM via `clojure -X:dev:jdbc:jepsen-worker`.
+  Blocks up to boot-timeout-ms waiting for the READY handshake on stdout."
+  [{:keys [owner db-url test-run boot-timeout-ms repo-root]
+    :or   {boot-timeout-ms 90000 repo-root "."}}]
+  (when (alive? owner)
+    (throw (ex-info "Worker already alive for this owner" {:owner owner})))
+  (let [args ["clojure" "-X:dev:jdbc:jepsen-worker"
+              "intemporal.jepsen.worker/run"
+              ":owner"    (pr-str owner)
+              ":db-url"   (pr-str db-url)
+              ":test-run" (pr-str test-run)]
+        pb   (doto (ProcessBuilder. ^java.util.List args)
+               (.directory (io/file repo-root))
+               (.redirectErrorStream false))
+        proc (.start pb)
+        ready (promise)]
+    (pump-stdout proc owner ready)
+    (pump-stderr proc owner)
+    (let [v (deref ready boot-timeout-ms ::timeout)]
+      (when (= v ::timeout)
+        (.destroyForcibly proc)
+        (throw (ex-info "Worker boot timed out"
+                        {:owner owner :timeout-ms boot-timeout-ms}))))
+    (let [entry {:process proc :owner owner}]
+      (swap! registry assoc owner entry)
+      (log/log! :info (str "Forked worker " owner " pid=" (.pid proc)))
+      entry)))
+
+(defn kill!
+  "Sends a signal to the worker.
+  :sigkill -> destroyForcibly (no shutdown hook, models hard crash)
+  :sigterm -> destroy (shutdown hook fires, models graceful stop)"
+  [owner signal]
+  (when-let [{:keys [^Process process]} (get @registry owner)]
+    (case signal
+      :sigterm (.destroy process)
+      :sigkill (.destroyForcibly process))
+    (.waitFor process 30 TimeUnit/SECONDS)
+    (swap! registry dissoc owner)
+    (log/log! :info (str "Killed worker " owner " with " (name signal)
+                         " exit=" (try (.exitValue process) (catch Exception _ "?"))))))
+
+(defn kill-all! []
+  (doseq [owner (keys @registry)]
+    (try (kill! owner :sigkill)
+         (catch Throwable t
+           (log/log! :warn (str "kill-all failed for " owner ": " t))))))
+
+;; ---------------------------------------------------------------------------
+;; Schema setup
+
+(defn migrate-all!
+  "Runs intemporal migrations and Jepsen side-channel migrations against
+  the given db-spec."
+  [db-spec]
+  (doseq [[dir table] [["migrations/postgres"        "migrations"]
+                       ["migrations/jepsen/postgres"  "jepsen_migrations"]]]
+    (migratus/migrate {:store                :database
+                       :migration-dir        dir
+                       :migration-table-name table
+                       :db                   db-spec})))
+
+(defn truncate-all!
+  "Clears all intemporal and Jepsen tables between runs."
+  [db-spec]
+  (doseq [table ["jepsen_cancels_sent"
+                 "jepsen_signals_sent"
+                 "jepsen_invocations"
+                 "jepsen_work_queue"
+                 "intemporal_signals"
+                 "intemporal_history"
+                 "intemporal_workflows"]]
+    (jdbc/execute! db-spec [(str "DELETE FROM " table)])))
diff --git a/test/intemporal/jepsen/nemesis.clj b/test/intemporal/jepsen/nemesis.clj
new file mode 100644
index 0000000..1bb1d73
--- /dev/null
+++ b/test/intemporal/jepsen/nemesis.clj
@@ -0,0 +1,138 @@
+(ns intemporal.jepsen.nemesis
+  "Fault injector.  Periodically picks a random worker and kills it (SIGKILL or
+  SIGTERM), then later restarts it.  Maintains a min-alive floor so at least N
+  workers can make progress.
+
+  Also provides 'signal-dead-workers': sends signals to all W1/W3 workflows
+  whose owner is currently dead.  This is the primary way to trigger bug 1.1:
+  the signal row lands in intemporal_signals but no callback fires because the
+  owning worker is gone."
+  (:require [intemporal.jepsen.db :as db]
+            [next.jdbc :as jdbc]
+            [taoensso.telemere :as log]))
+
+(defn- pick-victim
+  "Returns a random alive owner-id to kill, respecting min-alive."
+  [owners min-alive]
+  (let [alive (filter db/alive? owners)]
+    (when (> (count alive) min-alive)
+      (rand-nth alive))))
+
+(defn- pick-dead
+  "Returns a random dead owner-id to revive."
+  [owners]
+  (let [dead (remove db/alive? owners)]
+    (when (seq dead) (rand-nth dead))))
+
+;; ---------------------------------------------------------------------------
+;; Per-tick fault
+
+(defn step!
+  "One nemesis tick.  Chooses an action:
+    50%  SIGKILL a random alive worker
+    25%  SIGTERM a random alive worker
+    25%  Start a random dead worker
+
+  Records the op in `history`."
+  [{:keys [owners history db-url test-run repo-root min-alive]
+    :or   {min-alive 2}}]
+  (let [r (rand)]
+    (cond
+      (< r 0.50)
+      (if-let [victim (pick-victim owners min-alive)]
+        (do
+          (log/log! :info (str "[nemesis] SIGKILL " victim))
+          (db/kill! victim :sigkill)
+          (swap! history conj {:process :nemesis :type :info
+                               :f :kill-9 :value victim
+                               :time (System/currentTimeMillis)}))
+        (swap! history conj {:process :nemesis :type :info
+                             :f :noop :value :min-alive-floor
+                             :time (System/currentTimeMillis)}))
+
+      (< r 0.75)
+      (if-let [victim (pick-victim owners min-alive)]
+        (do
+          (log/log! :info (str "[nemesis] SIGTERM " victim))
+          (db/kill! victim :sigterm)
+          (swap! history conj {:process :nemesis :type :info
+                               :f :kill-15 :value victim
+                               :time (System/currentTimeMillis)}))
+        (swap! history conj {:process :nemesis :type :info
+                             :f :noop :value :min-alive-floor
+                             :time (System/currentTimeMillis)}))
+
+      :else
+      (if-let [revive (pick-dead owners)]
+        (do
+          (log/log! :info (str "[nemesis] restart " revive))
+          (db/fork! {:owner revive :db-url db-url :test-run test-run
+                     :repo-root repo-root})
+          (swap! history conj {:process :nemesis :type :info
+                               :f :start :value revive
+                               :time (System/currentTimeMillis)}))
+        (swap! history conj {:process :nemesis :type :info
+                             :f :noop :value :all-alive
+                             :time (System/currentTimeMillis)})))))
+
+;; ---------------------------------------------------------------------------
+;; Signal-while-dead: exercises bug 1.1.
+
+(defn signal-dead-workflows!
+  "Finds W1 (signal-wait) and W3 (cancel-sleep) workflows whose owning worker
+  is currently dead, then sends the expected signal to each.  The signal row
+  lands in intemporal_signals but no callback fires — the workflow is stuck.
+
+  Records each signal in jepsen_signals_sent for the checker."
+  [{:keys [db-spec test-run owners history]}]
+  (let [dead-owners (->> owners (remove db/alive?) set)
+        ;; Find claimed-but-not-completed W1/W3 workflows owned by dead workers.
+        rows (when (seq dead-owners)
+               (jdbc/execute! db-spec
+                 (into [(str "SELECT workflow_id, wf_type
+                              FROM jepsen_work_queue
+                              WHERE test_run = ?
+                                AND completed = FALSE
+                                AND wf_type IN ('signal-wait','cancel-sleep','rapid-signal')
+                                AND claimed_by IN ("
+                             (clojure.string/join "," (repeat (count dead-owners) "?"))
+                             ")")
+                         test-run]
+                       dead-owners)))]
+    (doseq [{:jepsen_work_queue/keys [workflow_id wf_type]} rows]
+      (let [signal-name (case wf_type
+                          "signal-wait"  "go"
+                          "cancel-sleep" "wake"
+                          "rapid-signal" "immediate"
+                          nil)]
+        (when signal-name
+          (try
+            (jdbc/execute! db-spec
+              ["INSERT INTO intemporal_signals (workflow_id, signal_name, payload)
+                VALUES (?,?,'{}'::jsonb)"
+               workflow_id signal-name])
+            (jdbc/execute! db-spec
+              ["INSERT INTO jepsen_signals_sent (test_run, workflow_id, signal_name)
+                VALUES (?,?,?)"
+               test-run workflow_id signal-name])
+            (swap! history conj {:process :nemesis :type :info
+                                 :f :signal-dead :value {:workflow-id workflow_id
+                                                         :signal signal-name}
+                                 :time (System/currentTimeMillis)})
+            (log/log! :info (str "[nemesis] signalled dead workflow "
+                                  workflow_id " signal=" signal-name))
+            (catch Throwable t
+              (log/log! :warn (str "[nemesis] signal-dead-workflows! error: " t)))))))))
+
+;; ---------------------------------------------------------------------------
+;; Quiesce helper
+
+(defn ensure-all-alive!
+  "Revives all dead workers during the quiesce phase.  Since intemporal has no
+  auto-resume on restart (bug 1.3), restarting workers here does NOT cause
+  stuck workflows to complete — it only proves that point."
+  [{:keys [owners db-url test-run repo-root]}]
+  (doseq [owner owners
+          :when (not (db/alive? owner))]
+    (log/log! :info (str "[quiesce] reviving " owner))
+    (db/fork! {:owner owner :db-url db-url :test-run test-run :repo-root repo-root})))
diff --git a/test/intemporal/jepsen/runner.clj b/test/intemporal/jepsen/runner.clj
new file mode 100644
index 0000000..c66132c
--- /dev/null
+++ b/test/intemporal/jepsen/runner.clj
@@ -0,0 +1,251 @@
+(ns intemporal.jepsen.runner
+  "Top-level orchestrator for the intemporal chaos test.
+
+  Phases:
+    1. setup    — migrate schema, truncate state, fork N worker JVMs
+    2. active   — generator submits/cancels/signals workflows;
+                  nemesis kills & restarts workers;
+                  nemesis also fires signals at dead workers (bug 1.1 probe)
+    3. quiesce  — nemesis stops; all workers restarted (proves bug 1.3: no
+                  auto-resume on restart); grace period elapses
+    4. check    — run all four invariant checkers against final DB state
+    5. teardown — kill all workers
+
+  Run:
+    clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run \\
+      :workers 4 :duration 120
+
+  No-kill baseline (useful to confirm happy-path correctness):
+    clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run \\
+      :workers 4 :duration 60 :no-kill true
+
+  Expected outcome with the current (unfixed) codebase:
+    checker liveness              -> FAIL  (bug 1.1 / 1.3)
+    checker signal-consumed       -> FAIL  (bug 2.1, intermittent)
+    checker history-integrity     -> FAIL  (bug 1.2, if concurrent-start runs)
+    checker cancellation-liveness -> FAIL  (bug 2.3)
+
+  After the Phase A + B + C fixes from improvements.md, all four should PASS."
+  (:require [intemporal.jepsen.db      :as db]
+            [intemporal.jepsen.client  :as client]
+            [intemporal.jepsen.nemesis :as nemesis]
+            [intemporal.jepsen.checker :as checker]
+            [next.jdbc :as jdbc]
+            [clojure.pprint :as pp]
+            [taoensso.telemere :as log])
+  (:import [java.util.concurrent Executors TimeUnit]))
+
+;; ---------------------------------------------------------------------------
+;; Defaults
+
+(def ^:private default-db-url
+  (or (System/getenv "POSTGRES_JDBC_URI")
+      "jdbc:postgresql://localhost:5432/root?user=root&password=root"))
+
+(defn- jdbc-spec [db-url]
+  {:dbtype "postgresql" :connection-uri db-url :jdbcUrl db-url})
+
+;; ---------------------------------------------------------------------------
+;; Generator
+
+(defn- start-generator!
+  "Launches 3 submit threads + 1 cancel thread + 1 observe thread.
+  Returns a 0-arity stop fn."
+  [{:keys [db-spec history test-run submit-rps]
+    :or   {submit-rps 5}}]
+  (let [pool          (Executors/newFixedThreadPool 5)
+        running?      (atom true)
+        submit-period (long (/ 1000 (max 1 submit-rps)))]
+
+    ;; 3 submit threads
+    (dotimes [i 3]
+      (.submit pool ^Runnable
+        (fn []
+          (while @running?
+            (try
+              (let [op (client/invoke-submit db-spec test-run)]
+                (client/record-op! history (assoc op :process i :f :submit)))
+              (catch Throwable t (log/log! :warn (str "submit failed: " t))))
+            (Thread/sleep submit-period)))))
+
+    ;; 1 cancel thread — cancels a recently-submitted workflow every ~3s
+    (.submit pool ^Runnable
+      (fn []
+        (while @running?
+          (try
+            (let [candidates (->> @history
+                                  (filter #(and (= :submit (:f %))
+                                                (= :ok (:type %))
+                                                (= :cancel-sleep
+                                                   (get-in % [:value :wf-type]))))
+                                  (keep #(get-in % [:value :workflow-id]))
+                                  seq)]
+              (when candidates
+                (let [wf-id (rand-nth candidates)
+                      op    (client/invoke-cancel db-spec test-run wf-id)]
+                  (client/record-op! history (assoc op :process 98 :f :cancel)))))
+            (catch Throwable t (log/log! :warn (str "cancel failed: " t))))
+          (Thread/sleep 3000))))
+
+    ;; 1 rapid-signal thread — immediately signals rapid-signal workflows
+    (.submit pool ^Runnable
+      (fn []
+        (while @running?
+          (try
+            (let [candidates (->> @history
+                                  (filter #(and (= :submit (:f %))
+                                                (= :ok (:type %))
+                                                (= :rapid-signal
+                                                   (get-in % [:value :wf-type]))))
+                                  (keep #(get-in % [:value :workflow-id]))
+                                  seq)]
+              (when candidates
+                (let [wf-id (rand-nth candidates)
+                      op    (client/invoke-signal db-spec test-run wf-id "immediate")]
+                  (client/record-op! history (assoc op :process 97 :f :signal)))))
+            (catch Throwable t (log/log! :warn (str "rapid-signal failed: " t))))
+          ;; Very short sleep to maximise chance of hitting the race window.
+          (Thread/sleep 50))))
+
+    (fn stop-gen []
+      (reset! running? false)
+      (.shutdown pool)
+      (.awaitTermination pool 10 TimeUnit/SECONDS)
+      (.shutdownNow pool))))
+
+;; ---------------------------------------------------------------------------
+;; Nemesis loop
+
+(defn- start-nemesis!
+  [{:keys [owners history db-url db-spec test-run repo-root no-kill?
+           nemesis-min-ms nemesis-jitter-ms min-alive]
+    :or   {nemesis-min-ms 3000 nemesis-jitter-ms 6000 min-alive 2}}]
+  (let [running? (atom (not no-kill?))
+        thread   (Thread/startVirtualThread
+                   (fn []
+                     (while @running?
+                       (try
+                         (Thread/sleep (long (+ nemesis-min-ms
+                                                (rand-int nemesis-jitter-ms))))
+                         (when @running?
+                           ;; Occasionally inject a concurrent-start to probe bug 1.2.
+                           (when (< (rand) 0.15)
+                             (let [op (client/invoke-concurrent-start db-spec test-run)]
+                               (client/record-op! history
+                                 (assoc op :process :nemesis :f :concurrent-start))))
+                           ;; Main kill/restart fault.
+                           (nemesis/step! {:owners       owners
+                                           :history      history
+                                           :db-url       db-url
+                                           :test-run     test-run
+                                           :repo-root    repo-root
+                                           :min-alive    min-alive})
+                           ;; After any kill, signal the dead workflows (bug 1.1 probe).
+                           (nemesis/signal-dead-workflows! {:db-spec  db-spec
+                                                            :test-run test-run
+                                                            :owners   owners
+                                                            :history  history}))
+                         (catch InterruptedException _ (reset! running? false))
+                         (catch Throwable t
+                           (log/log! :warn (str "nemesis step error: " t)))))))]
+    (fn stop-nem []
+      (reset! running? false)
+      (.join thread 5000))))
+
+;; ---------------------------------------------------------------------------
+;; Entry point
+
+(defn run
+  "Top-level entry.  Options (all optional):
+    :workers           number of forked worker JVMs (default 4)
+    :duration          active phase length in seconds (default 120)
+    :db-url            JDBC url (default from POSTGRES_JDBC_URI or localhost)
+    :no-kill           disable nemesis (baseline mode)
+    :submit-rps        submit rate per thread (3 threads, default 5 → 15 RPS)
+    :nemesis-min-ms    minimum gap between nemesis ticks (default 3000)
+    :nemesis-jitter-ms random extra gap per tick (default 6000)
+    :min-alive         floor on simultaneously-alive workers (default 2)
+    :grace-s           quiesce drain time before checks (default 90)
+    :repo-root         working directory for forked workers (default \".\")"
+  [{:keys [workers duration db-url no-kill repo-root submit-rps
+           nemesis-min-ms nemesis-jitter-ms min-alive grace-s]
+    :or   {workers 4 duration 120 db-url default-db-url
+           repo-root "." submit-rps 5
+           nemesis-min-ms 3000 nemesis-jitter-ms 6000
+           min-alive 2 grace-s 90}}]
+  (let [test-run (str "run-" (System/currentTimeMillis))
+        owners   (mapv #(format "jepsen-%02d-%s" % test-run) (range workers))
+        db-spec  (jdbc-spec db-url)
+        history  (atom [])]
+
+    (println "\n=== intemporal Jepsen run" test-run "===")
+    (println (format "workers=%d  duration=%ds  no-kill=%s  grace=%ds"
+                     workers duration (boolean no-kill) grace-s))
+
+    ;; --- 1. setup ---
+    (println "[setup] migrating + truncating")
+    (db/migrate-all! db-spec)
+    (db/truncate-all! db-spec)
+    (println "[setup] forking workers")
+    (doseq [owner owners]
+      (db/fork! {:owner owner :db-url db-url :test-run test-run
+                 :repo-root repo-root}))
+
+    (try
+      ;; --- 2. active phase ---
+      (println (format "[active] running %ds with chaos=%s" duration (not no-kill)))
+      (let [stop-gen (start-generator! {:db-spec    db-spec
+                                        :history    history
+                                        :test-run   test-run
+                                        :submit-rps submit-rps})
+            stop-nem (start-nemesis!   {:owners          owners
+                                        :history         history
+                                        :db-url          db-url
+                                        :db-spec         db-spec
+                                        :test-run        test-run
+                                        :repo-root       repo-root
+                                        :no-kill?        no-kill
+                                        :nemesis-min-ms  nemesis-min-ms
+                                        :nemesis-jitter-ms nemesis-jitter-ms
+                                        :min-alive       min-alive})]
+        (Thread/sleep (* 1000 duration))
+        (println "[active->quiesce] stopping generator and nemesis")
+        (stop-gen)
+        (stop-nem))
+
+      ;; --- 3. quiesce ---
+      ;; Restart every worker.  This proves bug 1.3: restarting does NOT
+      ;; auto-resume workflows — no recovery poller exists.
+      (println "[quiesce] restarting all workers (proves no auto-resume on restart)")
+      (nemesis/ensure-all-alive! {:owners owners :db-url db-url
+                                  :test-run test-run :repo-root repo-root})
+      (println (format "[quiesce] grace period: %ds" grace-s))
+      (Thread/sleep (* 1000 grace-s))
+
+      ;; --- 4. check ---
+      (println "[check] running invariants")
+      (let [result (checker/check-all {:db-spec  db-spec
+                                       :history  history
+                                       :test-run test-run})]
+        (println "\n=== RESULTS ===")
+        (pp/pprint result)
+        (println "===============\n")
+        (println (format "Ops in history: %d" (count @history)))
+        (println (format "Submitted: %d"
+                         (count (filter #(and (= :submit (:f %))
+                                              (= :ok (:type %)))
+                                        @history))))
+        (if (:valid? result)
+          (println "ALL INVARIANTS PASSED")
+          (println "INVARIANTS VIOLATED — see results above"))
+        result)
+
+      (finally
+        ;; --- 5. teardown ---
+        (println "[teardown] killing workers")
+        (db/kill-all!)))))
+
+(defn -main [& args]
+  (let [opts (when (seq args) (read-string (first args)))]
+    (let [r (run (or opts {}))]
+      (System/exit (if (:valid? r) 0 1)))))
diff --git a/test/intemporal/jepsen/worker.clj b/test/intemporal/jepsen/worker.clj
new file mode 100644
index 0000000..9c22258
--- /dev/null
+++ b/test/intemporal/jepsen/worker.clj
@@ -0,0 +1,144 @@
+(ns intemporal.jepsen.worker
+  "Forked-JVM entry point for a single intemporal worker node.
+
+  Lifecycle:
+    1. Starts an intemporal engine backed by the shared Postgres store.
+    2. Polls jepsen_work_queue for unclaimed workflow specs (FOR UPDATE SKIP LOCKED).
+    3. Claims each spec and runs start-workflow in a virtual thread.
+    4. Prints 'READY <owner>' once the poll loop is running.
+
+  Signal semantics:
+    SIGTERM -> JVM shutdown hook fires -> engine shutdown -> graceful stop.
+    SIGKILL -> no hook runs.  Process-local signal-callback atom in the JDBC
+               store is destroyed.  Workflows waiting on a signal will never
+               wake on another worker (bug 1.1).
+
+  The worker does NOT call resume-workflow for previously-running workflows
+  on restart — this reproduces bug 1.3 (no recovery poller)."
+  (:require [intemporal.core :as intemporal]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.jepsen.workflows :as wf]
+            [next.jdbc :as jdbc]
+            [hikari-cp.core :as hikari]
+            [taoensso.telemere :as log])
+  (:gen-class))
+
+;; ---------------------------------------------------------------------------
+;; Connection pools
+
+(defn- make-pool [db-url pool-size auto-commit?]
+  (hikari/make-datasource {:jdbc-url          db-url
+                           :maximum-pool-size pool-size
+                           :auto-commit       auto-commit?}))
+
+;; ---------------------------------------------------------------------------
+;; Work queue polling
+
+(def ^:private poll-interval-ms 200)
+
+(defn- claim-work-item!
+  "Claims one unclaimed queue item for this owner. Returns the row or nil."
+  [main-ds test-run owner]
+  (jdbc/with-transaction [tx main-ds]
+    (let [row (jdbc/execute-one! tx
+                ["SELECT id, workflow_id, wf_type, nonce, args
+                  FROM jepsen_work_queue
+                  WHERE test_run = ? AND claimed_by IS NULL AND completed = FALSE
+                  ORDER BY id ASC
+                  FOR UPDATE SKIP LOCKED
+                  LIMIT 1"
+                 test-run])]
+      (when row
+        (jdbc/execute! tx
+          ["UPDATE jepsen_work_queue SET claimed_by = ?, claimed_at = NOW()
+            WHERE id = ?"
+           owner (:jepsen_work_queue/id row)])
+        row))))
+
+(defn- mark-completed! [main-ds queue-id]
+  (jdbc/execute! main-ds
+    ["UPDATE jepsen_work_queue SET completed = TRUE WHERE id = ?" queue-id]))
+
+(defn- run-one-workflow!
+  "Starts a workflow in the current thread (intended to be called from a
+  virtual thread).  Binds *side-ds*, *test-run*, and *owner* so workflow
+  activities can record to the side-channel."
+  [engine main-ds side-ds test-run owner row]
+  (let [workflow-id (:jepsen_work_queue/workflow_id row)
+        wf-type     (:jepsen_work_queue/wf_type row)
+        args        (wf/build-args row)
+        wf-fn       (wf/workflow-fn-for wf-type)]
+    (binding [wf/*side-ds*  side-ds
+              wf/*test-run* test-run
+              wf/*owner*    owner]
+      (try
+        (log/log! :info (str "[" owner "] starting " wf-type " wf=" workflow-id))
+        (intemporal/start-workflow engine wf-fn args :workflow-id workflow-id)
+        (mark-completed! main-ds (:jepsen_work_queue/id row))
+        (log/log! :info (str "[" owner "] completed wf=" workflow-id))
+        (catch InterruptedException _
+          (log/log! :info (str "[" owner "] interrupted wf=" workflow-id)))
+        (catch Throwable t
+          (log/log! :warn (str "[" owner "] failed wf=" workflow-id " err=" (str t))))))))
+
+(defn- start-poll-loop!
+  "Starts the background work-queue poll loop. Returns a 0-arity stop fn."
+  [engine main-ds side-ds test-run owner]
+  (let [running? (atom true)]
+    (Thread/startVirtualThread
+      (fn []
+        (while @running?
+          (try
+            (if-let [row (claim-work-item! main-ds test-run owner)]
+              ;; Start workflow in its own virtual thread so the poll loop
+              ;; remains responsive.
+              (Thread/startVirtualThread
+                #(run-one-workflow! engine main-ds side-ds test-run owner row))
+              ;; Nothing in queue — sleep briefly.
+              (Thread/sleep poll-interval-ms))
+            (catch InterruptedException _
+              (reset! running? false))
+            (catch Throwable t
+              (log/log! :warn (str "[" owner "] poll loop error: " (str t)))
+              (Thread/sleep poll-interval-ms))))))
+    (fn [] (reset! running? false))))
+
+;; ---------------------------------------------------------------------------
+;; Public entry point
+
+(defn run
+  "deps.edn -X entry point.  Boots the engine, starts polling, and parks until
+  SIGKILL or SIGTERM.
+
+  Args (EDN keyword map):
+    :owner    — node identifier (stamped on side-channel rows)
+    :db-url   — JDBC URL for the shared Postgres instance
+    :test-run — run id matching the current jepsen_work_queue rows"
+  [{:keys [owner db-url test-run]}]
+  (assert owner   ":owner required")
+  (assert db-url  ":db-url required")
+  (assert test-run ":test-run required")
+
+  (let [store    (jdbc-store/make-jdbc-store db-url)
+        main-ds  (:datasource store)
+        side-ds  (make-pool db-url 2 true)   ; auto-commit for side-channel
+        engine   (intemporal/make-workflow-engine :store store :threads 8)
+        stop-fn  (start-poll-loop! engine main-ds side-ds test-run owner)]
+
+    (.addShutdownHook
+      (Runtime/getRuntime)
+      (Thread.
+        ^Runnable
+        (fn []
+          (log/log! :info (str "[" owner "] shutdown hook: stopping engine"))
+          (stop-fn)
+          (intemporal/shutdown-engine engine 5)
+          (hikari/close-datasource side-ds))))
+
+    (println "READY" owner)
+    (flush)
+    @(promise)))                             ; park until killed
+
+(defn -main [& args]
+  (let [[owner db-url test-run] args]
+    (run {:owner owner :db-url db-url :test-run (or test-run "default")})))
diff --git a/test/intemporal/jepsen/workflows.clj b/test/intemporal/jepsen/workflows.clj
new file mode 100644
index 0000000..55949f5
--- /dev/null
+++ b/test/intemporal/jepsen/workflows.clj
@@ -0,0 +1,138 @@
+(ns intemporal.jepsen.workflows
+  "Workflow shapes (W1–W4) submitted by the chaos test, plus the side-channel
+  recording activity.
+
+  Side-channel writes go through *side-ds* (a separate auto-commit Hikari pool),
+  so rows are durable even if the worker JVM is SIGKILLed mid-activity.
+
+  Each workflow shape probes a specific bug from improvements.md:
+    W1 (signal-wait)     — bug 1.1: lost wake on signal when worker is dead
+    W2 (activity-chain)  — bug 1.3: no recovery poller; activities not re-run
+    W3 (cancel-sleep)    — bug 2.3: cancellation can't reach a sleeping workflow
+    W4 (rapid-signal)    — bug 2.1: register-then-consume signal race"
+  (:require [intemporal.core :as intemporal]
+            [next.jdbc :as jdbc]
+            [taoensso.telemere :as log]))
+
+;; ---------------------------------------------------------------------------
+;; Dynamic bindings set by the worker before calling start-workflow / resume-workflow.
+
+(def ^:dynamic *side-ds*  nil)   ; auto-commit JDBC pool for side-channel writes
+(def ^:dynamic *test-run* nil)   ; test-run id stamped on every side-channel row
+(def ^:dynamic *owner*    nil)   ; worker owner-id for attribution
+
+;; ---------------------------------------------------------------------------
+;; Side-channel recording.
+
+(defn- record!
+  "Inserts one row into jepsen_invocations. Never throws — a side-channel
+  failure must not crash the workflow."
+  [workflow-id step nonce phase]
+  (when *side-ds*
+    (try
+      (jdbc/execute! *side-ds*
+        ["INSERT INTO jepsen_invocations (test_run, workflow_id, step, nonce, phase, owner)
+          VALUES (?,?,?,?,?,?)"
+         *test-run* workflow-id step nonce (name phase) *owner*])
+      (catch Throwable t
+        (log/warn! "jepsen side-channel write failed" {:err (str t)})))))
+
+;; ---------------------------------------------------------------------------
+;; Activities.
+
+(defn jepsen-activity
+  "Side-channel–recording activity.  Sleeps briefly to widen the crash window,
+  then records :begin / :end / :fail rows.  Longer sleep for non-trivial steps
+  so the nemesis can land a SIGKILL while the activity is in-flight."
+  [workflow-id step nonce]
+  (record! workflow-id step nonce :begin)
+  (try
+    (Thread/sleep (long (+ 100 (rand-int 150))))
+    (record! workflow-id step nonce :end)
+    :ok
+    (catch Throwable t
+      (record! workflow-id step nonce :fail)
+      (throw t))))
+
+;; ---------------------------------------------------------------------------
+;; W1: signal-wait — probes bug 1.1 (lost wake on signal across processes).
+;;
+;; Registers a wait-for-signal :go.  If the worker is killed while waiting and
+;; someone sends the signal from another process, the workflow should resume.
+;; With the current implementation it will NOT: the callback is in a dead atom.
+
+(defn signal-wait-workflow
+  "Records :before, suspends on signal 'go', records :after."
+  [workflow-id nonce]
+  (let [act (intemporal/stub #'jepsen-activity)]
+    (act workflow-id "before" nonce)
+    (intemporal/wait-for-signal "go")
+    (act workflow-id "after" nonce)))
+
+;; ---------------------------------------------------------------------------
+;; W2: activity-chain — probes bug 1.3 (no recovery poller).
+;;
+;; Runs a chain of activities.  If the worker crashes mid-chain and never
+;; explicitly calls resume-workflow, the remaining activities never run.
+
+(defn activity-chain-workflow
+  "Runs `steps` activities in sequence."
+  [workflow-id nonce steps]
+  (let [act (intemporal/stub #'jepsen-activity)]
+    (dotimes [i steps]
+      (act workflow-id (str "step-" i) nonce))))
+
+;; ---------------------------------------------------------------------------
+;; W3: cancel-sleep — probes bug 2.3 (cancellation can't reach a sleeper).
+;;
+;; Records :started, then waits for signal 'wake' forever.  The test client
+;; cancels the workflow via cancel-workflow.  With the current implementation
+;; the workflow never observes the cancellation because it never re-enters
+;; the execution loop.
+
+(defn cancel-sleep-workflow
+  "Records :started, then blocks on signal 'wake'."
+  [workflow-id nonce]
+  (let [act (intemporal/stub #'jepsen-activity)]
+    (act workflow-id "started" nonce)
+    (intemporal/wait-for-signal "wake")
+    (act workflow-id "woke" nonce)))
+
+;; ---------------------------------------------------------------------------
+;; W4: rapid-signal — probes bug 2.1 (register-then-consume signal race).
+;;
+;; Immediately waits for signal 'immediate'.  The test client sends the signal
+;; at nearly the same time, trying to hit the window between the consume-check
+;; and the register-callback call in process-signal.
+
+(defn rapid-signal-workflow
+  "Suspends immediately on signal 'immediate', records :completed after."
+  [workflow-id nonce]
+  (let [act (intemporal/stub #'jepsen-activity)]
+    (intemporal/wait-for-signal "immediate")
+    (act workflow-id "completed" nonce)))
+
+;; ---------------------------------------------------------------------------
+;; Registry: maps wf-type keyword -> {:fn workflow-fn :signal name-or-nil}.
+
+(def ^:private wf-registry
+  {:signal-wait    {:wf-fn #'signal-wait-workflow    :signal "go"}
+   :activity-chain {:wf-fn #'activity-chain-workflow :signal nil}
+   :cancel-sleep   {:wf-fn #'cancel-sleep-workflow   :signal "wake"}
+   :rapid-signal   {:wf-fn #'rapid-signal-workflow   :signal "immediate"}})
+
+(defn workflow-fn-for [wf-type]
+  (or (get-in wf-registry [(keyword wf-type) :wf-fn])
+      (throw (ex-info "Unknown workflow type" {:wf-type wf-type}))))
+
+(defn signal-name-for [wf-type]
+  (get-in wf-registry [(keyword wf-type) :signal]))
+
+(defn build-args
+  "Reconstructs the arg vector for a workflow from the queue row."
+  [{:jepsen_work_queue/keys [workflow_id nonce wf_type args]}]
+  (case (keyword wf_type)
+    :signal-wait    [workflow_id nonce]
+    :activity-chain [workflow_id nonce (or (:steps args) 5)]
+    :cancel-sleep   [workflow_id nonce]
+    :rapid-signal   [workflow_id nonce]))
diff --git a/test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.down.sql b/test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.down.sql
new file mode 100644
index 0000000..b7b58cf
--- /dev/null
+++ b/test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.down.sql
@@ -0,0 +1,7 @@
+DROP TABLE IF EXISTS jepsen_cancels_sent;
+--;;
+DROP TABLE IF EXISTS jepsen_signals_sent;
+--;;
+DROP TABLE IF EXISTS jepsen_invocations;
+--;;
+DROP TABLE IF EXISTS jepsen_work_queue;
diff --git a/test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.up.sql b/test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.up.sql
new file mode 100644
index 0000000..c10a9e5
--- /dev/null
+++ b/test/resources/migrations/jepsen/postgres/20260529000001-jepsen-tables.up.sql
@@ -0,0 +1,57 @@
+-- Jepsen test side-channel tables.
+-- Applied by the Jepsen runner (not by make-jdbc-store) against the same
+-- Postgres instance as intemporal itself.
+
+-- Work queue: the test client inserts workflow specs here; worker JVMs poll and
+-- claim items with FOR UPDATE SKIP LOCKED.
+CREATE TABLE IF NOT EXISTS jepsen_work_queue (
+    id         BIGSERIAL PRIMARY KEY,
+    test_run   TEXT NOT NULL,
+    workflow_id TEXT NOT NULL UNIQUE,
+    wf_type    TEXT NOT NULL,  -- signal-wait | activity-chain | cancel-sleep | rapid-signal
+    nonce      TEXT NOT NULL,
+    args       JSONB,
+    claimed_by TEXT,
+    claimed_at TIMESTAMP WITH TIME ZONE,
+    completed  BOOLEAN DEFAULT FALSE,
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
+--;;
+CREATE INDEX IF NOT EXISTS idx_jepsen_work_queue_unclaimed
+    ON jepsen_work_queue (test_run, claimed_by, id)
+    WHERE claimed_by IS NULL AND completed = FALSE;
+--;;
+-- Side-channel: one row per activity invocation. Written with auto-commit so
+-- rows survive a SIGKILL between :begin and :end.
+CREATE TABLE IF NOT EXISTS jepsen_invocations (
+    id          BIGSERIAL PRIMARY KEY,
+    test_run    TEXT NOT NULL,
+    workflow_id TEXT NOT NULL,
+    step        TEXT NOT NULL,
+    nonce       TEXT,
+    phase       TEXT NOT NULL,  -- begin | end | fail
+    owner       TEXT,
+    ts          TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
+--;;
+CREATE INDEX IF NOT EXISTS idx_jepsen_invocations_lookup
+    ON jepsen_invocations (test_run, workflow_id, nonce);
+--;;
+-- Signals sent by the test client.  Used by the checker to verify that every
+-- sent signal was eventually consumed.
+CREATE TABLE IF NOT EXISTS jepsen_signals_sent (
+    id          BIGSERIAL PRIMARY KEY,
+    test_run    TEXT NOT NULL,
+    workflow_id TEXT NOT NULL,
+    signal_name TEXT NOT NULL,
+    sent_at     TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
+--;;
+-- Cancels issued by the client.  Used by the checker to verify that cancelled
+-- workflows eventually reach a terminal state.
+CREATE TABLE IF NOT EXISTS jepsen_cancels_sent (
+    id          BIGSERIAL PRIMARY KEY,
+    test_run    TEXT NOT NULL,
+    workflow_id TEXT NOT NULL,
+    sent_at     TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
diff --git a/tests.edn b/tests.edn
index 0877e3c..fbf09e9 100644
--- a/tests.edn
+++ b/tests.edn
@@ -33,11 +33,15 @@
           :test-paths   ["test"]}
 
          ;; jvm based tests
-         {:id           :test
+         {:id                    :test
           ;:kaocha.filter/skip-meta [:crash]
-          :ns-patterns  ["-test$"]
-          :source-paths ["src"]
-          :test-paths   ["test"]}
+          :ns-patterns           ["-test$"]
+          ;; Exclude the jepsen namespace — it is not a kaocha test suite and
+          ;; its ns name ends in .runner / .worker (not -test) anyway, but
+          ;; exclude explicitly to be safe.
+          :kaocha.filter/skip-meta [:jepsen]
+          :source-paths          ["src"]
+          :test-paths            ["test"]}
          {:id   :test-cljs
           :type :kaocha.type/cljs
           :cljs/repl-env cljs.repl.node/repl-env

From 5a05ce002fece1ee8d28d95a3f87416145236eca Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Sun, 31 May 2026 08:00:28 +0100
Subject: [PATCH 2/9] tests passing

---
 dev/verify_bugs.clj                           | 159 +++++++++++++-----
 docker/fdb.cluster                            |   2 +-
 src/intemporal/core.cljc                      |   5 +-
 src/intemporal/internal/execution.clj         |  62 ++++---
 src/intemporal/internal/execution.cljs        |  62 ++++---
 .../internal/fns/start_workflow.clj           |  73 ++++----
 src/intemporal/protocol.cljc                  |   2 +
 src/intemporal/store.cljc                     |   8 +
 src/intemporal/store/fdb.clj                  |   7 +
 src/intemporal/store/jdbc.clj                 |  12 +-
 test/intemporal/tests/jepsen/bug_1_1_test.clj |  83 +++++++++
 test/intemporal/tests/jepsen/bug_1_2_test.clj |  92 ++++++++++
 test/intemporal/tests/jepsen/bug_1_3_test.clj |  92 ++++++++++
 test/intemporal/tests/jepsen/bug_2_1_test.clj | 112 ++++++++++++
 test/intemporal/tests/jepsen/bug_2_3_test.clj |  95 +++++++++++
 test/intemporal/tests/jepsen/racing_store.clj |  55 ++++++
 16 files changed, 786 insertions(+), 135 deletions(-)
 create mode 100644 test/intemporal/tests/jepsen/bug_1_1_test.clj
 create mode 100644 test/intemporal/tests/jepsen/bug_1_2_test.clj
 create mode 100644 test/intemporal/tests/jepsen/bug_1_3_test.clj
 create mode 100644 test/intemporal/tests/jepsen/bug_2_1_test.clj
 create mode 100644 test/intemporal/tests/jepsen/bug_2_3_test.clj
 create mode 100644 test/intemporal/tests/jepsen/racing_store.clj

diff --git a/dev/verify_bugs.clj b/dev/verify_bugs.clj
index 4172e10..4aab136 100644
--- a/dev/verify_bugs.clj
+++ b/dev/verify_bugs.clj
@@ -65,6 +65,52 @@
       (act counter))
     :done))
 
+;; ── RacingStore: deterministic race injector ──────────────────────────────────
+;;
+;; Wraps any IStore so that the first time consume-signal returns nil for a
+;; specific (workflow-id, signal-name):
+;;
+;;   1. It delivers gate-nil  ("race window is open")
+;;   2. It blocks on gate-sent ("sender has injected signal into the window")
+;;   3. Then returns nil, letting process-signal proceed to register-callback
+;;
+;; The sender thread:
+;;   1. Waits on gate-nil
+;;   2. Calls p/add-signal on the INNER store directly, writing the signal row
+;;      but firing no callback (none registered yet — we're in the window)
+;;   3. Delivers gate-sent
+;;
+;; After the window closes:
+;;   - p/register-signal-callback is called → callback registered in inner store
+;;   - Signal is already in inner store; add-signal already ran with empty callbacks
+;;   - Callback will never fire retroactively
+;;   - Workflow is permanently stuck with an undelivered wake
+;;
+;; Proof of stuck:  p/get-pending-signals returns the signal row;
+;;                  workflow status remains :running; the workflow future times out.
+
+(defrecord RacingStore [inner gate-nil gate-sent armed?]
+  p/IStore
+  (load-history            [_ wf-id]                    (p/load-history inner wf-id))
+  (save-event              [_ wf-id ev]                 (p/save-event inner wf-id ev))
+  (save-events             [_ wf-id evs]                (p/save-events inner wf-id evs))
+  (find-event              [_ wf-id et sq]              (p/find-event inner wf-id et sq))
+  (get-pending-signals     [_ wf-id]                    (p/get-pending-signals inner wf-id))
+  (add-signal              [_ wf-id sn sd]              (p/add-signal inner wf-id sn sd))
+  (register-signal-callback [_ wf-id sn cb]             (p/register-signal-callback inner wf-id sn cb))
+  (unregister-signal-callback [_ wf-id sn]              (p/unregister-signal-callback inner wf-id sn))
+  (is-cancelled?           [_ wf-id]                    (p/is-cancelled? inner wf-id))
+  (mark-cancelled          [_ wf-id]                    (p/mark-cancelled inner wf-id))
+  (get-workflow-status     [_ wf-id]                    (p/get-workflow-status inner wf-id))
+
+  (consume-signal [_ wf-id sig-name]
+    (let [result (p/consume-signal inner wf-id sig-name)]
+      ;; Only intercept once (armed? tracks first nil-return)
+      (when (and (nil? result) (compare-and-set! armed? true false))
+        (deliver gate-nil {:wf-id wf-id :sig-name sig-name})
+        (deref gate-sent 5000 :timeout-waiting-for-sender))
+      result)))
+
 ;; ── Bug scenarios ─────────────────────────────────────────────────────────────
 
 (defn- scenario-1-1
@@ -151,42 +197,76 @@
       result)))
 
 (defn- scenario-2-1
-  "Bug 2.1 — Register-then-consume signal race.
-
-  process-signal does: (1) consume-signal, (2) if nil → register-callback.
-  If a sender fires between (1) and (2) the signal is consumed but the
-  callback fires into nothing (or the signal is already gone by the time
-  the callback tries to re-consume).
-
-  We maximise the window by having the sender fire 200 ms after the workflow
-  starts (before it has committed to suspending).  A stuck workflow after a
-  sent signal indicates the race was hit."
-  [make-store label]
-  (let [store   (make-store)
-        wf-id   (str "bug21-" (random-uuid))
-        result  (promise)
-        engine  (intemporal/make-workflow-engine :store store :threads 2)]
-    (try
-      (future
-        (try
-          (let [r (intemporal/start-workflow engine wait-signal-wf []
-                                            :workflow-id wf-id)]
-            (deliver result r))
-          (catch Exception e (deliver result {:error (str e)}))))
-      ;; Send the signal after a short window — trying to land between
-      ;; consume-check (step 1) and register-callback (step 2).
-      (Thread/sleep 200)
-      (p/add-signal store wf-id "go" {:source :race-test})
-      ;; Wait up to 3 s for the workflow to wake.
-      (let [r (deref result 3000 ::timeout)]
-        {:store   label
-         :bug?    (= ::timeout r)
-         :detail  (if (= ::timeout r)
-                    "Workflow stuck: signal sent before callback was registered (race hit)"
-                    "Workflow woke normally (race window not hit this run — try more iterations)")})
-      (finally
-        (intemporal/shutdown-engine engine)
-        (when (instance? java.io.Closeable store) (.close store))))))
+  "Bug 2.1 — Register-then-consume signal race (deterministic via RacingStore).
+
+  process-signal executes:
+    (1) consume-signal → nil   (no signal available)
+    (2) register-signal-callback
+
+  The RacingStore intercepts step (1): after consume-signal returns nil it
+  blocks on gate-nil/gate-sent, letting us inject a signal into the INNER
+  store BEFORE step (2) runs.  After the sender delivers gate-sent the
+  consume returns nil and process-signal proceeds to register-callback.
+
+  At that point:
+    • Signal row IS in inner store (written by add-signal in the window)
+    • add-signal checked inner callbacks atom → found empty → fired no wake
+    • Callback IS now registered (step 2 ran after the window)
+    • But add-signal already ran with empty callbacks → wake was lost
+    • Callback will never fire retroactively
+    • Workflow is permanently stuck
+
+  Proof:
+    • workflow future times out  (stuck)
+    • p/get-pending-signals returns the signal row  (unconsumed)
+    • workflow status is :running"
+  [make-inner-store label]
+  (let [inner     (make-inner-store)
+        gate-nil  (promise)
+        gate-sent (promise)
+        store     (->RacingStore inner gate-nil gate-sent (atom true))
+        wf-id     (str "bug21-" (random-uuid))
+        result    (promise)
+        engine    (intemporal/make-workflow-engine :store store :threads 2)]
+    ;; Workflow thread
+    (future
+      (try
+        (let [r (intemporal/start-workflow engine wait-signal-wf []
+                                           :workflow-id wf-id)]
+          (deliver result r))
+        (catch Exception e (deliver result {:error (str e)}))))
+
+    ;; Wait until consume-signal returned nil (race window is open)
+    (let [gate-info (deref gate-nil 5000 ::timeout)]
+      (if (= ::timeout gate-info)
+        (do (intemporal/shutdown-engine engine)
+            (when (instance? java.io.Closeable inner) (.close inner))
+            {:store label :bug? false
+             :detail "Gate never opened — workflow did not reach consume-signal in time"})
+        (do
+          ;; Inject signal directly into the inner store.
+          ;; At this moment process-signal is parked between consume-check and register-callback.
+          ;; inner.add-signal writes the signal and checks callbacks atom → empty → no wake.
+          (p/add-signal inner wf-id "go" {:source :injected-in-race-window})
+          ;; Release the gate — let consume-signal return nil to process-signal
+          (deliver gate-sent :signal-injected)
+          ;; Give process-signal time to register the callback (step 2)
+          (Thread/sleep 200)
+          ;; Check outcome
+          (let [r       (deref result 2000 ::timeout)
+                pending (p/get-pending-signals inner wf-id)
+                status  (p/get-workflow-status inner wf-id)]
+            (intemporal/shutdown-engine engine)
+            (when (instance? java.io.Closeable inner) (.close inner))
+            {:store        label
+             :bug?         (= ::timeout r)
+             :detail       (if (= ::timeout r)
+                             (str "RACE CONFIRMED — signal injected in race window; "
+                                  "wake never fired; status=" status
+                                  "; orphaned signal keys=" (keys pending))
+                             (str "Workflow woke (race not reproduced): " r))
+             :pending-signals (keys pending)
+             :final-status    status}))))))  ; closes: map let[r] do if let[gate-info] outer-let, defn
 
 (defn- scenario-2-3
   "Bug 2.3 — Cancellation can't reach a sleeping workflow.
@@ -317,7 +397,7 @@
 
   ;; ----------------------------------------------------------------------------
   (print-scenario
-    "Bug 2.1" "Register-then-consume signal race (intermittent)"
+    "Bug 2.1" "Register-then-consume signal race (deterministic)"
     [(scenario-2-1 make-jdbc-store "JDBC")
      (scenario-2-1 make-fdb-store  "FDB")])
 
@@ -328,7 +408,8 @@
      (scenario-2-3 make-fdb-store  "FDB")])
 
   ;; ----------------------------------------------------------------------------
-  (println "\nNote: Bug 2.1 is a race; a single run may not always hit the window.")
-  (println "      Increase Thread/sleep in scenario-2-1 or run multiple times.\n")
+  (println "\nNote: Bug 2.1 uses a latch-synchronized RacingStore to deterministically")
+  (println "      inject a signal into the consume-nil→register-callback window.")
+  (println "      The race is guaranteed to reproduce on every run.\n")
 
   (System/exit 0))
diff --git a/docker/fdb.cluster b/docker/fdb.cluster
index 0c1fa8d..74a077b 100644
--- a/docker/fdb.cluster
+++ b/docker/fdb.cluster
@@ -1 +1 @@
-docker:docker@172.18.0.3:4500
+docker:docker@172.18.0.2:4500
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index a0502ed..c0a48ac 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -410,10 +410,13 @@
 
 (defn cancel-workflow
   "Cancel a running workflow.
-   The workflow will be cancelled at the next suspension point."
+   The workflow is cancelled at the next suspension point. If it is currently
+   suspended (e.g. waiting on a signal), wake-workflow forces it to re-enter its
+   loop so it observes the cancellation flag rather than waiting forever."
   [store workflow-id]
   (log/with-mdc {:workflow-id workflow-id}
     (p/mark-cancelled store workflow-id)
+    (p/wake-workflow store workflow-id)
     (log/debugf "Cancelling workflow"))
   {:cancelled true :workflow-id workflow-id})
 
diff --git a/src/intemporal/internal/execution.clj b/src/intemporal/internal/execution.clj
index 8b7fe8a..53c699b 100644
--- a/src/intemporal/internal/execution.clj
+++ b/src/intemporal/internal/execution.clj
@@ -221,38 +221,38 @@
         :wait-timer))))
 
 (defn process-signal [store workflow-id suspension-data pending-events wake-fn observer]
-  (let [{:keys [seq signal-name]} suspension-data]
+  (let [{:keys [seq signal-name]} suspension-data
+        save-received (fn [signal-data]
+                        (p/save-event store workflow-id {:event-type  :signal-received
+                                                         :seq         seq
+                                                         :signal-name signal-name
+                                                         :signal-id   (:id signal-data)
+                                                         :payload     (:payload signal-data)
+                                                         :timestamp   (utils/current-time-ms)})
+                        (-notify p/on-signal-received observer workflow-id signal-name (:payload signal-data)))]
     ;; Save pending events
     (p/save-events store workflow-id pending-events)
+    ;; Register the wake callback FIRST, then check for an already-available
+    ;; signal (fixes bug 2.1: a signal arriving between the consume-check and
+    ;; the registration could previously be lost). consume-signal is atomic in
+    ;; every store, so exactly one of {the inline check below, the callback}
+    ;; consumes the signal — the other observes nil and no-ops. The callback
+    ;; only wakes if it was the one that consumed, so the inline :continue path
+    ;; never double-executes the workflow.
+    (p/register-signal-callback store workflow-id signal-name
+                               (fn []
+                                 (when-let [signal-data (p/consume-signal store workflow-id signal-name)]
+                                   (save-received signal-data)
+                                   (p/unregister-signal-callback store workflow-id signal-name)
+                                   (when wake-fn (wake-fn)))))
     (if-let [signal-data (p/consume-signal store workflow-id signal-name)]
-      ;; Signal already available - process immediately
+      ;; We won the race inline: handle the signal and continue synchronously.
       (do
-        (p/save-event store workflow-id {:event-type  :signal-received
-                                         :seq         seq
-                                         :signal-name signal-name
-                                         :signal-id   (:id signal-data)
-                                         :payload     (:payload signal-data)
-                                         :timestamp   (utils/current-time-ms)})
-        (-notify p/on-signal-received observer workflow-id signal-name (:payload signal-data))
+        (p/unregister-signal-callback store workflow-id signal-name)
+        (save-received signal-data)
         :continue)
-      ;; ELSE Signal not yet available - register callback and wait
-      (do
-        (p/register-signal-callback store workflow-id signal-name
-                                   (fn []
-                                     ;; When signal arrives, consume it and save event
-                                     (when-let [signal-data (p/consume-signal store workflow-id signal-name)]
-                                       (p/save-event store workflow-id {:event-type  :signal-received
-                                                                        :seq         seq
-                                                                        :signal-name signal-name
-                                                                        :signal-id   (:id signal-data)
-                                                                        :payload     (:payload signal-data)
-                                                                        :timestamp   (utils/current-time-ms)})
-                                       (-notify p/on-signal-received observer workflow-id signal-name (:payload signal-data)))
-                                     ;; Unregister callback
-                                     (p/unregister-signal-callback store workflow-id signal-name)
-                                     ;; Wake up the workflow
-                                     (when wake-fn (wake-fn))))
-        :wait-signal))))
+      ;; No signal yet: stay suspended; the armed callback will wake us.
+      :wait-signal)))
 
 (defn process-signal-with-timeout [store scheduler workflow-id suspension-data
                                     pending-events wake-fn observer]
@@ -569,7 +569,13 @@
 
               (if (= action :continue)
                 (recur (inc iteration))
-                (action->result action workflow-id)))
+                ;; About to wait: register a generic wake callback so an external
+                ;; actor (e.g. cancel-workflow) can force this workflow to
+                ;; re-enter its loop and observe state such as the cancel flag.
+                (do
+                  (when wake-fn
+                    (p/register-wake-callback store workflow-id wake-fn))
+                  (action->result action workflow-id))))
 
             :failed
             (finalize-failed store workflow-id
diff --git a/src/intemporal/internal/execution.cljs b/src/intemporal/internal/execution.cljs
index 5add223..e29449b 100644
--- a/src/intemporal/internal/execution.cljs
+++ b/src/intemporal/internal/execution.cljs
@@ -247,38 +247,38 @@
         :wait-timer))))
 
 (defn process-signal [store workflow-id suspension-data pending-events wake-fn observer]
-  (let [{:keys [seq signal-name]} suspension-data]
+  (let [{:keys [seq signal-name]} suspension-data
+        save-received (fn [signal-data]
+                        (p/save-event store workflow-id {:event-type  :signal-received
+                                                         :seq         seq
+                                                         :signal-name signal-name
+                                                         :signal-id   (:id signal-data)
+                                                         :payload     (:payload signal-data)
+                                                         :timestamp   (utils/current-time-ms)})
+                        (-notify p/on-signal-received observer workflow-id signal-name (:payload signal-data)))]
     ;; Save pending events
     (p/save-events store workflow-id pending-events)
+    ;; Register the wake callback FIRST, then check for an already-available
+    ;; signal (fixes bug 2.1: a signal arriving between the consume-check and
+    ;; the registration could previously be lost). consume-signal is atomic in
+    ;; every store, so exactly one of {the inline check below, the callback}
+    ;; consumes the signal — the other observes nil and no-ops. The callback
+    ;; only wakes if it was the one that consumed, so the inline :continue path
+    ;; never double-executes the workflow.
+    (p/register-signal-callback store workflow-id signal-name
+                               (fn []
+                                 (when-let [signal-data (p/consume-signal store workflow-id signal-name)]
+                                   (save-received signal-data)
+                                   (p/unregister-signal-callback store workflow-id signal-name)
+                                   (when wake-fn (wake-fn)))))
     (if-let [signal-data (p/consume-signal store workflow-id signal-name)]
-      ;; Signal already available - process immediately
+      ;; We won the race inline: handle the signal and continue synchronously.
       (do
-        (p/save-event store workflow-id {:event-type  :signal-received
-                                         :seq         seq
-                                         :signal-name signal-name
-                                         :signal-id   (:id signal-data)
-                                         :payload     (:payload signal-data)
-                                         :timestamp   (utils/current-time-ms)})
-        (-notify p/on-signal-received observer workflow-id signal-name (:payload signal-data))
+        (p/unregister-signal-callback store workflow-id signal-name)
+        (save-received signal-data)
         :continue)
-      ;; ELSE Signal not yet available - register callback and wait
-      (do
-        (p/register-signal-callback store workflow-id signal-name
-                                   (fn []
-                                     ;; When signal arrives, consume it and save event
-                                     (when-let [signal-data (p/consume-signal store workflow-id signal-name)]
-                                       (p/save-event store workflow-id {:event-type  :signal-received
-                                                                        :seq         seq
-                                                                        :signal-name signal-name
-                                                                        :signal-id   (:id signal-data)
-                                                                        :payload     (:payload signal-data)
-                                                                        :timestamp   (utils/current-time-ms)})
-                                       (-notify p/on-signal-received observer workflow-id signal-name (:payload signal-data)))
-                                     ;; Unregister callback
-                                     (p/unregister-signal-callback store workflow-id signal-name)
-                                     ;; Wake up the workflow
-                                     (when wake-fn (wake-fn))))
-        :wait-signal))))
+      ;; No signal yet: stay suspended; the armed callback will wake us.
+      :wait-signal)))
 
 (defn process-signal-with-timeout [store scheduler workflow-id suspension-data
                                     pending-events wake-fn observer]
@@ -596,7 +596,13 @@
 
                              (if (= action :continue)
                                (prom/recur (inc iteration))
-                               (action->result action workflow-id)))
+                               ;; About to wait: register a generic wake callback so an
+                               ;; external actor (e.g. cancel-workflow) can force this
+                               ;; workflow to re-enter and observe the cancel flag.
+                               (do
+                                 (when wake-fn
+                                   (p/register-wake-callback store workflow-id wake-fn))
+                                 (action->result action workflow-id))))
 
                            :failed
                            (finalize-failed store workflow-id
diff --git a/src/intemporal/internal/fns/start_workflow.clj b/src/intemporal/internal/fns/start_workflow.clj
index 29b2b48..3bea34d 100644
--- a/src/intemporal/internal/fns/start_workflow.clj
+++ b/src/intemporal/internal/fns/start_workflow.clj
@@ -3,7 +3,11 @@
             [intemporal.internal.logging :as log]
             [intemporal.internal.activity :as a]
             [intemporal.protocol :as p]
-            [intemporal.utils :as utils]))
+            [intemporal.utils :as utils])
+  (:import [java.util.concurrent LinkedBlockingQueue]))
+
+(def ^:private waiting-statuses
+  #{:waiting-timer :waiting-signal :waiting-signal-timeout :waiting-async})
 
 (defn start-workflow
   "Start a workflow execution.
@@ -27,29 +31,26 @@
       :or {max-iterations 1000}}]
   (doseq [[proto impl] protocols]
     (a/register-protocol-activities! registry proto impl))
-  (let [wf-id (or workflow-id (str (random-uuid)))
-        resume-promise-atom (atom nil)
+  (let [wf-id    (or workflow-id (str (random-uuid)))
         observer (or observer (get engine :observer))
-        wake-fn (fn wake-fn-impl []
-                  (log/with-mdc {:workflow-id wf-id}
-                    (try
-                      (when observer
-                        (p/on-workflow-resumed observer wf-id))
-                      (log/debugf "Waking workflow for resume")
-                      (let [old-promise @resume-promise-atom
-                            new-promise (promise)
-                            result (exec/run-workflow-internal engine wf-id workflow-fn args
-                                                         {:observer observer
-                                                          :max-iterations max-iterations
-                                                          :wake-fn wake-fn-impl})]
-                        (reset! resume-promise-atom new-promise)
-                        (deliver old-promise result))
-                      (catch Exception e
-                        (when-let [p @resume-promise-atom]
-                          (deliver p {:status :failed :error e}))))))]
+        ;; Wake channel. wake-fn (invoked from store signal/timer callbacks and
+        ;; from cancel-workflow via wake-workflow) only enqueues a token — it
+        ;; never runs execution itself. All run-workflow-internal calls happen
+        ;; on THIS thread, in the loop below. This:
+        ;;  (a) makes the wake edge-safe: a wake that fires while the workflow
+        ;;      is still suspending sits in the queue and is observed by the
+        ;;      next take, instead of racing a resume-promise handshake; and
+        ;;  (b) prevents two threads from executing the same workflow at once.
+        wake-q   (LinkedBlockingQueue.)
+        run-once (fn []
+                   (exec/run-workflow-internal engine wf-id workflow-fn args
+                                               {:observer       observer
+                                                :max-iterations max-iterations
+                                                :wake-fn        (fn wake-fn []
+                                                                  (when observer
+                                                                    (p/on-workflow-resumed observer wf-id))
+                                                                  (.offer wake-q :wake))}))]
     (log/with-mdc {:workflow-id wf-id}
-      ;; Initialize with first promise
-      (reset! resume-promise-atom (promise))
       (p/save-event store wf-id {:event-type :workflow-started
                                  :workflow-id wf-id
                                  :args (vec args)
@@ -58,21 +59,19 @@
         (p/on-workflow-started observer wf-id args))
       (log/info "Workflow started")
       (try
-        ;; Execute initial workflow run
-        (let [initial-result (exec/run-workflow-internal engine wf-id workflow-fn args
-                                                   {:observer observer
-                                                    :max-iterations max-iterations
-                                                    :wake-fn wake-fn})]
-          ;; Loop to handle multiple wait cycles
-          (loop [result initial-result]
-            (log/infof "Got result %s with status %s" (:result initial-result) (:status initial-result))
-            (if (#{:waiting-timer :waiting-signal :waiting-signal-timeout :waiting-async} (:status result))
-              (do
-                (log/infof "Workflow waiting for promise: %s" (:status result))
-                (let [next-promise @resume-promise-atom
-                      next-result  @next-promise]
-                  (recur next-result)))
-              result)))
+        (loop [result (run-once)]
+          (log/infof "Got result %s with status %s" (:result result) (:status result))
+          (if (waiting-statuses (:status result))
+            (do
+              (log/infof "Workflow waiting: %s" (:status result))
+              ;; Block until woken. A token enqueued before this take (signal
+              ;; arrived during suspension setup) returns immediately — no edge
+              ;; is lost. Drain any extra tokens so one re-run covers coalesced
+              ;; wakes; a wake arriving during the re-run queues for next take.
+              (.take wake-q)
+              (.clear wake-q)
+              (recur (run-once)))
+            result))
         (catch Exception e
           (log/warnf e "Caught exception")
           (throw e))))))
diff --git a/src/intemporal/protocol.cljc b/src/intemporal/protocol.cljc
index 9ee6f07..2a8a873 100644
--- a/src/intemporal/protocol.cljc
+++ b/src/intemporal/protocol.cljc
@@ -16,6 +16,8 @@
   (consume-signal [store workflow-id signal-name] "Consume and remove a signal")
   (register-signal-callback [store workflow-id signal-name callback] "Register callback to be invoked when signal arrives")
   (unregister-signal-callback [store workflow-id signal-name] "Unregister signal callback")
+  (register-wake-callback [store workflow-id callback] "Register a generic wake callback, fired by wake-workflow to force the workflow to re-enter its execution loop (e.g. to observe cancellation)")
+  (wake-workflow [store workflow-id] "Fire the registered wake callback for a workflow, forcing it to re-enter its loop and re-evaluate state such as the cancellation flag. No-op if none registered.")
   (is-cancelled? [store workflow-id] "Check if workflow is cancelled")
   (mark-cancelled [store workflow-id] "Mark workflow as cancelled")
   (get-workflow-status [store workflow-id] "Get current workflow status"))
diff --git a/src/intemporal/store.cljc b/src/intemporal/store.cljc
index 277128f..874b428 100644
--- a/src/intemporal/store.cljc
+++ b/src/intemporal/store.cljc
@@ -60,6 +60,14 @@
   (unregister-signal-callback [_ workflow-id signal-name]
     (swap! state update-in [:workflows workflow-id :signal-callbacks] dissoc signal-name))
 
+  (register-wake-callback [_ workflow-id callback]
+    (swap! state assoc-in [:workflows workflow-id :wake-callback] callback))
+
+  (wake-workflow [_ workflow-id]
+    (when-let [callback (get-in @state [:workflows workflow-id :wake-callback])]
+      #?(:clj (future (callback))
+         :cljs (js/setTimeout callback 0))))
+
   (is-cancelled? [_ workflow-id]
     (get-in @state [:workflows workflow-id :cancelled] false))
 
diff --git a/src/intemporal/store/fdb.clj b/src/intemporal/store/fdb.clj
index 1bbb1a2..65a73ef 100644
--- a/src/intemporal/store/fdb.clj
+++ b/src/intemporal/store/fdb.clj
@@ -112,6 +112,13 @@
   (unregister-signal-callback [_ workflow-id signal-name]
     (swap! callbacks update workflow-id dissoc signal-name))
 
+  (register-wake-callback [_ workflow-id callback]
+    (swap! callbacks assoc-in [workflow-id ::wake] callback))
+
+  (wake-workflow [_ workflow-id]
+    (when-let [callback (get-in @callbacks [workflow-id ::wake])]
+      (future (callback))))
+
   (is-cancelled? [_ workflow-id]
     (ftr/run db
       (fn [tx]
diff --git a/src/intemporal/store/jdbc.clj b/src/intemporal/store/jdbc.clj
index 30be0e1..b65a02d 100644
--- a/src/intemporal/store/jdbc.clj
+++ b/src/intemporal/store/jdbc.clj
@@ -92,7 +92,10 @@
         ;; Ensure workflow exists
         (jdbc/execute! tx ["INSERT INTO intemporal_workflows (id) VALUES (?) ON CONFLICT (id) DO NOTHING"
                            workflow-id])
-        ;; Insert events
+        ;; Insert events. DO UPDATE keeps the write idempotent under normal
+        ;; replay (the engine re-writes the same seq with identical data on
+        ;; each pass). Rejecting a *concurrent* writer is the lease's job
+        ;; (Phase C) — see validate-lease in save-events there.
         (doseq [event events]
           (let [seq-num    (:seq event)
                 event-type (name (:event-type event))
@@ -146,6 +149,13 @@
   (unregister-signal-callback [_ workflow-id signal-name]
     (swap! callbacks update workflow-id dissoc signal-name))
 
+  (register-wake-callback [_ workflow-id callback]
+    (swap! callbacks assoc-in [workflow-id ::wake] callback))
+
+  (wake-workflow [_ workflow-id]
+    (when-let [callback (get-in @callbacks [workflow-id ::wake])]
+      (future (callback))))
+
   (is-cancelled? [_ workflow-id]
     (let [row (jdbc/execute-one! datasource
                                  ["SELECT cancelled FROM intemporal_workflows WHERE id = ?"
diff --git a/test/intemporal/tests/jepsen/bug_1_1_test.clj b/test/intemporal/tests/jepsen/bug_1_1_test.clj
new file mode 100644
index 0000000..5513d3a
--- /dev/null
+++ b/test/intemporal/tests/jepsen/bug_1_1_test.clj
@@ -0,0 +1,83 @@
+(ns intemporal.tests.jepsen.bug-1-1-test
+  "Bug 1.1 — Signal sent via a second store instance never wakes a workflow.
+
+  Root cause (improvements.md §1.1):
+    register-signal-callback stores the wake-fn in a process-local atom on
+    the store record (JdbcStore.callbacks, FDBStore.callbacks, InMemoryStore.state).
+    When add-signal is called from a DIFFERENT store instance — representing a
+    second pod, a new engine, or any caller that didn't start the workflow — the
+    callback atom is empty and the workflow is never woken.
+
+  These tests assert the CURRENT (buggy) behaviour.  They will fail once the
+  fix from improvements.md §C3/C5 is applied (durable runnable markers)."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as mem]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]))
+
+;; ── Shared workflow ───────────────────────────────────────────────────────────
+
+(defn- wait-signal-wf []
+  (intemporal/wait-for-signal "go")
+  :woke)
+
+;; ── Shared scenario ───────────────────────────────────────────────────────────
+
+(defn- run-scenario
+  "Starts the workflow using store-a, then sends the signal via store-b.
+  Returns :stuck if the workflow never wakes, :woke otherwise."
+  [store-a store-b]
+  (let [wf-id  (str "bug11-" (random-uuid))
+        result (promise)
+        engine (intemporal/make-workflow-engine :store store-a :threads 2)]
+    (future
+      (try
+        (deliver result (intemporal/start-workflow engine wait-signal-wf []
+                                                   :workflow-id wf-id))
+        (catch Exception e (deliver result {:error (str e)}))))
+    (Thread/sleep 400)
+    ;; Send signal via a DIFFERENT store instance — simulates another pod.
+    ;; store-b has an empty callbacks atom so the wake-fn is never called.
+    (p/add-signal store-b wf-id "go" {:source :store-b})
+    (let [r (deref result 2000 :stuck)]
+      (intemporal/shutdown-engine engine)
+      r)))
+
+;; ── In-memory tests (always run) ─────────────────────────────────────────────
+
+(deftest signal-not-delivered-across-in-memory-stores
+  (testing "Two separate InMemoryStore instances do not share callbacks"
+    (let [store-a (mem/->InMemoryStore (atom {}))
+          store-b (mem/->InMemoryStore (atom {}))]
+      (is (= :stuck (run-scenario store-a store-b))
+          "Signal written to store-b; store-a's callback atom is empty → workflow never wakes (bug 1.1)"))))
+
+;; ── JDBC tests (require Postgres) ────────────────────────────────────────────
+
+(deftest ^:integration signal-not-delivered-across-jdbc-stores
+  (testing "Two JdbcStore instances against the same Postgres do not share callbacks"
+    (let [url    (or (System/getenv "DATABASE_URL")
+                     "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store-a (jdbc-store/make-jdbc-store url)
+          store-b (jdbc-store/make-jdbc-store url)]
+      (try
+        (is (= :stuck (run-scenario store-a store-b))
+            "Signal row in intemporal_signals; store-b's callbacks atom empty → no wake (bug 1.1)")
+        (finally
+          (.close store-a)
+          (.close store-b))))))
+
+;; ── FDB tests (require FoundationDB) ─────────────────────────────────────────
+
+(deftest ^:integration signal-not-delivered-across-fdb-stores
+  (testing "Two FDBStore instances against the same FoundationDB do not share callbacks"
+    (let [root    (str "bug11-" (random-uuid))
+          fdb     (cfdb/select-api-version 730)
+          db      (.open fdb "docker/fdb.cluster")
+          store-a (fdb-store/make-fdb-store db root)
+          store-b (fdb-store/make-fdb-store db root)]
+      (is (= :stuck (run-scenario store-a store-b))
+          "Signal in FDB; store-b's callbacks atom empty → no wake (bug 1.1)"))))
diff --git a/test/intemporal/tests/jepsen/bug_1_2_test.clj b/test/intemporal/tests/jepsen/bug_1_2_test.clj
new file mode 100644
index 0000000..183f38e
--- /dev/null
+++ b/test/intemporal/tests/jepsen/bug_1_2_test.clj
@@ -0,0 +1,92 @@
+(ns intemporal.tests.jepsen.bug-1-2-test
+  "Bug 1.2 — Concurrent save-events at the same (workflow-id, seq) corrupts history.
+
+  Root cause (improvements.md §1.2):
+    JDBC:  INSERT … ON CONFLICT (workflow_id, seq) DO UPDATE silently overwrites
+           the losing write.  Both callers receive no error, but only one event
+           survives in intemporal_history.  The discarded write is invisible.
+    FDB:   save-events keys events as [seq, uuid], so two concurrent writes at
+           the same seq both survive as separate rows.  load-history returns
+           both, making the history non-deterministic.
+    Mem:   InMemoryStore.save-events appends unconditionally (swap! conj), so
+           duplicate-seq events accumulate in the vector.
+
+  Both outcomes violate the invariant that seq numbers are unique within a
+  workflow's history — breaking deterministic replay.
+
+  These tests assert the CURRENT (buggy) behaviour.  They will fail once the
+  fix from improvements.md §A3 is applied (DO NOTHING + conflict exception)."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.protocol :as p]
+            [intemporal.store :as mem]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]))
+
+;; ── Shared scenario ───────────────────────────────────────────────────────────
+
+(defn- run-scenario
+  "Fires two concurrent writes at seq=0, waits for both, then reads back history.
+  Returns {:writes [result-a result-b] :seq0-count n :seq0-events [...]}."
+  [store]
+  (let [wf-id   (str "bug12-" (random-uuid))
+        event-a {:event-type :workflow-started :seq 0 :writer "thread-a"
+                 :timestamp  (System/currentTimeMillis)}
+        event-b {:event-type :workflow-started :seq 0 :writer "thread-b"
+                 :timestamp  (System/currentTimeMillis)}
+        latch   (promise)
+        fa      (future (deref latch)
+                        (try (p/save-events store wf-id [event-a]) :ok
+                             (catch Exception e {:error (str e)})))
+        fb      (future (deref latch)
+                        (try (p/save-events store wf-id [event-b]) :ok
+                             (catch Exception e {:error (str e)})))]
+    (deliver latch :go)
+    (let [ra @fa
+          rb @fb
+          h  (p/load-history store wf-id)]
+      {:writes       [ra rb]
+       :seq0-count   (count (filter #(= 0 (:seq %)) h))
+       :seq0-events  (filter #(= 0 (:seq %)) h)})))
+
+;; ── In-memory tests (always run) ─────────────────────────────────────────────
+
+(deftest concurrent-seq-write-appends-both-in-memory
+  (testing "InMemoryStore appends both events, producing duplicate seq=0"
+    (let [store  (mem/->InMemoryStore (atom {}))
+          {:keys [writes seq0-count]} (run-scenario store)]
+      (is (every? #{:ok} writes)
+          "Both writes return :ok — no conflict signalled")
+      (is (> seq0-count 1)
+          (str "History has " seq0-count " events at seq=0 — duplicate seq (bug 1.2)")))))
+
+;; ── JDBC tests (require Postgres) ────────────────────────────────────────────
+
+(deftest ^:integration concurrent-seq-write-silently-clobbered-jdbc
+  (testing "JDBC: ON CONFLICT DO UPDATE silently discards one write"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try
+        (let [{:keys [writes seq0-count seq0-events]} (run-scenario store)]
+          (is (every? #{:ok} writes)
+              "Both writes return :ok — DO UPDATE never raises a conflict error")
+          (is (= 1 seq0-count)
+              "Exactly one row at seq=0 — the other write was silently discarded (bug 1.2)")
+          (is (contains? #{"thread-a" "thread-b"} (:writer (first seq0-events)))
+              "Surviving writer is whichever won the race — non-deterministic"))
+        (finally (.close store))))))
+
+;; ── FDB tests (require FoundationDB) ─────────────────────────────────────────
+
+(deftest ^:integration concurrent-seq-write-produces-duplicates-fdb
+  (testing "FDB: UUID-keyed writes store both events at seq=0"
+    (let [root  (str "bug12-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (let [{:keys [writes seq0-count]} (run-scenario store)]
+        (is (every? #{:ok} writes)
+            "Both writes return :ok")
+        (is (> seq0-count 1)
+            (str "History has " seq0-count " events at seq=0 — duplicate seq (bug 1.2)"))))))
diff --git a/test/intemporal/tests/jepsen/bug_1_3_test.clj b/test/intemporal/tests/jepsen/bug_1_3_test.clj
new file mode 100644
index 0000000..d6a66a0
--- /dev/null
+++ b/test/intemporal/tests/jepsen/bug_1_3_test.clj
@@ -0,0 +1,92 @@
+(ns intemporal.tests.jepsen.bug-1-3-test
+  "Bug 1.3 — No recovery poller: engine restart does not resume suspended workflows.
+
+  Root cause (improvements.md §1.3):
+    There is no background process that scans for workflows requiring execution
+    after a restart.  resume-workflow is on-demand only and requires the caller
+    to supply both the workflow function and original arguments.  A new engine
+    with a fresh store (empty callbacks atom) has no way to discover or re-enter
+    workflows that were suspended before the restart.
+
+  Scenario:
+    1. engine-a starts a workflow that suspends on wait-for-signal.
+    2. engine-a is shut down (simulating a pod crash or rolling restart).
+    3. engine-b is created with a FRESH store instance pointing at the same
+       backing database — exactly what a restarted pod would do.
+    4. The signal is sent via engine-b's store.
+    5. engine-b has no poller: the workflow is never resumed automatically.
+
+  These tests assert the CURRENT (buggy) behaviour.  They will fail once the
+  fix from improvements.md §B3 + §C4 is applied (workflow registry + worker loop)."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as mem]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]))
+
+;; ── Shared workflow ───────────────────────────────────────────────────────────
+
+(defn- wait-signal-wf []
+  (intemporal/wait-for-signal "go")
+  :woke)
+
+;; ── Shared scenario ───────────────────────────────────────────────────────────
+
+(defn- run-scenario
+  "Starts workflow on store-a/engine-a, shuts down engine-a, creates engine-b
+  with a fresh store-b, sends signal via store-b, waits.
+  Returns :stuck if engine-b does not auto-resume the workflow."
+  [make-store-a make-store-b]
+  (let [store-a  (make-store-a)
+        wf-id    (str "bug13-" (random-uuid))
+        result   (promise)
+        engine-a (intemporal/make-workflow-engine :store store-a :threads 2)]
+    (future
+      (try
+        (deliver result (intemporal/start-workflow engine-a wait-signal-wf []
+                                                   :workflow-id wf-id))
+        (catch Exception e (deliver result {:error (str e)}))))
+    ;; Wait for the workflow to register its callback
+    (Thread/sleep 500)
+    ;; Simulate crash: shut down engine-a and discard store-a
+    (intemporal/shutdown-engine engine-a)
+    (when (instance? java.io.Closeable store-a) (.close store-a))
+    ;; Simulate pod restart: new store with empty callbacks atom
+    (let [store-b  (make-store-b)
+          engine-b (intemporal/make-workflow-engine :store store-b :threads 2)]
+      (p/add-signal store-b wf-id "go" {:source :engine-b-restart})
+      (let [r (deref result 2000 :stuck)]
+        (intemporal/shutdown-engine engine-b)
+        (when (instance? java.io.Closeable store-b) (.close store-b))
+        r))))
+
+;; ── In-memory tests (always run) ─────────────────────────────────────────────
+
+(deftest engine-restart-does-not-resume-in-memory
+  (testing "A fresh InMemoryStore after engine restart has empty callbacks"
+    (is (= :stuck (run-scenario #(mem/->InMemoryStore (atom {}))
+                                #(mem/->InMemoryStore (atom {}))))
+        "No recovery poller: workflow stays suspended after engine-a crash + engine-b start (bug 1.3)")))
+
+;; ── JDBC tests (require Postgres) ────────────────────────────────────────────
+
+(deftest ^:integration engine-restart-does-not-resume-jdbc
+  (testing "A fresh JdbcStore after engine restart has empty callbacks atom"
+    (let [url (or (System/getenv "DATABASE_URL")
+                  "jdbc:postgresql://localhost:5432/root?user=root&password=root")]
+      (is (= :stuck (run-scenario #(jdbc-store/make-jdbc-store url)
+                                  #(jdbc-store/make-jdbc-store url)))
+          "Signal row in intemporal_signals; engine-b has no poller to find it (bug 1.3)"))))
+
+;; ── FDB tests (require FoundationDB) ─────────────────────────────────────────
+
+(deftest ^:integration engine-restart-does-not-resume-fdb
+  (testing "A fresh FDBStore after engine restart has empty callbacks atom"
+    (let [root (str "bug13-" (random-uuid))
+          fdb  (cfdb/select-api-version 730)
+          db   (.open fdb "docker/fdb.cluster")]
+      (is (= :stuck (run-scenario #(fdb-store/make-fdb-store db root)
+                                  #(fdb-store/make-fdb-store db root)))
+          "Signal in FDB; engine-b has no poller to find it (bug 1.3)"))))
diff --git a/test/intemporal/tests/jepsen/bug_2_1_test.clj b/test/intemporal/tests/jepsen/bug_2_1_test.clj
new file mode 100644
index 0000000..b30fcfe
--- /dev/null
+++ b/test/intemporal/tests/jepsen/bug_2_1_test.clj
@@ -0,0 +1,112 @@
+(ns intemporal.tests.jepsen.bug-2-1-test
+  "Bug 2.1 — Register-then-consume signal race in process-signal.  REGRESSION GUARD.
+
+  Root cause (improvements.md §2.1) — now FIXED (Phase A1):
+    process-signal previously did consume-check THEN register-callback.  A
+    signal arriving in that window fired into an empty callbacks atom and was
+    lost, stranding the workflow forever.
+
+    The fix (execution.clj/.cljs process-signal) reverses the order: register
+    the callback FIRST, then consume-check.  consume-signal is atomic, so
+    exactly one of {the inline check, the callback} consumes the signal; the
+    callback only wakes if it consumed, so the inline path never double-runs.
+
+  Mechanism:
+    RacingStore (intemporal.tests.jepsen.racing-store) deterministically pins
+    the executing thread at the consume-check and lets the test inject a signal
+    at exactly the adversarial moment.  Because the callback is now registered
+    BEFORE that consume-check, inner.add-signal finds it and fires it — the
+    workflow wakes and completes on every run.
+
+  These tests assert the FIXED behaviour: the workflow wakes, completes, and
+  leaves no orphaned signal.  They will fail again if the race is reintroduced."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as mem]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [intemporal.tests.jepsen.racing-store :refer [->RacingStore]]))
+
+;; ── Shared workflow ───────────────────────────────────────────────────────────
+
+(defn- wait-signal-wf []
+  (intemporal/wait-for-signal "go")
+  :woke)
+
+;; ── Shared scenario ───────────────────────────────────────────────────────────
+
+(defn- run-scenario
+  "Drives the race against any store via RacingStore.  Returns a map:
+    :result   — the start-workflow result map, or ::timeout if it never woke
+    :pending  — pending-signal names still in the store after the race
+    :status   — workflow status from the store"
+  [inner]
+  (let [gate-nil  (promise)
+        gate-sent (promise)
+        store     (->RacingStore inner gate-nil gate-sent (atom true))
+        wf-id     (str "bug21-" (random-uuid))
+        result    (promise)
+        engine    (intemporal/make-workflow-engine :store store :threads 2)]
+    (future
+      (try
+        (deliver result (intemporal/start-workflow engine wait-signal-wf []
+                                                   :workflow-id wf-id))
+        (catch Exception e (deliver result {:error (str e)}))))
+
+    (let [gate-info (deref gate-nil 5000 ::timeout)]
+      (when (= ::timeout gate-info)
+        (intemporal/shutdown-engine engine)
+        (throw (ex-info "Race gate never opened" {:wf-id wf-id})))
+      ;; Gate open: the callback is already registered (Phase A1).  Inject the
+      ;; signal in the window — inner.add-signal finds the callback and fires it.
+      (p/add-signal inner wf-id "go" {:source :injected-in-race-window})
+      (deliver gate-sent :signal-injected)
+      (let [r       (deref result 3000 ::timeout)
+            pending (p/get-pending-signals inner wf-id)
+            status  (p/get-workflow-status inner wf-id)]
+        (intemporal/shutdown-engine engine)
+        {:result  r
+         ;; Count remaining signal *values*, not keys: InMemoryStore leaves an
+         ;; empty vector under the signal name after consuming, while JDBC/FDB
+         ;; delete the row. Both mean "nothing left to deliver".
+         :pending-count (reduce + 0 (map count (vals pending)))
+         :status  status}))))
+
+(defn- assert-woke [{:keys [result pending-count status]}]
+  (is (not= ::timeout result)
+      "Workflow woke and completed — the in-window signal was delivered (bug 2.1 fixed)")
+  (is (= :completed (:status result))
+      "start-workflow returned a :completed result")
+  (is (zero? pending-count)
+      "No signal left pending — it was consumed exactly once")
+  (is (= :completed status)
+      "Workflow status is :completed"))
+
+;; ── In-memory (always runs) ───────────────────────────────────────────────────
+
+(deftest signal-delivered-in-register-consume-window-in-memory
+  (testing "RacingStore on InMemoryStore: in-window signal wakes the workflow"
+    (assert-woke (run-scenario (mem/->InMemoryStore (atom {}))))))
+
+;; ── JDBC (requires Postgres) ──────────────────────────────────────────────────
+
+(deftest ^:integration signal-delivered-in-register-consume-window-jdbc
+  (testing "RacingStore on JdbcStore: in-window signal wakes the workflow"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          inner (jdbc-store/make-jdbc-store url)]
+      (try
+        (assert-woke (run-scenario inner))
+        (finally (.close inner))))))
+
+;; ── FDB (requires FoundationDB) ───────────────────────────────────────────────
+
+(deftest ^:integration signal-delivered-in-register-consume-window-fdb
+  (testing "RacingStore on FDBStore: in-window signal wakes the workflow"
+    (let [root  (str "bug21-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          inner (fdb-store/make-fdb-store db root)]
+      (assert-woke (run-scenario inner)))))
diff --git a/test/intemporal/tests/jepsen/bug_2_3_test.clj b/test/intemporal/tests/jepsen/bug_2_3_test.clj
new file mode 100644
index 0000000..157dd47
--- /dev/null
+++ b/test/intemporal/tests/jepsen/bug_2_3_test.clj
@@ -0,0 +1,95 @@
+(ns intemporal.tests.jepsen.bug-2-3-test
+  "Bug 2.3 — Cancellation reaching a workflow sleeping in wait-for-signal.  REGRESSION GUARD.
+
+  Root cause (improvements.md §2.3) — now FIXED (Phase A2):
+    cancel-workflow set the cancelled flag but did nothing to wake a workflow
+    parked on wait-for-signal.  Such a workflow never re-entered its loop, so
+    check-cancelled! never fired and the cancellation was silently ignored —
+    the workflow (and its thread) stayed alive forever.
+
+    The fix adds IStore/wake-workflow plus a generic wake callback registered
+    whenever a workflow suspends (execution.clj/.cljs run-workflow-internal).
+    cancel-workflow now calls mark-cancelled THEN wake-workflow, forcing the
+    sleeper to re-enter, observe the flag at the loop-top cancel check, and
+    finalize.
+
+  These tests assert the FIXED behaviour:
+    • the workflow TERMINATES (start-workflow returns; no longer stuck)
+    • is-cancelled? is true
+    • get-workflow-status is :cancelled
+  They will fail again if cancel stops waking sleepers."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as mem]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]))
+
+;; ── Shared workflow ───────────────────────────────────────────────────────────
+
+(defn- cancel-sleep-wf []
+  (intemporal/wait-for-signal "wake")
+  :woke)
+
+;; ── Shared scenario ───────────────────────────────────────────────────────────
+
+(defn- run-scenario
+  "Starts a workflow that sleeps on a signal, cancels it, and observes whether
+  the cancellation actually terminates it.  Returns
+    :terminated? :cancelled-flag-set? :status."
+  [store]
+  (let [wf-id  (str "bug23-" (random-uuid))
+        result (promise)
+        engine (intemporal/make-workflow-engine :store store :threads 2)]
+    (future
+      (try
+        (deliver result (intemporal/start-workflow engine cancel-sleep-wf []
+                                                   :workflow-id wf-id))
+        (catch Exception e (deliver result {:error (str e)}))))
+    ;; Wait for the workflow to suspend and register its wake callback
+    (Thread/sleep 400)
+    ;; Cancel: sets the flag AND wakes the sleeper (Phase A2)
+    (intemporal/cancel-workflow store wf-id)
+    (let [r      (deref result 2000 :stuck)
+          flag?  (p/is-cancelled? store wf-id)
+          status (p/get-workflow-status store wf-id)]
+      (intemporal/shutdown-engine engine)
+      {:terminated?         (not= :stuck r)
+       :cancelled-flag-set? flag?
+       :status              status})))
+
+(defn- assert-cancelled [{:keys [terminated? cancelled-flag-set? status]}]
+  (is terminated?
+      "Workflow terminated after cancel — wake-workflow forced loop re-entry (bug 2.3 fixed)")
+  (is cancelled-flag-set?
+      "Cancelled flag is set in the store")
+  (is (= :cancelled status)
+      "Workflow status is :cancelled"))
+
+;; ── In-memory (always runs) ───────────────────────────────────────────────────
+
+(deftest cancellation-reaches-sleeping-workflow-in-memory
+  (testing "cancel-workflow terminates a signal-sleeping workflow (InMemoryStore)"
+    (assert-cancelled (run-scenario (mem/->InMemoryStore (atom {}))))))
+
+;; ── JDBC (requires Postgres) ──────────────────────────────────────────────────
+
+(deftest ^:integration cancellation-reaches-sleeping-workflow-jdbc
+  (testing "cancel-workflow terminates a signal-sleeping workflow (JdbcStore)"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try
+        (assert-cancelled (run-scenario store))
+        (finally (.close store))))))
+
+;; ── FDB (requires FoundationDB) ───────────────────────────────────────────────
+
+(deftest ^:integration cancellation-reaches-sleeping-workflow-fdb
+  (testing "cancel-workflow terminates a signal-sleeping workflow (FDBStore)"
+    (let [root  (str "bug23-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (assert-cancelled (run-scenario store)))))
diff --git a/test/intemporal/tests/jepsen/racing_store.clj b/test/intemporal/tests/jepsen/racing_store.clj
new file mode 100644
index 0000000..04e86c2
--- /dev/null
+++ b/test/intemporal/tests/jepsen/racing_store.clj
@@ -0,0 +1,55 @@
+(ns intemporal.tests.jepsen.racing-store
+  "RacingStore: a store wrapper that deterministically injects the
+  register-then-consume signal race described in improvements.md §2.1.
+
+  The race window in process-signal (execution.clj:227-255) is:
+
+    (1) (p/consume-signal store wf-id name) → nil       ; no signal present
+        ← WINDOW: sender calls add-signal here →
+    (2) (p/register-signal-callback store wf-id name f) ; now registers wake-fn
+
+  add-signal in step (1.5) checks the callbacks atom and finds it empty —
+  the wake-fn fires into nothing.  After step (2) the callback is registered
+  but add-signal has already run; it will never re-fire retroactively.
+  The workflow is permanently stuck with the signal row sitting in the store.
+
+  RacingStore widens and synchronises this window with two promises:
+    gate-nil  — delivered by RacingStore after consume-signal returns nil
+    gate-sent — delivered by the test after add-signal is called
+
+  Usage:
+    (let [gate-nil  (promise)
+          gate-sent (promise)
+          inner     (make-your-store)
+          store     (->RacingStore inner gate-nil gate-sent (atom true))]
+      ;; start workflow using `store` …
+      (deref gate-nil 5000 :timeout)          ; wait: consume-check done
+      (p/add-signal inner wf-id sig payload)  ; inject signal into window
+      (deliver gate-sent :go)                 ; close window, let process-signal continue
+      …)"
+  (:require [intemporal.protocol :as p]))
+
+(defrecord RacingStore [inner gate-nil gate-sent armed?]
+  p/IStore
+  (load-history            [_ wf-id]         (p/load-history inner wf-id))
+  (save-event              [_ wf-id ev]      (p/save-event inner wf-id ev))
+  (save-events             [_ wf-id evs]     (p/save-events inner wf-id evs))
+  (find-event              [_ wf-id et sq]   (p/find-event inner wf-id et sq))
+  (get-pending-signals     [_ wf-id]         (p/get-pending-signals inner wf-id))
+  (add-signal              [_ wf-id sn sd]   (p/add-signal inner wf-id sn sd))
+  (register-signal-callback [_ wf-id sn cb] (p/register-signal-callback inner wf-id sn cb))
+  (unregister-signal-callback [_ wf-id sn]  (p/unregister-signal-callback inner wf-id sn))
+  (register-wake-callback   [_ wf-id cb]    (p/register-wake-callback inner wf-id cb))
+  (wake-workflow            [_ wf-id]       (p/wake-workflow inner wf-id))
+  (is-cancelled?           [_ wf-id]        (p/is-cancelled? inner wf-id))
+  (mark-cancelled          [_ wf-id]        (p/mark-cancelled inner wf-id))
+  (get-workflow-status     [_ wf-id]        (p/get-workflow-status inner wf-id))
+
+  (consume-signal [_ wf-id sig-name]
+    (let [result (p/consume-signal inner wf-id sig-name)]
+      ;; Only intercept the FIRST nil return (armed? guards re-entrant calls).
+      (when (and (nil? result) (compare-and-set! armed? true false))
+        (deliver gate-nil {:wf-id wf-id :sig-name sig-name})
+        ;; Block until the test has injected the signal into the window.
+        (deref gate-sent 5000 :timeout-in-racing-store))
+      result)))

From ea9a385091d80c3065fb5d521e9b34d0c102fd7d Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Sun, 31 May 2026 18:36:02 +0100
Subject: [PATCH 3/9] phase b

---
 DEVELOPMENT.md                                | 174 +++++++++++++-----
 .../20260531000001-add-status.down.sql        |   3 +
 .../postgres/20260531000001-add-status.up.sql |   7 +
 src/intemporal/core.cljc                      |  79 +++++++-
 .../internal/fns/start_workflow.clj           |  10 +-
 .../internal/workflow_registry.cljc           |  55 ++++++
 src/intemporal/store.cljc                     |  24 ++-
 src/intemporal/store/fdb.clj                  |  45 +++--
 src/intemporal/store/jdbc.clj                 |  18 +-
 test/intemporal/tests/status_test.clj         |  52 ++++++
 .../intemporal/tests/submit_workflow_test.clj |  39 ++++
 .../tests/workflow_registry_test.clj          |  57 ++++++
 12 files changed, 484 insertions(+), 79 deletions(-)
 create mode 100644 resources/migrations/postgres/20260531000001-add-status.down.sql
 create mode 100644 resources/migrations/postgres/20260531000001-add-status.up.sql
 create mode 100644 src/intemporal/internal/workflow_registry.cljc
 create mode 100644 test/intemporal/tests/status_test.clj
 create mode 100644 test/intemporal/tests/submit_workflow_test.clj
 create mode 100644 test/intemporal/tests/workflow_registry_test.clj

diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index ae6aa85..7cfd1a0 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -1,82 +1,158 @@
-# cljs repl
+# Development Guide
 
-```shell
-npx shadow-cljs watch dev
-```
+This document describes how to set up your development environment and contribute to the project.
 
-Observe browser window open with a message like:
-> Code entered in a browser-repl prompt will be evaluated here.
+## Prerequisites
 
-The, connect and select the appropriate shadow repl.
+- **Java 21+** (uses virtual threads)
+- **Clojure CLI** 1.12+
+- **Node.js** (for ClojureScript tests)
+- **Docker** + **Docker Compose** (for integration tests with PostgreSQL and FoundationDB)
 
-```clojure
-(require '[shadow.cljs.devtools.api :as shadow])
-(shadow/browser-repl)
-```
+## Quick Start
 
-# doc
+```bash
+# Install dependencies and run tests
+bin/kaocha
+```
 
-```shell
-npx shadow-cljs watch doc
-...
-shadow-cljs - HTTP server available at http://localhost:8000
-#open the browser
+## Project Structure
 
-# or
-npx shadow-cljs compile doc
-python -m http.server --directory public
+```
+intemporal/
+├── src/intemporal/        # Main source code
+│   ├── core.cljc          # Public API
+│   ├── protocol.cljc      # Core protocols (IStore, etc.)
+│   ├── store.cljc         # In-memory store
+│   ├── store/             # JDBC and FDB stores
+│   └── internal/          # Internal implementation
+├── test/                  # Tests
+├── dev/                   # Development utilities
+└── resources/migrations/  # Database migrations
 ```
 
-# cljs repl
+## Database Setup
 
+For integration and chaos tests, start the databases:
+
+```bash
+docker compose up -d postgresql foundation
 ```
-clj -A:dev:doc:cljs
-```
 
-# Tests
+- **PostgreSQL** on port 5432 — `jdbc:postgresql://localhost:5432/root?user=root&password=root`
+- **FoundationDB** on port 4500 — cluster file at `docker/fdb.cluster`
+
+Override the Postgres URL with `DATABASE_URL` (kaocha store/integration tests) or
+`POSTGRES_JDBC_URI` (the chaos harness) if your setup differs.
 
-```shell
+## Running Tests
+
+```bash
+# Everything: JVM + ClojureScript
+bin/kaocha
+
+# Fast JVM tests, skips ^:integration (no DB needed)
+bin/kaocha :in-memory
+
+# JVM tests incl. ^:integration (needs PostgreSQL + FoundationDB)
 bin/kaocha :test
+
+# ClojureScript tests (Node)
 bin/kaocha :test-cljs
 
-# or run everything
-bin/run-coverage
+# Focus a single namespace (use hyphens, not underscores)
+bin/kaocha :test --focus intemporal.tests.signal-test
+```
 
-# focusing
-./bin/kaocha :test --focus intemporal.tests.signal-test
+## Jepsen / Chaos Tests
 
-# cljs focus is a bit different
-./bin/kaocha :test-cljs --focus 'cljs:intemporal.tests.signal-test'
+There are **two** distinct things under the "jepsen" name.
 
-```
+### 1. Per-scenario bug guard tests — `test/intemporal/tests/jepsen/`
 
-# CI runs
+Deterministic single-JVM tests, one namespace per known failure mode, each exercising
+InMemory + JDBC + FDB. They double as regression guards: a *fixed* bug's test asserts the
+correct behaviour, an *unfixed* bug's test asserts the buggy behaviour it still exhibits.
 
-Install earthly: https://earthly.dev
+| Namespace | Bug (see `improvements.md`) | State |
+|---|---|---|
+| `bug-1-1-test` | Lost wake on signal across pods | buggy (Phase C) |
+| `bug-1-2-test` | Concurrent same-seq write corruption | buggy (Phase C) |
+| `bug-1-3-test` | No recovery poller on restart | buggy (Phase C) |
+| `bug-2-1-test` | Register-then-consume signal race | **fixed** (Phase A) |
+| `bug-2-3-test` | Cancel can't reach a sleeper | **fixed** (Phase A) |
 
+```bash
+# in-memory variants only (no DB)
+bin/kaocha :in-memory --focus intemporal.tests.jepsen.bug-2-1-test \
+                      --focus intemporal.tests.jepsen.bug-2-3-test
+
+# all three stores (start PG + FDB first)
+docker compose up -d postgresql foundation
+bin/kaocha :test --focus intemporal.tests.jepsen.bug-1-1-test \
+                 --focus intemporal.tests.jepsen.bug-1-2-test \
+                 --focus intemporal.tests.jepsen.bug-1-3-test \
+                 --focus intemporal.tests.jepsen.bug-2-1-test \
+                 --focus intemporal.tests.jepsen.bug-2-3-test
 ```
-earthly -P -i +test
-```
 
-# Check FDB is working for your architecture
+`racing_store.clj` is a shared `IStore` wrapper that pins the executing thread inside the
+signal consume/register window so `bug-2-1` reproduces its race 100% deterministically.
+
+### 2. Forked-JVM chaos harness — `test/intemporal/jepsen/`
+
+Boots N worker JVMs against one Postgres, drives a submit/signal/cancel generator and a
+nemesis that SIGKILL/SIGTERMs and restarts workers, then checks invariants after a quiesce
+phase. This is the integration vehicle for the Phase C multi-pod work. Full design:
+[test/intemporal/jepsen/README.md](test/intemporal/jepsen/README.md).
+
+```bash
+docker compose up -d postgresql
+
+# default chaos run: 4 workers, 120s active, 90s grace
+clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run :workers 4 :duration 120
 
-```shell
+# no-kill baseline (should pass all checkers)
+clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run :workers 4 :duration 60 :no-kill true
 
-$ JAVA_OPTS="-DFDB_LIBRARY_PATH_FDB_C=/usr/local/lib/libfdb_c.dylib -DFDB_LIBRARY_PATH_FDB_JAVA=/usr/local/lib/libfdb_java.jnilib" clj -A:fdb:jdbc
+# aggressive
+clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run \
+  :workers 6 :duration 180 :nemesis-min-ms 1500 :nemesis-jitter-ms 3000 :min-alive 1 :grace-s 120
+```
+
+The runner forks workers via the `:jepsen-worker` alias; both `:jepsen` and `:jepsen-worker`
+are defined in `deps.edn`. The Postgres URL comes from `POSTGRES_JDBC_URI` (default localhost).
+
+### Standalone bug reproducer
 
-(import 'com.apple.foundationdb.JNIUtil)
-(let [method (.getDeclaredMethod com.apple.foundationdb.JNIUtil "loadLibrary" (into-array Class [String]))]
-  (.setAccessible method true)
-  (.invoke method com.apple.foundationdb.JNIUtil (object-array ["fdb_java"]))
-  (.invoke method com.apple.foundationdb.JNIUtil (object-array ["fdb_c"])))
+`dev/verify_bugs.clj` runs all five scenarios against JDBC + FDB and prints a pass/fail
+report — a quick end-to-end smoke check:
 
+```bash
+clojure -X:dev:jdbc:fdb verify-bugs/run
 ```
 
-# Telemetry
+### Known flaky test
 
-# Get the OT javaagent
+`intemporal.tests.replay-check-test/test-log-once-workflow` can fail under full-suite load
+(`run-once` persists its dedup marker lazily; parallel `async`/`join-all` can re-run the
+thunk). It is **pre-existing** (reproduces on pre-Phase-A commits) and unrelated to the
+signal/cancel work. It passes reliably in isolation.
 
-```shell
-wget --content-disposition https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v2.21.0/opentelemetry-javaagent.jar
+## REPL Development
+
+```bash
+clojure -A:dev            # REPL with dev + test deps
+clojure -A:dev:jdbc       # + PostgreSQL/JDBC
+clojure -A:dev:fdb        # + FoundationDB
+clojure -M:nrepl          # nREPL server on port 7888
 ```
-Run with the `dev` profile to activate the java agent.
\ No newline at end of file
+
+## Code Style
+
+- Follow standard Clojure conventions
+- Use `kebab-case` for functions and variables
+- Keep functions small and focused
+- Write tests for new functionality
+- File names use underscores (`signal_test.clj`); namespaces use hyphens (`signal-test`)
+- Always pass `--color=never` to `grep`
diff --git a/resources/migrations/postgres/20260531000001-add-status.down.sql b/resources/migrations/postgres/20260531000001-add-status.down.sql
new file mode 100644
index 0000000..a4cb13d
--- /dev/null
+++ b/resources/migrations/postgres/20260531000001-add-status.down.sql
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS idx_intemporal_workflows_status;
+--;;
+ALTER TABLE intemporal_workflows DROP COLUMN IF EXISTS status;
diff --git a/resources/migrations/postgres/20260531000001-add-status.up.sql b/resources/migrations/postgres/20260531000001-add-status.up.sql
new file mode 100644
index 0000000..96a418a
--- /dev/null
+++ b/resources/migrations/postgres/20260531000001-add-status.up.sql
@@ -0,0 +1,7 @@
+-- Phase B2: O(1) workflow status. Avoids scanning intemporal_history to derive
+-- the current status, and gives the Phase C recovery poller a cheap predicate.
+ALTER TABLE intemporal_workflows
+    ADD COLUMN IF NOT EXISTS status TEXT NOT NULL DEFAULT 'running';
+--;;
+CREATE INDEX IF NOT EXISTS idx_intemporal_workflows_status
+    ON intemporal_workflows (status);
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index c0a48ac..724f5d5 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -6,6 +6,7 @@
             [intemporal.internal.execution :as exec]
             [intemporal.internal.logging :as log]
             [intemporal.internal.fns.start-workflow :as sw]
+            [intemporal.internal.workflow-registry :as wreg]
             [intemporal.protocol :as p]
             [intemporal.store :as store]
             [intemporal.observer :as obs]
@@ -363,6 +364,50 @@
   [engine workflow-fn args & opts]
   (apply sw/start-workflow engine workflow-fn args opts))
 
+#?(:clj
+   (defn submit-workflow
+     "Start a workflow asynchronously and return {:workflow-id id} immediately,
+      without blocking the caller until completion (improvements.md §B4). The
+      workflow runs on a background thread; use await-workflow to wait for the
+      result, or resume-workflow/get-workflow-status to observe it later.
+
+      Accepts the same options as start-workflow (:workflow-id, :observer, …)."
+     [engine workflow-fn args & opts]
+     (let [m     (apply hash-map opts)
+           wid   (or (:workflow-id m) (str (random-uuid)))
+           opts' (mapcat identity (assoc m :workflow-id wid))]
+       (future
+         (try
+           (apply sw/start-workflow engine workflow-fn args opts')
+           (catch Throwable t
+             (log/warnf t "submit-workflow background run failed"))))
+       {:workflow-id wid})))
+
+#?(:clj
+   (defn await-workflow
+     "Block until the workflow reaches a terminal state (:completed, :failed,
+      :cancelled) and return {:status … :result …}. Polls get-workflow-status;
+      a workflow id that is briefly :not-found (still starting) is tolerated.
+      Returns {:status :timeout} if the deadline elapses first."
+     [{:keys [store]} workflow-id & {:keys [poll-ms timeout-ms]
+                                     :or   {poll-ms 50 timeout-ms 30000}}]
+     (let [deadline (+ (System/currentTimeMillis) timeout-ms)]
+       (loop []
+         (let [st (p/get-workflow-status store workflow-id)]
+           (cond
+             (#{:completed :failed :cancelled} st)
+             {:status st
+              :result (->> (p/load-history store workflow-id)
+                           (filter #(= :workflow-completed (:event-type %)))
+                           first
+                           :result)}
+
+             (> (System/currentTimeMillis) deadline)
+             {:status :timeout :workflow-id workflow-id}
+
+             :else
+             (do (Thread/sleep (long poll-ms)) (recur))))))))
+
 (defn resume-workflow
   "Resume a waiting workflow (e.g., after signal delivery or timer).
 
@@ -379,15 +424,31 @@
    Options:
    - :observer - IWorkflowObserver
    - :max-iterations - Maximum replay iterations"
-  [{:keys [store executor scheduler registry] :as engine} workflow-id workflow-fn args
-   & {:keys [observer max-iterations]
-      :or {max-iterations 1000}}]
-  (when observer
-    (p/on-workflow-resumed observer workflow-id))
-  (log/info "Workflow resumed")
-  (exec/run-workflow-internal engine workflow-id workflow-fn args
-                         {:observer observer
-                          :max-iterations max-iterations}))
+  ([{:keys [store] :as engine} workflow-id]
+   ;; Resolve fn + args from the :workflow-started event via the workflow
+   ;; registry (improvements.md §B3). Requires the workflow fn to have been
+   ;; registered in this process (start-workflow does so automatically; a
+   ;; restarted/other process must register its workflow vars at startup).
+   (let [history (p/load-history store workflow-id)
+         started (first (filter #(= :workflow-started (:event-type %)) history))]
+     (when-not started
+       (throw (ex-info "Cannot resume: no :workflow-started event in history"
+                       {:workflow-id workflow-id})))
+     (let [wf-name (:workflow-fn-name started)
+           wf-fn   (wreg/resolve-workflow wf-name)]
+       (when-not wf-fn
+         (throw (ex-info "Cannot resume: workflow function not registered"
+                         {:workflow-id workflow-id :workflow-fn-name wf-name})))
+       (resume-workflow engine workflow-id wf-fn (vec (:args started))))))
+  ([{:keys [store executor scheduler registry] :as engine} workflow-id workflow-fn args
+    & {:keys [observer max-iterations]
+       :or {max-iterations 1000}}]
+   (when observer
+     (p/on-workflow-resumed observer workflow-id))
+   (log/info "Workflow resumed")
+   (exec/run-workflow-internal engine workflow-id workflow-fn args
+                          {:observer observer
+                           :max-iterations max-iterations})))
 
 
 (defn send-signal
diff --git a/src/intemporal/internal/fns/start_workflow.clj b/src/intemporal/internal/fns/start_workflow.clj
index 3bea34d..d1e3c06 100644
--- a/src/intemporal/internal/fns/start_workflow.clj
+++ b/src/intemporal/internal/fns/start_workflow.clj
@@ -2,6 +2,7 @@
   (:require [intemporal.internal.execution :as exec]
             [intemporal.internal.logging :as log]
             [intemporal.internal.activity :as a]
+            [intemporal.internal.workflow-registry :as wreg]
             [intemporal.protocol :as p]
             [intemporal.utils :as utils])
   (:import [java.util.concurrent LinkedBlockingQueue]))
@@ -50,9 +51,14 @@
                                                                   (when observer
                                                                     (p/on-workflow-resumed observer wf-id))
                                                                   (.offer wake-q :wake))}))]
-    (log/with-mdc {:workflow-id wf-id}
+    ;; Record the workflow function under its stable name so the workflow can be
+    ;; resumed later by id alone (resume-workflow [engine wf-id]); the name is
+    ;; stored in the :workflow-started event below. (improvements.md §B3)
+    (let [wf-name (wreg/register-workflow! workflow-fn)]
+     (log/with-mdc {:workflow-id wf-id}
       (p/save-event store wf-id {:event-type :workflow-started
                                  :workflow-id wf-id
+                                 :workflow-fn-name wf-name
                                  :args (vec args)
                                  :timestamp (utils/current-time-ms)})
       (when observer
@@ -74,4 +80,4 @@
             result))
         (catch Exception e
           (log/warnf e "Caught exception")
-          (throw e))))))
+          (throw e)))))))
diff --git a/src/intemporal/internal/workflow_registry.cljc b/src/intemporal/internal/workflow_registry.cljc
new file mode 100644
index 0000000..d270c1b
--- /dev/null
+++ b/src/intemporal/internal/workflow_registry.cljc
@@ -0,0 +1,55 @@
+(ns intemporal.internal.workflow-registry
+  "Maps a stable workflow name -> workflow function so a workflow can be resumed
+   knowing only its id (the name + args are recorded in the :workflow-started
+   event). This is what lets a restarted process — or, in a multi-pod
+   deployment, a different pod — resume a workflow it did not itself start
+   (improvements.md §B3, load-bearing for the Phase C worker loop).
+
+   The registry is a process-global atom: each process registers the workflow
+   functions it can resolve (Temporal's model). start-workflow auto-registers
+   the function it is given, which covers same-process resume; for cross-process
+   resume the application must register its workflow vars at startup."
+  #?(:cljs (:require [clojure.string :as str])))
+
+(defonce ^{:doc "Process-global name -> workflow-fn registry."}
+  registry
+  (atom {}))
+
+(defn workflow-name
+  "Stable string name for a workflow function (a var or a top-level fn)."
+  [f]
+  #?(:clj
+     (if (var? f)
+       (subs (str f) 2)                      ; #'ns/name -> "ns/name"
+       (clojure.lang.Compiler/demunge        ; ns$fn_name -> "ns/fn-name"
+        (.getName (class f))))
+     :cljs
+     (if-let [raw (and (fn? f) (.-name f))]
+       (if (str/blank? raw)
+         (str f)
+         (let [parts (str/split raw #"\$")]
+           (if (> (count parts) 1)
+             (str (str/join "." (map #(str/replace % "_" "-") (butlast parts)))
+                  "/"
+                  (str/replace (last parts) "_" "-"))
+             (str/replace raw "_" "-"))))
+       (str f))))
+
+(defn register-workflow!
+  "Register a workflow function under its derived name (or an explicit name).
+   Accepts a var or a fn. Returns the name used."
+  ([f] (register-workflow! (workflow-name f) f))
+  ([name f]
+   (let [resolved (if (var? f) #?(:clj @f :cljs f) f)]
+     (swap! registry assoc name resolved)
+     name)))
+
+(defn resolve-workflow
+  "Return the registered workflow fn for `name`, or nil."
+  [name]
+  (get @registry name))
+
+(defn clear-registry!
+  "Test helper: empties the global registry."
+  []
+  (reset! registry {}))
diff --git a/src/intemporal/store.cljc b/src/intemporal/store.cljc
index 874b428..14a1fb8 100644
--- a/src/intemporal/store.cljc
+++ b/src/intemporal/store.cljc
@@ -11,14 +11,29 @@
     (get-in @state [:workflows workflow-id :history] []))
 
   (save-event [_ workflow-id event]
-    (swap! state update-in [:workflows workflow-id :history]
-           (fnil conj []) event)
+    (swap! state
+           (fn [s]
+             (let [s (update-in s [:workflows workflow-id :history] (fnil conj []) event)]
+               (case (:event-type event)
+                 :workflow-completed (assoc-in s [:workflows workflow-id :status] :completed)
+                 :workflow-failed    (assoc-in s [:workflows workflow-id :status] :failed)
+                 s))))
     event)
 
   (save-events [_ workflow-id events]
     (when (seq events)
-      (swap! state update-in [:workflows workflow-id :history]
-             (fnil into []) events))
+      (swap! state
+             (fn [s]
+               (let [s    (update-in s [:workflows workflow-id :history] (fnil into []) events)
+                     ;; Phase B2: cache terminal status for O(1) reads.
+                     term (some #(case (:event-type %)
+                                   :workflow-completed :completed
+                                   :workflow-failed    :failed
+                                   nil)
+                                events)]
+                 (if term
+                   (assoc-in s [:workflows workflow-id :status] term)
+                   s)))))
     events)
 
   (find-event [this worfklow-id event-type seq-num]
@@ -78,6 +93,7 @@
     (let [wf (get-in @state [:workflows workflow-id])]
       (cond
         (:cancelled wf) :cancelled
+        (#{:completed :failed} (:status wf)) (:status wf)   ; Phase B2 O(1) fast path
         (empty? (:history wf)) :not-found
         :else (let [last-event (last (:history wf))]
                 (case (:event-type last-event)
diff --git a/src/intemporal/store/fdb.clj b/src/intemporal/store/fdb.clj
index 65a73ef..04aa4da 100644
--- a/src/intemporal/store/fdb.clj
+++ b/src/intemporal/store/fdb.clj
@@ -47,21 +47,35 @@
   (save-event [_ workflow-id event]
     (let [history-sub (fsub/get root-subspace (->tuple ["history" workflow-id]))
           seq-num (:seq event (System/currentTimeMillis))
-          key (->tuple [seq-num (str (java.util.UUID/randomUUID))])]
+          key (->tuple [seq-num (str (java.util.UUID/randomUUID))])
+          term (case (:event-type event)
+                 :workflow-completed "completed"
+                 :workflow-failed    "failed"
+                 nil)]
       (ftr/run db
         (fn [tx]
-          (fdb-core/set tx history-sub key (->bytes event))))
+          (fdb-core/set tx history-sub key (->bytes event))
+          ;; Phase B2: cache terminal status for O(1) reads.
+          (when term
+            (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))))
       event))
 
   (save-events [_ workflow-id events]
     (when (seq events)
-      (let [history-sub (fsub/get root-subspace (->tuple ["history" workflow-id]))]
+      (let [history-sub (fsub/get root-subspace (->tuple ["history" workflow-id]))
+            term        (some #(case (:event-type %)
+                                 :workflow-completed "completed"
+                                 :workflow-failed    "failed"
+                                 nil)
+                              events)]
         (ftr/run db
           (fn [tx]
             (doseq [event events]
               (let [seq-num (:seq event (System/currentTimeMillis))
                     key (->tuple [seq-num (str (java.util.UUID/randomUUID))])]
-                (fdb-core/set tx history-sub key (->bytes event))))))))
+                (fdb-core/set tx history-sub key (->bytes event))))
+            (when term
+              (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))))))
     events)
 
   (find-event [this workflow-id event-type seq-num]
@@ -132,14 +146,21 @@
   (get-workflow-status [this workflow-id]
     (if (p/is-cancelled? this workflow-id)
       :cancelled
-      (let [history (p/load-history this workflow-id)]
-        (if (empty? history)
-          :not-found
-          (let [last-event (last history)]
-            (case (:event-type last-event)
-              :workflow-completed :completed
-              :workflow-failed :failed
-              :running)))))))
+      ;; Phase B2 fast path: terminal status cached at ["state" id "status"].
+      (let [cached (<-bytes (ftr/run db
+                              (fn [tx]
+                                (fdb-core/get tx root-subspace
+                                              (->tuple ["state" workflow-id "status"])))))]
+        (if (#{"completed" "failed"} cached)
+          (keyword cached)
+          (let [history (p/load-history this workflow-id)]
+            (if (empty? history)
+              :not-found
+              (let [last-event (last history)]
+                (case (:event-type last-event)
+                  :workflow-completed :completed
+                  :workflow-failed :failed
+                  :running)))))))))
 
 (defn make-fdb-store [db subspace-name]
   (let [root (fsub/create (->tuple [subspace-name]))]
diff --git a/src/intemporal/store/jdbc.clj b/src/intemporal/store/jdbc.clj
index b65a02d..fd1a4f5 100644
--- a/src/intemporal/store/jdbc.clj
+++ b/src/intemporal/store/jdbc.clj
@@ -103,7 +103,15 @@
             (jdbc/execute! tx ["INSERT INTO intemporal_history (workflow_id, seq, event_type, data)
                                 VALUES (?, ?, ?, ?)
                                 ON CONFLICT (workflow_id, seq) DO UPDATE SET event_type = EXCLUDED.event_type, data = EXCLUDED.data"
-                               workflow-id seq-num event-type data])))))
+                               workflow-id seq-num event-type data])))
+        ;; Phase B2: maintain the O(1) status column on terminal events.
+        (when-let [term (some (fn [e] (case (:event-type e)
+                                        :workflow-completed "completed"
+                                        :workflow-failed    "failed"
+                                        nil))
+                              events)]
+          (jdbc/execute! tx ["UPDATE intemporal_workflows SET status = ? WHERE id = ?"
+                             term workflow-id]))))
     events)
 
   (find-event [_ workflow-id event-type seq-num]
@@ -170,11 +178,15 @@
 
   (get-workflow-status [this workflow-id]
     (let [wf-row (jdbc/execute-one! datasource
-                                    ["SELECT cancelled FROM intemporal_workflows WHERE id = ?"
-                                     workflow-id])]
+                                    ["SELECT cancelled, status FROM intemporal_workflows WHERE id = ?"
+                                     workflow-id])
+          status (:intemporal_workflows/status wf-row)]
       (cond
         (nil? wf-row) :not-found
         (:intemporal_workflows/cancelled wf-row) :cancelled
+        ;; Phase B2 fast path: terminal status is cached in the column (O(1)).
+        (#{"completed" "failed"} status) (keyword status)
+        ;; Otherwise (running / pre-migration) derive from history as before.
         :else (let [history (p/load-history this workflow-id)]
                 (if (empty? history)
                   :not-found
diff --git a/test/intemporal/tests/status_test.clj b/test/intemporal/tests/status_test.clj
new file mode 100644
index 0000000..56da8b6
--- /dev/null
+++ b/test/intemporal/tests/status_test.clj
@@ -0,0 +1,52 @@
+(ns intemporal.tests.status-test
+  "Phase B2 — get-workflow-status reflects lifecycle via the cached status
+  column/key (O(1) for terminal states), across InMemory + JDBC + FDB."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as store]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]))
+
+(defn dbl [x] (* x 2))
+(defn done-wf [x] (let [a (intemporal/stub #'dbl)] (a x)))
+(defn sleep-wf [] (intemporal/wait-for-signal "go"))
+
+(defn- check-status [store]
+  ;; unknown id
+  (is (= :not-found (p/get-workflow-status store (str (random-uuid)))))
+  ;; completed (terminal -> cached fast path)
+  (let [e (intemporal/make-workflow-engine :store store :threads 2)]
+    (try
+      (let [{:keys [workflow-id]} (intemporal/submit-workflow e done-wf [21])]
+        (is (= {:status :completed :result 42}
+               (intemporal/await-workflow e workflow-id :timeout-ms 5000)))
+        (is (= :completed (p/get-workflow-status store workflow-id))))
+      ;; cancelled (flag precedence)
+      (let [wid (str "cancel-" (random-uuid))
+            f   (future (intemporal/start-workflow e sleep-wf [] :workflow-id wid))]
+        (Thread/sleep 300)
+        (intemporal/cancel-workflow store wid)
+        @f
+        (is (= :cancelled (p/get-workflow-status store wid))))
+      (finally (intemporal/shutdown-engine e)))))
+
+(deftest status-in-memory
+  (testing "status lifecycle on InMemoryStore"
+    (check-status (store/->InMemoryStore (atom {})))))
+
+(deftest ^:integration status-jdbc
+  (testing "status lifecycle on JdbcStore"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (check-status store) (finally (.close store))))))
+
+(deftest ^:integration status-fdb
+  (testing "status lifecycle on FDBStore"
+    (let [root  (str "status-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (check-status store))))
diff --git a/test/intemporal/tests/submit_workflow_test.clj b/test/intemporal/tests/submit_workflow_test.clj
new file mode 100644
index 0000000..bd6636a
--- /dev/null
+++ b/test/intemporal/tests/submit_workflow_test.clj
@@ -0,0 +1,39 @@
+(ns intemporal.tests.submit-workflow-test
+  "Phase B4 — async submit-workflow + await-workflow.
+
+  submit-workflow returns {:workflow-id …} immediately (the workflow runs on a
+  background thread); await-workflow blocks until the workflow reaches a
+  terminal state and returns its result."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.store :as store]))
+
+(defn dbl [x] (* x 2))
+
+(defn submit-wf [x]
+  (let [a (intemporal/stub #'dbl)]
+    (a x)))
+
+(deftest submit-returns-id-then-await-completes
+  (testing "submit-workflow returns an id immediately; await-workflow yields the result"
+    (let [st (store/->InMemoryStore (atom {}))
+          e  (intemporal/make-workflow-engine :store st :threads 2)]
+      (try
+        (let [{:keys [workflow-id]} (intemporal/submit-workflow e submit-wf [21])]
+          (is (string? workflow-id) "submit-workflow returns a workflow-id immediately")
+          (let [r (intemporal/await-workflow e workflow-id :timeout-ms 5000)]
+            (is (= :completed (:status r)) "await sees the workflow reach terminal state")
+            (is (= 42 (:result r)) "21*2 = 42")))
+        (finally (intemporal/shutdown-engine e))))))
+
+(deftest submit-honours-explicit-id
+  (testing "submit-workflow uses a caller-supplied :workflow-id"
+    (let [st (store/->InMemoryStore (atom {}))
+          e  (intemporal/make-workflow-engine :store st :threads 2)]
+      (try
+        (let [{:keys [workflow-id]} (intemporal/submit-workflow e submit-wf [50]
+                                                                :workflow-id "explicit-1")]
+          (is (= "explicit-1" workflow-id))
+          (is (= {:status :completed :result 100}
+                 (intemporal/await-workflow e "explicit-1" :timeout-ms 5000))))
+        (finally (intemporal/shutdown-engine e))))))
diff --git a/test/intemporal/tests/workflow_registry_test.clj b/test/intemporal/tests/workflow_registry_test.clj
new file mode 100644
index 0000000..6e501a1
--- /dev/null
+++ b/test/intemporal/tests/workflow_registry_test.clj
@@ -0,0 +1,57 @@
+(ns intemporal.tests.workflow-registry-test
+  "Phase B3 — workflow registry + resume-by-id.
+
+  Verifies that resume-workflow [engine workflow-id] (no fn, no args) can resolve
+  both the workflow function and its original arguments from the :workflow-started
+  event via the process-global registry, and resume to completion without
+  re-running already-completed activities."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as store]
+            [intemporal.internal.workflow-registry :as wreg]))
+
+(def exec-count (atom 0))
+
+(defn reg-activity [x]
+  (swap! exec-count inc)
+  (* x 2))
+
+(defn reg-workflow [a b]
+  (let [act (intemporal/stub #'reg-activity)
+        r1  (act a)]
+    (intemporal/wait-for-signal "go")
+    (+ r1 (act b))))
+
+(deftest registry-basic-ops
+  (testing "register-workflow! / resolve-workflow / clear-registry!"
+    (wreg/clear-registry!)
+    (let [nm (wreg/register-workflow! #'reg-workflow)]
+      (is (= "intemporal.tests.workflow-registry-test/reg-workflow" nm))
+      (is (= @#'reg-workflow (wreg/resolve-workflow nm)))
+      (wreg/clear-registry!)
+      (is (nil? (wreg/resolve-workflow nm))))))
+
+(deftest resume-by-id-resolves-fn-and-args
+  (testing "resume-workflow [engine wf-id] resolves fn+args from history"
+    (reset! exec-count 0)
+    (wreg/clear-registry!)
+    (let [st  (store/->InMemoryStore (atom {}))
+          wid "reg-resume-1"]
+      ;; Phase 1: start, run until it suspends on signal, then simulate a crash.
+      (let [e1 (intemporal/make-workflow-engine :store st :threads 2)
+            f1 (future (intemporal/start-workflow e1 reg-workflow [10 5]
+                                                  :workflow-id wid))]
+        (Thread/sleep 300)
+        (future-cancel f1)
+        (intemporal/shutdown-engine e1))
+      (is (= 1 @exec-count) "only the first activity ran before suspension")
+      ;; Phase 2: fresh engine, deliver signal, resume BY ID ONLY.
+      (let [e2 (intemporal/make-workflow-engine :store st :threads 2)]
+        (intemporal/send-signal st wid "go" {})
+        (let [r (intemporal/resume-workflow e2 wid)]   ; no fn, no args
+          (is (= :completed (:status r)) "resumed-by-id workflow completes")
+          (is (= 30 (:result r)) "10*2 + 5*2 = 30")
+          (is (= 2 @exec-count)
+              "second activity ran once on resume; first not re-executed"))
+        (intemporal/shutdown-engine e2)))))

From 30f4a522308600fdedb9605addca1d74e67c6e64 Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Sun, 31 May 2026 19:16:02 +0100
Subject: [PATCH 4/9] lease fix

---
 .../postgres/20260531000002-multipod.down.sql |   7 +
 .../postgres/20260531000002-multipod.up.sql   |  23 +++
 src/intemporal/core.cljc                      |  58 ++++++++
 src/intemporal/internal/error.cljc            |  19 +++
 src/intemporal/internal/lease.cljc            |  10 ++
 src/intemporal/protocol.cljc                  |  21 ++-
 src/intemporal/store.cljc                     |  88 +++++++++++-
 src/intemporal/store/fdb.clj                  | 109 ++++++++++++--
 src/intemporal/store/jdbc.clj                 |  82 ++++++++++-
 test/intemporal/tests/jepsen/bug_1_1_test.clj | 116 +++++++--------
 test/intemporal/tests/jepsen/bug_1_2_test.clj | 118 +++++++--------
 test/intemporal/tests/jepsen/bug_1_3_test.clj | 136 +++++++++---------
 test/intemporal/tests/jepsen/racing_store.clj |   6 +
 test/intemporal/tests/worker_test.clj         | 114 +++++++++++++++
 14 files changed, 687 insertions(+), 220 deletions(-)
 create mode 100644 resources/migrations/postgres/20260531000002-multipod.down.sql
 create mode 100644 resources/migrations/postgres/20260531000002-multipod.up.sql
 create mode 100644 src/intemporal/internal/lease.cljc
 create mode 100644 test/intemporal/tests/worker_test.clj

diff --git a/resources/migrations/postgres/20260531000002-multipod.down.sql b/resources/migrations/postgres/20260531000002-multipod.down.sql
new file mode 100644
index 0000000..b2e2f72
--- /dev/null
+++ b/resources/migrations/postgres/20260531000002-multipod.down.sql
@@ -0,0 +1,7 @@
+DROP INDEX IF EXISTS idx_intemporal_runnable_claim;
+--;;
+DROP TABLE IF EXISTS intemporal_runnable;
+--;;
+ALTER TABLE intemporal_workflows
+    DROP COLUMN IF EXISTS owner_id,
+    DROP COLUMN IF EXISTS lease_until;
diff --git a/resources/migrations/postgres/20260531000002-multipod.up.sql b/resources/migrations/postgres/20260531000002-multipod.up.sql
new file mode 100644
index 0000000..733eba0
--- /dev/null
+++ b/resources/migrations/postgres/20260531000002-multipod.up.sql
@@ -0,0 +1,23 @@
+-- Phase C: multi-pod safety primitives.
+
+-- C1: lease / ownership. A worker claims a workflow before executing it; every
+-- save-events validates the lease so a worker that lost ownership cannot keep
+-- writing (closes the silent concurrent-execution path, bug 1.2).
+ALTER TABLE intemporal_workflows
+    ADD COLUMN IF NOT EXISTS owner_id    TEXT,
+    ADD COLUMN IF NOT EXISTS lease_until TIMESTAMPTZ;
+--;;
+-- C3: durable runnable markers. Whenever a workflow needs attention (a signal
+-- arrived, it was cancelled, a timer fired) a marker is written here. Workers
+-- poll this table instead of relying on a process-local callback (closes the
+-- lost-wake-across-pods path, bug 1.1). PRIMARY KEY collapses duplicates: a
+-- workflow is either runnable or it isn't.
+CREATE TABLE IF NOT EXISTS intemporal_runnable (
+    workflow_id   TEXT PRIMARY KEY REFERENCES intemporal_workflows(id) ON DELETE CASCADE,
+    reason        TEXT,
+    enqueued_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
+    claimed_until TIMESTAMPTZ NOT NULL DEFAULT to_timestamp(0)
+);
+--;;
+CREATE INDEX IF NOT EXISTS idx_intemporal_runnable_claim
+    ON intemporal_runnable (claimed_until, enqueued_at);
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index 724f5d5..832b19d 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -7,6 +7,7 @@
             [intemporal.internal.logging :as log]
             [intemporal.internal.fns.start-workflow :as sw]
             [intemporal.internal.workflow-registry :as wreg]
+            [intemporal.internal.lease :as lease]
             [intemporal.protocol :as p]
             [intemporal.store :as store]
             [intemporal.observer :as obs]
@@ -450,6 +451,63 @@
                           {:observer observer
                            :max-iterations max-iterations})))
 
+#?(:clj
+   (defn start-worker
+     "Start a background recovery worker (Phase C). It polls the store's durable
+      runnable markers, claims a lease on each workflow, and resumes it by id —
+      so a workflow whose original pod crashed, or one signalled/cancelled from
+      another pod, is driven to completion. Returns a 0-arg stop fn.
+
+      The worker resumes via resume-workflow [engine workflow-id], so the workflow
+      function must be registered in this process (start-workflow registers it
+      automatically; a fresh process must register its workflow vars at startup).
+
+      Options:
+        :owner-id    unique id for this worker (default: random uuid)
+        :poll-ms     idle poll interval when no markers are due (default 100)
+        :batch-size  max markers claimed per poll (default 10)
+        :lease-ms    lease duration per claimed workflow (default 30000)
+        :claim-ms    marker fencing duration while processing (default 30000)"
+     [{:keys [store] :as engine}
+      & {:keys [owner-id poll-ms batch-size lease-ms claim-ms]
+         :or   {owner-id (str (random-uuid)) poll-ms 100 batch-size 10
+                lease-ms 30000 claim-ms 30000}}]
+     (let [running (atom true)
+           process-one
+           (fn [wf-id]
+             (when (p/claim-workflow store wf-id owner-id lease-ms)
+               (binding [lease/*owner* owner-id]
+                 (try
+                   (resume-workflow engine wf-id)
+                   (p/delete-runnable store wf-id)
+                   (catch Throwable t
+                     (if (error/lease-lost? t)
+                       (log/debugf "Worker %s lost lease on %s; skipping" owner-id wf-id)
+                       (log/warnf t "Worker %s failed resuming %s" owner-id wf-id)))
+                   (finally
+                     (p/release-lease store wf-id owner-id))))))
+           thread
+           (Thread.
+             ^Runnable
+             (fn []
+               (while @running
+                 (try
+                   (let [ids (p/claim-runnable store owner-id batch-size claim-ms)]
+                     (if (seq ids)
+                       (doseq [wf-id ids :while @running]
+                         (process-one wf-id))
+                       (Thread/sleep (long poll-ms))))
+                   (catch InterruptedException _ (reset! running false))
+                   (catch Throwable t
+                     (log/warnf t "Worker %s loop error" owner-id)
+                     (Thread/sleep (long poll-ms)))))))]
+       (doto thread
+         (.setDaemon true)
+         (.setName (str "intemporal-worker-" owner-id))
+         (.start))
+       (fn stop-worker []
+         (reset! running false)
+         (.interrupt thread)))))
 
 (defn send-signal
   "Send a signal to a workflow.
diff --git a/src/intemporal/internal/error.cljc b/src/intemporal/internal/error.cljc
index f1539eb..544734b 100644
--- a/src/intemporal/internal/error.cljc
+++ b/src/intemporal/internal/error.cljc
@@ -81,6 +81,25 @@
           (.-data e)
           (::cancelled (.-data e)))))
 
+(defn lease-lost-exception
+  "Thrown by IStore/save-events when the writer's lease on a workflow is no
+   longer valid (another worker took ownership, or the lease expired). The
+   worker catches this and aborts the in-flight execution cleanly. (Phase C)"
+  [workflow-id owner-id]
+  (ex-info "Workflow lease lost"
+           {::lease-lost true
+            :workflow-id workflow-id
+            :owner-id    owner-id}))
+
+(defn lease-lost? [e]
+  #?(:clj
+     (and (instance? IExceptionInfo e)
+          (::lease-lost (ex-data e)))
+     :cljs
+     (and (instance? js/Error e)
+          (or (and (.-data e) (::lease-lost (.-data e)))
+              (::lease-lost (ex-data e))))))
+
 (defn activity-rejected-exception [activity-name cause]
   (ex-info "Execution rejected"
            {::rejected     true
diff --git a/src/intemporal/internal/lease.cljc b/src/intemporal/internal/lease.cljc
new file mode 100644
index 0000000..8186d45
--- /dev/null
+++ b/src/intemporal/internal/lease.cljc
@@ -0,0 +1,10 @@
+(ns intemporal.internal.lease
+  "Shared dynamic binding for the worker that currently owns an executing
+   workflow. The worker binds *owner* around resume-workflow; stores read it in
+   save-events to validate the lease in the same transaction. When *owner* is
+   nil (plain single-process start-workflow, no worker) lease validation is
+   skipped entirely, so existing single-process behaviour is unchanged.")
+
+(def ^:dynamic *owner*
+  "Owner-id of the worker executing the current workflow, or nil."
+  nil)
diff --git a/src/intemporal/protocol.cljc b/src/intemporal/protocol.cljc
index 2a8a873..10c33ff 100644
--- a/src/intemporal/protocol.cljc
+++ b/src/intemporal/protocol.cljc
@@ -20,7 +20,26 @@
   (wake-workflow [store workflow-id] "Fire the registered wake callback for a workflow, forcing it to re-enter its loop and re-evaluate state such as the cancellation flag. No-op if none registered.")
   (is-cancelled? [store workflow-id] "Check if workflow is cancelled")
   (mark-cancelled [store workflow-id] "Mark workflow as cancelled")
-  (get-workflow-status [store workflow-id] "Get current workflow status"))
+  (get-workflow-status [store workflow-id] "Get current workflow status")
+
+  ;; --- Phase C: multi-pod primitives (opt-in; single-process callers ignore) ---
+  (claim-workflow [store workflow-id owner-id lease-ms]
+    "Atomically claim or renew ownership of a workflow if it is unowned, owned by
+     owner-id already, or its lease has expired. Sets owner_id=owner-id and
+     lease_until=now+lease-ms. Returns true on success, false if another live
+     owner holds it.")
+  (renew-lease [store workflow-id owner-id lease-ms]
+    "Extend the lease to now+lease-ms iff owner-id still owns it. Returns boolean.")
+  (release-lease [store workflow-id owner-id]
+    "Release ownership (clear owner_id/lease_until) iff held by owner-id.")
+  (add-runnable [store workflow-id reason]
+    "Durably mark a workflow as needing execution. Replaces the process-local
+     wake callback for cross-pod wake. Idempotent: one marker per workflow.")
+  (claim-runnable [store owner-id batch-size claim-ms]
+    "Claim up to batch-size runnable markers whose claim has lapsed, fencing them
+     for claim-ms so other workers skip them. Returns a vector of workflow-ids.")
+  (delete-runnable [store workflow-id]
+    "Remove a workflow's runnable marker (after it has been resumed)."))
 
 (defprotocol IActivityExecutor
   "Protocol for executing activities"
diff --git a/src/intemporal/store.cljc b/src/intemporal/store.cljc
index 14a1fb8..e61a100 100644
--- a/src/intemporal/store.cljc
+++ b/src/intemporal/store.cljc
@@ -1,5 +1,8 @@
 (ns intemporal.store
-  (:require [intemporal.protocol :as p]))
+  (:require [intemporal.protocol :as p]
+            [intemporal.utils :as utils]
+            [intemporal.internal.lease :as lease]
+            [intemporal.internal.error :as error]))
 
 ;; ============================================================================
 ;; In-Memory Store Implementation
@@ -22,6 +25,14 @@
 
   (save-events [_ workflow-id events]
     (when (seq events)
+      ;; Phase C: when running under a worker lease, refuse to write if this
+      ;; owner no longer holds a valid lease (another worker took over / expired).
+      (when-let [owner lease/*owner*]
+        (let [s   @state
+              cur (get-in s [:workflows workflow-id :owner])
+              lu  (get-in s [:workflows workflow-id :lease-until] 0)]
+          (when (or (not= cur owner) (< lu (utils/current-time-ms)))
+            (throw (error/lease-lost-exception workflow-id owner)))))
       (swap! state
              (fn [s]
                (let [s    (update-in s [:workflows workflow-id :history] (fnil into []) events)
@@ -49,7 +60,9 @@
   (add-signal [this workflow-id signal-name signal-data]
     (swap! state update-in [:workflows workflow-id :signals signal-name]
            (fnil conj []) signal-data)
-    ;; Check if there's a callback registered for this signal
+    ;; Phase C: durable wake marker so a worker (possibly another pod) resumes it.
+    (p/add-runnable this workflow-id :signal)
+    ;; Check if there's a callback registered for this signal (single-process path)
     (when-let [callback (get-in @state [:workflows workflow-id :signal-callbacks signal-name])]
       ;; Invoke callback asynchronously
       #?(:clj (future (callback))
@@ -86,8 +99,10 @@
   (is-cancelled? [_ workflow-id]
     (get-in @state [:workflows workflow-id :cancelled] false))
 
-  (mark-cancelled [_ workflow-id]
-    (swap! state assoc-in [:workflows workflow-id :cancelled] true))
+  (mark-cancelled [this workflow-id]
+    (swap! state assoc-in [:workflows workflow-id :cancelled] true)
+    ;; Phase C: wake a sleeper via a durable marker too (worker path).
+    (p/add-runnable this workflow-id :cancel))
 
   (get-workflow-status [_ workflow-id]
     (let [wf (get-in @state [:workflows workflow-id])]
@@ -99,4 +114,67 @@
                 (case (:event-type last-event)
                   :workflow-completed :completed
                   :workflow-failed :failed
-                  :running))))))
+                  :running)))))
+
+  ;; --- Phase C: lease / ownership ---
+  (claim-workflow [_ workflow-id owner-id lease-ms]
+    (let [ok (atom false)]
+      (swap! state
+             (fn [s]
+               (let [cur (get-in s [:workflows workflow-id :owner])
+                     lu  (get-in s [:workflows workflow-id :lease-until] 0)
+                     now (utils/current-time-ms)]
+                 (if (or (nil? cur) (= cur owner-id) (< lu now))
+                   (do (reset! ok true)
+                       (-> s
+                           (assoc-in [:workflows workflow-id :owner] owner-id)
+                           (assoc-in [:workflows workflow-id :lease-until] (+ now lease-ms))))
+                   s))))
+      @ok))
+
+  (renew-lease [_ workflow-id owner-id lease-ms]
+    (let [ok (atom false)]
+      (swap! state
+             (fn [s]
+               (if (= owner-id (get-in s [:workflows workflow-id :owner]))
+                 (do (reset! ok true)
+                     (assoc-in s [:workflows workflow-id :lease-until]
+                               (+ (utils/current-time-ms) lease-ms)))
+                 s)))
+      @ok))
+
+  (release-lease [_ workflow-id owner-id]
+    (swap! state
+           (fn [s]
+             (if (= owner-id (get-in s [:workflows workflow-id :owner]))
+               (update-in s [:workflows workflow-id] dissoc :owner :lease-until)
+               s)))
+    nil)
+
+  ;; --- Phase C: runnable markers ---
+  (add-runnable [_ workflow-id reason]
+    (swap! state update-in [:runnable workflow-id]
+           (fn [m] (assoc (or m {}) :reason reason
+                          :enqueued-at (utils/current-time-ms)
+                          :claimed-until (get m :claimed-until 0))))
+    nil)
+
+  (claim-runnable [_ _owner-id batch-size claim-ms]
+    (let [claimed (atom [])]
+      (swap! state
+             (fn [s]
+               (let [now (utils/current-time-ms)
+                     due (->> (:runnable s)
+                              (filter (fn [[_ m]] (< (:claimed-until m 0) now)))
+                              (map first)
+                              (take batch-size)
+                              vec)]
+                 (reset! claimed due)
+                 (reduce (fn [s wid]
+                           (assoc-in s [:runnable wid :claimed-until] (+ now claim-ms)))
+                         s due))))
+      @claimed))
+
+  (delete-runnable [_ workflow-id]
+    (swap! state update :runnable dissoc workflow-id)
+    nil))
diff --git a/src/intemporal/store/fdb.clj b/src/intemporal/store/fdb.clj
index 04aa4da..bd6907e 100644
--- a/src/intemporal/store/fdb.clj
+++ b/src/intemporal/store/fdb.clj
@@ -1,5 +1,7 @@
 (ns intemporal.store.fdb
   (:require [intemporal.protocol :as p]
+            [intemporal.internal.lease :as lease]
+            [intemporal.internal.error :as error]
             [me.vedang.clj-fdb.core :as fdb-core]
             [me.vedang.clj-fdb.transaction :as ftr]
             [me.vedang.clj-fdb.subspace.subspace :as fsub]
@@ -68,14 +70,25 @@
                                  :workflow-failed    "failed"
                                  nil)
                               events)]
-        (ftr/run db
-          (fn [tx]
-            (doseq [event events]
-              (let [seq-num (:seq event (System/currentTimeMillis))
-                    key (->tuple [seq-num (str (java.util.UUID/randomUUID))])]
-                (fdb-core/set tx history-sub key (->bytes event))))
-            (when term
-              (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))))))
+        ;; FDB's run wraps a body exception in CompletionException; unwrap so the
+        ;; lease-lost ExceptionInfo propagates cleanly (worker/error checks rely on it).
+        (try
+          (ftr/run db
+            (fn [tx]
+              ;; Phase C: validate the lease within the serializable transaction.
+              (when-let [owner lease/*owner*]
+                (let [cur (<-bytes (fdb-core/get tx root-subspace (->tuple ["lease" workflow-id])))]
+                  (when (or (not= (:owner-id cur) owner)
+                            (< (:lease-until cur 0) (System/currentTimeMillis)))
+                    (throw (error/lease-lost-exception workflow-id owner)))))
+              (doseq [event events]
+                (let [seq-num (:seq event (System/currentTimeMillis))
+                      key (->tuple [seq-num (str (java.util.UUID/randomUUID))])]
+                  (fdb-core/set tx history-sub key (->bytes event))))
+              (when term
+                (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))))
+          (catch java.util.concurrent.CompletionException ce
+            (throw (or (.getCause ce) ce))))))
     events)
 
   (find-event [this workflow-id event-type seq-num]
@@ -103,7 +116,9 @@
         (fn [tx]
           (fdb-core/set tx signals-sub key (->bytes signal-data))))
 
-      ;; Invoke callback asynchronously
+      ;; Phase C: durable, cross-pod wake (a worker on any pod resumes the workflow).
+      (p/add-runnable this workflow-id :signal)
+      ;; In-process fast path for an embedded (no-worker) engine in THIS process.
       (when-let [callback (get-in @callbacks [workflow-id signal-name])]
         (future (callback)))
 
@@ -138,10 +153,12 @@
       (fn [tx]
         (boolean (<-bytes (fdb-core/get tx root-subspace (->tuple ["state" workflow-id "cancelled"])))))))
 
-  (mark-cancelled [_ workflow-id]
+  (mark-cancelled [this workflow-id]
     (ftr/run db
       (fn [tx]
-        (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "cancelled"]) (->bytes true)))))
+        (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "cancelled"]) (->bytes true))))
+    ;; Phase C: durable wake so a worker resumes the sleeper and it observes the flag.
+    (p/add-runnable this workflow-id :cancel))
 
   (get-workflow-status [this workflow-id]
     (if (p/is-cancelled? this workflow-id)
@@ -160,7 +177,75 @@
                 (case (:event-type last-event)
                   :workflow-completed :completed
                   :workflow-failed :failed
-                  :running)))))))))
+                  :running))))))))
+
+  ;; --- Phase C: lease / ownership (serializable read-modify-write) ---
+  (claim-workflow [_ workflow-id owner-id lease-ms]
+    (ftr/run db
+      (fn [tx]
+        (let [k   (->tuple ["lease" workflow-id])
+              cur (<-bytes (fdb-core/get tx root-subspace k))
+              now (System/currentTimeMillis)]
+          (if (or (nil? cur) (= (:owner-id cur) owner-id) (< (:lease-until cur 0) now))
+            (do (fdb-core/set tx root-subspace k
+                              (->bytes {:owner-id owner-id :lease-until (+ now lease-ms)}))
+                true)
+            false)))))
+
+  (renew-lease [_ workflow-id owner-id lease-ms]
+    (ftr/run db
+      (fn [tx]
+        (let [k   (->tuple ["lease" workflow-id])
+              cur (<-bytes (fdb-core/get tx root-subspace k))
+              now (System/currentTimeMillis)]
+          (if (= (:owner-id cur) owner-id)
+            (do (fdb-core/set tx root-subspace k
+                              (->bytes {:owner-id owner-id :lease-until (+ now lease-ms)}))
+                true)
+            false)))))
+
+  (release-lease [_ workflow-id owner-id]
+    (ftr/run db
+      (fn [tx]
+        (let [k   (->tuple ["lease" workflow-id])
+              cur (<-bytes (fdb-core/get tx root-subspace k))]
+          (when (= (:owner-id cur) owner-id)
+            (fdb-core/clear tx root-subspace k)))))
+    nil)
+
+  ;; --- Phase C: runnable markers (subspace ["runnable" wf-id]) ---
+  (add-runnable [_ workflow-id reason]
+    (ftr/run db
+      (fn [tx]
+        (fdb-core/set tx root-subspace (->tuple ["runnable" workflow-id])
+                      (->bytes {:reason (name reason)
+                                :enqueued-at (System/currentTimeMillis)
+                                :claimed-until 0}))))
+    nil)
+
+  (claim-runnable [_ _owner-id batch-size claim-ms]
+    (ftr/run db
+      (fn [tx]
+        (let [run-sub (fsub/get root-subspace (->tuple ["runnable"]))
+              rows    (fdb-core/get-range tx (fsub/range run-sub))
+              now     (System/currentTimeMillis)
+              due     (->> rows
+                           (keep (fn [[key value]]
+                                   (let [m   (<-bytes value)
+                                         wid (nth key (dec (count key)))]
+                                     (when (< (:claimed-until m 0) now) [wid m]))))
+                           (take batch-size)
+                           vec)]
+          (doseq [[wid m] due]
+            (fdb-core/set tx root-subspace (->tuple ["runnable" wid])
+                          (->bytes (assoc m :claimed-until (+ now claim-ms)))))
+          (mapv first due)))))
+
+  (delete-runnable [_ workflow-id]
+    (ftr/run db
+      (fn [tx]
+        (fdb-core/clear tx root-subspace (->tuple ["runnable" workflow-id]))))
+    nil))
 
 (defn make-fdb-store [db subspace-name]
   (let [root (fsub/create (->tuple [subspace-name]))]
diff --git a/src/intemporal/store/jdbc.clj b/src/intemporal/store/jdbc.clj
index fd1a4f5..fe3283b 100644
--- a/src/intemporal/store/jdbc.clj
+++ b/src/intemporal/store/jdbc.clj
@@ -1,5 +1,7 @@
 (ns intemporal.store.jdbc
   (:require [intemporal.protocol :as p]
+            [intemporal.internal.lease :as lease]
+            [intemporal.internal.error :as error]
             [migratus.core :as migratus]
             [next.jdbc :as jdbc]
             [next.jdbc.prepare :as prepare]
@@ -92,6 +94,15 @@
         ;; Ensure workflow exists
         (jdbc/execute! tx ["INSERT INTO intemporal_workflows (id) VALUES (?) ON CONFLICT (id) DO NOTHING"
                            workflow-id])
+        ;; Phase C: validate the lease in the same transaction. If this owner no
+        ;; longer holds a live lease (another worker took over / it expired),
+        ;; refuse the write so concurrent execution can't corrupt history.
+        (when-let [owner lease/*owner*]
+          (when-not (jdbc/execute-one! tx
+                      ["SELECT 1 FROM intemporal_workflows
+                        WHERE id = ? AND owner_id = ? AND lease_until > now()"
+                       workflow-id owner])
+            (throw (error/lease-lost-exception workflow-id owner))))
         ;; Insert events. DO UPDATE keeps the write idempotent under normal
         ;; replay (the engine re-writes the same seq with identical data on
         ;; each pass). Rejecting a *concurrent* writer is the lease's job
@@ -136,8 +147,10 @@
                          workflow-id])
       (jdbc/execute! tx ["INSERT INTO intemporal_signals (workflow_id, signal_name, payload) VALUES (?, ?, ?)"
                          workflow-id signal-name signal-data]))
-
-    ;; Trigger callback if registered
+    ;; Phase C: durable, cross-pod wake (a worker on any pod resumes the workflow).
+    (p/add-runnable this workflow-id :signal)
+    ;; In-process fast path: fire the callback for an embedded (no-worker) engine
+    ;; running in THIS process. Cross-pod wake goes through the marker above.
     (when-let [callback (get-in @callbacks [workflow-id signal-name])]
       (future (callback)))
     signal-data)
@@ -170,11 +183,13 @@
                                   workflow-id])]
       (boolean (:intemporal_workflows/cancelled row))))
 
-  (mark-cancelled [_ workflow-id]
+  (mark-cancelled [this workflow-id]
     (jdbc/execute! datasource
                    ["INSERT INTO intemporal_workflows (id, cancelled) VALUES (?, true)
                      ON CONFLICT (id) DO UPDATE SET cancelled = true"
-                    workflow-id]))
+                    workflow-id])
+    ;; Phase C: durable wake so a worker resumes the sleeper and it observes the flag.
+    (p/add-runnable this workflow-id :cancel))
 
   (get-workflow-status [this workflow-id]
     (let [wf-row (jdbc/execute-one! datasource
@@ -194,7 +209,64 @@
                     (case (:event-type last-event)
                       :workflow-completed :completed
                       :workflow-failed :failed
-                      :running))))))))
+                      :running)))))))
+
+  ;; --- Phase C: lease / ownership ---
+  (claim-workflow [_ workflow-id owner-id lease-ms]
+    (let [res (jdbc/execute-one! datasource
+                ["UPDATE intemporal_workflows
+                  SET owner_id = ?, lease_until = now() + ((?)::bigint * interval '1 millisecond')
+                  WHERE id = ?
+                    AND (owner_id IS NULL OR owner_id = ? OR lease_until IS NULL OR lease_until < now())"
+                 owner-id lease-ms workflow-id owner-id])]
+      (pos? (or (:next.jdbc/update-count res) 0))))
+
+  (renew-lease [_ workflow-id owner-id lease-ms]
+    (let [res (jdbc/execute-one! datasource
+                ["UPDATE intemporal_workflows
+                  SET lease_until = now() + ((?)::bigint * interval '1 millisecond')
+                  WHERE id = ? AND owner_id = ?"
+                 lease-ms workflow-id owner-id])]
+      (pos? (or (:next.jdbc/update-count res) 0))))
+
+  (release-lease [_ workflow-id owner-id]
+    (jdbc/execute! datasource
+                   ["UPDATE intemporal_workflows SET owner_id = NULL, lease_until = NULL
+                     WHERE id = ? AND owner_id = ?"
+                    workflow-id owner-id])
+    nil)
+
+  ;; --- Phase C: runnable markers ---
+  (add-runnable [_ workflow-id reason]
+    (jdbc/execute! datasource
+                   ["INSERT INTO intemporal_runnable (workflow_id, reason, enqueued_at, claimed_until)
+                     VALUES (?, ?, now(), to_timestamp(0))
+                     ON CONFLICT (workflow_id) DO UPDATE SET reason = EXCLUDED.reason, enqueued_at = now()"
+                    workflow-id (name reason)])
+    nil)
+
+  (claim-runnable [_ _owner-id batch-size claim-ms]
+    (jdbc/with-transaction [tx datasource]
+      (let [rows (jdbc/execute! tx
+                   ["SELECT workflow_id FROM intemporal_runnable
+                     WHERE claimed_until < now()
+                     ORDER BY enqueued_at
+                     FOR UPDATE SKIP LOCKED
+                     LIMIT ?" batch-size])
+            ids  (mapv :intemporal_runnable/workflow_id rows)]
+        (when (seq ids)
+          (let [ph (apply str (interpose "," (repeat (count ids) "?")))]
+            (jdbc/execute! tx
+              (into [(str "UPDATE intemporal_runnable
+                           SET claimed_until = now() + ((?)::bigint * interval '1 millisecond')
+                           WHERE workflow_id IN (" ph ")")
+                     claim-ms]
+                    ids))))
+        ids)))
+
+  (delete-runnable [_ workflow-id]
+    (jdbc/execute! datasource ["DELETE FROM intemporal_runnable WHERE workflow_id = ?" workflow-id])
+    nil))
 
 ;; TODO use more complete opts
 (defn make-jdbc-store
diff --git a/test/intemporal/tests/jepsen/bug_1_1_test.clj b/test/intemporal/tests/jepsen/bug_1_1_test.clj
index 5513d3a..825cb7b 100644
--- a/test/intemporal/tests/jepsen/bug_1_1_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_1_test.clj
@@ -1,83 +1,87 @@
 (ns intemporal.tests.jepsen.bug-1-1-test
-  "Bug 1.1 — Signal sent via a second store instance never wakes a workflow.
+  "Bug 1.1 — Wake on signal across pods.  REGRESSION GUARD.
 
-  Root cause (improvements.md §1.1):
-    register-signal-callback stores the wake-fn in a process-local atom on
-    the store record (JdbcStore.callbacks, FDBStore.callbacks, InMemoryStore.state).
-    When add-signal is called from a DIFFERENT store instance — representing a
-    second pod, a new engine, or any caller that didn't start the workflow — the
-    callback atom is empty and the workflow is never woken.
+  Root cause (improvements.md §1.1) — now FIXED (Phase C):
+    Wake callbacks lived in a process-local atom on the store record, so a signal
+    delivered through a DIFFERENT store instance (another pod) never woke the
+    workflow — it was persisted but orphaned.
 
-  These tests assert the CURRENT (buggy) behaviour.  They will fail once the
-  fix from improvements.md §C3/C5 is applied (durable runnable markers)."
+    The fix: add-signal writes a durable runnable marker (C3); a worker (C4) on
+    any pod claims the marker, leases the workflow (C1), and resumes it by id
+    (B3). The wake no longer depends on the process that started the workflow.
+
+  These tests assert the FIXED behaviour: a signal written through a SEPARATE
+  store instance, with a worker running, resumes the workflow to completion.
+  InMemory models a shared store by having both instances share one state atom;
+  JDBC and FDB use two store objects over the same backing."
   (:require [clojure.test :refer [deftest is testing]]
             [intemporal.core :as intemporal]
             [intemporal.protocol :as p]
             [intemporal.store :as mem]
             [intemporal.store.jdbc :as jdbc-store]
             [intemporal.store.fdb :as fdb-store]
-            [me.vedang.clj-fdb.FDB :as cfdb]))
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [intemporal.internal.workflow-registry :as wreg]))
 
-;; ── Shared workflow ───────────────────────────────────────────────────────────
+(defn sig-act [x] (* x 2))
 
-(defn- wait-signal-wf []
-  (intemporal/wait-for-signal "go")
-  :woke)
+(defn sig-wf [x]
+  (let [a (intemporal/stub #'sig-act)
+        r (a x)]
+    (intemporal/wait-for-signal "go")
+    (+ r 100)))
 
-;; ── Shared scenario ───────────────────────────────────────────────────────────
+(defn- await-status [store wf-id terminal timeout-ms]
+  (let [deadline (+ (System/currentTimeMillis) timeout-ms)]
+    (loop []
+      (let [s (p/get-workflow-status store wf-id)]
+        (if (or (= terminal s) (> (System/currentTimeMillis) deadline))
+          s
+          (do (Thread/sleep 50) (recur)))))))
 
 (defn- run-scenario
-  "Starts the workflow using store-a, then sends the signal via store-b.
-  Returns :stuck if the workflow never wakes, :woke otherwise."
+  "store-a runs the workflow (suspends on signal); store-b (a separate instance
+  over the same backing) delivers the signal; a worker resumes it."
   [store-a store-b]
-  (let [wf-id  (str "bug11-" (random-uuid))
-        result (promise)
-        engine (intemporal/make-workflow-engine :store store-a :threads 2)]
-    (future
+  (wreg/clear-registry!)
+  (let [wid (str "bug11-" (random-uuid))]
+    (let [e1 (intemporal/make-workflow-engine :store store-a :threads 2)
+          f1 (future (intemporal/start-workflow e1 sig-wf [6] :workflow-id wid))]
+      (Thread/sleep 300)
+      (future-cancel f1)
+      (intemporal/shutdown-engine e1))
+    (let [e2   (intemporal/make-workflow-engine :store store-b :threads 2)
+          stop (intemporal/start-worker e2 :poll-ms 50 :owner-id "bug11-w")]
       (try
-        (deliver result (intemporal/start-workflow engine wait-signal-wf []
-                                                   :workflow-id wf-id))
-        (catch Exception e (deliver result {:error (str e)}))))
-    (Thread/sleep 400)
-    ;; Send signal via a DIFFERENT store instance — simulates another pod.
-    ;; store-b has an empty callbacks atom so the wake-fn is never called.
-    (p/add-signal store-b wf-id "go" {:source :store-b})
-    (let [r (deref result 2000 :stuck)]
-      (intemporal/shutdown-engine engine)
-      r)))
-
-;; ── In-memory tests (always run) ─────────────────────────────────────────────
+        ;; Signal delivered through the SECOND store instance.
+        (intemporal/send-signal store-b wid "go" {})
+        {:status (await-status store-b wid :completed 5000)
+         :result (intemporal/get-workflow-result store-b wid)}
+        (finally (stop) (intemporal/shutdown-engine e2))))))
 
-(deftest signal-not-delivered-across-in-memory-stores
-  (testing "Two separate InMemoryStore instances do not share callbacks"
-    (let [store-a (mem/->InMemoryStore (atom {}))
-          store-b (mem/->InMemoryStore (atom {}))]
-      (is (= :stuck (run-scenario store-a store-b))
-          "Signal written to store-b; store-a's callback atom is empty → workflow never wakes (bug 1.1)"))))
+(defn- assert-woke [{:keys [status result]}]
+  (is (= :completed status) "cross-instance signal woke the workflow via durable marker (bug 1.1 fixed)")
+  (is (= 112 result) "6*2 + 100 = 112"))
 
-;; ── JDBC tests (require Postgres) ────────────────────────────────────────────
+(deftest signal-across-instances-in-memory
+  (testing "InMemoryStore sharing one backing atom"
+    (let [state (atom {})]
+      (assert-woke (run-scenario (mem/->InMemoryStore state) (mem/->InMemoryStore state))))))
 
-(deftest ^:integration signal-not-delivered-across-jdbc-stores
-  (testing "Two JdbcStore instances against the same Postgres do not share callbacks"
-    (let [url    (or (System/getenv "DATABASE_URL")
-                     "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+(deftest ^:integration signal-across-instances-jdbc
+  (testing "two JdbcStore instances over the same Postgres"
+    (let [url     (or (System/getenv "DATABASE_URL")
+                      "jdbc:postgresql://localhost:5432/root?user=root&password=root")
           store-a (jdbc-store/make-jdbc-store url)
           store-b (jdbc-store/make-jdbc-store url)]
-      (try
-        (is (= :stuck (run-scenario store-a store-b))
-            "Signal row in intemporal_signals; store-b's callbacks atom empty → no wake (bug 1.1)")
-        (finally
-          (.close store-a)
-          (.close store-b))))))
-
-;; ── FDB tests (require FoundationDB) ─────────────────────────────────────────
+      (try (assert-woke (run-scenario store-a store-b))
+           (finally (.close store-a) (.close store-b))))))
 
-(deftest ^:integration signal-not-delivered-across-fdb-stores
-  (testing "Two FDBStore instances against the same FoundationDB do not share callbacks"
+(deftest ^:integration signal-across-instances-fdb
+  (testing "two FDBStore instances over the same FoundationDB"
     (let [root    (str "bug11-" (random-uuid))
           fdb     (cfdb/select-api-version 730)
           db      (.open fdb "docker/fdb.cluster")
           store-a (fdb-store/make-fdb-store db root)
           store-b (fdb-store/make-fdb-store db root)]
-      (is (= :stuck (run-scenario store-a store-b))
-          "Signal in FDB; store-b's callbacks atom empty → no wake (bug 1.1)"))))
+      (assert-woke (run-scenario store-a store-b)))))
diff --git a/test/intemporal/tests/jepsen/bug_1_2_test.clj b/test/intemporal/tests/jepsen/bug_1_2_test.clj
index 183f38e..da8863d 100644
--- a/test/intemporal/tests/jepsen/bug_1_2_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_2_test.clj
@@ -1,92 +1,72 @@
 (ns intemporal.tests.jepsen.bug-1-2-test
-  "Bug 1.2 — Concurrent save-events at the same (workflow-id, seq) corrupts history.
+  "Bug 1.2 — Concurrent execution corrupting history.  REGRESSION GUARD.
 
-  Root cause (improvements.md §1.2):
-    JDBC:  INSERT … ON CONFLICT (workflow_id, seq) DO UPDATE silently overwrites
-           the losing write.  Both callers receive no error, but only one event
-           survives in intemporal_history.  The discarded write is invisible.
-    FDB:   save-events keys events as [seq, uuid], so two concurrent writes at
-           the same seq both survive as separate rows.  load-history returns
-           both, making the history non-deterministic.
-    Mem:   InMemoryStore.save-events appends unconditionally (swap! conj), so
-           duplicate-seq events accumulate in the vector.
+  Root cause (improvements.md §1.2) — now FIXED (Phase C):
+    Two pods could run the same workflow and both write history; JDBC's
+    ON CONFLICT DO UPDATE silently overwrote, FDB produced duplicate-seq rows.
+    There was nothing stopping two concurrent writers.
 
-  Both outcomes violate the invariant that seq numbers are unique within a
-  workflow's history — breaking deterministic replay.
+    The fix: a lease (C1). A worker claims ownership before executing; every
+    save-events validates the lease in the same transaction and throws
+    LeaseLostException if this owner no longer holds it. Two workers cannot both
+    write — the one without a live lease is rejected, so history can't be
+    corrupted by concurrent execution.
 
-  These tests assert the CURRENT (buggy) behaviour.  They will fail once the
-  fix from improvements.md §A3 is applied (DO NOTHING + conflict exception)."
+  These tests assert the FIXED behaviour: once a second owner takes over, the
+  first owner's writes are rejected rather than silently corrupting history."
   (:require [clojure.test :refer [deftest is testing]]
             [intemporal.protocol :as p]
             [intemporal.store :as mem]
             [intemporal.store.jdbc :as jdbc-store]
             [intemporal.store.fdb :as fdb-store]
-            [me.vedang.clj-fdb.FDB :as cfdb]))
-
-;; ── Shared scenario ───────────────────────────────────────────────────────────
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [intemporal.internal.lease :as lease]
+            [intemporal.internal.error :as error]))
 
 (defn- run-scenario
-  "Fires two concurrent writes at seq=0, waits for both, then reads back history.
-  Returns {:writes [result-a result-b] :seq0-count n :seq0-events [...]}."
+  "owner-A claims and writes; ownership moves to owner-B; A's next write must be
+  rejected. Returns {:a-wrote? :b-claimed? :a-rejected? :seq-count}."
   [store]
-  (let [wf-id   (str "bug12-" (random-uuid))
-        event-a {:event-type :workflow-started :seq 0 :writer "thread-a"
-                 :timestamp  (System/currentTimeMillis)}
-        event-b {:event-type :workflow-started :seq 0 :writer "thread-b"
-                 :timestamp  (System/currentTimeMillis)}
-        latch   (promise)
-        fa      (future (deref latch)
-                        (try (p/save-events store wf-id [event-a]) :ok
-                             (catch Exception e {:error (str e)})))
-        fb      (future (deref latch)
-                        (try (p/save-events store wf-id [event-b]) :ok
-                             (catch Exception e {:error (str e)})))]
-    (deliver latch :go)
-    (let [ra @fa
-          rb @fb
-          h  (p/load-history store wf-id)]
-      {:writes       [ra rb]
-       :seq0-count   (count (filter #(= 0 (:seq %)) h))
-       :seq0-events  (filter #(= 0 (:seq %)) h)})))
-
-;; ── In-memory tests (always run) ─────────────────────────────────────────────
+  (let [wid (str "bug12-" (random-uuid))]
+    (p/save-event store wid {:event-type :workflow-started :workflow-id wid :args []})
+    (let [a-claim (p/claim-workflow store wid "owner-A" 60000)
+          _       (binding [lease/*owner* "owner-A"]
+                    (p/save-events store wid [{:event-type :activity-completed :seq 0 :result 1}]))
+          _       (p/release-lease store wid "owner-A")
+          b-claim (p/claim-workflow store wid "owner-B" 60000)
+          a-rejected?
+          (try
+            (binding [lease/*owner* "owner-A"]
+              (p/save-events store wid [{:event-type :activity-completed :seq 1 :result 2}]))
+            false
+            (catch Exception e (error/lease-lost? e)))
+          seq0 (->> (p/load-history store wid) (filter #(= 0 (:seq %))) count)]
+      {:a-wrote?    a-claim
+       :b-claimed?  b-claim
+       :a-rejected? a-rejected?
+       :seq0-count  seq0})))
 
-(deftest concurrent-seq-write-appends-both-in-memory
-  (testing "InMemoryStore appends both events, producing duplicate seq=0"
-    (let [store  (mem/->InMemoryStore (atom {}))
-          {:keys [writes seq0-count]} (run-scenario store)]
-      (is (every? #{:ok} writes)
-          "Both writes return :ok — no conflict signalled")
-      (is (> seq0-count 1)
-          (str "History has " seq0-count " events at seq=0 — duplicate seq (bug 1.2)")))))
+(defn- assert-fixed [{:keys [a-wrote? b-claimed? a-rejected? seq0-count]}]
+  (is a-wrote?    "owner-A held the lease and wrote")
+  (is b-claimed?  "ownership moved to owner-B after release")
+  (is a-rejected? "stale owner-A's write was rejected with LeaseLostException (bug 1.2 fixed)")
+  (is (= 1 seq0-count) "exactly one event at seq=0 — no concurrent-write corruption"))
 
-;; ── JDBC tests (require Postgres) ────────────────────────────────────────────
+(deftest lease-prevents-corruption-in-memory
+  (testing "InMemoryStore"
+    (assert-fixed (run-scenario (mem/->InMemoryStore (atom {}))))))
 
-(deftest ^:integration concurrent-seq-write-silently-clobbered-jdbc
-  (testing "JDBC: ON CONFLICT DO UPDATE silently discards one write"
+(deftest ^:integration lease-prevents-corruption-jdbc
+  (testing "JdbcStore"
     (let [url   (or (System/getenv "DATABASE_URL")
                     "jdbc:postgresql://localhost:5432/root?user=root&password=root")
           store (jdbc-store/make-jdbc-store url)]
-      (try
-        (let [{:keys [writes seq0-count seq0-events]} (run-scenario store)]
-          (is (every? #{:ok} writes)
-              "Both writes return :ok — DO UPDATE never raises a conflict error")
-          (is (= 1 seq0-count)
-              "Exactly one row at seq=0 — the other write was silently discarded (bug 1.2)")
-          (is (contains? #{"thread-a" "thread-b"} (:writer (first seq0-events)))
-              "Surviving writer is whichever won the race — non-deterministic"))
-        (finally (.close store))))))
-
-;; ── FDB tests (require FoundationDB) ─────────────────────────────────────────
+      (try (assert-fixed (run-scenario store)) (finally (.close store))))))
 
-(deftest ^:integration concurrent-seq-write-produces-duplicates-fdb
-  (testing "FDB: UUID-keyed writes store both events at seq=0"
+(deftest ^:integration lease-prevents-corruption-fdb
+  (testing "FDBStore"
     (let [root  (str "bug12-" (random-uuid))
           fdb   (cfdb/select-api-version 730)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
-      (let [{:keys [writes seq0-count]} (run-scenario store)]
-        (is (every? #{:ok} writes)
-            "Both writes return :ok")
-        (is (> seq0-count 1)
-            (str "History has " seq0-count " events at seq=0 — duplicate seq (bug 1.2)"))))))
+      (assert-fixed (run-scenario store)))))
diff --git a/test/intemporal/tests/jepsen/bug_1_3_test.clj b/test/intemporal/tests/jepsen/bug_1_3_test.clj
index d6a66a0..86b24e1 100644
--- a/test/intemporal/tests/jepsen/bug_1_3_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_3_test.clj
@@ -1,92 +1,84 @@
 (ns intemporal.tests.jepsen.bug-1-3-test
-  "Bug 1.3 — No recovery poller: engine restart does not resume suspended workflows.
+  "Bug 1.3 — Recovery after restart.  REGRESSION GUARD.
 
-  Root cause (improvements.md §1.3):
-    There is no background process that scans for workflows requiring execution
-    after a restart.  resume-workflow is on-demand only and requires the caller
-    to supply both the workflow function and original arguments.  A new engine
-    with a fresh store (empty callbacks atom) has no way to discover or re-enter
-    workflows that were suspended before the restart.
+  Root cause (improvements.md §1.3) — now FIXED (Phase C):
+    There was no background process that resumed workflows after a restart, and
+    resume required the caller to know the workflow fn + args. A workflow whose
+    engine crashed stayed suspended forever.
 
-  Scenario:
-    1. engine-a starts a workflow that suspends on wait-for-signal.
-    2. engine-a is shut down (simulating a pod crash or rolling restart).
-    3. engine-b is created with a FRESH store instance pointing at the same
-       backing database — exactly what a restarted pod would do.
-    4. The signal is sent via engine-b's store.
-    5. engine-b has no poller: the workflow is never resumed automatically.
+    The fix: durable runnable markers (C3) written on every signal, a lease (C1)
+    so only one worker runs a workflow, the workflow registry (B3) so a workflow
+    can be resumed by id alone, and start-worker (C4) which polls markers, claims
+    the lease, and resumes. A restarted process running a worker recovers
+    workflows it never started.
 
-  These tests assert the CURRENT (buggy) behaviour.  They will fail once the
-  fix from improvements.md §B3 + §C4 is applied (workflow registry + worker loop)."
+  These tests assert the FIXED behaviour: after the engine crashes, a worker on a
+  fresh engine (same shared store) resumes the workflow to completion once the
+  signal arrives. InMemory shares one state atom to model a shared store; JDBC and
+  FDB use the same backing."
   (:require [clojure.test :refer [deftest is testing]]
             [intemporal.core :as intemporal]
             [intemporal.protocol :as p]
             [intemporal.store :as mem]
             [intemporal.store.jdbc :as jdbc-store]
             [intemporal.store.fdb :as fdb-store]
-            [me.vedang.clj-fdb.FDB :as cfdb]))
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [intemporal.internal.workflow-registry :as wreg]))
 
-;; ── Shared workflow ───────────────────────────────────────────────────────────
+(defn rec-act [x] (* x 10))
 
-(defn- wait-signal-wf []
-  (intemporal/wait-for-signal "go")
-  :woke)
+(defn recover-wf [x]
+  (let [a (intemporal/stub #'rec-act)
+        r (a x)]
+    (intemporal/wait-for-signal "go")
+    (+ r 7)))
 
-;; ── Shared scenario ───────────────────────────────────────────────────────────
+(defn- await-status [store wf-id terminal timeout-ms]
+  (let [deadline (+ (System/currentTimeMillis) timeout-ms)]
+    (loop []
+      (let [s (p/get-workflow-status store wf-id)]
+        (if (or (= terminal s) (> (System/currentTimeMillis) deadline))
+          s
+          (do (Thread/sleep 50) (recur)))))))
 
 (defn- run-scenario
-  "Starts workflow on store-a/engine-a, shuts down engine-a, creates engine-b
-  with a fresh store-b, sends signal via store-b, waits.
-  Returns :stuck if engine-b does not auto-resume the workflow."
-  [make-store-a make-store-b]
-  (let [store-a  (make-store-a)
-        wf-id    (str "bug13-" (random-uuid))
-        result   (promise)
-        engine-a (intemporal/make-workflow-engine :store store-a :threads 2)]
-    (future
+  "Start on engine-a (suspends on signal), crash it, then a worker on engine-b
+  resumes after a signal. Returns the terminal status + result."
+  [store]
+  (wreg/clear-registry!)
+  (let [wid (str "bug13-" (random-uuid))]
+    (let [e1 (intemporal/make-workflow-engine :store store :threads 2)
+          f1 (future (intemporal/start-workflow e1 recover-wf [4] :workflow-id wid))]
+      (Thread/sleep 300)
+      (future-cancel f1)
+      (intemporal/shutdown-engine e1))
+    (let [e2   (intemporal/make-workflow-engine :store store :threads 2)
+          stop (intemporal/start-worker e2 :poll-ms 50 :owner-id "bug13-w")]
       (try
-        (deliver result (intemporal/start-workflow engine-a wait-signal-wf []
-                                                   :workflow-id wf-id))
-        (catch Exception e (deliver result {:error (str e)}))))
-    ;; Wait for the workflow to register its callback
-    (Thread/sleep 500)
-    ;; Simulate crash: shut down engine-a and discard store-a
-    (intemporal/shutdown-engine engine-a)
-    (when (instance? java.io.Closeable store-a) (.close store-a))
-    ;; Simulate pod restart: new store with empty callbacks atom
-    (let [store-b  (make-store-b)
-          engine-b (intemporal/make-workflow-engine :store store-b :threads 2)]
-      (p/add-signal store-b wf-id "go" {:source :engine-b-restart})
-      (let [r (deref result 2000 :stuck)]
-        (intemporal/shutdown-engine engine-b)
-        (when (instance? java.io.Closeable store-b) (.close store-b))
-        r))))
+        (intemporal/send-signal store wid "go" {})
+        {:status (await-status store wid :completed 5000)
+         :result (intemporal/get-workflow-result store wid)}
+        (finally (stop) (intemporal/shutdown-engine e2))))))
 
-;; ── In-memory tests (always run) ─────────────────────────────────────────────
+(defn- assert-recovered [{:keys [status result]}]
+  (is (= :completed status) "worker on a fresh engine resumed the crashed workflow (bug 1.3 fixed)")
+  (is (= 47 result) "4*10 + 7 = 47"))
 
-(deftest engine-restart-does-not-resume-in-memory
-  (testing "A fresh InMemoryStore after engine restart has empty callbacks"
-    (is (= :stuck (run-scenario #(mem/->InMemoryStore (atom {}))
-                                #(mem/->InMemoryStore (atom {}))))
-        "No recovery poller: workflow stays suspended after engine-a crash + engine-b start (bug 1.3)")))
+(deftest engine-restart-recovers-in-memory
+  (testing "shared InMemoryStore: worker recovers after crash"
+    (assert-recovered (run-scenario (mem/->InMemoryStore (atom {}))))))
 
-;; ── JDBC tests (require Postgres) ────────────────────────────────────────────
+(deftest ^:integration engine-restart-recovers-jdbc
+  (testing "JdbcStore: worker recovers after crash"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (assert-recovered (run-scenario store)) (finally (.close store))))))
 
-(deftest ^:integration engine-restart-does-not-resume-jdbc
-  (testing "A fresh JdbcStore after engine restart has empty callbacks atom"
-    (let [url (or (System/getenv "DATABASE_URL")
-                  "jdbc:postgresql://localhost:5432/root?user=root&password=root")]
-      (is (= :stuck (run-scenario #(jdbc-store/make-jdbc-store url)
-                                  #(jdbc-store/make-jdbc-store url)))
-          "Signal row in intemporal_signals; engine-b has no poller to find it (bug 1.3)"))))
-
-;; ── FDB tests (require FoundationDB) ─────────────────────────────────────────
-
-(deftest ^:integration engine-restart-does-not-resume-fdb
-  (testing "A fresh FDBStore after engine restart has empty callbacks atom"
-    (let [root (str "bug13-" (random-uuid))
-          fdb  (cfdb/select-api-version 730)
-          db   (.open fdb "docker/fdb.cluster")]
-      (is (= :stuck (run-scenario #(fdb-store/make-fdb-store db root)
-                                  #(fdb-store/make-fdb-store db root)))
-          "Signal in FDB; engine-b has no poller to find it (bug 1.3)"))))
+(deftest ^:integration engine-restart-recovers-fdb
+  (testing "FDBStore: worker recovers after crash"
+    (let [root  (str "bug13-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (assert-recovered (run-scenario store)))))
diff --git a/test/intemporal/tests/jepsen/racing_store.clj b/test/intemporal/tests/jepsen/racing_store.clj
index 04e86c2..29c870b 100644
--- a/test/intemporal/tests/jepsen/racing_store.clj
+++ b/test/intemporal/tests/jepsen/racing_store.clj
@@ -44,6 +44,12 @@
   (is-cancelled?           [_ wf-id]        (p/is-cancelled? inner wf-id))
   (mark-cancelled          [_ wf-id]        (p/mark-cancelled inner wf-id))
   (get-workflow-status     [_ wf-id]        (p/get-workflow-status inner wf-id))
+  (claim-workflow          [_ wf-id o l]    (p/claim-workflow inner wf-id o l))
+  (renew-lease             [_ wf-id o l]    (p/renew-lease inner wf-id o l))
+  (release-lease           [_ wf-id o]      (p/release-lease inner wf-id o))
+  (add-runnable            [_ wf-id r]      (p/add-runnable inner wf-id r))
+  (claim-runnable          [_ o b c]       (p/claim-runnable inner o b c))
+  (delete-runnable         [_ wf-id]        (p/delete-runnable inner wf-id))
 
   (consume-signal [_ wf-id sig-name]
     (let [result (p/consume-signal inner wf-id sig-name)]
diff --git a/test/intemporal/tests/worker_test.clj b/test/intemporal/tests/worker_test.clj
new file mode 100644
index 0000000..5726737
--- /dev/null
+++ b/test/intemporal/tests/worker_test.clj
@@ -0,0 +1,114 @@
+(ns intemporal.tests.worker-test
+  "Phase C — lease (C1), runnable markers (C3) and the recovery worker (C4).
+
+  Proves the durable, cross-pod wake model:
+   - a workflow whose original engine crashed is resumed by a worker after a
+     signal is delivered (the recovery-poller story, bug 1.3 / 1.1 model);
+   - the lease rejects a writer that no longer owns the workflow (bug 1.2)."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as store]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [intemporal.internal.lease :as lease]
+            [intemporal.internal.error :as error]
+            [intemporal.internal.workflow-registry :as wreg]))
+
+(defn w-act [x] (* x 10))
+
+(defn worker-wf [x]
+  (let [a (intemporal/stub #'w-act)
+        r (a x)]
+    (intemporal/wait-for-signal "go")
+    (+ r 1)))
+
+(defn- await-status [store wf-id terminal timeout-ms]
+  (let [deadline (+ (System/currentTimeMillis) timeout-ms)]
+    (loop []
+      (let [s (p/get-workflow-status store wf-id)]
+        (cond
+          (= terminal s) s
+          (> (System/currentTimeMillis) deadline) s
+          :else (do (Thread/sleep 50) (recur)))))))
+
+;; ── C4: worker resumes a crashed workflow after a cross-instance signal ──────────
+
+(defn- check-worker-recovery [store]
+  (wreg/clear-registry!)
+  (let [wid (str "worker-" (random-uuid))]
+    ;; Phase 1: start, suspend on signal, then crash (no signal sent).
+    (let [e1 (intemporal/make-workflow-engine :store store :threads 2)
+          f1 (future (intemporal/start-workflow e1 worker-wf [5] :workflow-id wid))]
+      (Thread/sleep 300)
+      (future-cancel f1)
+      (intemporal/shutdown-engine e1))
+    (is (= :running (p/get-workflow-status store wid))
+        "workflow is durably suspended, not terminal, after the crash")
+    ;; Phase 2: a worker (fresh engine) + a signal delivered via the shared store.
+    (let [e2   (intemporal/make-workflow-engine :store store :threads 2)
+          stop (intemporal/start-worker e2 :poll-ms 50 :owner-id "w2")]
+      (try
+        (intemporal/send-signal store wid "go" {})   ; writes a durable runnable marker
+        (is (= :completed (await-status store wid :completed 5000))
+            "worker claimed the marker, leased, and resumed the workflow to completion")
+        (is (= 51 (intemporal/get-workflow-result store wid)) "5*10 + 1 = 51")
+        (finally (stop) (intemporal/shutdown-engine e2))))))
+
+(deftest worker-recovery-in-memory
+  (testing "shared InMemoryStore: worker resumes a crashed, then-signalled workflow"
+    (check-worker-recovery (store/->InMemoryStore (atom {})))))
+
+(deftest ^:integration worker-recovery-jdbc
+  (testing "JdbcStore: worker resumes via durable runnable marker"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (check-worker-recovery store) (finally (.close store))))))
+
+(deftest ^:integration worker-recovery-fdb
+  (testing "FDBStore: worker resumes via durable runnable marker"
+    (let [root  (str "worker-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (check-worker-recovery store))))
+
+;; ── C1: lease rejects a stale writer ─────────────────────────────────────────────
+
+(defn- check-lease [store]
+  (let [wid (str "lease-" (random-uuid))]
+    (p/save-event store wid {:event-type :workflow-started :workflow-id wid :args []})
+    (is (p/claim-workflow store wid "owner-A" 60000) "A claims the unowned workflow")
+    (is (false? (p/claim-workflow store wid "owner-B" 60000)) "B cannot claim A's live lease")
+    ;; A may write while it holds the lease
+    (binding [lease/*owner* "owner-A"]
+      (p/save-events store wid [{:event-type :activity-completed :seq 0 :result 1}]))
+    ;; A releases; B claims
+    (p/release-lease store wid "owner-A")
+    (is (p/claim-workflow store wid "owner-B" 60000) "B claims after release")
+    ;; A is now stale: its writes must be rejected
+    (is (thrown? clojure.lang.ExceptionInfo
+                 (binding [lease/*owner* "owner-A"]
+                   (p/save-events store wid [{:event-type :activity-completed :seq 1 :result 2}])))
+        "stale owner A's write is rejected (lease lost)")))
+
+(deftest lease-rejects-stale-writer-in-memory
+  (testing "InMemoryStore lease validation"
+    (check-lease (store/->InMemoryStore (atom {})))))
+
+(deftest ^:integration lease-rejects-stale-writer-jdbc
+  (testing "JdbcStore lease validation"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (check-lease store) (finally (.close store))))))
+
+(deftest ^:integration lease-rejects-stale-writer-fdb
+  (testing "FDBStore lease validation"
+    (let [root  (str "lease-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (check-lease store))))

From e25a11b2689f0cf65fdd8c7e0071a76fa4609094 Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Mon, 1 Jun 2026 17:02:34 +0100
Subject: [PATCH 5/9] simplify

---
 improvements.md                               | 935 +++++++++++-------
 .../postgres/20260531000002-multipod.down.sql |   7 -
 .../postgres/20260531000002-multipod.up.sql   |  23 -
 .../20260531000002-ownership.down.sql         |   3 +
 .../postgres/20260531000002-ownership.up.sql  |   9 +
 .../postgres/20260531000003-wake-at.down.sql  |   3 +
 .../postgres/20260531000003-wake-at.up.sql    |  13 +
 src/intemporal/core.cljc                      |  71 +-
 src/intemporal/internal/error.cljc            |  19 -
 src/intemporal/internal/execution.clj         |   9 +
 src/intemporal/internal/execution.cljs        |   7 +
 .../internal/fns/start_workflow.clj           |   7 +-
 src/intemporal/internal/lease.cljc            |  10 -
 src/intemporal/internal/runtime.clj           |  26 +-
 src/intemporal/internal/runtime.cljs          |  23 +-
 src/intemporal/protocol.cljc                  |  34 +-
 src/intemporal/store.cljc                     | 106 +-
 src/intemporal/store/fdb.clj                  | 172 ++--
 src/intemporal/store/jdbc.clj                 | 103 +-
 src2/intemporal/error.cljc                    |  27 -
 src2/intemporal/macros.cljc                   | 227 -----
 src2/intemporal/store.cljc                    | 340 -------
 src2/intemporal/store/foundationdb.clj        | 255 -----
 src2/intemporal/store/internal.cljc           | 139 ---
 src2/intemporal/store/jdbc.clj                | 290 ------
 src2/intemporal/workflow.cljc                 | 222 -----
 src2/intemporal/workflow/internal.cljc        | 362 -------
 test/intemporal/jepsen/checker.clj            |  18 +-
 test/intemporal/jepsen/client.clj             |   5 +-
 test/intemporal/jepsen/nemesis.clj            |   3 +-
 test/intemporal/jepsen/runner.clj             |   7 +-
 test/intemporal/jepsen/worker.clj             |   4 +-
 test/intemporal/jepsen/workflows.clj          |   2 +-
 test/intemporal/tests/bench/memory_test.clj   |   2 +-
 test/intemporal/tests/jepsen/bug_1_2_test.clj |  66 +-
 test/intemporal/tests/jepsen/racing_store.clj |  10 +-
 test/intemporal/tests/timer_recovery_test.clj | 161 +++
 test/intemporal/tests/worker_test.clj         |  70 +-
 .../tests/workflow_registry_test.clj          |   1 -
 test2/intemporal/failures_test.cljc           |  47 -
 test2/intemporal/internal_failures_test.cljc  |  47 -
 test2/intemporal/matchers.cljc                |  20 -
 test2/intemporal/recovery_failure.edn         |  10 -
 test2/intemporal/recovery_failure_test.clj    |  66 --
 test2/intemporal/shutdown_restart_test.clj    |  81 --
 test2/intemporal/shutdown_restart_test.cljs   |  78 --
 test2/intemporal/store_test.cljc              |  91 --
 test2/intemporal/stores/basic_test.clj        | 204 ----
 .../intemporal/stores/basic_workflow_test.clj |  47 -
 .../stores/lots_of_workflows_test.clj         |  71 --
 .../stores/multiple_shutdown_test.clj         |  71 --
 .../stores/release_reenqueue_test.clj         |  79 --
 test2/intemporal/stores/saga_test.clj         |  77 --
 test2/intemporal/test_executor.clj            |  42 -
 test2/intemporal/test_utils.cljc              | 179 ----
 test2/intemporal/vthread-recovery.edn         |  62 --
 test2/intemporal/vthread_recovery_test.clj    |  57 --
 test2/intemporal/vthread_test.cljc            |  95 --
 test2/intemporal/workflow_test.cljc           | 119 ---
 59 files changed, 1133 insertions(+), 4201 deletions(-)
 delete mode 100644 resources/migrations/postgres/20260531000002-multipod.down.sql
 delete mode 100644 resources/migrations/postgres/20260531000002-multipod.up.sql
 create mode 100644 resources/migrations/postgres/20260531000002-ownership.down.sql
 create mode 100644 resources/migrations/postgres/20260531000002-ownership.up.sql
 create mode 100644 resources/migrations/postgres/20260531000003-wake-at.down.sql
 create mode 100644 resources/migrations/postgres/20260531000003-wake-at.up.sql
 delete mode 100644 src/intemporal/internal/lease.cljc
 delete mode 100644 src2/intemporal/error.cljc
 delete mode 100644 src2/intemporal/macros.cljc
 delete mode 100644 src2/intemporal/store.cljc
 delete mode 100644 src2/intemporal/store/foundationdb.clj
 delete mode 100644 src2/intemporal/store/internal.cljc
 delete mode 100644 src2/intemporal/store/jdbc.clj
 delete mode 100644 src2/intemporal/workflow.cljc
 delete mode 100644 src2/intemporal/workflow/internal.cljc
 create mode 100644 test/intemporal/tests/timer_recovery_test.clj
 delete mode 100644 test2/intemporal/failures_test.cljc
 delete mode 100644 test2/intemporal/internal_failures_test.cljc
 delete mode 100644 test2/intemporal/matchers.cljc
 delete mode 100644 test2/intemporal/recovery_failure.edn
 delete mode 100644 test2/intemporal/recovery_failure_test.clj
 delete mode 100644 test2/intemporal/shutdown_restart_test.clj
 delete mode 100644 test2/intemporal/shutdown_restart_test.cljs
 delete mode 100644 test2/intemporal/store_test.cljc
 delete mode 100644 test2/intemporal/stores/basic_test.clj
 delete mode 100644 test2/intemporal/stores/basic_workflow_test.clj
 delete mode 100644 test2/intemporal/stores/lots_of_workflows_test.clj
 delete mode 100644 test2/intemporal/stores/multiple_shutdown_test.clj
 delete mode 100644 test2/intemporal/stores/release_reenqueue_test.clj
 delete mode 100644 test2/intemporal/stores/saga_test.clj
 delete mode 100644 test2/intemporal/test_executor.clj
 delete mode 100644 test2/intemporal/test_utils.cljc
 delete mode 100644 test2/intemporal/vthread-recovery.edn
 delete mode 100644 test2/intemporal/vthread_recovery_test.clj
 delete mode 100644 test2/intemporal/vthread_test.cljc
 delete mode 100644 test2/intemporal/workflow_test.cljc

diff --git a/improvements.md b/improvements.md
index a49f5cf..29b1b77 100644
--- a/improvements.md
+++ b/improvements.md
@@ -1,379 +1,652 @@
-# Intemporal Design Critique — Multi-Pod / k8s Replica Set Context
+# Source Improvement Pass (current task)
 
 ## Context
 
-The user asked for a design analysis of the `intemporal` library, with explicit focus on:
-- Deadlocks and "lost workflow" failure modes
-- Other criticism
-- Behaviour under a Kubernetes deployment where multiple pods of a replica set run the engine concurrently and can scale up/down
-
-The library positions itself as "Temporal/Cadence-inspired" — an event-sourced workflow engine where workflow state is reconstructed from a persisted event log so that activities don't re-execute after a process crash.
-
-This document is an **analysis deliverable**, not an implementation plan. It is structured to be useful as input for an architecture decision (adopt / fork / replace / contribute fixes).
+A read-only review of `src/` (3 parallel explore agents over execution/concurrency, stores/protocol,
+and supporting modules) surfaced a backlog of improvements. The multi-pod milestones (A/B/C + C2)
+below this section are all **landed**; this pass is about correctness/quality gaps in the existing
+code, **verified against the source** (several agent "HIGH" claims were false positives and are
+excluded — see "Rejected" at the end). Goal: fix the real correctness bugs, defer/curate the rest.
+
+Two cross-cutting facts that shape the fixes:
+- **Cancellation writes `:workflow-failed`, never `:workflow-cancelled`.** `finalize-cancelled`
+  (execution.clj:356-368) and the loop-top cancel check (execution.clj:522-536) both persist a
+  `:workflow-failed` event, yet `get-workflow-status` reports `:cancelled` from the `cancelled`
+  flag. So status and history disagree, and the `:workflow-cancelled` *event* is never written
+  (only the observer hook fires). This underlies several findings below.
+- **`mark-cancelled` only sets a flag; it doesn't change `status`.** So a cancelled-but-not-yet-
+  resumed workflow still satisfies `list-pending`'s `status NOT IN (completed,failed)` and gets
+  re-resumed. Resume is self-correcting (the loop-top check finalizes it), so this is **wasteful,
+  not corrupting** — but it's a real inefficiency and a consistency smell.
+
+## Tier 1 — Real correctness/consistency bugs (recommended to fix)
+
+1. **Cancelled workflows leak into `list-pending`** (all 3 stores: store.cljc:119,
+   jdbc.clj:207, fdb.clj — wf-owner index). A cancelled workflow is re-listed and re-resumed every
+   poll until the resume happens to finalize it; on FDB the wf-owner index entry is also never
+   cleared on `mark-cancelled` (fdb.clj `maintain-owner-index!` only handles started/terminal),
+   so it lingers. **Fix:** exclude cancelled from `list-pending` in all three
+   (JDBC `AND cancelled = FALSE`; InMemory add `(not (:cancelled wf))`; FDB clear the wf-owner
+   entry in `mark-cancelled`). Net: a cancel becomes terminal-for-scan immediately.
+
+2. **Cancellation is recorded as failure** (execution.clj:356-368, 522-536). Persist a real
+   `:workflow-cancelled` event and make `get-workflow-status` derive `:cancelled` from history too
+   (so status↔history agree, and a resumed cancel ends `:cancelled` not `:failed`). Touches the
+   `terminal-status?`/status-column logic in all 3 stores + the B2 status maintenance in
+   save-event(s) (recognise `:workflow-cancelled` as terminal). **This is the highest-value fix**
+   and subsumes #1's "self-correcting" wastefulness by giving cancel a true terminal state.
+
+3. **Lossy error round-trip** (error.cljc:115-136). `throwable->map` stringifies the type and
+   `map->exception` always rebuilds a generic `ex-info`, so the typed predicates
+   (`cancelled-exception?`, and the *never-referenced* `activity-timeout?/failed?/rejected?` —
+   which don't even exist as predicates, only constructors) can't classify a deserialized error.
+   **Fix:** stamp an `:exception-kind` keyword into the map from the `::*` marker in ex-data, and
+   have `map->exception` re-dispatch to the right constructor; add the missing predicates. Scope to
+   what's actually consumed — don't gold-plate.
+
+4. **`process-one` strands a workflow on resume failure** (core.cljc:486-492). If
+   `resume-workflow` throws (e.g. fn not in registry), the workflow stays owned by this worker,
+   never released, never retried, no error persisted. **Fix:** on throw, persist a
+   `:workflow-failed` event (so it leaves `list-pending` and is observable) — releasing ownership
+   alone would just hot-loop across pods. Pair with #5.
+
+5. **`resolve-workflow` returns nil silently** (workflow_registry.cljc:47-50) → the resume failure
+   in #4 is an opaque NPE-ish path. **Fix:** throw a clear "workflow not registered" ex-info with
+   the name, so #4's persisted error is meaningful.
+
+## Tier 2 — Robustness (worth doing, lower risk)
+
+6. **start-worker error loop has no backoff** (core.cljc:496-507): a persistent failure
+   (DB down) hot-loops at `poll-ms` hammering the store. **Fix:** exponential backoff on
+   consecutive failures, reset on success. (Keep it simple — no circuit-breaker/max-retries; a
+   worker that gives up permanently is worse than one that keeps trying slowly.)
+
+7. **`stop-worker` doesn't join the thread** (core.cljc:512-515): returns before the in-flight
+   resume finishes. **Fix:** `(.join thread timeout)` after interrupt, before `release-owner`.
+
+8. **Observer errors `println` to stdout** (context.cljc:70-76). **Fix:** route through
+   `log/` (telemere) at warn. One-liner.
+
+9. **`max-iterations` throws instead of finalizing** (execution.clj:508-510, .cljs:534-536). The
+   workflow never reaches a terminal state — it just crashes the resume. **Fix:** `finalize-failed`
+   with a typed "replay budget exceeded" error so it's terminal + observable. (This is the §2.5
+   foot-gun from improvements.md.)
+
+## Tier 3 — Curate / document, do NOT auto-fix
+
+- **`.clj`/`.cljs` execution duplication** (process-signal/timer, suspension branch): real drift
+  risk, but extracting to `.cljc` is a large refactor touching the hottest path — out of scope for
+  a quality pass; note it as a tracked follow-up.
+- **Observer timestamps captured at replay time, not event time** (observer.cljc, every method):
+  real determinism smell, but observers are side-channel (logging/otel), not replay inputs, so it
+  doesn't corrupt execution. The event itself already carries a `:timestamp`; document that
+  observers should prefer it. Low priority.
+- **Anonymous-activity / closure name derivation** (activity.cljc:8-29, workflow_registry.cljc):
+  fragile for non-var fns, but the documented API is var-based (`#'my-fn`); add a note/guard rather
+  than a redesign.
+- **JDBC `intemporal_history (workflow_id, id)` index**: `load-history`'s `ORDER BY id` leans on
+  the PK; add the covering index only if a perf need shows up (note in DEVELOPMENT.md).
+- **Resource-leak hardening** (pending-timers/pending-asyncs unbounded; CallerRunsPolicy blocking):
+  theoretical under pathological workflows; leave as documented caveats.
+
+## Rejected (false positives — verified against source, do not action)
+
+- "`doseq` over nil `:protocols` crashes" (start_workflow.clj:33) — **false**; `(doseq [x nil] …)`
+  is a no-op in Clojure (verified by eval).
+- "FDB `get-pending-signals` off-by-one key extraction" — the reviewing agent **self-retracted**;
+  `(- (count key) 3)` is correct for the `["signals" wf-id sig-name ts uuid]` layout.
+- "submit-workflow fire-and-forget loses errors" — by design; the durable workflow record +
+  `await-workflow` are the observability path, not the future. Doc-only at most.
+
+## Critical files
+
+`src/intemporal/internal/execution.clj` (+ `.cljs`) — cancellation finalize + max-iterations;
+`src/intemporal/core.cljc` — `start-worker` / `process-one`; `src/intemporal/store.cljc`,
+`src/intemporal/store/jdbc.clj`, `src/intemporal/store/fdb.clj` — `list-pending` cancelled filter +
+status-derivation + FDB index-on-cancel; `src/intemporal/internal/error.cljc` — error round-trip +
+predicates; `src/intemporal/internal/workflow_registry.cljc` — resolve-or-throw;
+`src/intemporal/internal/context.cljc` — observer log. Reuse existing
+`error/throwable->map`/`map->exception`, `finalize-failed`, `terminal-status?`, the B2 status
+maintenance, and `intemporal.internal.logging`.
+
+## Verification
+
+- Per fix, add/extend a targeted test and run it on **InMemory + JDBC + FDB** (PG + FDB up via
+  `docker compose up -d postgresql foundation`):
+  - Tier 1.1/1.2: cancel a workflow, assert it is **absent from `list-pending`**, `get-workflow-
+    status` = `:cancelled`, and history ends with `:workflow-cancelled` (new) — across all 3 stores;
+    FDB: assert the wf-owner index entry is gone.
+  - Tier 1.3: round-trip each typed error through `throwable->map`→`map->exception` and assert the
+    predicate still classifies it.
+  - Tier 1.4/1.5: a worker resuming an unregistered workflow persists `:workflow-failed` and stops
+    re-listing it; `resolve-workflow` throws a named error.
+  - Tier 2.9: a workflow exceeding `max-iterations` ends `:failed` (terminal), not an escaped throw.
+- Full `bin/kaocha` (in-memory + test + test-cljs) green except the known
+  `replay-check-test/test-log-once-workflow` flake.
+- `clj-kondo --lint src test` stays clean in all touched namespaces.
+- The existing `worker_test`, `timer_recovery_test`, and `bug_2_3_test` (cancellation) guards stay
+  green — #2 changes the cancel terminal event, so `bug_2_3` assertions may need updating to expect
+  `:workflow-cancelled` / status `:cancelled` consistently.
 
 ---
 
-## TL;DR
-
-**`intemporal` is a single-process resilient workflow engine. It is unsafe to run more than one replica against the same store. The README's "not production-ready" disclaimer is accurate — and the gap to a true distributed orchestrator is structural, not cosmetic.**
-
-The two showstoppers for any k8s replica-set deployment:
+# Fix Plan — intemporal multi-pod / replica-set safety  (LANDED — history below)
 
-1. **No durable wake mechanism.** Signal callbacks and timers live in a process-local atom. When the pod that registered them dies, nothing in the system knows to wake the workflow again. The workflow is *persisted but orphaned*.
-2. **No ownership / leasing.** Two pods can race on the same `workflow-id` with no detection. The JDBC store actively masks the race with `ON CONFLICT … DO UPDATE`, silently corrupting the event log.
+## Context
 
-A third structural issue: there is **no poller / recovery worker**. After a crash, no pod scans for workflows that should be running. Resume only happens if an external actor explicitly calls `resume-workflow` with the right function and args. Durability of state without durability of execution is illusion.
+`intemporal` is an event-sourced workflow engine. A design review (`improvements.md`)
+plus a new Jepsen-style test suite (`test/intemporal/tests/jepsen/`) confirmed **five
+structural bugs**, all reproduced deterministically against InMemory, JDBC (Postgres),
+and FDB stores:
 
----
+| Bug | Symptom | Root cause |
+|-----|---------|-----------|
+| **2.1** | Signal lost if it arrives between consume-check and callback registration | `process-signal` is non-atomic: `consume → register` |
+| **2.3** | `cancel-workflow` never terminates a workflow sleeping on `wait-for-signal` | flag is set but no wake fires; sleeper never re-enters the loop |
+| **1.2** | Two writers at the same `seq` silently corrupt history | JDBC `ON CONFLICT DO UPDATE`; FDB UUID-suffixed keys; InMemory blind append |
+| **1.1** | Signal sent from another pod never wakes the workflow | wake callbacks live in a process-local atom on the store record |
+| **1.3** | A restarted pod never resumes the workflows it was running | no recovery poller, no workflow→fn registry |
 
-## Architecture Summary (what I'm critiquing)
+The engine is currently **single-process-safe only**. The goal is to make it safe to run
+multiple replicas against one store. Per decisions taken:
 
-Verified from the source:
+- **Delivery: three incremental, independently-shippable milestones (A → B → C).**
+- **CLJS: parity for the Phase-A correctness fixes only.** `execution`/`runtime`/`start_workflow`
+  each have split `.clj`/`.cljs` versions; the distributed primitives (lease, poller) are
+  JVM-only by nature (CLJS has no Postgres/FDB and is single-process).
+- **Backends for Phase C: Postgres + FDB + InMemory** (the three with tests today).
 
-- **Engine** (`src/intemporal/core.cljc:445`): a map of `{:store :executor :scheduler :registry :observer}`. No identity, no node id, no clustering primitives.
-- **start-workflow** (`src/intemporal/internal/fns/start_workflow.clj:8-78`): generates a UUID, writes `:workflow-started`, calls `run-workflow-internal` **synchronously on the calling thread**, then loops blocking on a local `promise` until the workflow completes or is interrupted.
-- **resume-workflow** (`src/intemporal/core.cljc:366-390`): the caller must supply `workflow-id`, `workflow-fn`, and `args`. The engine replays history and re-enters execution.
-- **Stores** implement `IStore` (`src/intemporal/protocol.cljc:8-21`). The protocol contains only: history read/write, signal add/consume, callback register/unregister, cancellation flag, and status. **No claim, no lease, no heartbeat, no "list running workflows".**
-- **JDBC store** (`src/intemporal/store/jdbc.clj`): events written under transactions; uses `ON CONFLICT (workflow_id, seq) DO UPDATE` (line 100-103); signal consumption uses `FOR UPDATE SKIP LOCKED` (line 137). Signal callbacks are kept in a **per-process atom** on the store record (line 72, 143-147).
-- **InMemoryStore** (`src/intemporal/store.cljc:8-78`): identical callback semantics — a single atom.
-- **DefaultScheduler** (`src/intemporal/internal/runtime.clj`): timers held in an in-memory `pending-timers` atom. Lost on process exit.
+The existing Jepsen tests currently **assert the buggy behaviour** (each expects `:stuck`).
+As each bug is fixed, its test is **inverted into a regression guard** asserting the correct
+behaviour — this is the primary acceptance signal for every milestone.
 
 ---
 
-## Section 1 — Distributed-Deployment Showstoppers
-
-### 1.1 No wake mechanism survives a pod restart  *(severity: critical)*
-
-`register-signal-callback` and `schedule-timer` store their continuation **inside the process** (atom for signals, `ScheduledFuture` for timers). Concretely:
-
-- `JdbcStore` carries `callbacks` as `(atom {})` at construction (`src/intemporal/store/jdbc.clj:72`). Two pods sharing the same Postgres each have their own empty atom.
-- `add-signal` in JDBC (`src/intemporal/store/jdbc.clj:122-132`) writes the signal under a transaction, then does `(when-let [callback (get-in @callbacks [workflow-id signal-name])] (future (callback)))` — **only the pod that registered the callback can fire it**.
-
-Failure trace:
-
-1. Pod A executes workflow X up to `(wait-for-signal :go)`. It writes the suspension to history, registers callback in **pod-A-local atom**, blocks the calling thread on a promise.
-2. Pod A crashes (k8s scale-down, OOM, node failure).
-3. Pod B receives an HTTP request → calls `(send-signal store "X" :go {})`. Postgres now has the signal row. Pod B's local callbacks atom is empty for X → **no wake**.
-4. Workflow X is permanently stuck. Its history is intact, its signal is queued, and no process knows to re-enter execution.
-
-Timers have the same problem with worse blast radius: a 1-hour timer scheduled on pod A *vanishes* the moment pod A dies, even if nobody sends a signal. No row in any table, no scheduled job, no poller.
-
-**This is the "losing workflows" failure mode**, and it does not require any race: it happens on every routine k8s rolling restart.
-
-### 1.2 No ownership → silent concurrent execution  *(severity: critical)*
+## Milestone 1 — Phase A: single-process correctness — ✅ LANDED (fixes 2.1, 2.3)
 
-`start-workflow` and `resume-workflow` do not claim anything. Two pods can run the same workflow id concurrently. Specifically:
+No schema change. Each fix touches both `.clj` and `.cljs`. Low risk.
 
-- `JdbcStore.save-events` uses `ON CONFLICT (workflow_id, seq) DO UPDATE` (`src/intemporal/store/jdbc.clj:100-103`). When two pods append the same seq, **the loser silently wins** (last writer overwrites). The race is masked; the event log is non-deterministic.
-- Activities are re-executed on each pod — at-least-once degrades to at-many-times.
-- The replay invariant ("same input → same event stream") is violated because two engines may emit different events at the same seq.
+**Outcome:** A1 (bug 2.1) and A2 (bug 2.3) implemented and verified on InMemory + JDBC + FDB;
+CLJS parity confirmed. A3 was reclassified to Phase C during implementation (see below).
+Full `bin/kaocha` (all suites): **test 40/0/0, in-memory 40/0/0, test-cljs 44/0/0** — zero
+failures across JVM (incl. live JDBC + FDB integration) and ClojureScript. `bug_2_1`/`bug_2_3`
+inverted to assert the fix; `bug_1_1`/`bug_1_2`/`bug_1_3` still assert the (multi-pod) buggy
+behaviour, awaiting Phase C.
 
-This becomes very easy to trigger: `start-workflow` blocks the caller. A reverse-proxy retry on a slow `POST /workflows` will re-invoke `start-workflow`, and if the client supplies `:workflow-id` for idempotency the second call appends a duplicate `:workflow-started` event to history rather than rejecting (no uniqueness check; the `seq` for the first event likely overwrites if both pods reach `seq=0` simultaneously).
+**Additional fix surfaced during A1 — `start_workflow.clj` wake handshake (JVM only).**
+Making signal delivery prompt exposed a pre-existing race in the blocking resume mechanism:
+`wake-fn` re-entered `run-workflow-internal` on the callback thread and swapped
+`resume-promise-atom` (new promise) before the main loop read it, so a wake firing *during*
+suspension setup left the main loop waiting on an undelivered promise (and allowed two threads
+to execute one workflow). Rewrote it so `wake-fn` only enqueues a token on a
+`LinkedBlockingQueue`; **all** `run-workflow-internal` calls run on the main thread in the
+loop. The queue makes the wake edge-safe (a token enqueued before `take` is not lost) and
+serializes execution. Confirmed deterministic over repeated runs. CLJS is single-threaded and
+does not exhibit the race (44/44 cljs tests green), so `start_workflow.cljs` was left
+unchanged.
 
-A safer schema would use `ON CONFLICT DO NOTHING` (or reject) on event inserts, plus a `(workflow_id, owner_lease, lease_expires_at)` claim row.
+### A1 — Register-then-consume signal race (bug 2.1)
 
-### 1.3 No recovery poller  *(severity: critical)*
+**Files:** `src/intemporal/internal/execution.clj` (`process-signal`, L249-281;
+`process-signal-with-timeout`, L283-322) **and** `src/intemporal/internal/execution.cljs`
+(same fns, L249-281 / mirrored).
 
-There is no background process anywhere in the codebase that scans for workflows requiring execution. `resume-workflow` is **on-demand only** and **requires the caller to know the workflow function and args**.
-
-This couples recovery to application code: every pod that starts up must explicitly enumerate "things that might be suspended" and call `resume-workflow` with the right vars in scope. The library provides no list-by-status query and no API for "given a workflow id, find the function and resume". For a multi-replica deployment, recovery is essentially a problem the user has to solve outside the library.
-
-Temporal solves this with task queues + workers that long-poll the server. Intemporal has neither concept.
-
----
-
-## Section 2 — Deadlock & Lost-Workflow Scenarios
-
-### 2.1 Register-after-consume race  *(severity: high, even on a single pod)*
-
-`process-signal` (`src/intemporal/internal/execution.clj:223-255`) is described by the explore agent as:
+**Change:** invert the order to **register-first, then check**:
 
 ```
-1. consume-signal — if present, return
-2. otherwise register-signal-callback
-3. suspend
+register-signal-callback        ; callback is idempotent: it consumes atomically + wakes
+if (consume-signal) present:
+    unregister-signal-callback  ; we won the inline race
+    save :signal-received event ; handle inline
+    :continue
+else:
+    :wait-signal                ; callback stays armed; whoever consumes first wins
 ```
 
-This is a classic TOCTOU. Between (1) returning nil and (2) writing the callback, another thread (or another pod) can call `add-signal`. The signal lands in the store; the callback fires nothing (it isn't registered yet); the registration completes after the signal write; the workflow suspends forever.
-
-The fix is the standard one: register the callback first, then check, then unregister + consume if a signal was already present.
-
-### 2.2 Signal sent to a workflow not yet started
-
-`send-signal` will happily write a signal for an unknown workflow id (in JDBC, line 124 inserts a workflow row via upsert). If the workflow is later started but the start path doesn't drain pre-existing signals before reaching `wait-for-signal`, the signal may or may not be picked up depending on ordering — worth a targeted test, since signals are addressed by name and the workflow expects FIFO semantics per name.
-
-### 2.3 Cancellation cannot reach a suspended workflow
-
-`cancel-workflow` (`src/intemporal/core.cljc:411-418`) sets a flag. Cancellation is **polled at sequence points** (per the explore agent: `check-cancelled!` before each operation, and at the top of the execution loop). A workflow that is suspended on `wait-for-signal` with no signal will:
-
-- never re-enter the execution loop on its own,
-- never poll the flag,
-- be invisible to cancellation.
-
-So `cancel-workflow` is **not reliable for any workflow that is currently waiting**. The flag is set in the DB but the workflow only sees it next time it wakes — which may be never (see 1.1).
-
-### 2.4 Long-lived `start-workflow` thread
-
-`start-workflow` blocks the calling thread until the workflow either completes or is interrupted (`src/intemporal/internal/fns/start_workflow.clj:67-75`). A workflow that waits 30 days for a signal holds the caller's thread for 30 days. This is incompatible with HTTP request/response in any normal web framework and leaks pod resources at scale. The "right" pattern (return a workflow id immediately; durable wake later) is precisely the pattern that doesn't exist (see 1.1, 1.3).
-
-### 2.5 `max-iterations` foot-gun
-
-Default 1000 replay iterations (`src/intemporal/internal/fns/start_workflow.clj:23`). A workflow with thousands of activities or a long signal-driven loop will silently fail at replay. No clear surface to detect this in production.
-
-### 2.6 In-flight activities at shutdown become "interrupted"
-
-Per the crash-test agent: activities crashed mid-execution are marked `:activity-interrupted` and re-run on resume. This is correct behaviour for at-least-once. **However**, combined with 1.1, the resume never happens automatically — so the activity neither completes nor restarts. Worth distinguishing in docs: "at-least-once if you remember to call resume-workflow".
+`consume-signal` is already atomic in all three stores (InMemory `swap!`,
+JDBC `FOR UPDATE SKIP LOCKED` + `DELETE`, FDB range-limit-1 + `clear`), so only the inline
+path **or** the callback consumes — never both. A benign double-wake is harmless (wake just
+replays). Apply the identical reorder to `process-signal-with-timeout`.
+
+**Guard test:** invert `bug_2_1_test.clj` (all 3 stores) to assert the workflow **wakes and
+completes** and no signal is left pending. Keep `racing_store.clj` — it now proves the fix
+holds under the exact adversarial interleaving.
+
+### A2 — Cancellation wakes a sleeper (bug 2.3)
+
+**Files:** `src/intemporal/protocol.cljc` (add `wake-workflow`), all three stores
+(`store.cljc`, `store/jdbc.clj`, `store/fdb.clj`), `src/intemporal/core.cljc`
+(`cancel-workflow`, L411-418), and the suspension path in `execution.clj`/`.cljs`.
+
+**Change:**
+1. Add `wake-workflow [store workflow-id]` to `IStore`. It fires every wake callback
+   registered for that workflow (reuse the existing per-`[workflow-id signal-name]`
+   callbacks atom; also register the bare `wake-fn` under a reserved key
+   `[workflow-id ::wake]` whenever a workflow suspends — done in `handle-suspension`).
+2. `cancel-workflow` calls `mark-cancelled` **then** `wake-workflow`.
+3. On wake the workflow re-enters `run-workflow-internal`; `wait-for-signal`
+   (core.cljc:246) calls `check-cancelled!` first, which throws
+   `workflow-cancelled-exception` → `finalize-cancelled` → terminal
+   `:workflow-cancelled` event.
+
+Reuses the existing wake mechanism (`wake-fn` in `start_workflow.clj` L33-49) and
+`check-cancelled!` (`context.cljc` L37-40). No new execution machinery.
+
+**Guard test:** invert `bug_2_3_test.clj` (all 3 stores) to assert the workflow reaches a
+**terminal** state (future returns; last history event is `workflow-cancelled`) — not just
+that the flag is set.
+
+### A3 — Reject concurrent same-seq writes (bug 1.2) — **MOVED TO PHASE C**
+
+**Discovery during implementation (kept here as a record):** an attempt to make
+`save-events` throw on any pre-existing concrete `seq` broke the happy path. The engine
+**legitimately re-writes the same concrete seq on every replay pass** (async/join completion
+events in `process-pending-asyncs-parallel`, execution.clj L148-196), which is exactly why
+`ON CONFLICT DO UPDATE` exists. In normal single-process operation there is only **one
+execution thread per workflow**, so two *divergent* same-seq writes never occur — they only
+arise across pods. Therefore **bug 1.2 has no single-process manifestation and its correct
+fix is the Phase C lease**: every `save-events` validates the owner lease in the same
+transaction and throws `LeaseLostException` when a pod that lost ownership tries to write.
+The stores are left at their original idempotent-upsert behaviour for Phase A.
+
+**Net effect:** Phase A now ships **A1 (bug 2.1) + A2 (bug 2.3)**. The `bug_1_2_test`
+guard stays asserting the (multi-pod-only) buggy behaviour until Phase C, where it is
+inverted to assert the losing writer sees `LeaseLostException`.
 
 ---
 
-## Section 3 — Other Criticism
-
-### 3.1 JDBC schema lacks the columns it needs
-
-Looking at `resources/migrations/postgres/20260215214002-initial-schema.up.sql` (referenced by the explore agent): three tables, no `status` column on `intemporal_workflows`, no `owner`, no `lease_expires_at`, no `last_heartbeat_at`, no index for "find running workflows". Status is derived by scanning `intemporal_history` and reading `last(event-type)` — O(history-length) per status query.
-
-`ON CONFLICT (workflow_id, seq) DO UPDATE` (line 100-103) is the wrong policy. Two correct writers should not be allowed to coexist; the conflict should be loud (`DO NOTHING` + check `affected`, or a `version` column with CAS).
-
-### 3.2 The IStore protocol is too thin
-
-For a multi-tenant durable orchestrator you'd expect at minimum:
-
-- `claim-workflow [store workflow-id worker-id lease-ttl]` → boolean
-- `renew-lease [store workflow-id worker-id]`
-- `release [store workflow-id worker-id]`
-- `list-runnable [store worker-id batch-size]` (signals arrived / timers due / leases expired)
-- Persistent timer rows (`{workflow_id, seq, fire_at}`)
-- Persistent "needs wake" markers
-
-None of these exist. Adding them is not a small patch; it touches the execution engine's assumption that wakes are local.
-
-### 3.3 Signal callbacks duplicated as in-process state across store impls
-
-Every store maintains its own callback atom (`InMemoryStore` via `:signal-callbacks` in the state map, `JdbcStore` via a separate `(atom {})` field). For the JDBC store this is conceptually wrong: the store is shared, but a process-local atom shadows it. A correct multi-pod implementation would use a notification mechanism the database already provides — Postgres `LISTEN/NOTIFY`, an explicit watch table, or an external pub/sub — and would remove `register-signal-callback` from `IStore` entirely (it isn't really a store concern).
-
-### 3.4 No separation between "orchestrator" and "worker"
-
-`IActivityExecutor` runs activities in the same process that runs the workflow. There is no way to dispatch activities to a separate worker pool (e.g., a "heavy I/O" replica set distinct from "orchestrator" replicas). Heavy activities consume the same thread budget that drives workflows.
-
-### 3.5 Recovery requires the caller to know the workflow function
-
-`resume-workflow` takes `workflow-fn` and `args`. The library has no registry that maps `workflow-id → function var`. Every pod that wants to recover must:
-
-1. Query the store for workflows in `:running` status (no such query exists).
-2. Look up the right function var (no such mapping exists).
-3. Recover the original args (they live in the `:workflow-started` event — accessible, but undocumented).
-
-In practice this means the user writes their own dispatch table and recovery loop. The library does not provide a working recovery story out of the box.
-
-### 3.6 Observer protocol is a good idea, slightly under-spec'd
-
-`IWorkflowObserver` (`src/intemporal/protocol.cljc:45-62`) is clean and gives the right hooks for tracing. Two gaps worth noting:
-
-- No `on-store-write` / `on-suspension-persisted` — useful for "did the durability write succeed before we acked the activity?"
-- No `on-replay-iteration` — useful for diagnosing slow replays.
-
-### 3.7 Documentation gap
-
-The README says "not production-ready" but doesn't enumerate **why**. A short "Operational Caveats" section listing 1.1, 1.2, 1.3 would prevent users from misjudging the library based on the Temporal-flavoured API surface.
+## Milestone 2 — Phase B: operational hardening (enables Phase C)
+
+No bug flips here, but B2/B3 are load-bearing for the multi-pod poller.
+
+- **B2 — `status` column.** New Postgres migration adding
+  `intemporal_workflows.status` (`running|completed|failed|cancelled`); FDB
+  `["state" wf-id "status"]` key; InMemory state key. Maintain it in the
+  `finalize-*` paths (`execution.clj`/`.cljs`). Makes `get-workflow-status` O(1) and gives
+  the poller a cheap "find runnable" predicate.
+- **B3 — Workflow registry (load-bearing).** New `src/intemporal/internal/workflow_registry.cljc`:
+  `register-workflow!` maps a name → fn var. Store the **name** in the `:workflow-started`
+  event payload (already written in `start_workflow.clj` L53-56). Add an arity
+  `resume-workflow [engine workflow-id]` (core.cljc:366) that resolves both fn and args from
+  the first history event — removing today's requirement that the caller supply them.
+- **B4 — Async `submit-workflow` (optional this milestone).** `submit-workflow` writes
+  `:workflow-started` and returns `{:workflow-id …}` immediately; `start-workflow` becomes
+  `submit` + `await`. Unblocks the "HTTP returns an id" pattern and is the entry the worker
+  loop uses in Phase C.
 
 ---
 
-## Section 4 — What a Multi-Pod-Safe Version Would Need
-
-Not a request to implement; a calibration of how far the library is from the goal.
-
-1. **Lease-based ownership.** Add `claim_workflow(worker_id, ttl)` + `renew` + `release`. Reject all writes from a worker whose lease has expired. Heartbeat from a background thread.
-2. **Persistent timers.** Add a `intemporal_timers (workflow_id, seq, fire_at, claimed_by, claimed_until)` table and a poller (`SELECT … WHERE fire_at <= now() AND claimed_until < now() FOR UPDATE SKIP LOCKED`).
-3. **Persistent wake markers.** When a signal arrives or a timer fires, write a row to `intemporal_runnable (workflow_id)`. Each pod polls this table (or `LISTEN`s on `NOTIFY`).
-4. **Durable workflow registry.** Map `workflow_id → workflow_function_symbol + args`. Store the symbol in the `:workflow-started` event; have every pod register the symbols it can resolve.
-5. **Reject concurrent writers.** Change `ON CONFLICT DO UPDATE` to `DO NOTHING` and fail the workflow run on conflict (lease violation).
-6. **Async `start-workflow`.** Return `{:workflow-id …}` immediately; let the worker loop pick up the new workflow from the runnable queue.
-7. **Fix the signal register-then-consume race** (2.1) — even single-process correctness depends on this.
-8. **Cancellation that wakes a sleeper.** Cancellation should write a runnable marker that forces the workflow to wake and observe the flag (currently it only sets the flag).
-
-Items 1–6 are essentially "build a real distributed workflow engine". Item 7 is a bug fix. Item 8 is a small targeted change.
+## Milestone 3 (REVISED) — Ownership-based recovery, NO leases — ✅ LANDED
+
+**Outcome:** the lease model was fully removed and replaced by the ablauf-style ownership model.
+All bug guards pass on InMemory + JDBC + FDB. Full `bin/kaocha`: **in-memory 47/0/0,
+test-cljs 27/0/0, test 67** (sole failure = the documented pre-existing `test-log-once-workflow`
+flake). `clj-kondo --lint src test`: **0 errors, 5 warnings** (down from 3/37), every warning in a
+pre-existing file untouched by this milestone (unused `is` refer in two store/bench fdb tests;
+`promesa.core` / `with-trace-logging` in three cljs tests, used via macroexpansion clj-kondo can't
+see). All namespaces changed in this work lint clean.
+
+- **Removed:** `internal/lease.cljc`, `lease-lost-exception`/`lease-lost?`, the 6 lease+runnable
+  `IStore` methods, all `save-events` lease validation, the `intemporal_runnable` table, and the
+  `add-runnable` calls in `add-signal`/`mark-cancelled` (the in-process callback fire stays as the
+  embedded single-process wake).
+- **Added:** migration `20260531000002-ownership` (adds the `owner` column + index; the prior
+  lease/runnable WIP migrations were collapsed away so no lease/runnable SQL exists anywhere);
+  3 `IStore` methods `claim-owner` / `list-pending` /
+  `release-owner` on InMemory + JDBC + FDB (FDB keeps a `["wf-owner" owner wf-id]` index, kept in
+  sync in `save-event(s)` via `maintain-owner-index!`, moved on claim/release). `start-worker`
+  reworked to the scan model: each poll `list-pending` → `claim-owner` → `resume-workflow [engine
+  wf-id]`, sequentially on the poll thread; stop calls `release-owner`. Default `:poll-ms` 500,
+  `:batch-size` 100, stable `:owner-id` expected in production.
+- **Why it fixes the bugs:** 1.2 — `claim-owner` (`UPDATE … WHERE owner IS NULL OR owner=me`) is
+  the atomic exclusivity gate, and the worker runs owned workflows one at a time → no concurrent
+  writers. 1.1 — the persisted signal is picked up by the owning/any pod's next scan; replay
+  consumes it. 1.3 — the scan *is* recovery: a restarted pod with the same owner-id re-lists and
+  resumes its own non-terminal workflows. No time-based lease anywhere.
+- **Tests reworked:** `worker_test` (recovery + claim-exclusivity, 3 stores); `bug_1_2` asserts
+  claim exclusivity; `bug_1_1`/`bug_1_3` use the scan worker unchanged; `racing_store` delegates
+  the 3 new methods. Shared-DB assertions are wid-scoped (membership, not global emptiness).
+
+### Original revised design (for reference)
+
+
+### Context
+
+The lease-based Phase C (below, "LANDED") is being **replaced**. The user does not want
+time-based leases. Instead: mark every workflow with an **ownership column**, and on pod start
+(and on every poll) pick up all non-terminal workflows for this owner — the ablauf
+`job_owner` / `release-tasks!` model. Exclusivity comes from atomically stamping the owner
+(`UPDATE … WHERE owner IS NULL`), not from a lease with expiry. Decisions taken:
+
+- **Pure ownership scan, no runnable markers.** Drop the `intemporal_runnable` table entirely.
+  A worker periodically lists and resumes every non-terminal workflow it owns-or-null; replay
+  consumes any pending signal and observes cancellation. (Accepted trade-off: O(N) re-resume per
+  poll even for correctly-waiting workflows — mitigated by a coarse default poll interval.)
+- **Stable per-pod owner-id; workflows start unowned.** `start-workflow` leaves `owner = NULL`;
+  a worker stamps `owner = self` on claim. A crashed pod reclaims its own work when it restarts
+  with the **same** owner-id (so production must pass a stable `:owner-id`, e.g. StatefulSet
+  ordinal / config). This mirrors ablauf exactly.
+
+Why this fixes the bugs (no lease, no save-events validation):
+- **1.2 (concurrent execution):** `claim-owner` = `UPDATE … SET owner=? WHERE owner IS NULL`
+  is atomic; only one pod claims a workflow, and the worker resumes claimed workflows
+  **sequentially on its poll thread**, so neither cross-pod nor intra-pod double-execution occurs.
+- **1.1 (cross-pod signal):** the signal is persisted; the owning (or any, if unowned) pod's next
+  scan resumes the workflow and replay consumes the signal. A dead owner's workflows
+  (`owner = dead-id`) are skipped by others (`owner=me OR owner IS NULL`) until that owner restarts.
+- **1.3 (recovery):** the worker's scan **is** the recovery — its first poll lists `owner=me`
+  non-terminal workflows (its orphans from a prior lifetime) and resumes them. intemporal replays
+  from history (re-running uncached activities), so no ablauf-style task-status bookkeeping is
+  needed.
+
+### Remove (all lease + marker machinery)
+
+- Delete `src/intemporal/internal/lease.cljc` (the `*owner*` dynamic var) and the **save-events
+  lease validation** in all three stores (`store.cljc`, `store/jdbc.clj`, `store/fdb.clj`).
+- Remove `lease-lost-exception` / `lease-lost?` from `src/intemporal/internal/error.cljc`.
+- Remove `IStore` methods `claim-workflow`, `renew-lease`, `release-lease`, `add-runnable`,
+  `claim-runnable`, `delete-runnable` (protocol + all stores + `racing_store.clj`).
+- Remove the `add-runnable` calls from `add-signal` / `mark-cancelled` (keep the in-process
+  callback fire — it is the embedded single-process wake path; worker mode uses the scan).
+- New migration drops `intemporal_runnable` (+ index) and `intemporal_workflows.lease_until`.
+
+### Add (ownership)
+
+- **Schema** — new `resources/migrations/postgres/20260531000003-ownership.up.sql`:
+  `RENAME COLUMN owner_id TO owner`; `DROP COLUMN lease_until`; `DROP TABLE intemporal_runnable`
+  (drop its index first); `CREATE INDEX … ON intemporal_workflows (owner)`. (`status` from B2 is
+  reused to detect non-terminal.) `.down` reverses it. Previous migration `…-multipod` stays
+  (history); this one reverses its lease/runnable parts.
+- **`IStore` (3 new methods, replacing the 6 removed):**
+  - `claim-owner [store workflow-id owner-id]` → boolean. Atomic
+    `UPDATE intemporal_workflows SET owner=? WHERE id=? AND (owner IS NULL OR owner=?)`; true iff
+    now owned by `owner-id`. (ablauf `own!`.)
+  - `list-pending [store owner-id limit]` → vector of workflow-ids that are **non-terminal** and
+    `(owner = owner-id OR owner IS NULL)`. Used for both the live poll and startup recovery (same
+    query). PG: `WHERE status NOT IN ('completed','failed') AND (owner=? OR owner IS NULL) LIMIT ?`.
+  - `release-owner [store owner-id]` → `UPDATE … SET owner=NULL WHERE owner=? AND status NOT IN
+    ('completed','failed')` (clean-shutdown rebalance).
+- **Store impls.** InMemory: atom CAS / filter (trivial). JDBC: the three SQL statements above
+  (`claim-owner` uses `RETURNING`/update-count for the boolean). FDB: needs an **owner index**
+  subspace `["wf-owner" <owner-or-""> <wf-id>]` since it can't SQL-scan — add a wf-id on first
+  `:workflow-started` save (owner ""), move it on `claim-owner` / `release-owner`, remove it on a
+  terminal event. `list-pending` scans `["wf-owner" owner-id]` + `["wf-owner" ""]`.
+- **`start-worker`** (rework in `src/intemporal/core.cljc`, drop the `lease`/runnable/`claim-ms`
+  bits): loop → `list-pending store owner-id batch` → for each, `claim-owner`; if true,
+  `resume-workflow [engine wf-id]` (sequentially, on the poll thread); sleep `poll-ms` when empty.
+  Stop fn calls `release-owner` then stops the thread. Recovery needs no special step — the first
+  scan covers it. Default `:poll-ms` ~500–1000, `:owner-id` random (production passes a stable id).
+
+### Critical files
+
+`src/intemporal/protocol.cljc`, `src/intemporal/store.cljc`, `src/intemporal/store/jdbc.clj`,
+`src/intemporal/store/fdb.clj`, `src/intemporal/core.cljc` (`start-worker`),
+`src/intemporal/internal/error.cljc`; delete `src/intemporal/internal/lease.cljc`;
+new migration under `resources/migrations/postgres/`; tests
+`test/intemporal/tests/worker_test.clj`, `test/intemporal/tests/jepsen/racing_store.clj`,
+`test/intemporal/tests/jepsen/bug_1_1_test.clj`, `bug_1_2_test.clj`, `bug_1_3_test.clj`.
+
+### Tests (rework to the ownership model)
+
+- `worker_test`: (a) **recovery** — start+suspend+crash, then a worker with the same/another
+  owner-id resumes via the scan to completion (InMemory shared atom + JDBC + FDB); (b)
+  **claim exclusivity** — `claim-owner` returns true for the first owner, false for the second
+  (this replaces the lease-rejects-stale-writer test and is the bug-1.2 proof).
+- `bug_1_1` (cross-instance signal): signal via a second store instance + a worker → completion.
+- `bug_1_3` (recovery): crash + worker scan → completion.
+- `bug_1_2` (concurrency): two `claim-owner` attempts on one unowned workflow → exactly one true;
+  assert the loser cannot resume.
+- `bug_2_1` / `bug_2_3` (Phase A) unchanged.
+- `racing_store.clj`: delegate the 3 new methods, drop the 6 removed.
+
+### Verification
+
+1. `bin/kaocha` (in-memory + test + test-cljs) green except the documented pre-existing
+   `replay-check-test/test-log-once-workflow` flake. Confirm the reworked `worker_test` and
+   `bug_1_1/1_2/1_3` guards pass on InMemory + JDBC + FDB (Postgres + FoundationDB up via
+   `docker compose up -d postgresql foundation`; migration auto-applies on store creation).
+2. **clj-kondo** on both source and tests: `clj-kondo --lint src test` — fix all warnings/errors
+   it reports in the touched namespaces (and confirm a clean run overall).
+3. Sanity: a workflow started (unowned) and signalled, with a worker running under a stable
+   owner-id, completes; after a simulated crash, a worker restarted with the same owner-id
+   resumes it.
 
 ---
 
-## Section 5 — Pragmatic Recommendations (no code changes implied)
-
-For someone evaluating this library:
-
-- **Safe today**: single process, in-memory store, side-effects inside short-lived activities — i.e., as a structured way to write resumable in-memory orchestrations. Fine for tests, batch jobs, single-node tools.
-- **Risky**: any deployment with `>1` replica, even with the JDBC store. Will not lose data, but **will lose execution liveness** on every pod restart, and **will corrupt history** under concurrent retries.
-- **Don't**: rely on it as a Temporal replacement in k8s without writing significant infrastructure on top (leasing, polling, dispatch, signal fan-out).
+## Milestone 3 — Phase C: multi-pod safety — ✅ LANDED (lease impl — SUPERSEDED by the ownership model above) (fixes 1.1, 1.2, 1.3)
+
+**Outcome:** C1 + C3 + C4 + C5 implemented across InMemory + JDBC + FDB; all five bug guards
+now assert fixed behaviour. Full `bin/kaocha`: **test-cljs 27/0/0, in-memory 47/0/0, test 67**
+(sole failure = the documented pre-existing run-once flake). CLJS green confirms the cljc store
+changes are cross-platform.
+
+- **C1 lease** (`claim-workflow`/`renew-lease`/`release-lease` on `IStore`): Postgres
+  `owner_id`+`lease_until` columns (migration `…-multipod`); FDB `["lease" id]` serializable RMW;
+  InMemory CAS. **Every `save-events` validates the lease** when `intemporal.internal.lease/*owner*`
+  is bound (set by the worker) and throws `lease-lost-exception` on mismatch — closes bug 1.2.
+  FDB wraps body exceptions in `CompletionException`, so FDB `save-events` unwraps to surface the
+  clean `ExceptionInfo`.
+- **C3 runnable markers** (`add-runnable`/`claim-runnable`/`delete-runnable`): Postgres
+  `intemporal_runnable` (PK collapses dups, `FOR UPDATE SKIP LOCKED` claim); FDB `["runnable"]`
+  subspace; InMemory map. `add-signal` and `mark-cancelled` write a marker — the durable,
+  cross-pod wake path that closes bug 1.1.
+- **C4 worker** (`intemporal.core/start-worker`, JVM-only): polls `claim-runnable` → `claim-workflow`
+  (lease) → `resume-workflow [engine wf-id]` (B3 registry) → `delete-runnable` → `release-lease`;
+  catches `lease-lost?` to skip. Closes bug 1.3.
+- **C5 — deviation from plan (documented):** the plan said make in-process callbacks no-ops. That
+  would break the **embedded single-process** mode (blocking `start-workflow` with no worker, which
+  the store suite and README's "safe today" rely on). Instead callbacks are **kept** as the
+  in-process fast-path and the durable marker is added alongside as the cross-pod path. Both
+  coexist; no behavioural regression.
+- **Tests:** `worker_test` (recovery + lease, 3 stores) and the flipped `bug_1_1`/`bug_1_2`/`bug_1_3`
+  guards (3 stores each). InMemory models a shared store by sharing one state atom across instances.
+
+## Milestone 4 — C2: persistent / cross-pod timers — ✅ LANDED
+
+**Outcome:** timers now survive pod death. Full `bin/kaocha`: **in-memory 50/0/0,
+test-cljs 27/0/0, test 76** (sole failure = the pre-existing `test-log-once-workflow` flake).
+`clj-kondo --lint src test`: **0 errors, 5 warnings** (unchanged pre-existing baseline; every
+touched namespace clean). New `timer_recovery_test` passes on InMemory + JDBC + FDB (24 assertions
+across determinism / recovery / wake_at filtering). Migration reverses + re-applies cleanly.
+
+- **Determinism fix** ([core.cljc `sleep`](src/intemporal/core.cljc#L283), cljc): reads back the
+  persisted `:timer-scheduled` `fire-at` on replay instead of recomputing `now+ms` — no deadline
+  drift, crash-resumed sleeps fire on their original schedule.
+- **Idempotent `schedule-timer`** (runtime.clj + runtime.cljs): a re-resumed timer workflow keeps
+  its already-armed future/timeout instead of arming a duplicate.
+- **`wake_at` filter**: new `IStore/set-wake-at`; `list-pending` adds `(wake_at IS NULL OR
+  wake_at <= now())`. Migration `20260531000003-wake-at` (column + partial index). JDBC =
+  `to_timestamp` UPDATE + WHERE clause; InMemory = state key + filter; FDB = wake-at carried in the
+  `["wf-owner" bucket wf-id]` index **value** (preserved across claim/release bucket moves),
+  scanned with a due? predicate. `set-wake-at` is called at the `:suspended` branch
+  (execution.clj + .cljs): `:fire-at` for `:wait-timer`, `:deadline` for `:wait-signal-timeout`,
+  nil otherwise (always eligible).
+- **Tests:** new `test/intemporal/tests/timer_recovery_test.clj` (3 properties × 3 stores);
+  `racing_store` delegates `set-wake-at`. Existing `timer_test` unaffected.
+
+### Original design (for reference)
+
+### Context
+
+A workflow that calls `sleep` (or `wait-for-signal` with a timeout) suspends with a timer. Today
+that timer lives **only** in the in-process `DefaultScheduler.pending-timers` atom
+([runtime.clj:13](src/intemporal/internal/runtime.clj#L13)), so a pod death loses it: the
+workflow is durably suspended but nothing re-arms the timer on another pod. Signal-waiters
+already recover (the ownership scan re-resumes every non-terminal workflow each poll, and replay
+consumes the persisted signal) — timers are the one suspension type that doesn't, purely because
+the fire time isn't durable and the scan would re-resume long sleepers wastefully.
+
+Two concrete defects to fix, plus one efficiency addition (per decisions taken):
+
+1. **Latent determinism bug in `sleep`** ([core.cljc:283-302](src/intemporal/core.cljc#L283)):
+   it recomputes `fire-at = (now + ms)` on **every** replay and never reads back the
+   `:timer-scheduled` event it persists. So each resume pushes the deadline later (drift), and a
+   crash-resumed sleep would never reliably fire. Must read the persisted `fire-at` back.
+2. **Non-idempotent `schedule-timer`**: under the scan, a suspended timer workflow gets re-resumed
+   every poll → `process-timer` calls `schedule-timer` again → a second `ScheduledFuture` for the
+   same `[wf,seq]`, leaking futures and risking duplicate `:timer-fired`. Make it idempotent.
+3. **`wake_at` filter (chosen)**: re-resuming a 30-day sleeper every 500ms (replaying full history)
+   is wasteful. Add an earliest-wake timestamp per workflow; `list-pending` skips workflows whose
+   `wake_at` is in the future. This also quiets polling for *all* waiters.
+
+### Changes
+
+- **Determinism fix** ([core.cljc `sleep`](src/intemporal/core.cljc#L283), cljc — shared by JVM +
+  CLJS): on entry, if a `:timer-scheduled` event already exists for this seq, reuse its `fire-at`
+  (and skip if `:timer-fired` exists, as today); else compute `now+ms` and persist. Add
+  `find-event store wf-id :timer-scheduled seq-num` lookup alongside the existing `:timer-fired`
+  one.
+- **Idempotent scheduler** ([runtime.clj `schedule-timer`](src/intemporal/internal/runtime.clj#L16)
+  + runtime.cljs): if `[wf,seq]` already in `pending-timers`, return the existing key without
+  scheduling a second future. (process-timer/​.cljs unchanged.)
+- **`wake_at` durable filter — new IStore op `set-wake-at [store wf-id wake-at-ms|nil]`** and a
+  `list-pending` predicate change:
+  - **Migration** `20260531000003-wake-at.up/down.sql`: `ADD COLUMN wake_at TIMESTAMPTZ` on
+    `intemporal_workflows` + partial index for the due-scan.
+  - **JDBC**: `set-wake-at` = `UPDATE … SET wake_at = to_timestamp(?/1000)` (or NULL);
+    `list-pending` adds `AND (wake_at IS NULL OR wake_at <= now())`.
+  - **InMemory**: `wake-at` state key; `list-pending` filter `(or (nil? wa) (<= wa now))`.
+  - **FDB**: store `wake-at` (ms, or 0 = always-eligible) as the **value** of the existing
+    `["wf-owner" bucket wf-id]` index entry instead of `true`; `list-pending` keeps entries with
+    `wake-at <= now`. `maintain-owner-index!`, `claim-owner`, `release-owner` must carry `wake-at`
+    through when they move the entry between buckets.
+- **Set `wake_at` at suspension** ([execution.clj:558-578](src/intemporal/internal/execution.clj#L558)
+  + execution.cljs:585-605, right where `register-wake-callback` already fires): from
+  `exec-result`’s `:suspension-data`, call `set-wake-at` with `:fire-at` for `:wait-timer` /
+  `:wait-signal-timeout`, and **nil** for `:wait-signal` / `:wait-async` (always eligible — they
+  wait on external events, not the clock). Resume/terminal paths clear it implicitly (a re-suspend
+  re-sets it; `release-owner`/terminal drop the row from scans).
+- **RacingStore** ([test/…/jepsen/racing_store.clj](test/intemporal/tests/jepsen/racing_store.clj)):
+  delegate the new `set-wake-at`.
+
+### Tests
+
+- **Determinism** (strengthen, per decision): extend `timer_test` — resume a sleeping workflow
+  twice and assert the persisted `:timer-scheduled` `fire-at` is **identical** across resumes (no
+  drift). Today `test-timer-replay` only checks `:result` equality.
+- **Persistent-timer recovery** (new, InMemory + JDBC + FDB, in `worker_test` or a new
+  `timer_recovery_test`): start a workflow that `sleep`s, crash the engine before it fires, start a
+  worker on a fresh engine with the same store → the timer fires and the workflow completes.
+- **wake_at filtering** (new): a workflow sleeping far in the future is **not** returned by
+  `list-pending` until `wake_at <= now`; a due/expired one is.
+
+### Verification
+
+`bin/kaocha` (in-memory + test + test-cljs) green except the known `test-log-once-workflow` flake;
+the three new/extended timer tests pass on InMemory + JDBC + FDB (PG + FDB up via
+`docker compose up -d postgresql foundation`; migration auto-applies on store creation).
+`clj-kondo --lint src test` stays clean in all touched namespaces.
 
 ---
 
-## Verification (how to confirm the above claims yourself)
-
-Quick reproductions, each ~10–30 minutes:
-
-1. **Lost wake on signal across processes.** Start two REPLs with the same Postgres URL. REPL A: `start-workflow` a workflow that calls `wait-for-signal`. Kill REPL A (`System/exit`). REPL B: `send-signal` for that workflow id. Confirm the signal sits in `intemporal_signals` and nothing happens. Restart REPL A: the workflow only resumes if you explicitly call `resume-workflow`.
-2. **Concurrent start corrupts history.** Two REPLs call `start-workflow` with the same `:workflow-id` simultaneously. Inspect `intemporal_history` — observe duplicate `:workflow-started` rows or silently overwritten events at the same `seq`.
-3. **Lost timer.** Start a workflow that sleeps for 5 minutes. Kill the JVM within 30 seconds. Restart it without calling `resume-workflow`. Confirm the workflow never fires.
-4. **Cancellation cannot reach a sleeper.** Start a workflow that does `(wait-for-signal :go)` and immediately `cancel-workflow`. Observe the cancelled flag is set but the workflow never terminates (it never re-enters the loop to observe the flag).
-5. **Register-then-consume race.** A targeted test that interleaves `wait-for-signal` and `send-signal` on the same workflow id at the consume-then-register window. May require thread sleep instrumentation in `process-signal` to reproduce reliably.
-
-If any of these *don't* reproduce, the analysis is wrong on that point and the relevant section should be revised.
+**C2 (persistent timers) — original DEFERRED note (now superseded by Milestone 4 above).** No bug guard covers timer-across-pod-death,
+and it is a substantial separate addition (durable timers table/subspace + a due-timer poller that
+writes runnable markers). Timers currently fire via the in-process scheduler, so a workflow sleeping
+on `sleep` whose pod dies will not wake on another pod until C2 lands. Tracked as the remaining
+Phase C gap.
+
+(Original Phase C design notes follow.)
+
+JVM-only. Opt-in per store. Postgres + FDB + InMemory. This is the structural milestone.
+
+### C1 — Lease / ownership
+Add `claim-workflow` / `renew-lease` / `release-lease` to `IStore`.
+- **Postgres:** migration adds `owner_id TEXT, lease_until TIMESTAMPTZ` to
+  `intemporal_workflows`. `claim` = `UPDATE … WHERE id=? AND (owner_id IS NULL OR owner_id=?
+  OR lease_until < now())` returning affected-rows. **Every `save-events` validates the lease
+  in the same txn** and throws `LeaseLostException` on mismatch.
+- **FDB:** `["lease" wf-id] → {owner-id, lease-until}`, claim via serializable read-modify-write
+  (FDB rejects conflicting commits natively).
+- **InMemory:** `{owner-id, lease-until}` per workflow; `claim` is an atomic `swap!` CAS.
+
+### C2 — Persistent timers
+Today timers live only in `DefaultScheduler.pending-timers` (runtime.clj) and die with the
+pod. Add a durable timers table/subspace `(workflow_id, seq, fire_at, claimed_until)` plus a
+poller that finds due timers and writes a runnable marker. In-memory `ScheduledFuture`s become
+a latency optimisation, not the source of truth.
+
+### C3 — Runnable markers (replaces in-process wake; closes 1.1)
+New `intemporal_runnable (workflow_id PK, reason, enqueued_at, claimed_until)` table / FDB
+`["runnable"]` subspace. `add-signal`, timer-fire, and `cancel-workflow` all write a marker
+(`INSERT … ON CONFLICT DO UPDATE`). This makes the wake **durable and cross-pod**: the signal
+no longer depends on a process-local callback.
+
+### C4 — Worker loop (closes 1.3)
+New `src/intemporal/internal/worker.clj` + `start-worker` API. Loop:
+`SELECT workflow_id FROM intemporal_runnable WHERE claimed_until < now() FOR UPDATE SKIP
+LOCKED LIMIT n` → `claim-workflow` (C1) → `resume-workflow [engine wf-id]` (B3 registry) →
+`DELETE` marker → `release-lease`. Postgres may add `LISTEN/NOTIFY` for sub-second wake; the
+poll interval is the safety net. The Jepsen harness already under `test/intemporal/jepsen/`
+(forked-JVM workers, nemesis, checkers) is the integration vehicle.
+
+### C5 — Retire in-process callbacks
+Once C3/C4 land, `register-signal-callback`/`unregister-signal-callback` become no-ops (kept
+one release for protocol compatibility). All wake goes through runnable markers. This is what
+finally closes the cross-pod path in **bug 1.1**.
+
+**Guard tests:** invert `bug_1_1_test.clj` and `bug_1_3_test.clj` (JDBC + FDB) to assert that
+a signal/marker written by a *second* store instance, plus a running worker, **resumes the
+workflow to completion**. Extend the forked-JVM Jepsen run (`intemporal.jepsen.runner`) with a
+lease-expiry-recovery scenario: kill a worker mid-workflow, assert another picks it up.
 
 ---
 
-## Section 6 — Improvement Plan
-
-**Chosen scope: Phases A + B + C (full multi-pod safety).** Estimated ~6–8 weeks. Phases D and E are listed below for completeness but explicitly deferred.
-
-**Target backends:** Postgres, MySQL/MariaDB, FoundationDB, and InMemoryStore (the latter for test fixtures and single-process use; it will implement the new `IStore` operations in-memory so the same execution code path works everywhere).
-
-Each phase is independently shippable (the library keeps working after each one). Distributed safety is opt-in via store choice — Phase A and B do not require any schema change; Phase C does.
+## CLJS parity scope
 
-Effort scale: **S** = ≤1 day, **M** = 2–5 days, **L** = 1–2 weeks, **XL** = >2 weeks. Estimates assume one contributor familiar with the codebase.
+| Source | Phase A | Phase C |
+|--------|---------|---------|
+| `execution.clj` / `execution.cljs` | **both** (A1, A2) | `.clj` only |
+| `runtime.clj` / `runtime.cljs` | both, only if timer paths touched | `.clj` only |
+| `start_workflow.clj` / `.cljs` | both, only if wake path touched | `.clj` only |
+| `core.cljc`, `protocol.cljc`, `store.cljc`, `context.cljc`, `error.cljc` | cross-platform (write once) | new ops are no-ops/in-memory on CLJS |
+| `store/jdbc.clj`, `store/fdb.clj` | n/a | JVM-only |
 
-### Guiding principles
-
-- **Don't break the existing API.** Add new functions; deprecate old ones with shims.
-- **Don't tax single-process users with distributed costs.** Lease checks, runnable polling, etc. are only meaningful when the store implementation cares.
-- **Push correctness into the schema.** A constraint that throws is better than a callback that silently fails.
-- **Make wake-up durable and centralised.** Today there are three independent wake paths (signal callback atom, in-process timer, blocking promise). Collapse them into one: "write a runnable marker; a worker picks it up."
+CLJS gets the A1/A2/A3 correctness fixes (InMemory only) and treats all Phase-C IStore
+additions as in-memory/no-op — CLJS is inherently single-process.
 
 ---
 
-### Phase A — Single-pod correctness fixes  *(unblocks correctness even without distribution)*
-
-| ID | Issue | Files | Effort |
-|----|-------|-------|--------|
-| A1 | Fix register-then-consume signal race (2.1) | `src/intemporal/internal/execution.clj` (process-signal, ~L223-255) | S |
-| A2 | Cancellation wakes sleepers (2.3) | `src/intemporal/core.cljc:411`, `src/intemporal/protocol.cljc`, both stores | S |
-| A3 | Reject duplicate concurrent event writes (1.2) | `src/intemporal/store/jdbc.clj:100-103` | S |
-| A4 | Loud `max-iterations` failure (2.5) | `src/intemporal/internal/fns/start_workflow.clj`, execution loop | S |
-
-**A1 sketch.** Change `process-signal` from `consume → register → suspend` to `register → consume → if-found(unregister + return) → suspend`. Add a stress test: two threads, one calling `wait-for-signal`, one calling `send-signal`, interleaved with a configurable delay; assert no orphaned suspension.
-
-**A2 sketch.** Add `IStore/wake-workflow [store wf-id]` that fires every registered callback for that workflow id (both signal callbacks and a new generic "wake" callback). `cancel-workflow` calls it after `mark-cancelled`. The workflow then re-enters the loop and observes the cancelled flag at `check-cancelled!` (`src/intemporal/internal/context.cljc:37-40`).
-
-**A3 sketch.** Change `ON CONFLICT (workflow_id, seq) DO UPDATE …` → `ON CONFLICT (workflow_id, seq) DO NOTHING`, capture row counts from `jdbc/execute!`, and throw `ConcurrentWriterException` if any insert returned 0 rows. Add a test that spins two threads racing on the same workflow id and asserts exactly one succeeds.
-
-**A4 sketch.** When the replay loop exceeds `max-iterations`, throw a typed exception (`ReplayBudgetExceeded`) carrying workflow id + last seq, instead of silently returning the partial result. Default budget could scale with `(count history) * 2` rather than a flat constant.
+## Critical files
+
+- **Execution / wake:** `src/intemporal/internal/execution.clj` + `.cljs`
+  (`process-signal`, `process-signal-with-timeout`, `handle-suspension`,
+  `run-workflow-internal`); `src/intemporal/internal/fns/start_workflow.clj` + `.cljs`
+  (`wake-fn`, blocking loop).
+- **Public API:** `src/intemporal/core.cljc` (`cancel-workflow` L411, `resume-workflow` L366,
+  new `submit-workflow`/`start-worker`).
+- **Protocol:** `src/intemporal/protocol.cljc` (add `wake-workflow`, lease ops, runnable ops).
+- **Stores:** `src/intemporal/store.cljc`, `src/intemporal/store/jdbc.clj`,
+  `src/intemporal/store/fdb.clj`.
+- **Errors:** `src/intemporal/internal/error.cljc` (add `ConcurrentWriteException`,
+  `LeaseLostException`).
+- **New:** `src/intemporal/internal/workflow_registry.cljc` (B3),
+  `src/intemporal/internal/worker.clj` (C4).
+- **Schema:** new migrations under `resources/migrations/postgres/` (status; owner/lease;
+  runnable; timers).
+- **Context (reuse, do not change semantics):** `src/intemporal/internal/context.cljc`
+  (`check-cancelled!` L37-40, `next-seq!` L42-47).
 
 ---
 
-### Phase B — Operational hardening  *(makes the library production-shaped for single-pod)*
-
-| ID | Improvement | Files | Effort |
-|----|-------------|-------|--------|
-| B1 | Idempotent `start-workflow` with `:request-id` | `src/intemporal/internal/fns/start_workflow.clj`, JDBC schema | M |
-| B2 | `status` column on `intemporal_workflows` (O(1) status reads) | new migration, `src/intemporal/store/jdbc.clj` | S |
-| B3 | Workflow registry: `register-workflow!` + resolve-on-resume | new ns `src/intemporal/internal/workflow_registry.cljc`, core API | M |
-| B4 | Async `submit-workflow` returning `{:workflow-id …}` | `src/intemporal/core.cljc`, executor wiring | M |
-| B5 | Observer hooks for store writes / replay iterations (3.6) | `src/intemporal/protocol.cljc`, call sites | S |
-
-**B3 is load-bearing for Phase C.** Without a registry mapping `workflow-id → workflow-fn`, the worker loop can't resume a workflow it didn't start. Design: register by name; store the name (symbol) inside the `:workflow-started` event payload (alongside `args`); `resume-workflow` accepts `(engine, workflow-id)` and resolves both the function and the args from the first event.
-
-**B4 sketch.** New function `submit-workflow` that writes `:workflow-started`, enqueues to the engine's executor, and returns immediately. The blocking `start-workflow` becomes a thin wrapper: `submit-workflow` + `await-workflow`. This unblocks the "HTTP request returns workflow id" pattern without forcing every caller off the blocking API.
-
----
-
-### Phase C — Distributed primitives  *(makes multi-pod safe; opt-in per store)*
-
-This is the meat of the work. Each item adds new `IStore` operations and a corresponding JDBC implementation. InMemoryStore can either no-op these (single-process semantics) or implement them in-memory for tests.
-
-| ID | Primitive | Files | Effort |
-|----|-----------|-------|--------|
-| C1 | Lease protocol: `claim` / `renew` / `release` / `expire-stale` | `src/intemporal/protocol.cljc`, JDBC schema (add `owner_id`, `lease_until`), all `save-events` paths | L |
-| C2 | Persistent timers table + poller | new migration, `src/intemporal/internal/runtime.clj` scheduler, new poller component | L |
-| C3 | Runnable markers table: signals/timers/cancellations write a marker | new migration, every wake path | M |
-| C4 | Worker loop: `start-worker` polls runnable markers, claims lease, resumes | new ns `src/intemporal/internal/worker.clj`, core API | L |
-| C5 | Remove in-process signal callbacks; all wakes via runnable markers | `src/intemporal/store/jdbc.clj`, execution.clj | M |
-
-**C1 design.** Add columns `owner_id TEXT, lease_until TIMESTAMPTZ` to `intemporal_workflows`. `claim-workflow` is a single `UPDATE … WHERE id = ? AND (owner_id IS NULL OR owner_id = ? OR lease_until < now())` returning rows-affected. Every `save-events` call validates `(owner_id = ? AND lease_until > now())` in the same transaction as the inserts; on mismatch, throws `LeaseLostException`. The worker catches and aborts the in-flight execution cleanly.
-
-**C2 design.** Schema: `intemporal_timers (workflow_id, seq, fire_at, claimed_until, PRIMARY KEY(workflow_id, seq))`. `schedule-timer` upserts a row instead of (or in addition to) the in-memory `ScheduledFuture`. A poller thread runs `SELECT … WHERE fire_at <= now() AND claimed_until < now() FOR UPDATE SKIP LOCKED LIMIT N`, writes a runnable marker for each, and updates `claimed_until = now() + INTERVAL '1m'` for fencing. In-memory `ScheduledFuture`s become a latency optimisation (avoid the poll), not the source of truth.
-
-**C3 design.** Schema: `intemporal_runnable (workflow_id PRIMARY KEY, reason TEXT, enqueued_at TIMESTAMPTZ, claimed_until TIMESTAMPTZ)`. Use `PRIMARY KEY` (not unique-by-reason) so duplicates collapse — one workflow is either runnable or it isn't. `add-signal`, timer-fire, and `cancel-workflow` all `INSERT … ON CONFLICT DO UPDATE SET enqueued_at = now()`.
-
-**C4 design.** A worker is `{:engine :worker-id :poll-interval :concurrency}`. Loop:
-```
-loop:
-  rows = SELECT workflow_id FROM intemporal_runnable
-         WHERE claimed_until < now()
-         FOR UPDATE SKIP LOCKED LIMIT batch
-  for each row in parallel (concurrency-limited):
-    if claim-workflow(wf_id, worker_id, ttl): 
-      try: resume-workflow(engine, wf_id)  // uses B3 registry
-           DELETE FROM intemporal_runnable WHERE workflow_id = ?
-      finally: release(wf_id, worker_id)
-  if rows empty: sleep poll-interval (or LISTEN for wake)
-```
-Optional Postgres optimisation: `NOTIFY intemporal_runnable` on insert; worker uses `LISTEN` for sub-second wake. Default poll interval (e.g., 500ms) is the safety net.
-
-**C5 design.** Once C3/C4 land, in-process callbacks are vestigial. Delete the `callbacks` atom on `JdbcStore`; `register-signal-callback` and `unregister-signal-callback` become no-ops (kept for protocol compatibility for one release, then removed). All wake is via runnable markers. Closes the cross-pod signal loss path (1.1).
-
-#### Per-backend implementation notes for Phase C
-
-**Postgres** (primary target, full featureset):
-- `FOR UPDATE SKIP LOCKED` for marker claim and lease claim (already used for signals).
-- `LISTEN/NOTIFY intemporal_runnable` for sub-second wake; poll loop as the safety net.
-- Standard timestamptz for `lease_until` and `claimed_until`.
-- JSONB payload columns (consistent with current schema).
-
-**MySQL/MariaDB** (full featureset, polling only):
-- `SELECT … FOR UPDATE SKIP LOCKED` is supported (MySQL 8.0+, MariaDB 10.6+); pin to those versions in docs.
-- No `LISTEN/NOTIFY` equivalent — workers poll at configurable interval (default 500ms is acceptable for most use cases; faster requires busy-polling tradeoff).
-- Add an index on `intemporal_runnable (claimed_until, enqueued_at)` for poller scans.
-- JSON column type instead of JSONB.
-
-**FoundationDB** (full featureset, native transactional model):
-- No SQL — operations are transactional key-range reads/writes against the directory layer (`store/fdb.clj` already uses subspaces).
-- Lease: key `["lease", workflow-id] → {owner-id, lease-until}`. Claim is a serializable read-modify-write transaction; FDB rejects conflicting commits automatically (no SKIP LOCKED needed — that's the wrong model for FDB).
-- Runnable markers: subspace `["runnable"]`, keys are `[enqueued-at, workflow-id]`. Workers pop with a watch on the subspace (FDB's `getRangeWatchable` or equivalent). FDB watches are the native equivalent of `LISTEN/NOTIFY` — no polling needed.
-- Persistent timers: subspace `["timers", fire-at, workflow-id, seq]`. Timer poller scans the prefix `["timers"]` with `streamingMode :want-all` up to `now()`.
-- Cross-process callbacks don't exist in FDB; the watch primitive replaces them entirely. Cleaner than the Postgres/MySQL design.
-
-**InMemoryStore** (test fixture + single-process use):
-- All new operations implemented in-memory using existing atom-based state.
-- Lease: a `{owner-id, lease-until}` entry per workflow in the state map. `claim-workflow` is an atomic `swap!` with CAS semantics.
-- Runnable markers: a vector in the state map; workers `swap!` to pop.
-- Persistent timers: same as today (in-memory `ScheduledFuture`), but exposed through the new IStore API so the execution code doesn't branch.
-- `register-signal-callback` continues to work in-process for single-pod users (no behavioural regression). Multi-pod users would never use InMemoryStore.
-
-**After Phase C the library is multi-pod safe**: every k8s pod runs `start-worker`, work is distributed via runnable markers + lease, crashes are recovered by lease expiry, signals reach the right pod via the database.
-
----
-
-### Phase D & E — Deferred (out of scope for this milestone)
-
-Listed here for context; not part of the current improvement plan. Revisit after Phase C is stable in production.
-
-- **Phase D** — Worker/orchestrator separation (activity task queues with specialised worker pools). Only worth doing if there's a real need to scale activity execution independently of workflow orchestration.
-- **Phase E** — Strict shard ownership and token-based write fencing. Protects against split-brain scenarios (network-partitioned pod whose lease expires, another pod takes over, then the original reconnects). Phase C's lease check on every `save-events` is sufficient for correctness under normal operation; Phase E is the "production at scale" defence-in-depth tier.
-
----
-
-### Suggested ordering & shippable milestones
-
-1. **v0.x+1 (bug-fix release):** Phase A only. Closes the worst single-process correctness bugs in ~1–2 days of work.
-2. **v0.x+2 (hardening release):** Phase B. The library becomes usable as a production single-pod orchestrator with sensible HTTP integration.
-3. **v0.y (multi-pod release):** Phase C. Headline feature: "now safe to run multiple replicas". Requires schema migration and a documented worker setup. **This is the big one.**
-4. **v0.y+1+ (advanced):** Phases D and E as needed by users.
-
-Phases A and B do not require schema-breaking migrations and can ship as patch releases. Phase C requires a minor-version bump and an "operator's guide" doc.
-
-### Testing strategy per phase
-
-- **A:** Race condition unit tests (A1, A3). Cancellation-during-wait test (A2). Replay-budget exhaustion test (A4).
-- **B:** Idempotent-start tests (B1). Registry resolution tests (B3). Async submission tests (B4).
-- **C:** Multi-process integration tests using `docker-compose` with Postgres + 2–3 worker containers. Tests:
-  - Crash a worker mid-workflow; verify another picks it up after lease expiry.
-  - Send signal from worker B to workflow on (dead) worker A; verify resumption on worker C.
-  - Schedule a long timer; restart all workers; verify it still fires.
-  - Concurrent `start-workflow` with same `:request-id`; verify single execution.
-
-The crash tests in `test/intemporal/tests/crash/` are the right template — extend them to multi-process scenarios.
-
-### Out of scope (for this plan)
-
-- Phase D and Phase E (deferred — see above).
-- Migrating to a different persistence model (e.g., RocksDB, distributed log).
-- Changing the workflow programming model (it's already good).
-- Cross-language SDKs.
-- Workflow versioning / determinism upgrades (Temporal's `GetVersion`) — relevant but separate.
+## Verification
+
+**Per-milestone acceptance = the inverted Jepsen guard tests pass on all 3 stores.**
+
+1. **Phase A.** Invert `bug_2_1_test`, `bug_2_3_test`, `bug_1_2_test`. Run:
+   - In-memory: `bin/kaocha :in-memory --focus intemporal.tests.jepsen.bug-2-1-test …`
+   - JDBC + FDB (live): `DATABASE_URL=… bin/kaocha :test --focus …`
+   - Regression: full `bin/kaocha` (JVM + CLJS) and the existing crash tests
+     (`signal_wait_crash_test`, `future_cancel_test`) stay green.
+2. **Phase B.** New unit tests: registry resolve-on-resume; `get-workflow-status` reads the
+   `status` column (no history scan); `submit-workflow` returns an id and the workflow still
+   completes.
+3. **Phase C.** Invert `bug_1_1_test` and `bug_1_3_test` (JDBC + FDB) with a `start-worker`
+   running. Then the forked-JVM chaos run:
+   `clojure -X:dev:jdbc:jepsen intemporal.jepsen.runner/run :workers 4 :duration 120`
+   — kill workers mid-flight; assert every submitted workflow reaches a terminal state, no
+   double non-idempotent execution, no orphaned signals/timers.
+4. **Negative control.** Confirm tests are not vacuous: temporarily revert one fix (e.g.
+   restore `ON CONFLICT DO UPDATE`) and confirm the corresponding guard test fails.
+
+`dev/verify_bugs.clj` (the standalone 5-bug reproducer) should flip from "all FAIL" to "all
+PASS" as milestones land — a quick end-to-end smoke check across both stores.
diff --git a/resources/migrations/postgres/20260531000002-multipod.down.sql b/resources/migrations/postgres/20260531000002-multipod.down.sql
deleted file mode 100644
index b2e2f72..0000000
--- a/resources/migrations/postgres/20260531000002-multipod.down.sql
+++ /dev/null
@@ -1,7 +0,0 @@
-DROP INDEX IF EXISTS idx_intemporal_runnable_claim;
---;;
-DROP TABLE IF EXISTS intemporal_runnable;
---;;
-ALTER TABLE intemporal_workflows
-    DROP COLUMN IF EXISTS owner_id,
-    DROP COLUMN IF EXISTS lease_until;
diff --git a/resources/migrations/postgres/20260531000002-multipod.up.sql b/resources/migrations/postgres/20260531000002-multipod.up.sql
deleted file mode 100644
index 733eba0..0000000
--- a/resources/migrations/postgres/20260531000002-multipod.up.sql
+++ /dev/null
@@ -1,23 +0,0 @@
--- Phase C: multi-pod safety primitives.
-
--- C1: lease / ownership. A worker claims a workflow before executing it; every
--- save-events validates the lease so a worker that lost ownership cannot keep
--- writing (closes the silent concurrent-execution path, bug 1.2).
-ALTER TABLE intemporal_workflows
-    ADD COLUMN IF NOT EXISTS owner_id    TEXT,
-    ADD COLUMN IF NOT EXISTS lease_until TIMESTAMPTZ;
---;;
--- C3: durable runnable markers. Whenever a workflow needs attention (a signal
--- arrived, it was cancelled, a timer fired) a marker is written here. Workers
--- poll this table instead of relying on a process-local callback (closes the
--- lost-wake-across-pods path, bug 1.1). PRIMARY KEY collapses duplicates: a
--- workflow is either runnable or it isn't.
-CREATE TABLE IF NOT EXISTS intemporal_runnable (
-    workflow_id   TEXT PRIMARY KEY REFERENCES intemporal_workflows(id) ON DELETE CASCADE,
-    reason        TEXT,
-    enqueued_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
-    claimed_until TIMESTAMPTZ NOT NULL DEFAULT to_timestamp(0)
-);
---;;
-CREATE INDEX IF NOT EXISTS idx_intemporal_runnable_claim
-    ON intemporal_runnable (claimed_until, enqueued_at);
diff --git a/resources/migrations/postgres/20260531000002-ownership.down.sql b/resources/migrations/postgres/20260531000002-ownership.down.sql
new file mode 100644
index 0000000..222f4e4
--- /dev/null
+++ b/resources/migrations/postgres/20260531000002-ownership.down.sql
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS idx_intemporal_workflows_owner;
+--;;
+ALTER TABLE intemporal_workflows DROP COLUMN IF EXISTS owner;
diff --git a/resources/migrations/postgres/20260531000002-ownership.up.sql b/resources/migrations/postgres/20260531000002-ownership.up.sql
new file mode 100644
index 0000000..bee9b8c
--- /dev/null
+++ b/resources/migrations/postgres/20260531000002-ownership.up.sql
@@ -0,0 +1,9 @@
+-- Phase C: ownership-based recovery.
+-- A workflow is owned by at most one pod (a stable owner-id). A worker resumes
+-- the non-terminal workflows it owns-or-null; a crashed pod's work is reclaimed
+-- when it restarts with the same owner-id. No time-based leases.
+ALTER TABLE intemporal_workflows
+    ADD COLUMN IF NOT EXISTS owner TEXT;
+--;;
+CREATE INDEX IF NOT EXISTS idx_intemporal_workflows_owner
+    ON intemporal_workflows (owner);
diff --git a/resources/migrations/postgres/20260531000003-wake-at.down.sql b/resources/migrations/postgres/20260531000003-wake-at.down.sql
new file mode 100644
index 0000000..71d3650
--- /dev/null
+++ b/resources/migrations/postgres/20260531000003-wake-at.down.sql
@@ -0,0 +1,3 @@
+DROP INDEX IF EXISTS idx_intemporal_workflows_wake_at;
+--;;
+ALTER TABLE intemporal_workflows DROP COLUMN IF EXISTS wake_at;
diff --git a/resources/migrations/postgres/20260531000003-wake-at.up.sql b/resources/migrations/postgres/20260531000003-wake-at.up.sql
new file mode 100644
index 0000000..3a217aa
--- /dev/null
+++ b/resources/migrations/postgres/20260531000003-wake-at.up.sql
@@ -0,0 +1,13 @@
+-- C2: earliest-wake filter for the ownership scan. A workflow suspended on a
+-- timer (sleep / signal-with-timeout) records when it next needs attention, so
+-- the recovery worker can skip long-sleeping workflows until they are due
+-- instead of replaying them every poll. NULL = always eligible (e.g. waiting on
+-- an external signal, not the clock).
+ALTER TABLE intemporal_workflows
+    ADD COLUMN IF NOT EXISTS wake_at TIMESTAMPTZ;
+--;;
+-- Partial index for the due-scan: only non-terminal rows with a future wake_at
+-- are interesting to the poller's "skip until due" predicate.
+CREATE INDEX IF NOT EXISTS idx_intemporal_workflows_wake_at
+    ON intemporal_workflows (wake_at)
+    WHERE wake_at IS NOT NULL;
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index 832b19d..38baec0 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -7,7 +7,6 @@
             [intemporal.internal.logging :as log]
             [intemporal.internal.fns.start-workflow :as sw]
             [intemporal.internal.workflow-registry :as wreg]
-            [intemporal.internal.lease :as lease]
             [intemporal.protocol :as p]
             [intemporal.store :as store]
             [intemporal.observer :as obs]
@@ -292,13 +291,19 @@
         existing (p/find-event store workflow-id :timer-fired seq-num)]
     (if existing
       nil
-      (let [fire-at (+ (utils/current-time-ms) ms)]
-        (ctx/add-pending-event! {:event-type :timer-scheduled
-                                 :seq seq-num
-                                 :fire-at fire-at
-                                 :duration-ms ms
-                                 :timestamp (utils/current-time-ms)})
-        (ctx/notify-observer p/on-timer-scheduled (:workflow-id ctx) seq-num fire-at)
+      ;; Reuse the fire-at from a prior :timer-scheduled event if one was already
+      ;; persisted for this seq. Recomputing (now + ms) on every replay would push
+      ;; the deadline later on each resume (drift) and make a crash-resumed sleep
+      ;; never reliably fire. The fire time must be deterministic across replays.
+      (let [prior   (p/find-event store workflow-id :timer-scheduled seq-num)
+            fire-at (or (:fire-at prior) (+ (utils/current-time-ms) ms))]
+        (when-not prior
+          (ctx/add-pending-event! {:event-type :timer-scheduled
+                                   :seq seq-num
+                                   :fire-at fire-at
+                                   :duration-ms ms
+                                   :timestamp (utils/current-time-ms)})
+          (ctx/notify-observer p/on-timer-scheduled (:workflow-id ctx) seq-num fire-at))
         (throw (error/make-suspension :timer {:seq seq-num
                                               :fire-at fire-at}))))))
 ;; ============================================================================
@@ -453,46 +458,45 @@
 
 #?(:clj
    (defn start-worker
-     "Start a background recovery worker (Phase C). It polls the store's durable
-      runnable markers, claims a lease on each workflow, and resumes it by id —
-      so a workflow whose original pod crashed, or one signalled/cancelled from
-      another pod, is driven to completion. Returns a 0-arg stop fn.
+     "Start a background recovery worker (Phase C, ownership model). Each poll it
+      lists the non-terminal workflows this owner may run — its own plus any
+      unowned (`owner = owner-id OR owner IS NULL`) — claims each by stamping
+      ownership, and resumes it by id. This is the cross-pod wake AND the crash
+      recovery: the first poll re-picks this owner's orphaned workflows, and a
+      later poll re-resumes a signalled/cancelled one (replay consumes the
+      signal / observes the cancellation). Workflows are resumed sequentially on
+      the poll thread, so neither cross-pod nor intra-pod double-execution occurs.
+
+      Use a STABLE owner-id per pod (e.g. StatefulSet ordinal / config) so a
+      crashed pod reclaims its own work on restart. Returns a 0-arg stop fn that
+      releases this owner's workflows (so other pods can pick them up).
 
       The worker resumes via resume-workflow [engine workflow-id], so the workflow
       function must be registered in this process (start-workflow registers it
       automatically; a fresh process must register its workflow vars at startup).
 
       Options:
-        :owner-id    unique id for this worker (default: random uuid)
-        :poll-ms     idle poll interval when no markers are due (default 100)
-        :batch-size  max markers claimed per poll (default 10)
-        :lease-ms    lease duration per claimed workflow (default 30000)
-        :claim-ms    marker fencing duration while processing (default 30000)"
+        :owner-id    stable id for this worker (default: random uuid)
+        :poll-ms     poll interval (default 500)
+        :batch-size  max workflows scanned per poll (default 100)"
      [{:keys [store] :as engine}
-      & {:keys [owner-id poll-ms batch-size lease-ms claim-ms]
-         :or   {owner-id (str (random-uuid)) poll-ms 100 batch-size 10
-                lease-ms 30000 claim-ms 30000}}]
+      & {:keys [owner-id poll-ms batch-size]
+         :or   {owner-id (str (random-uuid)) poll-ms 500 batch-size 100}}]
      (let [running (atom true)
            process-one
            (fn [wf-id]
-             (when (p/claim-workflow store wf-id owner-id lease-ms)
-               (binding [lease/*owner* owner-id]
-                 (try
-                   (resume-workflow engine wf-id)
-                   (p/delete-runnable store wf-id)
-                   (catch Throwable t
-                     (if (error/lease-lost? t)
-                       (log/debugf "Worker %s lost lease on %s; skipping" owner-id wf-id)
-                       (log/warnf t "Worker %s failed resuming %s" owner-id wf-id)))
-                   (finally
-                     (p/release-lease store wf-id owner-id))))))
+             (when (p/claim-owner store wf-id owner-id)
+               (try
+                 (resume-workflow engine wf-id)
+                 (catch Throwable t
+                   (log/warnf t "Worker %s failed resuming %s" owner-id wf-id)))))
            thread
            (Thread.
              ^Runnable
              (fn []
                (while @running
                  (try
-                   (let [ids (p/claim-runnable store owner-id batch-size claim-ms)]
+                   (let [ids (p/list-pending store owner-id batch-size)]
                      (if (seq ids)
                        (doseq [wf-id ids :while @running]
                          (process-one wf-id))
@@ -507,7 +511,8 @@
          (.start))
        (fn stop-worker []
          (reset! running false)
-         (.interrupt thread)))))
+         (.interrupt thread)
+         (p/release-owner store owner-id)))))
 
 (defn send-signal
   "Send a signal to a workflow.
diff --git a/src/intemporal/internal/error.cljc b/src/intemporal/internal/error.cljc
index 544734b..f1539eb 100644
--- a/src/intemporal/internal/error.cljc
+++ b/src/intemporal/internal/error.cljc
@@ -81,25 +81,6 @@
           (.-data e)
           (::cancelled (.-data e)))))
 
-(defn lease-lost-exception
-  "Thrown by IStore/save-events when the writer's lease on a workflow is no
-   longer valid (another worker took ownership, or the lease expired). The
-   worker catches this and aborts the in-flight execution cleanly. (Phase C)"
-  [workflow-id owner-id]
-  (ex-info "Workflow lease lost"
-           {::lease-lost true
-            :workflow-id workflow-id
-            :owner-id    owner-id}))
-
-(defn lease-lost? [e]
-  #?(:clj
-     (and (instance? IExceptionInfo e)
-          (::lease-lost (ex-data e)))
-     :cljs
-     (and (instance? js/Error e)
-          (or (and (.-data e) (::lease-lost (.-data e)))
-              (::lease-lost (ex-data e))))))
-
 (defn activity-rejected-exception [activity-name cause]
   (ex-info "Execution rejected"
            {::rejected     true
diff --git a/src/intemporal/internal/execution.clj b/src/intemporal/internal/execution.clj
index 53c699b..ab189fe 100644
--- a/src/intemporal/internal/execution.clj
+++ b/src/intemporal/internal/execution.clj
@@ -575,6 +575,15 @@
                 (do
                   (when wake-fn
                     (p/register-wake-callback store workflow-id wake-fn))
+                  ;; C2: record when this workflow next needs attention so the
+                  ;; ownership scan can skip it until due. Timer waits carry a
+                  ;; clock deadline; signal/async waits are always eligible (nil).
+                  (let [sd (:suspension-data exec-result)
+                        wake-at (case action
+                                  :wait-timer          (:fire-at sd)
+                                  :wait-signal-timeout (:deadline sd)
+                                  nil)]
+                    (p/set-wake-at store workflow-id wake-at))
                   (action->result action workflow-id))))
 
             :failed
diff --git a/src/intemporal/internal/execution.cljs b/src/intemporal/internal/execution.cljs
index e29449b..89e887d 100644
--- a/src/intemporal/internal/execution.cljs
+++ b/src/intemporal/internal/execution.cljs
@@ -602,6 +602,13 @@
                                (do
                                  (when wake-fn
                                    (p/register-wake-callback store workflow-id wake-fn))
+                                 ;; C2: record when this workflow next needs attention.
+                                 (let [sd (:suspension-data exec-result)
+                                       wake-at (case action
+                                                 :wait-timer          (:fire-at sd)
+                                                 :wait-signal-timeout (:deadline sd)
+                                                 nil)]
+                                   (p/set-wake-at store workflow-id wake-at))
                                  (action->result action workflow-id))))
 
                            :failed
diff --git a/src/intemporal/internal/fns/start_workflow.clj b/src/intemporal/internal/fns/start_workflow.clj
index d1e3c06..344b536 100644
--- a/src/intemporal/internal/fns/start_workflow.clj
+++ b/src/intemporal/internal/fns/start_workflow.clj
@@ -54,11 +54,10 @@
     ;; Record the workflow function under its stable name so the workflow can be
     ;; resumed later by id alone (resume-workflow [engine wf-id]); the name is
     ;; stored in the :workflow-started event below. (improvements.md §B3)
-    (let [wf-name (wreg/register-workflow! workflow-fn)]
-     (log/with-mdc {:workflow-id wf-id}
+    (log/with-mdc {:workflow-id wf-id}
       (p/save-event store wf-id {:event-type :workflow-started
                                  :workflow-id wf-id
-                                 :workflow-fn-name wf-name
+                                 :workflow-fn-name (wreg/register-workflow! workflow-fn)
                                  :args (vec args)
                                  :timestamp (utils/current-time-ms)})
       (when observer
@@ -80,4 +79,4 @@
             result))
         (catch Exception e
           (log/warnf e "Caught exception")
-          (throw e)))))))
+          (throw e))))))
diff --git a/src/intemporal/internal/lease.cljc b/src/intemporal/internal/lease.cljc
deleted file mode 100644
index 8186d45..0000000
--- a/src/intemporal/internal/lease.cljc
+++ /dev/null
@@ -1,10 +0,0 @@
-(ns intemporal.internal.lease
-  "Shared dynamic binding for the worker that currently owns an executing
-   workflow. The worker binds *owner* around resume-workflow; stores read it in
-   save-events to validate the lease in the same transaction. When *owner* is
-   nil (plain single-process start-workflow, no worker) lease validation is
-   skipped entirely, so existing single-process behaviour is unchanged.")
-
-(def ^:dynamic *owner*
-  "Owner-id of the worker executing the current workflow, or nil."
-  nil)
diff --git a/src/intemporal/internal/runtime.clj b/src/intemporal/internal/runtime.clj
index 7f781a9..88e3af0 100644
--- a/src/intemporal/internal/runtime.clj
+++ b/src/intemporal/internal/runtime.clj
@@ -14,16 +14,22 @@
                              pending-timers]
   p/IScheduler
   (schedule-timer [_ workflow-id seq-num fire-at callback]
-    (let [delay-ms  (max 0 (- fire-at (System/currentTimeMillis)))
-          timer-key [workflow-id seq-num]
-          future    (.schedule pool
-                               ^Runnable (fn []
-                                           (swap! pending-timers dissoc timer-key)
-                                           (callback))
-                               delay-ms
-                               TimeUnit/MILLISECONDS)]
-      (swap! pending-timers assoc timer-key future)
-      timer-key))
+    (let [timer-key [workflow-id seq-num]]
+      ;; Idempotent: under the ownership scan a suspended timer workflow is
+      ;; re-resumed every poll, so process-timer may call schedule-timer again
+      ;; for the same [wf,seq]. Scheduling a second future would leak it and
+      ;; risk a duplicate :timer-fired. If one is already armed, keep it.
+      (if (contains? @pending-timers timer-key)
+        timer-key
+        (let [delay-ms (max 0 (- fire-at (System/currentTimeMillis)))
+              future   (.schedule pool
+                                  ^Runnable (fn []
+                                              (swap! pending-timers dissoc timer-key)
+                                              (callback))
+                                  delay-ms
+                                  TimeUnit/MILLISECONDS)]
+          (swap! pending-timers assoc timer-key future)
+          timer-key))))
 
   (cancel-timer [_ workflow-id seq-num]
     (let [timer-key [workflow-id seq-num]]
diff --git a/src/intemporal/internal/runtime.cljs b/src/intemporal/internal/runtime.cljs
index 299d8d5..2d76260 100644
--- a/src/intemporal/internal/runtime.cljs
+++ b/src/intemporal/internal/runtime.cljs
@@ -46,15 +46,20 @@
   p/IScheduler
 
   (schedule-timer [_ workflow-id seq-num fire-at callback]
-    (let [delay-ms  (max 0 (- fire-at (utils/current-time-ms)))
-          timer-key [workflow-id seq-num]
-          timer-id  (js/setTimeout
-                      (fn []
-                        (swap! pending-timers dissoc timer-key)
-                        (callback))
-                      delay-ms)]
-      (swap! pending-timers assoc timer-key timer-id)
-      timer-key))
+    (let [timer-key [workflow-id seq-num]]
+      ;; Idempotent: a re-resumed timer workflow may call schedule-timer again
+      ;; for the same [wf,seq]; keep the already-armed timer rather than arming
+      ;; a second one (which would risk a duplicate :timer-fired).
+      (if (contains? @pending-timers timer-key)
+        timer-key
+        (let [delay-ms (max 0 (- fire-at (utils/current-time-ms)))
+              timer-id (js/setTimeout
+                         (fn []
+                           (swap! pending-timers dissoc timer-key)
+                           (callback))
+                         delay-ms)]
+          (swap! pending-timers assoc timer-key timer-id)
+          timer-key))))
 
   (cancel-timer [_ workflow-id seq-num]
     (let [timer-key [workflow-id seq-num]]
diff --git a/src/intemporal/protocol.cljc b/src/intemporal/protocol.cljc
index 10c33ff..470a91a 100644
--- a/src/intemporal/protocol.cljc
+++ b/src/intemporal/protocol.cljc
@@ -22,24 +22,22 @@
   (mark-cancelled [store workflow-id] "Mark workflow as cancelled")
   (get-workflow-status [store workflow-id] "Get current workflow status")
 
-  ;; --- Phase C: multi-pod primitives (opt-in; single-process callers ignore) ---
-  (claim-workflow [store workflow-id owner-id lease-ms]
-    "Atomically claim or renew ownership of a workflow if it is unowned, owned by
-     owner-id already, or its lease has expired. Sets owner_id=owner-id and
-     lease_until=now+lease-ms. Returns true on success, false if another live
-     owner holds it.")
-  (renew-lease [store workflow-id owner-id lease-ms]
-    "Extend the lease to now+lease-ms iff owner-id still owns it. Returns boolean.")
-  (release-lease [store workflow-id owner-id]
-    "Release ownership (clear owner_id/lease_until) iff held by owner-id.")
-  (add-runnable [store workflow-id reason]
-    "Durably mark a workflow as needing execution. Replaces the process-local
-     wake callback for cross-pod wake. Idempotent: one marker per workflow.")
-  (claim-runnable [store owner-id batch-size claim-ms]
-    "Claim up to batch-size runnable markers whose claim has lapsed, fencing them
-     for claim-ms so other workers skip them. Returns a vector of workflow-ids.")
-  (delete-runnable [store workflow-id]
-    "Remove a workflow's runnable marker (after it has been resumed)."))
+  ;; --- Phase C: ownership-based recovery (opt-in; single-process callers ignore) ---
+  (claim-owner [store workflow-id owner-id]
+    "Atomically stamp ownership: UPDATE owner=owner-id WHERE owner IS NULL OR
+     owner=owner-id. Returns true iff the workflow is now owned by owner-id. The
+     exclusivity gate — only one pod can claim an unowned workflow.")
+  (list-pending [store owner-id limit]
+    "Return up to `limit` workflow-ids that are NON-TERMINAL, DUE (wake-at is null
+     or in the past), and (owner=owner-id OR owner IS NULL): the workflows this
+     owner may resume right now. Used for both the live poll and startup recovery.")
+  (release-owner [store owner-id]
+    "Clear ownership (owner=NULL) for this owner's non-terminal workflows, so
+     other pods may pick them up. Called on clean shutdown.")
+  (set-wake-at [store workflow-id wake-at-ms]
+    "Record the earliest time (epoch ms) this workflow next needs attention, or
+     nil for 'always eligible' (waiting on an external event, not the clock).
+     list-pending skips workflows whose wake-at is still in the future (C2)."))
 
 (defprotocol IActivityExecutor
   "Protocol for executing activities"
diff --git a/src/intemporal/store.cljc b/src/intemporal/store.cljc
index e61a100..9571741 100644
--- a/src/intemporal/store.cljc
+++ b/src/intemporal/store.cljc
@@ -1,8 +1,8 @@
 (ns intemporal.store
   (:require [intemporal.protocol :as p]
-            [intemporal.utils :as utils]
-            [intemporal.internal.lease :as lease]
-            [intemporal.internal.error :as error]))
+            [intemporal.utils :as utils]))
+
+(def ^:private terminal-status? #{:completed :failed})
 
 ;; ============================================================================
 ;; In-Memory Store Implementation
@@ -25,14 +25,6 @@
 
   (save-events [_ workflow-id events]
     (when (seq events)
-      ;; Phase C: when running under a worker lease, refuse to write if this
-      ;; owner no longer holds a valid lease (another worker took over / expired).
-      (when-let [owner lease/*owner*]
-        (let [s   @state
-              cur (get-in s [:workflows workflow-id :owner])
-              lu  (get-in s [:workflows workflow-id :lease-until] 0)]
-          (when (or (not= cur owner) (< lu (utils/current-time-ms)))
-            (throw (error/lease-lost-exception workflow-id owner)))))
       (swap! state
              (fn [s]
                (let [s    (update-in s [:workflows workflow-id :history] (fnil into []) events)
@@ -57,14 +49,12 @@
   (get-pending-signals [_ workflow-id]
     (get-in @state [:workflows workflow-id :signals] {}))
 
-  (add-signal [this workflow-id signal-name signal-data]
+  (add-signal [_ workflow-id signal-name signal-data]
     (swap! state update-in [:workflows workflow-id :signals signal-name]
            (fnil conj []) signal-data)
-    ;; Phase C: durable wake marker so a worker (possibly another pod) resumes it.
-    (p/add-runnable this workflow-id :signal)
-    ;; Check if there's a callback registered for this signal (single-process path)
+    ;; In-process wake for an embedded (no-worker) engine in THIS process.
+    ;; Worker mode picks the workflow up via the ownership scan (list-pending).
     (when-let [callback (get-in @state [:workflows workflow-id :signal-callbacks signal-name])]
-      ;; Invoke callback asynchronously
       #?(:clj (future (callback))
          :cljs (js/setTimeout callback 0)))
     signal-data)
@@ -99,10 +89,8 @@
   (is-cancelled? [_ workflow-id]
     (get-in @state [:workflows workflow-id :cancelled] false))
 
-  (mark-cancelled [this workflow-id]
-    (swap! state assoc-in [:workflows workflow-id :cancelled] true)
-    ;; Phase C: wake a sleeper via a durable marker too (worker path).
-    (p/add-runnable this workflow-id :cancel))
+  (mark-cancelled [_ workflow-id]
+    (swap! state assoc-in [:workflows workflow-id :cancelled] true))
 
   (get-workflow-status [_ workflow-id]
     (let [wf (get-in @state [:workflows workflow-id])]
@@ -116,65 +104,43 @@
                   :workflow-failed :failed
                   :running)))))
 
-  ;; --- Phase C: lease / ownership ---
-  (claim-workflow [_ workflow-id owner-id lease-ms]
+  ;; --- Phase C: ownership-based recovery ---
+  (claim-owner [_ workflow-id owner-id]
     (let [ok (atom false)]
       (swap! state
              (fn [s]
-               (let [cur (get-in s [:workflows workflow-id :owner])
-                     lu  (get-in s [:workflows workflow-id :lease-until] 0)
-                     now (utils/current-time-ms)]
-                 (if (or (nil? cur) (= cur owner-id) (< lu now))
+               (let [cur (get-in s [:workflows workflow-id :owner])]
+                 (if (or (nil? cur) (= cur owner-id))
                    (do (reset! ok true)
-                       (-> s
-                           (assoc-in [:workflows workflow-id :owner] owner-id)
-                           (assoc-in [:workflows workflow-id :lease-until] (+ now lease-ms))))
+                       (assoc-in s [:workflows workflow-id :owner] owner-id))
                    s))))
       @ok))
 
-  (renew-lease [_ workflow-id owner-id lease-ms]
-    (let [ok (atom false)]
-      (swap! state
-             (fn [s]
-               (if (= owner-id (get-in s [:workflows workflow-id :owner]))
-                 (do (reset! ok true)
-                     (assoc-in s [:workflows workflow-id :lease-until]
-                               (+ (utils/current-time-ms) lease-ms)))
-                 s)))
-      @ok))
-
-  (release-lease [_ workflow-id owner-id]
+  (list-pending [_ owner-id limit]
+    (let [now (utils/current-time-ms)]
+      (->> (:workflows @state)
+           (filter (fn [[_ wf]]
+                     (and (seq (:history wf))
+                          (not (terminal-status? (:status wf)))
+                          ;; C2: skip workflows not yet due to wake
+                          (let [wa (:wake-at wf)] (or (nil? wa) (<= wa now)))
+                          (let [o (:owner wf)] (or (nil? o) (= o owner-id))))))
+           (map first)
+           (take limit)
+           vec)))
+
+  (release-owner [_ owner-id]
     (swap! state
            (fn [s]
-             (if (= owner-id (get-in s [:workflows workflow-id :owner]))
-               (update-in s [:workflows workflow-id] dissoc :owner :lease-until)
-               s)))
-    nil)
-
-  ;; --- Phase C: runnable markers ---
-  (add-runnable [_ workflow-id reason]
-    (swap! state update-in [:runnable workflow-id]
-           (fn [m] (assoc (or m {}) :reason reason
-                          :enqueued-at (utils/current-time-ms)
-                          :claimed-until (get m :claimed-until 0))))
+             (reduce (fn [s [wid wf]]
+                       (if (and (= owner-id (:owner wf))
+                                (not (terminal-status? (:status wf))))
+                         (update-in s [:workflows wid] dissoc :owner)
+                         s))
+                     s
+                     (:workflows s))))
     nil)
 
-  (claim-runnable [_ _owner-id batch-size claim-ms]
-    (let [claimed (atom [])]
-      (swap! state
-             (fn [s]
-               (let [now (utils/current-time-ms)
-                     due (->> (:runnable s)
-                              (filter (fn [[_ m]] (< (:claimed-until m 0) now)))
-                              (map first)
-                              (take batch-size)
-                              vec)]
-                 (reset! claimed due)
-                 (reduce (fn [s wid]
-                           (assoc-in s [:runnable wid :claimed-until] (+ now claim-ms)))
-                         s due))))
-      @claimed))
-
-  (delete-runnable [_ workflow-id]
-    (swap! state update :runnable dissoc workflow-id)
+  (set-wake-at [_ workflow-id wake-at-ms]
+    (swap! state assoc-in [:workflows workflow-id :wake-at] wake-at-ms)
     nil))
diff --git a/src/intemporal/store/fdb.clj b/src/intemporal/store/fdb.clj
index bd6907e..8505a9c 100644
--- a/src/intemporal/store/fdb.clj
+++ b/src/intemporal/store/fdb.clj
@@ -1,7 +1,5 @@
 (ns intemporal.store.fdb
   (:require [intemporal.protocol :as p]
-            [intemporal.internal.lease :as lease]
-            [intemporal.internal.error :as error]
             [me.vedang.clj-fdb.core :as fdb-core]
             [me.vedang.clj-fdb.transaction :as ftr]
             [me.vedang.clj-fdb.subspace.subspace :as fsub]
@@ -23,6 +21,34 @@
 (defn ->tuple [v]
   (Tuple/from (into-array Object (map #(if (keyword? %) (name %) %) v))))
 
+;; ============================================================================
+;; Ownership index (Phase C)
+;;
+;; FDB cannot SQL-scan by owner, so non-terminal workflows are indexed under
+;; ["wf-owner" <owner-or-""> <wf-id>]. list-pending scans the owner's bucket
+;; plus the unowned ("") bucket. The entry is added when a workflow starts,
+;; moved on claim-owner / release-owner, and removed when it terminates.
+;;
+;; The index entry VALUE carries the C2 wake-at (epoch ms, or nil = always due),
+;; so list-pending can skip not-yet-due timer workflows without a separate read.
+;; Bucket moves (claim-owner / release-owner) preserve the value.
+;; ============================================================================
+
+(defn- read-owner [tx root-subspace workflow-id]
+  (<-bytes (fdb-core/get tx root-subspace (->tuple ["owner" workflow-id]))))
+
+(defn- owner-index-key [root-subspace bucket workflow-id]
+  (->tuple ["wf-owner" bucket workflow-id]))
+
+(defn- maintain-owner-index! [tx root-subspace workflow-id events]
+  (let [started?  (some #(= :workflow-started (:event-type %)) events)
+        terminal? (some #(#{:workflow-completed :workflow-failed} (:event-type %)) events)
+        bucket    (or (read-owner tx root-subspace workflow-id) "")]
+    (cond
+      terminal? (fdb-core/clear tx root-subspace (owner-index-key root-subspace bucket workflow-id))
+      started?  (fdb-core/set tx root-subspace (owner-index-key root-subspace bucket workflow-id)
+                              (->bytes {:wake-at nil})))))
+
 ;; ============================================================================
 ;; FDB Store Implementation
 ;; ============================================================================
@@ -59,7 +85,9 @@
           (fdb-core/set tx history-sub key (->bytes event))
           ;; Phase B2: cache terminal status for O(1) reads.
           (when term
-            (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))))
+            (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))
+          ;; Phase C: keep the ownership index in sync.
+          (maintain-owner-index! tx root-subspace workflow-id [event])))
       event))
 
   (save-events [_ workflow-id events]
@@ -70,25 +98,16 @@
                                  :workflow-failed    "failed"
                                  nil)
                               events)]
-        ;; FDB's run wraps a body exception in CompletionException; unwrap so the
-        ;; lease-lost ExceptionInfo propagates cleanly (worker/error checks rely on it).
-        (try
-          (ftr/run db
-            (fn [tx]
-              ;; Phase C: validate the lease within the serializable transaction.
-              (when-let [owner lease/*owner*]
-                (let [cur (<-bytes (fdb-core/get tx root-subspace (->tuple ["lease" workflow-id])))]
-                  (when (or (not= (:owner-id cur) owner)
-                            (< (:lease-until cur 0) (System/currentTimeMillis)))
-                    (throw (error/lease-lost-exception workflow-id owner)))))
-              (doseq [event events]
-                (let [seq-num (:seq event (System/currentTimeMillis))
-                      key (->tuple [seq-num (str (java.util.UUID/randomUUID))])]
-                  (fdb-core/set tx history-sub key (->bytes event))))
-              (when term
-                (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))))
-          (catch java.util.concurrent.CompletionException ce
-            (throw (or (.getCause ce) ce))))))
+        (ftr/run db
+          (fn [tx]
+            (doseq [event events]
+              (let [seq-num (:seq event (System/currentTimeMillis))
+                    key (->tuple [seq-num (str (java.util.UUID/randomUUID))])]
+                (fdb-core/set tx history-sub key (->bytes event))))
+            (when term
+              (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "status"]) (->bytes term)))
+            ;; Phase C: keep the ownership index in sync.
+            (maintain-owner-index! tx root-subspace workflow-id events)))))
     events)
 
   (find-event [this workflow-id event-type seq-num]
@@ -109,16 +128,15 @@
                     {}
                     r))))))
 
-  (add-signal [this workflow-id signal-name signal-data]
+  (add-signal [_ workflow-id signal-name signal-data]
     (let [signals-sub (fsub/get root-subspace (->tuple ["signals" workflow-id signal-name]))
           key (->tuple [(System/currentTimeMillis) (str (java.util.UUID/randomUUID))])]
       (ftr/run db
         (fn [tx]
           (fdb-core/set tx signals-sub key (->bytes signal-data))))
 
-      ;; Phase C: durable, cross-pod wake (a worker on any pod resumes the workflow).
-      (p/add-runnable this workflow-id :signal)
       ;; In-process fast path for an embedded (no-worker) engine in THIS process.
+      ;; Worker mode picks the workflow up via the ownership scan (list-pending).
       (when-let [callback (get-in @callbacks [workflow-id signal-name])]
         (future (callback)))
 
@@ -153,12 +171,10 @@
       (fn [tx]
         (boolean (<-bytes (fdb-core/get tx root-subspace (->tuple ["state" workflow-id "cancelled"])))))))
 
-  (mark-cancelled [this workflow-id]
+  (mark-cancelled [_ workflow-id]
     (ftr/run db
       (fn [tx]
-        (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "cancelled"]) (->bytes true))))
-    ;; Phase C: durable wake so a worker resumes the sleeper and it observes the flag.
-    (p/add-runnable this workflow-id :cancel))
+        (fdb-core/set tx root-subspace (->tuple ["state" workflow-id "cancelled"]) (->bytes true)))))
 
   (get-workflow-status [this workflow-id]
     (if (p/is-cancelled? this workflow-id)
@@ -179,72 +195,64 @@
                   :workflow-failed :failed
                   :running))))))))
 
-  ;; --- Phase C: lease / ownership (serializable read-modify-write) ---
-  (claim-workflow [_ workflow-id owner-id lease-ms]
+  ;; --- Phase C: ownership-based recovery (serializable read-modify-write) ---
+  (claim-owner [_ workflow-id owner-id]
     (ftr/run db
       (fn [tx]
-        (let [k   (->tuple ["lease" workflow-id])
-              cur (<-bytes (fdb-core/get tx root-subspace k))
-              now (System/currentTimeMillis)]
-          (if (or (nil? cur) (= (:owner-id cur) owner-id) (< (:lease-until cur 0) now))
-            (do (fdb-core/set tx root-subspace k
-                              (->bytes {:owner-id owner-id :lease-until (+ now lease-ms)}))
-                true)
-            false)))))
-
-  (renew-lease [_ workflow-id owner-id lease-ms]
-    (ftr/run db
-      (fn [tx]
-        (let [k   (->tuple ["lease" workflow-id])
-              cur (<-bytes (fdb-core/get tx root-subspace k))
-              now (System/currentTimeMillis)]
-          (if (= (:owner-id cur) owner-id)
-            (do (fdb-core/set tx root-subspace k
-                              (->bytes {:owner-id owner-id :lease-until (+ now lease-ms)}))
-                true)
+        (let [k   (->tuple ["owner" workflow-id])
+              cur (<-bytes (fdb-core/get tx root-subspace k))]
+          (if (or (nil? cur) (= cur owner-id))
+            (let [old-bucket (or cur "")
+                  ;; preserve the index value (carries C2 wake-at) across the move
+                  entry      (or (<-bytes (fdb-core/get tx root-subspace
+                                            (owner-index-key root-subspace old-bucket workflow-id)))
+                                 {:wake-at nil})]
+              (fdb-core/set tx root-subspace k (->bytes owner-id))
+              (fdb-core/clear tx root-subspace (owner-index-key root-subspace old-bucket workflow-id))
+              (fdb-core/set tx root-subspace (owner-index-key root-subspace owner-id workflow-id)
+                            (->bytes entry))
+              true)
             false)))))
 
-  (release-lease [_ workflow-id owner-id]
+  (list-pending [_ owner-id limit]
     (ftr/run db
       (fn [tx]
-        (let [k   (->tuple ["lease" workflow-id])
-              cur (<-bytes (fdb-core/get tx root-subspace k))]
-          (when (= (:owner-id cur) owner-id)
-            (fdb-core/clear tx root-subspace k)))))
-    nil)
+        (let [now  (System/currentTimeMillis)
+              due? (fn [v] (let [wa (:wake-at v)] (or (nil? wa) (<= wa now))))
+              scan (fn [bucket]
+                     (let [sub (fsub/get root-subspace (->tuple ["wf-owner" bucket]))]
+                       (->> (fdb-core/get-range tx (fsub/range sub))
+                            (keep (fn [[key value]]
+                                    (when (due? (<-bytes value))
+                                      (nth key (dec (count key)))))))))]
+          (->> (concat (scan owner-id) (scan ""))
+               distinct
+               (take limit)
+               vec)))))
 
-  ;; --- Phase C: runnable markers (subspace ["runnable" wf-id]) ---
-  (add-runnable [_ workflow-id reason]
+  (release-owner [_ owner-id]
     (ftr/run db
       (fn [tx]
-        (fdb-core/set tx root-subspace (->tuple ["runnable" workflow-id])
-                      (->bytes {:reason (name reason)
-                                :enqueued-at (System/currentTimeMillis)
-                                :claimed-until 0}))))
+        (let [sub     (fsub/get root-subspace (->tuple ["wf-owner" owner-id]))
+              entries (->> (fdb-core/get-range tx (fsub/range sub))
+                           (mapv (fn [[key value]]
+                                   [(nth key (dec (count key))) (<-bytes value)])))]
+          (doseq [[wid entry] entries]
+            ;; entries in the owner bucket are non-terminal by construction;
+            ;; preserve the index value (C2 wake-at) when moving to the "" bucket
+            (fdb-core/clear tx root-subspace (->tuple ["owner" wid]))
+            (fdb-core/clear tx root-subspace (owner-index-key root-subspace owner-id wid))
+            (fdb-core/set tx root-subspace (owner-index-key root-subspace "" wid)
+                          (->bytes (or entry {:wake-at nil})))))))
     nil)
 
-  (claim-runnable [_ _owner-id batch-size claim-ms]
-    (ftr/run db
-      (fn [tx]
-        (let [run-sub (fsub/get root-subspace (->tuple ["runnable"]))
-              rows    (fdb-core/get-range tx (fsub/range run-sub))
-              now     (System/currentTimeMillis)
-              due     (->> rows
-                           (keep (fn [[key value]]
-                                   (let [m   (<-bytes value)
-                                         wid (nth key (dec (count key)))]
-                                     (when (< (:claimed-until m 0) now) [wid m]))))
-                           (take batch-size)
-                           vec)]
-          (doseq [[wid m] due]
-            (fdb-core/set tx root-subspace (->tuple ["runnable" wid])
-                          (->bytes (assoc m :claimed-until (+ now claim-ms)))))
-          (mapv first due)))))
-
-  (delete-runnable [_ workflow-id]
+  (set-wake-at [_ workflow-id wake-at-ms]
     (ftr/run db
       (fn [tx]
-        (fdb-core/clear tx root-subspace (->tuple ["runnable" workflow-id]))))
+        (let [bucket (or (read-owner tx root-subspace workflow-id) "")
+              k      (owner-index-key root-subspace bucket workflow-id)
+              entry  (or (<-bytes (fdb-core/get tx root-subspace k)) {})]
+          (fdb-core/set tx root-subspace k (->bytes (assoc entry :wake-at wake-at-ms))))))
     nil))
 
 (defn make-fdb-store [db subspace-name]
diff --git a/src/intemporal/store/jdbc.clj b/src/intemporal/store/jdbc.clj
index fe3283b..17b3a18 100644
--- a/src/intemporal/store/jdbc.clj
+++ b/src/intemporal/store/jdbc.clj
@@ -1,7 +1,5 @@
 (ns intemporal.store.jdbc
   (:require [intemporal.protocol :as p]
-            [intemporal.internal.lease :as lease]
-            [intemporal.internal.error :as error]
             [migratus.core :as migratus]
             [next.jdbc :as jdbc]
             [next.jdbc.prepare :as prepare]
@@ -94,19 +92,10 @@
         ;; Ensure workflow exists
         (jdbc/execute! tx ["INSERT INTO intemporal_workflows (id) VALUES (?) ON CONFLICT (id) DO NOTHING"
                            workflow-id])
-        ;; Phase C: validate the lease in the same transaction. If this owner no
-        ;; longer holds a live lease (another worker took over / it expired),
-        ;; refuse the write so concurrent execution can't corrupt history.
-        (when-let [owner lease/*owner*]
-          (when-not (jdbc/execute-one! tx
-                      ["SELECT 1 FROM intemporal_workflows
-                        WHERE id = ? AND owner_id = ? AND lease_until > now()"
-                       workflow-id owner])
-            (throw (error/lease-lost-exception workflow-id owner))))
         ;; Insert events. DO UPDATE keeps the write idempotent under normal
         ;; replay (the engine re-writes the same seq with identical data on
-        ;; each pass). Rejecting a *concurrent* writer is the lease's job
-        ;; (Phase C) — see validate-lease in save-events there.
+        ;; each pass). Concurrent execution is prevented by exclusive ownership
+        ;; (claim-owner) + the worker resuming owned workflows one at a time.
         (doseq [event events]
           (let [seq-num    (:seq event)
                 event-type (name (:event-type event))
@@ -141,16 +130,14 @@
               {}
               rows)))
 
-  (add-signal [this workflow-id signal-name signal-data]
+  (add-signal [_ workflow-id signal-name signal-data]
     (jdbc/with-transaction [tx datasource]
       (jdbc/execute! tx ["INSERT INTO intemporal_workflows (id) VALUES (?) ON CONFLICT (id) DO NOTHING"
                          workflow-id])
       (jdbc/execute! tx ["INSERT INTO intemporal_signals (workflow_id, signal_name, payload) VALUES (?, ?, ?)"
                          workflow-id signal-name signal-data]))
-    ;; Phase C: durable, cross-pod wake (a worker on any pod resumes the workflow).
-    (p/add-runnable this workflow-id :signal)
-    ;; In-process fast path: fire the callback for an embedded (no-worker) engine
-    ;; running in THIS process. Cross-pod wake goes through the marker above.
+    ;; In-process fast path for an embedded (no-worker) engine in THIS process.
+    ;; Worker mode picks the workflow up via the ownership scan (list-pending).
     (when-let [callback (get-in @callbacks [workflow-id signal-name])]
       (future (callback)))
     signal-data)
@@ -183,13 +170,11 @@
                                   workflow-id])]
       (boolean (:intemporal_workflows/cancelled row))))
 
-  (mark-cancelled [this workflow-id]
+  (mark-cancelled [_ workflow-id]
     (jdbc/execute! datasource
                    ["INSERT INTO intemporal_workflows (id, cancelled) VALUES (?, true)
                      ON CONFLICT (id) DO UPDATE SET cancelled = true"
-                    workflow-id])
-    ;; Phase C: durable wake so a worker resumes the sleeper and it observes the flag.
-    (p/add-runnable this workflow-id :cancel))
+                    workflow-id]))
 
   (get-workflow-status [this workflow-id]
     (let [wf-row (jdbc/execute-one! datasource
@@ -211,61 +196,39 @@
                       :workflow-failed :failed
                       :running)))))))
 
-  ;; --- Phase C: lease / ownership ---
-  (claim-workflow [_ workflow-id owner-id lease-ms]
+  ;; --- Phase C: ownership-based recovery ---
+  (claim-owner [_ workflow-id owner-id]
     (let [res (jdbc/execute-one! datasource
-                ["UPDATE intemporal_workflows
-                  SET owner_id = ?, lease_until = now() + ((?)::bigint * interval '1 millisecond')
-                  WHERE id = ?
-                    AND (owner_id IS NULL OR owner_id = ? OR lease_until IS NULL OR lease_until < now())"
-                 owner-id lease-ms workflow-id owner-id])]
+                ["UPDATE intemporal_workflows SET owner = ?
+                  WHERE id = ? AND (owner IS NULL OR owner = ?)"
+                 owner-id workflow-id owner-id])]
       (pos? (or (:next.jdbc/update-count res) 0))))
 
-  (renew-lease [_ workflow-id owner-id lease-ms]
-    (let [res (jdbc/execute-one! datasource
-                ["UPDATE intemporal_workflows
-                  SET lease_until = now() + ((?)::bigint * interval '1 millisecond')
-                  WHERE id = ? AND owner_id = ?"
-                 lease-ms workflow-id owner-id])]
-      (pos? (or (:next.jdbc/update-count res) 0))))
-
-  (release-lease [_ workflow-id owner-id]
+  (list-pending [_ owner-id limit]
+    (let [rows (jdbc/execute! datasource
+                 ["SELECT id FROM intemporal_workflows
+                   WHERE status NOT IN ('completed','failed')
+                     AND (wake_at IS NULL OR wake_at <= now())
+                     AND (owner = ? OR owner IS NULL)
+                   ORDER BY created_at
+                   LIMIT ?"
+                  owner-id limit])]
+      (mapv :intemporal_workflows/id rows)))
+
+  (release-owner [_ owner-id]
     (jdbc/execute! datasource
-                   ["UPDATE intemporal_workflows SET owner_id = NULL, lease_until = NULL
-                     WHERE id = ? AND owner_id = ?"
-                    workflow-id owner-id])
+                   ["UPDATE intemporal_workflows SET owner = NULL
+                     WHERE owner = ? AND status NOT IN ('completed','failed')"
+                    owner-id])
     nil)
 
-  ;; --- Phase C: runnable markers ---
-  (add-runnable [_ workflow-id reason]
+  (set-wake-at [_ workflow-id wake-at-ms]
     (jdbc/execute! datasource
-                   ["INSERT INTO intemporal_runnable (workflow_id, reason, enqueued_at, claimed_until)
-                     VALUES (?, ?, now(), to_timestamp(0))
-                     ON CONFLICT (workflow_id) DO UPDATE SET reason = EXCLUDED.reason, enqueued_at = now()"
-                    workflow-id (name reason)])
-    nil)
-
-  (claim-runnable [_ _owner-id batch-size claim-ms]
-    (jdbc/with-transaction [tx datasource]
-      (let [rows (jdbc/execute! tx
-                   ["SELECT workflow_id FROM intemporal_runnable
-                     WHERE claimed_until < now()
-                     ORDER BY enqueued_at
-                     FOR UPDATE SKIP LOCKED
-                     LIMIT ?" batch-size])
-            ids  (mapv :intemporal_runnable/workflow_id rows)]
-        (when (seq ids)
-          (let [ph (apply str (interpose "," (repeat (count ids) "?")))]
-            (jdbc/execute! tx
-              (into [(str "UPDATE intemporal_runnable
-                           SET claimed_until = now() + ((?)::bigint * interval '1 millisecond')
-                           WHERE workflow_id IN (" ph ")")
-                     claim-ms]
-                    ids))))
-        ids)))
-
-  (delete-runnable [_ workflow-id]
-    (jdbc/execute! datasource ["DELETE FROM intemporal_runnable WHERE workflow_id = ?" workflow-id])
+      ["UPDATE intemporal_workflows
+        SET wake_at = CASE WHEN ?::bigint IS NULL THEN NULL
+                           ELSE to_timestamp(?::bigint / 1000.0) END
+        WHERE id = ?"
+       wake-at-ms wake-at-ms workflow-id])
     nil))
 
 ;; TODO use more complete opts
diff --git a/src2/intemporal/error.cljc b/src2/intemporal/error.cljc
deleted file mode 100644
index 85ca2a6..0000000
--- a/src2/intemporal/error.cljc
+++ /dev/null
@@ -1,27 +0,0 @@
-(ns intemporal.error
-  #?(:clj (:import [java.lang InterruptedException]
-                   [java.util.concurrent RejectedExecutionException])))
-
-
-(defn interrupted? [e]
-  #?(:clj (instance? InterruptedException e)
-     :cljs false))
-
-(defn rejected? [e]
-  #?(:clj (instance? RejectedExecutionException e)
-     :cljs false))
-
-(defn internal-error? [ex]
-  (when-let [t (-> ex ex-data ::type)]
-    (or (= :internal t)
-        (= :panic t))))
-
-(defn panic? [ex]
-  (and (instance? #?(:clj Exception :cljs js/Error) ex)
-       (= :panic (-> ex ex-data ::type))))
-
-(defn internal-error [msg data]
-  (ex-info msg (merge data {::type :internal})))
-
-(defn panic [msg]
-  (ex-info msg {::type :panic}))
diff --git a/src2/intemporal/macros.cljc b/src2/intemporal/macros.cljc
deleted file mode 100644
index 3cb4a47..0000000
--- a/src2/intemporal/macros.cljc
+++ /dev/null
@@ -1,227 +0,0 @@
-(ns intemporal.macros
-  (:require [cljs.analyzer.api :as api]
-            [intemporal.workflow :as w]
-            [intemporal.workflow.internal :as i]
-            [md5.core :as md5]
-            [promesa.core :as p]
-            [taoensso.telemere :as t])
-  #?(:clj  (:require [net.cgrand.macrovich :as macros]
-                     [intemporal.workflow.internal :refer [trace! trace-async! add-event!]])
-     :cljs (:require-macros [net.cgrand.macrovich :as macros]
-                            [intemporal.workflow.internal :refer [trace! trace-async! add-event!]]
-                            [intemporal.macros :refer [env-let defn-workflow stub-function stub-protocol]])))
-
-(def cljs-available?
-  #?(:cljs
-     false
-     :clj
-     (try
-       (require '[cljs.analyzer])
-       ;; Ensure clojurescript is recent enough:
-       (-> 'cljs.analyzer/var-meta resolve boolean)
-       (catch Exception _ false))))
-
-;;;;
-;; userland
-
-
-;; utility function: since stubs return promises,
-;; we want to use p/let
-;; but p/let runs a thunk that is not env-aware so we fix that
-;; actually, since js promises can't block, we need a new fn
-;; to chain the value, hence we will always need to wrap any thunk
-;; in a `(with-env...)
-(defmacro env-let
-  "Only useful for clojurescript. Wraps the `body` and each `bindings` val with a `(with-env current-env val)`), ensuring
-  that if the binding value is function stub, its value will be unrapped
-  with the same environment at the callsite.
-
-  Uses `(promesa.core.cljc/let ...` under the hood so promises are resolved via
-  a thunk with the current environment."
-  [bindings & body]
-  (let [env-sym   (gensym)
-        wrap-vals (fn [i b]
-                    (if (even? i)
-                      b
-                      `(w/with-env ~env-sym ~b)))
-        wrapped   (map-indexed wrap-vals bindings)]
-    `(let [~env-sym (w/current-env)]
-       (p/let ~wrapped
-         (w/with-env ~env-sym
-           ~@body)))))
-
-(defmacro vthread
-  "Runs `body` within a virtual thread, returning a promise."
-  [& body]
-  `(binding [i/*env* (assoc i/*env* :vthread? true)]
-     (do ~@body)))
-
-(defmacro defn-workflow
-  "Defines a workflow. Workflows are functions that are resillient to crashes, as
-  long as side-effects are run via activities."
-  [sym argv & body]
-  (let [wname (symbol (str sym "-"))
-        sig   (md5/string->md5-hex (str body))]
-    ;; TODO save signature
-    `(do
-       (defn- ~wname ~argv (do ~@body))
-       (defn ~sym ~argv
-         ;; workflow should be called within a with-env block:
-         ;; (with-env {:store ..}
-         ;;   (my-workflow ...
-         ;; TODO: fixme: task id generator must be deterministic for a given workflow
-         (assert (some? (:store i/*env*)) "Environment does not have a `:store`, did you call `\n(with-env {:store ..}\n\t(my-workflow ...` ?")
-         (let [id#   (or (:id i/*env*) (i/random-id))
-               fvar# #'~wname
-               ;; #'my-workflown-fn- => my-workflow-fn
-               orig# (subs (str fvar#) 2 (dec (count (str fvar#))))]
-           (trace! {:name (format "workflow: %s" orig#) :attributes {:task-id id#}}
-             (let [ref#  (:ref i/*env*)
-                   root# (:root i/*env*)
-                   ;; id can be passed by env if we're dequeuing a task from store
-                   task# (i/create-workflow-task ref# root# (symbol fvar#) (macros/case :cljs fvar# :clj (var-get fvar#)) ~argv id#)]
-               (t/log! {:level :debug :_data {:env i/*env* :task task#}} ["Invoking task with id" (:id task#)])
-               (add-event! ::w/enqueue-and-wait {})
-               (w/enqueue-and-wait i/*env* task#))))))))
-
-(defmacro stub-function
-  "Stubs `f`, wrapping it in an activity-aware function."
-  [f]
-  `(fn [& argv#]
-     (assert (some? (:next-id i/*env*)) "No next-id function, are you inside `defn-workflow`?")
-     (let [ref#  (:ref i/*env*)
-           root# (:root i/*env*)
-           fvar# (var ~f)]
-       ;; TODO we can use &form to determine eg checksum of activity
-
-       ;; prepare call
-       (let [store#  (:store i/*env*)
-             protos# (:protos i/*env*)
-             id#     ((:next-id i/*env*))
-             ref#    nil                                    ;; no enqueued task => no ref
-             task#   (i/create-activity-task ref# root# (symbol fvar#) (macros/case :cljs fvar# :clj (var-get fvar#)) argv# id#)]
-
-         ;; an embedded workflow engine doesn't need to have a task per invocation
-         (t/log! {:level :debug :_data {:env i/*env* :task task#}} ["Invoking task with id " id#])
-         (trace! {:name (format "activity: %s" (symbol fvar#)) :attributes {:task-id id#}}
-           (w/enqueue-and-wait i/*env* task#)
-           #_(let [res# (i/resume-task i/*env* store# protos# task#)]
-               (macros/case
-                 :cljs res#
-                 :clj (deref res#))))))))
-
-(defmacro stub-protocol
-  "Stub a protocol definition. Opts are currently unused.
-  Example: `(stub-protocol EventHandler {:some-opts true})`"
-  [proto & opts]
-  (macros/case
-    :cljs
-    (when cljs-available?
-      (let [resolved     (api/resolve &env proto)
-            curr-ns      (:name (:ns &env))
-            proto-ns     (:ns resolved)
-            in-proto-ns? (= curr-ns proto-ns)
-            sig+args     (-> (for [[sig val] (:sigs resolved)
-                                   :let [arglist (:arglists val)
-                                         qname   (str (name proto-ns) "/" (name sig))
-                                         invname (if in-proto-ns?
-                                                   (name sig)
-                                                   (str (namespace proto) "/" (name sig)))]]
-                               [(name sig) arglist (symbol invname) (symbol qname) (str (:name resolved))])
-                             (doall))]
-        ;; TODO we can use &form to determine eg checksum of proto def
-        `(reify ~proto
-           ~@(for [[mname arglist invname qname pname] sig+args
-                   :let [sname (symbol mname)
-                         args  (rest (first arglist))]]
-               ;; implement ~sname
-               `(~sname [this# ~@args]
-                  (let [aid#      '~qname
-                        act-opts# ~(first opts)
-                        ref#      (:ref i/*env*)
-                        root#     (:root i/*env*)]
-
-                    ;; prepare call
-                    (let [store#  (:store i/*env*)
-                          protos# (:protos i/*env*)
-                          id#     ((:next-id i/*env*))
-                          ref#    nil                       ;; no task => no ref
-                          task#   (i/create-proto-activity-task
-                                    (symbol ~pname)
-                                    ref#
-                                    root#
-                                    (symbol aid#)
-                                    ;aid# ;; >> doesn't work!
-                                    ;; protos are not reified like in clj https://clojurescript.org/about/differences#_protocols
-                                    ;; we create a "fake" fvar that can be invokeable just like the real thing
-                                    (fn [& impl+args#] (apply ~qname impl+args#))
-                                    [~@args]
-                                    id#)]
-
-                      (t/log! {:level :debug :_data {:env i/*env* :task task#}} ["Invoking task with id" id#])
-                      ;(i/resume-task i/*env* store# protos# task#))))))))
-                      (w/enqueue-and-wait i/*env* task#))))))))
-
-    :clj
-    #_{:clj-kondo/ignore [:unresolved-symbol]}
-    (let [proto-var    (var-get (resolve proto))
-          curr-ns      (name (ns-name *ns*))
-          proto-ns     (namespace (symbol (subs (str (:var proto-var)) 2)))
-          in-proto-ns? (= curr-ns proto-ns)
-          sig+args     (-> (for [[sig val] (:sigs proto-var)
-                                 :let [arglist (:arglists val)
-                                       qname   (str (name proto-ns) "/" (name sig))
-                                       invname (if in-proto-ns?
-                                                 (name sig)
-                                                 (str (namespace proto) "/" (name sig)))]]
-                             [(name sig) arglist (symbol invname) (symbol qname)])
-                           (doall))]
-      `(reify ~proto
-         ~@(for [[mname arglist invname qname] sig+args
-                 :let [sname (symbol mname)
-                       args  (rest (first arglist))]]
-             ;; implement ~sname
-             `(~sname [this# ~@args]
-                (let [aid#      '~qname
-                      act-opts# ~(first opts)
-                      ref#      (:ref i/*env*)
-                      root#     (:root i/*env*)]
-
-                  ;; prepare call
-                  (let [store#  (:store i/*env*)
-                        protos# (:protos i/*env*)
-                        id#     ((:next-id i/*env*))
-                        ref#    nil                         ;; no task => no ref
-                        task#   (i/create-proto-activity-task
-                                  (-> ~proto :var symbol)
-                                  ref#
-                                  root#
-                                  (symbol aid#)
-                                  (var-get (requiring-resolve aid#))
-                                  [~@args]
-                                  id#)]
-
-                    (t/log! {:level :debug :_data {:env i/*env* :task task#}} ["Invoking task with id" id#])
-                    (if (:vthread? i/*env*)
-
-                      (trace-async! {:name (format "activity: %s" aid#) :attributes {:task-id id# :protocol (-> ~proto :var symbol)}}
-                        @(i/resume-task i/*env* store# protos# task#))
-                      #_
-                      (trace! {:name (format "activity: %s" aid#) :attributes {:task-id id# :protocol (-> ~proto :var symbol)}}
-                        (w/enqueue-and-wait i/*env* task#))
-                      (trace! {:name (format "activity: %s" aid#) :attributes {:task-id id# :protocol (-> ~proto :var symbol)}}
-                        ;@(i/resume-task i/*env* store# protos# task#)))))))))))
-                        (w/enqueue-and-wait i/*env* task#)))))))))))
-
-(defmacro with-failure
-  "Runs `body`, ensuring that if it fails, compensation will always run.
-  - if `body` fails, `binding` will have the value `intemporal.activity/failure`.
-  - if `body` succeeds, but compensation is invoked later (eg other activity failure), `binding` will have its return value
-
-  (with-failure [v (book-hotel stub \"hotel\")]
-    (cancel-hotel stub v n))
-  "
-  [[binding body] comp-fn]
-  `(let [val# (atom :intemporal.activity/failure)]
-     (w/add-compensation (fn [] (let [~binding @val#] (do ~comp-fn))))
-     (reset! val# (do ~body))))
\ No newline at end of file
diff --git a/src2/intemporal/store.cljc b/src2/intemporal/store.cljc
deleted file mode 100644
index 3548f32..0000000
--- a/src2/intemporal/store.cljc
+++ /dev/null
@@ -1,340 +0,0 @@
-(ns intemporal.store
-  (:require [clojure.tools.reader.edn :as edn]
-            [intemporal.store.internal :as si]
-            [promesa.core :as p]
-            [taoensso.telemere :as t]
-            #?(:clj [clojure.java.io :as io])
-            #?(:clj [net.cgrand.macrovich :as macros]))
-  #?(:cljs (:require-macros
-             [net.cgrand.macrovich :as macros]
-             [intemporal.store :refer [bfn]]))
-  #?(:clj (:import [java.io File])))
-
-#?(:clj (set! *warn-on-reflection* true))
-
-(defmacro bfn
-  "Like bound-fn on JVM; falls back to fn on CLJS."
-  [args & body]
-  (macros/case
-    :clj  `(clojure.core/bound-fn ~args ~@body)
-    :cljs `(fn ~args ~@body)))
-
-;;;;
-;; main protos
-
-(defprotocol TaskStore
-  (list-tasks [this] "Lists all tasks")
-  (task<-panic [this task-id error]
-    "Terminates the task via panic; events should not be stored")
-  (task<-event [this task-id event-descr]
-    "Transitions the task. The task should be dequeued beforehand. Returns the event.
-    `event-descr` is one of:
-    `{:sym 'ns/f :args [1]}`
-    `{:sym 'ns/f :result :ok}`
-    `{:sym 'ns/f :error <some error>}`
-    ")
-  (watch-task [this id callback]
-    "Observes state changes, calling `callback` for any task that matches `predicate`.")
-  (await-task [this id] [this id opts]
-    "Waits for workflow to finish. Returns a deref'able value. Can throw.
-    Opts include
-    - `timeout-ms`: timeout for task await")
-  (find-task [this id]
-    "Finds the task on the db by id")
-  (reenqueue-pending-tasks [this callback]
-    "Marks all pending tasks belonging to the store's `owner` (or `nil` owner) as `new`")
-  (release-pending-tasks [this]
-    "Disowns all tasks that are pending for the store's `owner` (or `nil` owner), making them available")
-  (enqueue-task [this task]
-    "Atomically enqueues a protocol, workflow or activity task execution")
-  (dequeue-task [this] [this opts]
-    "Atomically dequeues some workflow, protocol or activity task.
-    For deterministic purposes, should dequeue the oldest task first.
-    If the task was deserialized, its `fvar` attribute must be a `fn`
-    Opts:
-    * `lease-ms`- duration of lease for dequeue. After lease expires, the task is eligible for dequeueing again")
-  (clear-tasks [this]
-    "Deletes all tasks"))
-
-(defprotocol HistoryStore
-  (list-events [this] "Lists all events")
-  (save-event [this task-id event] "Saves the event for the given task id. Returns the saved event")
-  (all-events [this task-id] "Returns all the events for a given task id")
-  (clear-events [this] "Deletes all events"))
-
-(defprotocol InternalVarStore
-  (register [this sym var] "Register the symbol with the var")
-  (lookup [this sym] "Finds the var for the given symbol"))
-
-;;;;
-;; helpers
-
-(defn now []
-  #?(:clj  (System/currentTimeMillis)
-     :cljs (.getTime (js/Date.))))
-
-(def default-lease "Default lease time in millis - 15mins"
-  (* 15 60 1000))
-
-(defn sym->var [store {:keys [sym fvar] :as task}]
-  #?(:clj  (or fvar (requiring-resolve sym))
-     :cljs (or fvar (lookup store sym))))
-
-(defn- edn-exists? [file]
-  #?(:clj  (.exists (File. ^String file))
-     :cljs (seq (.getItem (.-localStorage js/window) file))))
-
-(defn read-edn [file readers]
-  #?(:clj  (with-open [f (io/reader file)]
-             (edn/read-string {:readers readers} (slurp f)))
-     :cljs (let [f (.getItem (.-localStorage js/window) file)]
-             (edn/read-string {:readers readers} f))))
-
-(defn write-edn [file val]
-  #?(:clj  (spit file val)
-     :cljs (.setItem (.-localStorage js/window) file (pr-str val))))
-
-;;;;
-;; main impl
-;;
-
-(def default-owner "intemporal")
-
-(defn make-store
-  "Creates a new memory-based store. All workflows will belong to the store's owner.
-  When calling `release-pending-tasks` or `reenqueue-pending-tasks`, only tasks that either belong to the
-  store's `owner` or have `owner = nil` will be picked up."
-  ([]
-   (make-store nil))
-  ([{:keys [owner file readers failures]
-     :or   {owner    default-owner
-            failures {:validation 0}}}]
-   ;; TODO use single atom?
-   (let [tasks       (atom {})
-         history     (atom {})
-         counter     (atom 0)
-         pcounter    (atom 0)
-         ecounter    (atom 0)
-         tcounter    (atom 0)
-         vars        (atom {})
-         maybe-fail! (fn []
-                       (when (< (rand-int 100)
-                                (* 100 (get failures :validation)))
-                         (throw (ex-info "Forced error via failure rate" {:intemporal.workflow.internal/type :internal}))))
-
-         ;;persistence
-         persist!    (fn [k ref old new]
-                       (when (and file (not= old new))
-                         (t/log! :debug ["Persisting store to file" file])
-                         (write-edn file {:tasks    @tasks
-                                          :history  @history
-                                          :counter  @counter
-                                          :pcounter @pcounter
-                                          :ecounter @ecounter})))
-
-         find-task   (fn [this id]
-                       (get @tasks id))
-
-         update-task (fn [this id attrs]
-                       (when-let [w (find-task this id)]
-                         (maybe-fail!)
-                         (si/validate-transition! w attrs)
-                         (->> (merge w attrs)
-                              (si/validate-task!)
-                              (swap! tasks assoc id))))]
-
-     ;; deser the db
-     (when file
-       ;; add hooks to persist on change
-       (add-watch tasks :persist persist!)
-       (add-watch history :persist persist!)
-       (add-watch counter :persist persist!)
-       (add-watch pcounter :persist persist!)
-       (add-watch ecounter :persist persist!)
-
-       (when (edn-exists? file)
-         (t/log! :info ["Reading store file" file])
-         (let [data (read-edn file readers)]
-           (reset! tasks (or (:tasks data) {}))
-           (reset! history (or (:history data) {}))
-           (reset! counter (or (:counter data) 0))
-           (reset! pcounter (or (:pcounter data) 0))
-           (reset! ecounter (or (:ecounter data) 0)))))
-
-     (reify
-       InternalVarStore
-       (register [this sym var]
-         #?(:cljs (swap! vars assoc sym var)))
-       (lookup [this sym]
-         #?(:clj (requiring-resolve sym)
-            :cljs (get @vars sym)))
-
-       HistoryStore
-       (list-events [this]
-         (apply concat (vals @history)))
-       (save-event [this task-id event]
-         (let [evt+id (assoc event :id (swap! counter inc))]
-           (si/validate-event! evt+id)
-           (swap! history (fn [v]
-                            (assoc v task-id (-> (or (get v task-id) [])
-                                                 (conj evt+id)))))
-           evt+id))
-       (all-events [this task-id]
-         (get @history task-id))
-
-       (clear-events [this]
-         (reset! history {}))
-
-       TaskStore
-       (list-tasks [this]
-         (filter #(or (= owner (:owner %))
-                      (nil? (:owner %)))
-                 (vals @tasks)))
-
-       (task<-panic [this task-id error]
-         (update-task this task-id {:result error}))
-
-       (task<-event [this task-id {:keys [id ref root type sym args result error] :as event-descr}]
-         ;; some redundancy between :result in task and event
-         ;; note that we save the event first, because update-task can trigger some watchers
-         ;; and they would expect the event to be present in the history
-         (cond
-           (some? args)
-           (let [evt {:ref ref :root root :type type :sym sym :args args :error nil :result nil}]
-             (when-not id
-               (save-event this task-id evt))
-             (update-task this task-id {:state :pending})
-             evt)
-
-           (some? error)
-           (let [evt {:ref ref :root root :type type :sym sym :args nil :error error :result nil}]
-             (when-not id
-               (save-event this task-id evt))
-             (update-task this task-id {:state :failure :result error})
-             evt)
-
-           ;;(some? result) ;result can be nil
-           :else
-           (let [evt {:ref ref :root root :type type :sym sym :args nil :error nil :result result}]
-             (when-not id
-               (save-event this task-id evt))
-             (update-task this task-id {:state :success :result result})
-             evt)))
-
-       (find-task [this id]
-         (->> (vals @tasks)
-              (filter #(= (:id %) id))
-              (first)))
-
-       (watch-task [this id f]
-         (let [k       (keyword (str "watcher-" (swap! pcounter inc)))
-               watchfn (fn [k atm old new]
-                         (let [xf        (comp
-                                           (filter #(= id (:id %)))
-                                           (filter #(not= (get old (:id %)) %))
-                                           (take 1))
-                               changeset (transduce xf conj (vals new))]
-
-                           (when (and (first changeset)
-                                      (f (first changeset)))
-                             (remove-watch tasks k))))]
-           (add-watch tasks k watchfn)))
-
-       (await-task [this id]
-         (await-task this id {:timeout-ms default-lease}))
-
-       (await-task [this id {:keys [timeout-ms] :as opts}]
-         (maybe-fail!)
-         (let [task        (find-task this id)
-               deferred    (p/deferred)
-               wrap-result (fn [{:keys [result] :as task}]
-                             (cond
-                               (si/success? task) (p/resolved result)
-                               (si/failure? task) (p/rejected result)
-                               :else (p/rejected (ex-info "Unknown state" {:task task}))))]
-
-           (if (si/terminal? task)
-             (wrap-result task)
-             ;;else
-             (do
-               (watch-task this id (bfn [task]
-                                     (when (si/terminal? task)
-                                       (p/resolve! deferred task)
-                                       true)))
-               ;; wait for resolution
-               ;; remember: js doesnt have blocking op so we need to chain
-               (-> (p/timeout deferred timeout-ms ::timeout)
-                   (p/then (bfn [resolved]
-                             (if (= ::timeout resolved)
-                               (throw (ex-info "Timeout waiting for task to be completed" {:task task}))
-                               (wrap-result resolved)))))))))
-
-       (release-pending-tasks [this]
-         (swap! tasks
-                update-vals
-                (fn [{:keys [state] :as task}]
-                  (cond-> task
-                          (and (= :pending state)
-                               (= (:owner task) owner))
-                          (assoc :owner nil)))))
-
-       (reenqueue-pending-tasks [this f]
-         (let [task->run? (atom #{})]
-           (swap! tasks
-                  update-vals
-                  (fn [{:keys [state] :as task}]
-                    (if (and (= :pending state)
-                             (or (= (:owner task) owner)
-                                 (nil? (:owner task))))
-                      (try
-                        ;; ensure we only run f once - swap! might run the fn multiple times
-                        (assoc task :state :new :owner owner)
-                        ;; TODO log reenqueued task
-                        (finally
-                          (when-not (contains? @task->run? task)
-                            (try
-                              (f task)
-                              (finally
-                                (swap! task->run? conj task))))))
-                      ;; else
-                      task)))))
-
-       (enqueue-task [this task]
-         (maybe-fail!)
-         (let [task+owner (assoc task :owner owner :order (swap! tcounter inc))]
-           (si/validate-task! task+owner)
-           (swap! tasks assoc (:id task) task+owner)
-           #?(:cljs (register this (:sym task+owner) (:fvar task+owner)))
-           task+owner))
-
-       (dequeue-task [this]
-         (dequeue-task this {:lease-ms nil}))
-
-       (dequeue-task [this {:keys [lease-ms]}]
-         (let [first-new (fn [v] (->> (vals v)
-                                      (filter #(and
-                                                 (or (= owner (:owner %)) (nil? (:owner %)))
-                                                 (or (= :new (:state %))
-                                                     (some-> (:lease-end %)
-                                                             (< (now))))))
-                                      (sort-by :order)
-                                      (first)))
-               found?    (atom nil)]
-
-           (swap-vals! tasks
-                       (fn [v] (let [found (first-new v)]
-                                 (if found
-                                   (->> (assoc found :state :pending
-                                                     :fvar (sym->var this found)
-                                                     ;; watch for overflow?
-                                                     :lease-end (when lease-ms
-                                                                  (+ (now) lease-ms)))
-                                        (reset! found?)
-                                        (assoc v (:id found)))
-                                   v))))
-           ;; highest first
-           (->> @found?)))
-
-       (clear-tasks [this]
-         (reset! tasks {}))))))
-
diff --git a/src2/intemporal/store/foundationdb.clj b/src2/intemporal/store/foundationdb.clj
deleted file mode 100644
index a12dbf0..0000000
--- a/src2/intemporal/store/foundationdb.clj
+++ /dev/null
@@ -1,255 +0,0 @@
-(ns intemporal.store.foundationdb
-  (:require [intemporal.store :as store]
-            [intemporal.workflow.internal :as i]
-            [intemporal.store.internal :as si :refer [resolve-fvar serialize deserialize next-id]]
-            [me.vedang.clj-fdb.FDB :as cfdb]
-            [me.vedang.clj-fdb.core :as fc]
-            [me.vedang.clj-fdb.transaction :as ftr]
-            [me.vedang.clj-fdb.subspace.subspace :as fsub]
-            [promesa.core :as p])
-  (:import [com.apple.foundationdb FDB FDBTransaction KeyValue]
-           [com.apple.foundationdb.tuple Tuple]))
-
-;; FDB is a KV store; this store impl will use the subspace feature for namespacing
-;; <tasks>  <id>          => task
-;; <events> <taskid> <id> => event
-;; event ids are scoped to a task
-;; values are (de)serialized via nippy
-
-(def fdb-api-version cfdb/clj-fdb-api-version)
-
-(defmacro with-tx [binding & body]
-  (let [[tx-sym db-sym] binding
-        database (with-meta db-sym {:tag 'com.apple.foundationdb.Database})]
-    ;; TODO type hint Closeable?
-    `(with-open [db# ~database]
-       (ftr/run db#
-                (fn [~tx-sym] (do ~@body))))))
-
-(defn make-store
-  ([]
-   (make-store nil))
-  ([{:keys [owner cluster-file-path]
-     :or   {owner store/default-owner}}]
-   (let [^FDB fdb             (cfdb/select-api-version fdb-api-version)
-         open-db              #(if cluster-file-path
-                                 (cfdb/open fdb cluster-file-path)
-                                 (cfdb/open fdb))
-         subspace-tasks       (fsub/create ["tasks"])
-         subspace-owned-tasks (fsub/create [(str owner "_tasks")])
-         subspace-history     (fsub/create ["history"])]
-     (reify
-       store/InternalVarStore
-       (register [this sym var])
-       (lookup [this sym]
-         (requiring-resolve sym))
-
-       store/HistoryStore
-       (list-events [this]
-         (-> (with-tx [tx (open-db)]
-               (fc/get-range tx subspace-history {:valfn deserialize}))
-             (vals)))
-
-       (save-event [this task-id {:keys [type ref root sym args result] :as event}]
-         (si/validate-serializable! args "Event args should be serializable")
-         (si/validate-serializable! result "Event result should be serializable")
-         (let [evt-id (next-id)
-               evt+id (assoc event :id evt-id)]
-           (si/validate-serializable! evt+id "Event should be serializable")
-           (si/validate-event! evt+id)
-
-           (with-tx [tx (open-db)]
-             (fc/set tx subspace-history [task-id evt-id] (serialize evt+id)))
-           evt+id))
-
-       (all-events [this task-id]
-         (-> (with-tx [tx (open-db)]
-               (fc/get-range tx subspace-history [task-id] {:valfn deserialize}))
-             (vals)))
-
-       (clear-events [this]
-         (with-tx [tx (open-db)]
-           (fc/clear-range tx subspace-history)))
-
-       store/TaskStore
-       (list-tasks [this]
-         (let [owned (-> (with-tx [tx (open-db)]
-                           (fc/get-range tx subspace-owned-tasks {:valfn (comp resolve-fvar deserialize)}))
-                         (vals))
-               free  (-> (with-tx [tx (open-db)]
-                           (fc/get-range tx subspace-tasks {:valfn (comp resolve-fvar deserialize)}))
-                         (vals))]
-           (into owned free)))
-
-       (task<-panic [this task-id error]
-         (with-tx [tx (open-db)]
-           (let [task         (fc/get tx subspace-owned-tasks task-id {:valfn (comp resolve-fvar deserialize)})
-                 updated-task (assoc task :result error)]
-             (when task
-               (si/validate-task! updated-task)
-               (fc/set tx subspace-owned-tasks task-id (serialize updated-task))))))
-
-       (task<-event [this task-id {:keys [id ref root type sym args result error] :as event-descr}]
-         ;; some redundancy between :result in task and event
-         ;; note that we save the event first, because update-task can trigger some watchers
-         ;; and they would expect the event to be present in the history
-         (with-tx [tx (open-db)]
-           (let [task         (fc/get tx subspace-owned-tasks task-id {:valfn (comp resolve-fvar deserialize)})
-                 evt          {:ref ref :root root :type type :sym sym :args args}
-                 updated-task (cond
-                                (some? args) (assoc task :state :pending)
-                                (some? error) (assoc task :state :failure :result error)
-                                :else (assoc task :state :success :result result))
-                 updated-evt  (cond
-                                (some? args) (assoc evt :args args)
-                                (some? error) (assoc evt :error error)
-                                :else (assoc evt :result result))]
-             (si/validate-serializable! task "Task should be serializable")
-             (when-not id
-               (store/save-event this task-id updated-evt))
-             ;; not every invocation will come from a persisted task
-             (when task
-               (si/validate-task! updated-task)
-               (si/validate-transition! task updated-task)
-               (fc/set tx subspace-owned-tasks task-id (serialize updated-task)))
-             updated-evt)))
-
-       (find-task [this id]
-         (with-tx [^FDBTransaction tx (open-db)]
-           (when-let [task? (fc/get tx subspace-owned-tasks id)]
-             (resolve-fvar (deserialize task?)))))
-
-       (watch-task [this id f]
-         (let [watch? (atom true)]
-           (i/libthread (format "Watcher-%s" id)
-             (while @watch?
-               @(with-tx [^FDBTransaction tx (open-db)]
-                  (when (fc/get tx subspace-owned-tasks id)
-                    (.watch tx (fsub/pack subspace-owned-tasks (Tuple/from (object-array [id]))))))
-
-               (with-tx [^FDBTransaction tx (open-db)]
-                 (when-let [task? (fc/get tx subspace-owned-tasks id)]
-                   (when (f (resolve-fvar (deserialize task?)))
-                     (reset! watch? false))))))))
-
-       (await-task [this id]
-         (store/await-task this id {:timeout-ms store/default-lease}))
-
-       (await-task [this id {:keys [timeout-ms] :as opts}]
-         (let [task        (store/find-task this id)
-               deferred    (p/deferred)
-               wrap-result (fn [{:keys [state result] :as task}]
-                             (cond
-                               (si/success? task) (p/resolved result)
-                               (si/failure? task) (p/rejected result)
-                               :else (p/rejected (ex-info "Unknown state" {:task task}))))]
-
-           (if (si/terminal? task)
-             (wrap-result task)
-             ;;else
-             (do
-               (store/watch-task this id (fn [{:keys [state] :as task}]
-                                           (when (si/terminal? task)
-                                             (p/resolve! deferred task)
-                                             true)))
-               ;; wait for resolution
-               (-> (p/timeout deferred timeout-ms ::timeout)
-                   (p/then (fn [resolved]
-                             (if (= ::timeout resolved)
-                               (throw (ex-info "Timeout waiting for task to be completed" {:task task}))
-                               (wrap-result resolved)))))))))
-
-       (release-pending-tasks [this]
-         (with-tx [tx (open-db)]
-           (let [owned-tasks @(.asList (ftr/get-range tx (fsub/range subspace-owned-tasks)))]
-             (doseq [kv owned-tasks]
-               (let [task (-> kv .getValue deserialize resolve-fvar)]
-                 (when (= :pending (:state task))
-                   (fc/set tx subspace-tasks [(:id task)] (serialize (assoc task :owner nil)))
-                   (fc/clear tx subspace-owned-tasks (:id task))))))))
-
-       (reenqueue-pending-tasks [this f]
-         (with-tx [tx (open-db)]
-           (let [owned-tasks @(.asList (ftr/get-range tx (fsub/range subspace-owned-tasks)))
-                 free-tasks  @(.asList (ftr/get-range tx (fsub/range subspace-tasks)))]
-             (doseq [kv owned-tasks]
-               (let [task (-> kv .getValue deserialize resolve-fvar)]
-                 (when (= :pending (:state task))
-                   (f task)
-                   (fc/set tx subspace-owned-tasks [(:id task)] (serialize (assoc task :state :new))))))
-
-             (doseq [kv free-tasks]
-               (let [task (-> kv .getValue deserialize resolve-fvar)]
-                 (when (= :pending (:state task))
-                   (f task)
-                   (fc/clear tx subspace-tasks (:id task))
-                   (fc/set tx subspace-owned-tasks [(:id task)] (serialize (assoc task :state :new :owner owner)))))))))
-
-       (enqueue-task [this task]
-         (let [task+owner (assoc task :owner owner)
-               task-id    (:id task+owner)]
-           (si/validate-serializable! task+owner "Task should be serializable")
-           (si/validate-task! task+owner)
-
-           (with-tx [tx (open-db)]
-             (fc/set tx subspace-owned-tasks [task-id] (serialize (dissoc task+owner :fvar))))
-           task+owner))
-
-       (dequeue-task [this]
-         (store/dequeue-task this {:lease-ms nil}))
-
-       (dequeue-task [this {:keys [lease-ms]}]
-         (let [dequeuable? (fn [{:keys [state lease-end]}]
-                             (or (= :new state)
-                                 (some-> lease-end
-                                         (< (store/now)))))
-               update-task (fn [task]
-                             (assoc task
-                               :owner owner
-                               :state :pending
-                               :fvar (store/sym->var this task)
-                               :lease-end (when lease-ms (+ (store/now) lease-ms))))
-               found?      (with-tx [tx (open-db)]
-                             (reduce
-                               (fn [_ ^KeyValue kv]
-                                 (let [task (-> kv .getValue deserialize resolve-fvar)]
-                                   (when (dequeuable? task)
-                                     (let [updated-task (update-task task)]
-                                       (fc/set tx subspace-owned-tasks [(:id task)] (serialize (dissoc updated-task :fvar)))
-                                       (reduced updated-task)))))
-                               nil
-                               (ftr/get-range tx (fsub/range subspace-owned-tasks))))]
-
-           ;; if we cant find any task that we own,
-           ;; try the tasks that were released
-           (if found?
-             found?
-             (with-tx [tx (open-db)]
-               (reduce
-                 (fn [_ ^KeyValue kv]
-                   (let [task (-> kv .getValue deserialize resolve-fvar)]
-                     (when (dequeuable? task)
-                       (let [updated-task (update-task task)]
-                         (fc/clear tx subspace-tasks (:id task))
-                         (fc/set tx subspace-owned-tasks [(:id task)] (serialize (dissoc updated-task :fvar)))
-                         (reduced updated-task)))))
-                 nil
-                 (ftr/get-range tx (fsub/range subspace-tasks)))))))
-
-       (clear-tasks [this]
-         (with-tx [tx (open-db)]
-           (fc/clear-range tx subspace-owned-tasks)))))))
-
-
-(comment
-  (def s (make-store {:cluster-file-path "docker/fdb.cluster"}))
-  (def t (i/create-workflow-task "ref#" "root#" 'clojure.core/+ (var-get #'+) [] 1))
-
-  (store/save-event s 1 {:a 1})
-  (store/list-events s)
-  (store/list-tasks s)
-
-  (store/enqueue-task s t)
-  (store/dequeue-task s))
-
-;(store/watch-task s 1 (partial println ">>>"))
\ No newline at end of file
diff --git a/src2/intemporal/store/internal.cljc b/src2/intemporal/store/internal.cljc
deleted file mode 100644
index 3c302b6..0000000
--- a/src2/intemporal/store/internal.cljc
+++ /dev/null
@@ -1,139 +0,0 @@
-(ns intemporal.store.internal
-  #?(:clj  (:require [intemporal.error :as error]
-                     [taoensso.nippy :as nippy]
-                     [malli.core :as m])
-     :cljs (:require [clojure.edn :as edn]
-                     [intemporal.error :as error]
-                     [malli.core :as m])))
-
-(defn next-id []
-  #?(:clj  (System/currentTimeMillis)
-     :cljs (.getTime (js/Date.))))
-
-;;;;
-;; serialization
-
-(defn resolve-fvar [{:keys [sym] :as task}]
-  ;; TODO does it work in cljs?
-  (assoc task :fvar #?(:clj (requiring-resolve sym) :cljs nil)))
-
-(defn serializable?
-  "Indicates if an object is serializable"
-  [x]
-  #?(:clj  (nippy/freezable? x {:allow-java-serializable? true?})
-     :cljs true))
-
-(defn serialize
-  "Serializes an object"
-  [x]
-  (when x
-    #?(:clj  (nippy/freeze x)
-       :cljs (pr-str x))))
-
-(defn deserialize
-  "Deserializes an object"
-  [x]
-  (when x
-    #?(:clj  (nippy/thaw x)
-       :cljs (edn/read x))))
-
-;;;;
-;; validation
-
-#_:clj-kondo/ignore
-#?(:clj (when (= "true" (System/getenv "DEV"))
-          ((requiring-resolve 'malli.dev/start!))))
-
-;;;;
-;; validation
-(def registry
-  (merge
-    (m/class-schemas)
-    (m/comparator-schemas)
-    (m/base-schemas)
-    (m/type-schemas)
-    {:var (m/-simple-schema {:type :var, :pred #(or (fn? %) (var? %))})}))
-
-(def ^:private RuntimeConfig
-  [:map {:closed false}
-   [:timeout-ms {:optional true} :int]
-   [:telemetry-context {:optional true} [:maybe :map]]])
-
-(def ^:private Task
-  [:map {:closed true}
-   [:id [:or :string :uuid]]
-   [:owner [:maybe :string]]
-   [:sym :symbol]
-   [:ref [:maybe :string]]
-   [:root [:maybe :string]]
-   [:proto {:optional true} :symbol]
-   [:fvar :var]
-   [:args {:optional true} [:maybe [:sequential :any]]]
-   [:result :any]
-   [:state [:enum :new :pending :failure :success]]
-   [:type [:enum :workflow :activity :proto-activity]]
-   [:lease-end {:optional true} [:maybe :int]]
-   [:order {:optional true} :int]
-   [:runtime {:optional true} RuntimeConfig]])
-
-(def ^:private Event
-  [:map {:closed true}
-   [:id :int]
-   [:ref [:maybe :string]]
-   [:root [:maybe :string]]
-   [:type [:enum
-           :intemporal.workflow/invoke :intemporal.workflow/success :intemporal.workflow/failure
-           :intemporal.activity/invoke :intemporal.activity/success :intemporal.activity/failure
-           :intemporal.protocol/invoke :intemporal.protocol/success :intemporal.protocol/failure
-           :intemporal.workflow.internal/failure]]
-   [:sym :symbol]
-   [:args {:optional true} [:maybe [:sequential :any]]]
-   [:result {:optional true} :any]
-   [:error {:optional true} :any]])
-
-;; valid task states
-(def valid-state-transitions {:new     #{:pending}
-                              :pending #{:new :success :failure}})
-
-(defn validate-transition!
-  "Ensures that the task's new `:state`, if any, is allowed.
-  Useful to implement compare-and-swap semantics"
-  [{:keys [state id]} attrs]
-  (let [next-states (get valid-state-transitions state)]
-    ;; if we are updating state
-    ;; and the new state is not allowed
-    ;; error out
-    (when (and (contains? attrs :state)
-               (not= (:state attrs) state)
-               (not (contains? next-states (:state attrs))))
-      (throw (ex-info (str "Cannot update task with id " id " from state " state " to " (:state attrs)) {:task-id    id
-                                                                                                         :state      state
-                                                                                                         :next-state (:state attrs)})))))
-(def validate-task!
-  "Throws if the task is not valid"
-  (m/coercer Task nil {:registry registry}))
-
-(def validate-event!
-  "Throws if the event is not valid"
-  (m/coercer Event nil {:registry registry}))
-
-(defn validate-serializable!
-  "Throws if the object is not serializable"
-  ([obj]
-   (validate-serializable! obj "Object is not serializable"))
-  ([obj msg]
-   (when-not (serializable? obj)
-     (throw (ex-info msg {:object obj})))))
-
-
-(defn success? [{:keys [state] :as task}]
-  (= :success state))
-
-(defn failure? [{:keys [state result] :as task}]
-  (or (= :failure state)
-      (and (= :pending state)
-           (error/panic? result))))
-
-(defn terminal? [task]
-  (or (success? task)
-      (failure? task)))
\ No newline at end of file
diff --git a/src2/intemporal/store/jdbc.clj b/src2/intemporal/store/jdbc.clj
deleted file mode 100644
index 07dc171..0000000
--- a/src2/intemporal/store/jdbc.clj
+++ /dev/null
@@ -1,290 +0,0 @@
-(ns intemporal.store.jdbc
-  (:require [hikari-cp.core :as hikari]
-            [intemporal.store :as store]
-            [intemporal.workflow.internal :as i]
-            [intemporal.store.internal :as si :refer [serialize deserialize]]
-            [migratus.core :as migratus]
-            [next.jdbc :as jdbc]
-            [next.jdbc.sql.builder :as builder]
-            [next.jdbc.result-set :as rs]
-            [promesa.core :as p])
-  (:import [java.sql Timestamp]
-           [java.util Date]))
-
-(comment
-  (let [cfg {:store            :database
-             :migration-dir    "migrations/postgres"
-             :watch-polling-ms 100
-             :db               {:jdbcUrl "jdbc:postgresql://localhost:5432/root?user=root&password=root"}}]
-    (migratus/rollback cfg)
-    (migratus/migrate cfg))
-  "")
-
-;;;;
-;; utilities
-
-(defn- kw->db [kw]
-  (when kw
-    (if (keyword? kw)
-      (.substring (str kw) 1)
-      (name kw))))
-
-(defn- db->kw [v]
-  (when v (keyword v)))
-
-(defn- db->task [{:keys [id proto type ref root sym args result state lease_end runtime owner] :as task}]
-  (let [dargs    (deserialize args)
-        dresult  (deserialize result)
-        druntime (deserialize runtime)
-        ssym     (symbol sym)
-        sproto   (when proto (symbol proto))
-        kstate   (db->kw state)]
-    (cond-> (condp = type
-              "workflow" (i/create-workflow-task ref root ssym (resolve ssym) dargs id dresult kstate druntime)
-              "activity" (i/create-activity-task ref root ssym (resolve ssym) dargs id dresult kstate druntime)
-              "proto-activity" (i/create-proto-activity-task sproto ref root ssym (resolve ssym) dargs id dresult kstate druntime))
-            lease_end (assoc :lease-end lease_end)
-            owner (assoc :owner owner))))
-
-(defn- db->event [{:keys [id type ref root sym args result] :as event}]
-  (let [dargs   (deserialize args)
-        dresult (deserialize result)]
-    (assoc event
-      :type (db->kw type)
-      :ref ref
-      :root root
-      :sym (symbol sym)
-      :args dargs
-      :result dresult)))
-
-;;;;
-;; main
-
-(defn make-store
-  "Creates a new Postgres-based store."
-  [{:keys [owner migration-dir migrate? watch-polling-ms jdbcUrl]
-    :or   {owner store/default-owner migrate? true watch-polling-ms 100} :as opts}]
-  (let [db-spec      (-> opts
-                         (dissoc  :migration-dir :migrate? :watch-polling-ms)
-                         (assoc :jdbc-url jdbcUrl))
-        datasource   (hikari/make-datasource db-spec)
-        config       {:store         :database
-                      :migration-dir migration-dir
-                      :db            db-spec}
-        default-opts {:builder-fn rs/as-unqualified-lower-maps}]
-
-    (when migrate?
-      (migratus/migrate config))
-
-    (reify
-      store/InternalVarStore
-      (register [this sym var])
-      (lookup [this sym]
-        (requiring-resolve sym))
-
-      store/HistoryStore
-      (list-events [this]
-        (->> (jdbc/with-transaction [tx datasource]
-               (jdbc/execute! tx ["select * from events"] default-opts))
-             (map db->event)))
-
-      (save-event [this task-id {:keys [type ref root sym args result] :as event}]
-        (si/validate-serializable! args "Event args should be serializable")
-        (si/validate-serializable! result "Event result should be serializable")
-        (si/validate-event! (assoc event :id Integer/MAX_VALUE))
-
-        (let [args   (serialize args)
-              result (serialize result)
-              res    (jdbc/with-transaction [tx datasource]
-                       (jdbc/execute-one! tx ["INSERT INTO events(type, ref, root, sym, args, result) values (?,?,?,?,?,?) RETURNING id"
-                                              (kw->db type) ref root (str sym) args result]
-                                          default-opts))]
-          (assoc event :id (:id res))))
-
-      (all-events [this task-id]
-        (->> (jdbc/with-transaction [tx datasource]
-               (jdbc/execute! tx ["select * from events where ref=?" task-id] default-opts))
-             (map db->event)))
-
-      (clear-events [this]
-        (jdbc/with-transaction [tx datasource]
-          (jdbc/execute! tx ["delete from events"])))
-
-      store/TaskStore
-      (list-tasks [this]
-        (->> (jdbc/with-transaction [tx datasource]
-               (jdbc/execute! tx ["select * from tasks where (owner is null or owner=?)" owner] default-opts))
-             (map db->task)))
-
-      (task<-panic [this task-id error]
-        (jdbc/with-transaction [tx datasource]
-          (let [updated-task {:result (serialize error)}]
-            (jdbc/execute-one! tx (builder/for-update "tasks" updated-task {:id task-id} default-opts)))))
-
-      (task<-event [this task-id {:keys [id ref root type sym args result error] :as event-descr}]
-        ;; some redundancy between :result in task and event
-        ;; note that we save the event first, because update-task can trigger some watchers
-        ;; and they would expect the event to be present in the history
-        (jdbc/with-transaction [tx datasource]
-          (let [evt          {:ref ref :root root :type type :sym sym :args args}
-                expected-state (cond
-                                 (some? args) :new
-                                 (or (some? result) (some? error)) :pending
-                                 :else :unknown)
-                updated-task (cond
-                               (some? args) {:state (kw->db :pending) :args (serialize args)}
-                               (some? error) {:state (kw->db :failure) :result (serialize error)}
-                               :else {:state (kw->db :success) :result (serialize result)})
-                updated-evt  (cond
-                               (some? args) (assoc evt :args args)
-                               (some? error) (assoc evt :error error)
-                               :else (assoc evt :result result))]
-
-            (when-not id
-              (store/save-event this task-id updated-evt))
-            ;; cant really validate because its a partial task
-            ;(validate-task! updated-task)
-            (let [updated (jdbc/execute-one! tx (builder/for-update "tasks" updated-task {:id task-id :state (name expected-state)} default-opts))]
-              (when (empty? updated)
-                (throw (ex-info (format "Cannot update task with id %s, expected state %s did not match" id expected-state)
-                                {:task-id id :expected-state expected-state})))
-              updated-evt))))
-
-      (find-task [this id]
-        (some-> (jdbc/with-transaction [tx datasource]
-                  (jdbc/execute-one! tx ["select * from tasks where id=?" id] default-opts))
-                (db->task)))
-
-      (watch-task [this id f]
-        (let [query-state! (fn []
-                             (jdbc/with-transaction [tx datasource]
-                               (jdbc/execute-one! tx ["select state from tasks where id=?" id] default-opts)))
-              state        (query-state!)
-              watch?       (atom true)]
-          (i/libthread (format "Watcher-%s" id)
-            (while (and @watch? state)
-              (Thread/sleep (long watch-polling-ms))
-              (when (not= state (query-state!))
-                (let [task (some-> (jdbc/with-transaction [tx datasource]
-                                     (jdbc/execute-one! tx ["select * from tasks where id=?" id] default-opts))
-                                   (db->task))]
-                  (when (and task (f task))
-                    (reset! watch? false))))))))
-
-      (await-task [this id]
-        (store/await-task this id {:timeout-ms store/default-lease}))
-
-      (await-task [this id {:keys [timeout-ms] :as opts}]
-        ;; TODO use owner
-        ;; TODO use promise if available
-        (let [task        (store/find-task this id)
-              deferred    (p/deferred)
-              wrap-result (fn [{:keys [state result] :as task}]
-                            (cond
-                              (si/success? task) (p/resolved result)
-                              (si/failure? task) (p/rejected result)
-                              :else (p/rejected (ex-info "Unknown state" {:task task}))))]
-
-          (if (si/terminal? task)
-            (wrap-result task)
-            ;;else
-            (do
-              (store/watch-task this id (fn [task]
-                                          (when (si/terminal? task)
-                                            (p/resolve! deferred task)
-                                            true)))
-              ;; wait for resolution
-              (-> (p/timeout deferred timeout-ms ::timeout)
-                  (p/then (fn [resolved]
-                            (if (= ::timeout resolved)
-                              (throw (ex-info "Timeout waiting for task to be completed" {:task task}))
-                              (wrap-result resolved)))))))))
-
-      (release-pending-tasks [this]
-        (jdbc/with-transaction [tx datasource]
-          (jdbc/execute-one! tx ["update tasks set owner=null where owner=?" owner])))
-
-      (reenqueue-pending-tasks [this f]
-        (let [tasks? (jdbc/with-transaction [tx datasource]
-                       (let [tasks (jdbc/execute! tx ["select * from tasks where state='pending' and (owner is null or owner=?)" owner] default-opts)]
-                         (jdbc/execute-one! tx ["update tasks set state='new', owner=? where id = ANY(?)" owner
-                                                (into-array String (mapv :id tasks))])
-                         (doseq [row tasks]
-                           (f (db->task row)))
-                         tasks))]
-          tasks?))
-
-      (enqueue-task [this {:keys [id proto type ref root sym args result state lease-end runtime] :as task}]
-        (assert (or (nil? proto) (some? (:on proto)) "Task protocol not valid, missing :on attribute"))
-
-        (let [task+owner (assoc task :owner owner)]
-          (si/validate-serializable! args "Task args should be serializable")
-          (si/validate-serializable! result "Task result should be serializable")
-          (si/validate-serializable! runtime "Task runtime should be serializable")
-          (si/validate-task! task+owner)
-
-          (let [proto?  (cond (symbol? proto) (str proto)
-                              (some? (:on proto)) (str (:on proto))
-                              (string? proto) proto)
-                args    (serialize args)
-                result  (serialize result)
-                runtime (serialize runtime)]
-            (jdbc/with-transaction [tx datasource]
-              (jdbc/execute! tx ["INSERT INTO tasks(id,owner,proto,type,ref,root,sym,args,result,state,lease_end,runtime) values (?,?,?,?,?,?,?,?,?,?,?,?) RETURNING id"
-                                 id owner proto? (kw->db type) (kw->db ref) (kw->db root) (str sym) args result (kw->db state) lease-end runtime])))
-          task+owner))
-
-      (dequeue-task [this]
-        (store/dequeue-task this {:lease-ms nil}))
-
-      (dequeue-task [this {:keys [lease-ms]}]
-        ;; TODO check owner
-        ;; TODO select for update skip locked
-        (let [query  "select * from tasks where (owner=? or owner is null) and (state='new' or lease_end < now()) order by id asc limit 1"
-              found? (jdbc/with-transaction [tx datasource]
-                       (when-let [task (some-> (jdbc/execute-one! tx [query owner] default-opts)
-                                               (db->task))]
-                         (let [lease-epoch (when lease-ms
-                                             (* 1000 (+ (store/now) lease-ms)))
-                               lease-ts    (when lease-epoch
-                                             (-> (Date. (long lease-epoch))
-                                                 (.toInstant)
-                                                 (Timestamp/from)))]
-                           (jdbc/execute-one! tx ["update tasks set state='pending', lease_end=? where id=?" lease-ts (:id task)])
-                           (assoc task
-                             :state :pending
-                             :fvar (store/sym->var this task)
-                             :lease-end (when lease-epoch
-                                          (/ lease-epoch 1000))))))]
-          found?))
-
-      (clear-tasks [this]
-        (jdbc/with-transaction [tx datasource]
-          (jdbc/execute! tx ["delete from tasks"]))))))
-
-#_:clj-kondo/ignore
-(comment
-  (require '[intemporal.workflow.internal :as i])
-  (defprotocol TestProto
-    (a [this] "x"))
-  (def s (make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                      :migration-dir "migrations/postgres"}))
-
-  (def t (i/create-workflow-task nil nil 'clojure.core/+ (var-get #'+) [{:a 1}]))
-  (def t2 (i/create-proto-activity-task TestProto (:id t) (:id t) 'clojure.core/+ (var-get #'+) [{:b 2 :c #inst "2011-01-01"}]))
-
-  (store/clear-events s)
-  (store/clear-tasks s)
-
-  (store/enqueue-task s t)
-  (store/enqueue-task s t2)
-
-  (store/list-tasks s)
-
-  (store/watch-task s (:id t) (fn [task] (println "CALLBACK" task)))
-
-  (prn (store/dequeue-task s))
-  (store/save-event s 1 {:type :intemporal.workflow/invoke
-                         :ref  (:id t) :root (:id t) :sym 'clojure.core/+ :args [] :result [1]})
-
-  (store/list-events s))
diff --git a/src2/intemporal/workflow.cljc b/src2/intemporal/workflow.cljc
deleted file mode 100644
index 1d05a24..0000000
--- a/src2/intemporal/workflow.cljc
+++ /dev/null
@@ -1,222 +0,0 @@
-(ns intemporal.workflow
-  (:require [intemporal.store :as store]
-            [intemporal.workflow.internal :as internal]
-            [intemporal.error :as error]
-            [promesa.core :as p]
-            [taoensso.telemere :as t])
-  #?(:cljs (:require-macros
-             #_:clj-kondo/ignore
-             [intemporal.workflow.internal :refer [with-env-internal trace! trace-async!]]
-             [intemporal.workflow :refer [with-env]]))
-  #?(:clj (:require [intemporal.error :as error]
-                    [intemporal.workflow.internal :refer [trace! trace-async!]]
-                    [steffan-westcott.clj-otel.context :as otctx]))
-  #?(:clj (:import [java.util.concurrent Executors TimeUnit]
-                   [java.lang AutoCloseable])))
-
-#?(:clj (set! *warn-on-reflection* true))
-
-;;;;
-;; runtime
-
-(defmacro with-env
-  "Creates a new environment for workflow execution. Options:
-  - `:store`: the underlying store to persist workflow metadata
-  - `:id`: optional workflow id
-  - `:timeout-ms`: optional timeout for workflow execution
-  "
-  [m & body]
-  `(internal/with-env-internal ~m (do ~@body)))
-
-(defn current-env
-  "Returns the workflow execution environment for the current thread"
-  []
-  (assert (some? internal/*env*) "No workflow env detected, should only be called within a workflow function")
-  internal/*env*)
-
-(defn workflow-id
-  "Returns the current workflow uuid"
-  []
-  (assert (some? internal/*env*) "No workflow env detected, should only be called within a workflow function")
-  (-> internal/*env* :root))
-
-;;;;
-;; worker
-(defprotocol ITaskExecutor
-  (submit [this f] "Submits the function `f` for execution")
-  (shutdown [this grace-period-ms] "Shuts down the task executor")
-  (terminated? [this] "Indicates if the executor has terminated")
-  (shutting-down? [this] "Indicates if the executor has entered shutdown state"))
-
-(defn make-task-executor
-  "Creates an object that satisfies `ITaskExecutor`."
-  []
-  (let [terminated? (atom false)
-        shutdown?   (atom false)]
-    #?(:cljs
-       (reify ITaskExecutor
-         (submit [_ f]
-           (when (not @terminated?)
-             (p/vthread (f))))
-         (shutdown [_ grace-period-ms]
-           (t/log! {:level :debug} ["Executor shutdown"])
-           (reset! terminated? true)
-           (reset! shutdown? true))
-         (terminated? [_] @terminated?)
-         (shutting-down? [_] @shutdown?))
-       :clj
-       (let [factory (-> (Thread/ofVirtual)
-                         (.name "Task Thread")
-                         (.factory))
-             exec    (Executors/newThreadPerTaskExecutor factory)]
-         (reify
-           ITaskExecutor
-           (submit [_ f]
-             (.submit exec ^Runnable f))
-           (shutdown [_ grace-period-ms]
-             (try
-               ;; reject tasks
-               (.shutdown exec)
-               (reset! shutdown? true)
-               (t/log! {:level :debug} ["Executor shutdown"])
-               ;; await ongoing tasks
-               (when-not (.awaitTermination exec grace-period-ms TimeUnit/MILLISECONDS)
-                 (t/log! {:level :debug} ["Executor shutdown grace period over, shutting down NOW"])
-                 (.shutdownNow exec))
-               ;; in case we got interrupted exception, make sure to set the flag
-               ;; so ongoing ops fail
-               (finally
-                 (reset! terminated? true))))
-           (terminated? [_]
-             @terminated?)
-           (shutting-down? [_]
-             @shutdown?)
-           ;; allow expressions like (with-open [executor (w/start-poller ....
-           AutoCloseable
-           (close [this]
-             (shutdown this 0)))))))
-
-(defn- worker-execute-fn
-  "Executes a given protocol, activity or workflow `task`"
-  [store protocols {:keys [type id root runtime fvar] :as task} task-counter terminated? shutting-down?]
-  (let [runtime      (:runtime task)
-        base-env     {:store        store
-                      :type         type
-                      :ref          id
-                      :id           id
-                      :root         (or root id)
-                      :protos       protocols
-                      :next-id      (fn [] (str (or root id) "-" (swap! task-counter inc)))
-                      :terminated?  terminated?
-                      :shutdown?    shutting-down?}
-        internal-env (merge internal/default-env base-env runtime)]
-    ;; root task: we only enqueue workflows
-    (with-env internal-env
-      (t/log! {:level :debug :data {:sym (:sym task)}} ["Resuming task with id" (:id task)])
-      ;; this span creation is required in order for
-      ;; subsequent workflow traces to have a "parent" span, otherwise
-      ;; they won't show up correctly in jaeger
-      ;; TODO test with eg loki
-      (trace-async! {:name "worker: worker-execute-fn" :attributes {:task-id (:id task)}}
-        #?(:cljs (internal/resume-task internal-env store protocols task)
-           :clj (otctx/bind-context! (otctx/headers->merged-context (:telemetry-context runtime))
-                  (internal/resume-task internal-env store protocols task)))))))
-
-(defn- worker-poll-fn
-  "Continously polls for task while `task-executor` is active."
-  [store protocols task-executor polling-ms]
-  (let [task-counter (atom 0)
-        stopped? (fn [] (terminated? task-executor))
-        shutdown?   (fn [] (shutting-down? task-executor))]
-    #_{:clj-kondo/ignore [:loop-without-recur :invalid-arity]}
-    @(p/loop []
-             (-> (p/delay polling-ms)
-                 (p/chain (fn [_]
-                            (loop []
-                              ;(t/log! {:level :debug} ["Polling for tasks..."])
-                              ;; TODO add another check for shutting-down?
-                              (when-let [task (and
-                                                (not (shutting-down? task-executor))
-                                                (store/dequeue-task store))]
-                                (t/log! {:level :debug :_data {:task task}} ["Dequeued task with id" (:id task)])
-                                (try
-                                  (submit task-executor (fn []
-                                                          (worker-execute-fn store protocols task task-counter stopped? shutdown?)))
-                                  (catch  #?(:clj Exception :cljs js/Error) e
-                                    ;; dequeued updated the state atomically (so other txs dont do the same)
-                                    ;; but if the executor stopped in the meantime we need to revert the task's state
-                                    (when (error/rejected? e)
-                                      (t/log! {:level :warn} ["Task execution rejected, reverting state to :new"])
-                                      (store/enqueue-task store (assoc task :state :new)))))
-                                (when-not (stopped?)
-                                  (recur))))
-                            (when-not (stopped?)
-                              (p/recur))))
-                 (p/catch (fn [e]
-                            (t/log! {:level :warn :data {:exception e}} ["Caught error during task polling, continuing"])
-                            (when-not (stopped?)
-                              (p/recur))))))))
-
-(defn start-poller!
-  "Starts a poller that will submit tasks to the `task-executor`.
-  Protocol implementations are resolved via a map of `:protocols {my.ns Impl}`
-  Returns an `ITaskExecutor` that can be shutdown.
-  For clj runtimes, task-executor should be `(Executors/newVirtualThreadPerTaskExecutor)`, as
-  each execution will be blocked while they await for a given task dependencie's execution."
-  ([store {:keys [protocols polling-ms] :or {protocols {} polling-ms 100} :as opts}]
-   (start-poller! store (make-task-executor) opts))
-  ([store task-executor & {:keys [protocols polling-ms] :or {protocols {} polling-ms 100}}]
-   (assert (satisfies? ITaskExecutor task-executor) "Supplied task executor does not satisfy ITaskExecutor")
-   ;; start poller in a out-of-executor thread so it doesnt prevent the executor from shutting down
-   ;; the only way to stop the poller is via shutdown
-   (p/vthread
-     (worker-poll-fn store protocols task-executor polling-ms))
-   task-executor))
-
-(defn start-worker!
-  "Starts a single worker thread that periodically polls for tasks and executes them in a
-  separate thread. Mostly used for testing purposes."
-  ([store]
-   (start-worker! store {}))
-  ([store & {:keys [protocols polling-ms] :or {protocols {} polling-ms 100}}]
-   (let [run?         (atom true)
-         task-counter (atom 0)]
-     (internal/libthread "Worker"
-       #_{:clj-kondo/ignore [:loop-without-recur :invalid-arity]}
-       @(p/loop []
-          (-> (p/delay polling-ms)
-              (p/chain (fn [_]
-                         (when-let [task (store/dequeue-task store)]
-                           (t/log! {:level :debug :data {:sym (:sym task)}} ["Dequeued task with id" (:id task)])
-                           (internal/libthread (str "Worker-" (:id task))
-                             (worker-execute-fn store protocols task task-counter
-                                                (fn [] (not @run?))
-                                                (fn [] (not @run?)))))
-
-                         (when @run?
-                           (p/recur)))))))
-     (fn []
-       (t/log! {:level :info} ["Stopping worker"])
-       (reset! run? false)))))
-
-(defn enqueue-and-wait
-  "Adds the task to the internal queue, awaits for its execution.
-  Task might be fulfilled by other threads"
-  [{:keys [store] :as opts} task]
-  (t/log! {:level :debug :data {:sym (:sym task)}} ["Enqueuing task with id" (:id task)])
-  (internal/enqueue-and-wait opts task))
-
-(defn add-compensation
-  "Adds a compensation action to the current workflow."
-  [thunk]
-  (assert (ifn? thunk) "Compensation action should implement IFn")
-  (swap! (:compensations internal/*env*) conj thunk))
-
-(defn compensate
-  "Runs compensation in program order. A failure of the compensation action will stop running other compensations."
-  []
-  (let [thunks (-> internal/*env* :compensations)]
-    (trace! {:name "compensations" :attributes {:fn-count (count @thunks)}}
-            (doseq [f @thunks]
-              (swap! thunks pop)
-              (f)))))
\ No newline at end of file
diff --git a/src2/intemporal/workflow/internal.cljc b/src2/intemporal/workflow/internal.cljc
deleted file mode 100644
index 28f8009..0000000
--- a/src2/intemporal/workflow/internal.cljc
+++ /dev/null
@@ -1,362 +0,0 @@
-(ns ^:private intemporal.workflow.internal
-  "Private namespace for workflow support."
-  (:require [intemporal.store :as store]
-            [intemporal.error :as error]
-            [promesa.core :as p]
-            [taoensso.telemere :as t])
-  #?(:clj (:require [steffan-westcott.clj-otel.context :as otctx]
-                    [steffan-westcott.clj-otel.api.trace.span :as otspan]
-                    [net.cgrand.macrovich :as macros]
-                    [intemporal.store :refer [bfn]]))
-  #?(:cljs (:require-macros
-             [net.cgrand.macrovich :as macros]
-             [intemporal.workflow.internal :refer [trace! trace-async!]]
-             [intemporal.store :refer [bfn]]))
-  #?(:clj (:import [java.util.concurrent CompletableFuture])))
-
-#?(:clj (set! *warn-on-reflection* true))
-
-;;;;
-;; utils
-
-(defmacro libthread
-  "Creates a thread for internal usage. Client code should not rely on this.
-   Returns a promise."
-  [label & body]
-  `(p/vthread ~@body))
-
-;;;;
-;; runtime
-
-(def ^:dynamic *env* nil)
-(def default-env {:compensations (atom '())
-                  :timeout-ms    (* 15 60 1000)})
-
-(defn- env->runtime
-  "Derives the `runtime` attrs from the current env."
-  []
-  (select-keys *env* [:timeout-ms :telemetry-context]))
-
-(defn random-id
-  "Generates a random id. if env var `DEV` is defined, generates a two-word human-readable id."
-  []
-  ;; debugging purposes only
-  ;; https://github.com/moby/moby/blob/master/pkg/namesgenerator/names-generator.go
-  ;; TODO use https://github.com/adzerk-oss/env ?
-  (if #?(:clj  (= "true" (System/getenv "DEV"))
-         :cljs false)
-    (let [left  ["admiring" "adoring" "affectionate" "agitated" "amazing" "angry" "awesome" "beautiful" "blissful" "bold" "boring" "brave" "busy" "charming" "clever" "compassionate" "competent" "condescending" "confident" "cool" "cranky" "crazy" "dazzling" "determined" "distracted" "dreamy" "eager" "ecstatic" "elastic" "elated" "elegant" "eloquent" "epic" "exciting" "fervent" "festive" "flamboyant" "focused" "friendly" "frosty" "funny" "gallant" "gifted" "goofy" "gracious" "great" "happy" "hardcore" "heuristic" "hopeful" "hungry" "infallible" "inspiring" "intelligent" "interesting" "jolly" "jovial" "keen" "kind" "laughing" "loving" "lucid" "magical" "modest" "musing" "mystifying" "naughty" "nervous" "nice" "nifty" "nostalgic" "objective" "optimistic" "peaceful" "pedantic" "pensive" "practical" "priceless" "quirky" "quizzical" "recursing" "relaxed" "reverent" "romantic" "sad" "serene" "sharp" "silly" "sleepy" "stoic" "strange" "stupefied" "suspicious" "sweet" "tender" "thirsty" "trusting" "unruffled" "upbeat" "vibrant" "vigilant" "vigorous" "wizardly" "wonderful" "xenodochial" "youthful" "zealous" "zen"]
-          right ["agnesi" "albattani" "allen" "almeida" "antonelli" "archimedes" "ardinghelli" "aryabhata" "austin" "babbage" "banach" "banzai" "bardeen" "bartik" "bassi" "beaver" "bell" "benz" "bhabha" "bhaskara" "black" "blackburn" "blackwell" "bohr" "booth" "borg" "bose" "bouman" "boyd" "brahmagupta" "brattain" "brown" "buck" "burnell" "cannon" "carson" "cartwright" "carver" "cerf" "chandrasekhar" "chaplygin" "chatelet" "chatterjee" "chaum" "chebyshev" "clarke" "cohen" "colden" "cori" "cray" "curie" "curran" "darwin" "davinci" "dewdney" "dhawan" "diffie" "dijkstra" "dirac" "driscoll" "dubinsky" "easley" "edison" "einstein" "elbakyan" "elgamal" "elion" "ellis" "engelbart" "euclid" "euler" "faraday" "feistel" "fermat" "fermi" "feynman" "franklin" "gagarin" "galileo" "galois" "ganguly" "gates" "gauss" "germain" "goldberg" "goldstine" "goldwasser" "golick" "goodall" "gould" "greider" "grothendieck" "haibt" "hamilton" "haslett" "hawking" "heisenberg" "hellman" "hermann" "herschel" "hertz" "heyrovsky" "hodgkin" "hofstadter" "hoover" "hopper" "hugle" "hypatia" "ishizaka" "jackson" "jang" "jemison" "jennings" "jepsen" "johnson" "joliot" "jones" "kalam" "kapitsa" "kare" "keldysh" "keller" "kepler" "khayyam" "khorana" "kilby" "kirch" "knuth" "kowalevski" "lalande" "lamarr" "lamport" "leakey" "leavitt" "lederberg" "lehmann" "lewin" "lichterman" "liskov" "lovelace" "lumiere" "mahavira" "margulis" "matsumoto" "maxwell" "mayer" "mccarthy" "mcclintock" "mclaren" "mclean" "mcnulty" "meitner" "mendel" "mendeleev" "meninsky" "merkle" "mestorf" "mirzakhani" "montalcini" "moore" "morse" "moser" "murdock" "napier" "nash" "neumann" "newton" "nightingale" "nobel" "noether" "northcutt" "noyce" "panini" "pare" "pascal" "pasteur" "payne" "perlman" "pike" "poincare" "poitras" "proskuriakova" "ptolemy" "raman" "ramanujan" "rhodes" "ride" "ritchie" "robinson" "roentgen" "rosalind" "rubin" "saha" "sammet" "sanderson" "satoshi" "shamir" "shannon" "shaw" "shirley" "shockley" "shtern" "sinoussi" "snyder" "solomon" "spence" "stonebraker" "sutherland" "swanson" "swartz" "swirles" "taussig" "tesla" "tharp" "thompson" "torvalds" "tu" "turing" "varahamihira" "vaughan" "villani" "visvesvaraya" "volhard" "wescoff" "wilbur" "wiles" "williams" "williamson" "wilson" "wing" "wozniak" "wright" "wu" "yalow" "yonath" "zhukovsky"]]
-      (str (rand-nth left) "-" (rand-nth right)))
-    (str (random-uuid))))
-
-(defmacro with-env-internal
-  "Merges `m` on top of the current internal environment, then runs `body` with the new environment."
-  [m & body]
-  `(binding [*env* (merge default-env ~m)]
-     (do ~@body)))
-
-;;;;
-;; telemetry
-
-(defn ->telemetry-context []
-  #?(:clj (otctx/->headers)
-     :cljs {}))
-
-(defmacro trace!
-  "Wraps body in a tracing context. "
-  [{:keys [name attributes] :as attrs} & body]
-  (macros/case
-    ;; cljs: no telemetry
-    :cljs `(do ~@body)
-    :clj `(let [attrs# (do ~attrs)]
-            (otspan/with-span! attrs#
-              (with-env-internal (merge *env* {:telemetry-context (->telemetry-context)})
-                (let [res# (do ~@body)]
-                  res#))))))
-
-(defmacro trace-async!
-  "Wraps body in a tracing context. "
-  [{:keys [name attributes] :as attrs} & body]
-  (macros/case
-    ;; cljs: no telemetry
-    :cljs `(do ~@body)
-    :clj `(let [attrs# (do ~attrs)
-                span# (otspan/new-span! attrs#)]
-            ;(otspan/async-bound-cf-span attrs#)
-            ;(with-env-internal (merge *env* {:telemetry-context (->telemetry-context)}))
-            (let [res# (do ~@body)]
-              (.whenComplete ^CompletableFuture res#
-                             (fn [t# e#]
-                               (when e# (otspan/add-exception! {:context span#} e#))
-                               (otspan/end-span! {:context span#})))))))
-
-(defn add-event!
-  ([task ename attrs]
-   #?(:clj (when-let [ctx (-> task :runtime :telemetry-context)]
-             (otctx/with-context! (otctx/headers->merged-context ctx)
-               (add-event! ename attrs)))))
-  ([ename attrs]
-   #?(:clj (otspan/add-event! ename attrs))))
-
-;;;;
-;; task definitions
-
-;;;;
-;; Tasks
-
-(defn create-workflow-task
-  ([ref root sym fvar args id]
-   (create-workflow-task ref root sym fvar args id nil :new nil))
-  ([ref root sym fvar args id result state runtime]
-   (let [runtime (or runtime (env->runtime))]
-     {:type    :workflow :id id :ref ref :root root :sym sym :fvar fvar :args args :result result :state state
-      :runtime runtime})))
-
-(defn create-activity-task
-  ([ref root sym fvar args id]
-   (create-activity-task ref root sym fvar args id nil :new nil))
-  ([ref root sym fvar args id result state runtime]
-   (let [runtime (or runtime (env->runtime))]
-     {:type    :activity :id id :ref ref :root root :sym sym :fvar fvar :args args :result result :state state
-      :runtime runtime})))
-
-(defn create-proto-activity-task
-  ([proto ref root sym fvar args id]
-   (create-proto-activity-task proto ref root sym fvar args id nil :new nil))
-  ([proto ref root sym fvar args id result state runtime]
-   (let [runtime (or runtime (env->runtime))]
-     {:type    :proto-activity :proto proto :id id :ref ref :root root :sym sym :fvar fvar :args args :result result :state state
-      :runtime runtime})))
-
-(defn event-matches? [{t :type s :sym} {t2 :type s2 :sym}]
-  (and (= t t2) (= s s2)))
-
-;;;;
-;; traced store fns
-
-(defn- all-events [store id]
-  (add-event! ::store/all-events {:task-id id})
-  (store/all-events store id))
-
-(defn- task<-event [store task-id event-descr]
-  ;; TODO patch this to use a compare-and-swap
-  ;; must send the expected state as arg
-  (add-event! (:type event-descr) {:task-id task-id})
-  (store/task<-event store task-id event-descr))
-
-(defn- task<-panic [store task-id error]
-  (add-event! ::store/task<-panic {:task-id task-id})
-  (store/task<-panic store task-id error))
-
-(defn- find-task [store task-id]
-  (add-event! ::store/find-task {:task-id task-id})
-  (store/find-task store task-id))
-
-(defn- enqueue-task [store task]
-  (add-event! ::store/enqueue-task {:task-id (:id task)})
-  (store/enqueue-task store task))
-
-(defn- await-task [store task-id opts]
-  (add-event! ::store/await-task {:task-id task-id})
-  (store/await-task store task-id opts))
-
-;;;;
-;; task execution/replay
-
-(defn resume-fn-task
-  "Resumes a generic fn call task"
-  [{:keys [vthread? terminated? shutdown?] :as env} store protos {:keys [type proto id root sym fvar args] :as task} [invoke success failure]]
-  (when (and (= :proto-activity type)
-             (nil? (get protos proto)))
-    (throw (ex-info (str "Protocol implementation for "
-                         (pr-str proto)
-                         " not found; available protocols:"
-                         (pr-str protos)
-                         ". Make sure to pass `:protocols` key when starting poller or worker ")
-                    {::type :internal
-                     :protocols protos
-                     :required proto})))
-  ;; do we have invocation and result events for this task?
-  (t/log! {:level :debug :sym sym} ["Resuming try/catch task with id" id])
-
-  (try
-    (let [terminated? (fn [] (and (ifn? terminated?) (terminated?)))
-          [inv? res?] (all-events store id)]
-
-      ;; mark invoke/replay
-      (let [next-event {:ref id :root (or root id) :type invoke :sym sym :args args}]
-        (when inv?
-          (t/log! {:level :debug :data {:sym (:sym task)}} ["Found replay event for task with id" (:id task)]))
-        (when res?
-          (t/log! {:level :debug :data {:sym (:sym task)}} ["Found result event for task with id" (:id task)]))
-
-        (cond
-          ;; do we have an invocation event? if not, save this one
-          (not inv?)
-          (task<-event store id next-event)
-
-          ;; we do have an invocation event, is it a match of the above?
-          (not (event-matches? inv? next-event))
-          (throw (error/internal-error "Transition unexpected" {:got      (:type inv?)
-                                                                :expected invoke}))))
-
-      ;; mark success/failure or replay
-      (let [next-event   {:ref id :root (or root id) :type success :sym sym}
-            next-failure (assoc next-event :type failure)
-            handle-ok    (bfn [r]
-                           ;; TODO assert r is serializable!
-                           ;; we check for shutdown because in js runtime, there is no thread interruption
-                           ;; at this point, if we are shutting down it means we exhausted the grace period
-                           (let [panic? (terminated?)]
-                             (try
-                               (if panic?
-                                 (task<-panic store id (error/panic "Worker shutting down during invocation result handling"))
-                                 (let [new-event (assoc next-event :result r)]
-                                   #?(:clj (otspan/add-span-data! {:attributes {:replayed false :result r}}))
-                                   (task<-event store id new-event)
-                                   r))
-                               (finally
-                                 (if panic?
-                                   (t/log! {:level :debug :data {:sym sym :result r}} ["Shutting down, interrupted result" id])
-                                   (t/log! {:level :debug :data {:sym sym :result r}} ["Got actual function result for task" id]))))))
-            handle-fail  (bfn [e]
-                           (cond
-                             ;; if its a java.lang.InterruptedException it means
-                             ;; we killed the executor
-                             ;; - we must leave the task pending (assuming its idempotent)
-                             (error/interrupted? e)
-                             (t/log! {:level :debug :data {:sym sym}} ["InterruptedException caught during actual function invocation for task" id])
-
-                             ;; executor has terminated, it means we exhausted the graceful shutdown period
-                             ;; panic the task
-                             (terminated?)
-                             (do
-                               (t/log! {:level :warn :data {:exception e}} ["Exception caught during shutdown, panicking task"])
-                               (task<-panic store id (error/panic "Worker shutting down during invocation failure handling")))
-
-                             ;; regular task failure
-                             :else
-                             (do
-                               (t/log! {:level :debug :data {:sym sym :exception e}} ["Exception caught during actual function invocation for task" id])
-                               (task<-event store id (cond-> (assoc next-failure :error e)
-                                                             (error/internal-error? e) (assoc :type ::failure)))))
-                           ;; finally, return error
-                           (p/rejected e))
-            retval       (cond
-                           ;; are we replaying a result?
-                           (some? res?)
-                           (let [success? (contains? res? :result)
-                                 retval   (if success? (:result res?) (:error? res?))
-                                 ;; we need to ensure replay events return the same type
-                                 ;; as if they were called via a vthread
-                                 wrapped (if vthread?
-                                           (p/vthread retval)
-                                           retval)]
-                             #?(:clj (otspan/add-span-data! {:attributes {:replayed true :result retval}}))
-                             (task<-event store id res?)
-                             (if success?
-                               (p/resolved wrapped)
-                               (p/rejected wrapped)))
-
-                           ;; no replay, lets do the actual call
-                           (not res?)
-                           (-> (let [;; if we're calling a prototype, we need to prepend the
-                                     ;; prot impl and then its args
-                                     args' (if (= :proto-activity type)
-                                             (cons (get protos proto) args)
-                                             args)
-                                     ;; this is the result
-                                     r     (binding [*env* (merge default-env env)]
-                                             (t/log! {:level :debug :data {:sym sym :args args'}} ["Calling actual function for task" id])
-                                             ;; vthread calls are special because we only want to process its
-                                             ;; result when deref is called, to ensure determinism:
-                                             ;; - first we must save all events
-                                             ;; - then we can process the underlying impl call
-                                             (if vthread?
-                                               (let [inner (p/create (fn [res rej]
-                                                                       (-> (p/vthread ;TODO: user thread
-                                                                             (binding [*env* (-> env
-                                                                                                 (dissoc :vhtread?)
-                                                                                                 (assoc :telemetry-context (->telemetry-context)))]
-                                                                               ;(trace! {:id sym})
-                                                                               #?(:clj (otctx/bind-context! (otctx/headers->merged-context (:telemetry-context env))
-                                                                                         (apply fvar args'))
-                                                                                  :cljs (apply fvar args'))))
-                                                                           (p/then res)
-                                                                           (p/catch rej))))]
-                                                 ;; in cljs we dont need delay bc its single threaded
-                                                 ;; in clj, the delayed value will be deref'd
-                                                 ;; but at this point we ensure that any other eg vthread calls have been saved in history
-                                                 (#?(:cljs do :clj delay)
-                                                   (-> inner
-                                                       (p/then handle-ok)
-                                                       (p/catch handle-fail))))
-                                               ;; ensure handle-fail always has a chance to catch any fvar
-                                               ;; exceptions
-                                               (-> nil
-                                                   (p/then (fn [_] (binding [*env* env]
-                                                                     ;(trace! {:id sym})
-                                                                     #?(:clj (otctx/bind-context! (otctx/headers->merged-context (:telemetry-context env))
-                                                                               (apply fvar args'))
-                                                                        :cljs (apply fvar args')))))
-                                                   (p/then' handle-ok)
-                                                   (p/catch handle-fail))))]
-                                 ;; r can be a value or a promise
-                                 r))
-
-                           (not (or (event-matches? res? next-event) ;; replay success
-                                    (event-matches? res? next-failure))) ;; replay failure
-                           (throw (error/internal-error "Transition unexpected" {:got      (:type res?)
-                                                                                 :expected [success failure]})))]
-        (t/log! {:level :debug :data {:sym sym :retval retval}} ["Finished internal execution for task" id])
-        ;; if userland called a vthread, retval will be delayed
-        retval))
-    ;; ensure we terminate the fn call, even if the next event wouldnt be the expected type
-    (catch #?(:clj Exception :cljs js/Error) e
-      ;; TODO at this point we should just panic, "userland" exceptions should be handled in the handle-fail
-      ;; on theory there is no other way for a user exception to bubble out
-      (let [wrapped (ex-info "Internal error while resuming execution" {::type :internal} e)]
-        (task<-event store id {:ref id :root (or root id) :type ::failure :sym sym :error wrapped}))
-      (p/rejected e))))
-
-#?(:clj (ns-unmap *ns* 'resume-task))
-(defmulti resume-task
-          "Continues a task that has been queued for execution. Replays events if they exist."
-          (fn [env store protos task]
-            (:type task)))
-
-(defmethod resume-task :workflow
-  [env store protos {:keys [id root sym fvar args] :as task}]
-  (resume-fn-task env store protos task [:intemporal.workflow/invoke :intemporal.workflow/success :intemporal.workflow/failure]))
-
-(defmethod resume-task :activity
-  [env store protos {:keys [id root sym fvar args] :as task}]
-  (resume-fn-task env store protos task [:intemporal.activity/invoke :intemporal.activity/success :intemporal.activity/failure]))
-
-(defmethod resume-task :proto-activity
-  [env store protos {:keys [id root sym fvar args] :as task}]
-  (resume-fn-task env store protos task [:intemporal.protocol/invoke :intemporal.protocol/success :intemporal.protocol/failure]))
-
-(defn enqueue-and-wait
-  "Enqueues `task` onto the store and awaits its execution.
-  If the exact task is alread present (eg we are resuming a crashed workflow),
-  the existing task will be awaited instead."
-  [{:keys [store] :as opts} {:keys [id] :as task}]
-  ;; because execution engine is supposed to be deterministic,
-  ;; we can safely assume that if an identic task exists at this point
-  ;; we are replaying some events
-  (assert (some? store) "Store should exist")
-  (assert (some? task) "Task should exist")
-
-  ;; TODO trace if we pick the task from the db?
-  ;; the db task should have a telemetry context already no?
-  ;; (trace! {:name (format "workflow: %s" orig#) :attributes {:task-id id#}}
-  (let [db-task (or (find-task store id)
-                    (enqueue-task store task))
-
-        _       (add-event! :intemporal.workflow.internal.enqueue-and-wait/db-task {})
-        prom    (await-task store (:id db-task) opts)]
-
-    #?(:clj (deref prom)
-       :cljs prom)))
diff --git a/test/intemporal/jepsen/checker.clj b/test/intemporal/jepsen/checker.clj
index ff7a5a2..aaaa2e0 100644
--- a/test/intemporal/jepsen/checker.clj
+++ b/test/intemporal/jepsen/checker.clj
@@ -22,9 +22,11 @@
     checker 3 (history-integrity)     -> FAIL  (if concurrent-start ran)
     checker 4 (cancellation-liveness) -> FAIL  (cancelled sleepers never wake)"
   (:require [next.jdbc :as jdbc]
+            [next.jdbc.result-set :as rs]
+            [clojure.string :as str]
             [taoensso.telemere :as log]))
 
-(def ^:private jdbc-opts {:builder-fn next.jdbc.result-set/as-unqualified-maps})
+(def ^:private jdbc-opts {:builder-fn rs/as-unqualified-maps})
 
 ;; ---------------------------------------------------------------------------
 ;; Helper: submitted workflow-ids from history
@@ -45,14 +47,6 @@
        (keep #(get-in % [:value :workflow-id]))
        set))
 
-(defn- signalled-ids
-  "Set of workflow-ids for which a signal op succeeded."
-  [history]
-  (->> @history
-       (filter #(and (= :signal (:f %)) (= :ok (:type %))))
-       (keep #(get-in % [:value :workflow-id]))
-       set))
-
 (defn- concurrent-start-ids
   "Set of workflow-ids from concurrent-start ops."
   [history]
@@ -74,7 +68,7 @@
   (let [ids (submitted-ids history)]
     (if (empty? ids)
       {:valid? true :violations [] :stats {:submitted 0}}
-      (let [in-clause (clojure.string/join "," (repeat (count ids) "?"))
+      (let [in-clause (str/join "," (repeat (count ids) "?"))
             stuck (jdbc/execute! db-spec
                     (into [(str "SELECT w.id,
                                         w.cancelled,
@@ -158,7 +152,7 @@
   (let [cs-ids (concurrent-start-ids history)]
     (if (empty? cs-ids)
       {:valid? true :violations [] :stats {:concurrent-start-workflows 0}}
-      (let [in-clause (clojure.string/join "," (repeat (count cs-ids) "?"))
+      (let [in-clause (str/join "," (repeat (count cs-ids) "?"))
             ;; Look for evidence of the silent overwrite: seq=0 with the
             ;; sentinel event_type means the second writer clobbered the first.
             corrupted (jdbc/execute! db-spec
@@ -203,7 +197,7 @@
   (let [c-ids (cancelled-ids history)]
     (if (empty? c-ids)
       {:valid? true :violations [] :stats {:cancelled-submitted 0}}
-      (let [in-clause (clojure.string/join "," (repeat (count c-ids) "?"))
+      (let [in-clause (str/join "," (repeat (count c-ids) "?"))
             stuck (jdbc/execute! db-spec
                     (into [(str "SELECT w.id,
                                         h.event_type AS last_event
diff --git a/test/intemporal/jepsen/client.clj b/test/intemporal/jepsen/client.clj
index 9d5d2d9..462c656 100644
--- a/test/intemporal/jepsen/client.clj
+++ b/test/intemporal/jepsen/client.clj
@@ -14,10 +14,7 @@
 
   History entries are plain EDN maps compatible with jepsen.history format:
     {:process <int> :type (:ok|:fail|:info) :f <op> :value {...} :time <ms>}"
-  (:require [intemporal.protocol :as p]
-            [intemporal.store.jdbc :as jdbc-store]
-            [next.jdbc :as jdbc]
-            [taoensso.telemere :as log]))
+  (:require [next.jdbc :as jdbc]))
 
 (defn now-ms [] (System/currentTimeMillis))
 
diff --git a/test/intemporal/jepsen/nemesis.clj b/test/intemporal/jepsen/nemesis.clj
index 1bb1d73..94295c6 100644
--- a/test/intemporal/jepsen/nemesis.clj
+++ b/test/intemporal/jepsen/nemesis.clj
@@ -9,6 +9,7 @@
   owning worker is gone."
   (:require [intemporal.jepsen.db :as db]
             [next.jdbc :as jdbc]
+            [clojure.string :as str]
             [taoensso.telemere :as log]))
 
 (defn- pick-victim
@@ -95,7 +96,7 @@
                                 AND completed = FALSE
                                 AND wf_type IN ('signal-wait','cancel-sleep','rapid-signal')
                                 AND claimed_by IN ("
-                             (clojure.string/join "," (repeat (count dead-owners) "?"))
+                             (str/join "," (repeat (count dead-owners) "?"))
                              ")")
                          test-run]
                        dead-owners)))]
diff --git a/test/intemporal/jepsen/runner.clj b/test/intemporal/jepsen/runner.clj
index c66132c..34a8685 100644
--- a/test/intemporal/jepsen/runner.clj
+++ b/test/intemporal/jepsen/runner.clj
@@ -30,7 +30,6 @@
             [intemporal.jepsen.client  :as client]
             [intemporal.jepsen.nemesis :as nemesis]
             [intemporal.jepsen.checker :as checker]
-            [next.jdbc :as jdbc]
             [clojure.pprint :as pp]
             [taoensso.telemere :as log])
   (:import [java.util.concurrent Executors TimeUnit]))
@@ -246,6 +245,6 @@
         (db/kill-all!)))))
 
 (defn -main [& args]
-  (let [opts (when (seq args) (read-string (first args)))]
-    (let [r (run (or opts {}))]
-      (System/exit (if (:valid? r) 0 1)))))
+  (let [opts (when (seq args) (read-string (first args)))
+        r    (run (or opts {}))]
+    (System/exit (if (:valid? r) 0 1))))
diff --git a/test/intemporal/jepsen/worker.clj b/test/intemporal/jepsen/worker.clj
index 9c22258..05ec031 100644
--- a/test/intemporal/jepsen/worker.clj
+++ b/test/intemporal/jepsen/worker.clj
@@ -79,7 +79,7 @@
         (catch InterruptedException _
           (log/log! :info (str "[" owner "] interrupted wf=" workflow-id)))
         (catch Throwable t
-          (log/log! :warn (str "[" owner "] failed wf=" workflow-id " err=" (str t))))))))
+          (log/log! :warn (str "[" owner "] failed wf=" workflow-id " err=" t)))))))
 
 (defn- start-poll-loop!
   "Starts the background work-queue poll loop. Returns a 0-arity stop fn."
@@ -99,7 +99,7 @@
             (catch InterruptedException _
               (reset! running? false))
             (catch Throwable t
-              (log/log! :warn (str "[" owner "] poll loop error: " (str t)))
+              (log/log! :warn (str "[" owner "] poll loop error: " t))
               (Thread/sleep poll-interval-ms))))))
     (fn [] (reset! running? false))))
 
diff --git a/test/intemporal/jepsen/workflows.clj b/test/intemporal/jepsen/workflows.clj
index 55949f5..ed915b2 100644
--- a/test/intemporal/jepsen/workflows.clj
+++ b/test/intemporal/jepsen/workflows.clj
@@ -35,7 +35,7 @@
           VALUES (?,?,?,?,?,?)"
          *test-run* workflow-id step nonce (name phase) *owner*])
       (catch Throwable t
-        (log/warn! "jepsen side-channel write failed" {:err (str t)})))))
+        (log/log! :warn (str "jepsen side-channel write failed: " t))))))
 
 ;; ---------------------------------------------------------------------------
 ;; Activities.
diff --git a/test/intemporal/tests/bench/memory_test.clj b/test/intemporal/tests/bench/memory_test.clj
index cd2210b..bbd5d02 100644
--- a/test/intemporal/tests/bench/memory_test.clj
+++ b/test/intemporal/tests/bench/memory_test.clj
@@ -11,5 +11,5 @@
 (comment
   (time
     ;; 100k => 3GB, 10s
-    (run-store-tests (store/->InMemoryStore (atom {})) 100000))
+    (suite/run-store-tests (store/->InMemoryStore (atom {})) 100000))
   "")
\ No newline at end of file
diff --git a/test/intemporal/tests/jepsen/bug_1_2_test.clj b/test/intemporal/tests/jepsen/bug_1_2_test.clj
index da8863d..0d611fb 100644
--- a/test/intemporal/tests/jepsen/bug_1_2_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_2_test.clj
@@ -1,69 +1,55 @@
 (ns intemporal.tests.jepsen.bug-1-2-test
   "Bug 1.2 — Concurrent execution corrupting history.  REGRESSION GUARD.
 
-  Root cause (improvements.md §1.2) — now FIXED (Phase C):
+  Root cause (improvements.md §1.2) — now FIXED (Phase C, ownership model):
     Two pods could run the same workflow and both write history; JDBC's
     ON CONFLICT DO UPDATE silently overwrote, FDB produced duplicate-seq rows.
-    There was nothing stopping two concurrent writers.
+    Nothing stopped two concurrent writers.
 
-    The fix: a lease (C1). A worker claims ownership before executing; every
-    save-events validates the lease in the same transaction and throws
-    LeaseLostException if this owner no longer holds it. Two workers cannot both
-    write — the one without a live lease is rejected, so history can't be
-    corrupted by concurrent execution.
+    The fix: an ownership column. claim-owner atomically stamps
+    `owner WHERE owner IS NULL OR owner = me`, so exactly one pod can own (and
+    therefore run) a workflow; the worker resumes owned workflows one at a time.
+    No two writers execute concurrently, so history cannot be corrupted.
 
-  These tests assert the FIXED behaviour: once a second owner takes over, the
-  first owner's writes are rejected rather than silently corrupting history."
+  These tests assert the FIXED behaviour: of two pods racing to claim one
+  unowned workflow, exactly one succeeds; the loser cannot run it."
   (:require [clojure.test :refer [deftest is testing]]
             [intemporal.protocol :as p]
             [intemporal.store :as mem]
             [intemporal.store.jdbc :as jdbc-store]
             [intemporal.store.fdb :as fdb-store]
-            [me.vedang.clj-fdb.FDB :as cfdb]
-            [intemporal.internal.lease :as lease]
-            [intemporal.internal.error :as error]))
+            [me.vedang.clj-fdb.FDB :as cfdb]))
 
 (defn- run-scenario
-  "owner-A claims and writes; ownership moves to owner-B; A's next write must be
-  rejected. Returns {:a-wrote? :b-claimed? :a-rejected? :seq-count}."
+  "Two owners race to claim one unowned workflow. Returns
+  {:a-claimed? :b-claimed? :pending-for-loser}."
   [store]
   (let [wid (str "bug12-" (random-uuid))]
     (p/save-event store wid {:event-type :workflow-started :workflow-id wid :args []})
-    (let [a-claim (p/claim-workflow store wid "owner-A" 60000)
-          _       (binding [lease/*owner* "owner-A"]
-                    (p/save-events store wid [{:event-type :activity-completed :seq 0 :result 1}]))
-          _       (p/release-lease store wid "owner-A")
-          b-claim (p/claim-workflow store wid "owner-B" 60000)
-          a-rejected?
-          (try
-            (binding [lease/*owner* "owner-A"]
-              (p/save-events store wid [{:event-type :activity-completed :seq 1 :result 2}]))
-            false
-            (catch Exception e (error/lease-lost? e)))
-          seq0 (->> (p/load-history store wid) (filter #(= 0 (:seq %))) count)]
-      {:a-wrote?    a-claim
-       :b-claimed?  b-claim
-       :a-rejected? a-rejected?
-       :seq0-count  seq0})))
-
-(defn- assert-fixed [{:keys [a-wrote? b-claimed? a-rejected? seq0-count]}]
-  (is a-wrote?    "owner-A held the lease and wrote")
-  (is b-claimed?  "ownership moved to owner-B after release")
-  (is a-rejected? "stale owner-A's write was rejected with LeaseLostException (bug 1.2 fixed)")
-  (is (= 1 seq0-count) "exactly one event at seq=0 — no concurrent-write corruption"))
-
-(deftest lease-prevents-corruption-in-memory
+    (let [a (p/claim-owner store wid "owner-A")
+          b (p/claim-owner store wid "owner-B")]   ; A already owns it -> B must fail
+      {:a-claimed? a
+       :b-claimed? b
+       ;; scope to this wid — the shared DB may hold unowned rows from prior runs
+       :wid-pending-for-b? (contains? (set (p/list-pending store "owner-B" 1000)) wid)})))
+
+(defn- assert-fixed [{:keys [a-claimed? b-claimed? wid-pending-for-b?]}]
+  (is a-claimed?            "owner-A claimed the unowned workflow")
+  (is (false? b-claimed?)   "owner-B could NOT claim A's workflow — exclusive ownership (bug 1.2 fixed)")
+  (is (not wid-pending-for-b?) "the workflow is not runnable by B, so B never executes it"))
+
+(deftest claim-is-exclusive-in-memory
   (testing "InMemoryStore"
     (assert-fixed (run-scenario (mem/->InMemoryStore (atom {}))))))
 
-(deftest ^:integration lease-prevents-corruption-jdbc
+(deftest ^:integration claim-is-exclusive-jdbc
   (testing "JdbcStore"
     (let [url   (or (System/getenv "DATABASE_URL")
                     "jdbc:postgresql://localhost:5432/root?user=root&password=root")
           store (jdbc-store/make-jdbc-store url)]
       (try (assert-fixed (run-scenario store)) (finally (.close store))))))
 
-(deftest ^:integration lease-prevents-corruption-fdb
+(deftest ^:integration claim-is-exclusive-fdb
   (testing "FDBStore"
     (let [root  (str "bug12-" (random-uuid))
           fdb   (cfdb/select-api-version 730)
diff --git a/test/intemporal/tests/jepsen/racing_store.clj b/test/intemporal/tests/jepsen/racing_store.clj
index 29c870b..cf1ca92 100644
--- a/test/intemporal/tests/jepsen/racing_store.clj
+++ b/test/intemporal/tests/jepsen/racing_store.clj
@@ -44,12 +44,10 @@
   (is-cancelled?           [_ wf-id]        (p/is-cancelled? inner wf-id))
   (mark-cancelled          [_ wf-id]        (p/mark-cancelled inner wf-id))
   (get-workflow-status     [_ wf-id]        (p/get-workflow-status inner wf-id))
-  (claim-workflow          [_ wf-id o l]    (p/claim-workflow inner wf-id o l))
-  (renew-lease             [_ wf-id o l]    (p/renew-lease inner wf-id o l))
-  (release-lease           [_ wf-id o]      (p/release-lease inner wf-id o))
-  (add-runnable            [_ wf-id r]      (p/add-runnable inner wf-id r))
-  (claim-runnable          [_ o b c]       (p/claim-runnable inner o b c))
-  (delete-runnable         [_ wf-id]        (p/delete-runnable inner wf-id))
+  (claim-owner             [_ wf-id o]      (p/claim-owner inner wf-id o))
+  (list-pending            [_ o lim]        (p/list-pending inner o lim))
+  (release-owner           [_ o]            (p/release-owner inner o))
+  (set-wake-at             [_ wf-id wa]     (p/set-wake-at inner wf-id wa))
 
   (consume-signal [_ wf-id sig-name]
     (let [result (p/consume-signal inner wf-id sig-name)]
diff --git a/test/intemporal/tests/timer_recovery_test.clj b/test/intemporal/tests/timer_recovery_test.clj
new file mode 100644
index 0000000..547d41b
--- /dev/null
+++ b/test/intemporal/tests/timer_recovery_test.clj
@@ -0,0 +1,161 @@
+(ns intemporal.tests.timer-recovery-test
+  "Milestone 4 (C2) — persistent / cross-pod timers.
+
+  Three properties, each across InMemory + JDBC + FDB:
+   1. fire-at determinism — a crash-resumed sleep keeps its original deadline
+      (the persisted :timer-scheduled fire-at is reused, not recomputed);
+   2. timer recovery — a workflow that sleeps, then loses its engine, is driven
+      to completion by a worker on a fresh engine when the timer comes due;
+   3. wake_at filtering — list-pending skips a workflow whose wake-at is still in
+      the future, and returns it once due."
+  (:require [clojure.test :refer [deftest is testing]]
+            [intemporal.core :as intemporal]
+            [intemporal.protocol :as p]
+            [intemporal.store :as store]
+            [intemporal.store.jdbc :as jdbc-store]
+            [intemporal.store.fdb :as fdb-store]
+            [me.vedang.clj-fdb.FDB :as cfdb]
+            [intemporal.internal.workflow-registry :as wreg]))
+
+(defn t-act [x] (* x 3))
+
+(defn sleeper-wf [x ms]
+  (let [a (intemporal/stub #'t-act)
+        r (a x)]
+    (intemporal/sleep ms)
+    (+ r 1)))
+
+(defn- fire-at-for [store wf-id]
+  (->> (p/load-history store wf-id)
+       (filter #(= :timer-scheduled (:event-type %)))
+       first
+       :fire-at))
+
+(defn- await-status [store wf-id terminal timeout-ms]
+  (let [deadline (+ (System/currentTimeMillis) timeout-ms)]
+    (loop []
+      (let [s (p/get-workflow-status store wf-id)]
+        (cond
+          (= terminal s) s
+          (> (System/currentTimeMillis) deadline) s
+          :else (do (Thread/sleep 50) (recur)))))))
+
+;; ── 1. fire-at determinism across a crash-resume ────────────────────────────────
+
+(defn- check-determinism [store]
+  (wreg/clear-registry!)
+  (let [wid (str "det-" (random-uuid))]
+    ;; Start with a long sleep so it suspends on the timer, then crash.
+    (let [e1 (intemporal/make-workflow-engine :store store :threads 2)
+          f1 (future (intemporal/start-workflow e1 sleeper-wf [7 60000] :workflow-id wid))]
+      (Thread/sleep 300)
+      (future-cancel f1)
+      (intemporal/shutdown-engine e1))
+    (let [fire-at-1 (fire-at-for store wid)]
+      (is (some? fire-at-1) "a :timer-scheduled fire-at was persisted")
+      ;; Resume on a fresh engine; it re-suspends on the same timer.
+      (let [e2 (intemporal/make-workflow-engine :store store :threads 2)
+            f2 (future (intemporal/resume-workflow e2 wid sleeper-wf [7 60000]))]
+        (Thread/sleep 300)
+        (future-cancel f2)
+        (intemporal/shutdown-engine e2))
+      (let [fire-at-2 (fire-at-for store wid)]
+        (is (= fire-at-1 fire-at-2)
+            "fire-at is identical across resume — no deadline drift (C2 determinism)")))))
+
+(deftest fire-at-deterministic-in-memory
+  (testing "InMemoryStore"
+    (check-determinism (store/->InMemoryStore (atom {})))))
+
+(deftest ^:integration fire-at-deterministic-jdbc
+  (testing "JdbcStore"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (check-determinism store) (finally (.close store))))))
+
+(deftest ^:integration fire-at-deterministic-fdb
+  (testing "FDBStore"
+    (let [root  (str "det-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (check-determinism store))))
+
+;; ── 2. timer recovery: worker drives a crashed sleeper to completion ────────────
+
+(defn- check-timer-recovery [store]
+  (wreg/clear-registry!)
+  (let [wid (str "trec-" (random-uuid))]
+    ;; Short sleep (300ms) so the timer becomes due quickly after the crash.
+    (let [e1 (intemporal/make-workflow-engine :store store :threads 2)
+          f1 (future (intemporal/start-workflow e1 sleeper-wf [8 300] :workflow-id wid))]
+      (Thread/sleep 150)            ; suspend on the timer, before it fires
+      (future-cancel f1)
+      (intemporal/shutdown-engine e1))
+    (is (= :running (p/get-workflow-status store wid))
+        "workflow is durably suspended on the timer after the crash")
+    ;; A worker on a fresh engine picks it up once the timer is due.
+    (let [e2   (intemporal/make-workflow-engine :store store :threads 2)
+          stop (intemporal/start-worker e2 :poll-ms 50 :owner-id "trec-w")]
+      (try
+        (is (= :completed (await-status store wid :completed 5000))
+            "worker resumed the crashed timer workflow once it came due (C2 recovery)")
+        (is (= 25 (intemporal/get-workflow-result store wid)) "8*3 + 1 = 25")
+        (finally (stop) (intemporal/shutdown-engine e2))))))
+
+(deftest timer-recovery-in-memory
+  (testing "InMemoryStore"
+    (check-timer-recovery (store/->InMemoryStore (atom {})))))
+
+(deftest ^:integration timer-recovery-jdbc
+  (testing "JdbcStore"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (check-timer-recovery store) (finally (.close store))))))
+
+(deftest ^:integration timer-recovery-fdb
+  (testing "FDBStore"
+    (let [root  (str "trec-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (check-timer-recovery store))))
+
+;; ── 3. wake_at filtering: list-pending skips not-yet-due workflows ──────────────
+
+(defn- check-wake-at-filter [store]
+  (let [wid (str "wake-" (random-uuid))]
+    (p/save-event store wid {:event-type :workflow-started :workflow-id wid :args []})
+    ;; Far-future wake-at -> not due -> excluded from list-pending.
+    (p/set-wake-at store wid (+ (System/currentTimeMillis) 3600000))
+    (is (not (contains? (set (p/list-pending store "any-owner" 1000)) wid))
+        "a workflow whose wake-at is in the future is skipped (C2 filtering)")
+    ;; Past wake-at -> due -> included.
+    (p/set-wake-at store wid (- (System/currentTimeMillis) 1000))
+    (is (contains? (set (p/list-pending store "any-owner" 1000)) wid)
+        "a workflow whose wake-at has passed is returned")
+    ;; nil wake-at -> always eligible -> included.
+    (p/set-wake-at store wid nil)
+    (is (contains? (set (p/list-pending store "any-owner" 1000)) wid)
+        "a workflow with nil wake-at is always eligible")))
+
+(deftest wake-at-filter-in-memory
+  (testing "InMemoryStore"
+    (check-wake-at-filter (store/->InMemoryStore (atom {})))))
+
+(deftest ^:integration wake-at-filter-jdbc
+  (testing "JdbcStore"
+    (let [url   (or (System/getenv "DATABASE_URL")
+                    "jdbc:postgresql://localhost:5432/root?user=root&password=root")
+          store (jdbc-store/make-jdbc-store url)]
+      (try (check-wake-at-filter store) (finally (.close store))))))
+
+(deftest ^:integration wake-at-filter-fdb
+  (testing "FDBStore"
+    (let [root  (str "wake-" (random-uuid))
+          fdb   (cfdb/select-api-version 730)
+          db    (.open fdb "docker/fdb.cluster")
+          store (fdb-store/make-fdb-store db root)]
+      (check-wake-at-filter store))))
diff --git a/test/intemporal/tests/worker_test.clj b/test/intemporal/tests/worker_test.clj
index 5726737..a37949f 100644
--- a/test/intemporal/tests/worker_test.clj
+++ b/test/intemporal/tests/worker_test.clj
@@ -1,10 +1,11 @@
 (ns intemporal.tests.worker-test
-  "Phase C — lease (C1), runnable markers (C3) and the recovery worker (C4).
+  "Phase C (ownership model) — claim exclusivity + the recovery worker.
 
-  Proves the durable, cross-pod wake model:
-   - a workflow whose original engine crashed is resumed by a worker after a
-     signal is delivered (the recovery-poller story, bug 1.3 / 1.1 model);
-   - the lease rejects a writer that no longer owns the workflow (bug 1.2)."
+  Proves the durable, cross-pod recovery model WITHOUT leases:
+   - a workflow whose original engine crashed is resumed by a worker (the
+     ownership scan is both the live wake and the crash recovery);
+   - claim-owner is the exclusivity gate: only one owner can claim a workflow,
+     so concurrent execution (and history corruption) cannot occur (bug 1.2)."
   (:require [clojure.test :refer [deftest is testing]]
             [intemporal.core :as intemporal]
             [intemporal.protocol :as p]
@@ -12,8 +13,6 @@
             [intemporal.store.jdbc :as jdbc-store]
             [intemporal.store.fdb :as fdb-store]
             [me.vedang.clj-fdb.FDB :as cfdb]
-            [intemporal.internal.lease :as lease]
-            [intemporal.internal.error :as error]
             [intemporal.internal.workflow-registry :as wreg]))
 
 (defn w-act [x] (* x 10))
@@ -33,7 +32,7 @@
           (> (System/currentTimeMillis) deadline) s
           :else (do (Thread/sleep 50) (recur)))))))
 
-;; ── C4: worker resumes a crashed workflow after a cross-instance signal ──────────
+;; ── recovery: worker resumes a crashed workflow via the ownership scan ──────────
 
 (defn- check-worker-recovery [store]
   (wreg/clear-registry!)
@@ -50,9 +49,9 @@
     (let [e2   (intemporal/make-workflow-engine :store store :threads 2)
           stop (intemporal/start-worker e2 :poll-ms 50 :owner-id "w2")]
       (try
-        (intemporal/send-signal store wid "go" {})   ; writes a durable runnable marker
+        (intemporal/send-signal store wid "go" {})
         (is (= :completed (await-status store wid :completed 5000))
-            "worker claimed the marker, leased, and resumed the workflow to completion")
+            "worker scan claimed ownership and resumed the workflow to completion")
         (is (= 51 (intemporal/get-workflow-result store wid)) "5*10 + 1 = 51")
         (finally (stop) (intemporal/shutdown-engine e2))))))
 
@@ -61,54 +60,49 @@
     (check-worker-recovery (store/->InMemoryStore (atom {})))))
 
 (deftest ^:integration worker-recovery-jdbc
-  (testing "JdbcStore: worker resumes via durable runnable marker"
+  (testing "JdbcStore: worker resumes via the ownership scan"
     (let [url   (or (System/getenv "DATABASE_URL")
                     "jdbc:postgresql://localhost:5432/root?user=root&password=root")
           store (jdbc-store/make-jdbc-store url)]
       (try (check-worker-recovery store) (finally (.close store))))))
 
 (deftest ^:integration worker-recovery-fdb
-  (testing "FDBStore: worker resumes via durable runnable marker"
+  (testing "FDBStore: worker resumes via the ownership scan"
     (let [root  (str "worker-" (random-uuid))
           fdb   (cfdb/select-api-version 730)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-worker-recovery store))))
 
-;; ── C1: lease rejects a stale writer ─────────────────────────────────────────────
+;; ── exclusivity: claim-owner lets exactly one owner run a workflow ──────────────
 
-(defn- check-lease [store]
-  (let [wid (str "lease-" (random-uuid))]
+(defn- check-claim-exclusivity [store]
+  (let [wid (str "claim-" (random-uuid))]
     (p/save-event store wid {:event-type :workflow-started :workflow-id wid :args []})
-    (is (p/claim-workflow store wid "owner-A" 60000) "A claims the unowned workflow")
-    (is (false? (p/claim-workflow store wid "owner-B" 60000)) "B cannot claim A's live lease")
-    ;; A may write while it holds the lease
-    (binding [lease/*owner* "owner-A"]
-      (p/save-events store wid [{:event-type :activity-completed :seq 0 :result 1}]))
-    ;; A releases; B claims
-    (p/release-lease store wid "owner-A")
-    (is (p/claim-workflow store wid "owner-B" 60000) "B claims after release")
-    ;; A is now stale: its writes must be rejected
-    (is (thrown? clojure.lang.ExceptionInfo
-                 (binding [lease/*owner* "owner-A"]
-                   (p/save-events store wid [{:event-type :activity-completed :seq 1 :result 2}])))
-        "stale owner A's write is rejected (lease lost)")))
+    (is (p/claim-owner store wid "owner-A") "A claims the unowned workflow")
+    (is (p/claim-owner store wid "owner-A") "A re-claims its own (idempotent)")
+    (is (false? (p/claim-owner store wid "owner-B")) "B cannot claim A's workflow")
+    ;; scope to this wid — the shared DB may hold unowned rows from prior runs
+    (is (contains? (set (p/list-pending store "owner-A" 1000)) wid) "the workflow is pending for A")
+    (is (not (contains? (set (p/list-pending store "owner-B" 1000)) wid)) "and not pending for B")
+    (p/release-owner store "owner-A")
+    (is (p/claim-owner store wid "owner-B") "B claims after A releases")))
 
-(deftest lease-rejects-stale-writer-in-memory
-  (testing "InMemoryStore lease validation"
-    (check-lease (store/->InMemoryStore (atom {})))))
+(deftest claim-exclusivity-in-memory
+  (testing "InMemoryStore claim-owner exclusivity"
+    (check-claim-exclusivity (store/->InMemoryStore (atom {})))))
 
-(deftest ^:integration lease-rejects-stale-writer-jdbc
-  (testing "JdbcStore lease validation"
+(deftest ^:integration claim-exclusivity-jdbc
+  (testing "JdbcStore claim-owner exclusivity"
     (let [url   (or (System/getenv "DATABASE_URL")
                     "jdbc:postgresql://localhost:5432/root?user=root&password=root")
           store (jdbc-store/make-jdbc-store url)]
-      (try (check-lease store) (finally (.close store))))))
+      (try (check-claim-exclusivity store) (finally (.close store))))))
 
-(deftest ^:integration lease-rejects-stale-writer-fdb
-  (testing "FDBStore lease validation"
-    (let [root  (str "lease-" (random-uuid))
+(deftest ^:integration claim-exclusivity-fdb
+  (testing "FDBStore claim-owner exclusivity"
+    (let [root  (str "claim-" (random-uuid))
           fdb   (cfdb/select-api-version 730)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
-      (check-lease store))))
+      (check-claim-exclusivity store))))
diff --git a/test/intemporal/tests/workflow_registry_test.clj b/test/intemporal/tests/workflow_registry_test.clj
index 6e501a1..cddba83 100644
--- a/test/intemporal/tests/workflow_registry_test.clj
+++ b/test/intemporal/tests/workflow_registry_test.clj
@@ -7,7 +7,6 @@
   re-running already-completed activities."
   (:require [clojure.test :refer [deftest is testing]]
             [intemporal.core :as intemporal]
-            [intemporal.protocol :as p]
             [intemporal.store :as store]
             [intemporal.internal.workflow-registry :as wreg]))
 
diff --git a/test2/intemporal/failures_test.cljc b/test2/intemporal/failures_test.cljc
deleted file mode 100644
index b0f0e69..0000000
--- a/test2/intemporal/failures_test.cljc
+++ /dev/null
@@ -1,47 +0,0 @@
-(ns intemporal.failures-test
-  #?(:cljs (:require [cljs.test :as t :refer-macros [deftest is testing]]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]
-                     [promesa.core :as p])
-     :clj  (:require [clojure.test :as t :refer [deftest is testing]]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]))
-  #?(:cljs (:require-macros [intemporal.macros :refer [stub-protocol defn-workflow]]
-                            [intemporal.test-utils :refer [with-result]])
-     :clj  (:require [intemporal.macros :refer [stub-protocol defn-workflow]]
-                     [intemporal.test-utils :refer [with-result]])))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defprotocol MyActivities
-  (foo [this a])
-  (forced-failure [this]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a] [:proto a])
-  (forced-failure [this] (throw (ex-info "Forced" {:a 1}))))
-
-(defn-workflow my-workflow [k]
-  (let [stub (stub-protocol MyActivities {})
-        prr  (if (= :ok k)
-               (foo stub :pr)
-               (forced-failure stub))]
-
-    ;; chain values: ensure tests work under cljs too
-    #_:clj-kondo/ignore
-    (#?(:clj let :cljs p/let) [res prr]
-      res)))
-
-;;;; test proper
-
-(deftest activity-failure-test
-  (testing "failure: activity throws"
-    (let [mstore (store/make-store)
-          ex     (w/start-poller! mstore {:protocols {`MyActivities (->MyActivitiesImpl)}})]
-      (with-result [res (w/with-env {:store mstore}
-                                    (my-workflow :nok))]
-        (is (instance? #?(:clj Exception :cljs js/Error) res))
-        (w/shutdown ex 1000)))))
diff --git a/test2/intemporal/internal_failures_test.cljc b/test2/intemporal/internal_failures_test.cljc
deleted file mode 100644
index ecf6c98..0000000
--- a/test2/intemporal/internal_failures_test.cljc
+++ /dev/null
@@ -1,47 +0,0 @@
-(ns intemporal.internal-failures-test
-  #?(:cljs (:require [cljs.test :as t :refer-macros [deftest is testing]]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]
-                     [promesa.core :as p])
-     :clj  (:require [clojure.test :as t :refer [deftest is testing]]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]))
-  #?(:cljs (:require-macros [intemporal.macros :refer [stub-protocol defn-workflow]]
-                            [intemporal.test-utils :refer [with-result]])
-     :clj  (:require [intemporal.macros :refer [stub-protocol defn-workflow]]
-                     [intemporal.test-utils :refer [with-result]])))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defprotocol MyActivities
-  (foo [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a] [:proto a]))
-
-(defn-workflow my-workflow [k]
-  (let [stub (stub-protocol MyActivities {})
-        prr  (foo stub :pr)]
-
-    ;; chain values: ensure tests work under cljs too
-    #_:clj-kondo/ignore
-    (#?(:clj let :cljs p/let) [res prr]
-      res)))
-
-;;;; test proper
-
-(deftest store-failure-test
-  (testing "failure: task validation fails"
-    (let [mstore (store/make-store {:failures {:validation 1.0}})
-          ex     (w/start-poller! mstore {:protocols {`MyActivities (->MyActivitiesImpl)}})]
-
-      (with-result [res (w/with-env {:store mstore}
-                                    (my-workflow :ok))]
-        (is (instance? #?(:clj Exception :cljs js/Error) res))
-        (is (= {:intemporal.workflow.internal/type :internal} (ex-data (or (ex-cause res) res))))
-        (w/shutdown ex 1000)))))
-
-;(cljs.test/run-tests *ns*)
\ No newline at end of file
diff --git a/test2/intemporal/matchers.cljc b/test2/intemporal/matchers.cljc
deleted file mode 100644
index 5a41ae6..0000000
--- a/test2/intemporal/matchers.cljc
+++ /dev/null
@@ -1,20 +0,0 @@
-(ns intemporal.matchers
-  (:require [matcher-combinators.core :as mc]
-            [matcher-combinators.result :as result]))
-
-(defrecord Nilable []
-  mc/Matcher
-  (-matcher-for [this] this)
-  (-matcher-for [this _] this)
-  (-match [this actual]
-    (if (or (nil? actual)
-            (= :matcher-combinators.core/missing actual))
-      {::result/type   :match
-       ::result/value  nil
-       ::result/weight 1}
-      {::result/type   :mismatch
-       ::result/value  actual
-       ::result/weight 1}))
-  (-base-name [_] 'nilable))
-
-(def nilable? (->Nilable))
diff --git a/test2/intemporal/recovery_failure.edn b/test2/intemporal/recovery_failure.edn
deleted file mode 100644
index eeeae44..0000000
--- a/test2/intemporal/recovery_failure.edn
+++ /dev/null
@@ -1,10 +0,0 @@
-{:tasks    {"elegant-robinson" {:args [1], :ref nil, :type :workflow, :state :pending, :sym intemporal.recovery-failure-test/my-workflow-,
-                                :root nil, :owner "intemporal", :id "elegant-robinson", :runtime {:timeout-ms 900000}, :order 1, :lease-end nil}},
- :history  {"elegant-robinson"
-            [{:ref "elegant-robinson", :root "elegant-robinson", :type :intemporal.workflow/invoke, :sym intemporal.recovery-failure-test/my-workflow-, :args [1], :error nil, :result nil, :id 1}]
-
-            "elegant-robinson-1"
-            [{:ref "elegant-robinson-1", :root "elegant-robinson", :type :intemporal.activity/failure, :sym intemporal.recovery-failure-test/activity-fn, :args (1), :error nil, :result nil, :id 2}]}
- :counter  2,
- :pcounter 1,
- :ecounter 0}
diff --git a/test2/intemporal/recovery_failure_test.clj b/test2/intemporal/recovery_failure_test.clj
deleted file mode 100644
index f882ebf..0000000
--- a/test2/intemporal/recovery_failure_test.clj
+++ /dev/null
@@ -1,66 +0,0 @@
-(ns intemporal.recovery-failure-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.workflow :as w]
-            [intemporal.test-utils :as tu]
-            [clojure.java.io :as io]
-            [intemporal.macros :refer [stub-function stub-protocol defn-workflow]]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defn nested-fn [a]
-  [a :nested])
-
-(defn activity-fn [a]
-  (let [f (stub-function nested-fn)]
-    (f :sub)))
-
-(defprotocol MyActivities
-  (foo [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a] [:proto a]))
-
-(defn-workflow my-workflow [i]
-  (let [sf  (stub-function activity-fn)
-        pr  (stub-protocol MyActivities {})
-        sfr (sf 1)
-        prr (foo pr :pr)]
-
-    ;; chain values: ensure tests work under cljs too
-    #_:clj-kondo/ignore
-    (let [v1 sfr
-          v2 prr]
-      [:root v1 v2])))
-
-;;;; test proper
-
-(deftest recovery-failure-test
-  ;; make a backup of the db to allow replay
-  (io/copy (io/file "./test/intemporal/recovery_failure.edn")
-           (io/file "/tmp/recovery_failure.edn"))
-  (testing "workflow"
-    (let [mstore (store/make-store {:file "/tmp/recovery_failure.edn"})
-          [task] (store/list-tasks mstore)
-          ex     (w/start-poller! mstore {:protocols {`MyActivities (->MyActivitiesImpl)}})]
-
-      (try
-        (store/reenqueue-pending-tasks mstore println)
-        (tu/wait-for-task mstore (:id task))
-
-        (testing "workflow failed with unexpected transition"
-          (let [[task] (store/list-tasks mstore)
-                [_ _ crash-ev last-ev] (->> (store/list-events mstore)
-                                            (sort-by :id))]
-
-            (is (= :failure (:state task)))
-            (is (= :intemporal.workflow.internal/failure (:type crash-ev)))
-            (is (= :intemporal.workflow/failure (:type last-ev)))))
-        (finally
-          (tu/print-tables mstore)
-          (w/shutdown ex 1000))))))
-
-#_:clj-kondo/ignore
-(comment
-  (cljs.test/run-tests *ns*))
diff --git a/test2/intemporal/shutdown_restart_test.clj b/test2/intemporal/shutdown_restart_test.clj
deleted file mode 100644
index 91ab757..0000000
--- a/test2/intemporal/shutdown_restart_test.clj
+++ /dev/null
@@ -1,81 +0,0 @@
-(ns intemporal.shutdown-restart-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.workflow :as w]
-            [matcher-combinators.test :refer [match?]]
-            [intemporal.macros :refer [stub-protocol defn-workflow]]
-            [intemporal.test-utils :as tu :refer [with-result]]
-            [intemporal.test-executor :as te])
-  (:import (java.util.concurrent CountDownLatch)))
-
-;(t/use-fixtures :once tu/with-trace-logging)
-
-(def activity-invoked? (CountDownLatch. 1))
-(def executor-shutdown? (CountDownLatch. 1))
-
-(defprotocol MyActivities
-  (foo [this a])
-  (foo2 [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a]
-    (.countDown activity-invoked?)
-    (.await executor-shutdown?)
-    :foo)
-  (foo2 [this a] a))
-
-(defn-workflow my-workflow [k]
-  (let [stub (stub-protocol MyActivities {})
-        r1  (foo stub :pr)
-        r2  (foo2 stub :pr)]
-    [r1 r2]))
-
-;;;; test proper
-
-(deftest shutdown-restart-test
-  (testing "failure: task validation fails"
-    (let [mstore      (store/make-store {})
-          custom-ex   (te/make-test-executor (fn [] (.countDown executor-shutdown?)) nil)
-          executor    (w/start-poller! mstore custom-ex {:protocols  {`MyActivities (->MyActivitiesImpl)}
-                                                         :polling-ms 500})]
-
-      (testing "shutdown of ongoing workflow"
-        (future
-          ;; ensure activity is inflight
-          (.await activity-invoked?)
-          ;; immediately countdown the latch
-          (w/shutdown executor 0)
-          (is (w/shutting-down? executor)))
-
-        (with-result [res (w/with-env {:store mstore}
-                                      (my-workflow :ok))]
-
-          (is (instance? Exception res))
-
-          (testing "workflow is not in failed state"
-            (tu/print-tables mstore)
-
-            (testing "workflow task"
-              (let [tasks (store/list-tasks mstore)
-                    [w1] tasks]
-                (is (match? {:type :workflow :sym 'intemporal.shutdown-restart-test/my-workflow- :state :pending} w1))
-
-                (testing "workflow events: workflow has not finished"
-                  (let [[e1 e2] (store/list-events mstore)]
-                    (is (match? {:type :intemporal.workflow/invoke :sym 'intemporal.shutdown-restart-test/my-workflow-} e1))
-                    (is (match? {:type :intemporal.protocol/invoke :sym 'intemporal.shutdown-restart-test/foo} e2))
-
-                    (let [[w1] (store/list-tasks mstore)]
-                      (is (match? {:type :workflow :sym 'intemporal.shutdown-restart-test/my-workflow- :state :pending} w1)))))
-
-                (testing "workflow resumes"
-                  (with-open [_ (w/start-poller! mstore {:protocols  {`MyActivities (->MyActivitiesImpl)}
-                                                         :polling-ms 100})]
-                    (store/reenqueue-pending-tasks mstore (constantly nil))
-                    (tu/wait-for-task mstore (:id w1))
-                    (tu/print-tables mstore)
-
-                    (testing "workflow succeeded"
-                      (let [[w1] (store/list-tasks mstore)]
-                        (is (match? {:type :workflow :sym 'intemporal.shutdown-restart-test/my-workflow- :state :success} w1))))))))))))))
diff --git a/test2/intemporal/shutdown_restart_test.cljs b/test2/intemporal/shutdown_restart_test.cljs
deleted file mode 100644
index 0f24a45..0000000
--- a/test2/intemporal/shutdown_restart_test.cljs
+++ /dev/null
@@ -1,78 +0,0 @@
-(ns intemporal.shutdown-restart-test
-  (:require [cljs.test :as t :refer-macros [deftest is testing]]
-            [intemporal.error :as error]
-            [intemporal.store :as store]
-            [intemporal.workflow :as w]
-            [intemporal.test-utils :as tu]
-            [matcher-combinators.test :refer [match?]]
-            [promesa.core :as p])
-  (:require-macros [intemporal.macros :refer [stub-protocol defn-workflow]]
-                   [intemporal.test-utils :refer [with-result]]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(declare stop-worker)
-(defprotocol MyActivities
-  (foo [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a]
-    (stop-worker)
-    :foo))
-
-(defn-workflow my-workflow [k]
-  (let [stub (stub-protocol MyActivities {})
-        prr  (foo stub :pr)]
-    ;; chain values: ensure tests work under cljs too
-    (p/let [res prr]
-      res)))
-
-;;;; test proper
-
-(def mstore (store/make-store {}))
-(def ex (w/start-poller! mstore {:protocols  {`MyActivities (->MyActivitiesImpl)}
-                                 :polling-ms 10}))
-
-(defn stop-worker []
-  (w/shutdown ex 1000))
-
-(deftest executor-shutdown-test
-  (testing "shutdown of ongoing workflow"
-
-    (with-result [res (w/with-env {:store mstore}
-                                  (my-workflow :ok))]
-      (w/shutdown ex 1000)
-
-      (is (instance? js/Error res))
-      (is (error/panic? res))
-
-      (testing "Workflow is not in failed state"
-        (tu/print-tables mstore)
-
-        (testing "workflow task"
-          (let [[w1] (store/list-tasks mstore)]
-            (is (match? {:type :workflow :sym 'intemporal.shutdown-restart-test/my-workflow- :state :pending} w1))))
-
-        (testing "workflow events"
-             (let [[e1 e2 e3] (store/list-events mstore)]
-               (is (match? {:type :intemporal.workflow/invoke :sym 'intemporal.shutdown-restart-test/my-workflow-} e1)
-                 (is (match? {:type :intemporal.protocol/invoke :sym 'intemporal.shutdown-restart-test/foo} e2)))
-               (is (nil? e3))))))))
-
-#_(deftest executor-shutdown-resume-test
-    (testing "workflow resumes"
-      (let [stop-worker (w/start-worker mstore {:protocols  {`MyActivities (->MyActivitiesImpl)}
-                                                :polling-ms 10})]
-        (store/reenqueue-pending-tasks mstore (constantly nil))
-        (with-result [_ (p/delay 2000)]
-
-          (tu/print-tables mstore)
-
-          (testing "workflow succeeded"
-            (let [[w1] (store/list-tasks mstore)]
-              (is (match? {:type :workflow :sym 'intemporal.shutdown-restart-test/my-workflow- :state :success} w1))))
-
-          (stop-worker)))))
-
-;(cljs.test/run-tests *ns*)
\ No newline at end of file
diff --git a/test2/intemporal/store_test.cljc b/test2/intemporal/store_test.cljc
deleted file mode 100644
index a1c3638..0000000
--- a/test2/intemporal/store_test.cljc
+++ /dev/null
@@ -1,91 +0,0 @@
-(ns intemporal.store-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.test-utils :as tu]
-            [intemporal.store :as s]
-            [matcher-combinators.test :refer [match?]]
-            [promesa.core :as p]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defn- is-promise-ok [prom]
-  (-> prom
-      (p/then (fn [_] (is true)))
-      (p/catch (fn [err] (is false (prn-str err))))))
-
-(defn- to-map [rec]
-  (into {} rec))
-
-(deftest basic-store-tests
-
-  (testing "enqueue/dequeue"
-    (let [store (s/make-store)
-          task  (tu/make-workflow-task)]
-      (s/enqueue-task store task)
-
-      (testing "dequeueing updates state"
-        (is (match? (to-map (assoc task :state :pending))
-                    (to-map (s/dequeue-task store)))))))
-
-  (testing "enqueue/dequeue with lease"
-    (let [store (s/make-store)
-          task  (tu/make-workflow-task)]
-      (s/enqueue-task store task)
-
-      (testing "dequeueing with lease"
-        (is (match? (to-map (assoc task :state :pending))
-                    (to-map (s/dequeue-task store {:lease-ms 100}))))
-        ;; wait for expire
-        #?(:clj
-           (do
-             @(p/delay 1000)
-             (is (match?
-                   (to-map (assoc task :state :pending))
-                   (to-map (s/dequeue-task store)))))
-
-           :cljs
-           (t/async done
-             (p/finally (p/delay 1000)
-                        (fn [_ c]
-                          (t/is (nil? c))
-                          (is (match?
-                                (to-map (assoc task :state :pending))
-                                (to-map (s/dequeue-task store))))
-                          (done))))))))
-
-  (testing "await task"
-    (let [store (s/make-store)
-          task  (tu/make-workflow-task)
-          prom  (p/vthread
-                  (s/await-task store (:id task) {:timeout-ms 1000}))]
-
-      (is-promise-ok prom)))
-
-  (testing "watch task"
-    (let [store   (s/make-store)
-          task    (tu/make-workflow-task)
-          evt     {:ref "some-ref" :root "some-root" :type :intemporal.workflow/invoke :sym 'identity :args []}
-          called? (p/deferred)]
-
-      (is-promise-ok (p/timeout called? 1000))
-      ;; if the watch doesnt happen, the test times out
-      (s/watch-task store (:id task) #(p/resolve! called? %))
-      (s/enqueue-task store task)
-
-      (testing "apply fn event"
-        (s/task<-event store (:id task) evt)
-
-        (testing "task state updated"
-          (let [db-task (s/find-task store (:id task))]
-            (is (= (dissoc db-task :id)
-                   {:type    :workflow
-                    :owner   "intemporal"
-                    :ref     "some-ref"
-                    :root    "some-root"
-                    :sym     'identity
-                    :fvar    #'clojure.core/identity
-                    :args    []
-                    :result  nil
-                    :state   :pending
-                    :order   1
-                    :runtime {}}))))))))
-
diff --git a/test2/intemporal/stores/basic_test.clj b/test2/intemporal/stores/basic_test.clj
deleted file mode 100644
index 5b2ee44..0000000
--- a/test2/intemporal/stores/basic_test.clj
+++ /dev/null
@@ -1,204 +0,0 @@
-(ns ^:integration ^:fdb ^:sql intemporal.stores.basic-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.store.foundationdb :as fdb]
-            [intemporal.store.jdbc :as jdbc]
-            [intemporal.test-utils :as tu]
-            [intemporal.workflow.internal :as internal]
-            [intemporal.matchers :refer [nilable?]]
-            [matcher-combinators.test :refer [match?]]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(def stores (delay {:memory   (store/make-store)
-                    :fdb      (fdb/make-store {:cluster-file-path "docker/fdb.cluster"})
-                    :postgres (jdbc/make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                                                :migration-dir "migrations/postgres"})}))
-
-(deftest basic-test
-  (doseq [[label store] @stores]
-    (testing (format "store: %s" label)
-      (let [evt {:ref  "some-ref" :root "some-root"
-                 :type :intemporal.activity/invoke
-                 :sym  'clojure.core/+
-                 :args [1 "a"]}]
-
-        (testing "clear"
-          (store/clear-events store)
-          (store/clear-tasks store))
-
-        (testing "event store"
-          (testing "event save"
-
-            (let [t1 (internal/create-workflow-task nil nil 'clojure.core/+ (var-get #'+) nil "1")
-                  t1 (store/enqueue-task store t1)
-
-                  t2 (internal/create-workflow-task (:id t1) (:id t1) 'clojure.core/+ (var-get #'+) nil "2")
-                  t2 (store/enqueue-task store t2)
-
-                  ev (store/save-event store 1 (assoc evt :ref (:id t2) :root (:id t2)))]
-
-              (testing "events list"
-                (is (match? [(assoc evt
-                               :id (:id ev)
-                               :root (:id t2)
-                               :ref (:id t2)
-                               :result nilable?)] (store/list-events store))))
-
-              (testing "events clear"
-                (store/clear-events store)
-                (is (empty? (store/list-events store)))))))
-
-        (testing "task store"
-          (store/clear-events store)
-          (store/clear-tasks store)
-          (let [task (internal/create-workflow-task "self" "self" 'clojure.core/+ (var-get #'+) ["invoke" 333]
-                                                    "self" nil :new
-                                                    nil)]
-
-            (testing "enqueue task"
-              (is (=
-                    (assoc task :owner store/default-owner)
-                    (-> (store/enqueue-task store task)
-                        (dissoc :order)))))
-
-            (testing "list tasks"
-              (is (match? [(dissoc task :fvar)]
-                          (store/list-tasks store))))
-
-            (testing "dequeue tasks"
-              (is (match? {:args      ["invoke" 333],
-                           :ref       "self",
-                           :root      "self",
-                           :type      :workflow,
-                           :state     :pending,
-                           :result    nil,
-                           :id        string?,
-                           :sym       'clojure.core/+,
-                           :fvar      #(or (fn? %) (var? %))
-                           :lease-end nil}
-                          (store/dequeue-task store))))
-
-            (testing "matching task"
-              (is (nil? (store/find-task store "")))
-              (is (match? (-> task
-                              (assoc :state :pending)
-                              (dissoc :fvar))
-                          (store/find-task store (:id task)))))
-
-            (testing "reenqueue pending"
-              (let [args (atom nil)
-                    cb   (fn [t] (reset! args t))]
-                (store/reenqueue-pending-tasks store cb)
-
-                (testing "callback"
-                  (is (match? (-> task
-                                  (assoc :state :pending)
-                                  (dissoc :fvar))
-                              @args)))
-
-                (testing "result"
-                  (is (match? [{:args   ["invoke" 333],
-                                :ref    "self",
-                                :root   "self",
-                                :type   :workflow,
-                                :state  :new,
-                                :result nil,
-                                :id     string?,
-                                :sym    'clojure.core/+}]
-                              (store/list-tasks store))))))
-
-            (testing "task event handling"
-              ;; move to pending
-              (store/dequeue-task store)
-
-              (let [[db-task] (store/list-tasks store)]
-
-                (testing "invoke"
-                  (let [ev-descr {:ref "self" :root "self" :type :intemporal.workflow/invoke :sym 'clojure.core/+ :args ["invoke" 333]}
-                        ev       (store/task<-event store (:id db-task) ev-descr)
-                        [task] (store/list-tasks store)]
-                    (is (match? {:ref  "self"
-                                 :root "self"
-                                 :type :intemporal.workflow/invoke
-                                 :sym  'clojure.core/+
-                                 :args ["invoke" 333]}
-                                ev))
-                    (is (match? {:args   ["invoke" 333],
-                                 :ref    "self",
-                                 :root   "self",
-                                 :type   :workflow,
-                                 :state  :pending,
-                                 :sym    'clojure.core/+,
-                                 :result nil,
-                                 :id     string?,}
-                                task)))
-
-                  (testing "ok"
-                    (let [ev-descr {:ref "self" :root "self" :type :intemporal.workflow/success :sym 'clojure.core/+ :result ["result"]}
-                          ev       (store/task<-event store (:id db-task) ev-descr)
-                          [task] (store/list-tasks store)]
-                      (is (match? {:ref    "self"
-                                   :root   "self"
-                                   :type   :intemporal.workflow/success
-                                   :sym    'clojure.core/+
-                                   :result ["result"]}
-                                  ev))
-                      (is (match? {:args   ["invoke" 333],
-                                   :ref    "self",
-                                   :root   "self",
-                                   :type   :workflow,
-                                   :state  :success,
-                                   :sym    'clojure.core/+,
-                                   :result ["result"],
-                                   :id     string?}
-                                  task)))))
-
-                ;; TODO need to reenqueue another task
-                #_(testing "error"
-                    (let [ex       {:some "exception" :data false}
-                          ev-descr {:ref "self" :root "self" :type :intemporal.workflow/failure :sym 'clojure.core/+ :error ex}
-                          ev       (store/task<-event store (:id db-task) ev-descr)
-                          [task] (store/list-tasks store)]
-
-                      (is (match? {:ref   "self"
-                                   :root  "self"
-                                   :type  :intemporal.workflow/failure
-                                   :sym   'clojure.core/+
-                                   :error ex}
-                                  ev))
-
-                      (is (match? {:args   ["invoke" 333],
-                                   :ref    "self",
-                                   :root   "self",
-                                   :type   :workflow,
-                                   :state  :failure,
-                                   :sym    'clojure.core/+,
-                                   :result ex
-                                   :id     string?}
-                                  task))))))))
-
-        (testing "task await+watch"
-          (let [task    (internal/create-workflow-task "self" "self" 'clojure.core/- (var-get #'-) ["invoke" 333] "4")
-                task-id (:id task)
-                storage (atom nil)]
-
-            (store/enqueue-task store task)
-            (store/watch-task store task-id (fn [t] (reset! storage t)))
-            (store/dequeue-task store)
-
-            ;; wait a bit so watchers can fire
-            (Thread/sleep 3000)
-            (is (match? {:args   ["invoke" 333]
-                         :ref    "self"
-                         :root   "self"
-                         :type   :workflow
-                         :state  :pending
-                         :sym    'clojure.core/-
-                         :result nil
-                         :id     string?}
-                        @storage))))))))
-
-(comment
-  (clojure.test/run-tests *ns*))
-
diff --git a/test2/intemporal/stores/basic_workflow_test.clj b/test2/intemporal/stores/basic_workflow_test.clj
deleted file mode 100644
index f5e2615..0000000
--- a/test2/intemporal/stores/basic_workflow_test.clj
+++ /dev/null
@@ -1,47 +0,0 @@
-(ns ^:integration ^:fdb ^:sql intemporal.stores.basic-workflow-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.store.foundationdb :as fdb]
-            [intemporal.store.jdbc :as jdbc]
-            [intemporal.workflow :as w]
-            [intemporal.macros :as macros :refer [stub-protocol defn-workflow]]
-            [intemporal.test-utils :as tu]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defprotocol ProtocolActivity
-  (some-io [this val]))
-
-(def example-impl
-  (reify
-    ProtocolActivity
-    (some-io [_ val] val)))
-
-;;;;
-;; workflow registration
-
-(defn-workflow run-workflow []
-  (let [stub (stub-protocol ProtocolActivity)]
-    (some-io stub :ok)))
-
-(def stores (delay {:memory   (store/make-store)
-                    :fdb      (fdb/make-store {:cluster-file-path "docker/fdb.cluster"})
-                    :postgres (jdbc/make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                                                :migration-dir "migrations/postgres"
-                                                :polling-ms 10})}))
-
-(deftest basic-workflow-test
-  (doseq [[label store] @stores]
-    (testing (format "store: %s" label)
-
-      (testing "running a workflow"
-        (store/clear-events store)
-        (store/clear-tasks store)
-
-        (let [ex (w/start-poller! store {:protocols {`ProtocolActivity example-impl}
-                                         :polling-ms 10})]
-          (try
-            (is (= :ok (w/with-env {:store store}
-                         (run-workflow))))
-            (finally
-              (w/shutdown ex 0))))))))
diff --git a/test2/intemporal/stores/lots_of_workflows_test.clj b/test2/intemporal/stores/lots_of_workflows_test.clj
deleted file mode 100644
index bb1f98d..0000000
--- a/test2/intemporal/stores/lots_of_workflows_test.clj
+++ /dev/null
@@ -1,71 +0,0 @@
-(ns ^:integration ^:fdb ^:sql intemporal.stores.lots-of-workflows-test
-  (:require [clojure.test :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.store.foundationdb :as fdb]
-            [intemporal.store.jdbc :as jdbc]
-            [intemporal.workflow :as w]
-            [intemporal.macros :refer [stub-protocol defn-workflow]]
-            [intemporal.test-utils :as tu :refer [wait]]
-            [matcher-combinators.test :refer [match?]]
-            [promesa.core :as p]))
-
-(defprotocol MyActivities
-  (foo [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a]
-    (Thread/sleep (long (rand-int 100)))
-    [:proto a]))
-
-(defn-workflow my-workflow []
-  (let [pr  (stub-protocol MyActivities {})
-        prr (foo pr :pr)]
-    prr))
-
-(def iterations 100)
-
-(def stores (delay {:memory   (store/make-store)
-                    :fdb      (fdb/make-store {:cluster-file-path "docker/fdb.cluster"})
-                    :postgres (jdbc/make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                                                :migration-dir "migrations/postgres"
-                                                :maximum-pool-size 20})}))
-
-
-(deftest lots-of-workflows-test
-  (with-redefs [tu/wait-default-timeout 10000]
-    (doseq [[label store] @stores]
-      (testing (format "store: %s" label)
-
-        (store/clear-events store)
-        (store/clear-tasks store)
-
-        (testing "multiple iterations"
-          (dotimes [_ iterations]
-            (p/vthread
-              (w/with-env {:store store}
-                ;; workflows are blocking, we wrap in a virtual thread
-                (my-workflow))))
-
-          ;; check that all tasks are enqueued
-          (wait (= iterations (count (store/list-tasks store)))
-            (let [wflows (store/list-tasks store)]
-              (testing "workflows are all new"
-                (is (= iterations (count wflows)))
-                (is (= #{:new} (set (map :state wflows))))))))
-
-        (testing "enqueue all jobs"
-          (let [ex (w/start-poller! store {:protocols {`MyActivities (->MyActivitiesImpl)}
-                                           :polling-ms 100})]
-            ;; lets wait for all pending
-            (try
-              (wait (not (contains? (into #{} (map :state (store/list-tasks store))) :new))
-                (w/shutdown ex 10000))
-
-              (testing "workflows are all completed"
-                (let [tasks (store/list-tasks store)]
-                  (is (= (* 2 iterations) (count tasks)))
-                  (is (match? {:success (* 2 iterations)}
-                              (frequencies (map :state tasks))))))
-              (finally
-                (w/shutdown ex 0)))))))))
diff --git a/test2/intemporal/stores/multiple_shutdown_test.clj b/test2/intemporal/stores/multiple_shutdown_test.clj
deleted file mode 100644
index 03144f6..0000000
--- a/test2/intemporal/stores/multiple_shutdown_test.clj
+++ /dev/null
@@ -1,71 +0,0 @@
-(ns intemporal.stores.multiple-shutdown-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.store.foundationdb :as fdb]
-            [intemporal.store.jdbc :as jdbc]
-            [intemporal.workflow :as w]
-            [intemporal.macros :refer [stub-protocol defn-workflow]]
-            [intemporal.test-utils :as tu]
-            [matcher-combinators.test :refer [match?]]
-            [matcher-combinators.matchers :as m]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(def stores (delay {:memory   (store/make-store)
-                    :fdb      (fdb/make-store {:cluster-file-path "docker/fdb.cluster"})
-                    :postgres (jdbc/make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                                                :migration-dir "migrations/postgres"})}))
-
-(def activity-counter (atom 0))
-
-(defprotocol MyActivities
-  (sleep [this time]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (sleep [this a]
-    (swap! activity-counter inc)
-    (Thread/sleep (long a))))
-
-(defn-workflow my-workflow [steps max-sleep]
-  (let [stub (stub-protocol MyActivities {})]
-    (dotimes [i steps]
-      (sleep stub max-sleep))
-    :done))
-
-;;;; test proper
-
-(deftest executor-shutdown-test
-  (testing "workflow eventually finishes"
-    (let [store          (store/make-store {})
-          polling-ms     500
-          make-poller    (fn [] (w/start-poller! store {:protocols  {`MyActivities (->MyActivitiesImpl)}
-                                                        :polling-ms polling-ms}))
-          executor       (atom (make-poller))
-
-          steps          2
-          max-timeout    500
-
-          workflow-id    "f100ded0-0000-4000-a000-000000000000"
-          future-res     (future
-                           (w/with-env {:store store
-                                        :id    workflow-id}
-                                       (my-workflow steps max-timeout)))
-          reenqueued     (atom [])]
-
-      (testing "shutdown of ongoing workflow"
-        (add-watch activity-counter ::watch (fn [_ _ _ v]
-                                              (when (and (zero? (mod v 2))
-                                                         (empty? @reenqueued))
-                                                (w/shutdown @executor max-timeout)
-                                                (store/reenqueue-pending-tasks store #(swap! reenqueued conj %))
-                                                (reset! executor (make-poller)))))
-        (try
-          (is (= :done (deref future-res 10000 ::error)))
-
-          (finally
-            (testing "workflow was re-enqueued"
-              (is (match? (m/embeds [{:type :workflow :sym 'intemporal.stores.multiple-shutdown-test/my-workflow-}])
-                          @reenqueued)))
-            (w/shutdown @executor 0)
-            (tu/print-tables store)))))))
diff --git a/test2/intemporal/stores/release_reenqueue_test.clj b/test2/intemporal/stores/release_reenqueue_test.clj
deleted file mode 100644
index a41c32d..0000000
--- a/test2/intemporal/stores/release_reenqueue_test.clj
+++ /dev/null
@@ -1,79 +0,0 @@
-(ns ^:integration ^:fdb ^:sql intemporal.stores.release-reenqueue-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.store.foundationdb :as fdb]
-            [intemporal.store.jdbc :as jdbc]
-            [intemporal.workflow :as w]
-            [intemporal.macros :refer [stub-protocol defn-workflow]]
-            [intemporal.test-utils :as tu :refer [with-result]])
-  (:import (java.util.concurrent Phaser)))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(def stores (delay {:memory   (store/make-store)
-                    :fdb      (fdb/make-store {:cluster-file-path "docker/fdb.cluster"})
-                    :postgres (jdbc/make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                                                :migration-dir "migrations/postgres"})}))
-
-(def activity-invoked? (Phaser. 1))
-(def executor-shutdown? (Phaser. 1))
-
-(defprotocol MyActivities
-  (foo [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a]
-    (.arrive activity-invoked?)
-    (.awaitAdvance executor-shutdown? (.getPhase executor-shutdown?))
-    :foo))
-
-(defn-workflow my-workflow [k]
-  (let [stub (stub-protocol MyActivities {})
-        prr  (foo stub :pr)]
-    prr))
-
-;;;; test proper
-
-(deftest release-reenqueue-test
-  (doseq [[label store] @stores]
-
-    (store/clear-events store)
-    (store/clear-tasks store)
-
-    (testing (format "store: %s" label)
-      (let [executor (w/start-poller! store {:protocols  {`MyActivities (->MyActivitiesImpl)}
-                                             :polling-ms 100})]
-
-        (testing "shutdown of ongoing workflow"
-          ;; give it some time so the poller can pick it up but just once
-          (let [fut (future
-                      ;; ensure activity is inflight
-                      (.awaitAdvance activity-invoked? (.getPhase activity-invoked?))
-                      (w/shutdown executor 0)
-                      ;; proceed activity, it will fail
-                      (.arrive executor-shutdown?)
-                      :done)]
-
-            (with-result [res (w/with-env {:store store}
-                                (my-workflow :ok))]
-
-              (is (instance? Exception res)))
-
-            (is (= :done (deref fut 1000 ::error)))))
-
-        (testing "Tasks are pending"
-          (let [[task] (store/list-tasks store)]
-            (tu/print-tables store)
-            (is (= :pending (:state task)))))
-
-        (testing "Tasks are released"
-          (store/release-pending-tasks store)
-          (let [[task] (store/list-tasks store)]
-            (is (nil? (:owner task)))))
-
-        (testing "Tasks are reenqueued"
-          (store/reenqueue-pending-tasks store (constantly nil))
-          (let [[task] (store/list-tasks store)]
-            (is (= store/default-owner (:owner task)))
-            (is (= :new (:state task)))))))))
diff --git a/test2/intemporal/stores/saga_test.clj b/test2/intemporal/stores/saga_test.clj
deleted file mode 100644
index 03b95d0..0000000
--- a/test2/intemporal/stores/saga_test.clj
+++ /dev/null
@@ -1,77 +0,0 @@
-(ns ^:integration ^:fdb ^:sql intemporal.stores.saga-test
-  (:require [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.store.foundationdb :as fdb]
-            [intemporal.store.jdbc :as jdbc]
-            [intemporal.workflow :as w]
-            [intemporal.macros :as macros :refer [stub-protocol defn-workflow with-failure]]
-            [intemporal.test-utils :as tu]
-            [spy.core :as spy]
-            [spy.protocol :as pspy]))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defprotocol ProtocolActivity
-  (some-io [this val])
-  (some-failing-io [this val])
-  (finalize [this val])
-  (compensate [this val]))
-
-(def example-impl
-  (reify
-    ProtocolActivity
-    (some-io [_ val] val)
-    (some-failing-io [_ val] (throw (RuntimeException. "forced")))
-    (finalize [_ val] val)
-    (compensate [_ val] {:compensate val})))
-
-;;;;
-;; workflow registration
-
-(defn-workflow run-workflow []
-  (let [stub (stub-protocol ProtocolActivity)]
-    (try
-      (let [v  (with-failure [r (some-io stub "initial")]
-                 (compensate stub r))
-            v2 (with-failure [r (some-failing-io stub v)]
-                 (compensate stub r))
-            v3 (finalize stub v2)]
-        v3)
-      (catch Exception e
-        (w/compensate)
-        ::failed))))
-
-(def stores (delay {:memory   (store/make-store)
-                    :fdb      (fdb/make-store {:cluster-file-path "docker/fdb.cluster"})
-                    :postgres (jdbc/make-store {:jdbcUrl       "jdbc:postgresql://localhost:5432/root?user=root&password=root"
-                                                :migration-dir "migrations/postgres"})}))
-
-(deftest saga-test
-  (doseq [[label store] @stores]
-    (testing (format "store: %s" label)
-
-      (testing "running a workflow"
-        (store/clear-events store)
-        (store/clear-tasks store)
-
-        (let [spied-impl  (pspy/spy ProtocolActivity example-impl)
-              stop-worker (w/start-worker! store {:protocols {`ProtocolActivity spied-impl}})]
-          (try
-            (testing "workflow run"
-              (is (= ::failed (w/with-env {:store store}
-                                (run-workflow)))))
-
-            (testing "protocol calls"
-              (let [{:keys [some-io some-failing-io finalize compensate]} (meta spied-impl)]
-                (is (spy/called-once-with? some-io spied-impl "initial"))
-                (is (spy/called-once-with? some-failing-io spied-impl "initial"))
-                (is (not (spy/called-once? finalize)))
-
-                (testing "compensation calls in reverse order"
-                  (let [calls (spy/calls compensate)]
-                    (is (= [[spied-impl :intemporal.activity/failure]
-                            [spied-impl "initial"]]
-                           calls))))))
-
-            (finally
-              (stop-worker))))))))
diff --git a/test2/intemporal/test_executor.clj b/test2/intemporal/test_executor.clj
deleted file mode 100644
index 31a38c7..0000000
--- a/test2/intemporal/test_executor.clj
+++ /dev/null
@@ -1,42 +0,0 @@
-(ns intemporal.test-executor
-  (:require [intemporal.workflow :as w]
-            [taoensso.telemere :as t])
-  (:import (java.lang AutoCloseable)
-           (java.util.concurrent Executors TimeUnit)))
-
-(defn make-test-executor [shutdown-fn? terminated-fn?]
-  (let [factory (-> (Thread/ofVirtual)
-                    (.name "Task Thread")
-                    (.factory))
-        exec    (Executors/newThreadPerTaskExecutor factory)
-        shutdown? (atom false)
-        terminated? (atom false)]
-    (reify
-      w/ITaskExecutor
-      (submit [_ f]
-        (.submit exec ^Runnable f))
-      (shutdown [_ grace-period-ms]
-        (try
-          ;; reject tasks
-          (.shutdown exec)
-          (when (ifn? shutdown-fn?)
-            (shutdown-fn?))
-          (reset! shutdown? true)
-          (t/log! {:level :debug} ["Executor shutdown"])
-          ;; await ongoing tasks
-          (when-not (.awaitTermination exec grace-period-ms TimeUnit/MILLISECONDS)
-            (t/log! {:level :debug} ["Executor shutdown grace period over, shutting down NOW"])
-            (.shutdownNow exec))
-          ;; in case we got interrupted exception, make sure to set the flag
-          ;; so ongoing ops fail
-          (finally
-            (when (ifn? terminated-fn?)
-              (terminated-fn?))
-            (reset! terminated? true))))
-      (terminated? [_]
-        @terminated?)
-      (shutting-down? [_]
-        @shutdown?)
-      AutoCloseable
-      (close [this]
-        (w/shutdown this 0)))))
\ No newline at end of file
diff --git a/test2/intemporal/test_utils.cljc b/test2/intemporal/test_utils.cljc
deleted file mode 100644
index 3c6f3c4..0000000
--- a/test2/intemporal/test_utils.cljc
+++ /dev/null
@@ -1,179 +0,0 @@
-(ns intemporal.test-utils
-  #?(:cljs (:require [intemporal.store :as store]
-                     [intemporal.workflow.internal :as in]
-                     [promesa.core :as p]
-                     [taoensso.telemere :as telemere]
-                     [taoensso.telemere.utils :as tutils]
-                     [cljs.test :as t]
-                     [cljs.pprint :as pprint]))
-  #?(:clj (:require [intemporal.store :as store]
-                    [intemporal.workflow.internal :as in]
-                    [promesa.core :as p]
-                    [taoensso.telemere :as telemere]
-                    [taoensso.telemere.utils :as tutils]
-                    [taoensso.telemere.open-telemetry :as tot]
-                    [net.cgrand.macrovich :as macros]
-                    [clojure.pprint :as pprint]))
-  #?(:cljs (:require-macros [net.cgrand.macrovich :as macros]
-                            [intemporal.test-utils :refer [with-result wait]]))
-  #?(:clj (:import [java.util.concurrent TimeoutException])))
-
-;;;;
-;; helpers
-
-(defn now []
-  #?(:clj  (System/currentTimeMillis)
-     :cljs (.getTime (js/Date.))))
-
-(defn- make-task [& {:keys [proto type id ref root sym fvar args result state]
-                     :or   {proto  nil
-                            type   :workflow
-                            id     (in/random-id)
-                            ref    "some-ref"
-                            root   "some-root"
-                            sym    'identity
-                            fvar   #'identity
-                            args   []
-                            result nil
-                            state  :new}}]
-  (cond
-    (= type :workflow)
-    (in/create-workflow-task ref root sym fvar args id result state nil)
-    (= type :activity)
-    (in/create-activity-task ref root sym fvar args id result state nil)
-    (= type :proto-activity)
-    (in/create-proto-activity-task proto ref root sym fvar args id result state nil)
-    :else (throw (ex-info (str "Unknown task type:" type) {:type type}))))
-
-(defn make-workflow-task [& {:keys [] :as args}]
-  (make-task (assoc args :type :workflow)))
-
-(comment "unused"
-  (defn make-activity-task [& {:keys [] :as args}]
-    (make-task (assoc args :type :activity)))
-
-  (defn make-protocol-task [& {:keys [] :as args}]
-    (make-task (assoc args :type :proto-activity))))
-
-;;;;
-;; misc
-
-(defn print-tables
-  "Prints the task and events tables to sysout via pprint"
-  [store]
-  (let [tasks  (store/list-tasks store)
-        events (->> (store/list-events store)
-                    (sort-by :id))]
-    (locking *out*
-      (print "==================== TASKS")
-      (pprint/print-table tasks)
-      (println "\n==================== EVENTS")
-      (pprint/print-table events)
-      (flush))))
-
-(defn wait-for-task
-  "Waits for the task with given id to reach terminal state"
-  ;; only works in clj, should probably take a body and be a macro
-  ([store id]
-   (wait-for-task store id {:timeout 10000 :sleep-ms 100}))
-  ([store id {:keys [timeout sleep-ms]}]
-   (let [start (now)]
-     #_:clj-kondo/ignore
-     @(p/loop [task (store/find-task store id)]
-        (when (not (#{:failure :success} (:state task)))
-          (let [elapsed (- (now) start)]
-            (when (> elapsed timeout)
-              (throw (ex-info (str "More than " timeout " ms (" elapsed " ms) elapsed while waiting for task " id " to finish") {:task task})))
-            (p/then (p/delay sleep-ms id)
-                    (fn [_] (p/recur (store/find-task store id))))))))))
-
-;;;;
-;; macros
-
-(def ^:dynamic with-result-default-timeout 10000)
-(def ^:dynamic wait-default-timeout 3000)
-
-(defmacro with-result
-  "Promise-aware macro: the result can either be a value or a thrown exception.
-  Waits for result for 10 secs, then times out
-  Doesn't really work for exceptions returned as values
-  ```
-  (with-result [r (my-worfklow 1)]
-    (is (instance? Exception r))
-    (is (= 1 2)))
-  ```
-  "
-  [bindings & body]
-  (assert (vector? bindings) "First argument should be a binding of [res resbody]")
-  (let [[res resbody] bindings]
-    (macros/case
-      :clj
-      `(let [~res (let [future# (future (do ~resbody))]
-                    (try
-                      (deref future# with-result-default-timeout (TimeoutException. "Operation timed out."))
-                      (catch Exception e# e#)))]
-         ~@body)
-      :cljs
-      `(t/async done#
-         (js/setTimeout
-           (fn []
-             ;; force wrap resbody in a deferred
-             (p/finally (-> nil
-                            (p/then (fn [_#]
-                                      (do ~resbody)))
-                            (p/timeout with-result-default-timeout))
-                        (fn [res# err#]
-                          (try
-                            (let [~res (or res# err#)]
-                              (do ~@body))
-                            (finally
-                              (done#)))))
-             0))))))
-
-(defmacro wait
-  "Waits for 10 secs until the result is true, or throws;
-  In `clj` it polls every 100ms
-  In `cljs` it continuously loops
-  ```
-  (wait (db/find id)
-    (is (= 1 1))
-  ```
-  "
-  [condition & body]
-  (macros/case
-    :clj
-    `(let [timeout# wait-default-timeout
-           start# (System/currentTimeMillis)]
-       (loop []
-         (if ~condition
-           (do ~@body)
-           (if (> (- (System/currentTimeMillis) start#) timeout#)
-             (throw (ex-info "Timed out" {:timeout timeout#}))
-             (do (Thread/sleep 100)
-                 (recur))))))
-
-    :cljs
-    `(let [timeout# 3000
-           start# (.getTime (js/Date.))]
-       (loop []
-         (if ~condition
-           (do ~@body)
-           (if (> (- (.getTime (js/Date.)) start#) timeout#)
-             (throw (ex-info "Timed out" {:timeout timeout#}))
-             ;; Note: In CLJS this is a "busy wait" loop
-             (recur)))))))
-
-(defn setup-telemere []
-  ;#?(:clj (clojure.pprint/pprint (telemere/check-interop)))
-  (telemere/set-min-level! :info)
-  (telemere/remove-handler! ::custom)
-  #?(:clj (telemere/add-handler! :default/open-telemetry (tot/handler:open-telemetry)))
-  (telemere/add-handler! ::custom
-                  (telemere/handler:console
-                    {:output-fn
-                     (tutils/format-signal-fn
-                       {:content-fn (taoensso.telemere.utils/signal-content-fn {:incl-keys #{:thread}})})})))
-
-(def with-trace-logging
-  #?(:cljs {:before setup-telemere}
-     :clj  (fn with-trace-logging [f] (setup-telemere) (f))))
diff --git a/test2/intemporal/vthread-recovery.edn b/test2/intemporal/vthread-recovery.edn
deleted file mode 100644
index e66e892..0000000
--- a/test2/intemporal/vthread-recovery.edn
+++ /dev/null
@@ -1,62 +0,0 @@
-{:tasks    {"silly-mcclintock" {:args      [],
-                                :ref       nil,
-                                :type      :workflow,
-                                :state     :pending,
-                                :sym       intemporal.vthread-recovery-test/my-workflow-,
-                                :root      nil,
-                                :result    nil,
-                                :id        "silly-mcclintock",
-                                :owner     "intemporal",
-                                :order     1,
-                                :lease-end nil}},
- :history  {"silly-mcclintock"   [{:ref    "silly-mcclintock",
-                                   :root   "silly-mcclintock",
-                                   :type   :intemporal.workflow/invoke,
-                                   :sym    intemporal.vthread-recovery-test/my-workflow-,
-                                   :args   [],
-                                   :error  nil,
-                                   :result nil,
-                                   :id     17}
-                                  #_{:ref    "silly-mcclintock",
-                                     :root   "silly-mcclintock",
-                                     :type   :intemporal.workflow/success,
-                                     :sym    intemporal.vthread-recovery-test/my-workflow-,
-                                     :args   nil,
-                                     :error  nil,
-                                     :result [0 1 2 3 4 5 6 7 8 9],
-                                     :id     38}],
-            "silly-mcclintock-1" [{:ref    "silly-mcclintock-1",
-                                   :root   "silly-mcclintock",
-                                   :type   :intemporal.protocol/invoke,
-                                   :sym    intemporal.vthread-recovery-test/with-thread,
-                                   :args   [0],
-                                   :error  nil,
-                                   :result nil,
-                                   :id     18}
-                                  {:ref    "silly-mcclintock-1",
-                                   :root   "silly-mcclintock",
-                                   :type   :intemporal.protocol/success,
-                                   :sym    intemporal.vthread-recovery-test/with-thread,
-                                   :args   nil,
-                                   :error  nil,
-                                   :result 0,
-                                   :id     28}],
-            "silly-mcclintock-2" [{:ref    "silly-mcclintock-2",
-                                   :root   "silly-mcclintock",
-                                   :type   :intemporal.protocol/invoke,
-                                   :sym    intemporal.vthread-recovery-test/with-thread,
-                                   :args   [1],
-                                   :error  nil,
-                                   :result nil,
-                                   :id     19}
-                                  {:ref    "silly-mcclintock-2",
-                                   :root   "silly-mcclintock",
-                                   :type   :intemporal.protocol/success,
-                                   :sym    intemporal.vthread-recovery-test/with-thread,
-                                   :args   nil,
-                                   :error  nil,
-                                   :result 1,
-                                   :id     29}]}
- :counter  38,
- :pcounter 2,
- :ecounter 0}
diff --git a/test2/intemporal/vthread_recovery_test.clj b/test2/intemporal/vthread_recovery_test.clj
deleted file mode 100644
index bbb6c23..0000000
--- a/test2/intemporal/vthread_recovery_test.clj
+++ /dev/null
@@ -1,57 +0,0 @@
-(ns intemporal.vthread-recovery-test
-  (:require [clojure.java.io :as io]
-            [clojure.test :as t :refer [deftest is testing]]
-            [intemporal.store :as store]
-            [intemporal.workflow :as w]
-            [intemporal.macros :refer [stub-protocol vthread defn-workflow]]
-            [intemporal.test-utils :as tu]
-            [promesa.core :as p]))
-
-;;;;
-;; demo - recovery of a crashed process
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defprotocol ThreadActivity
-  (with-thread [this id]))
-
-(defrecord ThreadActivityImpl []
-  ThreadActivity
-  (with-thread [this id]
-    (Thread/sleep 200)
-    id))
-
-(def nthreads 10)
-
-(defn-workflow my-workflow []
-  (let [pr    (stub-protocol ThreadActivity {})
-        proms (for [i (range nthreads)]
-                (vthread
-                  (with-thread pr i)))]
-    ;; at this point, all of `with-thread` calls are queued, so
-    ;; this code is deterministic up to here
-    @(p/all proms)))
-
-(deftest vthread-recovery-test
-  ;; make a backup of the db to allow replay
-  (io/copy (io/file "./test/intemporal/vthread-recovery.edn")
-           (io/file "/tmp/intemporal-vthread-recovery.edn"))
-  (let [mstore (store/make-store {:file "/tmp/intemporal-vthread-recovery.edn"})
-        ex     (w/start-poller! mstore {:protocols {`ThreadActivity (->ThreadActivityImpl)}})]
-
-    (store/reenqueue-pending-tasks mstore println)
-
-    (let [[task] (store/list-tasks mstore)]
-      (tu/wait-for-task mstore (:id task))
-      (tu/print-tables mstore))
-
-    (testing "linear history"
-      (testing "stored events"
-        (let [evts (store/list-events mstore)
-              evts (sort-by :id evts)]
-
-          (testing "workflow has result"
-            (is (= (into [] (range nthreads))
-                   (-> evts last :result)))))))
-
-    (w/shutdown ex 1000)))
diff --git a/test2/intemporal/vthread_test.cljc b/test2/intemporal/vthread_test.cljc
deleted file mode 100644
index 7d751c1..0000000
--- a/test2/intemporal/vthread_test.cljc
+++ /dev/null
@@ -1,95 +0,0 @@
-(ns intemporal.vthread-test
-  #?(:cljs (:require [cljs.test :as t :refer-macros [deftest is testing]]
-                     [cljs.pprint :as pprint]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]
-                     [promesa.core :as p])
-     :clj  (:require [clojure.test :as t :refer [deftest is testing]]
-                     [clojure.pprint :as pprint]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]
-                     [promesa.core :as p]))
-  #?(:cljs (:require-macros [intemporal.macros :refer [stub-protocol defn-workflow vthread]]
-                            [intemporal.test-utils :refer [with-result]])
-     :clj  (:require [intemporal.macros :refer [stub-protocol defn-workflow vthread]]
-                     [intemporal.test-utils :refer [with-result]])))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defprotocol ThreadActivity
-  (sleep [this id ms]))
-
-(defrecord ThreadActivityImpl []
-  ThreadActivity
-  (sleep [this id ms]
-    #?(:clj (do
-              (Thread/sleep (long ms))
-              id)
-       :cljs (p/then (p/delay ms id)
-                     (fn [_] id)))))
-
-(defn-workflow my-workflow [sleep-time]
-  (let [pr    (stub-protocol ThreadActivity {})
-        proms (->> (for [i (range 10)]
-                     (vthread
-                       (sleep pr i sleep-time)))
-                   (doall))]
-    #?(:clj (Thread/sleep (long sleep-time)))
-    (p/all proms)))
-
-(deftest workflow-with-vthread-test
-  (let [sleep-time (+ 1000 (rand-int 500))]
-    (testing "workflow"
-      (let [mstore   (store/make-store)
-            executor (w/start-poller! mstore {:protocols {`ThreadActivity (->ThreadActivityImpl)}
-                                              :polling-ms 10})
-
-            start    (store/now)]
-
-        ;; cljs runtimes return promises
-        ;; clj runtime will run synchronously
-        (with-result [v (w/with-env {:store mstore}
-                          (my-workflow sleep-time))]
-
-          (testing "result"
-            (is (= (range 10)
-                   v)))
-          (testing "ran every activity concurrently"
-            (let [elapsed (- (store/now) start)]
-              (is (>= elapsed sleep-time) "Should take at least `sleep-time` to run")
-              (is (< elapsed (* sleep-time 2)) "Should not take more than 2x sleep time to run")))
-
-          (testing "linear history"
-            (testing "stored events"
-              (let [evts  (->> (store/list-events mstore)
-                               (filterv #(= (:type %) :intemporal.protocol/invoke))
-                               (sort-by :id))
-                    aargs (map :args evts)]
-
-                (testing "sequential activity invocation args"
-                  ;; even though each activity runs in a thread, they are started in order
-                  ;; this ensures determinism
-
-                  (is (= [[0 sleep-time] [1 sleep-time] [2 sleep-time] [3 sleep-time] [4 sleep-time] [5 sleep-time] [6 sleep-time] [7 sleep-time] [8 sleep-time] [9 sleep-time]]
-                         aargs))))))
-
-          (w/shutdown executor 0)
-
-          ;; debugging
-          (let [tasks        (sort-by :order (store/list-tasks mstore))
-                events       (->> (store/list-events mstore)
-                                  (sort-by :id))
-                pprint-table (fn [table]
-                               (->> table
-                                    (map (fn [r]
-                                           (cond-> r
-                                                   (contains? r :fvar) (assoc :fvar "<fn...>"))))
-                                    (pprint/print-table)))]
-            (pprint-table tasks)
-            (pprint-table events)))))))
-
-#_:clj-kondo/ignore
-(comment
-  (cljs.test/run-tests *ns*))
diff --git a/test2/intemporal/workflow_test.cljc b/test2/intemporal/workflow_test.cljc
deleted file mode 100644
index 4d64941..0000000
--- a/test2/intemporal/workflow_test.cljc
+++ /dev/null
@@ -1,119 +0,0 @@
-(ns intemporal.workflow-test
-  #?(:cljs (:require [cljs.test :as t :refer-macros [deftest is testing]]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]
-                     [matcher-combinators.test :refer [match?]]
-                     [promesa.core :as p])
-     :clj  (:require [clojure.test :as t :refer [deftest is testing]]
-                     [intemporal.store :as store]
-                     [intemporal.workflow :as w]
-                     [intemporal.test-utils :as tu]
-                     [matcher-combinators.test :refer [match?]]))
-  #?(:cljs (:require-macros [intemporal.macros :refer [env-let stub-function stub-protocol defn-workflow]]
-                            [intemporal.test-utils :refer [with-result]])
-     :clj  (:require [intemporal.macros :refer [stub-function stub-protocol defn-workflow]]
-                     [intemporal.test-utils :refer [with-result]])))
-
-(t/use-fixtures :once tu/with-trace-logging)
-
-(defn nested-fn [a]
-  [a :nested])
-
-(defn activity-fn [a]
-  #?(:clj
-     (let [f (stub-function nested-fn)]
-       (f :sub))
-
-     :cljs
-     (env-let [f (stub-function nested-fn)]
-              (f :sub))))
-
-(defprotocol MyActivities
-  (foo [this a]))
-
-(defrecord MyActivitiesImpl []
-  MyActivities
-  (foo [this a] [:proto a]))
-
-(defn-workflow my-workflow [atm]
-  (reset! atm (w/workflow-id))
-
-  (let [sf  (stub-function activity-fn)
-        pr  (stub-protocol MyActivities {})
-        sfr (sf 1)
-        prr (foo pr :pr)]
-
-    ;; chain values: ensure tests work under cljs too
-    #_:clj-kondo/ignore
-    (#?(:clj let :cljs p/let) [v1 sfr
-                               v2 prr]
-
-      [:root v1 v2])))
-
-;;;; test proper
-
-(deftest workflow-happy-path-test
-  (testing "workflow"
-    (let [mstore      (store/make-store)
-          ex          (w/start-poller! mstore {:protocols {`MyActivities (->MyActivitiesImpl)}})
-          uuid-store  (atom nil)
-          workflow-id (str (random-uuid))]
-
-      (with-result [v (w/with-env {:store mstore
-                                   :id    workflow-id}
-                                  (my-workflow uuid-store))]
-
-        (testing "workflow result"
-          (is (= [:root [:sub :nested] [:proto :pr]]
-                 v)))
-
-        (testing "stored events"
-          (let [evts (store/list-events mstore)
-                evts (sort-by :id evts)
-                ;; cljs is promise based, so stubs dont run in lexical order
-                ;; due to p/let
-                #?(:clj  [w1 a1 n1 n2 a2 p1 p2 w2]
-                   :cljs [w1 a1 p1 p2 n1 n2 a2 w2]) evts]
-
-            (tu/print-tables mstore)
-
-            (testing "workflow uuid"
-              (is (every? #(= @uuid-store %) (map :root evts))))
-
-            (testing "workflow events"
-              (is (match? {:type :intemporal.workflow/invoke :sym 'intemporal.workflow-test/my-workflow- #_#_:args [uuid-store]} w1))
-              (is (match? {:type :intemporal.workflow/success :sym 'intemporal.workflow-test/my-workflow-} w2)))
-
-            (testing "activity events"
-              (is (match? {:type :intemporal.activity/invoke :sym 'intemporal.workflow-test/activity-fn :args [1]} a1))
-              (is (match? {:type :intemporal.activity/success :sym 'intemporal.workflow-test/activity-fn} a2)))
-
-            (testing "nested activity events"
-              (is (match? {:type :intemporal.activity/invoke :sym 'intemporal.workflow-test/nested-fn :args '(:sub)} n1))
-              (is (match? {:type :intemporal.activity/success :sym 'intemporal.workflow-test/nested-fn} n2)))
-
-            (testing "protocol activity events"
-              (is (match? {:type :intemporal.protocol/invoke :sym 'intemporal.workflow-test/foo :args [:pr]} p1))
-              (is (match? {:type :intemporal.protocol/success :sym 'intemporal.workflow-test/foo} p2)))))
-
-        (testing "stored tasks"
-          (let [tasks (store/list-tasks mstore)
-                ;; due to promises,
-                ;; the order of execution is not exactly the same between clj/cljs
-                #?(:clj  [w1]
-                   :cljs [w1]) tasks]
-            (tu/print-tables mstore)
-
-            (testing "workflow task"
-              (is (match? {:type :workflow :sym 'intemporal.workflow-test/my-workflow- :state :success} w1)))
-
-            (testing "workflow uuid"
-              (is (some #(= @uuid-store %) (map :id tasks)))
-              (is (= @uuid-store workflow-id)))))
-
-        (w/shutdown ex 1000)))))
-
-#_:clj-kondo/ignore
-(comment
-  (cljs.test/run-tests *ns*))

From 7de63957f059abc80ce1409481975bce11b9eade Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Wed, 3 Jun 2026 18:01:08 +0100
Subject: [PATCH 6/9] add saga

---
 README.md                                     |  26 +++
 src/intemporal/core.cljc                      |  24 +++
 src/intemporal/internal/context.cljc          |  16 +-
 src/intemporal/internal/execution.clj         | 131 ++++++++---
 src/intemporal/internal/execution.cljs        | 143 +++++++++---
 src/intemporal/observer.cljc                  |  21 +-
 src/intemporal/observer/otel.clj              |  20 +-
 src/intemporal/protocol.cljc                  |   5 +-
 .../crash/saga_compensation_crash_test.clj    |  88 ++++++++
 test/intemporal/tests/saga_test.clj           | 204 ++++++++++++++++++
 test/intemporal/tests/saga_test.cljs          | 108 ++++++++++
 11 files changed, 720 insertions(+), 66 deletions(-)
 create mode 100644 test/intemporal/tests/crash/saga_compensation_crash_test.clj
 create mode 100644 test/intemporal/tests/saga_test.clj
 create mode 100644 test/intemporal/tests/saga_test.cljs

diff --git a/README.md b/README.md
index ddb1580..ff4fdbb 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,32 @@ Examples:
     (println result)))
 ```
 
+### Saga / compensations
+
+`with-failure` registers a compensation for a step. If the step succeeds but the
+workflow later fails, registered compensations run in reverse order (LIFO). A
+step that fails registers no compensation (nothing was created, so nothing to
+undo). Compensations should themselves be activity stubs so they are durable and
+replay-safe.
+
+```clojure
+(defn booking-saga [order]
+  (let [book-hotel    (intemporal/stub #'book-hotel)
+        book-flight   (intemporal/stub #'book-flight)
+        charge-card   (intemporal/stub #'charge-card)
+        cancel-hotel  (intemporal/stub #'cancel-hotel)
+        cancel-flight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (book-hotel order)]
+      (cancel-hotel h))
+    (intemporal/with-failure [f (book-flight order)]
+      (cancel-flight f))
+    ;; if charge-card throws, cancel-flight then cancel-hotel run automatically
+    (charge-card order)))
+```
+
+`intemporal/add-compensation` is the underlying function if you need to register
+a compensation thunk directly.
+
 # TODO
 
 - [X] Activites + Workflows
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index 38baec0..c2a7c78 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -565,6 +565,30 @@
   [proto & opts]
   `(im/stub-protocol ~proto ~@opts))
 
+;; ============================================================================
+;; Saga / Compensations
+;; ============================================================================
+
+(defn add-compensation
+  "Register a 0-arg compensation thunk. If the workflow later fails, registered
+   compensations run in reverse order (LIFO). Compensations should call activity
+   stubs so they are durable / replay-safe."
+  [f]
+  (ctx/add-compensation! f))
+
+(defmacro with-failure
+  "Run `body`. If `body` succeeds, register `comp-fn` (with `binding` bound to
+   body's result) to run if the workflow later fails. If `body` fails, no
+   compensation is registered - a step that never completed needs no undo.
+
+   Example:
+   (with-failure [v (book-hotel order)]
+     (cancel-hotel v))"
+  [[binding body] comp-fn]
+  `(let [~binding ~body]
+     (ctx/add-compensation! (fn [] ~comp-fn))
+     ~binding))
+
 ;; ============================================================================
 ;; Convenience Functions
 ;; ============================================================================
diff --git a/src/intemporal/internal/context.cljc b/src/intemporal/internal/context.cljc
index 548a790..e7d8dac 100644
--- a/src/intemporal/internal/context.cljc
+++ b/src/intemporal/internal/context.cljc
@@ -20,6 +20,7 @@
     :seq-counter (atom 0)
     :pending-events pending-events
     :pending-asyncs pending-asyncs
+    :compensations (atom [])
     :store store
     :registry registry
     :observer observer
@@ -36,7 +37,13 @@
 
 (defn check-cancelled! []
   (let [ctx (current-context)]
-    (when (p/is-cancelled? (:store ctx) (:workflow-id ctx))
+    ;; :compensating-cancel? is set on the context during the cancellation
+    ;; compensation pass so the body can replay (rebuilding the compensation
+    ;; stack) and compensating activities can schedule despite the cancel flag.
+    ;; It lives in the context map (not a dynamic var) so it propagates across
+    ;; cljs async boundaries via blet/bthen. See cancellation-compensation-pass.
+    (when (and (not (:compensating-cancel? ctx))
+               (p/is-cancelled? (:store ctx) (:workflow-id ctx)))
       (throw (error/workflow-cancelled-exception)))))
 
 (defn next-seq! []
@@ -67,6 +74,13 @@
   (let [ctx (current-context)]
     (swap! (:pending-asyncs ctx) conj async-info)))
 
+(defn add-compensation!
+  "Register a 0-arg compensation thunk onto the current workflow's compensation
+   stack. Compensations run in reverse (LIFO) when the workflow fails."
+  [f]
+  (let [ctx (current-context)]
+    (swap! (:compensations ctx) conj f)))
+
 (defn notify-observer [event-fn & args]
   (when-let [observer (:observer (current-context))]
     (try
diff --git a/src/intemporal/internal/execution.clj b/src/intemporal/internal/execution.clj
index ab189fe..b973384 100644
--- a/src/intemporal/internal/execution.clj
+++ b/src/intemporal/internal/execution.clj
@@ -17,6 +17,29 @@
   `(when ~observer
      (~proto-fn ~observer ~@args)))
 
+(defn- run-compensations!
+  "Run registered compensations in reverse (LIFO). A compensating activity
+   suspends on first execution -> rethrow the suspension so the loop schedules
+   it. Real errors from a compensation are logged and skipped (best-effort).
+
+   Observer notes: -started fires once per replay pass that has compensations,
+   -completed only on the pass where the stack drains without suspending. Like
+   on-workflow-suspended/-resumed, these may fire across multiple passes; dedup
+   by workflow-id if exactly-once is needed."
+  [comps]
+  (when (seq comps)
+    (ctx/notify-observer p/on-compensation-started (ctx/current-workflow-id)))
+  (doseq [c (reverse comps)]
+    (try
+      (c)
+      (catch Throwable t
+        (when (error/suspension? t) (throw t))
+        (ctx/notify-observer p/on-compensation-failed
+                             (ctx/current-workflow-id) (error/throwable->map t))
+        (log/warnf "Compensation failed, continuing: %s" (ex-message t)))))
+  (when (seq comps)
+    (ctx/notify-observer p/on-compensation-completed (ctx/current-workflow-id))))
+
 (defn execute-workflow-fn [workflow-fn args]
   (try
     {:status :completed
@@ -37,9 +60,60 @@
          :pending-events @(:pending-events (ctx/current-context))}
 
         :else
-        {:status :failed
-         :error e
-         :pending-events @(:pending-events (ctx/current-context))}))))
+        ;; Real failure: run any registered compensations (saga rollback) before
+        ;; finalizing. A compensating activity suspends on first execution, which
+        ;; we surface as :suspended so the loop schedules + resumes it; on replay
+        ;; the body re-throws and already-run compensations return cached results.
+        (let [ctx (ctx/current-context)]
+          (try
+            (run-compensations! @(:compensations ctx))
+            {:status :failed
+             :error e
+             :pending-events @(:pending-events ctx)}
+            (catch Throwable s
+              (if (error/suspension? s)
+                {:status :suspended
+                 :suspension-type (error/suspension-type s)
+                 :suspension-data (error/suspension-data s)
+                 :pending-asyncs @(:pending-asyncs ctx)
+                 :pending-events @(:pending-events ctx)}
+                (throw s)))))))))
+
+(defn cancellation-compensation-pass
+  "Run when a workflow is cancelled. Replays the body with cancellation
+   suppressed (ctx/*compensating-cancel?* must be bound true by the caller) so
+   the compensation stack is rebuilt, then runs the compensations.
+
+   Returns the same result shape as execute-workflow-fn:
+   - :suspended  -> a compensating activity is pending; the loop schedules and
+                    resumes it, then (still cancelled) re-enters this pass.
+   - :cancelled  -> the stack drained; the caller finalizes."
+  [workflow-fn args]
+  (let [ctx (ctx/current-context)]
+    ;; 1. Rebuild compensations by replaying. Completed steps return cached and
+    ;;    re-register their compensations; the frontier (first un-cached op)
+    ;;    throws a forward suspension we discard - we are NOT doing forward work.
+    (try (apply workflow-fn args) (catch Throwable _ nil))
+    ;; 2. Drop any forward pending events the frontier accumulated; keep the
+    ;;    seq-counter so compensating activities get stable, continuing seq nums.
+    (reset! (:pending-events ctx) [])
+    (reset! (:pending-asyncs ctx) [])
+    ;; 3. Run compensations (suppression still on, so comp activities can schedule).
+    (try
+      (run-compensations! @(:compensations ctx))
+      ;; :compensated? distinguishes the drained pass (-> finalize) from
+      ;; execute-workflow-fn's mid-run :cancelled (-> recur into this pass).
+      {:status :cancelled
+       :compensated? true
+       :pending-events @(:pending-events ctx)}
+      (catch Throwable s
+        (if (error/suspension? s)
+          {:status :suspended
+           :suspension-type (error/suspension-type s)
+           :suspension-data (error/suspension-data s)
+           :pending-asyncs @(:pending-asyncs ctx)
+           :pending-events @(:pending-events ctx)}
+          (throw s))))))
 
 (defn execute-with-retry
   "Execute an activity with retry policy"
@@ -329,6 +403,7 @@
    :seq-counter (atom 0)
    :pending-events (atom [])
    :pending-asyncs (atom [])
+   :compensations (atom [])
    :store store
    :registry registry
    :observer observer})
@@ -518,28 +593,21 @@
         {:status :suspended
          :workflow-id workflow-id})
 
-      ;; Check cancellation at start of each iteration
-      (if (p/is-cancelled? store workflow-id)
-        (let [error-map {:type "clojure.lang.ExceptionInfo"
-                         :message "Workflow cancelled"
-                         :data {:workflow-id workflow-id}}]
-
-          (-notify p/on-workflow-cancelled observer workflow-id)
-          (p/save-event store workflow-id {:event-type :workflow-failed
-                                           :error      error-map
-                                           :timestamp  (utils/current-time-ms)})
-
-          (log/info "Workflow cancelled, failing")
-          (-notify p/on-workflow-failed observer workflow-id error-map)
-          {:status :failed
-           :workflow-id workflow-id
-           :error error-map})
-        ;; else
-        (let [history (p/load-history store workflow-id)
-              ctx (make-workflow-context workflow-id history store registry observer)
-              exec-result (binding [ctx/*workflow-context* ctx]
-                            (log/debugf "Executing workflow function %s..." workflow-fn)
-                            (execute-workflow-fn workflow-fn args))]
+      ;; Check cancellation at start of each iteration. When cancelled we still
+      ;; run the body - via cancellation-compensation-pass under suppression - so
+      ;; the compensation stack is rebuilt and compensations roll back completed
+      ;; steps. The result feeds the same dispatch below (:suspended schedules a
+      ;; compensating activity and recurs; :cancelled finalizes).
+      (let [cancelled?  (p/is-cancelled? store workflow-id)
+            history     (p/load-history store workflow-id)
+            ctx         (cond-> (make-workflow-context workflow-id history store registry observer)
+                          cancelled? (assoc :compensating-cancel? true))
+            exec-result (binding [ctx/*workflow-context* ctx]
+                          (log/debugf "Executing workflow function %s (cancelled? %s)..."
+                                      workflow-fn cancelled?)
+                          (if cancelled?
+                            (cancellation-compensation-pass workflow-fn args)
+                            (execute-workflow-fn workflow-fn args)))]
 
           (log/debugf "Workflow function executed, got: %s" (:status exec-result))
           (case (:status exec-result)
@@ -551,9 +619,14 @@
                                 observer)
 
             :cancelled
-            (finalize-cancelled store workflow-id
-                                (:pending-events exec-result)
-                                observer)
+            ;; From the compensation pass (drained) -> finalize. From
+            ;; execute-workflow-fn observing the flag mid-run -> recur so the
+            ;; top-of-loop runs the compensation pass.
+            (if (:compensated? exec-result)
+              (finalize-cancelled store workflow-id
+                                  (:pending-events exec-result)
+                                  observer)
+              (recur (inc iteration)))
 
             :suspended
             (let [action (handle-suspension engine
@@ -590,7 +663,7 @@
             (finalize-failed store workflow-id
                              (:pending-events exec-result)
                              (:error exec-result)
-                             observer)))))))
+                             observer))))))
 
 (defn process-child-workflow [{:keys [store executor scheduler registry] :as engine} workflow-id
                                suspension-data pending-events observer]
diff --git a/src/intemporal/internal/execution.cljs b/src/intemporal/internal/execution.cljs
index 89e887d..39a9915 100644
--- a/src/intemporal/internal/execution.cljs
+++ b/src/intemporal/internal/execution.cljs
@@ -14,6 +14,48 @@
 ;; Workflow Execution Engine
 ;; ============================================================================
 
+(defn- run-compensations!
+  "Run registered compensations in reverse (LIFO). A compensating activity
+   suspends on first execution -> rethrow the suspension so the loop schedules
+   it. Real errors from a compensation are logged and skipped (best-effort).
+
+   Observer notes: -started fires once per replay pass that has compensations,
+   -completed only on the pass where the stack drains without suspending. Like
+   on-workflow-suspended/-resumed, these may fire across multiple passes; dedup
+   by workflow-id if exactly-once is needed."
+  [comps]
+  (when (seq comps)
+    (ctx/notify-observer p/on-compensation-started (ctx/current-workflow-id)))
+  (doseq [c (reverse comps)]
+    (try
+      (c)
+      (catch js/Error t
+        (when (error/suspension? t) (throw t))
+        (ctx/notify-observer p/on-compensation-failed
+                             (ctx/current-workflow-id) (error/throwable->map t))
+        (log/warnf "Compensation failed, continuing: %s" (ex-message t)))))
+  (when (seq comps)
+    (ctx/notify-observer p/on-compensation-completed (ctx/current-workflow-id))))
+
+(defn- compensate-result
+  "Run compensations then build the terminal result for a real failure `e`.
+   If a compensating activity suspends, surfaces :suspended so the loop schedules
+   it; on replay the body re-throws and already-run compensations are cached."
+  [ctx e pending-asyncs pending-events]
+  (try
+    (run-compensations! @(:compensations ctx))
+    {:status :failed
+     :error e
+     :pending-events @pending-events}
+    (catch js/Error s
+      (if (error/suspension? s)
+        {:status :suspended
+         :suspension-type (error/suspension-type s)
+         :suspension-data (error/suspension-data s)
+         :pending-asyncs @pending-asyncs
+         :pending-events @pending-events}
+        (throw s)))))
+
 (defn execute-workflow-fn [workflow-fn args]
   ;; Capture context so async callbacks (from p/let, etc.) can access it
   ;; after the dynamic binding scope has exited
@@ -51,9 +93,7 @@
                                  :pending-events @pending-events}
 
                                 :else
-                                {:status :failed
-                                 :error e
-                                 :pending-events @pending-events})))))
+                                (compensate-result ctx e pending-asyncs pending-events))))))
           ;; Synchronous result
           {:status :completed
            :result result
@@ -73,9 +113,7 @@
            :pending-events @pending-events}
 
           :else
-          {:status :failed
-           :error e
-           :pending-events @pending-events})))))
+          (compensate-result ctx e pending-asyncs pending-events))))))
 
 (defn- execute-once
   "Execute activity once, returns a promise of result map."
@@ -354,6 +392,7 @@
            :seq-counter (atom 0)
            :pending-events (atom [])
            :pending-asyncs (atom [])
+           :compensations (atom [])
            :store store
            :registry registry
            :observer observer}
@@ -517,6 +556,46 @@
                                  :timestamp (utils/current-time-ms)})
         result))))
 
+(defn cancellation-compensation-pass
+  "Run when a workflow is cancelled. Replays the body with cancellation
+   suppressed (the context carries :compensating-cancel?) so the compensation
+   stack is rebuilt, then runs the compensations. cljs mirror of the clj pass.
+
+   The body may return a promise (p/let), so step 1 may settle asynchronously;
+   we swallow its result/rejection (forward work is discarded) then run
+   compensations. Returns the execute-workflow-fn result shape (a map, or a
+   promise of one): :suspended (a compensating activity is pending) or
+   :cancelled (the stack drained)."
+  [workflow-fn args]
+  (let [ctx       (ctx/current-context)
+        run-comps (fn []
+                    ;; Drop forward pending events from the frontier; keep the
+                    ;; seq-counter so comp activities get stable seq nums.
+                    (reset! (:pending-events ctx) [])
+                    (reset! (:pending-asyncs ctx) [])
+                    (try
+                      (run-compensations! @(:compensations ctx))
+                      ;; :compensated? distinguishes the drained pass (-> finalize)
+                      ;; from execute-workflow-fn's mid-run :cancelled (-> recur).
+                      {:status :cancelled
+                       :compensated? true
+                       :pending-events @(:pending-events ctx)}
+                      (catch js/Error s
+                        (if (error/suspension? s)
+                          {:status :suspended
+                           :suspension-type (error/suspension-type s)
+                           :suspension-data (error/suspension-data s)
+                           :pending-asyncs @(:pending-asyncs ctx)
+                           :pending-events @(:pending-events ctx)}
+                          (throw s)))))
+        ;; Step 1: replay body to rebuild comps; swallow sync throw or rejection.
+        replay    (try (apply workflow-fn args) (catch js/Error _ nil))]
+    (if (prom/promise? replay)
+      (-> replay
+          (prom/catch (fn [_] nil))    ;; discard forward suspension/rejection
+          (bthen (fn [_] (run-comps)))) ;; bthen rebinds ctx (carries the flag)
+      (run-comps))))
+
 (defn run-workflow-internal
   "Main workflow execution loop - orchestrates replay and execution.
 
@@ -544,29 +623,22 @@
         {:status :suspended
          :workflow-id workflow-id})
 
-      ;; Check cancellation at start of each iteration
-      (if (p/is-cancelled? store workflow-id)
-        (let [error-map {:type "clojure.lang.ExceptionInfo"
-                         :message "Workflow cancelled"
-                         :data {:workflow-id workflow-id}}]
-
-          (-notify p/on-workflow-cancelled observer workflow-id)
-          (p/save-event store workflow-id {:event-type :workflow-failed
-                                           :error      error-map
-                                           :timestamp  (utils/current-time-ms)})
-
-          (log/info "Workflow cancelled, failing")
-          (-notify p/on-workflow-failed observer workflow-id error-map)
-          {:status :failed
-           :workflow-id workflow-id
-           :error error-map})
-        ;; else
-        (let [history (p/load-history store workflow-id)
-              ctx (make-workflow-context workflow-id history store registry observer
-                                       :protocols (:protocols engine))
-              exec-result (binding [ctx/*workflow-context* ctx]
-                            (log/debugf "Executing workflow function %s..." workflow-fn)
-                            (execute-workflow-fn workflow-fn args))
+      ;; Check cancellation at start of each iteration. When cancelled we run the
+      ;; body via cancellation-compensation-pass (context carries
+      ;; :compensating-cancel?) so the compensation stack is rebuilt and rolled
+      ;; back; the result feeds the same dispatch (:suspended schedules a comp
+      ;; activity and recurs; :cancelled finalizes).
+      (let [cancelled?  (p/is-cancelled? store workflow-id)
+            history     (p/load-history store workflow-id)
+            ctx         (cond-> (make-workflow-context workflow-id history store registry observer
+                                                       :protocols (:protocols engine))
+                          cancelled? (assoc :compensating-cancel? true))
+            exec-result (binding [ctx/*workflow-context* ctx]
+                          (log/debugf "Executing workflow function %s (cancelled? %s)..."
+                                      workflow-fn cancelled?)
+                          (if cancelled?
+                            (cancellation-compensation-pass workflow-fn args)
+                            (execute-workflow-fn workflow-fn args)))
               dispatch (fn [exec-result]
                          (log/debugf "Workflow function executed, got: %s" (:status exec-result))
                          (case (:status exec-result)
@@ -578,9 +650,14 @@
                                                observer)
 
                            :cancelled
-                           (finalize-cancelled store workflow-id
-                                               (:pending-events exec-result)
-                                               observer)
+                           ;; From the compensation pass (drained) -> finalize.
+                           ;; From execute-workflow-fn observing the flag mid-run
+                           ;; -> recur so the top-of-loop runs the pass.
+                           (if (:compensated? exec-result)
+                             (finalize-cancelled store workflow-id
+                                                 (:pending-events exec-result)
+                                                 observer)
+                             (prom/recur (inc iteration)))
 
                            :suspended
                            (blet [action (handle-suspension engine
@@ -619,7 +696,7 @@
           ;; exec-result may be a Promise if workflow-fn returned a Promise (e.g. from p/let)
           (if (prom/promise? exec-result)
             (bthen exec-result dispatch)
-            (dispatch exec-result)))))))
+            (dispatch exec-result))))))
 
 (defn process-child-workflow [{:keys [store executor scheduler registry] :as engine} workflow-id
                                suspension-data pending-events observer]
diff --git a/src/intemporal/observer.cljc b/src/intemporal/observer.cljc
index 6b04252..09b19b0 100644
--- a/src/intemporal/observer.cljc
+++ b/src/intemporal/observer.cljc
@@ -112,6 +112,22 @@
 
   (on-workflow-cancelled [_ workflow-id]
     (swap! log-atom conj {:event :workflow-cancelled
+                          :workflow-id workflow-id
+                          :timestamp (utils/current-time-ms)}))
+
+  (on-compensation-started [_ workflow-id]
+    (swap! log-atom conj {:event :compensation-started
+                          :workflow-id workflow-id
+                          :timestamp (utils/current-time-ms)}))
+
+  (on-compensation-failed [_ workflow-id error]
+    (swap! log-atom conj {:event :compensation-failed
+                          :workflow-id workflow-id
+                          :error error
+                          :timestamp (utils/current-time-ms)}))
+
+  (on-compensation-completed [_ workflow-id]
+    (swap! log-atom conj {:event :compensation-completed
                           :workflow-id workflow-id
                           :timestamp (utils/current-time-ms)})))
 
@@ -139,4 +155,7 @@
     (on-signal-received [_ _ _ _])
     (on-workflow-completed [_ _ _])
     (on-workflow-failed [_ _ _])
-    (on-workflow-cancelled [_ _])))
+    (on-workflow-cancelled [_ _])
+    (on-compensation-started [_ _])
+    (on-compensation-failed [_ _ _])
+    (on-compensation-completed [_ _])))
diff --git a/src/intemporal/observer/otel.clj b/src/intemporal/observer/otel.clj
index 7509a42..fb3e2f1 100644
--- a/src/intemporal/observer/otel.clj
+++ b/src/intemporal/observer/otel.clj
@@ -126,7 +126,25 @@
                               :event {:name "workflow.cancelled"
                                       :attributes {:intemporal/cancelled true}}})
       (otspan/end-span! {:context span-ctx})
-      (swap! spans-atom update :workflows dissoc workflow-id))))
+      (swap! spans-atom update :workflows dissoc workflow-id)))
+
+  ;; Compensations run before the workflow span is ended by on-workflow-failed/
+  ;; -cancelled, so we add events to the still-open workflow span.
+  (on-compensation-started [_ workflow-id]
+    (when-let [span-ctx (get-in @spans-atom [:workflows workflow-id])]
+      (otspan/add-span-data! {:context span-ctx
+                              :event {:name "compensation.started"}})))
+
+  (on-compensation-failed [_ workflow-id error]
+    (when-let [span-ctx (get-in @spans-atom [:workflows workflow-id])]
+      (otspan/add-span-data! {:context span-ctx
+                              :event {:name "compensation.failed"
+                                      :attributes {:intemporal/error (pr-str error)}}})))
+
+  (on-compensation-completed [_ workflow-id]
+    (when-let [span-ctx (get-in @spans-atom [:workflows workflow-id])]
+      (otspan/add-span-data! {:context span-ctx
+                              :event {:name "compensation.completed"}}))))
 
 (defn make-otel-observer
   "Create an OpenTelemetry observer that emits traces for workflows and activities"
diff --git a/src/intemporal/protocol.cljc b/src/intemporal/protocol.cljc
index 470a91a..0bfa389 100644
--- a/src/intemporal/protocol.cljc
+++ b/src/intemporal/protocol.cljc
@@ -78,4 +78,7 @@
   (on-signal-received [observer workflow-id signal-name payload])
   (on-workflow-completed [observer workflow-id result])
   (on-workflow-failed [observer workflow-id error])
-  (on-workflow-cancelled [observer workflow-id]))
+  (on-workflow-cancelled [observer workflow-id])
+  (on-compensation-started [observer workflow-id])
+  (on-compensation-failed [observer workflow-id error])
+  (on-compensation-completed [observer workflow-id]))
diff --git a/test/intemporal/tests/crash/saga_compensation_crash_test.clj b/test/intemporal/tests/crash/saga_compensation_crash_test.clj
new file mode 100644
index 0000000..87a4b97
--- /dev/null
+++ b/test/intemporal/tests/crash/saga_compensation_crash_test.clj
@@ -0,0 +1,88 @@
+(ns ^:crash intemporal.tests.crash.saga-compensation-crash-test
+  "Crash recovery test for saga compensations.
+   A compensation suspends mid-way (waiting on a signal) to simulate a crash
+   between compensating activities. After resume, each compensating activity
+   must run exactly once and the workflow finalizes :failed."
+  (:require [intemporal.core :as intemporal]
+            [intemporal.store :as store]
+            [intemporal.protocol :as p]
+            [clojure.test :refer [deftest is testing]]))
+
+;; ============================================================================
+;; Activities - count actual executions (replays don't re-run the fn)
+;; ============================================================================
+
+(def exec-counts (atom {}))
+(defn- bump! [k] (swap! exec-counts update k (fnil inc 0)))
+
+(defn book-hotel  [order] (bump! :book-hotel)  {:hotel order})
+(defn book-flight [order] (bump! :book-flight) {:flight order})
+(defn charge-card-fails [order]
+  (bump! :charge-card)
+  (throw (ex-info "card declined" {:order order})))
+
+(defn cancel-hotel  [_] (bump! :cancel-hotel)  :hotel-cancelled)
+(defn cancel-flight [_] (bump! :cancel-flight) :flight-cancelled)
+
+;; The flight compensation cancels the flight, then waits for a signal. The
+;; missing signal is our deterministic "crash" point: the workflow suspends
+;; mid-compensation and is resumed by a fresh engine in phase 2.
+(defn crash-saga [order]
+  (let [hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        charge  (intemporal/stub #'charge-card-fails)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (do (cflight f)
+          (intemporal/wait-for-signal "continue-compensation")))
+    (charge order)
+    :booked))
+
+(defn- count-events [store workflow-id event-type]
+  (->> (p/load-history store workflow-id)
+       (filter #(= event-type (:event-type %)))
+       count))
+
+;; ============================================================================
+;; Test
+;; ============================================================================
+
+(deftest test-compensation-survives-crash
+  (testing "Compensation suspended mid-way resumes and runs each step exactly once"
+    (reset! exec-counts {})
+    (let [workflow-id "saga-crash-1"
+          persistent-store (store/->InMemoryStore (atom {}))]
+
+      ;; Phase 1: run until the flight compensation suspends waiting for a signal
+      (testing "Phase 1: fails, begins compensation, suspends mid-compensation"
+        (let [engine-1 (intemporal/make-workflow-engine :store persistent-store :threads 2)
+              fut (future
+                    (intemporal/start-workflow engine-1 crash-saga ["o1"]
+                                               :workflow-id workflow-id))]
+          ;; Give it time to: book hotel+flight, fail charge, cancel-flight,
+          ;; then suspend at wait-for-signal.
+          (Thread/sleep 300)
+          (future-cancel fut)
+          (intemporal/shutdown-engine engine-1)
+
+          ;; flight was cancelled, hotel not yet (we suspend before its comp)
+          (is (= 1 (get @exec-counts :cancel-flight)))
+          (is (nil? (get @exec-counts :cancel-hotel)))
+          ;; not yet finalized
+          (is (zero? (count-events persistent-store workflow-id :workflow-failed)))))
+
+      ;; Phase 2: fresh engine, signal + resume -> finishes compensating, fails
+      (testing "Phase 2: resume completes compensation and finalizes :failed"
+        (let [engine-2 (intemporal/make-workflow-engine :store persistent-store :threads 2)]
+          (intemporal/send-signal persistent-store workflow-id "continue-compensation" {})
+          (let [result (intemporal/resume-workflow engine-2 workflow-id crash-saga ["o1"])]
+            (is (= :failed (:status result)))
+            ;; each compensating activity ran exactly once across the crash
+            (is (= 1 (get @exec-counts :cancel-flight)))
+            (is (= 1 (get @exec-counts :cancel-hotel)))
+            ;; exactly one terminal failure event
+            (is (= 1 (count-events persistent-store workflow-id :workflow-failed)))
+            (intemporal/shutdown-engine engine-2)))))))
diff --git a/test/intemporal/tests/saga_test.clj b/test/intemporal/tests/saga_test.clj
new file mode 100644
index 0000000..f6d758a
--- /dev/null
+++ b/test/intemporal/tests/saga_test.clj
@@ -0,0 +1,204 @@
+(ns intemporal.tests.saga-test
+  "Tests for saga / compensation support (with-failure + add-compensation).
+   A compensation registered for a successful step runs (in reverse order)
+   when the workflow later fails."
+  (:require [intemporal.core :as intemporal]
+            [intemporal.tests.utils :refer [with-result]]
+            [clojure.test :refer [deftest is testing]]
+            [matcher-combinators.test :refer [match?]]))
+
+;; ============================================================================
+;; Activities - record execution order + args into a shared atom
+;; ============================================================================
+
+(def events (atom []))
+(defn- record! [e] (swap! events conj e))
+
+(defn book-hotel  [order] (record! [:book-hotel order])  {:hotel order})
+(defn book-flight [order] (record! [:book-flight order]) {:flight order})
+(defn charge-card [order] (record! [:charge-card order]) {:charge order})
+
+(defn charge-card-fails [order]
+  (record! [:charge-card order])
+  (throw (ex-info "card declined" {:order order})))
+
+(defn book-flight-fails [order]
+  (record! [:book-flight order])
+  (throw (ex-info "no seats" {:order order})))
+
+(defn cancel-hotel  [v] (record! [:cancel-hotel v])  :hotel-cancelled)
+(defn cancel-flight [v] (record! [:cancel-flight v]) :flight-cancelled)
+
+(defn failing-cancel-flight [v]
+  (record! [:cancel-flight v])
+  (throw (ex-info "refund provider down" {:v v})))
+
+(defn slow-step [x] (record! [:slow x]) (Thread/sleep 50) x)
+
+;; ============================================================================
+;; Workflows
+;; ============================================================================
+
+(defn happy-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight)
+        charge (intemporal/stub #'charge-card)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    (charge order)
+    :booked))
+
+(defn failing-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight)
+        charge (intemporal/stub #'charge-card-fails)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    (charge order)
+    :booked))
+
+(defn fail-on-flight-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight-fails)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    :booked))
+
+;; Books hotel + flight, then stays busy in a loop so a cancel arrives after the
+;; bookings have completed (mirrors cancellation-test/long-flow).
+(defn cancel-rollback-saga [order]
+  (let [hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)
+        slow    (intemporal/stub #'slow-step)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    (loop [i 0]
+      (if (< i 40) (do (slow i) (recur (inc i))) :booked))))
+
+;; Cancel lands before any with-failure step completes (busy first).
+(defn cancel-early-saga [order]
+  (let [hotel (intemporal/stub #'book-hotel)
+        chotel (intemporal/stub #'cancel-hotel)
+        slow  (intemporal/stub #'slow-step)]
+    (loop [i 0]
+      (when (< i 3) (slow i) (recur (inc i))))
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    :booked))
+
+;; Compensation activity itself fails -> swallowed, others still run.
+(defn failing-comp-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight)
+        charge (intemporal/stub #'charge-card-fails)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'failing-cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    (charge order)
+    :booked))
+
+;; ============================================================================
+;; Tests
+;; ============================================================================
+
+(deftest test-happy-path-no-compensation
+  (testing "When the workflow succeeds, no compensation runs"
+    (reset! events [])
+    (intemporal/with-workflow-engine [engine {:threads 2}]
+      (with-result [result (intemporal/start-workflow engine happy-saga ["o1"])]
+        (is (match? {:status :completed :result :booked} result))
+        (is (= [[:book-hotel "o1"] [:book-flight "o1"] [:charge-card "o1"]]
+               @events))))))
+
+(deftest test-compensation-runs-lifo-on-failure
+  (testing "On a later failure, compensations run in reverse order with the forward result"
+    (reset! events [])
+    (intemporal/with-workflow-engine [engine {:threads 2}]
+      (with-result [result (intemporal/start-workflow engine failing-saga ["o2"])]
+        (is (match? {:status :failed} result))
+        ;; forward steps, the failing charge, then compensations in reverse (LIFO)
+        (is (= [[:book-hotel "o2"]
+                [:book-flight "o2"]
+                [:charge-card "o2"]
+                [:cancel-flight {:flight "o2"}]
+                [:cancel-hotel {:hotel "o2"}]]
+               @events))))))
+
+(deftest test-failed-step-registers-no-compensation
+  (testing "A step whose own body fails registers no compensation; earlier steps still compensate"
+    (reset! events [])
+    (intemporal/with-workflow-engine [engine {:threads 2}]
+      (with-result [result (intemporal/start-workflow engine fail-on-flight-saga ["o3"])]
+        (is (match? {:status :failed} result))
+        ;; flight failed -> no :cancel-flight; only hotel compensates
+        (is (= [[:book-hotel "o3"]
+                [:book-flight "o3"]
+                [:cancel-hotel {:hotel "o3"}]]
+               @events))
+        (is (not (some #(= :cancel-flight (first %)) @events)))))))
+
+(defn- compensations [events]
+  (filterv #(#{:cancel-flight :cancel-hotel} (first %)) events))
+
+(deftest test-cancellation-rolls-back-completed-steps
+  (testing "Cancelling a running saga runs compensations (LIFO) for completed steps"
+    (reset! events [])
+    (intemporal/with-workflow-engine [engine {:threads 2}]
+      (let [wf-id "saga-cancel-1"
+            fut (future (intemporal/start-workflow engine cancel-rollback-saga ["c1"]
+                                                   :workflow-id wf-id))]
+        ;; let hotel + flight + a few slow steps run, then cancel
+        (Thread/sleep 250)
+        (intemporal/cancel-workflow (:store engine) wf-id)
+        (let [result @fut]
+          (is (match? {:status :failed
+                       :workflow-id wf-id
+                       :error (m/embeds {:message #"cancelled"})}
+                      result))
+          ;; both completed steps rolled back, in reverse order, with their values
+          (is (= [[:cancel-flight {:flight "c1"}]
+                  [:cancel-hotel {:hotel "c1"}]]
+                 (compensations @events))))))))
+
+(deftest test-cancellation-with-no-completed-steps
+  (testing "Cancelling before any with-failure step completes runs no compensations"
+    (reset! events [])
+    (intemporal/with-workflow-engine [engine {:threads 2}]
+      (let [wf-id "saga-cancel-2"
+            fut (future (intemporal/start-workflow engine cancel-early-saga ["c2"]
+                                                   :workflow-id wf-id))]
+        (Thread/sleep 60)            ;; mid first slow step, before the with-failure
+        (intemporal/cancel-workflow (:store engine) wf-id)
+        (let [result @fut]
+          (is (match? {:status :failed :workflow-id wf-id} result))
+          (is (empty? (compensations @events))))))))
+
+(deftest test-observer-compensation-lifecycle
+  (testing "Observer sees compensation-started/-completed, and -failed for a failing compensation"
+    (reset! events [])
+    (intemporal/with-workflow-engine [engine {:threads 2 :enable-logging true}]
+      (with-result [_ (intemporal/start-workflow engine failing-comp-saga ["c3"])]
+        (let [evs (set (map :event @(:log engine)))]
+          (is (contains? evs :compensation-started))
+          (is (contains? evs :compensation-completed))
+          ;; failing-cancel-flight throws -> swallowed + surfaced to the observer
+          (is (contains? evs :compensation-failed)))))))
diff --git a/test/intemporal/tests/saga_test.cljs b/test/intemporal/tests/saga_test.cljs
new file mode 100644
index 0000000..c55e3b5
--- /dev/null
+++ b/test/intemporal/tests/saga_test.cljs
@@ -0,0 +1,108 @@
+(ns intemporal.tests.saga-test
+  "Tests for saga / compensation support (with-failure + add-compensation)."
+  (:require [intemporal.core :as intemporal]
+            [intemporal.tests.utils :refer [with-result]]
+            [cljs.test :as t :refer [deftest is testing]]
+            [matcher-combinators.test :refer [match?]])
+  (:require-macros [intemporal.tests.utils :refer [with-result]]))
+
+;; ============================================================================
+;; Activities - record execution order + args into a shared atom
+;; ============================================================================
+
+(def events (atom []))
+(defn- record! [e] (swap! events conj e))
+
+(defn book-hotel  [order] (record! [:book-hotel order])  {:hotel order})
+(defn book-flight [order] (record! [:book-flight order]) {:flight order})
+(defn charge-card [order] (record! [:charge-card order]) {:charge order})
+
+(defn charge-card-fails [order]
+  (record! [:charge-card order])
+  (throw (ex-info "card declined" {:order order})))
+
+(defn book-flight-fails [order]
+  (record! [:book-flight order])
+  (throw (ex-info "no seats" {:order order})))
+
+(defn cancel-hotel  [v] (record! [:cancel-hotel v])  :hotel-cancelled)
+(defn cancel-flight [v] (record! [:cancel-flight v]) :flight-cancelled)
+
+;; ============================================================================
+;; Workflows
+;; ============================================================================
+
+(defn happy-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight)
+        charge (intemporal/stub #'charge-card)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    (charge order)
+    :booked))
+
+(defn failing-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight)
+        charge (intemporal/stub #'charge-card-fails)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    (charge order)
+    :booked))
+
+(defn fail-on-flight-saga [order]
+  (let [hotel  (intemporal/stub #'book-hotel)
+        flight (intemporal/stub #'book-flight-fails)
+        chotel  (intemporal/stub #'cancel-hotel)
+        cflight (intemporal/stub #'cancel-flight)]
+    (intemporal/with-failure [h (hotel order)]
+      (chotel h))
+    (intemporal/with-failure [f (flight order)]
+      (cflight f))
+    :booked))
+
+;; ============================================================================
+;; Tests
+;; ============================================================================
+
+(deftest test-happy-path-no-compensation
+  (testing "When the workflow succeeds, no compensation runs"
+    (reset! events [])
+    (let [engine (intemporal/make-workflow-engine :threads 2)]
+      (with-result [result (intemporal/start-workflow engine happy-saga ["o1"])]
+        (is (match? {:status :completed :result :booked} result))
+        (is (= [[:book-hotel "o1"] [:book-flight "o1"] [:charge-card "o1"]]
+               @events))))))
+
+(deftest test-compensation-runs-lifo-on-failure
+  (testing "On a later failure, compensations run in reverse order with the forward result"
+    (reset! events [])
+    (let [engine (intemporal/make-workflow-engine :threads 2)]
+      (with-result [result (intemporal/start-workflow engine failing-saga ["o2"])]
+        (is (match? {:status :failed} result))
+        (is (= [[:book-hotel "o2"]
+                [:book-flight "o2"]
+                [:charge-card "o2"]
+                [:cancel-flight {:flight "o2"}]
+                [:cancel-hotel {:hotel "o2"}]]
+               @events))))))
+
+(deftest test-failed-step-registers-no-compensation
+  (testing "A step whose own body fails registers no compensation; earlier steps still compensate"
+    (reset! events [])
+    (let [engine (intemporal/make-workflow-engine :threads 2)]
+      (with-result [result (intemporal/start-workflow engine fail-on-flight-saga ["o3"])]
+        (is (match? {:status :failed} result))
+        (is (= [[:book-hotel "o3"]
+                [:book-flight "o3"]
+                [:cancel-hotel {:hotel "o3"}]]
+               @events))
+        (is (not (some #(= :cancel-flight (first %)) @events)))))))

From 1e3921fdadf8f1783fb634851819872ca0f02016 Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Thu, 4 Jun 2026 13:53:53 +0100
Subject: [PATCH 7/9] simplify saga

---
 README.md                                     |  50 +++++--
 dev/verify_bugs.clj                           |   2 +-
 docker/fdb.cluster                            |   2 +-
 src/intemporal/core.cljc                      |  96 ++++++++++---
 src/intemporal/internal/context.cljc          |  79 +++++++++--
 src/intemporal/internal/error.cljc            |   9 +-
 src/intemporal/internal/execution.clj         | 118 +++-------------
 src/intemporal/internal/execution.cljs        | 131 +++---------------
 test/intemporal/tests/bench/fdb_test.clj      |   6 +-
 .../intemporal/tests/context_macros_test.cljs |   2 +-
 .../crash/saga_compensation_crash_test.clj    |  23 +--
 test/intemporal/tests/jepsen/bug_1_1_test.clj |   2 +-
 test/intemporal/tests/jepsen/bug_1_2_test.clj |   2 +-
 test/intemporal/tests/jepsen/bug_1_3_test.clj |   2 +-
 test/intemporal/tests/jepsen/bug_2_1_test.clj |   2 +-
 test/intemporal/tests/jepsen/bug_2_3_test.clj |   2 +-
 test/intemporal/tests/saga_test.clj           | 120 ++++++++++------
 test/intemporal/tests/saga_test.cljs          |  60 +++++---
 test/intemporal/tests/signal_test.cljs        |   2 +-
 test/intemporal/tests/status_test.clj         |   2 +-
 test/intemporal/tests/store/fdb_test.clj      |   4 +-
 test/intemporal/tests/timer_recovery_test.clj |   6 +-
 test/intemporal/tests/timer_test.cljs         |   2 +-
 test/intemporal/tests/worker_test.clj         |   4 +-
 24 files changed, 374 insertions(+), 354 deletions(-)

diff --git a/README.md b/README.md
index ff4fdbb..1fe2de4 100644
--- a/README.md
+++ b/README.md
@@ -57,29 +57,53 @@ Examples:
 
 ### Saga / compensations
 
-`with-failure` registers a compensation for a step. If the step succeeds but the
-workflow later fails, registered compensations run in reverse order (LIFO). A
-step that fails registers no compensation (nothing was created, so nothing to
-undo). Compensations should themselves be activity stubs so they are durable and
+Create a saga with `intemporal/saga`, register a compensation for each step *after*
+it succeeds with `intemporal/add-compensation`, and roll back from a catch block
+with `intemporal/compensate`. Compensations run in reverse registration order
+(LIFO). A step that fails before its `add-compensation` registers nothing to undo.
+Compensations should themselves call activity stubs so they are durable and
 replay-safe.
 
+Both real failures and workflow cancellation flow through the catch, so the one
+idiom rolls back in either case. Catch `Exception`: the engine's normal
+control-flow *suspensions* subclass `Error`, so they are excluded automatically
+and propagate to the engine untouched.
+
 ```clojure
 (defn booking-saga [order]
-  (let [book-hotel    (intemporal/stub #'book-hotel)
+  (let [saga          (intemporal/saga)
+        book-hotel    (intemporal/stub #'book-hotel)
         book-flight   (intemporal/stub #'book-flight)
         charge-card   (intemporal/stub #'charge-card)
         cancel-hotel  (intemporal/stub #'cancel-hotel)
         cancel-flight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (book-hotel order)]
-      (cancel-hotel h))
-    (intemporal/with-failure [f (book-flight order)]
-      (cancel-flight f))
-    ;; if charge-card throws, cancel-flight then cancel-hotel run automatically
-    (charge-card order)))
+    (try
+      (let [h (book-hotel order)]
+        (intemporal/add-compensation saga #(cancel-hotel h)))
+      (let [f (book-flight order)]
+        (intemporal/add-compensation saga #(cancel-flight f)))
+      ;; if charge-card throws, the catch runs compensate -> cancel-flight then
+      ;; cancel-hotel (LIFO) -> then rethrows so the workflow finalizes :failed
+      (charge-card order)
+      :booked
+      (catch Exception e
+        (intemporal/compensate saga)
+        (throw e)))))
 ```
 
-`intemporal/add-compensation` is the underlying function if you need to register
-a compensation thunk directly.
+Cancellation is a catchable `Exception`, so any `(catch Exception ...)` in a
+workflow will intercept it — that is what lets a cancelled saga roll back.
+
+In **ClojureScript** there is no `Error`/`Exception` split (everything is a
+`js/Error`), so `(catch :default e)` would also catch suspensions. There, rethrow
+them explicitly:
+
+```clojure
+      (catch :default e
+        (when (intemporal/suspension? e) (throw e))   ;; engine control flow
+        (intemporal/compensate saga)
+        (throw e))
+```
 
 # TODO
 
diff --git a/dev/verify_bugs.clj b/dev/verify_bugs.clj
index 4aab136..8c36ffb 100644
--- a/dev/verify_bugs.clj
+++ b/dev/verify_bugs.clj
@@ -24,7 +24,7 @@
       "jdbc:postgresql://localhost:5432/root?user=root&password=root"))
 
 (defn- open-fdb []
-  (let [fdb (cfdb/select-api-version 730)]
+  (let [fdb (cfdb/select-api-version 710)]
     (cfdb/open fdb "docker/fdb.cluster")))
 
 (defn- timeout-ms [ms f]
diff --git a/docker/fdb.cluster b/docker/fdb.cluster
index 74a077b..fc3b14b 100644
--- a/docker/fdb.cluster
+++ b/docker/fdb.cluster
@@ -1 +1 @@
-docker:docker@172.18.0.2:4500
+docker:docker@192.168.107.2:4500
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index c2a7c78..7dc2a81 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -33,10 +33,8 @@
         effective-timeout (or timeout-ms (:timeout-ms activity-info))
         effective-retry (or retry-policy (:retry-policy activity-info))]
     (fn [& args]
-      (let [seq-num (ctx/next-seq!)]
+      (let [seq-num (ctx/next-seq!)]            ;; next-seq! already checks cancellation
         (log/with-mdc {:activity activity-name :seqnum seq-num}
-
-          (ctx/check-cancelled!)
           (let [ctx             (ctx/current-context)
                 store           (ctx/current-store)
                 workflow-id     (ctx/current-workflow-id)
@@ -569,25 +567,81 @@
 ;; Saga / Compensations
 ;; ============================================================================
 
+(defn suspension?
+  "True if `e` is an internal workflow suspension (the engine's normal control
+   flow for activities, timers, signals, etc.). Mainly needed in ClojureScript,
+   where every throwable is a js/Error and `(catch :default e)` catches
+   suspensions too - a saga catch there must rethrow them via this predicate.
+   On the JVM suspensions subclass Error, so `(catch Exception e)` already
+   excludes them and no guard is needed. See `saga`."
+  [e]
+  (error/suspension? e))
+
+(defn saga
+  "Create a saga: a handle that collects compensation thunks for the steps a
+   workflow has completed. Register compensations as you go with
+   `add-compensation`, and run them with `compensate` from a catch block.
+
+   Both real failures and workflow cancellation flow through the catch (so this
+   rolls back in either case); the engine's normal control-flow suspensions do
+   not. On the JVM, catch `Exception` - suspensions subclass Error and are
+   excluded automatically:
+
+   (let [s (saga)]
+     (try
+       (let [h (book-hotel order)]
+         (add-compensation s #(cancel-hotel h)))
+       (charge-card order)
+       (catch Exception e
+         (compensate s)             ;; rolls back completed steps, LIFO
+         (throw e))))
+
+   In ClojureScript there is no Error/Exception split, so catch :default and
+   rethrow suspensions explicitly:
+
+       (catch :default e
+         (when (suspension? e) (throw e))
+         (compensate s)
+         (throw e))"
+  []
+  {::compensations (atom [])})
+
 (defn add-compensation
-  "Register a 0-arg compensation thunk. If the workflow later fails, registered
-   compensations run in reverse order (LIFO). Compensations should call activity
-   stubs so they are durable / replay-safe."
-  [f]
-  (ctx/add-compensation! f))
-
-(defmacro with-failure
-  "Run `body`. If `body` succeeds, register `comp-fn` (with `binding` bound to
-   body's result) to run if the workflow later fails. If `body` fails, no
-   compensation is registered - a step that never completed needs no undo.
-
-   Example:
-   (with-failure [v (book-hotel order)]
-     (cancel-hotel v))"
-  [[binding body] comp-fn]
-  `(let [~binding ~body]
-     (ctx/add-compensation! (fn [] ~comp-fn))
-     ~binding))
+  "Register a 0-arg compensation thunk on `saga`. Compensations run in reverse
+   registration order (LIFO) when `compensate` is called. The thunk should call
+   activity stubs (closing over the step's result) so it is durable / replay-safe.
+   Register a step's compensation only after the step succeeds, so a step that
+   never completed registers nothing to undo."
+  [saga thunk]
+  (swap! (::compensations saga) conj thunk))
+
+(defn compensate
+  "Run `saga`'s registered compensations in reverse (LIFO). Real errors from a
+   compensation are logged and skipped (best-effort rollback); a suspension (a
+   compensating activity running for the first time) is rethrown so the engine
+   schedules and resumes it - on replay already-run compensations return cached
+   results."
+  [saga]
+  (let [comps @(::compensations saga)]
+    (when (seq comps)
+      (ctx/notify-observer p/on-compensation-started (ctx/current-workflow-id)))
+    ;; Suppress the cancellation check so compensating activities can run even
+    ;; when this rollback was triggered by a cancellation (the cancel exception
+    ;; was already caught by the user before calling compensate).
+    (ctx/set-compensating! true)
+    (try
+      (doseq [c (reverse comps)]
+        (try
+          (c)
+          (catch #?(:clj Throwable :cljs js/Error) t
+            (when (error/suspension? t) (throw t))
+            (ctx/notify-observer p/on-compensation-failed
+                                 (ctx/current-workflow-id) (error/throwable->map t))
+            (log/warnf "Compensation failed, continuing: %s" (ex-message t)))))
+      (finally
+        (ctx/set-compensating! false)))
+    (when (seq comps)
+      (ctx/notify-observer p/on-compensation-completed (ctx/current-workflow-id)))))
 
 ;; ============================================================================
 ;; Convenience Functions
diff --git a/src/intemporal/internal/context.cljc b/src/intemporal/internal/context.cljc
index e7d8dac..380f4e8 100644
--- a/src/intemporal/internal/context.cljc
+++ b/src/intemporal/internal/context.cljc
@@ -20,7 +20,7 @@
     :seq-counter (atom 0)
     :pending-events pending-events
     :pending-asyncs pending-asyncs
-    :compensations (atom [])
+    :compensating? (atom false)
     :store store
     :registry registry
     :observer observer
@@ -35,16 +35,72 @@
 (defn current-store []
   (:store (current-context)))
 
+(defn compensating?
+  "True while the workflow is inside intemporal/compensate. Used to suppress the
+   cancellation check so compensating activities can run even though the workflow
+   is being cancelled (the cancel exception was already caught by the user)."
+  []
+  (boolean (some-> (:compensating? (current-context)) deref)))
+
+(defn set-compensating! [v]
+  (some-> (:compensating? (current-context)) (reset! v)))
+
+(declare find-event add-pending-event!)
+
+(defn- seq-has-event?
+  "True if history (or pending events) holds any event at sequence `s`. The
+   pending-events scan is load-bearing: it lets a :workflow-cancelling marker
+   added earlier in the *current* pass count as present, so the frontier op does
+   not record a second marker / throw twice at the same seq within one pass."
+  [ctx s]
+  (or (some #(= (:seq %) s) @(:history ctx))
+      (some #(= (:seq %) s) @(:pending-events ctx))))
+
+(defn replaying?
+  "True when the operation about to run at the current sequence position already
+   has recorded history (it is being replayed, not executed for the first time).
+   Used to defer the cancellation check to the frontier - the first un-cached
+   operation - so that a saga's compensation registrations (which re-run during
+   replay) are rebuilt before cancellation surfaces into the user's catch.
+   Per-seq equality (not max-seq) so that compensation events, which take higher
+   seq numbers, don't make a not-yet-reached forward op look replayed."
+  []
+  (seq-has-event? (current-context) @(:seq-counter (current-context))))
+
+(defn- surface-cancellation!
+  "Decide where a cancellation surfaces into the workflow body, then throw.
+
+   Cancellation must surface deterministically so that a saga's compensations
+   (registered as the body re-runs) are rebuilt before the user's catch runs, and
+   so the compensation seq space stays stable across crashes/resumes. We anchor it
+   to a single frontier sequence number, recorded once as a :workflow-cancelling
+   marker and re-thrown at that same seq on every later pass (like a recorded
+   :activity-failed):
+
+   - marker already at `cur`  -> re-throw (deterministic replay frontier);
+   - still replaying cached steps -> return nil so the body advances toward the
+     frontier (re-registering compensations along the way);
+   - frontier (first un-cached op) -> record the marker, then throw."
+  [ctx cur]
+  (cond
+    (find-event @(:history ctx) :workflow-cancelling cur)
+    (throw (error/workflow-cancelled-exception))
+
+    (replaying?)
+    nil
+
+    :else
+    (do
+      (add-pending-event! {:event-type :workflow-cancelling :seq cur})
+      (throw (error/workflow-cancelled-exception)))))
+
 (defn check-cancelled! []
   (let [ctx (current-context)]
-    ;; :compensating-cancel? is set on the context during the cancellation
-    ;; compensation pass so the body can replay (rebuilding the compensation
-    ;; stack) and compensating activities can schedule despite the cancel flag.
-    ;; It lives in the context map (not a dynamic var) so it propagates across
-    ;; cljs async boundaries via blet/bthen. See cancellation-compensation-pass.
-    (when (and (not (:compensating-cancel? ctx))
+    ;; Suppress while compensating: the cancel exception was already caught by
+    ;; the user and the compensating activities must run.
+    (when (and (not (compensating?))
                (p/is-cancelled? (:store ctx) (:workflow-id ctx)))
-      (throw (error/workflow-cancelled-exception)))))
+      (surface-cancellation! ctx @(:seq-counter ctx)))))
 
 (defn next-seq! []
   (check-cancelled!)
@@ -74,13 +130,6 @@
   (let [ctx (current-context)]
     (swap! (:pending-asyncs ctx) conj async-info)))
 
-(defn add-compensation!
-  "Register a 0-arg compensation thunk onto the current workflow's compensation
-   stack. Compensations run in reverse (LIFO) when the workflow fails."
-  [f]
-  (let [ctx (current-context)]
-    (swap! (:compensations ctx) conj f)))
-
 (defn notify-observer [event-fn & args]
   (when-let [observer (:observer (current-context))]
     (try
diff --git a/src/intemporal/internal/error.cljc b/src/intemporal/internal/error.cljc
index f1539eb..c0827ab 100644
--- a/src/intemporal/internal/error.cljc
+++ b/src/intemporal/internal/error.cljc
@@ -69,12 +69,15 @@
              (-> e ex-data :data))))
 
 (defn workflow-cancelled-exception []
-  (internal-error "Workflow cancelled" {::cancelled true}))
+  ;; A plain ex-info (catchable by `(catch Exception ...)`) - unlike suspensions,
+  ;; which subclass Error to stay invisible to userland catches. This lets a saga
+  ;; workflow catch cancellation and run compensations to roll completed steps
+  ;; back, while still letting suspensions propagate to the engine untouched.
+  (ex-info "Workflow cancelled" {::cancelled true}))
 
 (defn cancelled-exception? [e]
   #?(:clj
-     (and (instance? Error e)
-          (instance? IExceptionInfo e)
+     (and (instance? IExceptionInfo e)
           (::cancelled (ex-data e)))
      :cljs
      (and (instance? js/Error e)
diff --git a/src/intemporal/internal/execution.clj b/src/intemporal/internal/execution.clj
index b973384..7012896 100644
--- a/src/intemporal/internal/execution.clj
+++ b/src/intemporal/internal/execution.clj
@@ -17,29 +17,6 @@
   `(when ~observer
      (~proto-fn ~observer ~@args)))
 
-(defn- run-compensations!
-  "Run registered compensations in reverse (LIFO). A compensating activity
-   suspends on first execution -> rethrow the suspension so the loop schedules
-   it. Real errors from a compensation are logged and skipped (best-effort).
-
-   Observer notes: -started fires once per replay pass that has compensations,
-   -completed only on the pass where the stack drains without suspending. Like
-   on-workflow-suspended/-resumed, these may fire across multiple passes; dedup
-   by workflow-id if exactly-once is needed."
-  [comps]
-  (when (seq comps)
-    (ctx/notify-observer p/on-compensation-started (ctx/current-workflow-id)))
-  (doseq [c (reverse comps)]
-    (try
-      (c)
-      (catch Throwable t
-        (when (error/suspension? t) (throw t))
-        (ctx/notify-observer p/on-compensation-failed
-                             (ctx/current-workflow-id) (error/throwable->map t))
-        (log/warnf "Compensation failed, continuing: %s" (ex-message t)))))
-  (when (seq comps)
-    (ctx/notify-observer p/on-compensation-completed (ctx/current-workflow-id))))
-
 (defn execute-workflow-fn [workflow-fn args]
   (try
     {:status :completed
@@ -60,60 +37,13 @@
          :pending-events @(:pending-events (ctx/current-context))}
 
         :else
-        ;; Real failure: run any registered compensations (saga rollback) before
-        ;; finalizing. A compensating activity suspends on first execution, which
-        ;; we surface as :suspended so the loop schedules + resumes it; on replay
-        ;; the body re-throws and already-run compensations return cached results.
-        (let [ctx (ctx/current-context)]
-          (try
-            (run-compensations! @(:compensations ctx))
-            {:status :failed
-             :error e
-             :pending-events @(:pending-events ctx)}
-            (catch Throwable s
-              (if (error/suspension? s)
-                {:status :suspended
-                 :suspension-type (error/suspension-type s)
-                 :suspension-data (error/suspension-data s)
-                 :pending-asyncs @(:pending-asyncs ctx)
-                 :pending-events @(:pending-events ctx)}
-                (throw s)))))))))
-
-(defn cancellation-compensation-pass
-  "Run when a workflow is cancelled. Replays the body with cancellation
-   suppressed (ctx/*compensating-cancel?* must be bound true by the caller) so
-   the compensation stack is rebuilt, then runs the compensations.
-
-   Returns the same result shape as execute-workflow-fn:
-   - :suspended  -> a compensating activity is pending; the loop schedules and
-                    resumes it, then (still cancelled) re-enters this pass.
-   - :cancelled  -> the stack drained; the caller finalizes."
-  [workflow-fn args]
-  (let [ctx (ctx/current-context)]
-    ;; 1. Rebuild compensations by replaying. Completed steps return cached and
-    ;;    re-register their compensations; the frontier (first un-cached op)
-    ;;    throws a forward suspension we discard - we are NOT doing forward work.
-    (try (apply workflow-fn args) (catch Throwable _ nil))
-    ;; 2. Drop any forward pending events the frontier accumulated; keep the
-    ;;    seq-counter so compensating activities get stable, continuing seq nums.
-    (reset! (:pending-events ctx) [])
-    (reset! (:pending-asyncs ctx) [])
-    ;; 3. Run compensations (suppression still on, so comp activities can schedule).
-    (try
-      (run-compensations! @(:compensations ctx))
-      ;; :compensated? distinguishes the drained pass (-> finalize) from
-      ;; execute-workflow-fn's mid-run :cancelled (-> recur into this pass).
-      {:status :cancelled
-       :compensated? true
-       :pending-events @(:pending-events ctx)}
-      (catch Throwable s
-        (if (error/suspension? s)
-          {:status :suspended
-           :suspension-type (error/suspension-type s)
-           :suspension-data (error/suspension-data s)
-           :pending-asyncs @(:pending-asyncs ctx)
-           :pending-events @(:pending-events ctx)}
-          (throw s))))))
+        ;; Real failure. Any saga rollback happens inside the workflow body (the
+        ;; user's catch calls intemporal/compensate); a compensating activity that
+        ;; suspends throws out of compensate and arrives here as a suspension,
+        ;; caught above, so the loop schedules + resumes it.
+        {:status :failed
+         :error e
+         :pending-events @(:pending-events (ctx/current-context))}))))
 
 (defn execute-with-retry
   "Execute an activity with retry policy"
@@ -403,7 +333,7 @@
    :seq-counter (atom 0)
    :pending-events (atom [])
    :pending-asyncs (atom [])
-   :compensations (atom [])
+   :compensating? (atom false)
    :store store
    :registry registry
    :observer observer})
@@ -593,21 +523,11 @@
         {:status :suspended
          :workflow-id workflow-id})
 
-      ;; Check cancellation at start of each iteration. When cancelled we still
-      ;; run the body - via cancellation-compensation-pass under suppression - so
-      ;; the compensation stack is rebuilt and compensations roll back completed
-      ;; steps. The result feeds the same dispatch below (:suspended schedules a
-      ;; compensating activity and recurs; :cancelled finalizes).
-      (let [cancelled?  (p/is-cancelled? store workflow-id)
-            history     (p/load-history store workflow-id)
-            ctx         (cond-> (make-workflow-context workflow-id history store registry observer)
-                          cancelled? (assoc :compensating-cancel? true))
+      (let [history     (p/load-history store workflow-id)
+            ctx         (make-workflow-context workflow-id history store registry observer)
             exec-result (binding [ctx/*workflow-context* ctx]
-                          (log/debugf "Executing workflow function %s (cancelled? %s)..."
-                                      workflow-fn cancelled?)
-                          (if cancelled?
-                            (cancellation-compensation-pass workflow-fn args)
-                            (execute-workflow-fn workflow-fn args)))]
+                          (log/debugf "Executing workflow function %s..." workflow-fn)
+                          (execute-workflow-fn workflow-fn args))]
 
           (log/debugf "Workflow function executed, got: %s" (:status exec-result))
           (case (:status exec-result)
@@ -619,14 +539,12 @@
                                 observer)
 
             :cancelled
-            ;; From the compensation pass (drained) -> finalize. From
-            ;; execute-workflow-fn observing the flag mid-run -> recur so the
-            ;; top-of-loop runs the compensation pass.
-            (if (:compensated? exec-result)
-              (finalize-cancelled store workflow-id
-                                  (:pending-events exec-result)
-                                  observer)
-              (recur (inc iteration)))
+            ;; Cancellation surfaced from the body (a stub's check-cancelled!).
+            ;; Any saga rollback already ran inside the user's catch before the
+            ;; cancel exception was rethrown, so just finalize.
+            (finalize-cancelled store workflow-id
+                                (:pending-events exec-result)
+                                observer)
 
             :suspended
             (let [action (handle-suspension engine
diff --git a/src/intemporal/internal/execution.cljs b/src/intemporal/internal/execution.cljs
index 39a9915..40d032a 100644
--- a/src/intemporal/internal/execution.cljs
+++ b/src/intemporal/internal/execution.cljs
@@ -14,48 +14,6 @@
 ;; Workflow Execution Engine
 ;; ============================================================================
 
-(defn- run-compensations!
-  "Run registered compensations in reverse (LIFO). A compensating activity
-   suspends on first execution -> rethrow the suspension so the loop schedules
-   it. Real errors from a compensation are logged and skipped (best-effort).
-
-   Observer notes: -started fires once per replay pass that has compensations,
-   -completed only on the pass where the stack drains without suspending. Like
-   on-workflow-suspended/-resumed, these may fire across multiple passes; dedup
-   by workflow-id if exactly-once is needed."
-  [comps]
-  (when (seq comps)
-    (ctx/notify-observer p/on-compensation-started (ctx/current-workflow-id)))
-  (doseq [c (reverse comps)]
-    (try
-      (c)
-      (catch js/Error t
-        (when (error/suspension? t) (throw t))
-        (ctx/notify-observer p/on-compensation-failed
-                             (ctx/current-workflow-id) (error/throwable->map t))
-        (log/warnf "Compensation failed, continuing: %s" (ex-message t)))))
-  (when (seq comps)
-    (ctx/notify-observer p/on-compensation-completed (ctx/current-workflow-id))))
-
-(defn- compensate-result
-  "Run compensations then build the terminal result for a real failure `e`.
-   If a compensating activity suspends, surfaces :suspended so the loop schedules
-   it; on replay the body re-throws and already-run compensations are cached."
-  [ctx e pending-asyncs pending-events]
-  (try
-    (run-compensations! @(:compensations ctx))
-    {:status :failed
-     :error e
-     :pending-events @pending-events}
-    (catch js/Error s
-      (if (error/suspension? s)
-        {:status :suspended
-         :suspension-type (error/suspension-type s)
-         :suspension-data (error/suspension-data s)
-         :pending-asyncs @pending-asyncs
-         :pending-events @pending-events}
-        (throw s)))))
-
 (defn execute-workflow-fn [workflow-fn args]
   ;; Capture context so async callbacks (from p/let, etc.) can access it
   ;; after the dynamic binding scope has exited
@@ -93,7 +51,13 @@
                                  :pending-events @pending-events}
 
                                 :else
-                                (compensate-result ctx e pending-asyncs pending-events))))))
+                                ;; Real failure. Saga rollback happens inside the
+                                ;; body (user's catch -> intemporal/compensate); a
+                                ;; suspending compensation surfaces above as a
+                                ;; suspension so the loop schedules + resumes it.
+                                {:status :failed
+                                 :error e
+                                 :pending-events @pending-events})))))
           ;; Synchronous result
           {:status :completed
            :result result
@@ -113,7 +77,9 @@
            :pending-events @pending-events}
 
           :else
-          (compensate-result ctx e pending-asyncs pending-events))))))
+          {:status :failed
+           :error e
+           :pending-events @pending-events})))))
 
 (defn- execute-once
   "Execute activity once, returns a promise of result map."
@@ -392,7 +358,7 @@
            :seq-counter (atom 0)
            :pending-events (atom [])
            :pending-asyncs (atom [])
-           :compensations (atom [])
+           :compensating? (atom false)
            :store store
            :registry registry
            :observer observer}
@@ -556,46 +522,6 @@
                                  :timestamp (utils/current-time-ms)})
         result))))
 
-(defn cancellation-compensation-pass
-  "Run when a workflow is cancelled. Replays the body with cancellation
-   suppressed (the context carries :compensating-cancel?) so the compensation
-   stack is rebuilt, then runs the compensations. cljs mirror of the clj pass.
-
-   The body may return a promise (p/let), so step 1 may settle asynchronously;
-   we swallow its result/rejection (forward work is discarded) then run
-   compensations. Returns the execute-workflow-fn result shape (a map, or a
-   promise of one): :suspended (a compensating activity is pending) or
-   :cancelled (the stack drained)."
-  [workflow-fn args]
-  (let [ctx       (ctx/current-context)
-        run-comps (fn []
-                    ;; Drop forward pending events from the frontier; keep the
-                    ;; seq-counter so comp activities get stable seq nums.
-                    (reset! (:pending-events ctx) [])
-                    (reset! (:pending-asyncs ctx) [])
-                    (try
-                      (run-compensations! @(:compensations ctx))
-                      ;; :compensated? distinguishes the drained pass (-> finalize)
-                      ;; from execute-workflow-fn's mid-run :cancelled (-> recur).
-                      {:status :cancelled
-                       :compensated? true
-                       :pending-events @(:pending-events ctx)}
-                      (catch js/Error s
-                        (if (error/suspension? s)
-                          {:status :suspended
-                           :suspension-type (error/suspension-type s)
-                           :suspension-data (error/suspension-data s)
-                           :pending-asyncs @(:pending-asyncs ctx)
-                           :pending-events @(:pending-events ctx)}
-                          (throw s)))))
-        ;; Step 1: replay body to rebuild comps; swallow sync throw or rejection.
-        replay    (try (apply workflow-fn args) (catch js/Error _ nil))]
-    (if (prom/promise? replay)
-      (-> replay
-          (prom/catch (fn [_] nil))    ;; discard forward suspension/rejection
-          (bthen (fn [_] (run-comps)))) ;; bthen rebinds ctx (carries the flag)
-      (run-comps))))
-
 (defn run-workflow-internal
   "Main workflow execution loop - orchestrates replay and execution.
 
@@ -623,22 +549,12 @@
         {:status :suspended
          :workflow-id workflow-id})
 
-      ;; Check cancellation at start of each iteration. When cancelled we run the
-      ;; body via cancellation-compensation-pass (context carries
-      ;; :compensating-cancel?) so the compensation stack is rebuilt and rolled
-      ;; back; the result feeds the same dispatch (:suspended schedules a comp
-      ;; activity and recurs; :cancelled finalizes).
-      (let [cancelled?  (p/is-cancelled? store workflow-id)
-            history     (p/load-history store workflow-id)
-            ctx         (cond-> (make-workflow-context workflow-id history store registry observer
-                                                       :protocols (:protocols engine))
-                          cancelled? (assoc :compensating-cancel? true))
+      (let [history     (p/load-history store workflow-id)
+            ctx         (make-workflow-context workflow-id history store registry observer
+                                               :protocols (:protocols engine))
             exec-result (binding [ctx/*workflow-context* ctx]
-                          (log/debugf "Executing workflow function %s (cancelled? %s)..."
-                                      workflow-fn cancelled?)
-                          (if cancelled?
-                            (cancellation-compensation-pass workflow-fn args)
-                            (execute-workflow-fn workflow-fn args)))
+                          (log/debugf "Executing workflow function %s..." workflow-fn)
+                          (execute-workflow-fn workflow-fn args))
               dispatch (fn [exec-result]
                          (log/debugf "Workflow function executed, got: %s" (:status exec-result))
                          (case (:status exec-result)
@@ -650,14 +566,13 @@
                                                observer)
 
                            :cancelled
-                           ;; From the compensation pass (drained) -> finalize.
-                           ;; From execute-workflow-fn observing the flag mid-run
-                           ;; -> recur so the top-of-loop runs the pass.
-                           (if (:compensated? exec-result)
-                             (finalize-cancelled store workflow-id
-                                                 (:pending-events exec-result)
-                                                 observer)
-                             (prom/recur (inc iteration)))
+                           ;; Cancellation surfaced from the body (a stub's
+                           ;; check-cancelled!). Any saga rollback already ran
+                           ;; inside the user's catch before the cancel exception
+                           ;; was rethrown, so just finalize.
+                           (finalize-cancelled store workflow-id
+                                               (:pending-events exec-result)
+                                               observer)
 
                            :suspended
                            (blet [action (handle-suspension engine
diff --git a/test/intemporal/tests/bench/fdb_test.clj b/test/intemporal/tests/bench/fdb_test.clj
index 8395cd5..3969164 100644
--- a/test/intemporal/tests/bench/fdb_test.clj
+++ b/test/intemporal/tests/bench/fdb_test.clj
@@ -1,12 +1,12 @@
 (ns ^:integration intemporal.tests.bench.fdb-test
-  (:require [clojure.test :refer [deftest testing is]]
+  (:require [clojure.test :refer [deftest testing]]
             [intemporal.store.fdb :as fdb-store]
             [intemporal.tests.bench.test-suite :as suite]
             [me.vedang.clj-fdb.FDB :as cfdb]))
 
 (deftest fdb-store-test
   (testing "FoundationDB Store Implementation"
-    (let [db (cfdb/select-api-version 730)
+    (let [db (cfdb/select-api-version 710)
           db (cfdb/open db)]
 
       ;; Run shared suite
@@ -19,5 +19,5 @@
     ;; 1k => ~1s
     ;; 10k => ~5s
     ;; 100k => 6GB, 53s
-    (suite/run-store-tests (fdb-store/make-fdb-store (cfdb/open (cfdb/select-api-version 730)) "intemporal-tests") 100000))
+    (suite/run-store-tests (fdb-store/make-fdb-store (cfdb/open (cfdb/select-api-version 710)) "intemporal-tests") 100000))
   "")
\ No newline at end of file
diff --git a/test/intemporal/tests/context_macros_test.cljs b/test/intemporal/tests/context_macros_test.cljs
index e74ed70..80615cd 100644
--- a/test/intemporal/tests/context_macros_test.cljs
+++ b/test/intemporal/tests/context_macros_test.cljs
@@ -2,7 +2,7 @@
   (:require [intemporal.core :as intemporal]
             [cljs.test :refer [deftest is testing]]
             [intemporal.internal.context :as ctx]
-            [intemporal.tests.utils :refer [with-trace-logging]]
+            [intemporal.tests.utils :as utils]
             [promesa.core :as p])
   (:require-macros [intemporal.internal.context :as ctx :refer [blet bthen bfinally]]
                    [intemporal.tests.utils :refer [with-result]]))
diff --git a/test/intemporal/tests/crash/saga_compensation_crash_test.clj b/test/intemporal/tests/crash/saga_compensation_crash_test.clj
index 87a4b97..2ede838 100644
--- a/test/intemporal/tests/crash/saga_compensation_crash_test.clj
+++ b/test/intemporal/tests/crash/saga_compensation_crash_test.clj
@@ -28,18 +28,25 @@
 ;; missing signal is our deterministic "crash" point: the workflow suspends
 ;; mid-compensation and is resumed by a fresh engine in phase 2.
 (defn crash-saga [order]
-  (let [hotel   (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel   (intemporal/stub #'book-hotel)
         flight  (intemporal/stub #'book-flight)
         charge  (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (do (cflight f)
-          (intemporal/wait-for-signal "continue-compensation")))
-    (charge order)
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        ;; flight compensation cancels the flight, then waits for a signal -
+        ;; the deterministic "crash" point mid-compensation.
+        (intemporal/add-compensation s #(do (cflight f)
+                                            (intemporal/wait-for-signal "continue-compensation"))))
+      (charge order)
+      :booked
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
 
 (defn- count-events [store workflow-id event-type]
   (->> (p/load-history store workflow-id)
diff --git a/test/intemporal/tests/jepsen/bug_1_1_test.clj b/test/intemporal/tests/jepsen/bug_1_1_test.clj
index 825cb7b..462d54b 100644
--- a/test/intemporal/tests/jepsen/bug_1_1_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_1_test.clj
@@ -80,7 +80,7 @@
 (deftest ^:integration signal-across-instances-fdb
   (testing "two FDBStore instances over the same FoundationDB"
     (let [root    (str "bug11-" (random-uuid))
-          fdb     (cfdb/select-api-version 730)
+          fdb     (cfdb/select-api-version 710)
           db      (.open fdb "docker/fdb.cluster")
           store-a (fdb-store/make-fdb-store db root)
           store-b (fdb-store/make-fdb-store db root)]
diff --git a/test/intemporal/tests/jepsen/bug_1_2_test.clj b/test/intemporal/tests/jepsen/bug_1_2_test.clj
index 0d611fb..ef15c39 100644
--- a/test/intemporal/tests/jepsen/bug_1_2_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_2_test.clj
@@ -52,7 +52,7 @@
 (deftest ^:integration claim-is-exclusive-fdb
   (testing "FDBStore"
     (let [root  (str "bug12-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (assert-fixed (run-scenario store)))))
diff --git a/test/intemporal/tests/jepsen/bug_1_3_test.clj b/test/intemporal/tests/jepsen/bug_1_3_test.clj
index 86b24e1..5a85457 100644
--- a/test/intemporal/tests/jepsen/bug_1_3_test.clj
+++ b/test/intemporal/tests/jepsen/bug_1_3_test.clj
@@ -78,7 +78,7 @@
 (deftest ^:integration engine-restart-recovers-fdb
   (testing "FDBStore: worker recovers after crash"
     (let [root  (str "bug13-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (assert-recovered (run-scenario store)))))
diff --git a/test/intemporal/tests/jepsen/bug_2_1_test.clj b/test/intemporal/tests/jepsen/bug_2_1_test.clj
index b30fcfe..fa36a71 100644
--- a/test/intemporal/tests/jepsen/bug_2_1_test.clj
+++ b/test/intemporal/tests/jepsen/bug_2_1_test.clj
@@ -106,7 +106,7 @@
 (deftest ^:integration signal-delivered-in-register-consume-window-fdb
   (testing "RacingStore on FDBStore: in-window signal wakes the workflow"
     (let [root  (str "bug21-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           inner (fdb-store/make-fdb-store db root)]
       (assert-woke (run-scenario inner)))))
diff --git a/test/intemporal/tests/jepsen/bug_2_3_test.clj b/test/intemporal/tests/jepsen/bug_2_3_test.clj
index 157dd47..f421385 100644
--- a/test/intemporal/tests/jepsen/bug_2_3_test.clj
+++ b/test/intemporal/tests/jepsen/bug_2_3_test.clj
@@ -89,7 +89,7 @@
 (deftest ^:integration cancellation-reaches-sleeping-workflow-fdb
   (testing "cancel-workflow terminates a signal-sleeping workflow (FDBStore)"
     (let [root  (str "bug23-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (assert-cancelled (run-scenario store)))))
diff --git a/test/intemporal/tests/saga_test.clj b/test/intemporal/tests/saga_test.clj
index f6d758a..42144cc 100644
--- a/test/intemporal/tests/saga_test.clj
+++ b/test/intemporal/tests/saga_test.clj
@@ -1,10 +1,11 @@
 (ns intemporal.tests.saga-test
-  "Tests for saga / compensation support (with-failure + add-compensation).
+  "Tests for saga / compensation support (saga + add-compensation + compensate).
    A compensation registered for a successful step runs (in reverse order)
-   when the workflow later fails."
+   when the workflow later fails and the catch block calls compensate."
   (:require [intemporal.core :as intemporal]
             [intemporal.tests.utils :refer [with-result]]
             [clojure.test :refer [deftest is testing]]
+            [matcher-combinators.matchers :as m]
             [matcher-combinators.test :refer [match?]]))
 
 ;; ============================================================================
@@ -40,81 +41,112 @@
 ;; ============================================================================
 
 (defn happy-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight)
         charge (intemporal/stub #'charge-card)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    (charge order)
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      (charge order)
+      :booked
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
 
 (defn failing-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight)
         charge (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    (charge order)
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      (charge order)
+      :booked
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
 
 (defn fail-on-flight-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      ;; flight fails before its compensation is registered
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      :booked
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
 
 ;; Books hotel + flight, then stays busy in a loop so a cancel arrives after the
 ;; bookings have completed (mirrors cancellation-test/long-flow).
 (defn cancel-rollback-saga [order]
-  (let [hotel   (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel   (intemporal/stub #'book-hotel)
         flight  (intemporal/stub #'book-flight)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)
         slow    (intemporal/stub #'slow-step)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    (loop [i 0]
-      (if (< i 40) (do (slow i) (recur (inc i))) :booked))))
-
-;; Cancel lands before any with-failure step completes (busy first).
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      (loop [i 0]
+        (if (< i 40) (do (slow i) (recur (inc i))) :booked))
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
+
+;; Cancel lands before any compensation is registered (busy first).
 (defn cancel-early-saga [order]
-  (let [hotel (intemporal/stub #'book-hotel)
+  (let [s      (intemporal/saga)
+        hotel (intemporal/stub #'book-hotel)
         chotel (intemporal/stub #'cancel-hotel)
         slow  (intemporal/stub #'slow-step)]
-    (loop [i 0]
-      (when (< i 3) (slow i) (recur (inc i))))
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    :booked))
+    (try
+      (loop [i 0]
+        (when (< i 3) (slow i) (recur (inc i))))
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      :booked
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
 
 ;; Compensation activity itself fails -> swallowed, others still run.
 (defn failing-comp-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight)
         charge (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'failing-cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    (charge order)
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      (charge order)
+      :booked
+      (catch Exception e
+        (intemporal/compensate s)
+        (throw e)))))
 
 ;; ============================================================================
 ;; Tests
diff --git a/test/intemporal/tests/saga_test.cljs b/test/intemporal/tests/saga_test.cljs
index c55e3b5..0928f74 100644
--- a/test/intemporal/tests/saga_test.cljs
+++ b/test/intemporal/tests/saga_test.cljs
@@ -1,5 +1,5 @@
 (ns intemporal.tests.saga-test
-  "Tests for saga / compensation support (with-failure + add-compensation)."
+  "Tests for saga / compensation support (saga + add-compensation + compensate)."
   (:require [intemporal.core :as intemporal]
             [intemporal.tests.utils :refer [with-result]]
             [cljs.test :as t :refer [deftest is testing]]
@@ -33,41 +33,59 @@
 ;; ============================================================================
 
 (defn happy-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight)
         charge (intemporal/stub #'charge-card)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    (charge order)
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      (charge order)
+      :booked
+      (catch :default e
+        (when (intemporal/suspension? e) (throw e))
+        (intemporal/compensate s)
+        (throw e)))))
 
 (defn failing-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight)
         charge (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    (charge order)
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      (charge order)
+      :booked
+      (catch :default e
+        (when (intemporal/suspension? e) (throw e))
+        (intemporal/compensate s)
+        (throw e)))))
 
 (defn fail-on-flight-saga [order]
-  (let [hotel  (intemporal/stub #'book-hotel)
+  (let [s       (intemporal/saga)
+        hotel  (intemporal/stub #'book-hotel)
         flight (intemporal/stub #'book-flight-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
-    (intemporal/with-failure [h (hotel order)]
-      (chotel h))
-    (intemporal/with-failure [f (flight order)]
-      (cflight f))
-    :booked))
+    (try
+      (let [h (hotel order)]
+        (intemporal/add-compensation s #(chotel h)))
+      (let [f (flight order)]
+        (intemporal/add-compensation s #(cflight f)))
+      :booked
+      (catch :default e
+        (when (intemporal/suspension? e) (throw e))
+        (intemporal/compensate s)
+        (throw e)))))
 
 ;; ============================================================================
 ;; Tests
diff --git a/test/intemporal/tests/signal_test.cljs b/test/intemporal/tests/signal_test.cljs
index 6d2e5fa..9fac97d 100644
--- a/test/intemporal/tests/signal_test.cljs
+++ b/test/intemporal/tests/signal_test.cljs
@@ -3,7 +3,7 @@
             [intemporal.tests.utils :refer [with-result]]
             [cljs.test :as t :refer [deftest is testing]]
             [matcher-combinators.test :refer [match?]]
-            [promesa.core :as p])
+)
   (:require-macros [intemporal.tests.utils :refer [with-result]]
                    [intemporal.internal.context :refer [blet]]))
 
diff --git a/test/intemporal/tests/status_test.clj b/test/intemporal/tests/status_test.clj
index 56da8b6..8064f8e 100644
--- a/test/intemporal/tests/status_test.clj
+++ b/test/intemporal/tests/status_test.clj
@@ -46,7 +46,7 @@
 (deftest ^:integration status-fdb
   (testing "status lifecycle on FDBStore"
     (let [root  (str "status-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-status store))))
diff --git a/test/intemporal/tests/store/fdb_test.clj b/test/intemporal/tests/store/fdb_test.clj
index aadad6f..4c17cd2 100644
--- a/test/intemporal/tests/store/fdb_test.clj
+++ b/test/intemporal/tests/store/fdb_test.clj
@@ -1,12 +1,12 @@
 (ns ^:integration intemporal.tests.store.fdb-test
-  (:require [clojure.test :refer [deftest testing is]]
+  (:require [clojure.test :refer [deftest testing]]
             [intemporal.store.fdb :as fdb-store]
             [intemporal.tests.store.test-suite :as suite]
             [me.vedang.clj-fdb.FDB :as cfdb]))
 
 (deftest fdb-store-test
   (testing "FoundationDB Store Implementation"
-    (let [db (cfdb/select-api-version 730)
+    (let [db (cfdb/select-api-version 710)
           db (cfdb/open db)]
 
       ;; Run shared suite
diff --git a/test/intemporal/tests/timer_recovery_test.clj b/test/intemporal/tests/timer_recovery_test.clj
index 547d41b..f361cb1 100644
--- a/test/intemporal/tests/timer_recovery_test.clj
+++ b/test/intemporal/tests/timer_recovery_test.clj
@@ -77,7 +77,7 @@
 (deftest ^:integration fire-at-deterministic-fdb
   (testing "FDBStore"
     (let [root  (str "det-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-determinism store))))
@@ -118,7 +118,7 @@
 (deftest ^:integration timer-recovery-fdb
   (testing "FDBStore"
     (let [root  (str "trec-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-timer-recovery store))))
@@ -155,7 +155,7 @@
 (deftest ^:integration wake-at-filter-fdb
   (testing "FDBStore"
     (let [root  (str "wake-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-wake-at-filter store))))
diff --git a/test/intemporal/tests/timer_test.cljs b/test/intemporal/tests/timer_test.cljs
index d481349..0dba50b 100644
--- a/test/intemporal/tests/timer_test.cljs
+++ b/test/intemporal/tests/timer_test.cljs
@@ -3,7 +3,7 @@
             [intemporal.tests.utils :refer [with-result]]
             [cljs.test :as t :refer [deftest is testing]]
             [matcher-combinators.test :refer [match?]]
-            [promesa.core :as p])
+)
   (:require-macros [intemporal.tests.utils :refer [with-result]]
                    [intemporal.internal.context :refer [blet]]))
 
diff --git a/test/intemporal/tests/worker_test.clj b/test/intemporal/tests/worker_test.clj
index a37949f..9343226 100644
--- a/test/intemporal/tests/worker_test.clj
+++ b/test/intemporal/tests/worker_test.clj
@@ -69,7 +69,7 @@
 (deftest ^:integration worker-recovery-fdb
   (testing "FDBStore: worker resumes via the ownership scan"
     (let [root  (str "worker-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-worker-recovery store))))
@@ -102,7 +102,7 @@
 (deftest ^:integration claim-exclusivity-fdb
   (testing "FDBStore claim-owner exclusivity"
     (let [root  (str "claim-" (random-uuid))
-          fdb   (cfdb/select-api-version 730)
+          fdb   (cfdb/select-api-version 710)
           db    (.open fdb "docker/fdb.cluster")
           store (fdb-store/make-fdb-store db root)]
       (check-claim-exclusivity store))))

From 6825b16d104e1def70d2b394df0c6a0b2725b3bb Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Thu, 4 Jun 2026 17:24:42 +0100
Subject: [PATCH 8/9] fix fdb path

---
 test/intemporal/tests/bench/fdb_test.clj | 4 ++--
 test/intemporal/tests/store/fdb_test.clj | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/intemporal/tests/bench/fdb_test.clj b/test/intemporal/tests/bench/fdb_test.clj
index 3969164..a2b024f 100644
--- a/test/intemporal/tests/bench/fdb_test.clj
+++ b/test/intemporal/tests/bench/fdb_test.clj
@@ -7,7 +7,7 @@
 (deftest fdb-store-test
   (testing "FoundationDB Store Implementation"
     (let [db (cfdb/select-api-version 710)
-          db (cfdb/open db)]
+          db (cfdb/open db "docker/fdb.cluster")]
 
       ;; Run shared suite
       (with-open [store (fdb-store/make-fdb-store db "intemporal-tests")]
@@ -19,5 +19,5 @@
     ;; 1k => ~1s
     ;; 10k => ~5s
     ;; 100k => 6GB, 53s
-    (suite/run-store-tests (fdb-store/make-fdb-store (cfdb/open (cfdb/select-api-version 710)) "intemporal-tests") 100000))
+    (suite/run-store-tests (fdb-store/make-fdb-store (cfdb/open (cfdb/select-api-version 710) "docker/fdb.cluster") "intemporal-tests") 100000))
   "")
\ No newline at end of file
diff --git a/test/intemporal/tests/store/fdb_test.clj b/test/intemporal/tests/store/fdb_test.clj
index 4c17cd2..1d459bb 100644
--- a/test/intemporal/tests/store/fdb_test.clj
+++ b/test/intemporal/tests/store/fdb_test.clj
@@ -7,7 +7,7 @@
 (deftest fdb-store-test
   (testing "FoundationDB Store Implementation"
     (let [db (cfdb/select-api-version 710)
-          db (cfdb/open db)]
+          db (cfdb/open db "docker/fdb.cluster")]
 
       ;; Run shared suite
       (with-open [store (fdb-store/make-fdb-store db "intemporal-tests")]

From 84dd412850eb18cc049114c087dfefb9b188f162 Mon Sep 17 00:00:00 2001
From: Miguel Ping <miguel.ping@gmail.com>
Date: Thu, 4 Jun 2026 19:55:53 +0100
Subject: [PATCH 9/9] cleanup cancellation

---
 README.md                                     | 16 ++--
 src/intemporal/core.cljc                      |  6 +-
 .../crash/saga_compensation_crash_test.clj    | 30 +++---
 test/intemporal/tests/saga_test.clj           | 96 +++++++++----------
 test/intemporal/tests/saga_test.cljs          | 54 +++++------
 5 files changed, 101 insertions(+), 101 deletions(-)

diff --git a/README.md b/README.md
index 1fe2de4..72046d1 100644
--- a/README.md
+++ b/README.md
@@ -78,14 +78,14 @@ and propagate to the engine untouched.
         cancel-hotel  (intemporal/stub #'cancel-hotel)
         cancel-flight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (book-hotel order)]
-        (intemporal/add-compensation saga #(cancel-hotel h)))
-      (let [f (book-flight order)]
-        (intemporal/add-compensation saga #(cancel-flight f)))
-      ;; if charge-card throws, the catch runs compensate -> cancel-flight then
-      ;; cancel-hotel (LIFO) -> then rethrows so the workflow finalizes :failed
-      (charge-card order)
-      :booked
+      (let [h (book-hotel order)
+            _ (intemporal/add-compensation saga #(cancel-hotel h))]
+        (let [f (book-flight order)
+              _ (intemporal/add-compensation saga #(cancel-flight f))]
+          ;; if charge-card throws, the catch runs compensate -> cancel-flight then
+          ;; cancel-hotel (LIFO) -> then rethrows so the workflow finalizes :failed
+          (charge-card order)
+          :booked))
       (catch Exception e
         (intemporal/compensate saga)
         (throw e)))))
diff --git a/src/intemporal/core.cljc b/src/intemporal/core.cljc
index 7dc2a81..1b23861 100644
--- a/src/intemporal/core.cljc
+++ b/src/intemporal/core.cljc
@@ -589,9 +589,9 @@
 
    (let [s (saga)]
      (try
-       (let [h (book-hotel order)]
-         (add-compensation s #(cancel-hotel h)))
-       (charge-card order)
+       (let [h (book-hotel order)
+             _ (add-compensation s #(cancel-hotel h))]
+         (charge-card order))
        (catch Exception e
          (compensate s)             ;; rolls back completed steps, LIFO
          (throw e))))
diff --git a/test/intemporal/tests/crash/saga_compensation_crash_test.clj b/test/intemporal/tests/crash/saga_compensation_crash_test.clj
index 2ede838..7414927 100644
--- a/test/intemporal/tests/crash/saga_compensation_crash_test.clj
+++ b/test/intemporal/tests/crash/saga_compensation_crash_test.clj
@@ -15,13 +15,13 @@
 (def exec-counts (atom {}))
 (defn- bump! [k] (swap! exec-counts update k (fnil inc 0)))
 
-(defn book-hotel  [order] (bump! :book-hotel)  {:hotel order})
+(defn book-hotel [order] (bump! :book-hotel) {:hotel order})
 (defn book-flight [order] (bump! :book-flight) {:flight order})
 (defn charge-card-fails [order]
   (bump! :charge-card)
   (throw (ex-info "card declined" {:order order})))
 
-(defn cancel-hotel  [_] (bump! :cancel-hotel)  :hotel-cancelled)
+(defn cancel-hotel [_] (bump! :cancel-hotel) :hotel-cancelled)
 (defn cancel-flight [_] (bump! :cancel-flight) :flight-cancelled)
 
 ;; The flight compensation cancels the flight, then waits for a signal. The
@@ -35,15 +35,15 @@
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        ;; flight compensation cancels the flight, then waits for a signal -
-        ;; the deterministic "crash" point mid-compensation.
-        (intemporal/add-compensation s #(do (cflight f)
-                                            (intemporal/wait-for-signal "continue-compensation"))))
-      (charge order)
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            ;; flight compensation cancels the flight, then waits for a signal -
+            ;; the deterministic "crash" point mid-compensation.
+            _ (intemporal/add-compensation s #(do (cflight f)
+                                                  (intemporal/wait-for-signal "continue-compensation")))]
+        (charge order)
+        :booked)
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
@@ -60,15 +60,15 @@
 (deftest test-compensation-survives-crash
   (testing "Compensation suspended mid-way resumes and runs each step exactly once"
     (reset! exec-counts {})
-    (let [workflow-id "saga-crash-1"
+    (let [workflow-id      "saga-crash-1"
           persistent-store (store/->InMemoryStore (atom {}))]
 
       ;; Phase 1: run until the flight compensation suspends waiting for a signal
       (testing "Phase 1: fails, begins compensation, suspends mid-compensation"
         (let [engine-1 (intemporal/make-workflow-engine :store persistent-store :threads 2)
-              fut (future
-                    (intemporal/start-workflow engine-1 crash-saga ["o1"]
-                                               :workflow-id workflow-id))]
+              fut      (future
+                         (intemporal/start-workflow engine-1 crash-saga ["o1"]
+                                                    :workflow-id workflow-id))]
           ;; Give it time to: book hotel+flight, fail charge, cancel-flight,
           ;; then suspend at wait-for-signal.
           (Thread/sleep 300)
diff --git a/test/intemporal/tests/saga_test.clj b/test/intemporal/tests/saga_test.clj
index 42144cc..4f91644 100644
--- a/test/intemporal/tests/saga_test.clj
+++ b/test/intemporal/tests/saga_test.clj
@@ -15,7 +15,7 @@
 (def events (atom []))
 (defn- record! [e] (swap! events conj e))
 
-(defn book-hotel  [order] (record! [:book-hotel order])  {:hotel order})
+(defn book-hotel [order] (record! [:book-hotel order]) {:hotel order})
 (defn book-flight [order] (record! [:book-flight order]) {:flight order})
 (defn charge-card [order] (record! [:charge-card order]) {:charge order})
 
@@ -27,7 +27,7 @@
   (record! [:book-flight order])
   (throw (ex-info "no seats" {:order order})))
 
-(defn cancel-hotel  [v] (record! [:cancel-hotel v])  :hotel-cancelled)
+(defn cancel-hotel [v] (record! [:cancel-hotel v]) :hotel-cancelled)
 (defn cancel-flight [v] (record! [:cancel-flight v]) :flight-cancelled)
 
 (defn failing-cancel-flight [v]
@@ -42,53 +42,53 @@
 
 (defn happy-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight)
-        charge (intemporal/stub #'charge-card)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        charge  (intemporal/stub #'charge-card)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      (charge order)
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        (charge order)
+        :booked)
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
 
 (defn failing-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight)
-        charge (intemporal/stub #'charge-card-fails)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        charge  (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      (charge order)
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        (charge order)
+        :booked)
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
 
 (defn fail-on-flight-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight-fails)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      ;; flight fails before its compensation is registered
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            ;; flight fails before its compensation is registered
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        :booked)
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
@@ -103,12 +103,12 @@
         cflight (intemporal/stub #'cancel-flight)
         slow    (intemporal/stub #'slow-step)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      (loop [i 0]
-        (if (< i 40) (do (slow i) (recur (inc i))) :booked))
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        (loop [i 0]
+          (if (< i 40) (do (slow i) (recur (inc i))) :booked)))
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
@@ -116,15 +116,15 @@
 ;; Cancel lands before any compensation is registered (busy first).
 (defn cancel-early-saga [order]
   (let [s      (intemporal/saga)
-        hotel (intemporal/stub #'book-hotel)
+        hotel  (intemporal/stub #'book-hotel)
         chotel (intemporal/stub #'cancel-hotel)
-        slow  (intemporal/stub #'slow-step)]
+        slow   (intemporal/stub #'slow-step)]
     (try
       (loop [i 0]
         (when (< i 3) (slow i) (recur (inc i))))
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))]
+        :booked)
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
@@ -132,18 +132,18 @@
 ;; Compensation activity itself fails -> swallowed, others still run.
 (defn failing-comp-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight)
-        charge (intemporal/stub #'charge-card-fails)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        charge  (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'failing-cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      (charge order)
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        (charge order)
+        :booked)
       (catch Exception e
         (intemporal/compensate s)
         (throw e)))))
diff --git a/test/intemporal/tests/saga_test.cljs b/test/intemporal/tests/saga_test.cljs
index 0928f74..0a28a89 100644
--- a/test/intemporal/tests/saga_test.cljs
+++ b/test/intemporal/tests/saga_test.cljs
@@ -13,7 +13,7 @@
 (def events (atom []))
 (defn- record! [e] (swap! events conj e))
 
-(defn book-hotel  [order] (record! [:book-hotel order])  {:hotel order})
+(defn book-hotel [order] (record! [:book-hotel order]) {:hotel order})
 (defn book-flight [order] (record! [:book-flight order]) {:flight order})
 (defn charge-card [order] (record! [:charge-card order]) {:charge order})
 
@@ -25,7 +25,7 @@
   (record! [:book-flight order])
   (throw (ex-info "no seats" {:order order})))
 
-(defn cancel-hotel  [v] (record! [:cancel-hotel v])  :hotel-cancelled)
+(defn cancel-hotel [v] (record! [:cancel-hotel v]) :hotel-cancelled)
 (defn cancel-flight [v] (record! [:cancel-flight v]) :flight-cancelled)
 
 ;; ============================================================================
@@ -34,18 +34,18 @@
 
 (defn happy-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight)
-        charge (intemporal/stub #'charge-card)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        charge  (intemporal/stub #'charge-card)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      (charge order)
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        (charge order)
+        :booked)
       (catch :default e
         (when (intemporal/suspension? e) (throw e))
         (intemporal/compensate s)
@@ -53,18 +53,18 @@
 
 (defn failing-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight)
-        charge (intemporal/stub #'charge-card-fails)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight)
+        charge  (intemporal/stub #'charge-card-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      (charge order)
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        (charge order)
+        :booked)
       (catch :default e
         (when (intemporal/suspension? e) (throw e))
         (intemporal/compensate s)
@@ -72,16 +72,16 @@
 
 (defn fail-on-flight-saga [order]
   (let [s       (intemporal/saga)
-        hotel  (intemporal/stub #'book-hotel)
-        flight (intemporal/stub #'book-flight-fails)
+        hotel   (intemporal/stub #'book-hotel)
+        flight  (intemporal/stub #'book-flight-fails)
         chotel  (intemporal/stub #'cancel-hotel)
         cflight (intemporal/stub #'cancel-flight)]
     (try
-      (let [h (hotel order)]
-        (intemporal/add-compensation s #(chotel h)))
-      (let [f (flight order)]
-        (intemporal/add-compensation s #(cflight f)))
-      :booked
+      (let [h (hotel order)
+            _ (intemporal/add-compensation s #(chotel h))
+            f (flight order)
+            _ (intemporal/add-compensation s #(cflight f))]
+        :booked)
       (catch :default e
         (when (intemporal/suspension? e) (throw e))
         (intemporal/compensate s)