diff --git a/.gitignore b/.gitignore
index 75f32d2..45b979d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,5 +36,12 @@ memory-slop.md
 # Icon-design scratch renders (Nano Banana ideation output)
 docs/design/renders/
 
+# Regenerable eval plot artifacts — the per-run records under eval/runs/ ARE
+# committed (comparable history); these are scratch from `--dump-scores` + the
+# ROC plotter (scripts/plot_threshold.py).
+eval/scores-*.json
+eval/thresholds-*.json
+eval/threshold.png
+
 # LLVM profiling output
 *.profraw
diff --git a/CLAUDE.md b/CLAUDE.md
index 38f5c12..6d7fdfd 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -7,8 +7,8 @@ and recall content. See `README.md` for architecture and build instructions.
 
 - `Sources/EngramCore` — domain models, store, embeddings, ranking (the brains)
 - `Sources/EngramCore/RecallGate.swift` — the recall confidence gate: decides which fetched memories are confident enough to inject on a prompt. Shared by the hook and the eval; presets `.current`/`.proposed`. `RecallText.swift` is the shared tokenizer (stopwords + lexical token overlap) used by both the gate and FTS.
-- `Sources/EngramCore/RetrievalMetrics.swift` — pure retrieval-quality metrics (Recall@k, MRR, gate precision/recall, negative false-positive rate, injection precision) over labeled `QueryOutcome`s.
-- `Sources/engram-eval` — offline retrieval eval harness (`swift run engram-eval`): seeds a temp store from `Resources/corpus.json` + `queries.json`, runs each prompt through `fetch`, applies `RecallGate` configs, and prints a current-vs-tightened comparison (ADR 0021). `--distances` dumps per-kind distance separability; `--record` appends a per-run JSON file (git sha + embedder signature + host + metrics) under `eval/runs/`. Numbers are embedder/machine-dependent — it's a relative A/B, not a benchmark.
+- `Sources/EngramCore/RetrievalMetrics.swift` — pure retrieval-quality metrics (Recall@k, MRR, gate precision/recall, negative false-positive rate, injection precision) over labeled `QueryOutcome`s, plus the session-aware `SessionInjectionReport` (`evaluateSessions` / `firstTouchCoverage`) for the recall cooldown (ADR 0023).
+- `Sources/engram-eval` — offline retrieval eval harness (`swift run engram-eval`): seeds a temp store from `Resources/corpus.json` + `queries.json`, runs each prompt through `fetch`, applies `RecallGate` configs, and prints a current-vs-tightened comparison (ADR 0021). It then replays `Resources/sessions.json` (ordered on-topic prompt sequences) through the gate + the real session cooldown and prints the **session-aware injection** metric — redundant re-injection rate with vs without the cooldown, plus first-touch coverage (ADR 0023). `--distances` dumps per-kind distance separability; `--record` appends a per-run JSON file (git sha + embedder signature + host + metrics, incl. the `sessions` block) under `eval/runs/`. Numbers are embedder/machine-dependent — it's a relative A/B, not a benchmark.
 - `Sources/engram` — the `engram` CLI (store / fetch / stats / activity / hook)
 - `Sources/CSQLite` — vendored SQLite + sqlite-vec (static C target)
 - `Sources/engram/Setup.swift` — install logic (`engram install` / `engram setup`); the single source of truth for installing the CLI, hook, and skills. `engram install` symlinks `/usr/local/bin/engram` → the running binary
diff --git a/README.md b/README.md
index 2125788..b4d445e 100644
--- a/README.md
+++ b/README.md
@@ -139,7 +139,10 @@ per embedder — its distance thresholds are tuned to the live model's scale via
 the offline eval (`swift run engram-eval`), ADR 0021.
 Off-topic prompts inject nothing — it exits 0 silently, so it can't block or
 spam. (It does record a *retrieval-activity* row — see below — which is
-decoupled from ranking, ADR 0015.)
+decoupled from ranking, ADR 0015.) A **session-scoped cooldown** (ADR 0023) then
+drops any memory already injected via recall earlier in the same session (within
+30 min), so the same note doesn't re-appear on every on-topic prompt — keyed off
+the `session_id` now carried on each retrieval row.
 
 The same hook also appends a **reflection nudge** every 5th prompt of a session
 (tracked by a tiny per-session counter sidecar'd next to the store): a soft
diff --git a/Sources/EngramCore/MemoryStore.swift b/Sources/EngramCore/MemoryStore.swift
index 172a419..b06fa8b 100644
--- a/Sources/EngramCore/MemoryStore.swift
+++ b/Sources/EngramCore/MemoryStore.swift
@@ -47,10 +47,30 @@ public actor MemoryStore {
     private static func migrate(db: SQLiteDatabase, embedder: Embedder, databaseURL: URL) throws {
         try createSchema(db)
         try addMissingColumns(db)
+        try addMissingRetrievalColumns(db)
         try migrateVectorStore(db: db, embedder: embedder, databaseURL: databaseURL)
         try backfillFTS(db)
     }
 
+    /// Upgrades a pre-ADR-0023 `retrievals` table in place: adds the nullable
+    /// `session_id` column when absent, then ensures its index. Safe on every open.
+    private static func addMissingRetrievalColumns(_ db: SQLiteDatabase) throws {
+        var hasSessionID = false
+        try db.prepare("PRAGMA table_info(retrievals);") { stmt in
+            while try stmt.step() {
+                if stmt.columnText(1) == "session_id" { hasSessionID = true }
+            }
+        }
+        if !hasSessionID {
+            try db.exec("ALTER TABLE retrievals ADD COLUMN session_id TEXT;")
+        }
+        // The index is created here rather than in createSchema so that it is
+        // only ever built after session_id is known to exist, for fresh and
+        // upgraded databases alike. Declaring it in createSchema breaks old DBs
+        // whose retrievals table was created before the column was introduced.
+        try db.exec("CREATE INDEX IF NOT EXISTS idx_retrievals_session ON retrievals(session_id, memory_id, at);")
+    }
+
     private static func createSchema(_ db: SQLiteDatabase) throws {
         try db.exec(
             """
@@ -81,7 +101,8 @@ public actor MemoryStore {
                 memory_id TEXT NOT NULL,
                 source TEXT NOT NULL,
                 query TEXT,
-                at REAL NOT NULL
+                at REAL NOT NULL,
+                session_id TEXT
             );
             CREATE INDEX IF NOT EXISTS idx_retrievals_at ON retrievals(at);
             CREATE INDEX IF NOT EXISTS idx_memories_deleted ON memories(deleted_at);
@@ -878,15 +899,16 @@ public actor MemoryStore {
     /// `query` that surfaced them. One row per id, single timestamp, in a
     /// transaction. Deliberately does **not** touch `access_count` — this ledger
     /// is decoupled from ranking (ADR 0015 preserves ADR 0005's loop-break).
-    public func recordRetrieval(memoryIDs: [UUID], source: RetrievalSource, query: String? = nil) throws {
+    public func recordRetrieval(memoryIDs: [UUID], source: RetrievalSource, query: String? = nil, sessionID: String? = nil) throws {
         guard !memoryIDs.isEmpty else { return }
         let now = Date().timeIntervalSince1970
         let trimmedQuery = query.map { String($0.prefix(Self.maxRetrievalQueryLength)) }
         try db.exec("BEGIN;")
         do {
             for id in memoryIDs {
-                try db.prepare("INSERT INTO retrievals(memory_id, source, query, at) VALUES(?, ?, ?, ?);") { stmt in
-                    stmt.bind(id.uuidString, at: 1).bind(source.rawValue, at: 2).bind(trimmedQuery, at: 3).bind(now, at: 4)
+                try db.prepare("INSERT INTO retrievals(memory_id, source, query, at, session_id) VALUES(?, ?, ?, ?, ?);") { stmt in
+                    stmt.bind(id.uuidString, at: 1).bind(source.rawValue, at: 2).bind(trimmedQuery, at: 3)
+                        .bind(now, at: 4).bind(sessionID, at: 5)
                     _ = try stmt.step()
                 }
             }
@@ -897,6 +919,35 @@ public actor MemoryStore {
         }
     }
 
+    /// Subset of `memoryIDs` that this session already received via `recall` within
+    /// the last `cooldown` seconds (ADR 0023). The recall hook drops these post-gate
+    /// so the same memory isn't re-injected on every on-topic prompt. An empty
+    /// `sessionID` (e.g. a manual `fetch` with no session) or an empty id list
+    /// yields an empty set, so nothing is ever suppressed outside a real session.
+    public func recentlyInjectedInSession(_ memoryIDs: [UUID], sessionID: String, within cooldown: TimeInterval) throws -> Set<UUID> {
+        if sessionID.isEmpty || memoryIDs.isEmpty { return [] }
+        let oldestAllowed = Date().timeIntervalSince1970 - cooldown
+        let idSlots = Array(repeating: "?", count: memoryIDs.count).joined(separator: ",")
+        let sql = """
+            SELECT DISTINCT memory_id FROM retrievals
+            WHERE session_id = ? AND source = ? AND at >= ? AND memory_id IN (\(idSlots));
+            """
+        var alreadyInjected = Set<UUID>()
+        try db.prepare(sql) { stmt in
+            stmt.bind(sessionID, at: 1).bind(RetrievalSource.recall.rawValue, at: 2).bind(oldestAllowed, at: 3)
+            for (offset, id) in memoryIDs.enumerated() { stmt.bind(id.uuidString, at: Int32(4 + offset)) }
+            while try stmt.step() {
+                if let raw = stmt.columnText(0), let hit = UUID(uuidString: raw) { alreadyInjected.insert(hit) }
+            }
+        }
+        return alreadyInjected
+    }
+
+    /// Cooldown for re-injecting the same memory via recall within one session
+    /// (ADR 0023). 30 minutes: short on-topic sessions show a memory once; a long
+    /// session gets at most a periodic refresh rather than the same note every prompt.
+    public static let recallReinjectionCooldown: TimeInterval = 30 * 60
+
     /// Retrieval-activity rows from `since` onward, newest first, optionally
     /// filtered to one `source`. Powers `engram activity` and the Activity view.
     public func retrievals(since: Date, source: RetrievalSource? = nil, limit: Int = 500) throws -> [RetrievalEvent] {
diff --git a/Sources/EngramCore/RecallGate.swift b/Sources/EngramCore/RecallGate.swift
index 3fe2961..020138c 100644
--- a/Sources/EngramCore/RecallGate.swift
+++ b/Sources/EngramCore/RecallGate.swift
@@ -54,13 +54,19 @@ public struct RecallGateConfig: Sendable, Equatable {
     /// kill the single-keyword leak. The per-query relevance floor and median gate
     /// were dropped — measurement showed neither separates on- from off-topic.
     ///
-    /// ⚠️ `maxDistance` is embedder-specific. 0.10 fits the contextual model; the
-    /// fallback `word-512` embedder lives on a different scale. Before shipping to
-    /// the hook this should become embedder-relative rather than a constant.
+    /// Retuned 0.10 → **0.09** (ADR 0021 addendum): on the eval that drops the
+    /// negative false-positive rate 13% → 0% with *unchanged* gate recall (the
+    /// lexical leg holds recall; the distance leg only sheds off-topic injections).
+    /// Engram's recall is precision-first — it runs on every prompt, so a false
+    /// positive bloats context repeatedly while a miss is recoverable (it re-fires
+    /// next prompt, or `/recall`). Tightening past 0.09 finally costs gate recall.
+    ///
+    /// ⚠️ `maxDistance` is embedder-specific. 0.09 fits the contextual model; the
+    /// fallback `word-512` embedder lives on a different scale (it keeps `.current`).
     public static let proposed = RecallGateConfig(
         topK: 3,
         minRelevance: 0,
-        maxDistance: 0.10,
+        maxDistance: 0.09,
         minLexicalTokenHits: 2,
         requireDistanceBelowMedian: false
     )
diff --git a/Sources/EngramCore/RetrievalMetrics.swift b/Sources/EngramCore/RetrievalMetrics.swift
index b2cbd32..1091dea 100644
--- a/Sources/EngramCore/RetrievalMetrics.swift
+++ b/Sources/EngramCore/RetrievalMetrics.swift
@@ -121,3 +121,62 @@ public enum RetrievalMetrics {
         values.isEmpty ? 0 : values.reduce(0, +) / Double(values.count)
     }
 }
+
+/// Session-aware injection metric (ADR 0023). The per-query metrics above can't
+/// see *repetition across a session* — the same memory re-injected on prompt
+/// after prompt. This summarizes that over ordered prompt sequences.
+public struct SessionInjectionReport: Sendable, Codable {
+    public let sessionCount: Int
+    public let promptCount: Int
+    /// Total memories injected across every prompt of every session.
+    public let totalInjections: Int
+    /// Injections of a memory *beyond the first* within the same session — pure
+    /// repetition. The number the session cooldown is meant to drive toward zero.
+    public let redundantInjections: Int
+    /// `redundantInjections / totalInjections` — 0 means every injection was a
+    /// memory's first appearance in its session.
+    public let redundantRate: Double
+    public let meanInjectionsPerSession: Double
+}
+
+extension RetrievalMetrics {
+    /// Summarizes repetition across sessions. `sessions[s][p]` holds the memory ids
+    /// injected on prompt `p` of session `s`, in order; an id is "redundant" every
+    /// time it recurs after its first appearance within the *same* session.
+    public static func evaluateSessions(_ sessions: [[[UUID]]]) -> SessionInjectionReport {
+        var injected = 0
+        var repeats = 0
+        var prompts = 0
+        for turns in sessions {
+            prompts += turns.count
+            var firstSeen = Set<UUID>()
+            for id in turns.joined() {
+                injected += 1
+                let isFirst = firstSeen.insert(id).inserted
+                repeats += isFirst ? 0 : 1
+            }
+        }
+        let rate = injected == 0 ? 0 : Double(repeats) / Double(injected)
+        let perSession = sessions.isEmpty ? 0 : Double(injected) / Double(sessions.count)
+        return SessionInjectionReport(
+            sessionCount: sessions.count,
+            promptCount: prompts,
+            totalInjections: injected,
+            redundantInjections: repeats,
+            redundantRate: rate, meanInjectionsPerSession: perSession
+        )
+    }
+
+    /// Fraction of (session, memory) pairs injected at least once *without* the
+    /// cooldown that still appear at least once *with* it. Scored per session, so a
+    /// hit in another session can't mask a drop. Must stay 1.0: only repeats may go.
+    public static func firstTouchCoverage(withoutCooldown: [[[UUID]]], withCooldown: [[[UUID]]]) -> Double {
+        var expected = 0, kept = 0
+        for (index, prompts) in withoutCooldown.enumerated() {
+            let base = Set(prompts.joined())
+            expected += base.count
+            if index < withCooldown.count { kept += base.intersection(withCooldown[index].joined()).count }
+        }
+        return expected == 0 ? 1 : Double(kept) / Double(expected)
+    }
+}
diff --git a/Sources/engram-eval/Resources/sessions.json b/Sources/engram-eval/Resources/sessions.json
new file mode 100644
index 0000000..f796095
--- /dev/null
+++ b/Sources/engram-eval/Resources/sessions.json
@@ -0,0 +1,37 @@
+{
+  "_comment": "Ordered prompt sequences for the session-aware injection metric (ADR 0023). Each session stays on one topic, so the same memories keep clearing the gate prompt after prompt — exactly the repetition the session cooldown should damp. Prompts are not labeled; the metric measures re-injection of the same memory within a session, not correctness (that's the per-query eval).",
+  "sessions": [
+    {
+      "name": "engram-internals",
+      "prompts": [
+        "how does engram's recall hook decide what to inject on a prompt?",
+        "what does the recall confidence gate actually do?",
+        "walk me through engram's recall flow end to end",
+        "how does engram keep off-topic memories from being injected?",
+        "what embedding model does engram use to embed memories?",
+        "how does engram combine keyword and semantic search?"
+      ]
+    },
+    {
+      "name": "python-stack",
+      "prompts": [
+        "what's the standard python project setup here?",
+        "how do I manage python dependencies and virtualenvs?",
+        "which tools lint and format our python code?",
+        "how do we type-check python code?",
+        "which test framework do python projects use?",
+        "remind me of the python conventions we follow"
+      ]
+    },
+    {
+      "name": "gcp-deploy",
+      "prompts": [
+        "how are services deployed to kubernetes?",
+        "which gcp project do production services run in?",
+        "how does authentication to gcp work for our services?",
+        "how do I access the kubernetes cluster and gcp infra?",
+        "how are application secrets managed?"
+      ]
+    }
+  ]
+}
diff --git a/Sources/engram-eval/main.swift b/Sources/engram-eval/main.swift
index b8e9246..c1fb18c 100644
--- a/Sources/engram-eval/main.swift
+++ b/Sources/engram-eval/main.swift
@@ -21,8 +21,14 @@ struct EvalQuery: Decodable {
     let kind: String  // "targeted" | "multi" | "negative"
 }
 
+struct EvalSession: Decodable {
+    let name: String
+    let prompts: [String]
+}
+
 struct Corpus: Decodable { let memories: [CorpusMemory] }
 struct QuerySet: Decodable { let queries: [EvalQuery] }
+struct SessionSet: Decodable { let sessions: [EvalSession] }
 
 func loadResource<T: Decodable>(_ name: String, as type: T.Type) -> T {
     guard let url = Bundle.module.url(forResource: name, withExtension: "json") else {
@@ -43,6 +49,11 @@ let configs: [(name: String, config: RecallGateConfig)] = [
     ("calib-0.12", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.12, minLexicalTokenHits: 2)),
     ("calib-0.11", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.11, minLexicalTokenHits: 2)),
     ("calib-0.10", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.10, minLexicalTokenHits: 2)),
+    ("calib-0.09", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.09, minLexicalTokenHits: 2)),
+    ("calib-0.08", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.08, minLexicalTokenHits: 2)),
+    // Tight distance, NO lexical leg: shows how far recall falls back on the
+    // lexical floor when the semantic gate is nearly closed.
+    ("calib-0.08-lex0", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.08, minLexicalTokenHits: 0)),
     ("calib-0.11-lex0", RecallGateConfig(topK: 3, minRelevance: 0, maxDistance: 0.11, minLexicalTokenHits: 0)),
 ]
 
@@ -93,13 +104,33 @@ func run() async throws {
 
     printTable(outcomesByConfig)
 
+    // Session-aware injection (ADR 0023): replay ordered, on-topic prompt
+    // sequences and measure how often the *same* memory is re-injected within a
+    // session — with vs without the session cooldown the recall hook applies.
+    let sessionSet = loadResource("sessions", as: SessionSet.self)
+    let (noCooldown, withCooldown) = try await simulateSessions(store: store, sessions: sessionSet.sessions)
+    let sessionRecord = SessionRunRecord(
+        withoutCooldown: RetrievalMetrics.evaluateSessions(noCooldown),
+        withCooldown: RetrievalMetrics.evaluateSessions(withCooldown),
+        firstTouchCoverage: RetrievalMetrics.firstTouchCoverage(withoutCooldown: noCooldown, withCooldown: withCooldown)
+    )
+    printSessionTable(sessionRecord)
+
     if CommandLine.arguments.contains("--distances") {
         try await dumpDistances(store: store, querySet: querySet)
     }
 
+    if CommandLine.arguments.contains("--dump-scores") {
+        try await dumpScores(
+            store: store, querySet: querySet, idForSlug: idForSlug,
+            embedderSignature: await store.embedderSignature
+        )
+    }
+
     if CommandLine.arguments.contains("--record") {
         try recordRun(
             outcomesByConfig: outcomesByConfig,
+            sessions: sessionRecord,
             embedderSignature: await store.embedderSignature,
             corpusSize: corpus.memories.count,
             queryCount: querySet.queries.count
@@ -107,6 +138,41 @@ func run() async throws {
     }
 }
 
+/// Replays each session's prompts in order through `fetch` + the **shipped** gate
+/// (`config(forEmbedderSignature:)`, as the recall hook does — not the legacy
+/// `.current` — so the redundancy numbers stay faithful to production), producing
+/// the per-prompt injected-id lists twice: stateless ("without cooldown", the old
+/// behavior) and with the real session cooldown (`recentlyInjectedInSession` +
+/// `recordRetrieval`, ADR 0023) under a session id made unique by its index.
+/// Ledger errors propagate: swallowing one would silently skew the comparison.
+func simulateSessions(store: MemoryStore, sessions: [EvalSession]) async throws
+    -> (withoutCooldown: [[[UUID]]], withCooldown: [[[UUID]]]) {
+    let gate = RecallGate.config(forEmbedderSignature: await store.embedderSignature)
+    var without: [[[UUID]]] = []
+    var withCd: [[[UUID]]] = []
+    for (index, session) in sessions.enumerated() {
+        var statelessLists: [[UUID]] = []
+        var cooldownLists: [[UUID]] = []
+        let sessionID = "eval-\(index)-\(session.name)"
+        for prompt in session.prompts {
+            let results = (try? await store.fetch(query: prompt, limit: 8, recordAccess: false)) ?? []
+            let confident = RecallGate.select(results, query: prompt, config: gate).map(\.memory.id)
+            statelessLists.append(confident)
+
+            let suppressed = try await store.recentlyInjectedInSession(
+                confident, sessionID: sessionID, within: MemoryStore.recallReinjectionCooldown)
+            let fresh = confident.filter { !suppressed.contains($0) }
+            if !fresh.isEmpty {
+                try await store.recordRetrieval(memoryIDs: fresh, source: .recall, query: prompt, sessionID: sessionID)
+            }
+            cooldownLists.append(fresh)
+        }
+        without.append(statelessLists)
+        withCd.append(cooldownLists)
+    }
+    return (without, withCd)
+}
+
 // MARK: - Per-run recording (eval/runs/<timestamp>.json)
 
 struct VariantResult: Encodable {
@@ -114,6 +180,12 @@ struct VariantResult: Encodable {
     let report: RetrievalReport
 }
 
+struct SessionRunRecord: Encodable {
+    let withoutCooldown: SessionInjectionReport
+    let withCooldown: SessionInjectionReport
+    let firstTouchCoverage: Double
+}
+
 struct RunRecord: Encodable {
     let timestamp: String
     let gitSha: String
@@ -123,6 +195,7 @@ struct RunRecord: Encodable {
     let queryCount: Int
     let k: Int
     let results: [VariantResult]
+    let sessions: SessionRunRecord
 }
 
 /// Writes one JSON file per run under eval/runs/. The metadata (git sha, embedder
@@ -130,6 +203,7 @@ struct RunRecord: Encodable {
 /// alone are meaningless without the embedder/scale they were measured on.
 func recordRun(
     outcomesByConfig: [String: [QueryOutcome]],
+    sessions: SessionRunRecord,
     embedderSignature: String,
     corpusSize: Int,
     queryCount: Int
@@ -149,7 +223,8 @@ func recordRun(
         corpusSize: corpusSize,
         queryCount: queryCount,
         k: 3,
-        results: results
+        results: results,
+        sessions: sessions
     )
 
     let runsDir = URL(fileURLWithPath: "eval/runs", isDirectory: true)
@@ -180,6 +255,35 @@ func gitSha() -> String {
     }
 }
 
+/// Exports one row per (query, candidate) pair — the candidate's semantic distance
+/// and whether it is labeled relevant to the query — so an external tool can plot
+/// ROC/PR curves over the distance threshold and mark the shipped gate's ceiling.
+/// Candidates without a finite distance (lexical-only hits) are left out. Writes
+/// `eval/scores-<embedder>.json`.
+func dumpScores(store: MemoryStore, querySet: QuerySet, idForSlug: [String: UUID], embedderSignature: String) async throws {
+    struct ScoreRow: Encodable { let distance: Double; let relevant: Bool; let kind: String }
+    var scored: [ScoreRow] = []
+    for query in querySet.queries {
+        let goldIDs = Set(query.relevant.compactMap { idForSlug[$0] })
+        let candidates = (try? await store.fetch(query: query.prompt, limit: 8, recordAccess: false)) ?? []
+        for candidate in candidates {
+            let d = candidate.distance
+            guard d.isFinite, d < .greatestFiniteMagnitude else { continue }
+            scored.append(ScoreRow(distance: d, relevant: goldIDs.contains(candidate.memory.id), kind: query.kind))
+        }
+    }
+    let dir = URL(fileURLWithPath: "eval", isDirectory: true)
+    try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
+    let encoder = JSONEncoder()
+    encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
+    let thresholds = ["currentMaxDistance": RecallGateConfig.current.maxDistance, "proposedMaxDistance": RecallGateConfig.proposed.maxDistance]
+    try encoder.encode(thresholds).write(to: dir.appendingPathComponent("thresholds-\(embedderSignature).json"))
+    let scoresURL = dir.appendingPathComponent("scores-\(embedderSignature).json")
+    try encoder.encode(scored).write(to: scoresURL)
+    let relevantCount = scored.filter(\.relevant).count
+    print("\ndumped \(scored.count) candidate scores (\(relevantCount) relevant) → \(scoresURL.path)")
+}
+
 /// Diagnostic: per query kind, how separable are on-topic from off-topic by raw
 /// distance? Prints mean top-1 distance and the gap/ratio between the best
 /// candidate and the candidate median — the signals a calibrated gate could use.
@@ -210,6 +314,35 @@ func pad(_ s: String, _ width: Int) -> String {
     s.count >= width ? s : s + String(repeating: " ", count: width - s.count)
 }
 
+/// Prints the session-aware injection comparison (ADR 0023): one row for the
+/// stateless replay, one for the cooldown replay, then first-touch coverage.
+func printSessionTable(_ record: SessionRunRecord) {
+    let stateless = record.withoutCooldown
+    let cooled = record.withCooldown
+    print("\n── session-aware injection (ADR 0023): \(stateless.sessionCount) sessions · \(stateless.promptCount) prompts ──")
+    let widths = [18, 11, 10, 11]
+    let header = ["variant", "injections", "redundant", "redund-rate"]
+    print(zip(header, widths).map { pad($0, $1) }.joined(separator: " "))
+    // Data rows follow the header's column order and reuse its widths.
+    let rows = [("no-cooldown", stateless), ("session-cooldown", cooled)]
+    for (label, report) in rows {
+        let percent = String(format: "%.0f%%", report.redundantRate * 100)
+        let values = [
+            label, "\(report.totalInjections)", "\(report.redundantInjections)", percent,
+        ]
+        print(zip(values, widths).map { pad($0, $1) }.joined(separator: " "))
+    }
+    // Coverage is stored as a 0…1 fraction; scale it for the percentage line.
+    let coverage = record.firstTouchCoverage * 100
+    print(String(format: "first-touch coverage: %.0f%%  (memories still surfaced ≥1× — must be 100%%)", coverage))
+    print("""
+
+    redundant     injections of a memory beyond its first in the same session (repetition)
+    redund-rate   redundant ÷ total injections — the session cooldown should drive this to ~0
+    coverage      memories injected without the cooldown that still appear with it (over-suppression guard)
+    """)
+}
+
 func printTable(_ outcomesByConfig: [String: [QueryOutcome]]) {
     let cols = ["variant", "Recall@3", "answer%", "inj-rec", "P@3", "neg-FP%", "neg-junk", "inj-prec"]
     let widths = [16, 9, 8, 8, 7, 8, 9, 9]
diff --git a/Sources/engram/main.swift b/Sources/engram/main.swift
index 8d8d098..a9759bc 100644
--- a/Sources/engram/main.swift
+++ b/Sources/engram/main.swift
@@ -450,12 +450,23 @@ do {
             let gateConfig = RecallGate.config(forEmbedderSignature: await store.embedderSignature)
             let confident = RecallGate.select(results, query: prompt, config: gateConfig)
 
+            // Session-scoped cooldown (ADR 0023): a memory already injected via
+            // recall earlier in this session is dropped, so the same note doesn't
+            // re-appear on every on-topic prompt. No session id → nothing suppressed.
+            let sessionID = payload["session_id"] as? String ?? ""
+            let suppressed = (try? await store.recentlyInjectedInSession(
+                confident.map(\.memory.id), sessionID: sessionID,
+                within: MemoryStore.recallReinjectionCooldown)) ?? []
+            let fresh = confident.filter { !suppressed.contains($0.memory.id) }
+
             // Two independent sections: recalled notes (when confident) and a
             // periodic reflection nudge (every Nth prompt). Either may be empty.
             var sections: [String] = []
-            if !confident.isEmpty {
-                try? await store.recordRetrieval(memoryIDs: confident.map(\.memory.id), source: .recall, query: prompt)
-                let bullets = confident.map { "- \($0.memory.content)" }.joined(separator: "\n")
+            if !fresh.isEmpty {
+                try? await store.recordRetrieval(
+                    memoryIDs: fresh.map(\.memory.id), source: .recall, query: prompt,
+                    sessionID: sessionID.isEmpty ? nil : sessionID)
+                let bullets = fresh.map { "- \($0.memory.content)" }.joined(separator: "\n")
                 sections.append(untrustedMemoryBlock(
                     lead: "Possibly relevant notes from Engram (ignore if off-topic):",
                     body: bullets
diff --git a/Tests/EngramCoreTests/MemoryStoreTests.swift b/Tests/EngramCoreTests/MemoryStoreTests.swift
index fe7056a..3e4102a 100644
--- a/Tests/EngramCoreTests/MemoryStoreTests.swift
+++ b/Tests/EngramCoreTests/MemoryStoreTests.swift
@@ -182,6 +182,38 @@ private func makeTempStore() throws -> (MemoryStore, URL) {
     #expect(reloaded?.confidence == 1.0)
 }
 
+@Test func migratesPre0023RetrievalsTableMissingSessionID() async throws {
+    // Regression (ADR 0023): a DB whose `retrievals` table predates the
+    // session_id column must upgrade cleanly. The column-referencing index must
+    // not be created before the column exists, or open fails with
+    // "no such column: session_id". The existing tests only ever build the
+    // current schema on a fresh DB, so they missed this upgrade path.
+    let url = FileManager.default.temporaryDirectory
+        .appendingPathComponent("engram-test-\(UUID().uuidString).sqlite")
+    defer { try? FileManager.default.removeItem(at: url) }
+
+    // Seed the pre-0023 retrievals schema (no session_id, no session index).
+    let legacy = try SQLiteDatabase(path: url.path)
+    try legacy.exec(
+        """
+        CREATE TABLE retrievals(
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            memory_id TEXT NOT NULL,
+            source TEXT NOT NULL,
+            query TEXT,
+            at REAL NOT NULL
+        );
+        CREATE INDEX idx_retrievals_at ON retrievals(at);
+        """
+    )
+
+    // Opening with the current code must migrate, not throw.
+    let store = try MemoryStore(url: url)
+    let stored = try await store.store(content: "Survives the upgrade.")
+    try await store.recordRetrieval(memoryIDs: [stored.id], source: .recall, query: "q", sessionID: "S1")
+    #expect(try await store.recentlyInjectedInSession([stored.id], sessionID: "S1", within: 3600).contains(stored.id))
+}
+
 @Test func markVerifiedSetsVerifiedAtAndConfidence() async throws {
     let (store, url) = try makeTempStore()
     defer { try? FileManager.default.removeItem(at: url) }
@@ -379,3 +411,28 @@ private func makeTempStore() throws -> (MemoryStore, URL) {
     #expect(Lookback.parse("1w") == nil)
     #expect(Lookback.parse("abc") == nil)
 }
+
+@Test func recallCooldownSuppressesSameMemoryWithinSession() async throws {
+    // ADR 0023: a memory injected via recall earlier in the same session is
+    // reported as recently-injected so the hook can drop it (no re-injection
+    // every on-topic prompt). Uses raw ids — the ledger doesn't FK memory rows.
+    let (store, url) = try makeTempStore()
+    defer { try? FileManager.default.removeItem(at: url) }
+
+    let id = UUID()
+    try await store.recordRetrieval(memoryIDs: [id], source: .recall, query: "q", sessionID: "S1")
+
+    // same session, within the window → suppressed
+    #expect(try await store.recentlyInjectedInSession([id], sessionID: "S1", within: 3600).contains(id))
+    // a different session → not suppressed
+    #expect(try await store.recentlyInjectedInSession([id], sessionID: "S2", within: 3600).isEmpty)
+    // the cooldown has elapsed (the row is already in the past) → not suppressed
+    #expect(try await store.recentlyInjectedInSession([id], sessionID: "S1", within: 0).isEmpty)
+    // no session id (e.g. a manual fetch) → never suppress
+    #expect(try await store.recentlyInjectedInSession([id], sessionID: "", within: 3600).isEmpty)
+
+    // a non-recall source (manual fetch) is ignored by the recall cooldown
+    let other = UUID()
+    try await store.recordRetrieval(memoryIDs: [other], source: .fetch, query: "q", sessionID: "S1")
+    #expect(try await store.recentlyInjectedInSession([other], sessionID: "S1", within: 3600).isEmpty)
+}
diff --git a/Tests/EngramCoreTests/RetrievalMetricsTests.swift b/Tests/EngramCoreTests/RetrievalMetricsTests.swift
index c3dfc1f..8fefca8 100644
--- a/Tests/EngramCoreTests/RetrievalMetricsTests.swift
+++ b/Tests/EngramCoreTests/RetrievalMetricsTests.swift
@@ -69,3 +69,36 @@ private let a = UUID(), b = UUID(), c = UUID(), d = UUID()
     #expect(report.injectionPrecision == 1.0)
     #expect(report.labeledCount == 2)
 }
+
+@Test func sessionMetricCountsRedundantReinjections() {
+    // Nesting is sessions → prompts → ids injected on that prompt. Session 1 injects A on three
+    // prompts (2 redundant) and B once; session 2 injects A once. Total 5 injections, 2 redundant → 40%.
+    let sessions: [[[UUID]]] = [
+        [[a], [a, b], [a]],
+        [[a]],
+    ]
+    let report = RetrievalMetrics.evaluateSessions(sessions)
+    #expect(report.sessionCount == 2)
+    #expect(report.promptCount == 4)  // 3 prompts in session 1 + 1 in session 2
+    #expect(report.totalInjections == 5)  // A, A, B, A in session 1 + A in session 2
+    #expect(report.redundantInjections == 2)  // A's 2nd and 3rd appearance in session 1; session 2's A is not counted
+    #expect(abs(report.redundantRate - 0.4) < 1e-9)  // 2 / 5, checked within a 1e-9 tolerance
+}
+
+@Test func sessionMetricZeroWhenNoRepeats() {  // no memory repeats within the session → nothing is counted as redundant
+    let sessions: [[[UUID]]] = [[[a], [b], [c]]]  // one session, three prompts injecting A, then B, then C
+    let report = RetrievalMetrics.evaluateSessions(sessions)
+    #expect(report.redundantInjections == 0)
+    #expect(report.redundantRate == 0)  // 0 redundant out of 3 injections
+}
+
+@Test func firstTouchCoverageIsFullWhenCooldownOnlyDropsRepeats() {
+    // One session, two prompts. Without cooldown A appears twice + B once; with cooldown A once + B once.
+    // Only A's repeat is gone — every distinct memory still surfaced → coverage 1.0.
+    let without: [[[UUID]]] = [[[a], [a, b]]]
+    let withCd: [[[UUID]]] = [[[a], [b]]]
+    #expect(RetrievalMetrics.firstTouchCoverage(withoutCooldown: without, withCooldown: withCd) == 1.0)
+    // If the cooldown wrongly dropped B entirely, only A of the two distinct memories surfaces → coverage falls to 0.5.
+    let dropped: [[[UUID]]] = [[[a], []]]
+    #expect(RetrievalMetrics.firstTouchCoverage(withoutCooldown: without, withCooldown: dropped) == 0.5)
+}
diff --git a/docs/adr/0021-embedder-relative-recall-gate.md b/docs/adr/0021-embedder-relative-recall-gate.md
index 84472cf..69072c2 100644
--- a/docs/adr/0021-embedder-relative-recall-gate.md
+++ b/docs/adr/0021-embedder-relative-recall-gate.md
@@ -81,3 +81,21 @@ constant is unsafe.
   entirely by **bundling our own deterministic embedder** (ROADMAP item; would
   also make the eval reproducible across machines and the threshold a stable
   constant).
+
+## Addendum (2026-06-23) — recalibrated 0.10 → 0.09
+
+A follow-up sweep (finer `calib-0.09`/`calib-0.08` rows + an ROC/PR threshold
+plot, `scripts/plot_threshold.py`) tightened the contextual ceiling **0.10 →
+0.09**. This is calibration *within* the mechanism this ADR established, not a
+new decision — the decision text above stands.
+
+Why: on the eval, `0.10 → 0.09` drops the negative false-positive rate **13% →
+0%** and lifts injection precision **0.47 → 0.54** with **unchanged gate recall
+(93%)** — the lexical (≥2-token) leg holds recall while the tighter distance leg
+only sheds off-topic injections. This fits Engram's **precision-first** stance
+for recall: the hook runs on every prompt, so a false positive bloats context
+repeatedly, whereas a miss is recoverable (the hook re-fires on the next prompt,
+or `/recall` can surface it). `0.09` is the knee — `0.08` finally trades gate
+recall (93% → 91%) for a diminishing precision gain (0.54 → 0.57). Caveat: the
+eval set is small (47 relevant / 15 negatives), so 0% neg-FP is encouraging,
+not a guarantee; the value stays embedder-specific via `config(forEmbedderSignature:)`.
diff --git a/docs/adr/0023-session-scoped-recall-cooldown.md b/docs/adr/0023-session-scoped-recall-cooldown.md
new file mode 100644
index 0000000..de22b20
--- /dev/null
+++ b/docs/adr/0023-session-scoped-recall-cooldown.md
@@ -0,0 +1,80 @@
+# 0023 — Session-scoped recall re-injection cooldown
+
+- **Status:** Accepted
+- **Date:** 2026-06-22
+- **Deciders:** Daniel Klevebring
+
+## Context
+
+The recall hook (`engram hook recall`, ADR 0005/0021) is **stateless per prompt**:
+on every `UserPromptSubmit` it hybrid-searches with the prompt as the query and
+injects whatever clears the confidence gate (`RecallGate`). The gate answers "is
+this memory topically relevant to *this* prompt?" — which is the right question
+for a single prompt, but the wrong one across a session.
+
+A user reported that in a session *about* Engram, a single memory ("X uses Engram
+for Claude Code memory") was injected into ~30–40% of prompts (2–3 of 7–8) —
+always the same memory. That's the gate working as designed: the prompts were all
+topically on-subject, so the memory cleared the bar every time. But re-injecting
+the **same** memory across a session adds no new information after the first time;
+it wastes context and reads as spam.
+
+The retrieval-activity ledger (ADR 0015) already records one row per injected
+memory (`memory_id`, `source`, `query`, `at`) — it was missing only a session
+dimension, so the hook had no way to know "did I already show this memory in this
+session?"
+
+## Decision
+
+**Suppress re-injecting a memory that was already injected via recall in the same
+session within a cooldown window.** Concretely:
+
+- Add a `session_id TEXT` column to the `retrievals` ledger (additive migration,
+  mirroring `addMissingColumns`) plus an index on `(session_id, memory_id, at)`.
+  The recall hook reads `session_id` from the Claude Code hook payload and passes
+  it to `recordRetrieval`.
+- After the confidence gate selects the confident memories, the hook drops any
+  that were already injected via `source = recall` **in this session** within the
+  last `recallReinjectionCooldown` (default **30 minutes**). Whatever remains is
+  injected and recorded (with the session id); if nothing remains, the hook stays
+  silent, exactly as for an off-topic prompt.
+- Suppression is **scoped to recall and to the session**. Manual `engram fetch`
+  and the `session-digest` / `verify-context` hooks are unaffected. A genuinely
+  new session (or the same memory after the cooldown elapses) can surface it
+  again — so a long session still gets a periodic refresh rather than total
+  one-shot suppression, which matters because earlier context can be compacted
+  away.
+
+**Cooldown shape — time vs. prompts.** A prompt-count cooldown ("not within the
+last N prompts") maps more directly to the report, but it requires threading a
+per-session prompt index (the reflection-nudge counter) onto every ledger row.
+Time-based needs only the one `session_id` column and no coupling to the nudge
+counter, and it fixes the reported scenario equally well (the repeats were
+seconds-to-minutes apart). We ship **time-based** for v1; prompt-based remains a
+clean future refinement if 30 minutes proves too coarse.
+
+## Consequences
+
+- The most common annoyance — the *same* memory on most prompts of an on-topic
+  session — goes away, without narrowing recall breadth: other memories still
+  surface normally, and the gate/embedder calibration (ADR 0021) is untouched.
+- The change is confined to the ledger schema, `recordRetrieval`, one new query,
+  and the recall hook. New DBs get the column in `CREATE TABLE`; existing DBs get
+  it via the additive migration. The decoupling from ranking (ADR 0005/0015) is
+  preserved — this only reads/writes the retrieval ledger.
+- The `session_id` on the ledger also unlocks a **session-aware eval metric**,
+  added here (ADR 0021's eval was per-query and structurally couldn't see
+  re-injection). `engram-eval` now replays ordered prompt sequences
+  (`Resources/sessions.json`) through the gate + the real cooldown and reports,
+  via `RetrievalMetrics.SessionInjectionReport`:
+  - **redundant re-injection rate** — share of injections that repeat a memory
+    already injected earlier in the same session — measured **with vs. without**
+    the cooldown (the A/B that justifies this change), and
+  - **first-touch coverage** — share of memories injected without the cooldown that still
+    surface with it, i.e. no memory's *first* legitimate appearance is dropped (a guard against over-suppression).
+  As with the rest of the eval, the numbers are embedder/machine-dependent — a
+  relative A/B, not a benchmark — so concrete figures live in the per-run
+  `eval/runs/*.json` records, not here.
+- Trade-off: a memory shown once early in a very long session could be compacted
+  out of context and not reappear until the cooldown lapses. Accepted for v1; a
+  transcript-aware check ("is it still in context?") is a possible later refinement.
diff --git a/docs/adr/README.md b/docs/adr/README.md
index 5793417..c5c4572 100644
--- a/docs/adr/README.md
+++ b/docs/adr/README.md
@@ -32,6 +32,7 @@ supersedes the old one (and update the old one's status to `Superseded by NNNN`)
 | [0020](0020-unified-activity-timeline.md) | Unified Activity timeline: reads + writes in one stream (extends 0015) | Accepted |
 | [0021](0021-embedder-relative-recall-gate.md) | Embedder-relative recall gate, calibrated by offline eval (refines 0005's gate) | Accepted |
 | [0022](0022-privileged-helper-for-cli-install.md) | Privileged CLI install via a one-shot authenticated `osascript` | Accepted |
+| [0023](0023-session-scoped-recall-cooldown.md) | Session-scoped recall re-injection cooldown (stop re-injecting the same memory every prompt) | Accepted |
 
 ## Writing a new ADR
 
diff --git a/eval/runs/2026-06-22T19-05-37Z-contextual-512.json b/eval/runs/2026-06-22T19-05-37Z-contextual-512.json
new file mode 100644
index 0000000..916ac06
--- /dev/null
+++ b/eval/runs/2026-06-22T19-05-37Z-contextual-512.json
@@ -0,0 +1,101 @@
+{
+  "corpusSize" : 153,
+  "embedderSignature" : "contextual-512",
+  "gitSha" : "3b2fa83",
+  "host" : "daniels-fancy-macbook-pro-2.local",
+  "k" : 3,
+  "queryCount" : 58,
+  "results" : [
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 3,
+        "injectedRecall" : 0.8294573643410852,
+        "injectionPrecision" : 0.2413793103448276,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 1,
+        "precisionAtK" : 0.32558139534883723,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "current"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 1.2,
+        "injectedRecall" : 0.8410852713178294,
+        "injectionPrecision" : 0.31343283582089554,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.5333333333333333,
+        "precisionAtK" : 0.38888888888888884,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.13"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.6666666666666666,
+        "injectedRecall" : 0.8875968992248061,
+        "injectionPrecision" : 0.3728813559322034,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.3333333333333333,
+        "precisionAtK" : 0.4761904761904763,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.12"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.4205607476635514,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.5436507936507938,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.13333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.46875,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.13333333333333333,
+        "precisionAtK" : 0.5714285714285715,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.10"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.5116279069767442,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.4922480620155039,
+        "injectionPrecision" : 0.32051282051282054,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.3928571428571428,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11-lex0"
+    }
+  ],
+  "timestamp" : "2026-06-22T19:05:37Z"
+}
\ No newline at end of file
diff --git a/eval/runs/2026-06-23T05-05-50Z-contextual-512.json b/eval/runs/2026-06-23T05-05-50Z-contextual-512.json
new file mode 100644
index 0000000..aca4741
--- /dev/null
+++ b/eval/runs/2026-06-23T05-05-50Z-contextual-512.json
@@ -0,0 +1,120 @@
+{
+  "corpusSize" : 153,
+  "embedderSignature" : "contextual-512",
+  "gitSha" : "3a6d12e",
+  "host" : "daniels-fancy-macbook-pro-2.local",
+  "k" : 3,
+  "queryCount" : 58,
+  "results" : [
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 3,
+        "injectedRecall" : 0.8294573643410852,
+        "injectionPrecision" : 0.2413793103448276,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 1,
+        "precisionAtK" : 0.32558139534883723,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "current"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 1.2,
+        "injectedRecall" : 0.8410852713178294,
+        "injectionPrecision" : 0.31343283582089554,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.5333333333333333,
+        "precisionAtK" : 0.38888888888888884,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.13"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.6666666666666666,
+        "injectedRecall" : 0.8875968992248061,
+        "injectionPrecision" : 0.3728813559322034,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.3333333333333333,
+        "precisionAtK" : 0.4761904761904763,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.12"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.4205607476635514,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.5436507936507938,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.13333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.46875,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.13333333333333333,
+        "precisionAtK" : 0.5714285714285715,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.10"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.5116279069767442,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.4922480620155039,
+        "injectionPrecision" : 0.32051282051282054,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.3928571428571428,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11-lex0"
+    }
+  ],
+  "sessions" : {
+    "firstTouchCoverage" : 1,
+    "withCooldown" : {
+      "meanInjectionsPerSession" : 11.333333333333334,
+      "promptCount" : 17,
+      "redundantInjections" : 0,
+      "redundantRate" : 0,
+      "sessionCount" : 3,
+      "totalInjections" : 34
+    },
+    "withoutCooldown" : {
+      "meanInjectionsPerSession" : 17,
+      "promptCount" : 17,
+      "redundantInjections" : 17,
+      "redundantRate" : 0.3333333333333333,
+      "sessionCount" : 3,
+      "totalInjections" : 51
+    }
+  },
+  "timestamp" : "2026-06-23T05:05:50Z"
+}
\ No newline at end of file
diff --git a/eval/runs/2026-06-23T20-03-04Z-contextual-512.json b/eval/runs/2026-06-23T20-03-04Z-contextual-512.json
new file mode 100644
index 0000000..057e9ab
--- /dev/null
+++ b/eval/runs/2026-06-23T20-03-04Z-contextual-512.json
@@ -0,0 +1,165 @@
+{
+  "corpusSize" : 153,
+  "embedderSignature" : "contextual-512",
+  "gitSha" : "aa9854e",
+  "host" : "daniels-fancy-macbook-pro-2.local",
+  "k" : 3,
+  "queryCount" : 58,
+  "results" : [
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 3,
+        "injectedRecall" : 0.8294573643410852,
+        "injectionPrecision" : 0.2413793103448276,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 1,
+        "precisionAtK" : 0.32558139534883723,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "current"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 1.2,
+        "injectedRecall" : 0.8410852713178294,
+        "injectionPrecision" : 0.31343283582089554,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.5333333333333333,
+        "precisionAtK" : 0.38888888888888884,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.13"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.6666666666666666,
+        "injectedRecall" : 0.8875968992248061,
+        "injectionPrecision" : 0.3728813559322034,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.3333333333333333,
+        "precisionAtK" : 0.4761904761904763,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.12"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.4205607476635514,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.5436507936507938,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.13333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.46875,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.13333333333333333,
+        "precisionAtK" : 0.5714285714285715,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.10"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.5357142857142857,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0,
+        "precisionAtK" : 0.6150793650793651,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.09"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9069767441860465,
+        "avgInjectedOnNegatives" : 0,
+        "injectedRecall" : 0.8759689922480619,
+        "injectionPrecision" : 0.5714285714285714,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0,
+        "precisionAtK" : 0.6626016260162603,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.08"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.20930232558139536,
+        "avgInjectedOnNegatives" : 0,
+        "injectedRecall" : 0.1937984496124031,
+        "injectionPrecision" : 0.5625,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0,
+        "precisionAtK" : 0.7,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.08-lex0"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.5116279069767442,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.4922480620155039,
+        "injectionPrecision" : 0.32051282051282054,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.3928571428571428,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11-lex0"
+    }
+  ],
+  "sessions" : {
+    "firstTouchCoverage" : 1,
+    "withCooldown" : {
+      "meanInjectionsPerSession" : 11.333333333333334,
+      "promptCount" : 17,
+      "redundantInjections" : 0,
+      "redundantRate" : 0,
+      "sessionCount" : 3,
+      "totalInjections" : 34
+    },
+    "withoutCooldown" : {
+      "meanInjectionsPerSession" : 17,
+      "promptCount" : 17,
+      "redundantInjections" : 17,
+      "redundantRate" : 0.3333333333333333,
+      "sessionCount" : 3,
+      "totalInjections" : 51
+    }
+  },
+  "timestamp" : "2026-06-23T20:03:04Z"
+}
\ No newline at end of file
diff --git a/eval/runs/2026-06-23T20-13-52Z-contextual-512.json b/eval/runs/2026-06-23T20-13-52Z-contextual-512.json
new file mode 100644
index 0000000..0222234
--- /dev/null
+++ b/eval/runs/2026-06-23T20-13-52Z-contextual-512.json
@@ -0,0 +1,165 @@
+{
+  "corpusSize" : 153,
+  "embedderSignature" : "contextual-512",
+  "gitSha" : "56bc8cd",
+  "host" : "daniels-fancy-macbook-pro-2.local",
+  "k" : 3,
+  "queryCount" : 58,
+  "results" : [
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 3,
+        "injectedRecall" : 0.8294573643410852,
+        "injectionPrecision" : 0.2413793103448276,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 1,
+        "precisionAtK" : 0.32558139534883723,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "current"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.8837209302325582,
+        "avgInjectedOnNegatives" : 1.2,
+        "injectedRecall" : 0.8410852713178294,
+        "injectionPrecision" : 0.31343283582089554,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.5333333333333333,
+        "precisionAtK" : 0.38888888888888884,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.13"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.6666666666666666,
+        "injectedRecall" : 0.8875968992248061,
+        "injectionPrecision" : 0.3728813559322034,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.3333333333333333,
+        "precisionAtK" : 0.4761904761904763,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.12"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.4205607476635514,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.5436507936507938,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0.13333333333333333,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.46875,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.13333333333333333,
+        "precisionAtK" : 0.5714285714285715,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.10"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9302325581395349,
+        "avgInjectedOnNegatives" : 0,
+        "injectedRecall" : 0.8992248062015503,
+        "injectionPrecision" : 0.5357142857142857,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0,
+        "precisionAtK" : 0.6150793650793651,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.09"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.9069767441860465,
+        "avgInjectedOnNegatives" : 0,
+        "injectedRecall" : 0.8759689922480619,
+        "injectionPrecision" : 0.5714285714285714,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0,
+        "precisionAtK" : 0.6626016260162603,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.08"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.20930232558139536,
+        "avgInjectedOnNegatives" : 0,
+        "injectedRecall" : 0.1937984496124031,
+        "injectionPrecision" : 0.5625,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0,
+        "precisionAtK" : 0.7,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.08-lex0"
+    },
+    {
+      "report" : {
+        "answerHitRate" : 0.5116279069767442,
+        "avgInjectedOnNegatives" : 0.5333333333333333,
+        "injectedRecall" : 0.4922480620155039,
+        "injectionPrecision" : 0.32051282051282054,
+        "labeledCount" : 43,
+        "mrr" : 0.8234496124031008,
+        "negativeCount" : 15,
+        "negativeFalsePositiveRate" : 0.26666666666666666,
+        "precisionAtK" : 0.3928571428571428,
+        "recallAtK" : 0.8294573643410852
+      },
+      "variant" : "calib-0.11-lex0"
+    }
+  ],
+  "sessions" : {
+    "firstTouchCoverage" : 1,
+    "withCooldown" : {
+      "meanInjectionsPerSession" : 7,
+      "promptCount" : 17,
+      "redundantInjections" : 0,
+      "redundantRate" : 0,
+      "sessionCount" : 3,
+      "totalInjections" : 21
+    },
+    "withoutCooldown" : {
+      "meanInjectionsPerSession" : 11,
+      "promptCount" : 17,
+      "redundantInjections" : 12,
+      "redundantRate" : 0.36363636363636365,
+      "sessionCount" : 3,
+      "totalInjections" : 33
+    }
+  },
+  "timestamp" : "2026-06-23T20:13:52Z"
+}
\ No newline at end of file
diff --git a/scripts/plot_threshold.py b/scripts/plot_threshold.py
new file mode 100644
index 0000000..c4904e3
--- /dev/null
+++ b/scripts/plot_threshold.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""Plot ROC + precision/recall vs. the recall gate's distance threshold.
+
+Reads the per-candidate scores dumped by `engram-eval --dump-scores`
+(`eval/scores-<embedder>.json`: rows of {distance, relevant, kind}) and the
+marked thresholds (`eval/thresholds-<embedder>.json`), then renders an ROC curve
+(with AUC) and a precision/recall-vs-threshold curve, marking the shipped
+`proposed` gate and the legacy `current` ceiling. Writes `eval/threshold.png`.
+
+The gate also has a lexical leg; this models the *semantic distance* knob only —
+the dominant control and the thing an AUC actually characterizes.
+
+Run: uv run --with matplotlib --with numpy scripts/plot_threshold.py [embedder]
+"""
+from __future__ import annotations
+
+import glob
+import json
+import sys
+from pathlib import Path
+
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+
+ROOT = Path(__file__).resolve().parent.parent  # two levels above this file: the directory that holds scripts/
+EVAL = ROOT / "eval"  # scores/thresholds JSON are read from here; threshold.png is written here
+
+
+def load() -> tuple[np.ndarray, np.ndarray, dict, str]:
+    """Load (distances, relevance flags, threshold marks, embedder) from eval/; marks default to 0.45 / 0.10 if absent."""
+    arg = sys.argv[1] if len(sys.argv) > 1 else None
+    scores = sorted(EVAL.glob(f"scores-{arg or '*'}.json"))  # no embedder argument → match every scores file
+    if not scores:
+        sys.exit(f"no eval/scores-{arg or '*'}.json — run: swift run engram-eval --dump-scores")
+    embedder = scores[-1].stem[len("scores-"):]  # last match in sorted order; strip the prefix only (replace() hits every occurrence)
+    rows = json.loads(scores[-1].read_text())
+    dist = np.array([r["distance"] for r in rows], dtype=float)
+    rel = np.array([bool(r["relevant"]) for r in rows], dtype=bool)
+    tpath = EVAL / f"thresholds-{embedder}.json"
+    marks = json.loads(tpath.read_text()) if tpath.exists() else {"currentMaxDistance": 0.45, "proposedMaxDistance": 0.10}
+    return dist, rel, marks, embedder
+
+
+def curve(dist: np.ndarray, rel: np.ndarray, taus: np.ndarray):
+    """Sweep tau (inject when distance < tau); return arrays (TPR, FPR, precision, recall), one entry per tau."""
+    n_pos, n_neg = int(rel.sum()), int((~rel).sum())
+    tprs, fprs, precs = [], [], []
+    for tau in taus:
+        injected = dist < tau
+        hits = int(np.count_nonzero(injected & rel))
+        false_alarms = int(np.count_nonzero(injected & ~rel))
+        tprs.append(hits / n_pos if n_pos else 0.0)
+        fprs.append(false_alarms / n_neg if n_neg else 0.0)
+        # No injections → precision is undefined; NaN leaves a gap in the plot
+        # instead of a misleading "P=1.0" shelf across the inject-nothing band.
+        precs.append(hits / (hits + false_alarms) if (hits + false_alarms) else float("nan"))
+    # Recall shares TPR's definition (tp / positives), so it is returned as a separate copy of the same values.
+    tpr = np.array(tprs)
+    return tpr, np.array(fprs), np.array(precs), tpr.copy()
+
+
+def at_threshold(dist, rel, tau):
+    """Return (precision, recall, F1) of the gate `distance < tau`; precision is 1.0 when nothing is injected."""
+    injected = dist < tau
+    hits, false_alarms = int((injected & rel).sum()), int((injected & ~rel).sum())
+    n_pos, n_injected = int(rel.sum()), hits + false_alarms
+    precision = hits / n_injected if n_injected else 1.0
+    recall = hits / n_pos if n_pos else 0.0
+    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
+    return precision, recall, f1
+
+
+def main() -> None:
+    """Render the ROC and precision/recall-vs-threshold plots to eval/threshold.png and print a text summary."""
+    dist, rel, marks, embedder = load()
+    cur, prop = float(marks["currentMaxDistance"]), float(marks["proposedMaxDistance"])
+
+    taus = np.linspace(0.0, max(0.5, cur + 0.02), 400)
+    tpr, fpr, prec, rec = curve(dist, rel, taus)
+
+    # ROC AUC over the swept range. taus ascend, so fpr and tpr are already non-decreasing:
+    # integrate in sweep order (an unstable argsort on the heavily tied fpr values could skew the area).
+    trapz = getattr(np, "trapezoid", None) or np.trapz  # numpy 2.x renamed trapz
+    auc = float(trapz(tpr, fpr))
+
+    # Best-F1 threshold (a reasonable "optimal" operating point); NaN-precision taus fail the > 0 test and score 0.
+    f1s = np.where((prec + rec) > 0, 2 * prec * rec / (prec + rec + 1e-12), 0.0)
+    best = int(np.argmax(f1s))
+    best_tau = float(taus[best])
+
+    fig, (ax_roc, ax_pr) = plt.subplots(1, 2, figsize=(13, 5.2))
+
+    # ── ROC ──
+    ax_roc.plot(fpr, tpr, color="#2E3A59", lw=2, label=f"ROC (AUC={auc:.3f})")
+    ax_roc.plot([0, 1], [0, 1], ls=":", color="#aaa", lw=1)
+    for tau, name, color in [(cur, f"current {cur:.2f}", "#C0504D"), (prop, f"proposed {prop:.2f} (shipped)", "#4F8A4F")]:
+        p, r, _ = at_threshold(dist, rel, tau)
+        # locate the curve point nearest this tau
+        i = int(np.argmin(np.abs(taus - tau)))
+        ax_roc.scatter([fpr[i]], [tpr[i]], color=color, zorder=5, s=70)
+        ax_roc.annotate(f"{name}\nP={p:.2f} R={r:.2f}", (fpr[i], tpr[i]),
+                        textcoords="offset points", xytext=(8, -4 if "current" in name else 10),
+                        fontsize=9, color=color)
+    ax_roc.set_xlabel("false-positive rate (off-topic injected)")
+    ax_roc.set_ylabel("true-positive rate (relevant injected)")
+    ax_roc.set_title(f"ROC — distance gate ({embedder})")
+    ax_roc.legend(loc="lower right")
+    ax_roc.grid(alpha=0.2)
+
+    # ── precision / recall vs threshold ──
+    ax_pr.plot(taus, prec, color="#2E3A59", lw=2, label="precision")
+    ax_pr.plot(taus, rec, color="#E08A4C", lw=2, label="recall")
+    for tau, name, color in [(cur, f"current {cur:.2f}", "#C0504D"), (prop, f"proposed {prop:.2f}", "#4F8A4F"), (best_tau, f"best-F1 {best_tau:.2f}", "#3b4a82")]:
+        ax_pr.axvline(tau, color=color, ls="--", lw=1.3, label=name)
+    ax_pr.set_xlabel("maxDistance threshold τ  (inject when distance < τ)")
+    ax_pr.set_ylabel("precision / recall")
+    ax_pr.set_title("precision & recall vs. threshold")
+    ax_pr.legend(loc="center right", fontsize=8)
+    ax_pr.grid(alpha=0.2)
+
+    fig.tight_layout()
+    out = EVAL / "threshold.png"
+    fig.savefig(out, dpi=140)
+
+    # text summary
+    pc, rc, fc = at_threshold(dist, rel, cur)
+    pp, rp, fp_ = at_threshold(dist, rel, prop)
+    pb, rb, fb = at_threshold(dist, rel, best_tau)
+    print(f"embedder: {embedder}   candidates: {len(dist)}  relevant: {int(rel.sum())}  ROC-AUC: {auc:.3f}")
+    print(f"  current  τ={cur:.2f} : precision {pc:.3f}  recall {rc:.3f}  F1 {fc:.3f}")
+    print(f"  proposed τ={prop:.2f} : precision {pp:.3f}  recall {rp:.3f}  F1 {fp_:.3f}   ← shipped")
+    print(f"  best-F1  τ={best_tau:.2f} : precision {pb:.3f}  recall {rb:.3f}  F1 {fb:.3f}")
+    print(f"wrote {out}")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not when imported
+    main()