dakl · dakl · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
diff --git a/.gitignore b/.gitignore
@@ -36,5 +36,12 @@ memory-slop.md
 # Icon-design scratch renders (Nano Banana ideation output)
 docs/design/renders/
 
+# Regenerable eval plot artifacts — the per-run records under eval/runs/ ARE
+# committed (comparable history); these are scratch from `--dump-scores` + the
+# ROC plotter (scripts/plot_threshold.py).
+eval/scores-*.json
+eval/thresholds-*.json
+eval/threshold.png
+
 # LLVM profiling output
 *.profraw
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -7,8 +7,8 @@ and recall content. See `README.md` for architecture and build instructions.
 
 - `Sources/EngramCore` — domain models, store, embeddings, ranking (the brains)
 - `Sources/EngramCore/RecallGate.swift` — the recall confidence gate: decides which fetched memories are confident enough to inject on a prompt. Shared by the hook and the eval; presets `.current`/`.proposed`. `RecallText.swift` is the shared tokenizer (stopwords + lexical token overlap) used by both the gate and FTS.
-- `Sources/EngramCore/RetrievalMetrics.swift` — pure retrieval-quality metrics (Recall@k, MRR, gate precision/recall, negative false-positive rate, injection precision) over labeled `QueryOutcome`s.
-- `Sources/engram-eval` — offline retrieval eval harness (`swift run engram-eval`): seeds a temp store from `Resources/corpus.json` + `queries.json`, runs each prompt through `fetch`, applies `RecallGate` configs, and prints a current-vs-tightened comparison (ADR 0021). `--distances` dumps per-kind distance separability; `--record` appends a per-run JSON file (git sha + embedder signature + host + metrics) under `eval/runs/`. Numbers are embedder/machine-dependent — it's a relative A/B, not a benchmark.
+- `Sources/EngramCore/RetrievalMetrics.swift` — pure retrieval-quality metrics (Recall@k, MRR, gate precision/recall, negative false-positive rate, injection precision) over labeled `QueryOutcome`s, plus the session-aware `SessionInjectionReport` (`evaluateSessions` / `firstTouchCoverage`) for the recall cooldown (ADR 0023).
+- `Sources/engram-eval` — offline retrieval eval harness (`swift run engram-eval`): seeds a temp store from `Resources/corpus.json` + `queries.json`, runs each prompt through `fetch`, applies `RecallGate` configs, and prints a current-vs-tightened comparison (ADR 0021). It then replays `Resources/sessions.json` (ordered on-topic prompt sequences) through the gate + the real session cooldown and prints the **session-aware injection** metric — redundant re-injection rate with vs without the cooldown, plus first-touch coverage (ADR 0023). `--distances` dumps per-kind distance separability; `--record` appends a per-run JSON file (git sha + embedder signature + host + metrics, incl. the `sessions` block) under `eval/runs/`. Numbers are embedder/machine-dependent — it's a relative A/B, not a benchmark.
 - `Sources/engram` — the `engram` CLI (store / fetch / stats / activity / hook)
 - `Sources/CSQLite` — vendored SQLite + sqlite-vec (static C target)
 - `Sources/engram/Setup.swift` — install logic (`engram install` / `engram setup`); the single source of truth for installing the CLI, hook, and skills. `engram install` symlinks `/usr/local/bin/engram` → the running binary

diff --git a/README.md b/README.md
@@ -139,7 +139,10 @@ per embedder — its distance thresholds are tuned to the live model's scale via
 the offline eval (`swift run engram-eval`), ADR 0021.
 Off-topic prompts inject nothing — it exits 0 silently, so it can't block or
 spam. (It does record a *retrieval-activity* row — see below — which is
-decoupled from ranking, ADR 0015.)
+decoupled from ranking, ADR 0015.) A **session-scoped cooldown** (ADR 0023) then
+drops any memory already injected via recall earlier in the same session (within
+30 min), so the same note doesn't re-appear on every on-topic prompt — keyed off
+the `session_id` now carried on each retrieval row.
 
 The same hook also appends a **reflection nudge** every 5th prompt of a session
 (tracked by a tiny per-session counter sidecar'd next to the store): a soft

diff --git a/Sources/EngramCore/MemoryStore.swift b/Sources/EngramCore/MemoryStore.swift
@@ -47,10 +47,30 @@ public actor MemoryStore {
     private static func migrate(db: SQLiteDatabase, embedder: Embedder, databaseURL: URL) throws {
         try createSchema(db)
         try addMissingColumns(db)
+        try addMissingRetrievalColumns(db)  // adds retrievals.session_id if absent, then creates its index
         try migrateVectorStore(db: db, embedder: embedder, databaseURL: databaseURL)
         try backfillFTS(db)
     }
 
+    /// Ensures the `retrievals` ledger carries the nullable `session_id` column and
+    /// its lookup index, both used by the session-scoped recall cooldown (ADR 0023).
+    /// Purely additive, so running it against an already-migrated DB changes nothing.
+    private static func addMissingRetrievalColumns(_ db: SQLiteDatabase) throws {
+        var hasSessionID = false
+        try db.prepare("PRAGMA table_info(retrievals);") { stmt in
+            // Column 1 of each `table_info` row is the column's name.
+            while try stmt.step() {
+                if stmt.columnText(1) == "session_id" { hasSessionID = true }
+            }
+        }
+        if !hasSessionID { try db.exec("ALTER TABLE retrievals ADD COLUMN session_id TEXT;") }
+        // The index is built here rather than in createSchema: this is the first
+        // point where `session_id` is guaranteed to exist on fresh and upgraded DBs
+        // alike. An old DB's retrievals table predates the column, so indexing it
+        // from createSchema would fail there.
+        try db.exec("CREATE INDEX IF NOT EXISTS idx_retrievals_session ON retrievals(session_id, memory_id, at);")
+    }
+
     private static func createSchema(_ db: SQLiteDatabase) throws {
         try db.exec(
             """
@@ -81,7 +101,8 @@ public actor MemoryStore {
                 memory_id TEXT NOT NULL,
                 source TEXT NOT NULL,
                 query TEXT,
-                at REAL NOT NULL
+                at REAL NOT NULL,
+                session_id TEXT
             );
             CREATE INDEX IF NOT EXISTS idx_retrievals_at ON retrievals(at);
             CREATE INDEX IF NOT EXISTS idx_memories_deleted ON memories(deleted_at);
@@ -878,15 +899,16 @@ public actor MemoryStore {
     /// `query` that surfaced them. One row per id, single timestamp, in a
     /// transaction. Deliberately does **not** touch `access_count` — this ledger
     /// is decoupled from ranking (ADR 0015 preserves ADR 0005's loop-break).
-    public func recordRetrieval(memoryIDs: [UUID], source: RetrievalSource, query: String? = nil) throws {
+    public func recordRetrieval(memoryIDs: [UUID], source: RetrievalSource, query: String? = nil, sessionID: String? = nil) throws {
         guard !memoryIDs.isEmpty else { return }
         let now = Date().timeIntervalSince1970
         let trimmedQuery = query.map { String($0.prefix(Self.maxRetrievalQueryLength)) }
         try db.exec("BEGIN;")
         do {
             for id in memoryIDs {
-                try db.prepare("INSERT INTO retrievals(memory_id, source, query, at) VALUES(?, ?, ?, ?);") { stmt in
-                    stmt.bind(id.uuidString, at: 1).bind(source.rawValue, at: 2).bind(trimmedQuery, at: 3).bind(now, at: 4)
+                try db.prepare("INSERT INTO retrievals(memory_id, source, query, at, session_id) VALUES(?, ?, ?, ?, ?);") { stmt in
+                    stmt.bind(id.uuidString, at: 1).bind(source.rawValue, at: 2).bind(trimmedQuery, at: 3)
+                        .bind(now, at: 4).bind(sessionID, at: 5)
                     _ = try stmt.step()
                 }
             }
@@ -897,6 +919,35 @@ public actor MemoryStore {
         }
     }
 
+    /// Returns the subset of `memoryIDs` that the `recall` source already logged for
+    /// `sessionID` no more than `cooldown` seconds ago (ADR 0023). The recall hook
+    /// removes these after the gate so a memory isn't re-injected on every on-topic
+    /// prompt of a session. An empty `sessionID` (e.g. a manual `fetch` outside any
+    /// session) or an empty `memoryIDs` yields an empty set, so nothing is suppressed.
+    public func recentlyInjectedInSession(_ memoryIDs: [UUID], sessionID: String, within cooldown: TimeInterval) throws -> Set<UUID> {
+        if sessionID.isEmpty || memoryIDs.isEmpty { return [] }
+        let earliest = Date().timeIntervalSince1970 - cooldown
+        // One `?` per candidate id; they take bind slots 4 onward, after the fixed three.
+        let idSlots = Array(repeating: "?", count: memoryIDs.count).joined(separator: ",")
+        let query = """
+            SELECT DISTINCT memory_id FROM retrievals
+            WHERE session_id = ? AND source = ? AND at >= ? AND memory_id IN (\(idSlots));
+            """
+        var alreadyInjected = Set<UUID>()
+        try db.prepare(query) { stmt in
+            stmt.bind(sessionID, at: 1).bind(RetrievalSource.recall.rawValue, at: 2).bind(earliest, at: 3)
+            for index in memoryIDs.indices { stmt.bind(memoryIDs[index].uuidString, at: Int32(index + 4)) }
+            while try stmt.step() {
+                // Rows whose id text is missing or not a valid UUID are skipped.
+                guard let text = stmt.columnText(0), let id = UUID(uuidString: text) else { continue }
+                alreadyInjected.insert(id)
+            }
+        }
+        return alreadyInjected
+    }
+
+    /// Cooldown, in seconds, for re-injecting the same memory via recall within one
+    /// session (ADR 0023). 30 minutes: short on-topic sessions show a memory once; a
+    /// long session gets at most a periodic refresh rather than the same note every prompt.
+    public static let recallReinjectionCooldown: TimeInterval = 30 * 60
+
     /// Retrieval-activity rows from `since` onward, newest first, optionally
     /// filtered to one `source`. Powers `engram activity` and the Activity view.
     public func retrievals(since: Date, source: RetrievalSource? = nil, limit: Int = 500) throws -> [RetrievalEvent] {

diff --git a/Sources/EngramCore/RecallGate.swift b/Sources/EngramCore/RecallGate.swift
@@ -54,13 +54,19 @@ public struct RecallGateConfig: Sendable, Equatable {
     /// kill the single-keyword leak. The per-query relevance floor and median gate
     /// were dropped — measurement showed neither separates on- from off-topic.
     ///
-    /// ⚠️ `maxDistance` is embedder-specific. 0.10 fits the contextual model; the
-    /// fallback `word-512` embedder lives on a different scale. Before shipping to
-    /// the hook this should become embedder-relative rather than a constant.
+    /// Retuned 0.10 → **0.09** (ADR 0021 addendum): on the eval that drops the
+    /// negative false-positive rate 13% → 0% with *unchanged* gate recall (the
+    /// lexical leg holds recall; the distance leg only sheds off-topic injections).
+    /// Engram's recall is precision-first — it runs on every prompt, so a false
+    /// positive bloats context repeatedly while a miss is recoverable (it re-fires
+    /// next prompt, or `/recall`). Tightening below 0.09 starts to cost gate recall.
+    ///
+    /// ⚠️ `maxDistance` is embedder-specific. 0.09 fits the contextual model; the
+    /// fallback `word-512` embedder lives on a different scale (it keeps `.current`).
     public static let proposed = RecallGateConfig(
         topK: 3,
         minRelevance: 0,
-        maxDistance: 0.10,
+        maxDistance: 0.09,
         minLexicalTokenHits: 2,
         requireDistanceBelowMedian: false
     )

diff --git a/Sources/EngramCore/RetrievalMetrics.swift b/Sources/EngramCore/RetrievalMetrics.swift
@@ -121,3 +121,62 @@ public enum RetrievalMetrics {
         values.isEmpty ? 0 : values.reduce(0, +) / Double(values.count)
     }
 }
+
+/// Session-aware injection metric (ADR 0023). The per-query metrics above can't
+/// see *repetition across a session* — the same memory re-injected on prompt
+/// after prompt. This summarizes that over ordered prompt sequences.
+public struct SessionInjectionReport: Sendable, Codable {
+    public let sessionCount: Int  // sessions evaluated
+    public let promptCount: Int  // prompts summed over every session
+    /// Total memories injected across every prompt of every session.
+    public let totalInjections: Int
+    /// Injections of a memory *beyond the first* within the same session — pure
+    /// repetition. The number the session cooldown is meant to drive toward zero.
+    public let redundantInjections: Int
+    /// `redundantInjections / totalInjections`, or 0 when nothing was injected. 0
+    /// means every injection was a memory's first appearance in its session.
+    public let redundantRate: Double
+    public let meanInjectionsPerSession: Double  // totalInjections / sessionCount as filled by `evaluateSessions`; 0 with no sessions
+}
+
+extension RetrievalMetrics {
+    /// Summarizes injection repetition over ordered prompt sequences.
+    ///
+    /// - Parameter sessions: One entry per session; each session is the ordered list
+    ///   of its prompts, and each prompt is the list of memory ids injected on it.
+    /// - Returns: Totals and rates over all sessions. An injection is "redundant"
+    ///   when its id was already seen earlier in the *same* session — including
+    ///   earlier in the same prompt's list. Ids never carry over between sessions.
+    public static func evaluateSessions(_ sessions: [[[UUID]]]) -> SessionInjectionReport {
+        var total = 0
+        var redundant = 0
+        var promptCount = 0
+        for session in sessions {
+            var seen = Set<UUID>()  // reset per session: repetition is session-scoped
+            for prompt in session {
+                promptCount += 1
+                for id in prompt {
+                    total += 1
+                    if !seen.insert(id).inserted { redundant += 1 }
+                }
+            }
+        }
+        return SessionInjectionReport(
+            sessionCount: sessions.count,
+            promptCount: promptCount,
+            totalInjections: total,
+            redundantInjections: redundant,
+            redundantRate: total == 0 ? 0 : Double(redundant) / Double(total),
+            meanInjectionsPerSession: sessions.isEmpty ? 0 : Double(total) / Double(sessions.count)
+        )
+    }
+
+    /// Fraction of (session, memory) pairs injected at least once *without* the
+    /// cooldown that are still injected at least once *with* it. Must stay 1.0: the
+    /// cooldown removes repeats, never a memory's only/first hit in a session.
+    ///
+    /// Coverage is counted per session, not over one global id set: the cooldown is
+    /// session-scoped, so a memory dropped entirely from one session must lower the
+    /// score even if another session still injects it.
+    ///
+    /// - Parameters:
+    ///   - withoutCooldown: Baseline injections — sessions → prompts → memory ids.
+    ///   - withCooldown: The same sessions, in the same order, replayed with the
+    ///     cooldown on. A session missing from this array counts as injecting nothing.
+    /// - Returns: Kept pairs / baseline pairs, or 1 when the baseline injected nothing.
+    public static func firstTouchCoverage(withoutCooldown: [[[UUID]]], withCooldown: [[[UUID]]]) -> Double {
+        var baselinePairs = 0
+        var keptPairs = 0
+        for (index, session) in withoutCooldown.enumerated() {
+            let baseline = Set(session.joined())
+            // Sessions are matched by position; a missing counterpart keeps nothing.
+            let surviving: Set<UUID> = index < withCooldown.count ? Set(withCooldown[index].joined()) : []
+            baselinePairs += baseline.count
+            keptPairs += baseline.intersection(surviving).count
+        }
+        guard baselinePairs > 0 else { return 1 }
+        return Double(keptPairs) / Double(baselinePairs)
+    }
+}
diff --git a/Sources/engram-eval/Resources/sessions.json b/Sources/engram-eval/Resources/sessions.json
@@ -0,0 +1,37 @@
+{
+  "_comment": "Ordered prompt sequences for the session-aware injection metric (ADR 0023). Each session stays on one topic, so the same memories keep clearing the gate prompt after prompt — exactly the repetition the session cooldown should damp. Prompts are not labeled; the metric measures re-injection of the same memory within a session, not correctness (that's the per-query eval).",
+  "sessions": [
+    {
+      "name": "engram-internals",
+      "prompts": [
+        "how does engram's recall hook decide what to inject on a prompt?",
+        "what does the recall confidence gate actually do?",
+        "walk me through engram's recall flow end to end",
+        "how does engram keep off-topic memories from being injected?",
+        "what embedding model does engram use to embed memories?",
+        "how does engram combine keyword and semantic search?"
+      ]
+    },
+    {
+      "name": "python-stack",
+      "prompts": [
+        "what's the standard python project setup here?",
+        "how do I manage python dependencies and virtualenvs?",
+        "which tools lint and format our python code?",
+        "how do we type-check python code?",
+        "which test framework do python projects use?",
+        "remind me of the python conventions we follow"
+      ]
+    },
+    {
+      "name": "gcp-deploy",
+      "prompts": [
+        "how are services deployed to kubernetes?",
+        "which gcp project do production services run in?",
+        "how does authentication to gcp work for our services?",
+        "how do I access the kubernetes cluster and gcp infra?",
+        "how are application secrets managed?"
+      ]
+    }
+  ]
+}