lukefwalton · lukefwalton · Jun 16, 2026 · Jun 16, 2026
diff --git a/demo/README.md b/demo/README.md
@@ -60,15 +60,20 @@ Two facts make int8 admissible, and they differ in kind (the §6 split):
 - **Measured, by the suite.** Integer rounding perturbs direction and can
   reorder near-ties, so its harmlessness is not proven; it is verified. The
   harness reports rank correlation against the full-precision ranking, then runs
-  the gold suite. Rank correlation is *necessary, not sufficient*: a demo that
-  reports it and stops has shown a retrieval benchmark, not answerability
-  governing tuning. The gold suite is the actual adjudicator, and it checks not
+  the gold suite. Rank correlation is a *diagnostic*, not the gate: it measures
+  how much the ranking moved, but a demo that reports it and stops has shown a
+  retrieval benchmark, not answerability governing tuning. The gold suite is the
+  actual adjudicator, and it checks not
   just that the expected source is *retrieved* but that it *wins the top slot*:
   so a quantization flip that swaps which Smith ranks first (disambiguation) or
   lets a public record overtake the private note (route) is caught keyless, not
   only by the keyed answer pass. Past int8 (int4, PQ, binary) the exact part
   stops applying and the whole lever is measured; the wire format is versioned
-  so a code/data mismatch fails loudly.
+  so a code/data mismatch fails loudly. One scope note: int4 here is exercised
+  as *precision loss* to demonstrate the catch — codes still occupy an
+  `Int8Array`, nothing is nibble-packed — so the byte-size saving of low-bit
+  encodings is a production property (`docs/production-scaling.md` §2), not what
+  this gate measures.
 
 The headline run is **keyless**: it reads committed full-precision vectors and
 committed gold-query vectors, so no embedding call is made. A key is needed only

diff --git a/demo/harness.ts b/demo/harness.ts
@@ -4,10 +4,11 @@
 // (src/evaluate.ts) untouched: the int8 path is an encode/decode wrapper plus a
 // re-rank, never a second pipeline. Given full-precision index entries and a
 // quantization bit width, it builds the lossy index, re-ranks each gold query
-// against it, and reports the two things the paper distinguishes: rank
-// correlation against the full-precision ranking (necessary), and the gold
-// suite's verdicts including refuse and route (sufficient). Rank correlation
-// alone is a retrieval benchmark; the suite is the actual adjudicator.
+// against it, and reports two things: rank correlation against the
+// full-precision ranking (a diagnostic for how much the ranking moved), and the
+// gold suite's verdicts including refuse and route (the adjudicator). Rank
+// correlation gates nothing here — it is a retrieval benchmark; the gold suite
+// decides.
 
 import { cosine, retrieve } from '../src/retrieve.js';
 import type { RetrievalResult } from '../src/retrieve.js';
@@ -133,8 +134,19 @@ export function evaluateQuery(
   // the top slot, not merely appear. Refusals (not-found) carry no expected
   // source; the floor and forbidSources adjudicate them via judgeRetrieval.
   let topSlot: QueryGateResult['topSlot'];
-  if (gold.expectAnswerMode !== 'not-found' && gold.expectSources && gold.expectSources[0]) {
-    const expected = gold.expectSources[0];
+  if (gold.expectAnswerMode !== 'not-found') {
+    // The top-slot contract, made loud. The gate guards expectSources[0] only —
+    // that single source is the required top-slot winner — so a non-refusal case
+    // with two entries (which one must rank #1?) or none would let a flip slip
+    // past silently. Enforce exactly one rather than depend on the gold author
+    // happening to list one. Refusals name no source and never reach here.
+    if (gold.expectSources?.length !== 1) {
+      throw new Error(
+        `demo gold '${gold.id}': a non-refusal case must list exactly one expectSources ` +
+          `entry (the required top-slot winner); got ${gold.expectSources?.length ?? 0}.`,
+      );
+    }
+    const expected = gold.expectSources[0]!;
     const winner = topSource(hits);
     topSlot = { expected, winner: winner?.id ?? null, won: winner?.id === expected };
   }

diff --git a/demo/quantize.test.ts b/demo/quantize.test.ts
@@ -196,3 +196,20 @@ test('evaluateQuery: a refuse case with nothing above the floor stays not-found'
   const res = evaluateQuery(gold, index, requantizeIndex(index, 8), Q);
   assert.equal(res.pass, true, 'fillers stay below the floor, so nothing is forbidden-surfaced');
 });
+
+test('the top-slot contract: a non-refusal case must name exactly one expected source', () => {
+  // The gate guards expectSources[0] as the required top-slot winner. Two entries
+  // (which must rank #1?) or none would let a flip slip past silently, so the
+  // harness refuses to evaluate them rather than guess. Refusals are exempt.
+  const index = [recordEntry('work:a', VR), noteEntry('note:b', VN)];
+  const qIndex = requantizeIndex(index, 8);
+
+  const twoSources: GoldQuery = { id: 'two', query: 'q', expectAnswerMode: 'partial', expectSources: ['work:a', 'note:b'] };
+  assert.throws(() => evaluateQuery(twoSources, index, qIndex, Q), /exactly one expectSources/);
+
+  const noSource: GoldQuery = { id: 'none', query: 'q', expectAnswerMode: 'partial' };
+  assert.throws(() => evaluateQuery(noSource, index, qIndex, Q), /exactly one expectSources/);
+
+  const refusal: GoldQuery = { id: 'refuse', query: 'q', expectAnswerMode: 'not-found' };
+  assert.doesNotThrow(() => evaluateQuery(refusal, index, qIndex, Q));
+});
diff --git a/demo/quantize.ts b/demo/quantize.ts
@@ -13,6 +13,11 @@
 // reorder near-ties, so its harmlessness is not proven but measured against the
 // gold suite. int8 holds on the real corpus; int4 is the scalpel that makes the
 // gate say no.
+//
+// Scope: int4 is modeled as precision loss only — codes still occupy an
+// Int8Array, never nibble-packed — because this gate measures ranking error,
+// not storage size. The byte-size win of low-bit encodings is a production
+// property (docs/production-scaling.md §2), not what this demo proves.
 
 export interface QuantizedVector {
   /** Signed integer codes, one per dimension, each in [-level, level]. */