WXYC · jakebromberg · Jun 5, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/core/prompts.py b/core/prompts.py
@@ -66,11 +66,46 @@
     When in doubt, return null.
   - confidence: "high" if the row is clearly legible, "medium" if you had to
     guess one or two characters, "low" if mostly illegible
-  - notes: null in the common case. Use one of these tags only when relevant:
-      * "continuation" — this row is a wrap of the previous row, not its own entry
-      * "double_height" — handwriting takes up two physical rows of the grid
-      * "crossed_out" — the entry is struck through
-      * "illegible" — you could not read enough to attempt a transcription
+  - notes: null in the common case (>90% of rows). Use one of these tags
+    ONLY when the visual cue is unambiguous. Read each definition before
+    choosing a tag.
+
+      * "double_height" — ONE handwritten entry that visually spans two
+        printed grid rows. The DJ wrote a single song's text but the
+        handwriting is tall, or the artist/track text flows from the upper
+        printed line down into the lower printed line as one continuous
+        piece of writing. Emit a SINGLE Entry on the UPPER printed grid
+        row, with raw_text containing the whole song's text, and tag it
+        "double_height". Do NOT also emit a separate Entry on the lower
+        printed grid row for the same handwritten content — that produces
+        a phantom duplicate row. Example: the artist "Buffy Sainte-Marie"
+        is written on the printed row N and the track "God is Alive,
+        Magic is Afoot" continues on printed row N+1 as one piece of
+        handwriting → emit one Entry with row_index=N, raw_text="Buffy
+        Sainte-Marie - God is Alive, Magic is Afoot", notes="double_height".
+
+      * "continuation" — RARE in this corpus. Use ONLY when the lower
+        printed grid row contains a SEPARATE handwritten fragment that
+        completes a song started on the row above (different ink stroke,
+        an explicit arrow or bracket carrying the text down, or the DJ
+        clearly ran out of room and re-wrote the tail of the entry below).
+        Emit it as its own Entry on the lower grid row, with the
+        continuation fragment as raw_text and notes="continuation".
+        If you find yourself wanting to use "continuation" because the
+        handwriting itself is tall and flows visually across two rows,
+        use "double_height" instead — that is the same-entry case.
+
+      * "crossed_out" — a clear horizontal line (or angry scribble) drawn
+        THROUGH the artist/track text, indicating the DJ retracted the
+        entry. Tag this ONLY when the strike-through crosses the song
+        text itself. Margin doodles next to a row, asterisks, arrows,
+        underlines drawn UNDER the text, a mark in the left-margin type
+        column, or a scratch through ONLY the left-margin circle are NOT
+        crossed_out — capture those as entry-level oddities instead.
+        When in doubt, leave notes=null.
+
+      * "illegible" — you could not read enough to attempt a transcription.
+        Set confidence="low" as well.
 
 Always return EXACTLY FOUR quadrants in this fixed order:
   1. top_left  2. top_right  3. bottom_left  4. bottom_right
@@ -172,11 +207,46 @@
     When in doubt, return null.
   - confidence: "high" if the row is clearly legible, "medium" if you had to
     guess one or two characters, "low" if mostly illegible
-  - notes: null in the common case. Use one of these tags only when relevant:
-      * "continuation" — this row is a wrap of the previous row, not its own entry
-      * "double_height" — handwriting takes up two physical rows of the grid
-      * "crossed_out" — the entry is struck through
-      * "illegible" — you could not read enough to attempt a transcription
+  - notes: null in the common case (>90% of rows). Use one of these tags
+    ONLY when the visual cue is unambiguous. Read each definition before
+    choosing a tag.
+
+      * "double_height" — ONE handwritten entry that visually spans two
+        printed grid rows. The DJ wrote a single song's text but the
+        handwriting is tall, or the artist/track text flows from the upper
+        printed line down into the lower printed line as one continuous
+        piece of writing. Emit a SINGLE Entry on the UPPER printed grid
+        row, with raw_text containing the whole song's text, and tag it
+        "double_height". Do NOT also emit a separate Entry on the lower
+        printed grid row for the same handwritten content — that produces
+        a phantom duplicate row. Example: the artist "Buffy Sainte-Marie"
+        is written on the printed row N and the track "God is Alive,
+        Magic is Afoot" continues on printed row N+1 as one piece of
+        handwriting → emit one Entry with row_index=N, raw_text="Buffy
+        Sainte-Marie - God is Alive, Magic is Afoot", notes="double_height".
+
+      * "continuation" — RARE in this corpus. Use ONLY when the lower
+        printed grid row contains a SEPARATE handwritten fragment that
+        completes a song started on the row above (different ink stroke,
+        an explicit arrow or bracket carrying the text down, or the DJ
+        clearly ran out of room and re-wrote the tail of the entry below).
+        Emit it as its own Entry on the lower grid row, with the
+        continuation fragment as raw_text and notes="continuation".
+        If you find yourself wanting to use "continuation" because the
+        handwriting itself is tall and flows visually across two rows,
+        use "double_height" instead — that is the same-entry case.
+
+      * "crossed_out" — a clear horizontal line (or angry scribble) drawn
+        THROUGH the artist/track text, indicating the DJ retracted the
+        entry. Tag this ONLY when the strike-through crosses the song
+        text itself. Margin doodles next to a row, asterisks, arrows,
+        underlines drawn UNDER the text, a mark in the left-margin type
+        column, or a scratch through ONLY the left-margin circle are NOT
+        crossed_out — capture those as entry-level oddities instead.
+        When in doubt, leave notes=null.
+
+      * "illegible" — you could not read enough to attempt a transcription.
+        Set confidence="low" as well.
 
 For the quadrant itself, capture:
   - position: must be exactly "{position}" (the cell this crop came from)

diff --git a/scripts/_revalidate_notes_2026_06_04.py b/scripts/_revalidate_notes_2026_06_04.py
@@ -0,0 +1,64 @@
+"""One-shot script: re-extract a small sample of pages against current Gemini
+and dump the raw GeminiPageResult JSON into OUT_DIR for diffing against the verified files.
+
+Not a permanent CLI command — this is a measurement for issue #61.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+from core.gemini import GeminiClient, MediaResolution
+
+PAGES = [2, 6, 9, 14, 16, 19]  # page numbers of the sample to re-extract
+PAGES_DIR = Path("data/pages/1990/April 1990/1990-04apr0106")  # holds the page-NN.png inputs
+OUT_DIR = Path("data/notes-revalidation-2026-06-04")  # receives one .fresh.json per page
+
+
+async def main() -> None:
+    """Extract each page in PAGES and write one JSON file per page to OUT_DIR."""
+    load_dotenv(override=False)
+    env = os.environ
+    api_key = env["GEMINI_API_KEY"]
+    model = env.get("GEMINI_MODEL", "gemini-3.1-pro-preview")
+    resolution_name = env.get("GEMINI_MEDIA_RESOLUTION", "high")
+    media_resolution = MediaResolution.from_string(resolution_name)
+
+    from google import genai
+
+    sdk = genai.Client(api_key=api_key)
+    client = GeminiClient(sdk=sdk, model=model, media_resolution=media_resolution)
+    # create_cache()'s result is only used for the status line printed below.
+    cache_state = "on" if await client.create_cache() else "off"
+    print(f"model={model}  cache={cache_state}")
+
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    async def extract(page_no: int) -> tuple[int, dict | str]:
+        # A failure becomes an "ERROR: ..." string so one bad page cannot sink the batch.
+        image_path = PAGES_DIR / f"page-{page_no:02d}.png"
+        try:
+            page_result = await client.extract_page(image_path)
+            return page_no, page_result.model_dump(mode="json")
+        except Exception as exc:  # noqa: BLE001
+            return page_no, f"ERROR: {type(exc).__name__}: {exc}"
+
+    outcomes = await asyncio.gather(*map(extract, PAGES))
+    for page_no, payload in outcomes:
+        target = OUT_DIR / f"1990-04apr0106-page{page_no:02d}.fresh.json"
+        if not isinstance(payload, str):
+            target.write_text(json.dumps(payload, indent=2))
+            print(f"page {page_no:02d}: wrote {target}")
+            continue
+        # Error path: report it and persist the message in place of the page JSON.
+        print(f"page {page_no:02d}: {payload}")
+        target.write_text(json.dumps({"error": payload}, indent=2))
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scripts/_revalidate_notes_metrics.py b/scripts/_revalidate_notes_metrics.py
@@ -0,0 +1,155 @@
+"""Compute per-page and aggregate metrics for the notes revalidation."""
+
+from __future__ import annotations
+
+import json
+from collections import Counter
+from pathlib import Path
+
+PAGES = [2, 6, 9, 14, 16, 19]  # page numbers to score
+VERIFIED_DIR = Path("data/verifier-pulled-refresh")  # holds the *.verified.json files
+FRESH_DIR = Path("data/notes-revalidation-2026-06-04")  # holds the *.fresh.json files
+
+
+def index_notes(page: dict) -> dict[tuple[str, int], str | None]:
+    """Map each entry's (quadrant position, row_index) to its ``notes`` value."""
+    # Absent "quadrants"/"entries" lists count as empty; a repeated key keeps the last entry.
+    return {
+        (quadrant.get("position"), entry.get("row_index")): entry.get("notes")
+        for quadrant in page.get("quadrants", [])
+        for entry in quadrant.get("entries", [])
+    }
+
+
+def main() -> None:
+    """Score fresh Gemini ``notes`` tags against the verified files and print a report.
+
+    For every page in PAGES the two files are joined on (quadrant position,
+    row_index). Only rows present on both sides feed the per-tag emit / truth /
+    match counters. A tagged row present on one side only is counted as
+    "unmatched" instead, so it never enters a precision or recall denominator.
+
+    A fresh file holding only an ``{"error": ...}`` payload is reported and skipped.
+    Results go to stdout; nothing is returned or written to disk.
+    """
+    rows: list[dict] = []
+    agg_emit: Counter[str] = Counter()  # fresh-Gemini notes value -> count
+    agg_truth: Counter[str] = Counter()  # Alex notes value -> count
+    # Rows where fresh == truth, per tag. A match is the same tag on both
+    # sides, so this single counter is the numerator for both precision
+    # (vs. agg_emit) and recall (vs. agg_truth).
+    agg_match: Counter[str] = Counter()
+    agg_unmatched_fresh = 0
+    agg_unmatched_truth = 0
+
+    for p in PAGES:
+        stem = f"1990-04apr0106-page{p:02d}"
+        verified_path = VERIFIED_DIR / f"{stem}.verified.json"
+        fresh_path = FRESH_DIR / f"{stem}.fresh.json"
+        verified = json.loads(verified_path.read_text(encoding="utf-8"))
+        fresh = json.loads(fresh_path.read_text(encoding="utf-8"))
+        if "error" in fresh and "quadrants" not in fresh:
+            # A {"error": "..."} payload marks a failed extraction. Scoring it as an
+            # empty page would silently skew the aggregates, so skip it loudly.
+            print(f"page {p:02d}: SKIPPED, fresh extraction failed: {fresh['error']}")
+            continue
+
+        truth_notes = index_notes(verified)
+        fresh_notes = index_notes(fresh)
+
+        # Per-page counters
+        emit: Counter[str] = Counter()
+        truth: Counter[str] = Counter()
+        match: Counter[str] = Counter()
+        unmatched_fresh = 0
+        unmatched_truth = 0
+
+        # Join on (position, row_index); one-sided rows only count as unmatched.
+        for k in set(truth_notes) | set(fresh_notes):
+            t = truth_notes.get(k)
+            f = fresh_notes.get(k)
+            if k not in truth_notes:
+                # fresh emitted an entry Alex doesn't have at that position
+                if f is not None:
+                    unmatched_fresh += 1
+                continue
+            if k not in fresh_notes:
+                # Alex has an entry fresh-Gemini lacks at that position
+                if t is not None:
+                    unmatched_truth += 1
+                continue
+            if f is not None:
+                emit[f] += 1
+            if t is not None:
+                truth[t] += 1
+            if f is not None and f == t:
+                match[f] += 1
+
+        rows.append(
+            {
+                "page": p,
+                "emit": dict(emit),
+                "truth": dict(truth),
+                "match": dict(match),
+                "unmatched_fresh_rows": unmatched_fresh,
+                "unmatched_truth_rows": unmatched_truth,
+            }
+        )
+
+        agg_emit.update(emit)
+        agg_truth.update(truth)
+        agg_match.update(match)
+        agg_unmatched_fresh += unmatched_fresh
+        agg_unmatched_truth += unmatched_truth
+
+    print("== Per-page (fresh emits / Alex truth) ==")
+    tags = ["crossed_out", "continuation", "double_height", "illegible", "other"]
+    # Column labels are the first five characters of each tag.
+    header = f"{'page':>4}  " + "  ".join(f"{t[:5]:>11}" for t in tags) + "  unmatched(F/T)"
+    print(header)
+    for r in rows:
+        cells = [
+            f"{r['emit'].get(t, 0):>4}/{r['truth'].get(t, 0):<5}"
+            for t in tags
+        ]
+        print(
+            f"{r['page']:>4}  "
+            + "  ".join(cells)
+            + f"  {r['unmatched_fresh_rows']}/{r['unmatched_truth_rows']}"
+        )
+
+    print()
+    print("== Aggregate ==")
+    print(f"fresh-Gemini emit counts: {dict(agg_emit)}")
+    print(f"Alex truth counts:        {dict(agg_truth)}")
+    print(f"matched (fresh==truth):   {dict(agg_match)}")
+    print(f"truth rows recalled:      {dict(agg_match)}")
+    print(f"unmatched fresh entries (row Alex doesn't have): {agg_unmatched_fresh}")
+    print(f"unmatched truth entries (row fresh-Gemini lacks): {agg_unmatched_truth}")
+
+    def ratio(tag: str, denominators: Counter[str]) -> str:
+        # "hits/total = NN%" for *tag*, or "N/A" when the denominator is zero.
+        total = denominators.get(tag, 0)
+        if not total:
+            return "N/A"
+        hits = agg_match.get(tag, 0)
+        return f"{hits}/{total} = {hits / total * 100:.0f}%"
+
+    print()
+    print("== Headline metrics ==")
+    # crossed_out precision: of fresh-Gemini's crossed_out emits, how many does Alex keep
+    print(f"crossed_out precision  = {ratio('crossed_out', agg_emit)}")
+
+    # continuation recall: of Alex's continuation, how many fresh-Gemini caught
+    print(f"continuation recall    = {ratio('continuation', agg_truth)}")
+
+    # double_height recall
+    print(f"double_height recall   = {ratio('double_height', agg_truth)}")
+
+    # illegible recall (bonus): printed only when Alex tagged at least one row
+    if agg_truth.get("illegible", 0):
+        print(f"illegible recall       = {ratio('illegible', agg_truth)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/_revalidate_notes_newprompt.py b/scripts/_revalidate_notes_newprompt.py
@@ -0,0 +1,65 @@
+"""Re-extract the same 6-page sample as scripts/_revalidate_notes_2026_06_04.py
+but against whatever core/prompts.py is currently on disk. Dumps results to
+data/notes-revalidation-newprompt/ so the baseline measurement
+(data/notes-revalidation-2026-06-04/) is preserved for diff.
+
+One-shot measurement for issue #61. Run from repo root.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+from core.gemini import GeminiClient, MediaResolution
+
+PAGES = [2, 6, 9, 14, 16, 19]  # page numbers of the sample to re-extract
+PAGES_DIR = Path("data/pages/1990/April 1990/1990-04apr0106")  # holds the page-NN.png inputs
+OUT_DIR = Path("data/notes-revalidation-newprompt")  # receives one .fresh.json per page
+
+
+async def main() -> None:
+    """Extract each page in PAGES and write one JSON file per page to OUT_DIR."""
+    load_dotenv(override=False)
+    env = os.environ
+    api_key = env["GEMINI_API_KEY"]
+    model = env.get("GEMINI_MODEL", "gemini-3.1-pro-preview")
+    resolution_name = env.get("GEMINI_MEDIA_RESOLUTION", "high")
+    media_resolution = MediaResolution.from_string(resolution_name)
+
+    from google import genai
+
+    sdk = genai.Client(api_key=api_key)
+    client = GeminiClient(sdk=sdk, model=model, media_resolution=media_resolution)
+    cache_state = "on" if await client.create_cache() else "off"
+    print(f"model={model}  cache={cache_state}")
+
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    async def extract(page_no: int) -> tuple[int, dict | str]:
+        # A failure becomes an "ERROR: ..." string so one bad page cannot sink the batch.
+        image_path = PAGES_DIR / f"page-{page_no:02d}.png"
+        try:
+            page_result = await client.extract_page(image_path)
+            return page_no, page_result.model_dump(mode="json")
+        except Exception as exc:  # noqa: BLE001
+            return page_no, f"ERROR: {type(exc).__name__}: {exc}"
+
+    outcomes = await asyncio.gather(*map(extract, PAGES))
+    for page_no, payload in outcomes:
+        target = OUT_DIR / f"1990-04apr0106-page{page_no:02d}.fresh.json"
+        if not isinstance(payload, str):
+            target.write_text(json.dumps(payload, indent=2))
+            print(f"page {page_no:02d}: wrote {target}")
+            continue
+        # Error path: report it and persist the message in place of the page JSON.
+        print(f"page {page_no:02d}: {payload}")
+        target.write_text(json.dumps({"error": payload}, indent=2))
+
+
+if __name__ == "__main__":
+    asyncio.run(main())