From 29911de2385ffc7c28524bfee84e68b9b4c86a6b Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:14:00 +0700 Subject: [PATCH 1/5] feat: [decoder-concept-page-001] - [Align the existing decoder record for canonical concept discovery] --- src/content/registry/concepts/decoder.json | 17 ++- .../registry/concepts/transformer.json | 1 + src/content/registry/models/gpt-3.json | 3 +- src/content/registry/papers/gpt-2-report.json | 3 +- .../decoder-discovery-registry.test.tsx | 129 ++++++++++++++++++ 5 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 src/lib/content/decoder-discovery-registry.test.tsx diff --git a/src/content/registry/concepts/decoder.json b/src/content/registry/concepts/decoder.json index 3dcbf0db..d06805ed 100644 --- a/src/content/registry/concepts/decoder.json +++ b/src/content/registry/concepts/decoder.json @@ -4,17 +4,28 @@ "kind": "concept", "defaultTitleKey": "title", "defaultSummaryKey": "description", - "aliases": ["Decoder", "decoder block", "decoder stack", "decoding network"], + "aliases": [ + "Decoder", + "decoder block", + "decoder stack", + "decoder-only stack", + "decoding network" + ], "tags": ["foundations", "taxonomy"], "relatedIds": [ "concept.encoder", "concept.encoder-decoder", - "concept.autoregressive-generation" + "concept.autoregressive-generation", + "concept.decode", + "concept.transformer", + "module.causal-attention", + "model.gpt-3", + "paper.gpt-2-report" ], "citationIds": ["citation.attention-is-all-you-need", "citation.brown-gpt-3"], "status": "published", "createdAt": "2026-06-04T11:39:45.780Z", - "updatedAt": "2026-06-04T12:30:00.000Z", + "updatedAt": "2026-06-22T00:00:00.000Z", "conceptType": "architecture", "sidebarGrouping": { "glossary": "sequence-and-attention" diff --git a/src/content/registry/concepts/transformer.json b/src/content/registry/concepts/transformer.json index 15c5e23a..084b9a6b 100644 --- a/src/content/registry/concepts/transformer.json +++ 
b/src/content/registry/concepts/transformer.json @@ -8,6 +8,7 @@ "tags": ["taxonomy", "model-family"], "relatedIds": [ "concept.architecture", + "concept.decoder", "concept.encoder-decoder", "concept.autoregressive-generation", "concept.kv-cache", diff --git a/src/content/registry/models/gpt-3.json b/src/content/registry/models/gpt-3.json index e5ddaf42..61c4ab8e 100644 --- a/src/content/registry/models/gpt-3.json +++ b/src/content/registry/models/gpt-3.json @@ -7,6 +7,7 @@ "aliases": ["GPT-3", "Generative Pre-trained Transformer 3"], "tags": ["foundations", "model-family", "attention", "context-window"], "relatedIds": [ + "concept.decoder", "concept.tokenizers-overview", "concept.transformer-architecture", "concept.autoregressive-generation", @@ -19,7 +20,7 @@ "citationIds": ["citation.brown-gpt-3", "citation.kaplan-scaling-laws"], "status": "published", "createdAt": "2026-06-18T00:00:00.000Z", - "updatedAt": "2026-06-21T00:00:00.000Z", + "updatedAt": "2026-06-22T00:00:00.000Z", "authors": [ "Tom B. 
Brown", "Benjamin Mann", diff --git a/src/content/registry/papers/gpt-2-report.json b/src/content/registry/papers/gpt-2-report.json index 84d82413..bd8e2648 100644 --- a/src/content/registry/papers/gpt-2-report.json +++ b/src/content/registry/papers/gpt-2-report.json @@ -13,6 +13,7 @@ ], "tags": ["foundations", "model-family", "tokenization"], "relatedIds": [ + "concept.decoder", "module.byte-level-tokenization", "concept.transformer-architecture", "concept.scaling-law" @@ -20,7 +21,7 @@ "citationIds": ["citation.gpt-2-report"], "status": "published", "createdAt": "2026-06-20T00:00:00.000Z", - "updatedAt": "2026-06-20T00:00:00.000Z", + "updatedAt": "2026-06-22T00:00:00.000Z", "authors": [ "Alec Radford", "Jeffrey Wu", diff --git a/src/lib/content/decoder-discovery-registry.test.tsx b/src/lib/content/decoder-discovery-registry.test.tsx new file mode 100644 index 00000000..a790c653 --- /dev/null +++ b/src/lib/content/decoder-discovery-registry.test.tsx @@ -0,0 +1,129 @@ +import { describe, expect, test } from "bun:test"; +import { renderToStaticMarkup } from "react-dom/server"; +import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs"; +import { RelatedDocs } from "@/features/docs/components/RelatedDocs"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { + getConceptById, + getModelById, + getPaperById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; + +describe("decoder discovery registry alignment", () => { + test("concept.decoder keeps canonical broad-concept aliases and discovery neighbors", () => { + const record = getConceptById("concept.decoder"); + + expect(record?.status).toBe("published"); + expect(record?.aliases).toEqual( + expect.arrayContaining([ + "Decoder", + "decoder block", + "decoder stack", + "decoder-only stack", + ]), + ); + expect(record?.tags).toEqual(["foundations", 
"taxonomy"]); + expect(record?.relatedIds).toEqual([ + "concept.encoder", + "concept.encoder-decoder", + "concept.autoregressive-generation", + "concept.decode", + "concept.transformer", + "module.causal-attention", + "model.gpt-3", + "paper.gpt-2-report", + ]); + }); + + test("decoder curated related links resolve to the nearby published docs surfaces for this slice", () => { + const source = getConceptById("concept.decoder"); + if (!source) { + throw new Error("expected concept.decoder in registry"); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + expect( + items.some( + (item) => + item.registryId === "concept.encoder-decoder" && + item.href === "/docs/glossary/encoder-decoder", + ), + ).toBe(true); + expect( + items.some( + (item) => + item.registryId === "concept.autoregressive-generation" && + item.href === "/docs/glossary/autoregressive-generation", + ), + ).toBe(true); + expect( + items.some( + (item) => + item.registryId === "concept.decode" && + item.href === "/docs/glossary/decode", + ), + ).toBe(true); + expect( + items.some( + (item) => + item.registryId === "concept.transformer" && + item.href === "/docs/glossary/transformer", + ), + ).toBe(true); + expect( + items.some( + (item) => + item.registryId === "module.causal-attention" && + item.href === "/docs/modules/causal-attention", + ), + ).toBe(true); + expect( + items.some( + (item) => + item.registryId === "model.gpt-3" && + item.href === "/docs/models/gpt-3", + ), + ).toBe(true); + expect( + items.some( + (item) => + item.registryId === "paper.gpt-2-report" && + item.href === "/docs/papers/gpt-2-report", + ), + ).toBe(true); + }); + + test("gpt-3 and the GPT-2 report expose decoder as a published related destination", () => { + const gpt3 = getModelById("model.gpt-3"); + const gpt2Report = getPaperById("paper.gpt-2-report"); + + expect(gpt3?.relatedIds).toContain("concept.decoder"); + 
expect(gpt2Report?.relatedIds).toContain("concept.decoder"); + + const gpt3Html = renderToStaticMarkup( + , + ); + expect(gpt3Html).toContain('href="/docs/glossary/decoder"'); + + const gpt2Html = renderToStaticMarkup( + , + ); + expect(gpt2Html).toContain('href="/docs/glossary/decoder"'); + }); +}); From 5e50dd5103c828430a93206bf2762d5a4ebe204e Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:26:07 +0700 Subject: [PATCH 2/5] feat: [decoder-concept-page-002] - [Publish the canonical decoder concept page] --- src/content/docs/concepts/decoder/assets.json | 1 + .../docs/concepts/decoder/messages/en.json | 36 +++++ src/content/docs/concepts/decoder/page.mdx | 71 ++++++++++ src/lib/content/decoder-concept.test.ts | 128 ++++++++++++++++++ 4 files changed, 236 insertions(+) create mode 100644 src/content/docs/concepts/decoder/assets.json create mode 100644 src/content/docs/concepts/decoder/messages/en.json create mode 100644 src/content/docs/concepts/decoder/page.mdx create mode 100644 src/lib/content/decoder-concept.test.ts diff --git a/src/content/docs/concepts/decoder/assets.json b/src/content/docs/concepts/decoder/assets.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/src/content/docs/concepts/decoder/assets.json @@ -0,0 +1 @@ +{} diff --git a/src/content/docs/concepts/decoder/messages/en.json b/src/content/docs/concepts/decoder/messages/en.json new file mode 100644 index 00000000..0e7106de --- /dev/null +++ b/src/content/docs/concepts/decoder/messages/en.json @@ -0,0 +1,36 @@ +{ + "title": "Decoder", + "description": "The part of a model that turns context into output predictions, especially the left-to-right stack used by GPT-style language models.", + "openingSummary": "A decoder turns the context a model already has into output predictions. 
In modern decoder-only language models, the same stack repeatedly reads the tokens so far and predicts the next token, which is why decoders sit at the center of GPT-style text generation.", + "sections": { + "whatItIs": { + "title": "What It Is", + "body": "A decoder is the part of a model that reads internal context and produces output-facing predictions. In language systems, that usually means turning token representations into the next-token distribution. In other settings, a decoder may turn a latent or encoder memory into pixels, audio, or another structured output. The shared idea is that the decoder is the readout side of the system: it consumes representations and pushes them toward a final answer." + }, + "whyItMatters": { + "title": "Why It Matters", + "body": "The word decoder explains where generation actually happens. In a GPT-style model, there is no separate encoder that first builds a frozen memory for the prompt. The decoder stack itself both builds the running context and turns that context into next-token probabilities. That is why pages about causal attention, decode, autoregressive generation, and GPT-family models keep pointing back to decoders: they are the architecture layer that makes token-by-token output possible." + }, + "decoderOnlyLoop": { + "title": "Why Decoder-Only Fits Next-Token Generation", + "body": "A decoder-only transformer uses left-to-right, or causal, attention. Each position can read the tokens that already exist in the prefix, but it cannot read future tokens that have not been generated yet. That rule matches the job exactly: predict the next token from the tokens already seen. After one token is chosen, the prefix grows by one position and the same decoder stack runs again. Because the attention mask and the task follow the same left-to-right constraint, decoder-only models are a natural fit for autoregressive generation." 
+ }, + "comparedWithOtherLayouts": { + "title": "Compared With Encoder And Encoder-Decoder Models", + "body": "An encoder-only model is usually built to read the whole input and produce representations, not to keep emitting new tokens step by step. It can use bidirectional attention because every input token is already known. An encoder-decoder model splits the work: the encoder reads the full source input first, then the decoder generates outputs while attending both to earlier output tokens and to encoder memory. A decoder-only model folds those jobs into one causal stack, which is simpler for plain next-token continuation but less specialized for tasks that benefit from a separate full-input reader." + }, + "commonConfusions": { + "title": "Common Confusions", + "body": "A decoder is not just the final softmax or language-model head; the head sits on top of the decoder stack. Decoder-only also does not mean the model only writes and never represents context. The same stack still builds rich internal states while it reads the prefix. Finally, not every decoder is autoregressive. Some image or latent decoders reconstruct outputs in one shot. The left-to-right token loop is specifically the language-model decoder pattern." + }, + "related": { + "title": "Related Concepts And Modules" + }, + "tags": { + "title": "Tags" + }, + "references": { + "title": "References" + } + } +} diff --git a/src/content/docs/concepts/decoder/page.mdx b/src/content/docs/concepts/decoder/page.mdx new file mode 100644 index 00000000..96fe2b96 --- /dev/null +++ b/src/content/docs/concepts/decoder/page.mdx @@ -0,0 +1,71 @@ +--- +title: Decoder +description: The part of a model that turns context into output predictions, especially the left-to-right stack used by GPT-style language models. 
+kind: "concept" +registryId: "concept.decoder" +messageNamespace: "local" +assetNamespace: "local" +status: "published" +tags: + - foundations + - taxonomy +aliases: + - "Decoder" + - "decoder block" + - "decoder stack" + - "decoder-only stack" + - "decoding network" +updatedAt: "2026-06-22" +--- + +import { CitationList } from "@/features/docs/components/CitationList"; +import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs"; +import { RelatedDocs } from "@/features/docs/components/RelatedDocs"; +import { Section } from "@/features/docs/components/Section"; +import { T } from "@/features/docs/components/T"; +import { TagPillList } from "@/features/docs/components/TagPillList"; + +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ + + +
+ +
+ +
+ +
diff --git a/src/lib/content/decoder-concept.test.ts b/src/lib/content/decoder-concept.test.ts new file mode 100644 index 00000000..a428b710 --- /dev/null +++ b/src/lib/content/decoder-concept.test.ts @@ -0,0 +1,128 @@ +import { describe, expect, test } from "bun:test"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { loadConceptPage } from "@/lib/content/concept-page"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { + getConceptById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; +import { docsSearchApi } from "@/lib/search/search-server"; + +describe("decoder concept page (decoder-concept-page-002)", () => { + test("publishes the canonical decoder concept route with the existing registry record", () => { + const record = getConceptById("concept.decoder"); + + expect(record?.status).toBe("published"); + expect(record?.kind).toBe("concept"); + expect(record?.aliases).toEqual( + expect.arrayContaining([ + "Decoder", + "decoder stack", + "decoder-only stack", + "decoding network", + ]), + ); + expect(PUBLISHED_DOCS_REGISTRY_IDS.has("concept.decoder")).toBe(true); + }); + + test("curated related links resolve to the decoder's nearby generation and architecture pages", () => { + const source = getConceptById("concept.decoder"); + if (!source) { + throw new Error("expected concept.decoder in registry"); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + expect( + items.find((item) => item.registryId === 
"concept.encoder-decoder")?.href, + ).toBe("/docs/glossary/encoder-decoder"); + expect( + items.find( + (item) => item.registryId === "concept.autoregressive-generation", + )?.href, + ).toBe("/docs/glossary/autoregressive-generation"); + expect( + items.find((item) => item.registryId === "concept.decode")?.href, + ).toBe("/docs/glossary/decode"); + expect( + items.find((item) => item.registryId === "module.causal-attention")?.href, + ).toBe("/docs/modules/causal-attention"); + expect(items.find((item) => item.registryId === "model.gpt-3")?.href).toBe( + "/docs/models/gpt-3", + ); + expect( + items.find((item) => item.registryId === "paper.gpt-2-report")?.href, + ).toBe("/docs/papers/gpt-2-report"); + }); + + test("page renders the canonical concept route with decoder-only and architecture comparison copy", async () => { + const page = await loadConceptPage("decoder"); + + expect(page.frontmatter.kind).toBe("concept"); + expect(page.frontmatter.status).toBe("published"); + expect(page.frontmatter.registryId).toBe("concept.decoder"); + expect(page.messages.openingSummary?.toLowerCase()).toContain( + "predicts the next token", + ); + + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: page.messages, + assets: page.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: page.content, + }), + ); + + expect(html).toContain("What It Is"); + expect(html).toContain("Why It Matters"); + expect(html).toContain("Why Decoder-Only Fits Next-Token Generation"); + expect(html).toContain("Compared With Encoder And Encoder-Decoder Models"); + expect(html).toContain("causal attention"); + expect(html).toContain("decoder-only transformer uses left-to-right"); + expect(html).toContain("encoder-only model"); + expect(html).toContain('href="/docs/glossary/autoregressive-generation"'); + expect(html).toContain('href="/docs/glossary/encoder-decoder"'); + 
expect(html).toContain('href="/docs/modules/causal-attention"'); + expect(html).toContain('href="/docs/models/gpt-3"'); + expect(html).toContain('href="/docs/papers/gpt-2-report"'); + expect(html).toContain('href="/tags/foundations"'); + expect(html).toContain('data-testid="derived-related-docs"'); + expect(html).toContain('data-testid="curated-related-docs"'); + expect(html).not.toContain("Phase"); + expect(html).not.toContain("Reader Shortcut"); + }); + + test("search discovery prefers the canonical decoder concept route for broad decoder queries", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + + const conceptDocument = documents.find( + (entry) => entry.url === "/docs/concepts/decoder", + ); + expect(conceptDocument?.aliases).toEqual( + expect.arrayContaining([ + "Decoder", + "decoder stack", + "decoder-only stack", + "decoding network", + ]), + ); + + for (const query of ["decoder-only stack", "decoding network"] as const) { + const results = await docsSearchApi.search(query); + expect(results[0]?.url).toBe("/docs/concepts/decoder"); + } + }); +}); From 8d7904a445e67f8409c26e01eb30e91958252f39 Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:41:47 +0700 Subject: [PATCH 3/5] feat: [decoder-concept-page-003] - [Route readers through the GPT-style decoder path] --- .../processes/ontology-classification-relevant-files.md | 6 +++++- src/lib/content/causal-attention-module-page.test.ts | 2 +- src/lib/content/decoder-discovery-registry.test.tsx | 6 +++--- .../content/encoder-decoder-architecture-glossary.test.ts | 2 +- src/lib/content/generation-paradigm-glossary.test.ts | 2 +- src/lib/content/glossary-architecture-index.test.ts | 4 +++- src/lib/content/gpt-2-report-paper-page.test.tsx | 1 + src/lib/content/gpt-2-report-paper-record.test.ts | 2 ++ src/lib/content/gpt-3-model-page.test.tsx | 1 + src/lib/content/prose-auto-link.test.ts | 
2 +- 10 files changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/internal/processes/ontology-classification-relevant-files.md b/docs/internal/processes/ontology-classification-relevant-files.md index affa1b7b..f14247a2 100644 --- a/docs/internal/processes/ontology-classification-relevant-files.md +++ b/docs/internal/processes/ontology-classification-relevant-files.md @@ -165,7 +165,11 @@ the temporary legacy-id bridge. * `src/lib/content/architecture.ts` Architecture browse/index classification should treat ontology architecture membership, including descendant branches such as activation, as canonical - evidence before any legacy `conceptType` fallback. + evidence before any legacy `conceptType` fallback. When a concept-backed + glossary term also gains a published concept-section page, architecture + browse expectations should move to the canonical concept route and index + counts should be updated from the runtime-derived published entries rather + than preserved as glossary-era constants. 
* `src/lib/governance/typed-taxonomy-consumer-audit.ts` Machine-checkable contract for remaining typed-taxonomy consumer clusters, ownership, compatibility status, the recommended next migration target, and diff --git a/src/lib/content/causal-attention-module-page.test.ts b/src/lib/content/causal-attention-module-page.test.ts index 0d39905e..e66882bd 100644 --- a/src/lib/content/causal-attention-module-page.test.ts +++ b/src/lib/content/causal-attention-module-page.test.ts @@ -82,7 +82,7 @@ describe("loadModulePage causal-attention", () => { expect(html).toContain('href="/docs/modules/attention"'); expect(html).toContain('href="/docs/modules/bidirectional-attention"'); expect(html).toContain('href="/docs/glossary/autoregressive-generation"'); - expect(html).toContain('href="/docs/glossary/decoder"'); + expect(html).toContain('href="/docs/concepts/decoder"'); expect(html).toContain('data-testid="curated-related-docs"'); expect((html.match(/data-testid="tag-pill-list"/g) ?? []).length).toBe(1); expect(html).not.toContain("Reader Shortcut"); diff --git a/src/lib/content/decoder-discovery-registry.test.tsx b/src/lib/content/decoder-discovery-registry.test.tsx index a790c653..5e564e3e 100644 --- a/src/lib/content/decoder-discovery-registry.test.tsx +++ b/src/lib/content/decoder-discovery-registry.test.tsx @@ -100,7 +100,7 @@ describe("decoder discovery registry alignment", () => { ).toBe(true); }); - test("gpt-3 and the GPT-2 report expose decoder as a published related destination", () => { + test("gpt-3 and the GPT-2 report expose the canonical decoder concept as a published related destination", () => { const gpt3 = getModelById("model.gpt-3"); const gpt2Report = getPaperById("paper.gpt-2-report"); @@ -119,11 +119,11 @@ describe("decoder discovery registry alignment", () => { ]} />, ); - expect(gpt3Html).toContain('href="/docs/glossary/decoder"'); + expect(gpt3Html).toContain('href="/docs/concepts/decoder"'); const gpt2Html = renderToStaticMarkup( , ); - 
expect(gpt2Html).toContain('href="/docs/glossary/decoder"'); + expect(gpt2Html).toContain('href="/docs/concepts/decoder"'); }); }); diff --git a/src/lib/content/encoder-decoder-architecture-glossary.test.ts b/src/lib/content/encoder-decoder-architecture-glossary.test.ts index 42917473..2d4f40c6 100644 --- a/src/lib/content/encoder-decoder-architecture-glossary.test.ts +++ b/src/lib/content/encoder-decoder-architecture-glossary.test.ts @@ -94,7 +94,7 @@ describe("Phase 2 encoder-decoder architecture glossary pages (US-002)", () => { const html = await renderGlossaryHtml("encoder-decoder"); expect(html).toContain('href="/docs/glossary/encoder"'); - expect(html).toContain('href="/docs/glossary/decoder"'); + expect(html).toContain('href="/docs/concepts/decoder"'); }); test("search index records encoder cluster with glossary kind not module", async () => { diff --git a/src/lib/content/generation-paradigm-glossary.test.ts b/src/lib/content/generation-paradigm-glossary.test.ts index 5413e68c..457aaf71 100644 --- a/src/lib/content/generation-paradigm-glossary.test.ts +++ b/src/lib/content/generation-paradigm-glossary.test.ts @@ -77,7 +77,7 @@ describe("Phase 2 generation paradigm glossary pages (US-003)", () => { const html = await renderGlossaryHtml("autoregressive-generation"); expect(html).toContain('href="/docs/glossary/token"'); - expect(html).toContain('href="/docs/glossary/decoder"'); + expect(html).toContain('href="/docs/concepts/decoder"'); expect(html).toContain('href="/docs/glossary/encoder-decoder"'); expect(html).toContain('href="/docs/glossary/logit"'); expect(html).toContain('href="/docs/glossary/softmax"'); diff --git a/src/lib/content/glossary-architecture-index.test.ts b/src/lib/content/glossary-architecture-index.test.ts index 723a23d5..89543640 100644 --- a/src/lib/content/glossary-architecture-index.test.ts +++ b/src/lib/content/glossary-architecture-index.test.ts @@ -133,7 +133,7 @@ const EXPECTED_GLOSSARY_TITLES: Record< }; const 
PUBLISHED_GLOSSARY_ENTRY_COUNT = 58; -const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 50; +const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 51; const GLOSSARY_SEPARATOR_TITLES = [ "Model Taxonomy", @@ -232,6 +232,7 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => { for (const url of [ "/docs/glossary/architecture", + "/docs/concepts/decoder", "/docs/glossary/kv-cache", "/docs/glossary/normalization", "/docs/glossary/residual-connection", @@ -269,6 +270,7 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => { for (const [title, href] of [ ["Attention with linear biases (ALiBi)", "/docs/concepts/alibi"], ["Architecture", "/docs/glossary/architecture"], + ["Decoder", "/docs/concepts/decoder"], ["Foundation Model", "/docs/glossary/foundation-model"], ["Key-value cache", "/docs/concepts/kv-cache"], ["Decode", "/docs/glossary/decode"], diff --git a/src/lib/content/gpt-2-report-paper-page.test.tsx b/src/lib/content/gpt-2-report-paper-page.test.tsx index 307f84ec..971a12e0 100644 --- a/src/lib/content/gpt-2-report-paper-page.test.tsx +++ b/src/lib/content/gpt-2-report-paper-page.test.tsx @@ -74,6 +74,7 @@ describe("GPT-2 report paper page", () => { expect(html).toContain("Decoder-only transformer"); expect(html).toContain("Byte-level BPE tokenization"); expect(html).toContain("Broad next-token pretraining"); + expect(html).toContain('href="/docs/concepts/decoder"'); expect(html).toContain('href="/docs/concepts/transformer-architecture"'); expect(html).toContain('href="/docs/modules/byte-level-tokenization"'); expect(html).toContain('href="/docs/glossary/scaling-law"'); diff --git a/src/lib/content/gpt-2-report-paper-record.test.ts b/src/lib/content/gpt-2-report-paper-record.test.ts index 455f36b5..c75673f6 100644 --- a/src/lib/content/gpt-2-report-paper-record.test.ts +++ b/src/lib/content/gpt-2-report-paper-record.test.ts @@ -10,6 +10,7 @@ import { } from "@/lib/content/related-docs"; const publishedRegistryIds = new Set([ 
+ "concept.decoder", "module.byte-level-tokenization", "concept.transformer-architecture", "concept.scaling-law", @@ -79,6 +80,7 @@ describe("gpt-2 report paper registry record", () => { publishedRegistryIds, ); expect(curated.map((item) => item.registryId)).toEqual([ + "concept.decoder", "module.byte-level-tokenization", "concept.transformer-architecture", "concept.scaling-law", diff --git a/src/lib/content/gpt-3-model-page.test.tsx b/src/lib/content/gpt-3-model-page.test.tsx index 5f685273..5d027df2 100644 --- a/src/lib/content/gpt-3-model-page.test.tsx +++ b/src/lib/content/gpt-3-model-page.test.tsx @@ -66,6 +66,7 @@ describe("gpt-3 model page related docs", () => { expect(html).toContain('data-testid="derived-related-docs"'); expect(html).not.toContain('data-testid="curated-related-docs"'); + expect(html).toContain('href="/docs/concepts/decoder"'); expect(html).toContain('href="/docs/modules/bpe"'); }); diff --git a/src/lib/content/prose-auto-link.test.ts b/src/lib/content/prose-auto-link.test.ts index 4a3782c2..66f800e0 100644 --- a/src/lib/content/prose-auto-link.test.ts +++ b/src/lib/content/prose-auto-link.test.ts @@ -96,7 +96,7 @@ describe("prose auto-link", () => { test("segmentProseWithAutoLinks does not link slug prefixes inside hyphen compounds", () => { const phrases = buildProseAutoLinkPhrases([ - { phrase: "Decoder", href: "/docs/glossary/decoder" }, + { phrase: "Decoder", href: "/docs/concepts/decoder" }, ]); const segments = segmentProseWithAutoLinks( From 0f3200ba9937d0606bfeeebe4fd3ea95a155a98f Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:49:26 +0700 Subject: [PATCH 4/5] feat: [decoder-concept-page-004] - [Add focused validation for the decoder concept-page slice] --- .../decoder-concept-validation.test.ts | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 src/lib/content/decoder-concept-validation.test.ts diff --git a/src/lib/content/decoder-concept-validation.test.ts 
b/src/lib/content/decoder-concept-validation.test.ts new file mode 100644 index 00000000..565e0709 --- /dev/null +++ b/src/lib/content/decoder-concept-validation.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, test } from "bun:test"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { validatePageAssetReferences } from "@/lib/content/assets"; +import { loadConceptPage } from "@/lib/content/concept-page"; +import { loadGlossaryPage } from "@/lib/content/glossary-page"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { + getPublishedDocsEntriesBySlug, + getPublishedDocsEntryByRegistryId, +} from "@/lib/content/published-docs-registry-ids"; +import { getConceptById } from "@/lib/content/registry-runtime"; +import { docsSearchApi } from "@/lib/search/search-server"; + +describe("decoder concept page focused validation (decoder-concept-page-004)", () => { + test("published docs inventory resolves the canonical decoder route, registry id, and English messages together", async () => { + const record = getConceptById("concept.decoder"); + const pages = await loadPublishedDocsPages("en"); + const conceptPage = pages.find( + (entry) => entry.url === "/docs/concepts/decoder", + ); + const glossaryPage = pages.find( + (entry) => entry.url === "/docs/glossary/decoder", + ); + + expect(record?.status).toBe("published"); + expect(conceptPage).toBeDefined(); + expect(conceptPage?.docsSlug).toBe("concepts/decoder"); + expect(conceptPage?.frontmatter.kind).toBe("concept"); + expect(conceptPage?.frontmatter.registryId).toBe("concept.decoder"); + expect(conceptPage?.frontmatter.messageNamespace).toBe("local"); + expect(conceptPage?.frontmatter.assetNamespace).toBe("local"); + expect(conceptPage?.messages.title).toBe("Decoder"); + expect(conceptPage?.messages.openingSummary).toContain( + "predicts the next token", + 
); + + expect(glossaryPage?.frontmatter.kind).toBe("glossary"); + expect(glossaryPage?.frontmatter.registryId).toBe("concept.decoder"); + + expect(getPublishedDocsEntryByRegistryId("concept.decoder")).toEqual( + expect.objectContaining({ + docsSlug: "concepts/decoder", + pageKind: "concept", + section: "concepts", + }), + ); + expect(getPublishedDocsEntriesBySlug("decoder")).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + docsSlug: "concepts/decoder", + pageKind: "concept", + }), + expect.objectContaining({ + docsSlug: "glossary/decoder", + pageKind: "glossary", + }), + ]), + ); + }); + + test("canonical decoder bundle resolves registry-backed copy and valid local assets together", async () => { + const record = getConceptById("concept.decoder"); + if (!record) { + throw new Error("expected concept.decoder in registry"); + } + + const page = await loadConceptPage("decoder"); + + expect(page.frontmatter.kind).toBe("concept"); + expect(page.frontmatter.registryId).toBe(record.id); + expect(page.messages.title).toBe("Decoder"); + expect(page.messages.description).toContain( + "turns context into output predictions", + ); + expect(page.messages.sections?.whatItIs.body).toContain( + "readout side of the system", + ); + expect(page.messages.sections?.decoderOnlyLoop.body).toContain( + "left-to-right, or causal, attention", + ); + expect(page.messages.sections?.comparedWithOtherLayouts.body).toContain( + "encoder-decoder model splits the work", + ); + expect(validatePageAssetReferences(page.assets, page.messages)).toEqual([]); + }); + + test("discovery prefers the canonical concept route while the glossary bridge remains a visible nearby surface", async () => { + const conceptResults = await docsSearchApi.search("decoder-only stack"); + expect(conceptResults[0]?.url).toBe("/docs/concepts/decoder"); + + const bridgeResults = await docsSearchApi.search("decoding network"); + expect( + bridgeResults.some((result) => result.url === "/docs/concepts/decoder"), 
+ ).toBe(true); + expect( + bridgeResults.some((result) => result.url === "/docs/glossary/decoder"), + ).toBe(true); + + const glossaryPage = await loadGlossaryPage("decoder"); + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: glossaryPage.messages, + assets: glossaryPage.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: glossaryPage.content, + }), + ); + + expect(html).toContain('href="/docs/concepts/decoder"'); + }); +}); From 14fb41ae2128d6e757fb1d94af0de9713d34c81e Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 14:01:14 +0700 Subject: [PATCH 5/5] fix: stabilize static export search dialog verifier --- .../verify/phase-1-search-dialog-checks.ts | 25 ++++++- src/lib/verify/search-dialog-checks.test.ts | 65 +++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/lib/verify/phase-1-search-dialog-checks.ts b/src/lib/verify/phase-1-search-dialog-checks.ts index 88d70e6b..03d132de 100644 --- a/src/lib/verify/phase-1-search-dialog-checks.ts +++ b/src/lib/verify/phase-1-search-dialog-checks.ts @@ -87,6 +87,23 @@ export function formatSearchDialogOpenFailureReason(timeoutMs: number): string { return `did not open the header search dialog on the home page within ${timeoutMs}ms`; } +export async function activateSearchDialogTrigger( + trigger: Locator, + timeoutMs: number, +): Promise<void> { + try { + await trigger.click({ timeout: timeoutMs }); + return; + } catch (error) { + if (!(error instanceof Error) || !error.message.includes("Timeout")) { + throw error; + } + } + + await trigger.focus(); + await trigger.press("Enter", { timeout: timeoutMs }); +} + /** * Pure DOM outcome for the header search dialog — used by Playwright and unit tests.
*/ @@ -177,13 +194,19 @@ async function openHeaderSearchDialog( const dialog = page.getByRole("dialog", { name: "Search" }); const deadline = Date.now() + timeoutMs; + await trigger.waitFor({ state: "visible", timeout: timeoutMs }); + await page.locator("body").click({ position: { x: 8, y: 8 }, force: true }); + while (Date.now() < deadline) { if (await dialog.isVisible().catch(() => false)) { return dialog; } const remainingMs = Math.max(1, deadline - Date.now()); - await trigger.click({ timeout: Math.min(remainingMs, timeoutMs) }); + await activateSearchDialogTrigger( + trigger, + Math.min(remainingMs, timeoutMs), + ); try { await dialog.waitFor({ diff --git a/src/lib/verify/search-dialog-checks.test.ts b/src/lib/verify/search-dialog-checks.test.ts index ead73c0a..6a92672a 100644 --- a/src/lib/verify/search-dialog-checks.test.ts +++ b/src/lib/verify/search-dialog-checks.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from "bun:test"; import { PHASE_1_GROUPED_QUERY_ATTENTION_URL } from "./phase-1-search-checks"; import { + activateSearchDialogTrigger, evaluateSearchDialogDomSnapshot, formatPhase1SearchDialogCheckFailure, formatSearchDialogOpenFailureReason, @@ -126,6 +127,70 @@ describe("formatSearchDialogOpenFailureReason", () => { }); }); +describe("activateSearchDialogTrigger", () => { + test("uses pointer click when the trigger click succeeds", async () => { + let focused = false; + let pressed = false; + + await activateSearchDialogTrigger( + { + click: async () => {}, + focus: async () => { + focused = true; + }, + press: async () => { + pressed = true; + }, + } as never, + 1_000, + ); + + expect(focused).toBe(false); + expect(pressed).toBe(false); + }); + + test("falls back to keyboard activation when click times out", async () => { + let focused = false; + let pressKey: string | undefined; + let pressTimeout: number | undefined; + + await activateSearchDialogTrigger( + { + click: async () => { + throw new Error("click: Timeout 44790ms 
exceeded."); + }, + focus: async () => { + focused = true; + }, + press: async (key: string, options?: { timeout?: number }) => { + pressKey = key; + pressTimeout = options?.timeout; + }, + } as never, + 1_234, + ); + + expect(focused).toBe(true); + expect(pressKey).toBe("Enter"); + expect(pressTimeout).toBe(1_234); + }); + + test("rethrows non-timeout trigger activation failures", async () => { + await expect( + activateSearchDialogTrigger( + { + click: async () => { + throw new Error("click: page closed"); + }, + focus: async () => {}, + press: async () => {}, + } as never, + 1_000, + ), + ).rejects.toThrow("click: page closed"); + }); +}); + describe("resolveSearchDialogCheckOptionsFromEnv", () => { test("returns pass stub when VERIFY_SEARCH_DIALOG_STUB=pass", async () => { const options = resolveSearchDialogCheckOptionsFromEnv({