From cdd6a99b099b53c00568e8a6d80553c48f19699d Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:18:00 +0700 Subject: [PATCH 1/6] feat: [decode-concept-page-001] - [Promote decode to the canonical concept discovery surface] --- src/content/docs/concepts/decode/assets.json | 1 + .../docs/concepts/decode/messages/en.json | 47 +++++ src/content/docs/concepts/decode/page.mdx | 94 +++++++++ src/content/docs/concepts/kv-cache/page.mdx | 2 +- src/content/docs/concepts/prefill/page.mdx | 2 +- src/content/docs/glossary/kv-cache/page.mdx | 2 +- .../glossary/prefill-decode-split/page.mdx | 2 +- src/content/docs/glossary/prefill/page.mdx | 2 +- src/content/registry/concepts/decode.json | 6 +- src/lib/content/batching-system-page.test.ts | 2 +- src/lib/content/content-paths.ts | 3 + ...ntent-reconciliation-attention-tag.test.ts | 3 +- .../continuous-batching-system-page.test.ts | 2 +- src/lib/content/decode-concept.test.ts | 189 ++++++++++++++++++ .../inference-engine-system-registry.test.ts | 2 +- src/lib/content/kv-cache-concept.test.ts | 4 +- src/lib/content/kv-cache-glossary.test.ts | 4 +- src/lib/content/memory-system-page.test.ts | 2 +- .../content/memory-system-registry.test.ts | 2 +- ...anese-attention-variant-proof-set.test.tsx | 4 +- ...phase-5-serving-path-search-locale.test.ts | 4 +- src/lib/content/prefill-concept.test.ts | 4 +- .../prefill-decode-split-glossary.test.ts | 6 +- .../speculative-decoding-system-page.test.ts | 2 +- 24 files changed, 365 insertions(+), 26 deletions(-) create mode 100644 src/content/docs/concepts/decode/assets.json create mode 100644 src/content/docs/concepts/decode/messages/en.json create mode 100644 src/content/docs/concepts/decode/page.mdx create mode 100644 src/lib/content/decode-concept.test.ts diff --git a/src/content/docs/concepts/decode/assets.json b/src/content/docs/concepts/decode/assets.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/src/content/docs/concepts/decode/assets.json @@ -0,0 +1 @@ +{} diff --git a/src/content/docs/concepts/decode/messages/en.json b/src/content/docs/concepts/decode/messages/en.json new file mode 100644 index 00000000..eca79f62 --- /dev/null +++ b/src/content/docs/concepts/decode/messages/en.json @@ -0,0 +1,47 @@ +{ + "title": "Decode", + "description": "The repeated next-token stage that reuses saved prefix state, extends the KV cache one position at a time, and makes inter-token latency the main reader-visible cost after prefill.", + "openingSummary": "After prefill has processed the prompt, decode is the repeated next-token loop that keeps reusing saved key-value state, adds one new position at a time, and usually turns inter-token latency plus cache growth into the dominant feel of a long response.", + "sections": { + "whatItIs": { + "title": "What It Is", + "body": "Decode is the stage of autoregressive generation that starts after prefill has already read the prompt and built the first key-value cache state. Each turn computes the next token from the current prefix, appends that token, writes one more cache position, and then repeats until generation stops." + }, + "whyItMatters": { + "title": "Why It Matters", + "body": "Decode often controls the feel of a streamed answer because readers experience it as one token arriving after another. The model no longer has to reread the whole prompt from scratch, but each step still depends on moving through a growing cache and running another forward pass for the newest position. That is why inter-token latency, cache growth, memory bandwidth, and serving density show up so often in decode discussions." + }, + "simpleExample": { + "title": "Simple Example", + "body": "Suppose a prompt has already been processed and the model is about to generate a 50-token reply. Decode produces token 1, extends the prefix, updates the cache for that new position, then repeats for token 2, token 3, and the rest of the answer. Each turn is smaller than the prompt pass, but dozens of repeated turns can dominate the total user-visible wait." + }, + "commonConfusions": { + "title": "Common Confusions", + "body": "Decode is not the same as the decoder module. The decoder is the model stack; decode is the runtime loop that calls that stack again and again during generation. Decode is also not the same as prefill: prefill reads the existing prompt once, while decode handles one newly generated token at a time. Finally, decode is not the same as sampling. Decode produces the next-step hidden state and logits, while sampling decides how one token is chosen from those scores." + }, + "servingPath": { + "title": "Serving Path", + "body": "Use the nearby pages below to trace what decode depends on, what it changes in the serving stack, and where to go next if you want either the systems view or the token-choice view." + }, + "related": { + "title": "Related Concepts And Systems" + }, + "tags": { + "title": "Tags" + }, + "references": { + "title": "References" + } + }, + "links": { + "prefill": "Prefill", + "prefillDecodeSplit": "Prefill/decode split", + "kvCache": "KV cache", + "autoregressiveGeneration": "Autoregressive generation", + "batching": "Batching", + "continuousBatching": "Continuous batching", + "memory": "Memory", + "speculativeDecoding": "Speculative decoding", + "samplingOverview": "Sampling overview" + } +} diff --git a/src/content/docs/concepts/decode/page.mdx b/src/content/docs/concepts/decode/page.mdx new file mode 100644 index 00000000..96df58f2 --- /dev/null +++ b/src/content/docs/concepts/decode/page.mdx @@ -0,0 +1,94 @@ +--- +title: Decode +description: The repeated next-token stage that reuses saved prefix state, extends the KV cache one position at a time, and makes inter-token latency the main reader-visible cost after prefill. +kind: "concept" +registryId: "concept.decode" +messageNamespace: "local" +assetNamespace: "local" +status: "published" +tags: + - foundations + - attention + - kv-cache +aliases: + - "Decode" + - "decoding" + - "token-by-token generation" + - "next-token step" + - "inter-token generation" +updatedAt: "2026-06-22" +--- + +import { CitationList } from "@/features/docs/components/CitationList"; +import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs"; +import { LocalizedLinkList } from "@/features/docs/components/LocalizedLinkList"; +import { RelatedDocs } from "@/features/docs/components/RelatedDocs"; +import { Section } from "@/features/docs/components/Section"; +import { T } from "@/features/docs/components/T"; +import { TagPillList } from "@/features/docs/components/TagPillList"; + +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ + +
+ + + +
+ +
+ +
+ +
diff --git a/src/content/docs/concepts/kv-cache/page.mdx b/src/content/docs/concepts/kv-cache/page.mdx index 10f1f438..6a0e77a3 100644 --- a/src/content/docs/concepts/kv-cache/page.mdx +++ b/src/content/docs/concepts/kv-cache/page.mdx @@ -41,7 +41,7 @@ import { TagPillList } from "@/features/docs/components/TagPillList"; diff --git a/src/content/docs/glossary/prefill-decode-split/page.mdx b/src/content/docs/glossary/prefill-decode-split/page.mdx index 4b06b0a6..25089afc 100644 --- a/src/content/docs/glossary/prefill-decode-split/page.mdx +++ b/src/content/docs/glossary/prefill-decode-split/page.mdx @@ -46,7 +46,7 @@ import { TagPillList } from "@/features/docs/components/TagPillList"; items={[ { href: "/docs/concepts/kv-cache", labelKey: "links.kvCache" }, { href: "/docs/concepts/prefill", labelKey: "links.prefill" }, - { href: "/docs/glossary/decode", labelKey: "links.decode" }, + { href: "/docs/concepts/decode", labelKey: "links.decode" }, { href: "/search?q=paged%20attention", labelKey: "links.pagedAttention", diff --git a/src/content/docs/glossary/prefill/page.mdx b/src/content/docs/glossary/prefill/page.mdx index 45d022b5..49de8b68 100644 --- a/src/content/docs/glossary/prefill/page.mdx +++ b/src/content/docs/glossary/prefill/page.mdx @@ -46,7 +46,7 @@ import { TagPillList } from "@/features/docs/components/TagPillList"; { expect(html).toContain("Legend:"); expect(html).toContain("Queue requests"); expect(html).toContain('href="/docs/concepts/prefill"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/systems/continuous-batching"'); diff --git a/src/lib/content/content-paths.ts b/src/lib/content/content-paths.ts index 7d07b767..dceec038 100644 --- a/src/lib/content/content-paths.ts +++ b/src/lib/content/content-paths.ts @@ -293,6 +293,9 @@ export const HIDDEN_SIZE_GLOSSARY_PAGE_DIR = join( /** Prefill concept page directory. */ export const PREFILL_CONCEPT_PAGE_DIR = join(CONCEPTS_DOCS_ROOT, "prefill"); +/** Decode concept page directory. */ +export const DECODE_CONCEPT_PAGE_DIR = join(CONCEPTS_DOCS_ROOT, "decode"); + /** Vocabulary size glossary page directory. */ export const VOCABULARY_SIZE_GLOSSARY_PAGE_DIR = join( GLOSSARY_DOCS_ROOT, diff --git a/src/lib/content/content-reconciliation-attention-tag.test.ts b/src/lib/content/content-reconciliation-attention-tag.test.ts index ee3ced15..8030bddf 100644 --- a/src/lib/content/content-reconciliation-attention-tag.test.ts +++ b/src/lib/content/content-reconciliation-attention-tag.test.ts @@ -97,6 +97,7 @@ describe("Phase 2/3 reconciliation attention tag landing (US-007)", () => { const conceptGroup = groups.find((group) => group.kind === "concept"); expect(conceptGroup?.kindLabel).toBe("Concept"); expect(conceptGroup?.resources.map((resource) => resource.url)).toEqual([ + "/docs/concepts/decode", "/docs/concepts/kv-cache", "/docs/concepts/prefill", ]); @@ -164,7 +165,7 @@ describe("Phase 2/3 reconciliation attention tag page render (US-007)", () => { expect(html).toContain("Linear Attention"); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/glossary/autoregressive-generation"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/kv-cache"'); expect(html).toContain('href="/docs/concepts/prefill"'); expect(html).toContain('href="/docs/glossary/token"'); diff --git a/src/lib/content/continuous-batching-system-page.test.ts b/src/lib/content/continuous-batching-system-page.test.ts index a6bc5a29..ee630862 100644 --- a/src/lib/content/continuous-batching-system-page.test.ts +++ b/src/lib/content/continuous-batching-system-page.test.ts @@ -171,7 +171,7 @@ describe("continuous batching docs route render", () => { ), ); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/systems/batching"'); diff --git a/src/lib/content/decode-concept.test.ts b/src/lib/content/decode-concept.test.ts new file mode 100644 index 00000000..1ec58d3d --- /dev/null +++ b/src/lib/content/decode-concept.test.ts @@ -0,0 +1,189 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { loadConceptPage } from "@/lib/content/concept-page"; +import { DECODE_CONCEPT_PAGE_DIR } from "@/lib/content/content-paths"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { + PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS, + PUBLISHED_DOCS_REGISTRY_IDS, +} from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { + getConceptById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; +import { pageMessagesSchema } from "@/lib/content/schemas"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; +import { docsSearchApi } from "@/lib/search/search-server"; + +const pageDir = DECODE_CONCEPT_PAGE_DIR; +const messagesPath = join(pageDir, "messages/en.json"); + +describe("decode concept page (decode-concept-page-001)", () => { + test("registry record stays published and points curated discovery at the broad concept surface", () => { + const record = getConceptById("concept.decode"); + expect(record?.status).toBe("published"); + expect(record?.aliases).toEqual([ + "Decode", + "decoding", + "token-by-token generation", + "next-token step", + "inter-token generation", + ]); + expect(record?.tags).toEqual(["foundations", "attention", "kv-cache"]); + expect(record?.relatedIds).toEqual( + expect.arrayContaining([ + "concept.prefill", + "concept.kv-cache", + "concept.prefill-decode-split", + "system.batching", + "system.continuous-batching", + "system.memory", + "system.speculative-decoding", + "concept.autoregressive-generation", + "concept.sampling-overview", + ]), + ); + expect(PUBLISHED_DOCS_REGISTRY_IDS.has("concept.decode")).toBe(true); + expect(PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS.has("concept.decode")).toBe( + true, + ); + }); + + test("curated related links resolve decode neighbors through their published routes", () => { + const source = getConceptById("concept.decode"); + if (!source) { + throw new Error("expected concept.decode in registry"); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + expect( + items.find((item) => item.registryId === "concept.prefill")?.href, + ).toBe("/docs/concepts/prefill"); + expect( + items.find((item) => item.registryId === "concept.kv-cache")?.href, + ).toBe("/docs/concepts/kv-cache"); + expect( + items.find((item) => item.registryId === "concept.sampling-overview") + ?.href, + ).toBe("/docs/glossary/sampling-overview"); + expect( + items.find((item) => item.registryId === "system.batching")?.href, + ).toBe("/docs/systems/batching"); + expect( + items.find((item) => item.registryId === "system.continuous-batching") + ?.href, + ).toBe("/docs/systems/continuous-batching"); + expect( + items.find((item) => item.registryId === "system.memory")?.href, + ).toBe("/docs/systems/memory"); + expect( + items.find((item) => item.registryId === "system.speculative-decoding") + ?.href, + ).toBe("/docs/systems/speculative-decoding"); + }); + + test("messages explain decode as a repeated stage distinct from prefill and sampling", () => { + const messages = pageMessagesSchema.parse( + JSON.parse(readFileSync(messagesPath, "utf8")), + ); + + expect(messages.title).toBe("Decode"); + expect(messages.openingSummary?.length).toBeGreaterThan(0); + expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain( + "autoregressive generation", + ); + expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain( + "inter-token latency", + ); + expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain( + "memory bandwidth", + ); + expect(messages.sections?.commonConfusions.body?.toLowerCase()).toContain( + "sampling", + ); + }); + + test("page renders the canonical concept route with serving and discovery links", async () => { + const page = await loadConceptPage("decode"); + + expect(page.frontmatter.kind).toBe("concept"); + expect(page.frontmatter.status).toBe("published"); + expect(page.frontmatter.registryId).toBe("concept.decode"); + expect(page.messages.openingSummary?.length).toBeGreaterThan(0); + + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: page.messages, + assets: page.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: page.content, + }), + ); + + expect(html).toContain("What It Is"); + expect(html).toContain("Why It Matters"); + expect(html).toContain("Serving Path"); + expect(html).toContain('href="/docs/concepts/prefill"'); + expect(html).toContain('href="/docs/concepts/kv-cache"'); + expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); + expect(html).toContain('href="/docs/systems/batching"'); + expect(html).toContain('href="/docs/systems/continuous-batching"'); + expect(html).toContain('href="/docs/systems/memory"'); + expect(html).toContain('href="/docs/systems/speculative-decoding"'); + expect(html).toContain('href="/docs/glossary/sampling-overview"'); + expect(html).toContain('data-testid="derived-related-docs"'); + expect(html).toContain('data-testid="curated-related-docs"'); + expect(html).not.toContain("Reader Shortcut"); + }); + + test("published pages and search documents prefer the concept route for decode discovery", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + + expect(pages.some((page) => page.docsSlug === "concepts/decode")).toBe( + true, + ); + expect(pages.some((page) => page.docsSlug === "glossary/decode")).toBe( + true, + ); + + const conceptDocument = documents.find( + (entry) => entry.url === "/docs/concepts/decode", + ); + expect(conceptDocument?.kind).toBe("concept"); + expect(conceptDocument?.facets.kind).toBe("concept"); + expect(conceptDocument?.aliases).toEqual( + expect.arrayContaining([ + "Decode", + "decoding", + "token-by-token generation", + "next-token step", + ]), + ); + }); + + test("search ranks the concept route first for representative decode queries", async () => { + for (const query of [ + "decode", + "decoding", + "token-by-token generation", + "next-token step", + ] as const) { + const results = await docsSearchApi.search(query); + expect(results.length).toBeGreaterThan(0); + expect(results[0]?.url).toBe("/docs/concepts/decode"); + } + }); +}); diff --git a/src/lib/content/inference-engine-system-registry.test.ts b/src/lib/content/inference-engine-system-registry.test.ts index 063232f6..4d49d25d 100644 --- a/src/lib/content/inference-engine-system-registry.test.ts +++ b/src/lib/content/inference-engine-system-registry.test.ts @@ -70,7 +70,7 @@ describe("inference engine system registry", () => { ).toBe("/docs/concepts/prefill"); expect( items.find((item) => item.registryId === "concept.decode")?.href, - ).toBe("/docs/glossary/decode"); + ).toBe("/docs/concepts/decode"); expect( items.find((item) => item.registryId === "concept.prefill-decode-split") ?.href, diff --git a/src/lib/content/kv-cache-concept.test.ts b/src/lib/content/kv-cache-concept.test.ts index c8cf1f8c..11cdf7d6 100644 --- a/src/lib/content/kv-cache-concept.test.ts +++ b/src/lib/content/kv-cache-concept.test.ts @@ -60,7 +60,7 @@ describe("KV-cache concept page (kv-cache-concept-page-001)", () => { ).toBe("/docs/concepts/prefill"); expect( items.find((item) => item.registryId === "concept.decode")?.href, - ).toBe("/docs/glossary/decode"); + ).toBe("/docs/concepts/decode"); expect( items.find((item) => item.registryId === "concept.prefill-decode-split") ?.href, @@ -111,7 +111,7 @@ describe("KV-cache concept page (kv-cache-concept-page-001)", () => { expect(html).toContain("repeated work"); expect(html).toContain("live memory"); expect(html).toContain('href="/docs/glossary/prefill"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/modules/attention"'); expect(html).toContain('href="/docs/modules/grouped-query-attention"'); expect(html).toContain('href="/tags/kv-cache"'); diff --git a/src/lib/content/kv-cache-glossary.test.ts b/src/lib/content/kv-cache-glossary.test.ts index 97b7ebb9..bcf30a4e 100644 --- a/src/lib/content/kv-cache-glossary.test.ts +++ b/src/lib/content/kv-cache-glossary.test.ts @@ -150,9 +150,9 @@ describe("Phase 5 KV cache glossary page (US-001)", () => { ); expectHtmlToContainProse(html, "cost real money to serve"); expect(html).toContain('href="/docs/concepts/prefill"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/concepts/prefill"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/autoregressive-generation"'); expect(html).toContain('href="/docs/modules/attention"'); expect(html).toContain('href="/docs/modules/multi-query-attention"'); diff --git a/src/lib/content/memory-system-page.test.ts b/src/lib/content/memory-system-page.test.ts index e8e08f63..2ed8ba05 100644 --- a/src/lib/content/memory-system-page.test.ts +++ b/src/lib/content/memory-system-page.test.ts @@ -193,7 +193,7 @@ describe("memory docs route render", () => { ); expect(html).toContain('href="/docs/concepts/prefill"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/systems/batching"'); diff --git a/src/lib/content/memory-system-registry.test.ts b/src/lib/content/memory-system-registry.test.ts index ca07d5ec..908f1d3a 100644 --- a/src/lib/content/memory-system-registry.test.ts +++ b/src/lib/content/memory-system-registry.test.ts @@ -69,7 +69,7 @@ describe("memory system registry", () => { ).toBe("/docs/concepts/prefill"); expect( items.find((item) => item.registryId === "concept.decode")?.href, - ).toBe("/docs/glossary/decode"); + ).toBe("/docs/concepts/decode"); expect( items.find((item) => item.registryId === "concept.prefill-decode-split") ?.href, diff --git a/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx b/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx index 09ed942c..c6f1f4df 100644 --- a/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx +++ b/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx @@ -74,7 +74,7 @@ const JAPANESE_MODULE_EXPECTATIONS: readonly JapaneseModuleExpectation[] = [ ], expectedCanonicalFallbackHrefs: [ "/docs/concepts/kv-cache", - "/docs/glossary/decode", + "/docs/concepts/decode", "/docs/glossary/prefill-decode-split", "/docs/concepts/quantization", ], @@ -106,7 +106,7 @@ const JAPANESE_MODULE_EXPECTATIONS: readonly JapaneseModuleExpectation[] = [ ], expectedCanonicalFallbackHrefs: [ "/docs/concepts/kv-cache", - "/docs/glossary/decode", + "/docs/concepts/decode", "/docs/glossary/prefill-decode-split", ], }, diff --git a/src/lib/content/phase-5-serving-path-search-locale.test.ts b/src/lib/content/phase-5-serving-path-search-locale.test.ts index 7c8d632b..3a14496d 100644 --- a/src/lib/content/phase-5-serving-path-search-locale.test.ts +++ b/src/lib/content/phase-5-serving-path-search-locale.test.ts @@ -24,8 +24,8 @@ const PHASE_5_SERVING_DOCS = [ query: "prompt processing", }, { - docsSlug: "glossary/decode", - url: "/docs/glossary/decode", + docsSlug: "concepts/decode", + url: "/docs/concepts/decode", query: "token-by-token generation", }, { diff --git a/src/lib/content/prefill-concept.test.ts b/src/lib/content/prefill-concept.test.ts index b8bef6b3..27b1bfdf 100644 --- a/src/lib/content/prefill-concept.test.ts +++ b/src/lib/content/prefill-concept.test.ts @@ -82,7 +82,7 @@ describe("prefill concept page (prefill-concept-page-001)", () => { items.some( (item) => item.registryId === "concept.decode" && - item.href === "/docs/glossary/decode", + item.href === "/docs/concepts/decode", ), ).toBe(true); expect( @@ -150,7 +150,7 @@ describe("prefill concept page (prefill-concept-page-001)", () => { expect(html).toContain("Why It Matters"); expect(html).toContain("Serving Path"); expect(html).toContain('href="/docs/concepts/kv-cache"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); expect(html).toContain('href="/docs/glossary/autoregressive-generation"'); expect(html).toContain('href="/docs/modules/attention"'); diff --git a/src/lib/content/prefill-decode-split-glossary.test.ts b/src/lib/content/prefill-decode-split-glossary.test.ts index 5403f9f8..587e1265 100644 --- a/src/lib/content/prefill-decode-split-glossary.test.ts +++ b/src/lib/content/prefill-decode-split-glossary.test.ts @@ -109,7 +109,7 @@ describe("Phase 5 prefill/decode split glossary page (US-004)", () => { items.some( (item) => item.registryId === "concept.decode" && - item.href === "/docs/glossary/decode", + item.href === "/docs/concepts/decode", ), ).toBe(true); expect( @@ -176,7 +176,7 @@ describe("Phase 5 prefill/decode split glossary page (US-004)", () => { expectHtmlToContainProse(html, "queueing overhead"); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/concepts/prefill"'); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/search?q=paged%20attention"'); expect(html).toContain('href="/search?q=chunked%20prefill"'); expect(html).toContain('href="/search?q=speculative%20decoding"'); @@ -238,7 +238,7 @@ describe("Phase 5 prefill/decode split glossary page (US-004)", () => { expect(kvCache).toContain('href="/docs/concepts/prefill"'); expect(prefill).toContain('href="/docs/concepts/prefill"'); expect(decode).toContain('href="/docs/glossary/prefill-decode-split"'); - expect(split).toContain('href="/docs/glossary/decode"'); + expect(split).toContain('href="/docs/concepts/decode"'); }); test("transformer, attention, autoregressive generation, MQA, GQA, and sliding-window attention expose entry points into the serving path", () => { diff --git a/src/lib/content/speculative-decoding-system-page.test.ts b/src/lib/content/speculative-decoding-system-page.test.ts index 8c2f193b..8616b28a 100644 --- a/src/lib/content/speculative-decoding-system-page.test.ts +++ b/src/lib/content/speculative-decoding-system-page.test.ts @@ -180,7 +180,7 @@ describe("speculative decoding docs route render", () => { ), ); - expect(html).toContain('href="/docs/glossary/decode"'); + expect(html).toContain('href="/docs/concepts/decode"'); expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/systems/batching"'); From ebdde4aa1d17524c979e9e5936d5fec57ca7cbea Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:32:26 +0700 Subject: [PATCH 2/6] feat: [decode-concept-page-002] - [Publish the canonical decode concept page] --- src/content/docs/concepts/decode/messages/en.json | 2 +- src/lib/content/decode-concept.test.ts | 4 ++++ src/lib/content/glossary-architecture-index.test.ts | 4 +++- src/tests/content/architecture-index.test.ts | 2 ++ src/tests/content/attention-tag-landing.test.ts | 1 + 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/content/docs/concepts/decode/messages/en.json b/src/content/docs/concepts/decode/messages/en.json index eca79f62..ad2deff8 100644 --- a/src/content/docs/concepts/decode/messages/en.json +++ b/src/content/docs/concepts/decode/messages/en.json @@ -5,7 +5,7 @@ "sections": { "whatItIs": { "title": "What It Is", - "body": "Decode is the stage of autoregressive generation that starts after prefill has already read the prompt and built the first key-value cache state. Each turn computes the next token from the current prefix, appends that token, writes one more cache position, and then repeats until generation stops." + "body": "Decode is the stage of autoregressive generation that starts after prefill has already read the prompt and built the first key-value cache state. Each turn computes the next token from the current prefix, appends that token, writes one more cache position, and then repeats until generation stops. It repeats one position at a time because the saved cache already summarizes the earlier prompt, so the model only has to process the newest step instead of rereading the whole prompt on every turn." }, "whyItMatters": { "title": "Why It Matters", diff --git a/src/lib/content/decode-concept.test.ts b/src/lib/content/decode-concept.test.ts index 1ec58d3d..45c8e8d2 100644 --- a/src/lib/content/decode-concept.test.ts +++ b/src/lib/content/decode-concept.test.ts @@ -103,6 +103,9 @@ describe("decode concept page (decode-concept-page-001)", () => { expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain( "autoregressive generation", ); + expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain( + "instead of rereading the whole prompt", + ); expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain( "inter-token latency", ); @@ -134,6 +137,7 @@ describe("decode concept page (decode-concept-page-001)", () => { expect(html).toContain("What It Is"); expect(html).toContain("Why It Matters"); expect(html).toContain("Serving Path"); + expect(html).toContain("instead of rereading the whole prompt"); expect(html).toContain('href="/docs/concepts/prefill"'); expect(html).toContain('href="/docs/concepts/kv-cache"'); expect(html).toContain('href="/docs/glossary/prefill-decode-split"'); diff --git a/src/lib/content/glossary-architecture-index.test.ts b/src/lib/content/glossary-architecture-index.test.ts index 723a23d5..20c939b5 100644 --- a/src/lib/content/glossary-architecture-index.test.ts +++ b/src/lib/content/glossary-architecture-index.test.ts @@ -133,7 +133,7 @@ const EXPECTED_GLOSSARY_TITLES: Record< }; const PUBLISHED_GLOSSARY_ENTRY_COUNT = 58; -const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 50; +const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 51; const GLOSSARY_SEPARATOR_TITLES = [ "Model Taxonomy", @@ -145,6 +145,7 @@ const GLOSSARY_SEPARATOR_TITLES = [ const ARCHITECTURE_CONCEPT_URLS = [ "/docs/concepts/alibi", "/docs/concepts/context-extension", + "/docs/concepts/decode", "/docs/concepts/kv-cache", "/docs/concepts/page-spec-workflow-sample", "/docs/concepts/positional-encodings", @@ -271,6 +272,7 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => { ["Architecture", "/docs/glossary/architecture"], ["Foundation Model", "/docs/glossary/foundation-model"], ["Key-value cache", "/docs/concepts/kv-cache"], + ["Decode", "/docs/concepts/decode"], ["Decode", "/docs/glossary/decode"], ["Prefill", "/docs/concepts/prefill"], ["Positional encodings", "/docs/concepts/positional-encodings"], diff --git a/src/tests/content/architecture-index.test.ts b/src/tests/content/architecture-index.test.ts index 95a2466f..08557530 100644 --- a/src/tests/content/architecture-index.test.ts +++ b/src/tests/content/architecture-index.test.ts @@ -23,6 +23,7 @@ describe("isArchitectureRelatedPage", () => { expect(urls).toEqual( expect.arrayContaining([ "/docs/concepts/context-extension", + "/docs/concepts/decode", "/docs/concepts/page-spec-workflow-sample", "/docs/concepts/positional-encodings", "/docs/concepts/prefill", @@ -123,6 +124,7 @@ describe("architecture index page render", () => { ["Architecture", "/docs/glossary/architecture"], ["Foundation Model", "/docs/glossary/foundation-model"], ["KV cache", "/docs/glossary/kv-cache"], + ["Decode", "/docs/concepts/decode"], ["Decode", "/docs/glossary/decode"], ["Prefill", "/docs/concepts/prefill"], ["Positional encodings", "/docs/concepts/positional-encodings"], diff --git a/src/tests/content/attention-tag-landing.test.ts b/src/tests/content/attention-tag-landing.test.ts index c0f73822..c0297774 100644 --- a/src/tests/content/attention-tag-landing.test.ts +++ b/src/tests/content/attention-tag-landing.test.ts @@ -76,6 +76,7 @@ describe("attention tag landing resources", () => { const conceptGroup = groups.find((group) => group.kind === "concept"); expect(conceptGroup?.resources.map((resource) => resource.url)).toEqual([ + "/docs/concepts/decode", "/docs/concepts/kv-cache", "/docs/concepts/prefill", ]); From 3de2233fa13d5a32437177aa4c1dada9ce73e0f4 Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:39:48 +0700 Subject: [PATCH 3/6] feat: [decode-concept-page-003] - [Route readers between decode, serving stages, and sampling] --- src/content/docs/concepts/decode/page.mdx | 1 + src/content/docs/glossary/decode/page.mdx | 2 ++ src/content/registry/concepts/decode.json | 3 ++- src/lib/content/decode-concept.test.ts | 6 ++++++ src/lib/content/decode-glossary.test.ts | 2 ++ 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/content/docs/concepts/decode/page.mdx b/src/content/docs/concepts/decode/page.mdx index 96df58f2..73f753cb 100644 --- a/src/content/docs/concepts/decode/page.mdx +++ b/src/content/docs/concepts/decode/page.mdx @@ -16,6 +16,7 @@ aliases: - "token-by-token generation" - "next-token step" - "inter-token generation" + - "inter-token latency" updatedAt: "2026-06-22" --- diff --git a/src/content/docs/glossary/decode/page.mdx b/src/content/docs/glossary/decode/page.mdx index 13e59967..761a72a1 100644 --- a/src/content/docs/glossary/decode/page.mdx +++ b/src/content/docs/glossary/decode/page.mdx @@ -15,6 +15,8 @@ aliases: - "decoding" - "token-by-token generation" - "next-token step" + - "inter-token generation" + - "inter-token latency" updatedAt: "2026-06-18" --- diff --git a/src/content/registry/concepts/decode.json b/src/content/registry/concepts/decode.json index bab01768..d54470d6 100644 --- a/src/content/registry/concepts/decode.json +++ b/src/content/registry/concepts/decode.json @@ -9,7 +9,8 @@ "decoding", "token-by-token generation", "next-token step", - "inter-token generation" + "inter-token generation", + "inter-token latency" ], "tags": ["foundations", "attention", "kv-cache"], "relatedIds": [ diff --git a/src/lib/content/decode-concept.test.ts b/src/lib/content/decode-concept.test.ts index 45c8e8d2..fa2c8e8b 100644 --- a/src/lib/content/decode-concept.test.ts +++ b/src/lib/content/decode-concept.test.ts @@ -34,6 +34,7 @@ describe("decode concept page (decode-concept-page-001)", () => { "token-by-token generation", "next-token step", "inter-token generation", + "inter-token latency", ]); expect(record?.tags).toEqual(["foundations", "attention", "kv-cache"]); expect(record?.relatedIds).toEqual( @@ -137,6 +138,9 @@ describe("decode concept page (decode-concept-page-001)", () => { expect(html).toContain("What It Is"); expect(html).toContain("Why It Matters"); expect(html).toContain("Serving Path"); + expect(html).toContain("Related Concepts And Systems"); + expect(html).toContain("Tags"); + expect(html).toContain("References"); expect(html).toContain("instead of rereading the whole prompt"); expect(html).toContain('href="/docs/concepts/prefill"'); expect(html).toContain('href="/docs/concepts/kv-cache"'); @@ -174,6 +178,7 @@ describe("decode concept page (decode-concept-page-001)", () => { "decoding", "token-by-token generation", "next-token step", + "inter-token latency", ]), ); }); @@ -184,6 +189,7 @@ describe("decode concept page (decode-concept-page-001)", () => { "decoding", "token-by-token generation", "next-token step", + "inter-token latency", ] as const) { const results = await docsSearchApi.search(query); expect(results.length).toBeGreaterThan(0); diff --git a/src/lib/content/decode-glossary.test.ts b/src/lib/content/decode-glossary.test.ts index 6af9138f..07d21cc8 100644 --- a/src/lib/content/decode-glossary.test.ts +++ b/src/lib/content/decode-glossary.test.ts @@ -37,6 +37,7 @@ describe("Phase 5 decode glossary page (US-003)", () => { "token-by-token generation", "next-token step", "inter-token generation", + "inter-token latency", ]); expect(record?.tags).toEqual(["foundations", "attention", "kv-cache"]); expect(record?.relatedIds).toEqual( @@ -182,6 +183,7 @@ describe("Phase 5 decode glossary page (US-003)", () => { "decoding", "token-by-token generation", "next-token step", + "inter-token latency", ]), ); expect(document?.tags).toEqual( From 6a7abe0818d1a2e968b51dd8911a9af167d91885 Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 13:44:10 +0700 Subject: [PATCH 4/6] feat: [decode-concept-page-004] - [Add focused validation for the decode concept-page slice] --- src/lib/content/decode-concept.test.ts | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/lib/content/decode-concept.test.ts b/src/lib/content/decode-concept.test.ts index fa2c8e8b..cc3d3691 100644 --- a/src/lib/content/decode-concept.test.ts +++ b/src/lib/content/decode-concept.test.ts @@ -6,6 +6,7 @@ import { renderToStaticMarkup } from "react-dom/server"; import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; import { loadConceptPage } from "@/lib/content/concept-page"; import { DECODE_CONCEPT_PAGE_DIR } from "@/lib/content/content-paths"; +import { localDocsRoute } from "@/lib/content/local-docs-page"; import { loadPublishedDocsPages } from "@/lib/content/pages"; import { PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS, @@ -20,11 +21,32 @@ import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; import { pageMessagesSchema } from "@/lib/content/schemas"; import { buildSearchDocuments } from "@/lib/search/build-documents"; import { docsSearchApi } from "@/lib/search/search-server"; +import { validateColocatedPageBundle } from "./validate-registry"; const pageDir = DECODE_CONCEPT_PAGE_DIR; const messagesPath = join(pageDir, "messages/en.json"); +const DECODE_ROUTE = "/docs/concepts/decode"; + +describe("decode concept page focused validation (decode-concept-page-004)", () => { + test("canonical route, registry record, and default English page bundle resolve together", async () => { + const route = localDocsRoute({ + section: "concepts", + slug: "decode", + }); + const registry = await loadRegistry(); + const bundle = await validateColocatedPageBundle(pageDir, registry); + const record = registry.byId.get("concept.decode"); + + expect(route).toBe(DECODE_ROUTE); + expect(bundle.errors).toEqual([]); + expect(bundle.messages?.title).toBe("Decode"); + expect(bundle.messages?.openingSummary?.length).toBeGreaterThan(0); + expect(bundle.assets).toBeDefined(); + expect(record?.kind).toBe("concept"); + expect(record?.slug).toBe("decode"); + expect(record?.status).toBe("published"); + }); -describe("decode concept page (decode-concept-page-001)", () => { test("registry record stays published and points curated discovery at the broad concept surface", () => { const record = getConceptById("concept.decode"); expect(record?.status).toBe("published"); @@ -193,7 +215,7 @@ describe("decode concept page (decode-concept-page-001)", () => { ] as const) { const results = await docsSearchApi.search(query); expect(results.length).toBeGreaterThan(0); - expect(results[0]?.url).toBe("/docs/concepts/decode"); + expect(results[0]?.url).toBe(DECODE_ROUTE); } }); }); From a946044547118ea7ed78e43f1983413c89d3eda9 Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 14:00:35 +0700 Subject: [PATCH 5/6] feat: [decode-concept-page-001] - [Promote `decode` to the canonical concept discovery surface] --- src/lib/content/architecture.ts | 48 ++++++++++++++++++- .../glossary-architecture-index.test.ts | 5 +- src/tests/content/architecture-index.test.ts | 12 ++++- 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/lib/content/architecture.ts b/src/lib/content/architecture.ts index 60c737a4..fa6a92df 100644 --- a/src/lib/content/architecture.ts +++ b/src/lib/content/architecture.ts @@ -11,6 +11,49 @@ export type ArchitectureEntry = { slug: string; }; +function preferArchitecturePageCandidate( + current: DocsPageSource, + candidate: DocsPageSource, +): DocsPageSource { + if ( + current.frontmatter.kind !== "concept" && + candidate.frontmatter.kind === "concept" + ) { + return candidate; + } + + return current; +} + +function dedupeArchitecturePagesByRegistryId( + pages: DocsPageSource[], +): DocsPageSource[] { + const dedupedPages: DocsPageSource[] = []; + const pagesByRegistryId = new Map(); + + for (const page of pages) { + const { registryId } = page.frontmatter; + if (!registryId) { + dedupedPages.push(page); + continue; + } + + const existingIndex = pagesByRegistryId.get(registryId); + if (existingIndex === undefined) { + pagesByRegistryId.set(registryId, dedupedPages.length); + dedupedPages.push(page); + continue; + } + + dedupedPages[existingIndex] = preferArchitecturePageCandidate( + dedupedPages[existingIndex], + page, + ); + } + + return dedupedPages; +} + function isConceptRecord( record: ReturnType, ): record is ConceptRecord { @@ -112,5 +155,8 @@ export async function loadPublishedArchitectureEntries( const pages = (await loadShippedLocalizedDocsPages(locale)).filter((page) => isArchitectureRelatedPage(page, indexes), ); - return sortArchitectureEntriesByTitle(pages.map(toArchitectureEntry), locale); + return sortArchitectureEntriesByTitle( + dedupeArchitecturePagesByRegistryId(pages).map(toArchitectureEntry), + locale, + ); } diff --git a/src/lib/content/glossary-architecture-index.test.ts b/src/lib/content/glossary-architecture-index.test.ts index 20c939b5..3103e94e 100644 --- a/src/lib/content/glossary-architecture-index.test.ts +++ b/src/lib/content/glossary-architecture-index.test.ts @@ -133,7 +133,7 @@ const EXPECTED_GLOSSARY_TITLES: Record< }; const PUBLISHED_GLOSSARY_ENTRY_COUNT = 58; -const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 51; +const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 49; const GLOSSARY_SEPARATOR_TITLES = [ "Model Taxonomy", @@ -233,7 +233,6 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => { for (const url of [ "/docs/glossary/architecture", - "/docs/glossary/kv-cache", "/docs/glossary/normalization", "/docs/glossary/residual-connection", "/docs/glossary/special-tokens", @@ -273,7 +272,6 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => { ["Foundation Model", "/docs/glossary/foundation-model"], ["Key-value cache", "/docs/concepts/kv-cache"], ["Decode", "/docs/concepts/decode"], - ["Decode", "/docs/glossary/decode"], ["Prefill", "/docs/concepts/prefill"], ["Positional encodings", "/docs/concepts/positional-encodings"], ["Token", "/docs/glossary/token"], @@ -282,5 +280,6 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => { expect(architectureHtml).toContain(title); expect(architectureHtml).toContain(`href="${href}"`); } + expect(architectureHtml).not.toContain('href="/docs/glossary/decode"'); }); }); diff --git a/src/tests/content/architecture-index.test.ts b/src/tests/content/architecture-index.test.ts index 08557530..95188790 100644 --- a/src/tests/content/architecture-index.test.ts +++ b/src/tests/content/architecture-index.test.ts @@ -69,6 +69,14 @@ describe("loadPublishedArchitectureEntries", () => { expect(token?.slug).toBe("glossary/token"); }); + it("prefers the concept route when glossary and concept pages share one registry record", async () => { + const entries = await loadPublishedArchitectureEntries("en"); + const decodeEntries = entries.filter((entry) => entry.title === "Decode"); + + expect(decodeEntries).toHaveLength(1); + expect(decodeEntries[0]?.url).toBe("/docs/concepts/decode"); + }); + it("returns published architecture pages sorted alphabetically by title", async () => { const entries = await loadPublishedArchitectureEntries("en"); for (let index = 1; index < entries.length; index += 1) { @@ -123,9 +131,8 @@ describe("architecture index page render", () => { for (const [title, href] of [ ["Architecture", "/docs/glossary/architecture"], ["Foundation Model", "/docs/glossary/foundation-model"], - ["KV cache", "/docs/glossary/kv-cache"], + ["Key-value cache", "/docs/concepts/kv-cache"], ["Decode", "/docs/concepts/decode"], - ["Decode", "/docs/glossary/decode"], ["Prefill", "/docs/concepts/prefill"], ["Positional encodings", "/docs/concepts/positional-encodings"], ["Token", "/docs/glossary/token"], @@ -138,6 +145,7 @@ describe("architecture index page render", () => { expect(html).not.toContain("No architecture entries yet"); expect(html).toContain("list-none"); expect(html).not.toContain("list-disc"); + expect(html).not.toContain('href="/docs/glossary/decode"'); }); it("renders localized vietnamese architecture entries when shipped page-local messages exist", async () => { From 84797d867557b83e9074e0ca49a6898d4bd06a38 Mon Sep 17 00:00:00 2001 From: aabdi Date: Mon, 22 Jun 2026 14:11:30 +0700 Subject: [PATCH 6/6] feat: [decode-concept-page-001] - [Promote `decode` to the canonical concept discovery surface] --- src/lib/content/content-reconciliation-browse-index.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/content/content-reconciliation-browse-index.test.ts b/src/lib/content/content-reconciliation-browse-index.test.ts index d9fae1cc..46ced702 100644 --- a/src/lib/content/content-reconciliation-browse-index.test.ts +++ b/src/lib/content/content-reconciliation-browse-index.test.ts @@ -21,7 +21,7 @@ describe("Phase 2/3 reconciliation browse indexes (US-004)", () => { for (const url of [ "/docs/concepts/transformer-architecture", "/docs/concepts/positional-encodings", - "/docs/glossary/kv-cache", + "/docs/concepts/kv-cache", "/docs/glossary/token", ]) { expect(architectureHtml).toContain(`href="${url}"`);