diff --git a/src/content/docs/concepts/decode/assets.json b/src/content/docs/concepts/decode/assets.json
new file mode 100644
index 00000000..0967ef42
--- /dev/null
+++ b/src/content/docs/concepts/decode/assets.json
@@ -0,0 +1 @@
+{}
diff --git a/src/content/docs/concepts/decode/messages/en.json b/src/content/docs/concepts/decode/messages/en.json
new file mode 100644
index 00000000..ad2deff8
--- /dev/null
+++ b/src/content/docs/concepts/decode/messages/en.json
@@ -0,0 +1,47 @@
+{
+ "title": "Decode",
+ "description": "The repeated next-token stage that reuses saved prefix state, extends the KV cache one position at a time, and makes inter-token latency the main reader-visible cost after prefill.",
+ "openingSummary": "After prefill has processed the prompt, decode is the repeated next-token loop that keeps reusing saved key-value state, adds one new position at a time, and usually makes inter-token latency and cache growth the dominant costs of a long response.",
+ "sections": {
+ "whatItIs": {
+ "title": "What It Is",
+ "body": "Decode is the stage of autoregressive generation that starts after prefill has already read the prompt and built the first key-value cache state. Each turn computes the next token from the current prefix, appends that token, and writes one more cache position; the loop then repeats until generation stops. It advances one position at a time because the saved cache already holds the keys and values for every earlier position, so the model only has to process the newest step instead of rereading the whole prompt on every turn."
+ },
+ "whyItMatters": {
+ "title": "Why It Matters",
+ "body": "Decode often controls the feel of a streamed answer because readers experience it as one token arriving after another. The model no longer has to reread the whole prompt from scratch, but each step still has to read a growing cache and run another forward pass for the newest position. That is why inter-token latency, cache growth, memory bandwidth, and serving density show up so often in decode discussions."
+ },
+ "simpleExample": {
+ "title": "Simple Example",
+ "body": "Suppose a prompt has already been processed and the model is about to generate a 50-token reply. Decode produces token 1, extends the prefix, updates the cache for that new position, then repeats for token 2, token 3, and the rest of the answer. Each turn is smaller than the prompt pass, but dozens of repeated turns can dominate the total user-visible wait."
+ },
+ "commonConfusions": {
+ "title": "Common Confusions",
+ "body": "Decode is not the same as the decoder module. The decoder is the model stack; decode is the runtime loop that calls that stack again and again during generation. Decode is also not the same as prefill: prefill reads the existing prompt once, while decode handles one newly generated token at a time. Finally, decode is not the same as sampling. Decode produces the next-step hidden state and logits, while sampling decides how one token is chosen from those scores."
+ },
+ "servingPath": {
+ "title": "Serving Path",
+ "body": "Use the nearby pages below to trace what decode depends on, what it changes in the serving stack, and where to go next if you want either the systems view or the token-choice view."
+ },
+ "related": {
+ "title": "Related Concepts And Systems"
+ },
+ "tags": {
+ "title": "Tags"
+ },
+ "references": {
+ "title": "References"
+ }
+ },
+ "links": {
+ "prefill": "Prefill",
+ "prefillDecodeSplit": "Prefill/decode split",
+ "kvCache": "KV cache",
+ "autoregressiveGeneration": "Autoregressive generation",
+ "batching": "Batching",
+ "continuousBatching": "Continuous batching",
+ "memory": "Memory",
+ "speculativeDecoding": "Speculative decoding",
+ "samplingOverview": "Sampling overview"
+ }
+}
diff --git a/src/content/docs/concepts/decode/page.mdx b/src/content/docs/concepts/decode/page.mdx
new file mode 100644
index 00000000..73f753cb
--- /dev/null
+++ b/src/content/docs/concepts/decode/page.mdx
@@ -0,0 +1,95 @@
+---
+title: Decode
+description: The repeated next-token stage that reuses saved prefix state, extends the KV cache one position at a time, and makes inter-token latency the main reader-visible cost after prefill.
+kind: "concept"
+registryId: "concept.decode"
+messageNamespace: "local"
+assetNamespace: "local"
+status: "published"
+tags:
+ - foundations
+ - attention
+ - kv-cache
+aliases:
+ - "Decode"
+ - "decoding"
+ - "token-by-token generation"
+ - "next-token step"
+ - "inter-token generation"
+ - "inter-token latency"
+updatedAt: "2026-06-22"
+---
+
+import { CitationList } from "@/features/docs/components/CitationList";
+import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
+import { LocalizedLinkList } from "@/features/docs/components/LocalizedLinkList";
+import { RelatedDocs } from "@/features/docs/components/RelatedDocs";
+import { Section } from "@/features/docs/components/Section";
+import { T } from "@/features/docs/components/T";
+import { TagPillList } from "@/features/docs/components/TagPillList";
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/content/docs/concepts/kv-cache/page.mdx b/src/content/docs/concepts/kv-cache/page.mdx
index 10f1f438..6a0e77a3 100644
--- a/src/content/docs/concepts/kv-cache/page.mdx
+++ b/src/content/docs/concepts/kv-cache/page.mdx
@@ -41,7 +41,7 @@ import { TagPillList } from "@/features/docs/components/TagPillList";
diff --git a/src/content/docs/glossary/prefill-decode-split/page.mdx b/src/content/docs/glossary/prefill-decode-split/page.mdx
index 4b06b0a6..25089afc 100644
--- a/src/content/docs/glossary/prefill-decode-split/page.mdx
+++ b/src/content/docs/glossary/prefill-decode-split/page.mdx
@@ -46,7 +46,7 @@ import { TagPillList } from "@/features/docs/components/TagPillList";
items={[
{ href: "/docs/concepts/kv-cache", labelKey: "links.kvCache" },
{ href: "/docs/concepts/prefill", labelKey: "links.prefill" },
- { href: "/docs/glossary/decode", labelKey: "links.decode" },
+ { href: "/docs/concepts/decode", labelKey: "links.decode" },
{
href: "/search?q=paged%20attention",
labelKey: "links.pagedAttention",
diff --git a/src/content/docs/glossary/prefill/page.mdx b/src/content/docs/glossary/prefill/page.mdx
index 45d022b5..49de8b68 100644
--- a/src/content/docs/glossary/prefill/page.mdx
+++ b/src/content/docs/glossary/prefill/page.mdx
@@ -46,7 +46,7 @@ import { TagPillList } from "@/features/docs/components/TagPillList";
();
+
+ for (const page of pages) {
+ const { registryId } = page.frontmatter;
+ if (!registryId) {
+ dedupedPages.push(page);
+ continue;
+ }
+
+ const existingIndex = pagesByRegistryId.get(registryId);
+ if (existingIndex === undefined) {
+ pagesByRegistryId.set(registryId, dedupedPages.length);
+ dedupedPages.push(page);
+ continue;
+ }
+
+ dedupedPages[existingIndex] = preferArchitecturePageCandidate(
+ dedupedPages[existingIndex],
+ page,
+ );
+ }
+
+ return dedupedPages;
+}
+
function isConceptRecord(
record: ReturnType,
): record is ConceptRecord {
@@ -112,5 +155,8 @@ export async function loadPublishedArchitectureEntries(
const pages = (await loadShippedLocalizedDocsPages(locale)).filter((page) =>
isArchitectureRelatedPage(page, indexes),
);
- return sortArchitectureEntriesByTitle(pages.map(toArchitectureEntry), locale);
+ return sortArchitectureEntriesByTitle(
+ dedupeArchitecturePagesByRegistryId(pages).map(toArchitectureEntry),
+ locale,
+ );
}
diff --git a/src/lib/content/batching-system-page.test.ts b/src/lib/content/batching-system-page.test.ts
index 59e4048a..25299ae4 100644
--- a/src/lib/content/batching-system-page.test.ts
+++ b/src/lib/content/batching-system-page.test.ts
@@ -165,7 +165,7 @@ describe("batching docs route render", () => {
expect(html).toContain("Legend:");
expect(html).toContain("Queue requests");
expect(html).toContain('href="/docs/concepts/prefill"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/systems/continuous-batching"');
diff --git a/src/lib/content/content-paths.ts b/src/lib/content/content-paths.ts
index 7d07b767..dceec038 100644
--- a/src/lib/content/content-paths.ts
+++ b/src/lib/content/content-paths.ts
@@ -293,6 +293,9 @@ export const HIDDEN_SIZE_GLOSSARY_PAGE_DIR = join(
/** Prefill concept page directory. */
export const PREFILL_CONCEPT_PAGE_DIR = join(CONCEPTS_DOCS_ROOT, "prefill");
+/** Decode concept page directory. */
+export const DECODE_CONCEPT_PAGE_DIR = join(CONCEPTS_DOCS_ROOT, "decode");
+
/** Vocabulary size glossary page directory. */
export const VOCABULARY_SIZE_GLOSSARY_PAGE_DIR = join(
GLOSSARY_DOCS_ROOT,
diff --git a/src/lib/content/content-reconciliation-attention-tag.test.ts b/src/lib/content/content-reconciliation-attention-tag.test.ts
index ee3ced15..8030bddf 100644
--- a/src/lib/content/content-reconciliation-attention-tag.test.ts
+++ b/src/lib/content/content-reconciliation-attention-tag.test.ts
@@ -97,6 +97,7 @@ describe("Phase 2/3 reconciliation attention tag landing (US-007)", () => {
const conceptGroup = groups.find((group) => group.kind === "concept");
expect(conceptGroup?.kindLabel).toBe("Concept");
expect(conceptGroup?.resources.map((resource) => resource.url)).toEqual([
+ "/docs/concepts/decode",
"/docs/concepts/kv-cache",
"/docs/concepts/prefill",
]);
@@ -164,7 +165,7 @@ describe("Phase 2/3 reconciliation attention tag page render (US-007)", () => {
expect(html).toContain("Linear Attention");
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/glossary/autoregressive-generation"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/kv-cache"');
expect(html).toContain('href="/docs/concepts/prefill"');
expect(html).toContain('href="/docs/glossary/token"');
diff --git a/src/lib/content/content-reconciliation-browse-index.test.ts b/src/lib/content/content-reconciliation-browse-index.test.ts
index d9fae1cc..46ced702 100644
--- a/src/lib/content/content-reconciliation-browse-index.test.ts
+++ b/src/lib/content/content-reconciliation-browse-index.test.ts
@@ -21,7 +21,7 @@ describe("Phase 2/3 reconciliation browse indexes (US-004)", () => {
for (const url of [
"/docs/concepts/transformer-architecture",
"/docs/concepts/positional-encodings",
- "/docs/glossary/kv-cache",
+ "/docs/concepts/kv-cache",
"/docs/glossary/token",
]) {
expect(architectureHtml).toContain(`href="${url}"`);
diff --git a/src/lib/content/continuous-batching-system-page.test.ts b/src/lib/content/continuous-batching-system-page.test.ts
index a6bc5a29..ee630862 100644
--- a/src/lib/content/continuous-batching-system-page.test.ts
+++ b/src/lib/content/continuous-batching-system-page.test.ts
@@ -171,7 +171,7 @@ describe("continuous batching docs route render", () => {
),
);
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/systems/batching"');
diff --git a/src/lib/content/decode-concept.test.ts b/src/lib/content/decode-concept.test.ts
new file mode 100644
index 00000000..cc3d3691
--- /dev/null
+++ b/src/lib/content/decode-concept.test.ts
@@ -0,0 +1,221 @@
+import { describe, expect, test } from "bun:test";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+import { createElement } from "react";
+import { renderToStaticMarkup } from "react-dom/server";
+import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
+import { loadConceptPage } from "@/lib/content/concept-page";
+import { DECODE_CONCEPT_PAGE_DIR } from "@/lib/content/content-paths";
+import { localDocsRoute } from "@/lib/content/local-docs-page";
+import { loadPublishedDocsPages } from "@/lib/content/pages";
+import {
+ PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS,
+ PUBLISHED_DOCS_REGISTRY_IDS,
+} from "@/lib/content/published-docs-registry-ids";
+import { loadRegistry } from "@/lib/content/registry";
+import {
+ getConceptById,
+ listRelatedRegistryRecords,
+} from "@/lib/content/registry-runtime";
+import { deriveCuratedRelatedItems } from "@/lib/content/related-docs";
+import { pageMessagesSchema } from "@/lib/content/schemas";
+import { buildSearchDocuments } from "@/lib/search/build-documents";
+import { docsSearchApi } from "@/lib/search/search-server";
+import { validateColocatedPageBundle } from "./validate-registry";
+
+const pageDir = DECODE_CONCEPT_PAGE_DIR;
+const messagesPath = join(pageDir, "messages/en.json");
+const DECODE_ROUTE = "/docs/concepts/decode";
+
+describe("decode concept page focused validation (decode-concept-page-004)", () => {
+ test("canonical route, registry record, and default English page bundle resolve together", async () => {
+ const route = localDocsRoute({
+ section: "concepts",
+ slug: "decode",
+ });
+ const registry = await loadRegistry();
+ const bundle = await validateColocatedPageBundle(pageDir, registry);
+ const record = registry.byId.get("concept.decode");
+
+ expect(route).toBe(DECODE_ROUTE);
+ expect(bundle.errors).toEqual([]);
+ expect(bundle.messages?.title).toBe("Decode");
+ expect(bundle.messages?.openingSummary?.length).toBeGreaterThan(0);
+ expect(bundle.assets).toBeDefined();
+ expect(record?.kind).toBe("concept");
+ expect(record?.slug).toBe("decode");
+ expect(record?.status).toBe("published");
+ });
+
+ test("registry record stays published and points curated discovery at the broad concept surface", () => {
+ const record = getConceptById("concept.decode");
+ expect(record?.status).toBe("published");
+ expect(record?.aliases).toEqual([
+ "Decode",
+ "decoding",
+ "token-by-token generation",
+ "next-token step",
+ "inter-token generation",
+ "inter-token latency",
+ ]);
+ expect(record?.tags).toEqual(["foundations", "attention", "kv-cache"]);
+ expect(record?.relatedIds).toEqual(
+ expect.arrayContaining([
+ "concept.prefill",
+ "concept.kv-cache",
+ "concept.prefill-decode-split",
+ "system.batching",
+ "system.continuous-batching",
+ "system.memory",
+ "system.speculative-decoding",
+ "concept.autoregressive-generation",
+ "concept.sampling-overview",
+ ]),
+ );
+ expect(PUBLISHED_DOCS_REGISTRY_IDS.has("concept.decode")).toBe(true);
+ expect(PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS.has("concept.decode")).toBe(
+ true,
+ );
+ });
+
+ test("curated related links resolve decode neighbors through their published routes", () => {
+ const source = getConceptById("concept.decode");
+ if (!source) {
+ throw new Error("expected concept.decode in registry");
+ }
+
+ const items = deriveCuratedRelatedItems(
+ source,
+ listRelatedRegistryRecords(),
+ PUBLISHED_DOCS_REGISTRY_IDS,
+ );
+
+ expect(
+ items.find((item) => item.registryId === "concept.prefill")?.href,
+ ).toBe("/docs/concepts/prefill");
+ expect(
+ items.find((item) => item.registryId === "concept.kv-cache")?.href,
+ ).toBe("/docs/concepts/kv-cache");
+ expect(
+ items.find((item) => item.registryId === "concept.sampling-overview")
+ ?.href,
+ ).toBe("/docs/glossary/sampling-overview");
+ expect(
+ items.find((item) => item.registryId === "system.batching")?.href,
+ ).toBe("/docs/systems/batching");
+ expect(
+ items.find((item) => item.registryId === "system.continuous-batching")
+ ?.href,
+ ).toBe("/docs/systems/continuous-batching");
+ expect(
+ items.find((item) => item.registryId === "system.memory")?.href,
+ ).toBe("/docs/systems/memory");
+ expect(
+ items.find((item) => item.registryId === "system.speculative-decoding")
+ ?.href,
+ ).toBe("/docs/systems/speculative-decoding");
+ });
+
+ test("messages explain decode as a repeated stage distinct from prefill and sampling", () => {
+ const messages = pageMessagesSchema.parse(
+ JSON.parse(readFileSync(messagesPath, "utf8")),
+ );
+
+ expect(messages.title).toBe("Decode");
+ expect(messages.openingSummary?.length).toBeGreaterThan(0);
+ expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain(
+ "autoregressive generation",
+ );
+ expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain(
+ "instead of rereading the whole prompt",
+ );
+ expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain(
+ "inter-token latency",
+ );
+ expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain(
+ "memory bandwidth",
+ );
+ expect(messages.sections?.commonConfusions.body?.toLowerCase()).toContain(
+ "sampling",
+ );
+ });
+
+ test("page renders the canonical concept route with serving and discovery links", async () => {
+ const page = await loadConceptPage("decode");
+
+ expect(page.frontmatter.kind).toBe("concept");
+ expect(page.frontmatter.status).toBe("published");
+ expect(page.frontmatter.registryId).toBe("concept.decode");
+ expect(page.messages.openingSummary?.length).toBeGreaterThan(0);
+
+ const html = renderToStaticMarkup(
+ createElement(ModulePageProviders, {
+ messages: page.messages,
+ assets: page.assets,
+ // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing
+ children: page.content,
+ }),
+ );
+
+ expect(html).toContain("What It Is");
+ expect(html).toContain("Why It Matters");
+ expect(html).toContain("Serving Path");
+ expect(html).toContain("Related Concepts And Systems");
+ expect(html).toContain("Tags");
+ expect(html).toContain("References");
+ expect(html).toContain("instead of rereading the whole prompt");
+ expect(html).toContain('href="/docs/concepts/prefill"');
+ expect(html).toContain('href="/docs/concepts/kv-cache"');
+ expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
+ expect(html).toContain('href="/docs/systems/batching"');
+ expect(html).toContain('href="/docs/systems/continuous-batching"');
+ expect(html).toContain('href="/docs/systems/memory"');
+ expect(html).toContain('href="/docs/systems/speculative-decoding"');
+ expect(html).toContain('href="/docs/glossary/sampling-overview"');
+ expect(html).toContain('data-testid="derived-related-docs"');
+ expect(html).toContain('data-testid="curated-related-docs"');
+ expect(html).not.toContain("Reader Shortcut");
+ });
+
+ test("published pages and search documents prefer the concept route for decode discovery", async () => {
+ const registry = await loadRegistry();
+ const pages = await loadPublishedDocsPages("en");
+ const documents = buildSearchDocuments(pages, registry);
+
+ expect(pages.some((page) => page.docsSlug === "concepts/decode")).toBe(
+ true,
+ );
+ expect(pages.some((page) => page.docsSlug === "glossary/decode")).toBe(
+ true,
+ );
+
+ const conceptDocument = documents.find(
+ (entry) => entry.url === "/docs/concepts/decode",
+ );
+ expect(conceptDocument?.kind).toBe("concept");
+ expect(conceptDocument?.facets.kind).toBe("concept");
+ expect(conceptDocument?.aliases).toEqual(
+ expect.arrayContaining([
+ "Decode",
+ "decoding",
+ "token-by-token generation",
+ "next-token step",
+ "inter-token latency",
+ ]),
+ );
+ });
+
+ test("search ranks the concept route first for representative decode queries", async () => {
+ for (const query of [
+ "decode",
+ "decoding",
+ "token-by-token generation",
+ "next-token step",
+ "inter-token latency",
+ ] as const) {
+ const results = await docsSearchApi.search(query);
+ expect(results.length).toBeGreaterThan(0);
+ expect(results[0]?.url).toBe(DECODE_ROUTE);
+ }
+ });
+});
diff --git a/src/lib/content/decode-glossary.test.ts b/src/lib/content/decode-glossary.test.ts
index 6af9138f..07d21cc8 100644
--- a/src/lib/content/decode-glossary.test.ts
+++ b/src/lib/content/decode-glossary.test.ts
@@ -37,6 +37,7 @@ describe("Phase 5 decode glossary page (US-003)", () => {
"token-by-token generation",
"next-token step",
"inter-token generation",
+ "inter-token latency",
]);
expect(record?.tags).toEqual(["foundations", "attention", "kv-cache"]);
expect(record?.relatedIds).toEqual(
@@ -182,6 +183,7 @@ describe("Phase 5 decode glossary page (US-003)", () => {
"decoding",
"token-by-token generation",
"next-token step",
+ "inter-token latency",
]),
);
expect(document?.tags).toEqual(
diff --git a/src/lib/content/glossary-architecture-index.test.ts b/src/lib/content/glossary-architecture-index.test.ts
index 723a23d5..3103e94e 100644
--- a/src/lib/content/glossary-architecture-index.test.ts
+++ b/src/lib/content/glossary-architecture-index.test.ts
@@ -133,7 +133,7 @@ const EXPECTED_GLOSSARY_TITLES: Record<
};
const PUBLISHED_GLOSSARY_ENTRY_COUNT = 58;
-const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 50;
+const PUBLISHED_ARCHITECTURE_ENTRY_COUNT = 49;
const GLOSSARY_SEPARATOR_TITLES = [
"Model Taxonomy",
@@ -145,6 +145,7 @@ const GLOSSARY_SEPARATOR_TITLES = [
const ARCHITECTURE_CONCEPT_URLS = [
"/docs/concepts/alibi",
"/docs/concepts/context-extension",
+ "/docs/concepts/decode",
"/docs/concepts/kv-cache",
"/docs/concepts/page-spec-workflow-sample",
"/docs/concepts/positional-encodings",
@@ -232,7 +233,6 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => {
for (const url of [
"/docs/glossary/architecture",
- "/docs/glossary/kv-cache",
"/docs/glossary/normalization",
"/docs/glossary/residual-connection",
"/docs/glossary/special-tokens",
@@ -271,7 +271,7 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => {
["Architecture", "/docs/glossary/architecture"],
["Foundation Model", "/docs/glossary/foundation-model"],
["Key-value cache", "/docs/concepts/kv-cache"],
- ["Decode", "/docs/glossary/decode"],
+ ["Decode", "/docs/concepts/decode"],
["Prefill", "/docs/concepts/prefill"],
["Positional encodings", "/docs/concepts/positional-encodings"],
["Token", "/docs/glossary/token"],
@@ -280,5 +280,6 @@ describe("Phase 2 glossary and architecture index navigation (US-007)", () => {
expect(architectureHtml).toContain(title);
expect(architectureHtml).toContain(`href="${href}"`);
}
+ expect(architectureHtml).not.toContain('href="/docs/glossary/decode"');
});
});
diff --git a/src/lib/content/inference-engine-system-registry.test.ts b/src/lib/content/inference-engine-system-registry.test.ts
index 063232f6..4d49d25d 100644
--- a/src/lib/content/inference-engine-system-registry.test.ts
+++ b/src/lib/content/inference-engine-system-registry.test.ts
@@ -70,7 +70,7 @@ describe("inference engine system registry", () => {
).toBe("/docs/concepts/prefill");
expect(
items.find((item) => item.registryId === "concept.decode")?.href,
- ).toBe("/docs/glossary/decode");
+ ).toBe("/docs/concepts/decode");
expect(
items.find((item) => item.registryId === "concept.prefill-decode-split")
?.href,
diff --git a/src/lib/content/kv-cache-concept.test.ts b/src/lib/content/kv-cache-concept.test.ts
index c8cf1f8c..11cdf7d6 100644
--- a/src/lib/content/kv-cache-concept.test.ts
+++ b/src/lib/content/kv-cache-concept.test.ts
@@ -60,7 +60,7 @@ describe("KV-cache concept page (kv-cache-concept-page-001)", () => {
).toBe("/docs/concepts/prefill");
expect(
items.find((item) => item.registryId === "concept.decode")?.href,
- ).toBe("/docs/glossary/decode");
+ ).toBe("/docs/concepts/decode");
expect(
items.find((item) => item.registryId === "concept.prefill-decode-split")
?.href,
@@ -111,7 +111,7 @@ describe("KV-cache concept page (kv-cache-concept-page-001)", () => {
expect(html).toContain("repeated work");
expect(html).toContain("live memory");
expect(html).toContain('href="/docs/glossary/prefill"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/modules/attention"');
expect(html).toContain('href="/docs/modules/grouped-query-attention"');
expect(html).toContain('href="/tags/kv-cache"');
diff --git a/src/lib/content/kv-cache-glossary.test.ts b/src/lib/content/kv-cache-glossary.test.ts
index 97b7ebb9..bcf30a4e 100644
--- a/src/lib/content/kv-cache-glossary.test.ts
+++ b/src/lib/content/kv-cache-glossary.test.ts
@@ -150,9 +150,9 @@ describe("Phase 5 KV cache glossary page (US-001)", () => {
);
expectHtmlToContainProse(html, "cost real money to serve");
expect(html).toContain('href="/docs/concepts/prefill"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/concepts/prefill"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/autoregressive-generation"');
expect(html).toContain('href="/docs/modules/attention"');
expect(html).toContain('href="/docs/modules/multi-query-attention"');
diff --git a/src/lib/content/memory-system-page.test.ts b/src/lib/content/memory-system-page.test.ts
index e8e08f63..2ed8ba05 100644
--- a/src/lib/content/memory-system-page.test.ts
+++ b/src/lib/content/memory-system-page.test.ts
@@ -193,7 +193,7 @@ describe("memory docs route render", () => {
);
expect(html).toContain('href="/docs/concepts/prefill"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/systems/batching"');
diff --git a/src/lib/content/memory-system-registry.test.ts b/src/lib/content/memory-system-registry.test.ts
index ca07d5ec..908f1d3a 100644
--- a/src/lib/content/memory-system-registry.test.ts
+++ b/src/lib/content/memory-system-registry.test.ts
@@ -69,7 +69,7 @@ describe("memory system registry", () => {
).toBe("/docs/concepts/prefill");
expect(
items.find((item) => item.registryId === "concept.decode")?.href,
- ).toBe("/docs/glossary/decode");
+ ).toBe("/docs/concepts/decode");
expect(
items.find((item) => item.registryId === "concept.prefill-decode-split")
?.href,
diff --git a/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx b/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx
index 09ed942c..c6f1f4df 100644
--- a/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx
+++ b/src/lib/content/phase-4-japanese-attention-variant-proof-set.test.tsx
@@ -74,7 +74,7 @@ const JAPANESE_MODULE_EXPECTATIONS: readonly JapaneseModuleExpectation[] = [
],
expectedCanonicalFallbackHrefs: [
"/docs/concepts/kv-cache",
- "/docs/glossary/decode",
+ "/docs/concepts/decode",
"/docs/glossary/prefill-decode-split",
"/docs/concepts/quantization",
],
@@ -106,7 +106,7 @@ const JAPANESE_MODULE_EXPECTATIONS: readonly JapaneseModuleExpectation[] = [
],
expectedCanonicalFallbackHrefs: [
"/docs/concepts/kv-cache",
- "/docs/glossary/decode",
+ "/docs/concepts/decode",
"/docs/glossary/prefill-decode-split",
],
},
diff --git a/src/lib/content/phase-5-serving-path-search-locale.test.ts b/src/lib/content/phase-5-serving-path-search-locale.test.ts
index 7c8d632b..3a14496d 100644
--- a/src/lib/content/phase-5-serving-path-search-locale.test.ts
+++ b/src/lib/content/phase-5-serving-path-search-locale.test.ts
@@ -24,8 +24,8 @@ const PHASE_5_SERVING_DOCS = [
query: "prompt processing",
},
{
- docsSlug: "glossary/decode",
- url: "/docs/glossary/decode",
+ docsSlug: "concepts/decode",
+ url: "/docs/concepts/decode",
query: "token-by-token generation",
},
{
diff --git a/src/lib/content/prefill-concept.test.ts b/src/lib/content/prefill-concept.test.ts
index b8bef6b3..27b1bfdf 100644
--- a/src/lib/content/prefill-concept.test.ts
+++ b/src/lib/content/prefill-concept.test.ts
@@ -82,7 +82,7 @@ describe("prefill concept page (prefill-concept-page-001)", () => {
items.some(
(item) =>
item.registryId === "concept.decode" &&
- item.href === "/docs/glossary/decode",
+ item.href === "/docs/concepts/decode",
),
).toBe(true);
expect(
@@ -150,7 +150,7 @@ describe("prefill concept page (prefill-concept-page-001)", () => {
expect(html).toContain("Why It Matters");
expect(html).toContain("Serving Path");
expect(html).toContain('href="/docs/concepts/kv-cache"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
expect(html).toContain('href="/docs/glossary/autoregressive-generation"');
expect(html).toContain('href="/docs/modules/attention"');
diff --git a/src/lib/content/prefill-decode-split-glossary.test.ts b/src/lib/content/prefill-decode-split-glossary.test.ts
index 5403f9f8..587e1265 100644
--- a/src/lib/content/prefill-decode-split-glossary.test.ts
+++ b/src/lib/content/prefill-decode-split-glossary.test.ts
@@ -109,7 +109,7 @@ describe("Phase 5 prefill/decode split glossary page (US-004)", () => {
items.some(
(item) =>
item.registryId === "concept.decode" &&
- item.href === "/docs/glossary/decode",
+ item.href === "/docs/concepts/decode",
),
).toBe(true);
expect(
@@ -176,7 +176,7 @@ describe("Phase 5 prefill/decode split glossary page (US-004)", () => {
expectHtmlToContainProse(html, "queueing overhead");
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/concepts/prefill"');
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/search?q=paged%20attention"');
expect(html).toContain('href="/search?q=chunked%20prefill"');
expect(html).toContain('href="/search?q=speculative%20decoding"');
@@ -238,7 +238,7 @@ describe("Phase 5 prefill/decode split glossary page (US-004)", () => {
expect(kvCache).toContain('href="/docs/concepts/prefill"');
expect(prefill).toContain('href="/docs/concepts/prefill"');
expect(decode).toContain('href="/docs/glossary/prefill-decode-split"');
- expect(split).toContain('href="/docs/glossary/decode"');
+ expect(split).toContain('href="/docs/concepts/decode"');
});
test("transformer, attention, autoregressive generation, MQA, GQA, and sliding-window attention expose entry points into the serving path", () => {
diff --git a/src/lib/content/speculative-decoding-system-page.test.ts b/src/lib/content/speculative-decoding-system-page.test.ts
index 8c2f193b..8616b28a 100644
--- a/src/lib/content/speculative-decoding-system-page.test.ts
+++ b/src/lib/content/speculative-decoding-system-page.test.ts
@@ -180,7 +180,7 @@ describe("speculative decoding docs route render", () => {
),
);
- expect(html).toContain('href="/docs/glossary/decode"');
+ expect(html).toContain('href="/docs/concepts/decode"');
expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/systems/batching"');
diff --git a/src/tests/content/architecture-index.test.ts b/src/tests/content/architecture-index.test.ts
index 95a2466f..95188790 100644
--- a/src/tests/content/architecture-index.test.ts
+++ b/src/tests/content/architecture-index.test.ts
@@ -23,6 +23,7 @@ describe("isArchitectureRelatedPage", () => {
expect(urls).toEqual(
expect.arrayContaining([
"/docs/concepts/context-extension",
+ "/docs/concepts/decode",
"/docs/concepts/page-spec-workflow-sample",
"/docs/concepts/positional-encodings",
"/docs/concepts/prefill",
@@ -68,6 +69,14 @@ describe("loadPublishedArchitectureEntries", () => {
expect(token?.slug).toBe("glossary/token");
});
+ it("prefers the concept route when glossary and concept pages share one registry record", async () => {
+ const entries = await loadPublishedArchitectureEntries("en");
+ const decodeEntries = entries.filter((entry) => entry.title === "Decode");
+
+ expect(decodeEntries).toHaveLength(1);
+ expect(decodeEntries[0]?.url).toBe("/docs/concepts/decode");
+ });
+
it("returns published architecture pages sorted alphabetically by title", async () => {
const entries = await loadPublishedArchitectureEntries("en");
for (let index = 1; index < entries.length; index += 1) {
@@ -122,8 +131,8 @@ describe("architecture index page render", () => {
for (const [title, href] of [
["Architecture", "/docs/glossary/architecture"],
["Foundation Model", "/docs/glossary/foundation-model"],
- ["KV cache", "/docs/glossary/kv-cache"],
- ["Decode", "/docs/glossary/decode"],
+ ["Key-value cache", "/docs/concepts/kv-cache"],
+ ["Decode", "/docs/concepts/decode"],
["Prefill", "/docs/concepts/prefill"],
["Positional encodings", "/docs/concepts/positional-encodings"],
["Token", "/docs/glossary/token"],
@@ -136,6 +145,7 @@ describe("architecture index page render", () => {
expect(html).not.toContain("No architecture entries yet");
expect(html).toContain("list-none");
expect(html).not.toContain("list-disc");
+ expect(html).not.toContain('href="/docs/glossary/decode"');
});
it("renders localized vietnamese architecture entries when shipped page-local messages exist", async () => {
diff --git a/src/tests/content/attention-tag-landing.test.ts b/src/tests/content/attention-tag-landing.test.ts
index c0f73822..c0297774 100644
--- a/src/tests/content/attention-tag-landing.test.ts
+++ b/src/tests/content/attention-tag-landing.test.ts
@@ -76,6 +76,7 @@ describe("attention tag landing resources", () => {
const conceptGroup = groups.find((group) => group.kind === "concept");
expect(conceptGroup?.resources.map((resource) => resource.url)).toEqual([
+ "/docs/concepts/decode",
"/docs/concepts/kv-cache",
"/docs/concepts/prefill",
]);