Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/app/docs/docs-slug-renderer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ async function renderLocalDocsPage(
/>
<DocsTitle>{loadedPage.messages.title}</DocsTitle>
<DocsDescription>{description}</DocsDescription>
{localRef.section === "concepts" ? (
<FoldedOpeningSummary
label={uiMessages.shell.openingSummary}
summary={loadedPage.messages.openingSummary}
/>
) : null}
<DocsBody>
{localRef.section !== "systems" && localRef.section !== "glossary" ? (
<DocsOpeningSummary
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"title": "Autoregressive generation",
"description": "How a model turns the current context into one next token, appends that token, and repeats the same loop until the output is complete.",
"openingSummary": "Autoregressive generation is the step-by-step next-token loop behind most text generation: the model reads the current context, scores candidate next tokens, chooses one, adds it to the context, and then runs the same loop again one step later.",
"sections": {
"whatItIs": {
"title": "What It Is",
"body": "Autoregressive generation means producing an output one discrete step at a time while conditioning each new step on everything that came before it. In language models, that step is usually the next token. The model reads the current token context, produces logits for the vocabulary, turns those scores into a choice distribution with softmax, picks one token, appends it, and repeats. The key idea is not any one architecture block; it is the feedback loop where each new output becomes part of the next input."
},
"whyItMatters": {
"title": "Why It Matters",
"body": "This loop explains why text generation feels sequential even when transformers do heavy parallel math inside each forward pass. It also connects several pages readers often meet separately: token explains the unit being chosen, logit and softmax explain how raw scores become probabilities, decoder and encoder-decoder explain where the generation head lives, and sampling overview explains how the final token is picked when several options look plausible."
},
"oneTokenLoop": {
"title": "One Token At A Time",
"body": "Picture the prompt \"The capital of France is\". The model processes that context and emits logits for many candidate next tokens such as \"Paris\", punctuation, or less likely mistakes. A decoding rule then selects or samples one token. That chosen token is appended to the running context, so the next pass no longer answers the original prompt alone; it answers the prompt plus the new token. During prefill, the model can process many prompt positions in parallel because those tokens are already known. During generation, the next token is unknown until the previous step finishes, so the loop still advances one token at a time."
},
"servingBridge": {
"title": "From Architecture To Serving",
"body": "Autoregressive generation is where architecture choices meet serving reality. A decoder-only model uses causal attention so each step can look backward but not forward. An encoder-decoder model still generates autoregressively on the decoder side while cross-attending to fixed encoder outputs. Key-value (KV) cache reuse matters because each decode step should not recompute every earlier attention state from scratch. That is why serving stacks split prompt-heavy prefill from steady decode work, and why pages such as kv-cache and prefill-decode split talk about memory bandwidth, cache growth, and latency instead of only model quality."
},
"commonConfusions": {
"title": "Common Confusions",
"body": "Autoregressive generation is not the same as diffusion-style generation. Diffusion models also iterate, but they usually refine a noisy latent or image over many denoising steps rather than choosing one next token from a vocabulary at each step. It is also different from encoder-only understanding flows. Encoder-only systems such as many embedding or classification models read the whole input to build representations, but they are not primarily designed to append one generated token after another. Finally, autoregressive generation is broader than decoder-only chat models: translation and speech systems with encoder-decoder layouts can use the same next-token loop."
},
"related": {
"title": "Related Concepts And Modules"
},
"tags": {
"title": "Tags"
},
"references": {
"title": "References"
}
}
}
65 changes: 65 additions & 0 deletions src/content/docs/concepts/autoregressive-generation/page.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
title: Autoregressive generation
description: How a model turns the current context into one next token, appends that token, and repeats the same loop until the output is complete.
kind: "concept"
registryId: "concept.autoregressive-generation"
messageNamespace: "local"
assetNamespace: "local"
status: "published"
tags:
- "foundations"
- "taxonomy"
aliases:
- "autoregressive decoding"
- "next-token generation"
- "next token loop"
updatedAt: "2026-06-22"
---

import { CitationList } from "@/features/docs/components/CitationList";
import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
import { RelatedDocs } from "@/features/docs/components/RelatedDocs";
import { Section } from "@/features/docs/components/Section";
import { T } from "@/features/docs/components/T";
import { TagPillList } from "@/features/docs/components/TagPillList";

<Section id="what-it-is" titleKey="sections.whatItIs.title">
<T k="sections.whatItIs.body" />
</Section>

<Section id="why-it-matters" titleKey="sections.whyItMatters.title">
<T k="sections.whyItMatters.body" />
</Section>

<Section id="one-token-loop" titleKey="sections.oneTokenLoop.title">
<T k="sections.oneTokenLoop.body" />
</Section>

<Section id="serving-bridge" titleKey="sections.servingBridge.title">
<T k="sections.servingBridge.body" />
</Section>

<Section id="common-confusions" titleKey="sections.commonConfusions.title">
<T k="sections.commonConfusions.body" />
</Section>

<Section id="related" titleKey="sections.related.title">
<DerivedRelatedDocs
registryId="concept.autoregressive-generation"
groups={[
"same-concept-type",
"shared-tags",
"curated-related"
]}
/>

<RelatedDocs registryId="concept.autoregressive-generation" />
</Section>

<Section id="tags" titleKey="sections.tags.title">
<TagPillList registryId="concept.autoregressive-generation" showDescriptions />
</Section>

<Section id="references" titleKey="sections.references.title">
<CitationList registryId="concept.autoregressive-generation" />
</Section>
7 changes: 6 additions & 1 deletion src/content/docs/glossary/decoder/messages/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,10 @@
"title": "References"
}
},
"openingSummary": "A decoder reads representations from an encoder or earlier blocks and maps them to an output space, often autoregressively or iteratively until a full sample is formed."
"openingSummary": "A decoder reads representations from an encoder or earlier blocks and maps them to an output space, often autoregressively or iteratively until a full sample is formed.",
"relatedDocs": {
"concept.autoregressive-generation": {
"reason": "Autoregressive generation explains the token-by-token loop decoder-based language models repeat after each next-token choice."
}
}
}
26 changes: 22 additions & 4 deletions src/content/registry/concepts/autoregressive-generation.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,16 @@
"Autoregressive generation",
"autoregressive decoding",
"next-token generation",
"next token generation",
"next-token prediction",
"AR generation"
],
"tags": ["foundations", "taxonomy", "attention"],
"tags": [
"foundations",
"taxonomy",
"attention",
"token-to-probability-chain"
],
"relatedIds": [
"concept.token",
"concept.logit",
Expand All @@ -24,20 +31,26 @@
"concept.decode",
"concept.prefill-decode-split",
"concept.encoder-decoder",
"concept.transformer",
"concept.conditioning",
"concept.denoising-generation",
"concept.diffusion-model",
"concept.quantization"
],
"citationIds": [
"citation.attention-is-all-you-need",
"citation.gpt-2-report",
"citation.raffel-t5",
"citation.brown-gpt-3",
"citation.curious-case-neural-text-degeneration"
],
"status": "published",
"createdAt": "2026-06-04T12:30:00.000Z",
"updatedAt": "2026-06-18T00:00:00.000Z",
"updatedAt": "2026-06-22T00:00:00.000Z",
"conceptType": "general",
"sidebarGrouping": {
"glossary": "generation-and-diffusion"
"glossary": "generation-and-diffusion",
"concepts": "inference"
},
"prerequisiteIds": [
"concept.token",
Expand All @@ -47,5 +60,10 @@
"concept.decoder",
"concept.encoder-decoder"
],
"explainsIds": ["concept.conditioning"]
"explainsIds": [
"concept.sampling-overview",
"concept.prefill",
"concept.decode",
"concept.prefill-decode-split"
]
}
159 changes: 159 additions & 0 deletions src/lib/content/autoregressive-generation-concept.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import { describe, expect, test } from "bun:test";
import { createElement } from "react";
import { renderToStaticMarkup } from "react-dom/server";
import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
import { loadConceptPage } from "@/lib/content/concept-page";
import { renderConceptDocsShell } from "@/lib/content/concept-shell-render";
import { stripHtmlTags } from "@/lib/content/glossary-test-helpers";
import { loadPublishedDocsPages } from "@/lib/content/pages";
import {
PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS,
PUBLISHED_DOCS_REGISTRY_IDS,
} from "@/lib/content/published-docs-registry-ids";
import { loadRegistry } from "@/lib/content/registry";
import {
getConceptById,
listRelatedRegistryRecords,
} from "@/lib/content/registry-runtime";
import { deriveCuratedRelatedItems } from "@/lib/content/related-docs";
import { buildSearchDocuments } from "@/lib/search/build-documents";
import { docsSearchApi } from "@/lib/search/search-server";

describe("Autoregressive generation concept page", () => {
test("canonical concept route is published for the existing registry record", () => {
  const conceptId = "concept.autoregressive-generation";
  const record = getConceptById(conceptId);

  // Related ids the registry record is required to list (extra ids are allowed).
  const requiredRelatedIds = [
    "concept.decoder",
    "concept.encoder-decoder",
    "concept.kv-cache",
    "concept.prefill",
    "concept.prefill-decode-split",
    "concept.sampling-overview",
  ];

  expect(record?.status).toBe("published");
  expect(PUBLISHED_CONCEPT_SECTION_REGISTRY_IDS.has(conceptId)).toBe(true);
  expect(record?.relatedIds).toEqual(
    expect.arrayContaining(requiredRelatedIds),
  );
});

test("curated related items route the concept toward serving and token-chain neighbors", () => {
  const source = getConceptById("concept.autoregressive-generation");
  if (!source) {
    throw new Error("expected concept.autoregressive-generation in registry");
  }

  const items = deriveCuratedRelatedItems(
    source,
    listRelatedRegistryRecords(),
    PUBLISHED_DOCS_REGISTRY_IDS,
  );

  // Registry id paired with the href expected on the matching curated item.
  const expectedRoutes = [
    ["concept.decoder", "/docs/glossary/decoder"],
    ["concept.encoder-decoder", "/docs/glossary/encoder-decoder"],
    ["concept.kv-cache", "/docs/concepts/kv-cache"],
    ["concept.prefill", "/docs/concepts/prefill"],
    ["concept.prefill-decode-split", "/docs/glossary/prefill-decode-split"],
  ] as const;

  for (const [registryId, href] of expectedRoutes) {
    const match = items.find((item) => item.registryId === registryId);
    expect(match?.href).toBe(href);
  }
});

test("page renders the token loop, serving bridge, and related links", async () => {
  const page = await loadConceptPage("autoregressive-generation");

  // Frontmatter must identify the published concept record.
  expect(page.frontmatter.kind).toBe("concept");
  expect(page.frontmatter.status).toBe("published");
  expect(page.frontmatter.registryId).toBe(
    "concept.autoregressive-generation",
  );

  const loweredSummary = page.messages.openingSummary?.toLowerCase();
  expect(loweredSummary).toContain("next-token loop");

  const providers = createElement(ModulePageProviders, {
    messages: page.messages,
    assets: page.assets,
    // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing
    children: page.content,
  });
  const html = renderToStaticMarkup(providers);

  // Section headings, body text, and related-doc links that must appear in the markup.
  const requiredFragments = [
    "What It Is",
    "One Token At A Time",
    "From Architecture To Serving",
    "diffusion-style generation",
    'href="/docs/glossary/decoder"',
    'href="/docs/glossary/encoder-decoder"',
    'href="/docs/concepts/kv-cache"',
    'href="/docs/concepts/prefill"',
    'href="/docs/glossary/prefill-decode-split"',
    'href="/docs/glossary/sampling-overview"',
    'data-testid="curated-related-docs"',
  ];
  for (const fragment of requiredFragments) {
    expect(html).toContain(fragment);
  }

  // Strings that must not appear anywhere in the rendered markup.
  const forbiddenFragments = ["Reader Shortcut", "Phase"];
  for (const fragment of forbiddenFragments) {
    expect(html).not.toContain(fragment);
  }
});

test("shell render includes the folded summary and all expected references", async () => {
  const page = await loadConceptPage("autoregressive-generation");

  const markup = renderConceptDocsShell(page);
  const text = stripHtmlTags(markup);

  // (haystack, needle) pairs: raw markup for test ids, tag-stripped text for visible copy.
  const checks = [
    [markup, 'data-testid="folded-opening-summary"'],
    [text, "Summary"],
    [text, "Autoregressive generation is the step-by-step next-token loop"],
    [markup, 'data-testid="citation-list"'],
    [text, "Attention Is All You Need"],
    [text, "Language Models are Unsupervised"],
    [
      text,
      "Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer",
    ],
  ] as const;

  for (const [haystack, needle] of checks) {
    expect(haystack).toContain(needle);
  }
});

test("discovery publishes both the concept explainer and glossary bridge for autoregressive generation", async () => {
  const registry = await loadRegistry();
  const pages = await loadPublishedDocsPages("en");
  const documents = buildSearchDocuments(pages, registry);

  // Search-document URL paired with the kind expected on that document.
  const expectedDocuments = [
    ["/docs/concepts/autoregressive-generation", "concept"],
    ["/docs/glossary/autoregressive-generation", "glossary"],
  ] as const;

  for (const [url, kind] of expectedDocuments) {
    const searchDocument = documents.find((entry) => entry.url === url);

    expect(searchDocument?.registryId).toBe(
      "concept.autoregressive-generation",
    );
    expect(searchDocument?.kind).toBe(kind);
    expect(searchDocument?.facets.kind).toBe(kind);
  }

  // The concept route must be among the results for the phrase used in its opening summary.
  const results = await docsSearchApi.search("next-token loop");
  const conceptRouteFound = results.some(
    (result) => result.url === "/docs/concepts/autoregressive-generation",
  );
  expect(conceptRouteFound).toBe(true);
});
});
Loading
Loading