Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/content/docs/concepts/sampling-overview/assets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
45 changes: 45 additions & 0 deletions src/content/docs/concepts/sampling-overview/messages/en.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"title": "Sampling Overview",
"description": "The post-probability next-token decision step that turns one distribution into one chosen token, shaping how predictable or varied generated text feels.",
"openingSummary": "After a model produces logits and those scores become probabilities, one more step still remains: choosing the next token. Sampling is that choice step, and its settings trade repeatable predictability against diversity without changing what the model already knows.",
"sections": {
"whatItIs": {
"title": "What It Is",
"body": "Sampling is the final next-token decision step in autoregressive generation. The model has already scored every token with logits, and those scores have been turned into probabilities. Sampling decides what to do next with that distribution: always take the highest-probability token, or keep several plausible options and choose among them under a rule."
},
"whyItMatters": {
"title": "Why It Matters",
"body": "This step strongly shapes how the output feels to a reader. Stricter rules are usually more repeatable, which helps when you want the same answer each time. Looser rules usually allow more diversity, which helps when you want variation or brainstorming. These settings change the selection process, not the model's underlying knowledge; they control how the model uses its probabilities, not what the model has learned."
},
"simpleExample": {
"title": "Simple Example",
"body": "Imagine the next-token probabilities heavily favor \"Paris,\" give some weight to \"Lyon,\" and leave a smaller chance for \"Marseille.\" Greedy decoding always picks \"Paris.\" Top-k sampling keeps only the top few candidates before drawing one. Top-p sampling keeps the smallest candidate set whose total probability passes a threshold, then draws from that set. All three strategies start from the same probabilities, but they make different tradeoffs between stability and variety."
},
"readerPath": {
"title": "Where To Go Next",
"body": "Autoregressive generation and decode explain where this choice happens in the full loop. Temperature explains how probabilities can be reshaped before selection. Greedy decoding, top-k sampling, and top-p sampling each apply a different token-choice rule, while GPT-2 shows the same step inside a familiar decoder-only system."
},
"commonConfusions": {
"title": "Common Confusions",
"body": "Sampling is not the same as temperature. Temperature changes the shape of the probability distribution before a token is chosen, while sampling rules decide how to choose after probabilities exist. Sampling also does not mean pure randomness: greedy decoding is a sampling rule too, but it is deterministic. Temperature, greedy decoding, top-k sampling, and top-p sampling each isolate one control or method inside the larger next-token choice step that sampling describes."
},
"related": {
"title": "Related Concepts And Modules"
},
"tags": {
"title": "Tags"
},
"references": {
"title": "References"
}
},
"links": {
"autoregressiveGeneration": "Autoregressive generation",
"decode": "Decode",
"temperature": "Temperature",
"greedyDecoding": "Greedy decoding",
"topKSampling": "Top-k sampling",
"topPSampling": "Top-p sampling",
"gpt2Report": "GPT-2 report"
}
}
90 changes: 90 additions & 0 deletions src/content/docs/concepts/sampling-overview/page.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
---
title: "Sampling Overview"
description: "The post-probability next-token decision step that turns one distribution into one chosen token, shaping how predictable or varied generated text feels."
kind: "concept"
registryId: "concept.sampling-overview"
messageNamespace: "local"
assetNamespace: "local"
status: "published"
tags:
- foundations
- token-to-probability-chain
aliases:
- "token sampling"
- "next-token sampling"
- "sampling basics"
- "decoding strategy"
updatedAt: "2026-06-22"
---

import { CitationList } from "@/features/docs/components/CitationList";
import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
import { LocalizedLinkList } from "@/features/docs/components/LocalizedLinkList";
import { RelatedDocs } from "@/features/docs/components/RelatedDocs";
import { Section } from "@/features/docs/components/Section";
import { T } from "@/features/docs/components/T";
import { TagPillList } from "@/features/docs/components/TagPillList";

<Section id="what-it-is" titleKey="sections.whatItIs.title">
<T k="sections.whatItIs.body" />
</Section>

<Section id="why-it-matters" titleKey="sections.whyItMatters.title">
<T k="sections.whyItMatters.body" />
</Section>

<Section id="simple-example" titleKey="sections.simpleExample.title">
<T k="sections.simpleExample.body" />
</Section>

<Section id="reader-path" titleKey="sections.readerPath.title">
<T k="sections.readerPath.body" />
<LocalizedLinkList
items={[
{
href: "/docs/glossary/autoregressive-generation",
labelKey: "links.autoregressiveGeneration",
},
{ href: "/docs/glossary/decode", labelKey: "links.decode" },
{ href: "/docs/glossary/temperature", labelKey: "links.temperature" },
{
href: "/docs/glossary/greedy-decoding",
labelKey: "links.greedyDecoding",
},
{
href: "/docs/glossary/top-k-sampling",
labelKey: "links.topKSampling",
},
{
href: "/docs/glossary/top-p-sampling",
labelKey: "links.topPSampling",
},
{ href: "/docs/papers/gpt-2-report", labelKey: "links.gpt2Report" },
]}
/>
</Section>

<Section id="common-confusions" titleKey="sections.commonConfusions.title">
<T k="sections.commonConfusions.body" />
</Section>

<Section id="related" titleKey="sections.related.title">
<DerivedRelatedDocs
registryId="concept.sampling-overview"
groups={[
"same-concept-type",
"shared-tags",
"curated-related"
]}
/>

<RelatedDocs registryId="concept.sampling-overview" />
</Section>

<Section id="tags" titleKey="sections.tags.title">
<TagPillList registryId="concept.sampling-overview" showDescriptions />
</Section>

<Section id="references" titleKey="sections.references.title">
<CitationList registryId="concept.sampling-overview" />
</Section>
1 change: 1 addition & 0 deletions src/content/registry/concepts/decode.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"concept.prefill-decode-split",
"system.batching",
"concept.autoregressive-generation",
"concept.sampling-overview",
"module.attention",
"module.multi-query-attention",
"module.grouped-query-attention",
Expand Down
7 changes: 5 additions & 2 deletions src/content/registry/concepts/sampling-overview.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
"sampling overview",
"token sampling",
"next-token sampling",
"sampling basics"
"sampling basics",
"decoding strategy"
],
"tags": ["foundations", "token-to-probability-chain"],
"relatedIds": [
"concept.temperature",
"concept.softmax",
"concept.autoregressive-generation",
"concept.decode",
"concept.greedy-decoding",
"concept.top-k-sampling",
"concept.top-p-sampling"
"concept.top-p-sampling",
"paper.gpt-2-report"
],
"citationIds": ["citation.curious-case-neural-text-degeneration"],
"status": "published",
Expand Down
3 changes: 2 additions & 1 deletion src/content/registry/papers/gpt-2-report.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"relatedIds": [
"module.byte-level-tokenization",
"concept.transformer-architecture",
"concept.scaling-law"
"concept.scaling-law",
"concept.sampling-overview"
],
"citationIds": ["citation.gpt-2-report"],
"status": "published",
Expand Down
6 changes: 6 additions & 0 deletions src/lib/content/content-paths.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@ export const HIDDEN_SIZE_GLOSSARY_PAGE_DIR = join(
/** Prefill concept page directory. */
export const PREFILL_CONCEPT_PAGE_DIR = join(CONCEPTS_DOCS_ROOT, "prefill");

/**
 * Sampling overview concept page directory: the "sampling-overview" segment
 * joined onto CONCEPTS_DOCS_ROOT.
 */
export const SAMPLING_OVERVIEW_CONCEPT_PAGE_DIR = join(
CONCEPTS_DOCS_ROOT,
"sampling-overview",
);

/** Vocabulary size glossary page directory. */
export const VOCABULARY_SIZE_GLOSSARY_PAGE_DIR = join(
GLOSSARY_DOCS_ROOT,
Expand Down
9 changes: 9 additions & 0 deletions src/lib/content/decode-glossary.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ describe("Phase 5 decode glossary page (US-003)", () => {
"concept.prefill-decode-split",
"system.batching",
"concept.autoregressive-generation",
"concept.sampling-overview",
"module.attention",
"module.multi-query-attention",
"module.grouped-query-attention",
Expand Down Expand Up @@ -82,6 +83,13 @@ describe("Phase 5 decode glossary page (US-003)", () => {
item.href === "/docs/concepts/kv-cache",
),
).toBe(true);
expect(
items.some(
(item) =>
item.registryId === "concept.sampling-overview" &&
item.href === "/docs/concepts/sampling-overview",
),
).toBe(true);
expect(
items.some(
(item) =>
Expand Down Expand Up @@ -153,6 +161,7 @@ describe("Phase 5 decode glossary page (US-003)", () => {
expectHtmlToContainProse(html, "memory bandwidth");
expect(html).toContain('href="/docs/concepts/kv-cache"');
expect(html).toContain('href="/docs/concepts/prefill"');
expect(html).toContain('href="/docs/concepts/sampling-overview"');
expect(html).toContain('href="/docs/glossary/prefill-decode-split"');
expect(html).toContain('href="/docs/glossary/autoregressive-generation"');
expect(html).toContain('href="/docs/modules/attention"');
Expand Down
1 change: 1 addition & 0 deletions src/lib/content/gpt-2-report-paper-page.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ describe("GPT-2 report paper page", () => {
expect(html).toContain("Byte-level BPE tokenization");
expect(html).toContain("Broad next-token pretraining");
expect(html).toContain('href="/docs/concepts/transformer-architecture"');
expect(html).toContain('href="/docs/concepts/sampling-overview"');
expect(html).toContain('href="/docs/modules/byte-level-tokenization"');
expect(html).toContain('href="/docs/glossary/scaling-law"');
expect(html).toContain('data-testid="curated-related-docs"');
Expand Down
2 changes: 2 additions & 0 deletions src/lib/content/gpt-2-report-paper-record.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const publishedRegistryIds = new Set([
"module.byte-level-tokenization",
"concept.transformer-architecture",
"concept.scaling-law",
"concept.sampling-overview",
]);

function requirePaperRecord() {
Expand Down Expand Up @@ -82,6 +83,7 @@ describe("gpt-2 report paper registry record", () => {
"module.byte-level-tokenization",
"concept.transformer-architecture",
"concept.scaling-law",
"concept.sampling-overview",
]);
});
});
4 changes: 2 additions & 2 deletions src/lib/content/greedy-decoding-glossary.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ describe("Phase 5 greedy decoding glossary page (phase-5-sampling-basics-decisio
items.some(
(item) =>
item.registryId === "concept.sampling-overview" &&
item.href === "/docs/glossary/sampling-overview" &&
item.href === "/docs/concepts/sampling-overview" &&
item.isPlanned === false,
),
).toBe(true);
Expand Down Expand Up @@ -176,7 +176,7 @@ describe("Phase 5 greedy decoding glossary page (phase-5-sampling-basics-decisio
html,
"The same prompt and model state produce the same next token each time.",
);
expect(html).toContain('href="/docs/glossary/sampling-overview"');
expect(html).toContain('href="/docs/concepts/sampling-overview"');
expect(html).toContain('href="/docs/glossary/temperature"');
expect(html).toContain('href="/docs/glossary/autoregressive-generation"');
expect(html).toContain('href="/docs/glossary/top-k-sampling"');
Expand Down
65 changes: 65 additions & 0 deletions src/lib/content/sampling-overview-concept-validation.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { describe, expect, test } from "bun:test";
import { loadConceptPage } from "@/lib/content/concept-page";
import { loadPublishedDocsPages } from "@/lib/content/pages";
import {
getPublishedDocsEntriesBySlug,
getPublishedDocsEntryByRegistryId,
} from "@/lib/content/published-docs-registry-ids";
import { loadSearchResultMetaMap } from "@/lib/search/search-result-meta";
import { docsSearchApi } from "@/lib/search/search-server";
import { source } from "@/lib/source";

/**
 * Canonical-route validation for the sampling overview concept page.
 *
 * The first test checks the published-docs registry lookups; the second checks
 * that page loading, the published page list, the docs source tree, search
 * metadata, and search ranking all point at `/docs/concepts/sampling-overview`.
 */
describe("sampling overview canonical validation (sampling-overview-concept-page-004)", () => {
  test("published docs registry keeps one shared record while preferring the canonical concept route", () => {
    // The slug is expected under two docs sections: concepts and glossary.
    const entries = getPublishedDocsEntriesBySlug("sampling-overview");

    expect(entries).toHaveLength(2);
    // Sort before comparing so the assertion does not depend on lookup order.
    expect(entries.map((entry) => entry.docsSlug).sort()).toEqual([
      "concepts/sampling-overview",
      "glossary/sampling-overview",
    ]);

    // Resolving by registry id must yield the concept route, not the glossary one.
    const canonicalEntry = getPublishedDocsEntryByRegistryId(
      "concept.sampling-overview",
    );
    expect(canonicalEntry?.docsSlug).toBe("concepts/sampling-overview");
    expect(canonicalEntry?.url).toBe("/docs/concepts/sampling-overview");
    expect(canonicalEntry?.pageKind).toBe("concept");
  });

  test("default English page loading, sidebar discovery, and search expose the canonical route", async () => {
    const page = await loadConceptPage("sampling-overview");
    const pages = await loadPublishedDocsPages("en");
    const searchMeta = await loadSearchResultMetaMap("en");
    const searchResults = await docsSearchApi.search("sampling basics");
    const sidebarEntry = source.getPage(["concepts", "sampling-overview"]);

    expect(page.frontmatter.registryId).toBe("concept.sampling-overview");
    expect(page.messages.title).toBe("Sampling Overview");

    // The published page list must contain the concept route bound to this registry id.
    expect(
      pages.some(
        (entry) =>
          entry.docsSlug === "concepts/sampling-overview" &&
          entry.url === "/docs/concepts/sampling-overview" &&
          entry.frontmatter.registryId === "concept.sampling-overview",
      ),
    ).toBe(true);

    expect(sidebarEntry?.url).toBe("/docs/concepts/sampling-overview");

    const meta = searchMeta.get("/docs/concepts/sampling-overview");
    expect(meta?.title).toBe("Sampling Overview");
    expect(meta?.kind).toBe("concept");
    // arrayContaining tolerates extra aliases and any ordering. The list
    // includes "decoding strategy" so the alias added alongside this page is
    // covered, not only the four aliases that existed before.
    expect(meta?.aliases).toEqual(
      expect.arrayContaining([
        "sampling overview",
        "token sampling",
        "next-token sampling",
        "sampling basics",
        "decoding strategy",
      ]),
    );

    // An alias query should rank the canonical concept route first.
    expect(searchResults[0]?.url).toBe("/docs/concepts/sampling-overview");
  });
});
Loading
Loading