diff --git a/src/app/docs/docs-slug-renderer.test.tsx b/src/app/docs/docs-slug-renderer.test.tsx
index 448a2f14..28c31774 100644
--- a/src/app/docs/docs-slug-renderer.test.tsx
+++ b/src/app/docs/docs-slug-renderer.test.tsx
@@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test";
import {
buildDocsPageMetadata,
renderDocsSlugPage,
+ resolveLocalDocsShellDescription,
} from "@/app/docs/docs-slug-renderer";
describe("docs slug renderer locale gating", () => {
@@ -243,4 +244,21 @@ describe("docs slug renderer locale gating", () => {
);
}
});
+
+  test("local non-glossary docs shell prefers openingSummary when present", () => {
+    // Fixture text mirrors the GRPO page: a short description plus a longer opening summary.
+    const fallbackDescription =
+      "A post-training reinforcement-learning method that compares several sampled answers for the same prompt and updates the model from their relative quality.";
+    const grpoOpeningSummary =
+      "Group Relative Policy Optimization, usually shortened to GRPO, is a reinforcement-learning post-training method where the model samples several answers to one prompt, scores them as a group, and learns from which answers look better inside that local set.";
+
+    const resolved = resolveLocalDocsShellDescription({
+      section: "training",
+      openingSummary: grpoOpeningSummary,
+      description: fallbackDescription,
+    });
+
+    // The summary must win, and the plain description must not appear in the result.
+    expect(resolved).toContain(
+      "Group Relative Policy Optimization, usually shortened to GRPO",
+    );
+    expect(resolved).not.toContain(fallbackDescription);
+  });
});
diff --git a/src/app/docs/docs-slug-renderer.tsx b/src/app/docs/docs-slug-renderer.tsx
index 437a0027..9b9678bd 100644
--- a/src/app/docs/docs-slug-renderer.tsx
+++ b/src/app/docs/docs-slug-renderer.tsx
@@ -20,6 +20,19 @@ import { localizedRouteAlternates } from "@/lib/i18n/route-locale";
import { source } from "@/lib/source";
import { getMDXComponents } from "../../../mdx-components";
+/**
+ * Picks the description text shown in the shell of a local docs page.
+ *
+ * Glossary pages always use `description`. Every other section prefers
+ * `openingSummary` and falls back to `description` when the summary is
+ * missing or blank.
+ *
+ * @param options.description - Fallback description text.
+ * @param options.openingSummary - Optional opening summary for the page.
+ * @param options.section - Docs section the page belongs to, e.g. "glossary".
+ * @returns The text to use as the shell description.
+ */
+export function resolveLocalDocsShellDescription(options: {
+  description: string;
+  openingSummary?: string;
+  section: string;
+}): string {
+  const { description, openingSummary, section } = options;
+  if (section === "glossary") {
+    return description;
+  }
+
+  // `??` alone would let an empty or whitespace-only summary through and
+  // leave the shell without a description, so blank is treated as missing.
+  // The summary itself is returned untrimmed.
+  if (openingSummary?.trim()) {
+    return openingSummary;
+  }
+
+  return description;
+}
+
function buildDocsPageAlternates(docsSlug: string) {
const alternates = localizedRouteAlternates({
surface: "docs-page",
@@ -53,11 +66,16 @@ async function renderLocalDocsPage(
const loadedPage = await loadLocalDocsPage(localRef, locale);
const uiMessages = await loadUiMessages(locale);
+ const shellDescriptionText = resolveLocalDocsShellDescription({
+ description: loadedPage.messages.description,
+ openingSummary: loadedPage.messages.openingSummary,
+ section: localRef.section,
+ });
const description =
localRef.section === "glossary" ? (
) : (
- loadedPage.messages.description
+ shellDescriptionText
);
return (
diff --git a/src/content/docs/training/grpo/assets.json b/src/content/docs/training/grpo/assets.json
new file mode 100644
index 00000000..04ee7b59
--- /dev/null
+++ b/src/content/docs/training/grpo/assets.json
@@ -0,0 +1,10 @@
+{
+ "trainingFlow": {
+ "type": "graph",
+ "graphId": "graph.grpo-training-flow",
+ "webRenderer": "react-flow",
+ "printRenderer": "vertical-svg",
+ "altKey": "assets.trainingFlow.alt",
+ "captionKey": "assets.trainingFlow.caption"
+ }
+}
diff --git a/src/content/docs/training/grpo/messages/en.json b/src/content/docs/training/grpo/messages/en.json
new file mode 100644
index 00000000..7998b9c8
--- /dev/null
+++ b/src/content/docs/training/grpo/messages/en.json
@@ -0,0 +1,94 @@
+{
+ "title": "Group Relative Policy Optimization",
+ "description": "A post-training reinforcement-learning method that compares several sampled answers for the same prompt and updates the model from their relative quality.",
+ "openingSummary": "Group Relative Policy Optimization, usually shortened to GRPO, is a reinforcement-learning post-training method where the model samples several answers to one prompt, scores them as a group, and learns from which answers look better inside that local set.",
+ "sections": {
+ "whatItIs": {
+ "title": "What It Is",
+ "body": "Group Relative Policy Optimization is a policy-update method used after pretraining or supervised fine-tuning. Instead of judging one answer in isolation, it samples a small group of candidate answers for the same prompt and uses their relative ranking to decide which behaviors should be reinforced."
+ },
+ "whyItExists": {
+ "title": "Why It Exists",
+ "body": "A single reward score can be noisy, and PPO-style language-model training often adds the extra cost of a learned critic to estimate a baseline. GRPO tries to keep the signal useful while simplifying the loop: the group itself supplies the local baseline, so the model can learn from which samples look better than their neighbors."
+ },
+ "howItWorks": {
+ "title": "How It Works",
+ "body": "For one prompt, the policy generates several completions, a reward function scores them, and those scores are normalized within that group. Answers above the group average get a positive learning signal, answers below it get a negative one, and the policy is updated so future samples are more likely to resemble the stronger members of the group."
+ },
+ "comparedToNearbyRegimes": {
+ "title": "Compared To Nearby Regimes",
+ "body": "Group Relative Policy Optimization still sits inside the broader alignment family, but it is narrower than Reinforcement Learning from Human Feedback as a full pipeline. Reinforcement Learning from Human Feedback often means collecting preference data, training a reward model, and then running a reinforcement-learning update such as Proximal Policy Optimization. GRPO keeps the reinforcement-learning loop, but it replaces the learned critic-style baseline with relative ranking inside one sampled group. Direct Preference Optimization moves in a different direction: it stays closer to supervised-style optimization on chosen-versus-rejected pairs, while GRPO uses rewards over several sampled answers and updates the policy from that within-group ordering instead of from one pairwise objective alone."
+ },
+ "limitationsAndFailureModes": {
+ "title": "Limitations And Failure Modes",
+ "body": "The method still depends on reward quality. If the reward function prefers shallow tricks, the whole group can drift in the wrong direction together. Relative scoring also means a weak group can still produce a misleading winner if every sampled answer is bad."
+ },
+ "related": {
+ "title": "Related To"
+ },
+ "tags": {
+ "title": "Tags"
+ },
+ "references": {
+ "title": "References"
+ }
+ },
+ "callouts": {
+ "trainingFlowGraph": {
+ "title": "GRPO training flow",
+ "body": "A visual walkthrough of one prompt, grouped sampling, relative scoring, and the policy update."
+ },
+ "trainingFlowLegend": {
+ "title": "Graph legend",
+ "body": "How to read each stage of the GRPO training flow."
+ }
+ },
+ "links": {
+ "trainingFlowLegendPrompt": "One prompt anchors the whole local comparison.",
+ "trainingFlowLegendSampling": "The policy samples several candidate answers for that same prompt.",
+ "trainingFlowLegendRelativeScoring": "Those answers are scored relative to each other inside the sampled group.",
+ "trainingFlowLegendPolicyUpdate": "The policy update reinforces answers that beat the group baseline and discourages weaker ones."
+ },
+ "assets": {
+ "trainingFlow": {
+ "alt": "A training flow from one prompt to a group of sampled answers, then to relative scoring inside the group, and finally to a policy update.",
+ "caption": "GRPO learns from which answers win inside each sampled group instead of relying on one separate critic-estimated baseline."
+ }
+ },
+ "math": {
+ "grpoAdvantage": {
+ "label": "Grouped relative advantage sketch",
+ "formula": "A_i = \\frac{r_i - \\operatorname{mean}(r_{1:G})}{\\operatorname{std}(r_{1:G})}",
+ "variableDefinitions": {
+ "advantage": {
+ "term": "A_i",
+ "definition": "normalized advantage for sampled answer i"
+ },
+ "reward": {
+ "term": "r_i",
+ "definition": "reward score assigned to sampled answer i"
+ },
+ "groupSize": {
+ "term": "G",
+ "definition": "number of sampled answers in the comparison group"
+ }
+ }
+ }
+ },
+ "graph": {
+ "nodes": {
+ "prompt": {
+ "label": "One prompt"
+ },
+ "sampleGroup": {
+ "label": "Sample a group of answers"
+ },
+ "relativeScore": {
+ "label": "Score answers relative to the group"
+ },
+ "policyUpdate": {
+ "label": "Update the policy"
+ }
+ }
+ }
+}
diff --git a/src/content/docs/training/grpo/page.mdx b/src/content/docs/training/grpo/page.mdx
new file mode 100644
index 00000000..e75d83ff
--- /dev/null
+++ b/src/content/docs/training/grpo/page.mdx
@@ -0,0 +1,124 @@
+---
+title: "Group Relative Policy Optimization"
+description: "A post-training reinforcement-learning method that compares several sampled answers for the same prompt and updates the model from their relative quality."
+kind: "training-regime"
+registryId: "training-regime.grpo"
+messageNamespace: "local"
+assetNamespace: "local"
+status: "published"
+tags:
+ - foundations
+aliases:
+ - "GRPO"
+ - "Group Relative Preference Optimization"
+updatedAt: "2026-06-19"
+---
+
+import { CitationList } from "@/features/docs/components/CitationList";
+import { BlockMath } from "@/features/docs/components/Math";
+import { RelatedDocs } from "@/features/docs/components/RelatedDocs";
+import { Section } from "@/features/docs/components/Section";
+import { T } from "@/features/docs/components/T";
+import { TagPillList } from "@/features/docs/components/TagPillList";
+import { TrainingRegimeAtAGlance } from "@/features/models/components/TrainingRegimeAtAGlance";
+import { TrainingRegimeFlow } from "@/features/models/components/TrainingRegimeFlow";
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/content/registry/citations/deepseek-r1-paper.json b/src/content/registry/citations/deepseek-r1-paper.json
new file mode 100644
index 00000000..d8c810d7
--- /dev/null
+++ b/src/content/registry/citations/deepseek-r1-paper.json
@@ -0,0 +1,20 @@
+{
+ "id": "citation.deepseek-r1-paper",
+ "slug": "deepseek-r1-paper",
+ "kind": "citation",
+ "defaultTitleKey": "title",
+ "defaultSummaryKey": "summary",
+ "aliases": ["DeepSeek-R1 paper"],
+ "tags": ["foundations"],
+ "relatedIds": [],
+ "citationIds": [],
+ "status": "published",
+ "createdAt": "2026-06-19T00:00:00.000Z",
+ "updatedAt": "2026-06-19T00:00:00.000Z",
+ "citationType": "paper",
+ "authors": ["DeepSeek-AI"],
+ "title": "DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning",
+ "year": 2025,
+ "url": "https://arxiv.org/abs/2501.12948",
+ "mla": "DeepSeek-AI. \"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning.\" arXiv, 2025, https://arxiv.org/abs/2501.12948."
+}
diff --git a/src/content/registry/citations/deepseekmath-paper.json b/src/content/registry/citations/deepseekmath-paper.json
new file mode 100644
index 00000000..c4a55291
--- /dev/null
+++ b/src/content/registry/citations/deepseekmath-paper.json
@@ -0,0 +1,32 @@
+{
+ "id": "citation.deepseekmath-paper",
+ "slug": "deepseekmath-paper",
+ "kind": "citation",
+ "defaultTitleKey": "title",
+ "defaultSummaryKey": "summary",
+ "aliases": ["DeepSeekMath paper", "GRPO paper"],
+ "tags": ["foundations"],
+ "relatedIds": [],
+ "citationIds": [],
+ "status": "published",
+ "createdAt": "2026-06-19T00:00:00.000Z",
+ "updatedAt": "2026-06-19T00:00:00.000Z",
+ "citationType": "paper",
+ "authors": [
+ "Zhihong Shao",
+ "Peiyi Wang",
+ "Qihao Zhu",
+ "Runxin Xu",
+ "Junxiao Song",
+ "Xiao Bi",
+ "Haowei Zhang",
+ "Mingchuan Zhang",
+ "Y. K. Li",
+ "Yu Wu",
+ "Daya Guo"
+ ],
+ "title": "DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models",
+ "year": 2024,
+ "url": "https://arxiv.org/abs/2402.03300",
+ "mla": "Shao, Zhihong, et al. \"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models.\" arXiv, 2024, https://arxiv.org/abs/2402.03300."
+}
diff --git a/src/content/registry/concepts/alignment.json b/src/content/registry/concepts/alignment.json
index ace37f0f..6fcafc7d 100644
--- a/src/content/registry/concepts/alignment.json
+++ b/src/content/registry/concepts/alignment.json
@@ -7,6 +7,7 @@
"aliases": ["Alignment", "RLHF", "preference alignment", "safety alignment"],
"tags": ["foundations", "taxonomy"],
"relatedIds": [
+ "training-regime.grpo",
"concept.model-capacity",
"concept.overfitting",
"concept.generalization",
diff --git a/src/content/registry/graphs/grpo-training-flow.json b/src/content/registry/graphs/grpo-training-flow.json
new file mode 100644
index 00000000..b5164d02
--- /dev/null
+++ b/src/content/registry/graphs/grpo-training-flow.json
@@ -0,0 +1,84 @@
+{
+ "id": "graph.grpo-training-flow",
+ "slug": "grpo-training-flow",
+ "kind": "graph",
+ "defaultTitleKey": "title",
+ "defaultSummaryKey": "description",
+ "aliases": ["GRPO training flow"],
+ "tags": ["foundations"],
+ "relatedIds": [],
+ "citationIds": ["citation.deepseekmath-paper"],
+ "status": "published",
+ "createdAt": "2026-06-19T00:00:00.000Z",
+ "updatedAt": "2026-06-19T00:00:00.000Z",
+ "subjectId": "training-regime.grpo",
+ "graphType": "recursive-module-graph",
+ "rootNodeId": "prompt",
+ "layout": "vertical-expandable",
+ "defaultExpandedDepth": 1,
+ "supportedRenderers": ["react-flow", "vertical-svg"],
+ "nodes": [
+ {
+ "id": "prompt",
+ "labelKey": "graph.nodes.prompt.label",
+ "moduleKind": "input",
+ "position": { "x": 250, "y": 0 },
+ "size": { "width": 220, "height": 70 },
+ "visualRole": "process-node",
+ "childNodeIds": ["sample-group"]
+ },
+ {
+ "id": "sample-group",
+ "labelKey": "graph.nodes.sampleGroup.label",
+ "moduleKind": "block",
+ "position": { "x": 250, "y": 110 },
+ "size": { "width": 220, "height": 80 },
+ "visualRole": "summary-node",
+ "childNodeIds": ["relative-score"]
+ },
+ {
+ "id": "relative-score",
+ "labelKey": "graph.nodes.relativeScore.label",
+ "moduleKind": "block",
+ "position": { "x": 250, "y": 240 },
+ "size": { "width": 220, "height": 80 },
+ "visualRole": "summary-node",
+ "childNodeIds": ["policy-update"]
+ },
+ {
+ "id": "policy-update",
+ "labelKey": "graph.nodes.policyUpdate.label",
+ "moduleKind": "output",
+ "position": { "x": 250, "y": 370 },
+ "size": { "width": 220, "height": 70 },
+ "visualRole": "process-node",
+ "childNodeIds": []
+ }
+ ],
+ "edges": [
+ {
+ "id": "prompt-sample-group",
+ "source": "prompt",
+ "target": "sample-group",
+ "edgeKind": "data-flow",
+ "sourceHandleSide": "bottom",
+ "targetHandleSide": "top"
+ },
+ {
+ "id": "sample-group-relative-score",
+ "source": "sample-group",
+ "target": "relative-score",
+ "edgeKind": "data-flow",
+ "sourceHandleSide": "bottom",
+ "targetHandleSide": "top"
+ },
+ {
+ "id": "relative-score-policy-update",
+ "source": "relative-score",
+ "target": "policy-update",
+ "edgeKind": "data-flow",
+ "sourceHandleSide": "bottom",
+ "targetHandleSide": "top"
+ }
+ ]
+}
diff --git a/src/content/registry/training-regimes/grpo.json b/src/content/registry/training-regimes/grpo.json
new file mode 100644
index 00000000..4da4b3f0
--- /dev/null
+++ b/src/content/registry/training-regimes/grpo.json
@@ -0,0 +1,41 @@
+{
+ "id": "training-regime.grpo",
+ "slug": "grpo",
+ "kind": "training-regime",
+ "defaultTitleKey": "title",
+ "defaultSummaryKey": "description",
+ "aliases": [
+ "GRPO",
+ "group relative policy optimization",
+ "group relative preference optimization",
+ "group-relative policy optimization",
+ "group-relative preference optimization"
+ ],
+ "tags": ["foundations"],
+ "relatedIds": ["concept.alignment"],
+ "citationIds": ["citation.deepseekmath-paper", "citation.deepseek-r1-paper"],
+ "status": "published",
+ "createdAt": "2026-06-19T00:00:00.000Z",
+ "updatedAt": "2026-06-19T00:00:00.000Z",
+ "releaseDate": "2024-02-05",
+ "authors": [
+ "Zhihong Shao",
+ "Peiyi Wang",
+ "Qihao Zhu",
+ "Runxin Xu",
+ "Junxiao Song",
+ "Xiao Bi",
+ "Haowei Zhang",
+ "Mingchuan Zhang",
+ "Y. K. Li",
+ "Yu Wu",
+ "Daya Guo"
+ ],
+ "sourceId": "citation.deepseekmath-paper",
+ "regimeType": "optimization",
+ "usedByModelIds": [],
+ "relatedModuleIds": [],
+ "paperIds": [],
+ "conceptType": "training",
+ "variantGroup": "group-relative-reinforcement-learning"
+}
diff --git a/src/features/docs/components/Math.tsx b/src/features/docs/components/Math.tsx
index 91335658..9ea4ae4c 100644
--- a/src/features/docs/components/Math.tsx
+++ b/src/features/docs/components/Math.tsx
@@ -1,11 +1,19 @@
+"use client";
+
import katex from "katex";
+import { MissingMessageKey } from "@/features/docs/components/MissingMessageKey";
+import { ProseAutoLinkText } from "@/features/docs/components/ProseAutoLinkText";
+import { useOptionalPageMessagesContext } from "@/features/docs/components/page-messages-context";
+import { lookupMessage } from "@/lib/content/messages";
type MathProps = {
- formula: string;
+ formula?: string;
+ label?: string;
+ mathId?: string;
};
export function InlineMath({ formula }: MathProps) {
- const html = katex.renderToString(formula, {
+ const html = katex.renderToString(formula ?? "", {
throwOnError: false,
displayMode: false,
});
@@ -19,20 +27,108 @@ export function InlineMath({ formula }: MathProps) {
);
}
-export function BlockMath({ formula }: MathProps) {
- const html = katex.renderToString(formula, {
+function MathVariableDefinitions({ mathId }: { mathId: string }) {
+ const context = useOptionalPageMessagesContext();
+
+ if (!context) {
+ return null;
+ }
+
+ const { messages, isDev } = context;
+ const definitionsKey = `math.${mathId}.variableDefinitions`;
+ const definitions = messages.math?.[mathId]?.variableDefinitions;
+
+ if (!definitions || Object.keys(definitions).length === 0) {
+ if (isDev) {
+ return ;
+ }
+ return null;
+ }
+
+ return (
+
+
+ {Object.entries(definitions).map(([id, row]) => (
+
+ ))}
+
+
+ );
+}
+
+export function BlockMath({ formula, label, mathId }: MathProps) {
+ const context = useOptionalPageMessagesContext();
+ const resolvedFormula =
+ mathId && context
+ ? lookupMessage(context.messages, `math.${mathId}.formula`)
+ : null;
+ const resolvedLabel =
+ mathId && context
+ ? lookupMessage(context.messages, `math.${mathId}.label`)
+ : null;
+
+ if (mathId && context?.isDev) {
+ if (!resolvedFormula?.ok) {
+ return (
+
+ );
+ }
+
+ if (!resolvedLabel?.ok) {
+ return (
+
+ );
+ }
+ }
+
+ const displayFormula = resolvedFormula?.ok ? resolvedFormula.value : formula;
+
+ if (!displayFormula) {
+ return null;
+ }
+ const displayLabel = resolvedLabel?.ok ? resolvedLabel.value : label;
+ const html = katex.renderToString(displayFormula, {
throwOnError: false,
displayMode: true,
});
return (
-
+
+ {displayLabel ? (
+
+ {displayLabel}
+
+ ) : null}
+
+ {mathId ?
: null}
+
);
}
diff --git a/src/lib/content/graph-registry-runtime.generated.ts b/src/lib/content/graph-registry-runtime.generated.ts
index f003fadb..18313738 100644
--- a/src/lib/content/graph-registry-runtime.generated.ts
+++ b/src/lib/content/graph-registry-runtime.generated.ts
@@ -21,6 +21,7 @@ import groupedQueryAttentionComputeFlowGraphRecord from "@/content/registry/grap
import groupedQueryAttentionComputeSchemaGraphRecord from "@/content/registry/graphs/grouped-query-attention-compute-schema.json";
import groupedQueryAttentionGqaComparisonGraphRecord from "@/content/registry/graphs/grouped-query-attention-gqa-comparison.json";
import groupedQueryAttentionMhaComparisonGraphRecord from "@/content/registry/graphs/grouped-query-attention-mha-comparison.json";
+import grpoTrainingFlowGraphRecord from "@/content/registry/graphs/grpo-training-flow.json";
import heavilyCompressedAttentionFlowGraphRecord from "@/content/registry/graphs/heavily-compressed-attention-flow.json";
import layerNormComputeFlowGraphRecord from "@/content/registry/graphs/layer-norm-compute-flow.json";
import leakyReluActivationFlowGraphRecord from "@/content/registry/graphs/leaky-relu-activation-flow.json";
@@ -73,6 +74,7 @@ export const graphRecords: GraphRecord[] = [
graphRecordSchema.parse(groupedQueryAttentionComputeSchemaGraphRecord),
graphRecordSchema.parse(groupedQueryAttentionGqaComparisonGraphRecord),
graphRecordSchema.parse(groupedQueryAttentionMhaComparisonGraphRecord),
+ graphRecordSchema.parse(grpoTrainingFlowGraphRecord),
graphRecordSchema.parse(heavilyCompressedAttentionFlowGraphRecord),
graphRecordSchema.parse(layerNormComputeFlowGraphRecord),
graphRecordSchema.parse(leakyReluActivationFlowGraphRecord),
diff --git a/src/lib/content/graph-registry-runtime.test.ts b/src/lib/content/graph-registry-runtime.test.ts
index 97a26ab1..97ac587d 100644
--- a/src/lib/content/graph-registry-runtime.test.ts
+++ b/src/lib/content/graph-registry-runtime.test.ts
@@ -161,10 +161,13 @@ describe("graph-registry-runtime", () => {
test("lists all bundled graph records", () => {
const records = listGraphRecords();
- expect(records.length).toBe(49);
+ expect(records.length).toBe(50);
expect(records.map((record) => record.id)).toContain(
"graph.bpe-compute-flow",
);
+ expect(records.map((record) => record.id)).toContain(
+ "graph.grpo-training-flow",
+ );
expect(records.map((record) => record.id)).toContain(
"graph.sentencepiece-compute-flow",
);
diff --git a/src/lib/content/grpo-training-regime-comparisons.test.tsx b/src/lib/content/grpo-training-regime-comparisons.test.tsx
new file mode 100644
index 00000000..b3b3a490
--- /dev/null
+++ b/src/lib/content/grpo-training-regime-comparisons.test.tsx
@@ -0,0 +1,67 @@
+import { describe, expect, test } from "bun:test";
+import { createElement } from "react";
+import { renderToReadableStream } from "react-dom/server";
+import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
+import { loadTrainingRegimePageFromDisk } from "@/lib/content/training-regime-page-load";
+
+describe("grpo training regime comparisons", () => {
+  // Loads the GRPO page from disk, server-renders it inside the page
+  // providers, and returns the loaded page together with the rendered HTML.
+  const renderGrpoPage = async () => {
+    const page = await loadTrainingRegimePageFromDisk("grpo");
+    const stream = await renderToReadableStream(
+      createElement(
+        ModulePageProviders,
+        {
+          messages: page.messages,
+          assets: page.assets,
+        },
+        page.content,
+      ),
+    );
+    // `allReady` is awaited before the stream body is read.
+    await stream.allReady;
+    const html = await new Response(stream).text();
+    return { page, html };
+  };
+
+  test("page explains nearby alignment methods and renders stable reader links", async () => {
+    const { page, html } = await renderGrpoPage();
+
+    expect(page.frontmatter.registryId).toBe("training-regime.grpo");
+    // Only this phrase is matched case-insensitively.
+    expect(html.toLowerCase()).toContain(
+      "reinforcement learning from human feedback",
+    );
+
+    // Comparison prose, link labels, then link targets, in the original order.
+    const expectedFragments = [
+      "Proximal Policy Optimization",
+      "Direct Preference Optimization",
+      "pairwise objective",
+      "relative ranking inside one sampled group",
+      ">Alignment<",
+      ">RLHF<",
+      ">PPO<",
+      ">DPO<",
+      'href="/docs/glossary/alignment"',
+      'href="/search?q=ppo"',
+      'href="/search?q=dpo"',
+    ];
+    for (const fragment of expectedFragments) {
+      expect(html).toContain(fragment);
+    }
+  });
+
+  test("page renders the graph title, legend, and symbol-only math definitions for the GRPO loop", async () => {
+    const { html } = await renderGrpoPage();
+
+    // Graph callouts, math data attributes, then variable definitions.
+    const expectedFragments = [
+      "GRPO training flow",
+      "Graph legend",
+      "One prompt anchors the whole local comparison.",
+      'data-page-math-formula="grpoAdvantage"',
+      'data-page-math-variable-definitions="grpoAdvantage"',
+      "normalized advantage for sampled answer i",
+      "reward score assigned to sampled answer i",
+      "number of sampled answers in the comparison group",
+    ];
+    for (const fragment of expectedFragments) {
+      expect(html).toContain(fragment);
+    }
+  });
+});
diff --git a/src/lib/content/grpo-training-regime-contract.test.ts b/src/lib/content/grpo-training-regime-contract.test.ts
new file mode 100644
index 00000000..2ec35f73
--- /dev/null
+++ b/src/lib/content/grpo-training-regime-contract.test.ts
@@ -0,0 +1,47 @@
+import { describe, expect, test } from "bun:test";
+import { loadLocalDocsPage } from "@/lib/content/local-docs-page";
+import { getTrainingRegimeById } from "@/lib/content/registry-runtime.generated";
+import { docsSearchApi } from "@/lib/search/search-server";
+import { source } from "@/lib/source";
+
+describe("grpo training regime contract", () => {
+  test("canonical route, localized content, registry metadata, and discovery query resolve together", async () => {
+    // The page load and the alias search do not depend on each other, so
+    // both are started before either is awaited.
+    const pendingPage = loadLocalDocsPage({
+      section: "training",
+      slug: "grpo",
+    });
+    const pendingHits = docsSearchApi.search(
+      "group relative preference optimization",
+    );
+    const [grpoPage, hits] = await Promise.all([pendingPage, pendingHits]);
+
+    const canonicalRoute = source.getPage(["training", "grpo"]);
+    const registryRecord = getTrainingRegimeById("training-regime.grpo");
+
+    expect(canonicalRoute?.url).toBe("/docs/training/grpo");
+    expect(registryRecord).toBeDefined();
+    if (!registryRecord) {
+      // Narrows the record type for the assertions below.
+      throw new Error("Expected training-regime.grpo registry record to exist");
+    }
+
+    // Localized page content is bound to the registry record.
+    const { frontmatter, messages } = grpoPage;
+    expect(frontmatter.registryId).toBe(registryRecord.id);
+    expect(messages.title).toBe("Group Relative Policy Optimization");
+    expect(messages.openingSummary).toContain(
+      "samples several answers to one prompt",
+    );
+    expect(messages.sections?.howItWorks?.body).toContain(
+      "normalized within that group",
+    );
+
+    // Registry metadata.
+    const { kind, slug, aliases, relatedIds, variantGroup } = registryRecord;
+    expect(kind).toBe("training-regime");
+    expect(slug).toBe("grpo");
+    expect(aliases).toEqual(
+      expect.arrayContaining([
+        "GRPO",
+        "group relative preference optimization",
+      ]),
+    );
+    expect(relatedIds).toContain("concept.alignment");
+    expect(variantGroup).toBe("group-relative-reinforcement-learning");
+
+    // The alias query must return the canonical page as its first result.
+    expect(hits[0]?.url).toBe("/docs/training/grpo");
+  });
+});
diff --git a/src/lib/content/local-docs-page.test.ts b/src/lib/content/local-docs-page.test.ts
index d945b054..2c8468f3 100644
--- a/src/lib/content/local-docs-page.test.ts
+++ b/src/lib/content/local-docs-page.test.ts
@@ -162,6 +162,18 @@ describe("docs source local pages", () => {
expect(page.toc.some(hasTocUrl("#what-it-is"))).toBe(true);
});
+  test("loadLocalDocsPage resolves the canonical GRPO training page through the shared route contract", async () => {
+    const { messages, frontmatter, toc } = await loadLocalDocsPage({
+      slug: "grpo",
+      section: "training",
+    });
+
+    // Localized copy and the registry binding.
+    expect(messages.title).toBe("Group Relative Policy Optimization");
+    expect(messages.sections?.howItWorks?.title).toBe("How It Works");
+    expect(frontmatter.registryId).toBe("training-regime.grpo");
+
+    // The table of contents must include the "What It Is" heading anchor.
+    const hasWhatItIsAnchor = toc.some(hasTocUrl("#what-it-is"));
+    expect(hasWhatItIsAnchor).toBe(true);
+  });
+
test("loadLocalDocsPage resolves shipped vietnamese canonical page messages without changing the shared MDX route contract", async () => {
const page = await loadLocalDocsPage(
{
diff --git a/src/lib/content/published-docs-registry-manifest.ts b/src/lib/content/published-docs-registry-manifest.ts
index b462163e..f2b527f9 100644
--- a/src/lib/content/published-docs-registry-manifest.ts
+++ b/src/lib/content/published-docs-registry-manifest.ts
@@ -34,6 +34,14 @@ export const GENERATED_PUBLISHED_DOCS_ENTRIES = [
pageKind: "training-regime",
section: "training",
},
+ {
+ registryId: "training-regime.grpo",
+ slug: "grpo",
+ docsSlug: "training/grpo",
+ url: "/docs/training/grpo",
+ pageKind: "training-regime",
+ section: "training",
+ },
{
registryId: "training-regime.on-policy-distillation",
slug: "on-policy-distillation",
@@ -1161,6 +1169,7 @@ export const GENERATED_PUBLISHED_DOCS_REGISTRY_IDS = [
"system.routing",
"training-regime.dpo",
"training-regime.fp4-quantization-aware-training",
+ "training-regime.grpo",
"training-regime.on-policy-distillation",
"training-regime.specialist-training",
] as const;
diff --git a/src/lib/content/published-docs-routing-contract.test.ts b/src/lib/content/published-docs-routing-contract.test.ts
index bb7ef5a7..a8aaf7d0 100644
--- a/src/lib/content/published-docs-routing-contract.test.ts
+++ b/src/lib/content/published-docs-routing-contract.test.ts
@@ -86,6 +86,14 @@ describe("published docs routing contract", () => {
),
href: "/docs/training/dpo",
},
+ {
+ label: "grpo training regime",
+ record: requireRecord(
+ getTrainingRegimeById("training-regime.grpo"),
+ "grpo training regime",
+ ),
+ href: "/docs/training/grpo",
+ },
{
label: "system",
record: requireRecord(
diff --git a/src/lib/content/registry-runtime.generated.ts b/src/lib/content/registry-runtime.generated.ts
index e00eafde..c60c2919 100644
--- a/src/lib/content/registry-runtime.generated.ts
+++ b/src/lib/content/registry-runtime.generated.ts
@@ -178,64 +178,67 @@ import registryRecord_143 from "../../content/registry/models/gpt-3.json";
import registryRecord_144 from "../../content/registry/papers/deepseek-v4.json";
import registryRecord_145 from "../../content/registry/training-regimes/dpo.json";
import registryRecord_146 from "../../content/registry/training-regimes/fp4-quantization-aware-training.json";
-import registryRecord_147 from "../../content/registry/training-regimes/on-policy-distillation.json";
-import registryRecord_148 from "../../content/registry/training-regimes/specialist-training.json";
-import registryRecord_149 from "../../content/registry/systems/expert-parallel-overlap.json";
-import registryRecord_150 from "../../content/registry/systems/on-disk-kv-cache.json";
-import registryRecord_151 from "../../content/registry/systems/routing.json";
-import registryRecord_152 from "../../content/registry/datasets/deepseek-v4-specialist-corpus.json";
-import registryRecord_153 from "../../content/registry/organizations/deepseek-ai.json";
-import registryRecord_154 from "../../content/registry/citations/attention-is-all-you-need.json";
-import registryRecord_155 from "../../content/registry/citations/awq.json";
-import registryRecord_156 from "../../content/registry/citations/batch-normalization.json";
-import registryRecord_157 from "../../content/registry/citations/brown-gpt-3.json";
-import registryRecord_158 from "../../content/registry/citations/chen-positional-interpolation.json";
-import registryRecord_159 from "../../content/registry/citations/classifier-free-diffusion-guidance.json";
-import registryRecord_160 from "../../content/registry/citations/curious-case-neural-text-degeneration.json";
-import registryRecord_161 from "../../content/registry/citations/deepseek-v2-mla-paper.json";
-import registryRecord_162 from "../../content/registry/citations/deepseek-v4-paper.json";
-import registryRecord_163 from "../../content/registry/citations/denoising-diffusion-probabilistic-models.json";
-import registryRecord_164 from "../../content/registry/citations/ding-longrope.json";
-import registryRecord_165 from "../../content/registry/citations/direct-preference-optimization.json";
-import registryRecord_166 from "../../content/registry/citations/flamingo-visual-language-model.json";
-import registryRecord_167 from "../../content/registry/citations/glu-variants-improve-transformer.json";
-import registryRecord_168 from "../../content/registry/citations/goodfellow-deep-learning.json";
-import registryRecord_169 from "../../content/registry/citations/gpt-2-report.json";
-import registryRecord_170 from "../../content/registry/citations/gqa-paper.json";
-import registryRecord_171 from "../../content/registry/citations/group-normalization.json";
-import registryRecord_172 from "../../content/registry/citations/image-is-worth-16x16-words.json";
-import registryRecord_173 from "../../content/registry/citations/kaiokendev-superhot.json";
-import registryRecord_174 from "../../content/registry/citations/kaplan-scaling-laws.json";
-import registryRecord_175 from "../../content/registry/citations/katharopoulos-linear-attention-paper.json";
-import registryRecord_176 from "../../content/registry/citations/kingma-adam.json";
-import registryRecord_177 from "../../content/registry/citations/kivi-kv-cache-quantization.json";
-import registryRecord_178 from "../../content/registry/citations/kudo-sentencepiece.json";
-import registryRecord_179 from "../../content/registry/citations/layer-normalization.json";
-import registryRecord_180 from "../../content/registry/citations/learning-transferable-visual-models-from-natural-language-supervision.json";
-import registryRecord_181 from "../../content/registry/citations/longformer.json";
-import registryRecord_182 from "../../content/registry/citations/multilayer-feedforward-networks-are-universal-approximators.json";
-import registryRecord_183 from "../../content/registry/citations/on-policy-distillation-of-language-models.json";
-import registryRecord_184 from "../../content/registry/citations/peng-yarn.json";
-import registryRecord_185 from "../../content/registry/citations/press-alibi.json";
-import registryRecord_186 from "../../content/registry/citations/qlora.json";
-import registryRecord_187 from "../../content/registry/citations/quantization-integer-only-inference.json";
-import registryRecord_188 from "../../content/registry/citations/query-key-normalization-for-transformers.json";
-import registryRecord_189 from "../../content/registry/citations/raffel-t5.json";
-import registryRecord_190 from "../../content/registry/citations/rectified-linear-units-improve-restricted-boltzmann-machines.json";
-import registryRecord_191 from "../../content/registry/citations/rectifier-nonlinearities-improve-neural-network-acoustic-models.json";
-import registryRecord_192 from "../../content/registry/citations/root-mean-square-layer-normalization.json";
-import registryRecord_193 from "../../content/registry/citations/self-attention-with-relative-position-representations.json";
-import registryRecord_194 from "../../content/registry/citations/sennrich-bpe.json";
-import registryRecord_195 from "../../content/registry/citations/shazeer-mqa-paper.json";
-import registryRecord_196 from "../../content/registry/citations/sigmoid-weighted-linear-units.json";
-import registryRecord_197 from "../../content/registry/citations/smoothquant.json";
-import registryRecord_198 from "../../content/registry/citations/sparse-transformers.json";
-import registryRecord_199 from "../../content/registry/citations/sparsely-gated-mixture-of-experts-layer.json";
-import registryRecord_200 from "../../content/registry/citations/su-roformer-rope.json";
-import registryRecord_201 from "../../content/registry/citations/training-language-models-to-follow-instructions-with-human-feedback.json";
-import registryRecord_202 from "../../content/registry/citations/transformer-lms-without-positional-encodings.json";
-import registryRecord_203 from "../../content/registry/citations/wei-emergent-abilities.json";
-import registryRecord_204 from "../../content/registry/citations/world-models.json";
+import registryRecord_147 from "../../content/registry/training-regimes/grpo.json";
+import registryRecord_148 from "../../content/registry/training-regimes/on-policy-distillation.json";
+import registryRecord_149 from "../../content/registry/training-regimes/specialist-training.json";
+import registryRecord_150 from "../../content/registry/systems/expert-parallel-overlap.json";
+import registryRecord_151 from "../../content/registry/systems/on-disk-kv-cache.json";
+import registryRecord_152 from "../../content/registry/systems/routing.json";
+import registryRecord_153 from "../../content/registry/datasets/deepseek-v4-specialist-corpus.json";
+import registryRecord_154 from "../../content/registry/organizations/deepseek-ai.json";
+import registryRecord_155 from "../../content/registry/citations/attention-is-all-you-need.json";
+import registryRecord_156 from "../../content/registry/citations/awq.json";
+import registryRecord_157 from "../../content/registry/citations/batch-normalization.json";
+import registryRecord_158 from "../../content/registry/citations/brown-gpt-3.json";
+import registryRecord_159 from "../../content/registry/citations/chen-positional-interpolation.json";
+import registryRecord_160 from "../../content/registry/citations/classifier-free-diffusion-guidance.json";
+import registryRecord_161 from "../../content/registry/citations/curious-case-neural-text-degeneration.json";
+import registryRecord_162 from "../../content/registry/citations/deepseek-r1-paper.json";
+import registryRecord_163 from "../../content/registry/citations/deepseek-v2-mla-paper.json";
+import registryRecord_164 from "../../content/registry/citations/deepseek-v4-paper.json";
+import registryRecord_165 from "../../content/registry/citations/deepseekmath-paper.json";
+import registryRecord_166 from "../../content/registry/citations/denoising-diffusion-probabilistic-models.json";
+import registryRecord_167 from "../../content/registry/citations/ding-longrope.json";
+import registryRecord_168 from "../../content/registry/citations/direct-preference-optimization.json";
+import registryRecord_169 from "../../content/registry/citations/flamingo-visual-language-model.json";
+import registryRecord_170 from "../../content/registry/citations/glu-variants-improve-transformer.json";
+import registryRecord_171 from "../../content/registry/citations/goodfellow-deep-learning.json";
+import registryRecord_172 from "../../content/registry/citations/gpt-2-report.json";
+import registryRecord_173 from "../../content/registry/citations/gqa-paper.json";
+import registryRecord_174 from "../../content/registry/citations/group-normalization.json";
+import registryRecord_175 from "../../content/registry/citations/image-is-worth-16x16-words.json";
+import registryRecord_176 from "../../content/registry/citations/kaiokendev-superhot.json";
+import registryRecord_177 from "../../content/registry/citations/kaplan-scaling-laws.json";
+import registryRecord_178 from "../../content/registry/citations/katharopoulos-linear-attention-paper.json";
+import registryRecord_179 from "../../content/registry/citations/kingma-adam.json";
+import registryRecord_180 from "../../content/registry/citations/kivi-kv-cache-quantization.json";
+import registryRecord_181 from "../../content/registry/citations/kudo-sentencepiece.json";
+import registryRecord_182 from "../../content/registry/citations/layer-normalization.json";
+import registryRecord_183 from "../../content/registry/citations/learning-transferable-visual-models-from-natural-language-supervision.json";
+import registryRecord_184 from "../../content/registry/citations/longformer.json";
+import registryRecord_185 from "../../content/registry/citations/multilayer-feedforward-networks-are-universal-approximators.json";
+import registryRecord_186 from "../../content/registry/citations/on-policy-distillation-of-language-models.json";
+import registryRecord_187 from "../../content/registry/citations/peng-yarn.json";
+import registryRecord_188 from "../../content/registry/citations/press-alibi.json";
+import registryRecord_189 from "../../content/registry/citations/qlora.json";
+import registryRecord_190 from "../../content/registry/citations/quantization-integer-only-inference.json";
+import registryRecord_191 from "../../content/registry/citations/query-key-normalization-for-transformers.json";
+import registryRecord_192 from "../../content/registry/citations/raffel-t5.json";
+import registryRecord_193 from "../../content/registry/citations/rectified-linear-units-improve-restricted-boltzmann-machines.json";
+import registryRecord_194 from "../../content/registry/citations/rectifier-nonlinearities-improve-neural-network-acoustic-models.json";
+import registryRecord_195 from "../../content/registry/citations/root-mean-square-layer-normalization.json";
+import registryRecord_196 from "../../content/registry/citations/self-attention-with-relative-position-representations.json";
+import registryRecord_197 from "../../content/registry/citations/sennrich-bpe.json";
+import registryRecord_198 from "../../content/registry/citations/shazeer-mqa-paper.json";
+import registryRecord_199 from "../../content/registry/citations/sigmoid-weighted-linear-units.json";
+import registryRecord_200 from "../../content/registry/citations/smoothquant.json";
+import registryRecord_201 from "../../content/registry/citations/sparse-transformers.json";
+import registryRecord_202 from "../../content/registry/citations/sparsely-gated-mixture-of-experts-layer.json";
+import registryRecord_203 from "../../content/registry/citations/su-roformer-rope.json";
+import registryRecord_204 from "../../content/registry/citations/training-language-models-to-follow-instructions-with-human-feedback.json";
+import registryRecord_205 from "../../content/registry/citations/transformer-lms-without-positional-encodings.json";
+import registryRecord_206 from "../../content/registry/citations/wei-emergent-abilities.json";
+import registryRecord_207 from "../../content/registry/citations/world-models.json";
const moduleRecords: ModuleRecord[] = [
moduleRecordSchema.parse(registryRecord_0),
@@ -399,24 +402,24 @@ const trainingRegimeRecords: TrainingRegimeRecord[] = [
trainingRegimeRecordSchema.parse(registryRecord_146),
trainingRegimeRecordSchema.parse(registryRecord_147),
trainingRegimeRecordSchema.parse(registryRecord_148),
+ trainingRegimeRecordSchema.parse(registryRecord_149),
];
const systemRecords: SystemRecord[] = [
- systemRecordSchema.parse(registryRecord_149),
systemRecordSchema.parse(registryRecord_150),
systemRecordSchema.parse(registryRecord_151),
+ systemRecordSchema.parse(registryRecord_152),
];
const datasetRecords: DatasetRecord[] = [
- datasetRecordSchema.parse(registryRecord_152),
+ datasetRecordSchema.parse(registryRecord_153),
];
const organizationRecords: OrganizationRecord[] = [
- organizationRecordSchema.parse(registryRecord_153),
+ organizationRecordSchema.parse(registryRecord_154),
];
const citationRecords: CitationRecord[] = [
- citationRecordSchema.parse(registryRecord_154),
citationRecordSchema.parse(registryRecord_155),
citationRecordSchema.parse(registryRecord_156),
citationRecordSchema.parse(registryRecord_157),
@@ -467,6 +470,9 @@ const citationRecords: CitationRecord[] = [
citationRecordSchema.parse(registryRecord_202),
citationRecordSchema.parse(registryRecord_203),
citationRecordSchema.parse(registryRecord_204),
+ citationRecordSchema.parse(registryRecord_205),
+ citationRecordSchema.parse(registryRecord_206),
+ citationRecordSchema.parse(registryRecord_207),
];
const modulesById = new Map(
diff --git a/src/lib/content/training-behavior-glossary.test.ts b/src/lib/content/training-behavior-glossary.test.ts
index 6f256b5e..bf680f74 100644
--- a/src/lib/content/training-behavior-glossary.test.ts
+++ b/src/lib/content/training-behavior-glossary.test.ts
@@ -77,6 +77,7 @@ describe("Phase 2 training behavior glossary pages (US-004)", () => {
test("alignment links to training peers and published token-chain glossary pages", async () => {
const html = await renderGlossaryHtml("alignment");
+ expect(html).toContain('href="/docs/training/grpo"');
expect(html).toContain('href="/docs/glossary/model-capacity"');
expect(html).toContain('href="/docs/glossary/overfitting"');
expect(html).toContain('href="/docs/glossary/generalization"');
@@ -112,6 +113,7 @@ describe("Phase 2 training behavior glossary pages (US-004)", () => {
) as ConceptRecord;
expect(alignment.conceptType).toBe("training");
+ expect(alignment.relatedIds).toContain("training-regime.grpo");
expect(modelCapacity.conceptType).toBe("training");
expect(overfitting.conceptType).toBe("training");
expect(generalization.conceptType).toBe("evaluation");
diff --git a/src/lib/navigation/generated-docs-page-tree.test.ts b/src/lib/navigation/generated-docs-page-tree.test.ts
index 4bcc649b..32515788 100644
--- a/src/lib/navigation/generated-docs-page-tree.test.ts
+++ b/src/lib/navigation/generated-docs-page-tree.test.ts
@@ -129,6 +129,13 @@ describe("generated docs page tree", () => {
url: "/docs/training/on-policy-distillation",
}),
);
+ expect(
+ findNodeIndex(trainingChildren, { name: "Optimization" }),
+ ).toBeLessThan(
+ findNodeIndex(trainingChildren, {
+ url: "/docs/training/grpo",
+ }),
+ );
expect(
findNodeIndex(trainingChildren, { name: "Optimization" }),
).toBeLessThan(
diff --git a/src/lib/source.test.ts b/src/lib/source.test.ts
index cf6b7843..f66a1c4a 100644
--- a/src/lib/source.test.ts
+++ b/src/lib/source.test.ts
@@ -128,6 +128,7 @@ const PAPER_INDEX_URLS = ["/docs/papers/deepseek-v4"] as const;
const TRAINING_INDEX_URLS = [
"/docs/training/dpo",
"/docs/training/fp4-quantization-aware-training",
+ "/docs/training/grpo",
"/docs/training/on-policy-distillation",
"/docs/training/specialist-training",
] as const;
diff --git a/src/tests/content/section-indexes.test.tsx b/src/tests/content/section-indexes.test.tsx
index 7242528a..53724b49 100644
--- a/src/tests/content/section-indexes.test.tsx
+++ b/src/tests/content/section-indexes.test.tsx
@@ -57,6 +57,7 @@ describe("section index page render", () => {
const html = renderToStaticMarkup(await TrainingIndexPage());
expect(html).toContain("Training");
+ expect(html).toContain('href="/docs/training/grpo"');
expect(html).toContain('href="/docs/training/on-policy-distillation"');
expect(html).toContain('href="/docs/training/specialist-training"');
});
diff --git a/src/tests/discovery/search-discovery.test.tsx b/src/tests/discovery/search-discovery.test.tsx
index 04023fcf..ba849c39 100644
--- a/src/tests/discovery/search-discovery.test.tsx
+++ b/src/tests/discovery/search-discovery.test.tsx
@@ -75,6 +75,15 @@ function expectRouteRendersOk(
}
describe("Phase 1 search discovery", () => {
+ test("group relative preference optimization query routes readers to the canonical GRPO page", async () => { // search regression: long-form query must land on the GRPO docs page
+ const results = await docsSearchApi.search(
+ "group relative preference optimization", // "preference" wording rather than the acronym; presumably matched via a search alias - confirm against the registry record
+ );
+ expect(results.length).toBeGreaterThan(0); // the query must return at least one hit
+ expect(assertCanonicalPageLevelApiResults(results)).toBeNull(); // helper must return null here; presumably null means "no violations" - confirm against its definition
+ expect(results[0]?.url).toBe("/docs/training/grpo"); // the first result must be the GRPO training page
+ });
+
test("GQA query ranks grouped-query attention first", async () => {
const results = await docsSearchApi.search("GQA");
expect(results.length).toBeGreaterThan(0);
diff --git a/src/tests/search/build-documents.test.ts b/src/tests/search/build-documents.test.ts
index 23ca4b34..f54c3cbe 100644
--- a/src/tests/search/build-documents.test.ts
+++ b/src/tests/search/build-documents.test.ts
@@ -5,6 +5,7 @@ import { buildSearchDocuments } from "@/lib/search/build-documents";
const SAMPLE_URL = "/docs/modules/grouped-query-attention";
const TOKEN_GLOSSARY_URL = "/docs/glossary/token";
+const GRPO_URL = "/docs/training/grpo";
describe("buildSearchDocuments", () => {
test("indexes only published docs pages for the default locale", async () => {
@@ -63,4 +64,29 @@ describe("buildSearchDocuments", () => {
expect(token?.bodyText).toContain("tokenizer");
expect(token?.bodyText).toContain("token IDs");
});
+
+ test("indexes GRPO training page with search aliases and training facets", async () => { // checks the search document built for GRPO_URL
+ const registry = await loadRegistry();
+ const pages = await loadPublishedDocsPages("en"); // "en" locale only
+ const documents = buildSearchDocuments(pages, registry);
+ const grpo = documents.find((document) => document.url === GRPO_URL); // undefined when no document carries the GRPO url
+
+ expect(grpo).toBeDefined(); // fails first if the page was not indexed at all
+ expect(grpo?.kind).toBe("training-regime");
+ expect(grpo?.registryId).toBe("training-regime.grpo");
+ expect(grpo?.aliases).toEqual(
+ expect.arrayContaining([ // subset match: extra aliases and any ordering are accepted
+ "GRPO",
+ "group relative policy optimization",
+ "group relative preference optimization",
+ "group-relative policy optimization",
+ "group-relative preference optimization",
+ ]),
+ );
+ expect(grpo?.tags).toEqual(expect.arrayContaining(["foundations"])); // only requires that "foundations" is among the tags
+ expect(grpo?.facets.conceptType).toBe("training");
+ expect(grpo?.facets.variantGroup).toBe(
+ "group-relative-reinforcement-learning",
+ );
+ });
});