diff --git a/src/app/docs/docs-slug-renderer.test.tsx b/src/app/docs/docs-slug-renderer.test.tsx
index 448a2f14..28c31774 100644
--- a/src/app/docs/docs-slug-renderer.test.tsx
+++ b/src/app/docs/docs-slug-renderer.test.tsx
@@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test";
 import {
   buildDocsPageMetadata,
   renderDocsSlugPage,
+  resolveLocalDocsShellDescription,
 } from "@/app/docs/docs-slug-renderer";
 
 describe("docs slug renderer locale gating", () => {
@@ -243,4 +244,21 @@ describe("docs slug renderer locale gating", () => {
       );
     }
   });
+
+  test("local non-glossary docs shell prefers openingSummary when present", () => {
+    // Both fields are populated, so only the opening summary may surface.
+    const description =
+      "A post-training reinforcement-learning method that compares several sampled answers for the same prompt and updates the model from their relative quality.";
+    const openingSummary =
+      "Group Relative Policy Optimization, usually shortened to GRPO, is a reinforcement-learning post-training method where the model samples several answers to one prompt, scores them as a group, and learns from which answers look better inside that local set.";
+    const resolved = resolveLocalDocsShellDescription({
+      description,
+      openingSummary,
+      section: "training",
+    });
+    expect(resolved).toContain(
+      "Group Relative Policy Optimization, usually shortened to GRPO",
+    );
+    expect(resolved).not.toContain(description);
+  });
 });
diff --git a/src/app/docs/docs-slug-renderer.tsx b/src/app/docs/docs-slug-renderer.tsx
index 437a0027..9b9678bd 100644
--- a/src/app/docs/docs-slug-renderer.tsx
+++ b/src/app/docs/docs-slug-renderer.tsx
@@ -20,6 +20,19 @@ import { localizedRouteAlternates } from "@/lib/i18n/route-locale";
 import { source } from "@/lib/source";
 import { getMDXComponents } from "../../../mdx-components";
 
+/** Shell description text: glossary pages keep `description`; other sections prefer a non-blank `openingSummary`. */
+export function resolveLocalDocsShellDescription(options: {
+  description: string;
+  openingSummary?: string;
+  section: string;
+}): string {
+  const { description, openingSummary, section } = options;
+  // `??` alone would let an empty or whitespace-only summary blank out the shell description.
+  const summary = openingSummary?.trim() ? openingSummary : undefined;
+
+  return section === "glossary" ? description : (summary ?? description);
+}
+
 function buildDocsPageAlternates(docsSlug: string) {
   const alternates = localizedRouteAlternates({
     surface: "docs-page",
@@ -53,11 +66,16 @@ async function renderLocalDocsPage(
 
   const loadedPage = await loadLocalDocsPage(localRef, locale);
   const uiMessages = await loadUiMessages(locale);
+  const shellDescriptionText = resolveLocalDocsShellDescription({
+    description: loadedPage.messages.description,
+    openingSummary: loadedPage.messages.openingSummary,
+    section: localRef.section,
+  });
   const description =
     localRef.section === "glossary" ? (
       <DocsAutoLinkedDescription text={loadedPage.messages.description} />
     ) : (
-      loadedPage.messages.description
+      shellDescriptionText
     );
 
   return (
diff --git a/src/content/docs/training/grpo/assets.json b/src/content/docs/training/grpo/assets.json
new file mode 100644
index 00000000..04ee7b59
--- /dev/null
+++ b/src/content/docs/training/grpo/assets.json
@@ -0,0 +1,10 @@
+{
+  "trainingFlow": {
+    "type": "graph",
+    "graphId": "graph.grpo-training-flow",
+    "webRenderer": "react-flow",
+    "printRenderer": "vertical-svg",
+    "altKey": "assets.trainingFlow.alt",
+    "captionKey": "assets.trainingFlow.caption"
+  }
+}
diff --git a/src/content/docs/training/grpo/messages/en.json b/src/content/docs/training/grpo/messages/en.json
new file mode 100644
index 00000000..7998b9c8
--- /dev/null
+++ b/src/content/docs/training/grpo/messages/en.json
@@ -0,0 +1,94 @@
+{
+  "title": "Group Relative Policy Optimization",
+  "description": "A post-training reinforcement-learning method that compares several sampled answers for the same prompt and updates the model from their relative quality.",
+  "openingSummary": "Group Relative Policy Optimization, usually shortened to GRPO, is a reinforcement-learning post-training method where the model samples several answers to one prompt, scores them as a group, and learns from which answers look better inside that local set.",
+  "sections": {
+    "whatItIs": {
+      "title": "What It Is",
+      "body": "Group Relative Policy Optimization is a policy-update method used after pretraining or supervised fine-tuning. Instead of judging one answer in isolation, it samples a small group of candidate answers for the same prompt and uses their relative ranking to decide which behaviors should be reinforced."
+    },
+    "whyItExists": {
+      "title": "Why It Exists",
+      "body": "A single reward score can be noisy, and PPO-style language-model training often adds the extra cost of a learned critic to estimate a baseline. GRPO tries to keep the signal useful while simplifying the loop: the group itself supplies the local baseline, so the model can learn from which samples look better than their neighbors."
+    },
+    "howItWorks": {
+      "title": "How It Works",
+      "body": "For one prompt, the policy generates several completions, a reward function scores them, and those scores are normalized within that group. Answers above the group average get a positive learning signal, answers below it get a negative one, and the policy is updated so future samples are more likely to resemble the stronger members of the group."
+    },
+    "comparedToNearbyRegimes": {
+      "title": "Compared To Nearby Regimes",
+      "body": "Group Relative Policy Optimization still sits inside the broader alignment family, but it is narrower than Reinforcement Learning from Human Feedback as a full pipeline. Reinforcement Learning from Human Feedback often means collecting preference data, training a reward model, and then running a reinforcement-learning update such as Proximal Policy Optimization. GRPO keeps the reinforcement-learning loop, but it replaces the learned critic-style baseline with relative ranking inside one sampled group. Direct Preference Optimization moves in a different direction: it stays closer to supervised-style optimization on chosen-versus-rejected pairs, while GRPO uses rewards over several sampled answers and updates the policy from that within-group ordering instead of from one pairwise objective alone."
+    },
+    "limitationsAndFailureModes": {
+      "title": "Limitations And Failure Modes",
+      "body": "The method still depends on reward quality. If the reward function prefers shallow tricks, the whole group can drift in the wrong direction together. Relative scoring also means a weak group can still produce a misleading winner if every sampled answer is bad."
+    },
+    "related": {
+      "title": "Related To"
+    },
+    "tags": {
+      "title": "Tags"
+    },
+    "references": {
+      "title": "References"
+    }
+  },
+  "callouts": {
+    "trainingFlowGraph": {
+      "title": "GRPO training flow",
+      "body": "A visual walkthrough of one prompt, grouped sampling, relative scoring, and the policy update."
+    },
+    "trainingFlowLegend": {
+      "title": "Graph legend",
+      "body": "How to read each stage of the GRPO training flow."
+    }
+  },
+  "links": {
+    "trainingFlowLegendPrompt": "One prompt anchors the whole local comparison.",
+    "trainingFlowLegendSampling": "The policy samples several candidate answers for that same prompt.",
+    "trainingFlowLegendRelativeScoring": "Those answers are scored relative to each other inside the sampled group.",
+    "trainingFlowLegendPolicyUpdate": "The policy update reinforces answers that beat the group baseline and discourages weaker ones."
+  },
+  "assets": {
+    "trainingFlow": {
+      "alt": "A training flow from one prompt to a group of sampled answers, then to relative scoring inside the group, and finally to a policy update.",
+      "caption": "GRPO learns from which answers win inside each sampled group instead of relying on one separate critic-estimated baseline."
+    }
+  },
+  "math": {
+    "grpoAdvantage": {
+      "label": "Grouped relative advantage sketch",
+      "formula": "A_i = \\frac{r_i - \\operatorname{mean}(r_{1:G})}{\\operatorname{std}(r_{1:G})}",
+      "variableDefinitions": {
+        "advantage": {
+          "term": "A_i",
+          "definition": "normalized advantage for sampled answer i"
+        },
+        "reward": {
+          "term": "r_i",
+          "definition": "reward score assigned to sampled answer i"
+        },
+        "groupSize": {
+          "term": "G",
+          "definition": "number of sampled answers in the comparison group"
+        }
+      }
+    }
+  },
+  "graph": {
+    "nodes": {
+      "prompt": {
+        "label": "One prompt"
+      },
+      "sampleGroup": {
+        "label": "Sample a group of answers"
+      },
+      "relativeScore": {
+        "label": "Score answers relative to the group"
+      },
+      "policyUpdate": {
+        "label": "Update the policy"
+      }
+    }
+  }
+}
diff --git a/src/content/docs/training/grpo/page.mdx b/src/content/docs/training/grpo/page.mdx
new file mode 100644
index 00000000..e75d83ff
--- /dev/null
+++ b/src/content/docs/training/grpo/page.mdx
@@ -0,0 +1,124 @@
+---
+title: "Group Relative Policy Optimization"
+description: "A post-training reinforcement-learning method that compares several sampled answers for the same prompt and updates the model from their relative quality."
+kind: "training-regime"
+registryId: "training-regime.grpo"
+messageNamespace: "local"
+assetNamespace: "local"
+status: "published"
+tags:
+  - foundations
+aliases:
+  - "GRPO"
+  - "Group Relative Preference Optimization"
+updatedAt: "2026-06-19"
+---
+
+import { CitationList } from "@/features/docs/components/CitationList";
+import { BlockMath } from "@/features/docs/components/Math";
+import { RelatedDocs } from "@/features/docs/components/RelatedDocs";
+import { Section } from "@/features/docs/components/Section";
+import { T } from "@/features/docs/components/T";
+import { TagPillList } from "@/features/docs/components/TagPillList";
+import { TrainingRegimeAtAGlance } from "@/features/models/components/TrainingRegimeAtAGlance";
+import { TrainingRegimeFlow } from "@/features/models/components/TrainingRegimeFlow";
+
+
+<TrainingRegimeAtAGlance registryId="training-regime.grpo" />
+
+<Section id="what-it-is" titleKey="sections.whatItIs.title">
+  <T k="sections.whatItIs.body" />
+</Section>
+
+<Section id="why-it-exists" titleKey="sections.whyItExists.title">
+  <T k="sections.whyItExists.body" />
+</Section>
+
+<Section id="how-it-works" titleKey="sections.howItWorks.title">
+  <T k="sections.howItWorks.body" />
+  <h3 className="text-base font-semibold text-foreground">
+    <T k="callouts.trainingFlowGraph.title" />
+  </h3>
+  <TrainingRegimeFlow registryId="training-regime.grpo" assetId="trainingFlow" />
+  <div className="rounded-lg border border-border bg-card p-4">
+    <p className="text-sm font-medium text-foreground">
+      <T k="callouts.trainingFlowLegend.title" />
+    </p>
+    <ul className="mt-3 space-y-2 pl-5 text-sm text-muted-foreground">
+      <li>
+        <T k="links.trainingFlowLegendPrompt" />
+      </li>
+      <li>
+        <T k="links.trainingFlowLegendSampling" />
+      </li>
+      <li>
+        <T k="links.trainingFlowLegendRelativeScoring" />
+      </li>
+      <li>
+        <T k="links.trainingFlowLegendPolicyUpdate" />
+      </li>
+    </ul>
+  </div>
+  <BlockMath mathId="grpoAdvantage" />
+</Section>
+
+<Section
+  id="compared-to-nearby-regimes"
+  titleKey="sections.comparedToNearbyRegimes.title"
+>
+  <T k="sections.comparedToNearbyRegimes.body" />
+
+  <ul className="mt-4 space-y-2 list-none pl-0">
+    <li>
+      <a
+        className="text-primary underline underline-offset-4"
+        href="/docs/glossary/alignment"
+      >
+        Alignment
+      </a>
+    </li>
+    <li>
+      <a
+        className="text-primary underline underline-offset-4"
+        href="/docs/glossary/alignment"
+      >
+        RLHF
+      </a>
+    </li>
+    <li>
+      <a
+        className="text-primary underline underline-offset-4"
+        href="/search?q=ppo"
+      >
+        PPO
+      </a>
+    </li>
+    <li>
+      <a
+        className="text-primary underline underline-offset-4"
+        href="/search?q=dpo"
+      >
+        DPO
+      </a>
+    </li>
+  </ul>
+</Section>
+
+<Section
+  id="limitations-and-failure-modes"
+  titleKey="sections.limitationsAndFailureModes.title"
+>
+  <T k="sections.limitationsAndFailureModes.body" />
+</Section>
+
+<Section id="related" titleKey="sections.related.title">
+  <RelatedDocs registryId="training-regime.grpo" />
+</Section>
+
+<Section id="tags" titleKey="sections.tags.title">
+  <TagPillList registryId="training-regime.grpo" showDescriptions />
+</Section>
+
+<Section id="references" titleKey="sections.references.title">
+  <CitationList registryId="training-regime.grpo" />
+</Section>
diff --git a/src/content/registry/citations/deepseek-r1-paper.json b/src/content/registry/citations/deepseek-r1-paper.json
new file mode 100644
index 00000000..d8c810d7
--- /dev/null
+++ b/src/content/registry/citations/deepseek-r1-paper.json
@@ -0,0 +1,20 @@
+{
+  "id": "citation.deepseek-r1-paper",
+  "slug": "deepseek-r1-paper",
+  "kind": "citation",
+  "defaultTitleKey": "title",
+  "defaultSummaryKey": "summary",
+  "aliases": ["DeepSeek-R1 paper"],
+  "tags": ["foundations"],
+  "relatedIds": [],
+  "citationIds": [],
+  "status": "published",
+  "createdAt": "2026-06-19T00:00:00.000Z",
+  "updatedAt": "2026-06-19T00:00:00.000Z",
+  "citationType": "paper",
+  "authors": ["DeepSeek-AI"],
+  "title": "DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning",
+  "year": 2025,
+  "url": "https://arxiv.org/abs/2501.12948",
+  "mla": "DeepSeek-AI. \"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning.\" arXiv, 2025, https://arxiv.org/abs/2501.12948."
+}
diff --git a/src/content/registry/citations/deepseekmath-paper.json b/src/content/registry/citations/deepseekmath-paper.json
new file mode 100644
index 00000000..c4a55291
--- /dev/null
+++ b/src/content/registry/citations/deepseekmath-paper.json
@@ -0,0 +1,32 @@
+{
+  "id": "citation.deepseekmath-paper",
+  "slug": "deepseekmath-paper",
+  "kind": "citation",
+  "defaultTitleKey": "title",
+  "defaultSummaryKey": "summary",
+  "aliases": ["DeepSeekMath paper", "GRPO paper"],
+  "tags": ["foundations"],
+  "relatedIds": [],
+  "citationIds": [],
+  "status": "published",
+  "createdAt": "2026-06-19T00:00:00.000Z",
+  "updatedAt": "2026-06-19T00:00:00.000Z",
+  "citationType": "paper",
+  "authors": [
+    "Zhihong Shao",
+    "Peiyi Wang",
+    "Qihao Zhu",
+    "Runxin Xu",
+    "Junxiao Song",
+    "Xiao Bi",
+    "Haowei Zhang",
+    "Mingchuan Zhang",
+    "Y. K. Li",
+    "Yu Wu",
+    "Daya Guo"
+  ],
+  "title": "DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models",
+  "year": 2024,
+  "url": "https://arxiv.org/abs/2402.03300",
+  "mla": "Shao, Zhihong, et al. \"DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models.\" arXiv, 2024, https://arxiv.org/abs/2402.03300."
+}
diff --git a/src/content/registry/concepts/alignment.json b/src/content/registry/concepts/alignment.json
index ace37f0f..6fcafc7d 100644
--- a/src/content/registry/concepts/alignment.json
+++ b/src/content/registry/concepts/alignment.json
@@ -7,6 +7,7 @@
   "aliases": ["Alignment", "RLHF", "preference alignment", "safety alignment"],
   "tags": ["foundations", "taxonomy"],
   "relatedIds": [
+    "training-regime.grpo",
     "concept.model-capacity",
     "concept.overfitting",
     "concept.generalization",
diff --git a/src/content/registry/graphs/grpo-training-flow.json b/src/content/registry/graphs/grpo-training-flow.json
new file mode 100644
index 00000000..b5164d02
--- /dev/null
+++ b/src/content/registry/graphs/grpo-training-flow.json
@@ -0,0 +1,84 @@
+{
+  "id": "graph.grpo-training-flow",
+  "slug": "grpo-training-flow",
+  "kind": "graph",
+  "defaultTitleKey": "title",
+  "defaultSummaryKey": "description",
+  "aliases": ["GRPO training flow"],
+  "tags": ["foundations"],
+  "relatedIds": [],
+  "citationIds": ["citation.deepseekmath-paper"],
+  "status": "published",
+  "createdAt": "2026-06-19T00:00:00.000Z",
+  "updatedAt": "2026-06-19T00:00:00.000Z",
+  "subjectId": "training-regime.grpo",
+  "graphType": "recursive-module-graph",
+  "rootNodeId": "prompt",
+  "layout": "vertical-expandable",
+  "defaultExpandedDepth": 1,
+  "supportedRenderers": ["react-flow", "vertical-svg"],
+  "nodes": [
+    {
+      "id": "prompt",
+      "labelKey": "graph.nodes.prompt.label",
+      "moduleKind": "input",
+      "position": { "x": 250, "y": 0 },
+      "size": { "width": 220, "height": 70 },
+      "visualRole": "process-node",
+      "childNodeIds": ["sample-group"]
+    },
+    {
+      "id": "sample-group",
+      "labelKey": "graph.nodes.sampleGroup.label",
+      "moduleKind": "block",
+      "position": { "x": 250, "y": 110 },
+      "size": { "width": 220, "height": 80 },
+      "visualRole": "summary-node",
+      "childNodeIds": ["relative-score"]
+    },
+    {
+      "id": "relative-score",
+      "labelKey": "graph.nodes.relativeScore.label",
+      "moduleKind": "block",
+      "position": { "x": 250, "y": 240 },
+      "size": { "width": 220, "height": 80 },
+      "visualRole": "summary-node",
+      "childNodeIds": ["policy-update"]
+    },
+    {
+      "id": "policy-update",
+      "labelKey": "graph.nodes.policyUpdate.label",
+      "moduleKind": "output",
+      "position": { "x": 250, "y": 370 },
+      "size": { "width": 220, "height": 70 },
+      "visualRole": "process-node",
+      "childNodeIds": []
+    }
+  ],
+  "edges": [
+    {
+      "id": "prompt-sample-group",
+      "source": "prompt",
+      "target": "sample-group",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "bottom",
+      "targetHandleSide": "top"
+    },
+    {
+      "id": "sample-group-relative-score",
+      "source": "sample-group",
+      "target": "relative-score",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "bottom",
+      "targetHandleSide": "top"
+    },
+    {
+      "id": "relative-score-policy-update",
+      "source": "relative-score",
+      "target": "policy-update",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "bottom",
+      "targetHandleSide": "top"
+    }
+  ]
+}
diff --git a/src/content/registry/training-regimes/grpo.json b/src/content/registry/training-regimes/grpo.json
new file mode 100644
index 00000000..4da4b3f0
--- /dev/null
+++ b/src/content/registry/training-regimes/grpo.json
@@ -0,0 +1,41 @@
+{
+  "id": "training-regime.grpo",
+  "slug": "grpo",
+  "kind": "training-regime",
+  "defaultTitleKey": "title",
+  "defaultSummaryKey": "description",
+  "aliases": [
+    "GRPO",
+    "group relative policy optimization",
+    "group relative preference optimization",
+    "group-relative policy optimization",
+    "group-relative preference optimization"
+  ],
+  "tags": ["foundations"],
+  "relatedIds": ["concept.alignment"],
+  "citationIds": ["citation.deepseekmath-paper", "citation.deepseek-r1-paper"],
+  "status": "published",
+  "createdAt": "2026-06-19T00:00:00.000Z",
+  "updatedAt": "2026-06-19T00:00:00.000Z",
+  "releaseDate": "2024-02-05",
+  "authors": [
+    "Zhihong Shao",
+    "Peiyi Wang",
+    "Qihao Zhu",
+    "Runxin Xu",
+    "Junxiao Song",
+    "Xiao Bi",
+    "Haowei Zhang",
+    "Mingchuan Zhang",
+    "Y. K. Li",
+    "Yu Wu",
+    "Daya Guo"
+  ],
+  "sourceId": "citation.deepseekmath-paper",
+  "regimeType": "optimization",
+  "usedByModelIds": [],
+  "relatedModuleIds": [],
+  "paperIds": [],
+  "conceptType": "training",
+  "variantGroup": "group-relative-reinforcement-learning"
+}
diff --git a/src/features/docs/components/Math.tsx b/src/features/docs/components/Math.tsx
index 91335658..9ea4ae4c 100644
--- a/src/features/docs/components/Math.tsx
+++ b/src/features/docs/components/Math.tsx
@@ -1,11 +1,19 @@
+"use client";
+
 import katex from "katex";
+import { MissingMessageKey } from "@/features/docs/components/MissingMessageKey";
+import { ProseAutoLinkText } from "@/features/docs/components/ProseAutoLinkText";
+import { useOptionalPageMessagesContext } from "@/features/docs/components/page-messages-context";
+import { lookupMessage } from "@/lib/content/messages";
 
 type MathProps = {
-  formula: string;
+  formula?: string;
+  label?: string;
+  mathId?: string;
 };
 
 export function InlineMath({ formula }: MathProps) {
-  const html = katex.renderToString(formula, {
+  const html = katex.renderToString(formula ?? "", {
     throwOnError: false,
     displayMode: false,
   });
@@ -19,20 +27,108 @@ export function InlineMath({ formula }: MathProps) {
   );
 }
 
-export function BlockMath({ formula }: MathProps) {
-  const html = katex.renderToString(formula, {
+/** Lists each symbol from `math.<mathId>.variableDefinitions` next to its definition. */
+function MathVariableDefinitions({ mathId }: { mathId: string }) {
+  const pageMessages = useOptionalPageMessagesContext();
+  const rows = pageMessages?.messages.math?.[mathId]?.variableDefinitions;
+
+  if (!pageMessages) {
+    return null;
+  }
+  if (!rows || Object.keys(rows).length === 0) {
+    // Only dev builds surface the missing key; otherwise render nothing.
+    return pageMessages.isDev ? (
+      <MissingMessageKey
+        messageKey={`math.${mathId}.variableDefinitions`}
+        reason="missing"
+      />
+    ) : null;
+  }
+
+  return (
+    <div
+      className="rounded-lg border border-border bg-card p-4"
+      data-page-math-variable-definitions={mathId}
+    >
+      <dl className="space-y-2">
+        {Object.entries(rows).map(([variableId, entry]) => (
+          <div
+            key={variableId}
+            className="flex flex-col gap-0 sm:flex-row sm:items-baseline sm:gap-4"
+            data-math-variable-definition={variableId}
+          >
+            <dt className="w-40 shrink-0 text-sm font-medium text-foreground">
+              <InlineMath formula={entry.term} />
+            </dt>
+            <dd className="text-sm text-muted-foreground">
+              <ProseAutoLinkText text={entry.definition} />
+            </dd>
+          </div>
+        ))}
+      </dl>
+    </div>
+  );
+}
+/** Display-mode KaTeX block. When `mathId` resolves in the page messages, its formula/label replace the `formula`/`label` props, and the variable definitions for that id are rendered below. */
+export function BlockMath({ formula, label, mathId }: MathProps) {
+  const context = useOptionalPageMessagesContext();
+  const resolvedFormula =
+    mathId && context
+      ? lookupMessage(context.messages, `math.${mathId}.formula`)
+      : null;
+  const resolvedLabel =
+    mathId && context
+      ? lookupMessage(context.messages, `math.${mathId}.label`)
+      : null;
+  // Dev builds render the unresolved message key instead of silently falling back to props.
+  if (mathId && context?.isDev) {
+    if (!resolvedFormula?.ok) {
+      return (
+        <MissingMessageKey
+          messageKey={`math.${mathId}.formula`}
+          reason={resolvedFormula?.reason ?? "missing"}
+        />
+      );
+    }
+
+    if (!resolvedLabel?.ok) {
+      return (
+        <MissingMessageKey
+          messageKey={`math.${mathId}.label`}
+          reason={resolvedLabel?.reason ?? "missing"}
+        />
+      );
+    }
+  }
+  // A resolved message value wins; the `formula` prop is the fallback.
+  const displayFormula = resolvedFormula?.ok ? resolvedFormula.value : formula;
+  // Nothing to render when neither source supplied a formula.
+  if (!displayFormula) {
+    return null;
+  }
+  const displayLabel = resolvedLabel?.ok ? resolvedLabel.value : label;
+  const html = katex.renderToString(displayFormula, {
     throwOnError: false,
     displayMode: true,
   });
 
   return (
-    <div
-      className="katex-display my-4 max-w-full overflow-x-auto not-prose"
-      role="math"
-      aria-label={formula}
-      data-rich-content-scroll="math"
-      // biome-ignore lint/security/noDangerouslySetInnerHtml: KaTeX emits trusted formula HTML from author MDX
-      dangerouslySetInnerHTML={{ __html: html }}
-    />
+    <div className="not-prose my-4 flex min-w-0 max-w-full flex-col gap-3">
+      {displayLabel ? (
+        <p className="text-sm font-medium text-muted-foreground">
+          {displayLabel}
+        </p>
+      ) : null}
+      <div
+        className="katex-display max-w-full overflow-x-auto"
+        role="math"
+        aria-label={displayFormula}
+        data-rich-content-scroll="math"
+        data-page-math-formula={mathId}
+        // biome-ignore lint/security/noDangerouslySetInnerHtml: KaTeX emits trusted formula HTML from author MDX
+        dangerouslySetInnerHTML={{ __html: html }}
+      />
+      {mathId ? <MathVariableDefinitions mathId={mathId} /> : null}
+    </div>
   );
 }
diff --git a/src/lib/content/graph-registry-runtime.generated.ts b/src/lib/content/graph-registry-runtime.generated.ts
index f003fadb..18313738 100644
--- a/src/lib/content/graph-registry-runtime.generated.ts
+++ b/src/lib/content/graph-registry-runtime.generated.ts
@@ -21,6 +21,7 @@ import groupedQueryAttentionComputeFlowGraphRecord from "@/content/registry/grap
 import groupedQueryAttentionComputeSchemaGraphRecord from "@/content/registry/graphs/grouped-query-attention-compute-schema.json";
 import groupedQueryAttentionGqaComparisonGraphRecord from "@/content/registry/graphs/grouped-query-attention-gqa-comparison.json";
 import groupedQueryAttentionMhaComparisonGraphRecord from "@/content/registry/graphs/grouped-query-attention-mha-comparison.json";
+import grpoTrainingFlowGraphRecord from "@/content/registry/graphs/grpo-training-flow.json";
 import heavilyCompressedAttentionFlowGraphRecord from "@/content/registry/graphs/heavily-compressed-attention-flow.json";
 import layerNormComputeFlowGraphRecord from "@/content/registry/graphs/layer-norm-compute-flow.json";
 import leakyReluActivationFlowGraphRecord from "@/content/registry/graphs/leaky-relu-activation-flow.json";
@@ -73,6 +74,7 @@ export const graphRecords: GraphRecord[] = [
   graphRecordSchema.parse(groupedQueryAttentionComputeSchemaGraphRecord),
   graphRecordSchema.parse(groupedQueryAttentionGqaComparisonGraphRecord),
   graphRecordSchema.parse(groupedQueryAttentionMhaComparisonGraphRecord),
+  graphRecordSchema.parse(grpoTrainingFlowGraphRecord),
   graphRecordSchema.parse(heavilyCompressedAttentionFlowGraphRecord),
   graphRecordSchema.parse(layerNormComputeFlowGraphRecord),
   graphRecordSchema.parse(leakyReluActivationFlowGraphRecord),
diff --git a/src/lib/content/graph-registry-runtime.test.ts b/src/lib/content/graph-registry-runtime.test.ts
index 97a26ab1..97ac587d 100644
--- a/src/lib/content/graph-registry-runtime.test.ts
+++ b/src/lib/content/graph-registry-runtime.test.ts
@@ -161,10 +161,13 @@ describe("graph-registry-runtime", () => {
   test("lists all bundled graph records", () => {
     const records = listGraphRecords();
 
-    expect(records.length).toBe(49);
+    expect(records.length).toBe(50);
     expect(records.map((record) => record.id)).toContain(
       "graph.bpe-compute-flow",
     );
+    expect(records.map((record) => record.id)).toContain(
+      "graph.grpo-training-flow",
+    );
     expect(records.map((record) => record.id)).toContain(
       "graph.sentencepiece-compute-flow",
     );
diff --git a/src/lib/content/grpo-training-regime-comparisons.test.tsx b/src/lib/content/grpo-training-regime-comparisons.test.tsx
new file mode 100644
index 00000000..b3b3a490
--- /dev/null
+++ b/src/lib/content/grpo-training-regime-comparisons.test.tsx
@@ -0,0 +1,67 @@
+import { describe, expect, test } from "bun:test";
+import { createElement } from "react";
+import { renderToReadableStream } from "react-dom/server";
+import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
+import { loadTrainingRegimePageFromDisk } from "@/lib/content/training-regime-page-load";
+
+describe("grpo training regime comparisons", () => {
+  // Server-renders the on-disk GRPO page inside its providers and resolves
+  // with the loaded page plus the HTML produced once `allReady` settles.
+  async function renderGrpoPage() {
+    const page = await loadTrainingRegimePageFromDisk("grpo");
+    const stream = await renderToReadableStream(
+      createElement(
+        ModulePageProviders,
+        { messages: page.messages, assets: page.assets },
+        page.content,
+      ),
+    );
+    await stream.allReady;
+    return { page, html: await new Response(stream).text() };
+  }
+
+  test("page explains nearby alignment methods and renders stable reader links", async () => {
+    const { page, html } = await renderGrpoPage();
+    expect(page.frontmatter.registryId).toBe("training-regime.grpo");
+    // The long-form RLHF name is matched case-insensitively.
+    expect(html.toLowerCase()).toContain(
+      "reinforcement learning from human feedback",
+    );
+    // Comparison prose, link labels, and link targets, matched verbatim.
+    const expectedFragments = [
+      "Proximal Policy Optimization",
+      "Direct Preference Optimization",
+      "pairwise objective",
+      "relative ranking inside one sampled group",
+      ">Alignment<",
+      ">RLHF<",
+      ">PPO<",
+      ">DPO<",
+      'href="/docs/glossary/alignment"',
+      'href="/search?q=ppo"',
+      'href="/search?q=dpo"',
+    ];
+    for (const fragment of expectedFragments) {
+      expect(html).toContain(fragment);
+    }
+  });
+
+  test("page renders the graph title, legend, and symbol-only math definitions for the GRPO loop", async () => {
+    const { html } = await renderGrpoPage();
+
+    // Graph heading, legend copy, math data attributes, and variable rows.
+    const expectedFragments = [
+      "GRPO training flow",
+      "Graph legend",
+      "One prompt anchors the whole local comparison.",
+      'data-page-math-formula="grpoAdvantage"',
+      'data-page-math-variable-definitions="grpoAdvantage"',
+      "normalized advantage for sampled answer i",
+      "reward score assigned to sampled answer i",
+      "number of sampled answers in the comparison group",
+    ];
+    for (const fragment of expectedFragments) {
+      expect(html).toContain(fragment);
+    }
+  });
+});
diff --git a/src/lib/content/grpo-training-regime-contract.test.ts b/src/lib/content/grpo-training-regime-contract.test.ts
new file mode 100644
index 00000000..2ec35f73
--- /dev/null
+++ b/src/lib/content/grpo-training-regime-contract.test.ts
@@ -0,0 +1,47 @@
+import { describe, expect, test } from "bun:test";
+import { loadLocalDocsPage } from "@/lib/content/local-docs-page";
+import { getTrainingRegimeById } from "@/lib/content/registry-runtime.generated";
+import { docsSearchApi } from "@/lib/search/search-server";
+import { source } from "@/lib/source";
+
+describe("grpo training regime contract", () => {
+  test("canonical route, localized content, registry metadata, and discovery query resolve together", async () => {
+    // The page load and the alias search are independent, so run them together.
+    const [grpoPage, hits] = await Promise.all([
+      loadLocalDocsPage({ section: "training", slug: "grpo" }),
+      docsSearchApi.search("group relative preference optimization"),
+    ]);
+    const grpoRoute = source.getPage(["training", "grpo"]);
+    const grpoRecord = getTrainingRegimeById("training-regime.grpo");
+    // Route and registry record must both exist before comparing details.
+    expect(grpoRoute?.url).toBe("/docs/training/grpo");
+    expect(grpoRecord).toBeDefined();
+    if (!grpoRecord) {
+      throw new Error("Expected training-regime.grpo registry record to exist");
+    }
+    // Localized page content.
+    const { frontmatter, messages } = grpoPage;
+    expect(frontmatter.registryId).toBe(grpoRecord.id);
+    expect(messages.title).toBe("Group Relative Policy Optimization");
+    expect(messages.openingSummary).toContain(
+      "samples several answers to one prompt",
+    );
+    expect(messages.sections?.howItWorks?.body).toContain(
+      "normalized within that group",
+    );
+    // Registry metadata.
+    const { aliases, kind, relatedIds, slug, variantGroup } = grpoRecord;
+    expect(kind).toBe("training-regime");
+    expect(slug).toBe("grpo");
+    expect(aliases).toEqual(
+      expect.arrayContaining([
+        "GRPO",
+        "group relative preference optimization",
+      ]),
+    );
+    expect(relatedIds).toContain("concept.alignment");
+    expect(variantGroup).toBe("group-relative-reinforcement-learning");
+    // Searching by the alias should rank the canonical page first.
+    expect(hits[0]?.url).toBe("/docs/training/grpo");
+  });
+});
diff --git a/src/lib/content/local-docs-page.test.ts b/src/lib/content/local-docs-page.test.ts
index d945b054..2c8468f3 100644
--- a/src/lib/content/local-docs-page.test.ts
+++ b/src/lib/content/local-docs-page.test.ts
@@ -162,6 +162,18 @@ describe("docs source local pages", () => {
     expect(page.toc.some(hasTocUrl("#what-it-is"))).toBe(true);
   });
 
+  test("loadLocalDocsPage resolves the canonical GRPO training page through the shared route contract", async () => {
+    const page = await loadLocalDocsPage({
+      section: "training",
+      slug: "grpo",
+    });
+
+    expect(page.messages.title).toBe("Group Relative Policy Optimization");
+    expect(page.messages.sections?.howItWorks?.title).toBe("How It Works");
+    expect(page.frontmatter.registryId).toBe("training-regime.grpo");
+    expect(page.toc.some(hasTocUrl("#what-it-is"))).toBe(true);
+  });
+
   test("loadLocalDocsPage resolves shipped vietnamese canonical page messages without changing the shared MDX route contract", async () => {
     const page = await loadLocalDocsPage(
       {
diff --git a/src/lib/content/published-docs-registry-manifest.ts b/src/lib/content/published-docs-registry-manifest.ts
index b462163e..f2b527f9 100644
--- a/src/lib/content/published-docs-registry-manifest.ts
+++ b/src/lib/content/published-docs-registry-manifest.ts
@@ -34,6 +34,14 @@ export const GENERATED_PUBLISHED_DOCS_ENTRIES = [
     pageKind: "training-regime",
     section: "training",
   },
+  {
+    registryId: "training-regime.grpo",
+    slug: "grpo",
+    docsSlug: "training/grpo",
+    url: "/docs/training/grpo",
+    pageKind: "training-regime",
+    section: "training",
+  },
   {
     registryId: "training-regime.on-policy-distillation",
     slug: "on-policy-distillation",
@@ -1161,6 +1169,7 @@ export const GENERATED_PUBLISHED_DOCS_REGISTRY_IDS = [
   "system.routing",
   "training-regime.dpo",
   "training-regime.fp4-quantization-aware-training",
+  "training-regime.grpo",
   "training-regime.on-policy-distillation",
   "training-regime.specialist-training",
 ] as const;
diff --git a/src/lib/content/published-docs-routing-contract.test.ts b/src/lib/content/published-docs-routing-contract.test.ts
index bb7ef5a7..a8aaf7d0 100644
--- a/src/lib/content/published-docs-routing-contract.test.ts
+++ b/src/lib/content/published-docs-routing-contract.test.ts
@@ -86,6 +86,14 @@ describe("published docs routing contract", () => {
         ),
         href: "/docs/training/dpo",
       },
+      {
+        label: "grpo training regime",
+        record: requireRecord(
+          getTrainingRegimeById("training-regime.grpo"),
+          "grpo training regime",
+        ),
+        href: "/docs/training/grpo",
+      },
       {
         label: "system",
         record: requireRecord(
diff --git a/src/lib/content/registry-runtime.generated.ts b/src/lib/content/registry-runtime.generated.ts
index e00eafde..c60c2919 100644
--- a/src/lib/content/registry-runtime.generated.ts
+++ b/src/lib/content/registry-runtime.generated.ts
@@ -178,64 +178,67 @@ import registryRecord_143 from "../../content/registry/models/gpt-3.json";
 import registryRecord_144 from "../../content/registry/papers/deepseek-v4.json";
 import registryRecord_145 from "../../content/registry/training-regimes/dpo.json";
 import registryRecord_146 from "../../content/registry/training-regimes/fp4-quantization-aware-training.json";
-import registryRecord_147 from "../../content/registry/training-regimes/on-policy-distillation.json";
-import registryRecord_148 from "../../content/registry/training-regimes/specialist-training.json";
-import registryRecord_149 from "../../content/registry/systems/expert-parallel-overlap.json";
-import registryRecord_150 from "../../content/registry/systems/on-disk-kv-cache.json";
-import registryRecord_151 from "../../content/registry/systems/routing.json";
-import registryRecord_152 from "../../content/registry/datasets/deepseek-v4-specialist-corpus.json";
-import registryRecord_153 from "../../content/registry/organizations/deepseek-ai.json";
-import registryRecord_154 from "../../content/registry/citations/attention-is-all-you-need.json";
-import registryRecord_155 from "../../content/registry/citations/awq.json";
-import registryRecord_156 from "../../content/registry/citations/batch-normalization.json";
-import registryRecord_157 from "../../content/registry/citations/brown-gpt-3.json";
-import registryRecord_158 from "../../content/registry/citations/chen-positional-interpolation.json";
-import registryRecord_159 from "../../content/registry/citations/classifier-free-diffusion-guidance.json";
-import registryRecord_160 from "../../content/registry/citations/curious-case-neural-text-degeneration.json";
-import registryRecord_161 from "../../content/registry/citations/deepseek-v2-mla-paper.json";
-import registryRecord_162 from "../../content/registry/citations/deepseek-v4-paper.json";
-import registryRecord_163 from "../../content/registry/citations/denoising-diffusion-probabilistic-models.json";
-import registryRecord_164 from "../../content/registry/citations/ding-longrope.json";
-import registryRecord_165 from "../../content/registry/citations/direct-preference-optimization.json";
-import registryRecord_166 from "../../content/registry/citations/flamingo-visual-language-model.json";
-import registryRecord_167 from "../../content/registry/citations/glu-variants-improve-transformer.json";
-import registryRecord_168 from "../../content/registry/citations/goodfellow-deep-learning.json";
-import registryRecord_169 from "../../content/registry/citations/gpt-2-report.json";
-import registryRecord_170 from "../../content/registry/citations/gqa-paper.json";
-import registryRecord_171 from "../../content/registry/citations/group-normalization.json";
-import registryRecord_172 from "../../content/registry/citations/image-is-worth-16x16-words.json";
-import registryRecord_173 from "../../content/registry/citations/kaiokendev-superhot.json";
-import registryRecord_174 from "../../content/registry/citations/kaplan-scaling-laws.json";
-import registryRecord_175 from "../../content/registry/citations/katharopoulos-linear-attention-paper.json";
-import registryRecord_176 from "../../content/registry/citations/kingma-adam.json";
-import registryRecord_177 from "../../content/registry/citations/kivi-kv-cache-quantization.json";
-import registryRecord_178 from "../../content/registry/citations/kudo-sentencepiece.json";
-import registryRecord_179 from "../../content/registry/citations/layer-normalization.json";
-import registryRecord_180 from "../../content/registry/citations/learning-transferable-visual-models-from-natural-language-supervision.json";
-import registryRecord_181 from "../../content/registry/citations/longformer.json";
-import registryRecord_182 from "../../content/registry/citations/multilayer-feedforward-networks-are-universal-approximators.json";
-import registryRecord_183 from "../../content/registry/citations/on-policy-distillation-of-language-models.json";
-import registryRecord_184 from "../../content/registry/citations/peng-yarn.json";
-import registryRecord_185 from "../../content/registry/citations/press-alibi.json";
-import registryRecord_186 from "../../content/registry/citations/qlora.json";
-import registryRecord_187 from "../../content/registry/citations/quantization-integer-only-inference.json";
-import registryRecord_188 from "../../content/registry/citations/query-key-normalization-for-transformers.json";
-import registryRecord_189 from "../../content/registry/citations/raffel-t5.json";
-import registryRecord_190 from "../../content/registry/citations/rectified-linear-units-improve-restricted-boltzmann-machines.json";
-import registryRecord_191 from "../../content/registry/citations/rectifier-nonlinearities-improve-neural-network-acoustic-models.json";
-import registryRecord_192 from "../../content/registry/citations/root-mean-square-layer-normalization.json";
-import registryRecord_193 from "../../content/registry/citations/self-attention-with-relative-position-representations.json";
-import registryRecord_194 from "../../content/registry/citations/sennrich-bpe.json";
-import registryRecord_195 from "../../content/registry/citations/shazeer-mqa-paper.json";
-import registryRecord_196 from "../../content/registry/citations/sigmoid-weighted-linear-units.json";
-import registryRecord_197 from "../../content/registry/citations/smoothquant.json";
-import registryRecord_198 from "../../content/registry/citations/sparse-transformers.json";
-import registryRecord_199 from "../../content/registry/citations/sparsely-gated-mixture-of-experts-layer.json";
-import registryRecord_200 from "../../content/registry/citations/su-roformer-rope.json";
-import registryRecord_201 from "../../content/registry/citations/training-language-models-to-follow-instructions-with-human-feedback.json";
-import registryRecord_202 from "../../content/registry/citations/transformer-lms-without-positional-encodings.json";
-import registryRecord_203 from "../../content/registry/citations/wei-emergent-abilities.json";
-import registryRecord_204 from "../../content/registry/citations/world-models.json";
+import registryRecord_147 from "../../content/registry/training-regimes/grpo.json";
+import registryRecord_148 from "../../content/registry/training-regimes/on-policy-distillation.json";
+import registryRecord_149 from "../../content/registry/training-regimes/specialist-training.json";
+import registryRecord_150 from "../../content/registry/systems/expert-parallel-overlap.json";
+import registryRecord_151 from "../../content/registry/systems/on-disk-kv-cache.json";
+import registryRecord_152 from "../../content/registry/systems/routing.json";
+import registryRecord_153 from "../../content/registry/datasets/deepseek-v4-specialist-corpus.json";
+import registryRecord_154 from "../../content/registry/organizations/deepseek-ai.json";
+import registryRecord_155 from "../../content/registry/citations/attention-is-all-you-need.json";
+import registryRecord_156 from "../../content/registry/citations/awq.json";
+import registryRecord_157 from "../../content/registry/citations/batch-normalization.json";
+import registryRecord_158 from "../../content/registry/citations/brown-gpt-3.json";
+import registryRecord_159 from "../../content/registry/citations/chen-positional-interpolation.json";
+import registryRecord_160 from "../../content/registry/citations/classifier-free-diffusion-guidance.json";
+import registryRecord_161 from "../../content/registry/citations/curious-case-neural-text-degeneration.json";
+import registryRecord_162 from "../../content/registry/citations/deepseek-r1-paper.json";
+import registryRecord_163 from "../../content/registry/citations/deepseek-v2-mla-paper.json";
+import registryRecord_164 from "../../content/registry/citations/deepseek-v4-paper.json";
+import registryRecord_165 from "../../content/registry/citations/deepseekmath-paper.json";
+import registryRecord_166 from "../../content/registry/citations/denoising-diffusion-probabilistic-models.json";
+import registryRecord_167 from "../../content/registry/citations/ding-longrope.json";
+import registryRecord_168 from "../../content/registry/citations/direct-preference-optimization.json";
+import registryRecord_169 from "../../content/registry/citations/flamingo-visual-language-model.json";
+import registryRecord_170 from "../../content/registry/citations/glu-variants-improve-transformer.json";
+import registryRecord_171 from "../../content/registry/citations/goodfellow-deep-learning.json";
+import registryRecord_172 from "../../content/registry/citations/gpt-2-report.json";
+import registryRecord_173 from "../../content/registry/citations/gqa-paper.json";
+import registryRecord_174 from "../../content/registry/citations/group-normalization.json";
+import registryRecord_175 from "../../content/registry/citations/image-is-worth-16x16-words.json";
+import registryRecord_176 from "../../content/registry/citations/kaiokendev-superhot.json";
+import registryRecord_177 from "../../content/registry/citations/kaplan-scaling-laws.json";
+import registryRecord_178 from "../../content/registry/citations/katharopoulos-linear-attention-paper.json";
+import registryRecord_179 from "../../content/registry/citations/kingma-adam.json";
+import registryRecord_180 from "../../content/registry/citations/kivi-kv-cache-quantization.json";
+import registryRecord_181 from "../../content/registry/citations/kudo-sentencepiece.json";
+import registryRecord_182 from "../../content/registry/citations/layer-normalization.json";
+import registryRecord_183 from "../../content/registry/citations/learning-transferable-visual-models-from-natural-language-supervision.json";
+import registryRecord_184 from "../../content/registry/citations/longformer.json";
+import registryRecord_185 from "../../content/registry/citations/multilayer-feedforward-networks-are-universal-approximators.json";
+import registryRecord_186 from "../../content/registry/citations/on-policy-distillation-of-language-models.json";
+import registryRecord_187 from "../../content/registry/citations/peng-yarn.json";
+import registryRecord_188 from "../../content/registry/citations/press-alibi.json";
+import registryRecord_189 from "../../content/registry/citations/qlora.json";
+import registryRecord_190 from "../../content/registry/citations/quantization-integer-only-inference.json";
+import registryRecord_191 from "../../content/registry/citations/query-key-normalization-for-transformers.json";
+import registryRecord_192 from "../../content/registry/citations/raffel-t5.json";
+import registryRecord_193 from "../../content/registry/citations/rectified-linear-units-improve-restricted-boltzmann-machines.json";
+import registryRecord_194 from "../../content/registry/citations/rectifier-nonlinearities-improve-neural-network-acoustic-models.json";
+import registryRecord_195 from "../../content/registry/citations/root-mean-square-layer-normalization.json";
+import registryRecord_196 from "../../content/registry/citations/self-attention-with-relative-position-representations.json";
+import registryRecord_197 from "../../content/registry/citations/sennrich-bpe.json";
+import registryRecord_198 from "../../content/registry/citations/shazeer-mqa-paper.json";
+import registryRecord_199 from "../../content/registry/citations/sigmoid-weighted-linear-units.json";
+import registryRecord_200 from "../../content/registry/citations/smoothquant.json";
+import registryRecord_201 from "../../content/registry/citations/sparse-transformers.json";
+import registryRecord_202 from "../../content/registry/citations/sparsely-gated-mixture-of-experts-layer.json";
+import registryRecord_203 from "../../content/registry/citations/su-roformer-rope.json";
+import registryRecord_204 from "../../content/registry/citations/training-language-models-to-follow-instructions-with-human-feedback.json";
+import registryRecord_205 from "../../content/registry/citations/transformer-lms-without-positional-encodings.json";
+import registryRecord_206 from "../../content/registry/citations/wei-emergent-abilities.json";
+import registryRecord_207 from "../../content/registry/citations/world-models.json";
 
 const moduleRecords: ModuleRecord[] = [
   moduleRecordSchema.parse(registryRecord_0),
@@ -399,24 +402,24 @@ const trainingRegimeRecords: TrainingRegimeRecord[] = [
   trainingRegimeRecordSchema.parse(registryRecord_146),
   trainingRegimeRecordSchema.parse(registryRecord_147),
   trainingRegimeRecordSchema.parse(registryRecord_148),
+  trainingRegimeRecordSchema.parse(registryRecord_149),
 ];
 
 const systemRecords: SystemRecord[] = [
-  systemRecordSchema.parse(registryRecord_149),
   systemRecordSchema.parse(registryRecord_150),
   systemRecordSchema.parse(registryRecord_151),
+  systemRecordSchema.parse(registryRecord_152),
 ];
 
 const datasetRecords: DatasetRecord[] = [
-  datasetRecordSchema.parse(registryRecord_152),
+  datasetRecordSchema.parse(registryRecord_153),
 ];
 
 const organizationRecords: OrganizationRecord[] = [
-  organizationRecordSchema.parse(registryRecord_153),
+  organizationRecordSchema.parse(registryRecord_154),
 ];
 
 const citationRecords: CitationRecord[] = [
-  citationRecordSchema.parse(registryRecord_154),
   citationRecordSchema.parse(registryRecord_155),
   citationRecordSchema.parse(registryRecord_156),
   citationRecordSchema.parse(registryRecord_157),
@@ -467,6 +470,9 @@ const citationRecords: CitationRecord[] = [
   citationRecordSchema.parse(registryRecord_202),
   citationRecordSchema.parse(registryRecord_203),
   citationRecordSchema.parse(registryRecord_204),
+  citationRecordSchema.parse(registryRecord_205),
+  citationRecordSchema.parse(registryRecord_206),
+  citationRecordSchema.parse(registryRecord_207),
 ];
 
 const modulesById = new Map<string, ModuleRecord>(
diff --git a/src/lib/content/training-behavior-glossary.test.ts b/src/lib/content/training-behavior-glossary.test.ts
index 6f256b5e..bf680f74 100644
--- a/src/lib/content/training-behavior-glossary.test.ts
+++ b/src/lib/content/training-behavior-glossary.test.ts
@@ -77,6 +77,7 @@ describe("Phase 2 training behavior glossary pages (US-004)", () => {
   test("alignment links to training peers and published token-chain glossary pages", async () => {
     const html = await renderGlossaryHtml("alignment");
 
+    expect(html).toContain('href="/docs/training/grpo"');
     expect(html).toContain('href="/docs/glossary/model-capacity"');
     expect(html).toContain('href="/docs/glossary/overfitting"');
     expect(html).toContain('href="/docs/glossary/generalization"');
@@ -112,6 +113,7 @@ describe("Phase 2 training behavior glossary pages (US-004)", () => {
     ) as ConceptRecord;
 
     expect(alignment.conceptType).toBe("training");
+    expect(alignment.relatedIds).toContain("training-regime.grpo");
     expect(modelCapacity.conceptType).toBe("training");
     expect(overfitting.conceptType).toBe("training");
     expect(generalization.conceptType).toBe("evaluation");
diff --git a/src/lib/navigation/generated-docs-page-tree.test.ts b/src/lib/navigation/generated-docs-page-tree.test.ts
index 4bcc649b..32515788 100644
--- a/src/lib/navigation/generated-docs-page-tree.test.ts
+++ b/src/lib/navigation/generated-docs-page-tree.test.ts
@@ -129,6 +129,13 @@ describe("generated docs page tree", () => {
         url: "/docs/training/on-policy-distillation",
       }),
     );
+    expect(
+      findNodeIndex(trainingChildren, { name: "Optimization" }),
+    ).toBeLessThan(
+      findNodeIndex(trainingChildren, {
+        url: "/docs/training/grpo",
+      }),
+    );
     expect(
       findNodeIndex(trainingChildren, { name: "Optimization" }),
     ).toBeLessThan(
diff --git a/src/lib/source.test.ts b/src/lib/source.test.ts
index cf6b7843..f66a1c4a 100644
--- a/src/lib/source.test.ts
+++ b/src/lib/source.test.ts
@@ -128,6 +128,7 @@ const PAPER_INDEX_URLS = ["/docs/papers/deepseek-v4"] as const;
 const TRAINING_INDEX_URLS = [
   "/docs/training/dpo",
   "/docs/training/fp4-quantization-aware-training",
+  "/docs/training/grpo",
   "/docs/training/on-policy-distillation",
   "/docs/training/specialist-training",
 ] as const;
diff --git a/src/tests/content/section-indexes.test.tsx b/src/tests/content/section-indexes.test.tsx
index 7242528a..53724b49 100644
--- a/src/tests/content/section-indexes.test.tsx
+++ b/src/tests/content/section-indexes.test.tsx
@@ -57,6 +57,7 @@ describe("section index page render", () => {
     const html = renderToStaticMarkup(await TrainingIndexPage());
 
     expect(html).toContain("Training");
+    expect(html).toContain('href="/docs/training/grpo"');
     expect(html).toContain('href="/docs/training/on-policy-distillation"');
     expect(html).toContain('href="/docs/training/specialist-training"');
   });
diff --git a/src/tests/discovery/search-discovery.test.tsx b/src/tests/discovery/search-discovery.test.tsx
index 04023fcf..ba849c39 100644
--- a/src/tests/discovery/search-discovery.test.tsx
+++ b/src/tests/discovery/search-discovery.test.tsx
@@ -75,6 +75,15 @@ function expectRouteRendersOk(
 }
 
 describe("Phase 1 search discovery", () => {
+  test("group relative preference optimization query routes readers to the canonical GRPO page", async () => {
+    const results = await docsSearchApi.search(
+      "group relative preference optimization",
+    );
+    expect(results.length).toBeGreaterThan(0);
+    expect(assertCanonicalPageLevelApiResults(results)).toBeNull();
+    expect(results[0]?.url).toBe("/docs/training/grpo");
+  });
+
   test("GQA query ranks grouped-query attention first", async () => {
     const results = await docsSearchApi.search("GQA");
     expect(results.length).toBeGreaterThan(0);
diff --git a/src/tests/search/build-documents.test.ts b/src/tests/search/build-documents.test.ts
index 23ca4b34..f54c3cbe 100644
--- a/src/tests/search/build-documents.test.ts
+++ b/src/tests/search/build-documents.test.ts
@@ -5,6 +5,7 @@ import { buildSearchDocuments } from "@/lib/search/build-documents";
 
 const SAMPLE_URL = "/docs/modules/grouped-query-attention";
 const TOKEN_GLOSSARY_URL = "/docs/glossary/token";
+const GRPO_URL = "/docs/training/grpo";
 
 describe("buildSearchDocuments", () => {
   test("indexes only published docs pages for the default locale", async () => {
@@ -63,4 +64,29 @@ describe("buildSearchDocuments", () => {
     expect(token?.bodyText).toContain("tokenizer");
     expect(token?.bodyText).toContain("token IDs");
   });
+
+  test("indexes GRPO training page with search aliases and training facets", async () => {
+    const registry = await loadRegistry();
+    const pages = await loadPublishedDocsPages("en");
+    const documents = buildSearchDocuments(pages, registry);
+    const grpo = documents.find((document) => document.url === GRPO_URL);
+
+    expect(grpo).toBeDefined();
+    expect(grpo?.kind).toBe("training-regime");
+    expect(grpo?.registryId).toBe("training-regime.grpo");
+    expect(grpo?.aliases).toEqual(
+      expect.arrayContaining([
+        "GRPO",
+        "group relative policy optimization",
+        "group relative preference optimization",
+        "group-relative policy optimization",
+        "group-relative preference optimization",
+      ]),
+    );
+    expect(grpo?.tags).toEqual(expect.arrayContaining(["foundations"]));
+    expect(grpo?.facets.conceptType).toBe("training");
+    expect(grpo?.facets.variantGroup).toBe(
+      "group-relative-reinforcement-learning",
+    );
+  });
 });