From b590f8040550b953e09f7ac162fcabe1b887c163 Mon Sep 17 00:00:00 2001
From: aabdi <andreas.abdi@outlook.com>
Date: Sat, 20 Jun 2026 01:23:52 +0700
Subject: [PATCH 1/6] feat: gpt-2-model-page-001 - Establish GPT-2 as a
 first-class model record

---
 src/content/registry/models/gpt-2.json        |  46 ++++++
 src/lib/content/gpt-2-model-record.test.ts    |  74 ++++++++++
 src/lib/content/registry-runtime.generated.ts | 138 +++++++++---------
 3 files changed, 190 insertions(+), 68 deletions(-)
 create mode 100644 src/content/registry/models/gpt-2.json
 create mode 100644 src/lib/content/gpt-2-model-record.test.ts

diff --git a/src/content/registry/models/gpt-2.json b/src/content/registry/models/gpt-2.json
new file mode 100644
index 00000000..04359bc5
--- /dev/null
+++ b/src/content/registry/models/gpt-2.json
@@ -0,0 +1,46 @@
+{
+  "id": "model.gpt-2",
+  "slug": "gpt-2",
+  "kind": "model",
+  "defaultTitleKey": "title",
+  "defaultSummaryKey": "description",
+  "aliases": ["GPT-2", "Generative Pre-trained Transformer 2", "gpt2"],
+  "tags": ["foundations", "model-family", "attention", "tokenization"],
+  "relatedIds": [
+    "concept.transformer-architecture",
+    "concept.tokenizers-overview",
+    "module.byte-level-tokenization",
+    "module.learned-positional-embeddings",
+    "module.multi-head-attention",
+    "module.feed-forward-network"
+  ],
+  "citationIds": ["citation.gpt-2-report"],
+  "status": "published",
+  "createdAt": "2026-06-20T00:00:00.000Z",
+  "updatedAt": "2026-06-20T00:00:00.000Z",
+  "authors": [
+    "Alec Radford",
+    "Jeffrey Wu",
+    "Rewon Child",
+    "David Luan",
+    "Dario Amodei",
+    "Ilya Sutskever"
+  ],
+  "sourceId": "citation.gpt-2-report",
+  "family": "gpt",
+  "sourceType": "open-weights",
+  "modalities": ["text"],
+  "architectureIds": ["concept.transformer-architecture"],
+  "moduleIds": [
+    "module.byte-level-tokenization",
+    "module.learned-positional-embeddings",
+    "module.multi-head-attention",
+    "module.feed-forward-network"
+  ],
+  "trainingRegimeIds": [],
+  "datasetIds": [],
+  "paperIds": [],
+  "releaseDate": "2019-02-14",
+  "parameterCount": "1.5 billion parameters",
+  "contextLength": 1024
+}
diff --git a/src/lib/content/gpt-2-model-record.test.ts b/src/lib/content/gpt-2-model-record.test.ts
new file mode 100644
index 00000000..eed3bdef
--- /dev/null
+++ b/src/lib/content/gpt-2-model-record.test.ts
@@ -0,0 +1,74 @@
+import { describe, expect, test } from "bun:test";
+import {
+  getModelById,
+  getRegistryRecordById,
+  getRegistryTags,
+} from "@/lib/content/registry-runtime";
+
+describe("gpt-2 model registry record", () => {
+  test("registers GPT-2 as a first-class GPT family model with required discovery aliases", () => {
+    const record = getModelById("model.gpt-2");
+
+    expect(record?.slug).toBe("gpt-2");
+    expect(record?.kind).toBe("model");
+    expect(record?.family).toBe("gpt");
+    expect(record?.sourceType).toBe("open-weights");
+    expect(record?.aliases).toEqual(
+      expect.arrayContaining([
+        "GPT-2",
+        "Generative Pre-trained Transformer 2",
+        "gpt2",
+      ]),
+    );
+    expect(getRegistryTags("model.gpt-2")).toEqual([
+      "foundations",
+      "model-family",
+      "attention",
+      "tokenization",
+    ]);
+  });
+
+  test("links GPT-2 to its canonical architecture and module records", () => {
+    const record = getModelById("model.gpt-2");
+
+    expect(record?.relatedIds).toEqual([
+      "concept.transformer-architecture",
+      "concept.tokenizers-overview",
+      "module.byte-level-tokenization",
+      "module.learned-positional-embeddings",
+      "module.multi-head-attention",
+      "module.feed-forward-network",
+    ]);
+    expect(record?.architectureIds).toEqual([
+      "concept.transformer-architecture",
+    ]);
+    expect(record?.moduleIds).toEqual([
+      "module.byte-level-tokenization",
+      "module.learned-positional-embeddings",
+      "module.multi-head-attention",
+      "module.feed-forward-network",
+    ]);
+    expect(record?.citationIds).toEqual(["citation.gpt-2-report"]);
+    expect(record?.sourceId).toBe("citation.gpt-2-report");
+  });
+
+  test("only exposes pretraining and GPT-2 paper links when canonical registry records exist", () => {
+    const record = getModelById("model.gpt-2");
+    const pretraining = getRegistryRecordById("training-regime.pretraining");
+    const gpt2Paper = getRegistryRecordById("paper.gpt-2-report");
+
+    if (pretraining) {
+      expect(record?.trainingRegimeIds).toContain(pretraining.id);
+    } else {
+      expect(record?.trainingRegimeIds).not.toContain(
+        "training-regime.pretraining",
+      );
+    }
+
+    if (gpt2Paper) {
+      expect(record?.paperIds).toContain(gpt2Paper.id);
+    } else {
+      expect(record?.paperIds).not.toContain("paper.gpt-2-report");
+    }
+  });
+});
diff --git a/src/lib/content/registry-runtime.generated.ts b/src/lib/content/registry-runtime.generated.ts
index e00eafde..038fbe8e 100644
--- a/src/lib/content/registry-runtime.generated.ts
+++ b/src/lib/content/registry-runtime.generated.ts
@@ -174,68 +174,69 @@ import registryRecord_139 from "../../content/registry/concepts/world-model.json
 import registryRecord_140 from "../../content/registry/concepts/yarn.json";
 import registryRecord_141 from "../../content/registry/models/deepseek-v4-flash.json";
 import registryRecord_142 from "../../content/registry/models/deepseek-v4-pro.json";
-import registryRecord_143 from "../../content/registry/models/gpt-3.json";
-import registryRecord_144 from "../../content/registry/papers/deepseek-v4.json";
-import registryRecord_145 from "../../content/registry/training-regimes/dpo.json";
-import registryRecord_146 from "../../content/registry/training-regimes/fp4-quantization-aware-training.json";
-import registryRecord_147 from "../../content/registry/training-regimes/on-policy-distillation.json";
-import registryRecord_148 from "../../content/registry/training-regimes/specialist-training.json";
-import registryRecord_149 from "../../content/registry/systems/expert-parallel-overlap.json";
-import registryRecord_150 from "../../content/registry/systems/on-disk-kv-cache.json";
-import registryRecord_151 from "../../content/registry/systems/routing.json";
-import registryRecord_152 from "../../content/registry/datasets/deepseek-v4-specialist-corpus.json";
-import registryRecord_153 from "../../content/registry/organizations/deepseek-ai.json";
-import registryRecord_154 from "../../content/registry/citations/attention-is-all-you-need.json";
-import registryRecord_155 from "../../content/registry/citations/awq.json";
-import registryRecord_156 from "../../content/registry/citations/batch-normalization.json";
-import registryRecord_157 from "../../content/registry/citations/brown-gpt-3.json";
-import registryRecord_158 from "../../content/registry/citations/chen-positional-interpolation.json";
-import registryRecord_159 from "../../content/registry/citations/classifier-free-diffusion-guidance.json";
-import registryRecord_160 from "../../content/registry/citations/curious-case-neural-text-degeneration.json";
-import registryRecord_161 from "../../content/registry/citations/deepseek-v2-mla-paper.json";
-import registryRecord_162 from "../../content/registry/citations/deepseek-v4-paper.json";
-import registryRecord_163 from "../../content/registry/citations/denoising-diffusion-probabilistic-models.json";
-import registryRecord_164 from "../../content/registry/citations/ding-longrope.json";
-import registryRecord_165 from "../../content/registry/citations/direct-preference-optimization.json";
-import registryRecord_166 from "../../content/registry/citations/flamingo-visual-language-model.json";
-import registryRecord_167 from "../../content/registry/citations/glu-variants-improve-transformer.json";
-import registryRecord_168 from "../../content/registry/citations/goodfellow-deep-learning.json";
-import registryRecord_169 from "../../content/registry/citations/gpt-2-report.json";
-import registryRecord_170 from "../../content/registry/citations/gqa-paper.json";
-import registryRecord_171 from "../../content/registry/citations/group-normalization.json";
-import registryRecord_172 from "../../content/registry/citations/image-is-worth-16x16-words.json";
-import registryRecord_173 from "../../content/registry/citations/kaiokendev-superhot.json";
-import registryRecord_174 from "../../content/registry/citations/kaplan-scaling-laws.json";
-import registryRecord_175 from "../../content/registry/citations/katharopoulos-linear-attention-paper.json";
-import registryRecord_176 from "../../content/registry/citations/kingma-adam.json";
-import registryRecord_177 from "../../content/registry/citations/kivi-kv-cache-quantization.json";
-import registryRecord_178 from "../../content/registry/citations/kudo-sentencepiece.json";
-import registryRecord_179 from "../../content/registry/citations/layer-normalization.json";
-import registryRecord_180 from "../../content/registry/citations/learning-transferable-visual-models-from-natural-language-supervision.json";
-import registryRecord_181 from "../../content/registry/citations/longformer.json";
-import registryRecord_182 from "../../content/registry/citations/multilayer-feedforward-networks-are-universal-approximators.json";
-import registryRecord_183 from "../../content/registry/citations/on-policy-distillation-of-language-models.json";
-import registryRecord_184 from "../../content/registry/citations/peng-yarn.json";
-import registryRecord_185 from "../../content/registry/citations/press-alibi.json";
-import registryRecord_186 from "../../content/registry/citations/qlora.json";
-import registryRecord_187 from "../../content/registry/citations/quantization-integer-only-inference.json";
-import registryRecord_188 from "../../content/registry/citations/query-key-normalization-for-transformers.json";
-import registryRecord_189 from "../../content/registry/citations/raffel-t5.json";
-import registryRecord_190 from "../../content/registry/citations/rectified-linear-units-improve-restricted-boltzmann-machines.json";
-import registryRecord_191 from "../../content/registry/citations/rectifier-nonlinearities-improve-neural-network-acoustic-models.json";
-import registryRecord_192 from "../../content/registry/citations/root-mean-square-layer-normalization.json";
-import registryRecord_193 from "../../content/registry/citations/self-attention-with-relative-position-representations.json";
-import registryRecord_194 from "../../content/registry/citations/sennrich-bpe.json";
-import registryRecord_195 from "../../content/registry/citations/shazeer-mqa-paper.json";
-import registryRecord_196 from "../../content/registry/citations/sigmoid-weighted-linear-units.json";
-import registryRecord_197 from "../../content/registry/citations/smoothquant.json";
-import registryRecord_198 from "../../content/registry/citations/sparse-transformers.json";
-import registryRecord_199 from "../../content/registry/citations/sparsely-gated-mixture-of-experts-layer.json";
-import registryRecord_200 from "../../content/registry/citations/su-roformer-rope.json";
-import registryRecord_201 from "../../content/registry/citations/training-language-models-to-follow-instructions-with-human-feedback.json";
-import registryRecord_202 from "../../content/registry/citations/transformer-lms-without-positional-encodings.json";
-import registryRecord_203 from "../../content/registry/citations/wei-emergent-abilities.json";
-import registryRecord_204 from "../../content/registry/citations/world-models.json";
+import registryRecord_143 from "../../content/registry/models/gpt-2.json";
+import registryRecord_144 from "../../content/registry/models/gpt-3.json";
+import registryRecord_145 from "../../content/registry/papers/deepseek-v4.json";
+import registryRecord_146 from "../../content/registry/training-regimes/dpo.json";
+import registryRecord_147 from "../../content/registry/training-regimes/fp4-quantization-aware-training.json";
+import registryRecord_148 from "../../content/registry/training-regimes/on-policy-distillation.json";
+import registryRecord_149 from "../../content/registry/training-regimes/specialist-training.json";
+import registryRecord_150 from "../../content/registry/systems/expert-parallel-overlap.json";
+import registryRecord_151 from "../../content/registry/systems/on-disk-kv-cache.json";
+import registryRecord_152 from "../../content/registry/systems/routing.json";
+import registryRecord_153 from "../../content/registry/datasets/deepseek-v4-specialist-corpus.json";
+import registryRecord_154 from "../../content/registry/organizations/deepseek-ai.json";
+import registryRecord_155 from "../../content/registry/citations/attention-is-all-you-need.json";
+import registryRecord_156 from "../../content/registry/citations/awq.json";
+import registryRecord_157 from "../../content/registry/citations/batch-normalization.json";
+import registryRecord_158 from "../../content/registry/citations/brown-gpt-3.json";
+import registryRecord_159 from "../../content/registry/citations/chen-positional-interpolation.json";
+import registryRecord_160 from "../../content/registry/citations/classifier-free-diffusion-guidance.json";
+import registryRecord_161 from "../../content/registry/citations/curious-case-neural-text-degeneration.json";
+import registryRecord_162 from "../../content/registry/citations/deepseek-v2-mla-paper.json";
+import registryRecord_163 from "../../content/registry/citations/deepseek-v4-paper.json";
+import registryRecord_164 from "../../content/registry/citations/denoising-diffusion-probabilistic-models.json";
+import registryRecord_165 from "../../content/registry/citations/ding-longrope.json";
+import registryRecord_166 from "../../content/registry/citations/direct-preference-optimization.json";
+import registryRecord_167 from "../../content/registry/citations/flamingo-visual-language-model.json";
+import registryRecord_168 from "../../content/registry/citations/glu-variants-improve-transformer.json";
+import registryRecord_169 from "../../content/registry/citations/goodfellow-deep-learning.json";
+import registryRecord_170 from "../../content/registry/citations/gpt-2-report.json";
+import registryRecord_171 from "../../content/registry/citations/gqa-paper.json";
+import registryRecord_172 from "../../content/registry/citations/group-normalization.json";
+import registryRecord_173 from "../../content/registry/citations/image-is-worth-16x16-words.json";
+import registryRecord_174 from "../../content/registry/citations/kaiokendev-superhot.json";
+import registryRecord_175 from "../../content/registry/citations/kaplan-scaling-laws.json";
+import registryRecord_176 from "../../content/registry/citations/katharopoulos-linear-attention-paper.json";
+import registryRecord_177 from "../../content/registry/citations/kingma-adam.json";
+import registryRecord_178 from "../../content/registry/citations/kivi-kv-cache-quantization.json";
+import registryRecord_179 from "../../content/registry/citations/kudo-sentencepiece.json";
+import registryRecord_180 from "../../content/registry/citations/layer-normalization.json";
+import registryRecord_181 from "../../content/registry/citations/learning-transferable-visual-models-from-natural-language-supervision.json";
+import registryRecord_182 from "../../content/registry/citations/longformer.json";
+import registryRecord_183 from "../../content/registry/citations/multilayer-feedforward-networks-are-universal-approximators.json";
+import registryRecord_184 from "../../content/registry/citations/on-policy-distillation-of-language-models.json";
+import registryRecord_185 from "../../content/registry/citations/peng-yarn.json";
+import registryRecord_186 from "../../content/registry/citations/press-alibi.json";
+import registryRecord_187 from "../../content/registry/citations/qlora.json";
+import registryRecord_188 from "../../content/registry/citations/quantization-integer-only-inference.json";
+import registryRecord_189 from "../../content/registry/citations/query-key-normalization-for-transformers.json";
+import registryRecord_190 from "../../content/registry/citations/raffel-t5.json";
+import registryRecord_191 from "../../content/registry/citations/rectified-linear-units-improve-restricted-boltzmann-machines.json";
+import registryRecord_192 from "../../content/registry/citations/rectifier-nonlinearities-improve-neural-network-acoustic-models.json";
+import registryRecord_193 from "../../content/registry/citations/root-mean-square-layer-normalization.json";
+import registryRecord_194 from "../../content/registry/citations/self-attention-with-relative-position-representations.json";
+import registryRecord_195 from "../../content/registry/citations/sennrich-bpe.json";
+import registryRecord_196 from "../../content/registry/citations/shazeer-mqa-paper.json";
+import registryRecord_197 from "../../content/registry/citations/sigmoid-weighted-linear-units.json";
+import registryRecord_198 from "../../content/registry/citations/smoothquant.json";
+import registryRecord_199 from "../../content/registry/citations/sparse-transformers.json";
+import registryRecord_200 from "../../content/registry/citations/sparsely-gated-mixture-of-experts-layer.json";
+import registryRecord_201 from "../../content/registry/citations/su-roformer-rope.json";
+import registryRecord_202 from "../../content/registry/citations/training-language-models-to-follow-instructions-with-human-feedback.json";
+import registryRecord_203 from "../../content/registry/citations/transformer-lms-without-positional-encodings.json";
+import registryRecord_204 from "../../content/registry/citations/wei-emergent-abilities.json";
+import registryRecord_205 from "../../content/registry/citations/world-models.json";
 
 const moduleRecords: ModuleRecord[] = [
   moduleRecordSchema.parse(registryRecord_0),
@@ -388,35 +389,35 @@ const modelRecords: ModelRecord[] = [
   modelRecordSchema.parse(registryRecord_141),
   modelRecordSchema.parse(registryRecord_142),
   modelRecordSchema.parse(registryRecord_143),
+  modelRecordSchema.parse(registryRecord_144),
 ];
 
 const paperRecords: PaperRecord[] = [
-  paperRecordSchema.parse(registryRecord_144),
+  paperRecordSchema.parse(registryRecord_145),
 ];
 
 const trainingRegimeRecords: TrainingRegimeRecord[] = [
-  trainingRegimeRecordSchema.parse(registryRecord_145),
   trainingRegimeRecordSchema.parse(registryRecord_146),
   trainingRegimeRecordSchema.parse(registryRecord_147),
   trainingRegimeRecordSchema.parse(registryRecord_148),
+  trainingRegimeRecordSchema.parse(registryRecord_149),
 ];
 
 const systemRecords: SystemRecord[] = [
-  systemRecordSchema.parse(registryRecord_149),
   systemRecordSchema.parse(registryRecord_150),
   systemRecordSchema.parse(registryRecord_151),
+  systemRecordSchema.parse(registryRecord_152),
 ];
 
 const datasetRecords: DatasetRecord[] = [
-  datasetRecordSchema.parse(registryRecord_152),
+  datasetRecordSchema.parse(registryRecord_153),
 ];
 
 const organizationRecords: OrganizationRecord[] = [
-  organizationRecordSchema.parse(registryRecord_153),
+  organizationRecordSchema.parse(registryRecord_154),
 ];
 
 const citationRecords: CitationRecord[] = [
-  citationRecordSchema.parse(registryRecord_154),
   citationRecordSchema.parse(registryRecord_155),
   citationRecordSchema.parse(registryRecord_156),
   citationRecordSchema.parse(registryRecord_157),
@@ -467,6 +468,7 @@ const citationRecords: CitationRecord[] = [
   citationRecordSchema.parse(registryRecord_202),
   citationRecordSchema.parse(registryRecord_203),
   citationRecordSchema.parse(registryRecord_204),
+  citationRecordSchema.parse(registryRecord_205),
 ];
 
 const modulesById = new Map<string, ModuleRecord>(

From 7207cee23fb6e7bbd6849c1705afc269364613d5 Mon Sep 17 00:00:00 2001
From: aabdi <andreas.abdi@outlook.com>
Date: Sat, 20 Jun 2026 01:39:44 +0700
Subject: [PATCH 2/6] feat: gpt-2-model-page-002 - Publish the canonical
 GPT-2 explainer page

---
 src/content/docs/models/gpt-2/assets.json     |   9 +
 .../docs/models/gpt-2/messages/en.json        | 109 +++++++
 src/content/docs/models/gpt-2/page.mdx        |  84 +++++
 .../registry/graphs/gpt-2-architecture.json   | 293 ++++++++++++++++++
 .../components/ModelTrainingSummary.test.tsx  |  24 ++
 .../components/ModelTrainingSummary.tsx       |  44 +--
 ...ntent-reconciliation-attention-tag.test.ts |   1 +
 src/lib/content/gpt-2-model-page.test.tsx     |  56 ++++
 .../graph-registry-runtime.generated.ts       |   2 +
 .../content/graph-registry-runtime.test.ts    |   5 +-
 .../published-docs-registry-manifest.ts       |   9 +
 src/lib/source.test.ts                        |   1 +
 .../content/attention-tag-landing.test.ts     |   1 +
 13 files changed, 619 insertions(+), 19 deletions(-)
 create mode 100644 src/content/docs/models/gpt-2/assets.json
 create mode 100644 src/content/docs/models/gpt-2/messages/en.json
 create mode 100644 src/content/docs/models/gpt-2/page.mdx
 create mode 100644 src/content/registry/graphs/gpt-2-architecture.json
 create mode 100644 src/features/models/components/ModelTrainingSummary.test.tsx
 create mode 100644 src/lib/content/gpt-2-model-page.test.tsx

diff --git a/src/content/docs/models/gpt-2/assets.json b/src/content/docs/models/gpt-2/assets.json
new file mode 100644
index 00000000..8e64fa0b
--- /dev/null
+++ b/src/content/docs/models/gpt-2/assets.json
@@ -0,0 +1,9 @@
+{
+  "architectureGraph": {
+    "type": "graph",
+    "graphId": "graph.gpt-2-architecture",
+    "webRenderer": "react-flow",
+    "printRenderer": "vertical-svg",
+    "altKey": "assets.architectureGraph.alt"
+  }
+}
diff --git a/src/content/docs/models/gpt-2/messages/en.json b/src/content/docs/models/gpt-2/messages/en.json
new file mode 100644
index 00000000..c160c0ae
--- /dev/null
+++ b/src/content/docs/models/gpt-2/messages/en.json
@@ -0,0 +1,109 @@
+{
+  "title": "GPT-2",
+  "description": "An early decoder-only language model that showed broad text generation could emerge from large-scale next-token pretraining.",
+  "openingSummary": "Generative Pre-trained Transformer 2, usually shortened to GPT-2, became a landmark example of a decoder-only transformer because it showed that one large pretrained text model could continue prose, imitate formats, and answer simple prompts without a separate task-specific head.",
+  "sections": {
+    "whatItIs": {
+      "title": "What It Is",
+      "body": "GPT-2 is a text-only language model from OpenAI. It is a decoder-only transformer trained to predict the next token in a sequence, and it became one of the clearest early examples of a general-purpose generative model rather than a narrow task model."
+    },
+    "inputsAndOutputs": {
+      "title": "Inputs And Outputs",
+      "body": "The model reads text that has already been broken into byte-level tokens and produces more text tokens one step at a time. In practice, the prompt can be a sentence, a question, a block of code, or a partial article, and GPT-2 keeps extending that context through next-token prediction."
+    },
+    "architecture": {
+      "title": "Architecture",
+      "body": "GPT-2 keeps the standard decoder-only transformer layout. Token embeddings are combined with learned positional embeddings, then a repeated decoder block applies masked multi-head attention, so each position can only read earlier tokens, followed by a feed-forward network. Each sublayer is wrapped in a residual connection with layer normalization, and a final output projection and softmax turn the last hidden state into next-token probabilities."
+    },
+    "importantModules": {
+      "title": "Important Modules",
+      "body": "These modules explain most of what readers need to know about GPT-2's internals. Byte-level tokenization decides how raw text becomes tokens, learned positional embeddings mark order, multi-head attention mixes information across earlier tokens, and the feed-forward network does the per-token transformation inside each decoder block."
+    },
+    "training": {
+      "title": "Training",
+      "body": "GPT-2 mattered because its public story was simple and influential: scale up plain next-token pretraining on a broad internet text mixture, keep the architecture familiar, and let general language behavior emerge from the training objective. The model predates later instruction tuning workflows, so the base model is best understood as a pretrained text completer rather than a chat system shaped by post-training."
+    },
+    "practicalNotes": {
+      "title": "Practical Notes",
+      "body": "GPT-2 is historically important, but it also marks an earlier stage of the field. Its 1,024-token context window is small by modern standards, it uses learned absolute positions instead of later long-context schemes such as RoPE scaling, and it is most useful here as a clean reference point for understanding decoder-only transformers before more specialized variants complicate the picture."
+    },
+    "related": {
+      "title": "Related Models, Modules, And Papers"
+    },
+    "tags": {
+      "title": "Tags"
+    },
+    "references": {
+      "title": "References"
+    }
+  },
+  "assets": {
+    "architectureGraph": {
+      "alt": "GPT-2 architecture diagram with input tokens, token embeddings, learned positional embeddings, a repeated decoder block with masked multi-head attention and a feed-forward network, then an output linear layer and softmax."
+    }
+  },
+  "graph": {
+    "nodes": {
+      "outputProbabilities": {
+        "label": "Output\nProbabilities",
+        "summary": "The model's probability distribution over the next token"
+      },
+      "softmax": {
+        "label": "Softmax",
+        "summary": "Turns final vocabulary scores into probabilities"
+      },
+      "linear": {
+        "label": "Linear",
+        "summary": "Projects the hidden state into vocabulary logits"
+      },
+      "decoderStack": {
+        "label": " ",
+        "summary": "Repeated decoder block container"
+      },
+      "attentionSublayer": {
+        "label": " ",
+        "summary": "Attention sublayer container"
+      },
+      "ffnSublayer": {
+        "label": " ",
+        "summary": "Feed-forward sublayer container"
+      },
+      "repeatMarker": {
+        "label": "N×",
+        "summary": "This decoder block repeats many times through the stack"
+      },
+      "addNormAttention": {
+        "label": "Add & Norm",
+        "summary": "Residual add followed by normalization after masked attention"
+      },
+      "maskedMha": {
+        "label": "Masked\nMulti-Head\nAttention",
+        "summary": "Each token can read earlier tokens but not future ones"
+      },
+      "addNormFfn": {
+        "label": "Add & Norm",
+        "summary": "Residual add followed by normalization after the feed-forward block"
+      },
+      "feedForward": {
+        "label": "Feed\nForward",
+        "summary": "Per-token dense transformation inside each decoder block"
+      },
+      "positionAdd": {
+        "label": "+",
+        "summary": "Adds token embeddings and learned positional embeddings"
+      },
+      "learnedPositions": {
+        "label": "Learned\nPosition\nEmbeddings",
+        "summary": "Absolute position vectors added before the stack"
+      },
+      "inputEmbedding": {
+        "label": "Input\nEmbedding",
+        "summary": "Maps token IDs to hidden vectors"
+      },
+      "inputTokens": {
+        "label": "Input\nTokens",
+        "summary": "Prompt text after tokenization"
+      }
+    }
+  }
+}
diff --git a/src/content/docs/models/gpt-2/page.mdx b/src/content/docs/models/gpt-2/page.mdx
new file mode 100644
index 00000000..5746eeab
--- /dev/null
+++ b/src/content/docs/models/gpt-2/page.mdx
@@ -0,0 +1,84 @@
+---
+title: "GPT-2"
+description: "An early decoder-only language model that showed broad text generation could emerge from large-scale next-token pretraining."
+kind: "model"
+registryId: "model.gpt-2"
+messageNamespace: "local"
+assetNamespace: "local"
+status: "published"
+tags:
+  - foundations
+  - model-family
+  - attention
+  - tokenization
+aliases:
+  - "GPT-2"
+  - "Generative Pre-trained Transformer 2"
+  - "gpt2"
+updatedAt: "2026-06-20"
+---
+
+import { CitationList } from "@/features/docs/components/CitationList";
+import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
+import { Section } from "@/features/docs/components/Section";
+import { T } from "@/features/docs/components/T";
+import { TagPillList } from "@/features/docs/components/TagPillList";
+import { ModelArchitectureGraph } from "@/features/models/components/ModelArchitectureGraph";
+import { ModelAtAGlance } from "@/features/models/components/ModelAtAGlance";
+import { ModelModuleList } from "@/features/models/components/ModelModuleList";
+import { ModelTrainingSummary } from "@/features/models/components/ModelTrainingSummary";
+
+
+<ModelAtAGlance registryId="model.gpt-2" />
+
+<Section id="what-it-is" titleKey="sections.whatItIs.title">
+  <T k="sections.whatItIs.body" />
+</Section>
+
+<Section id="inputs-and-outputs" titleKey="sections.inputsAndOutputs.title">
+  <T k="sections.inputsAndOutputs.body" />
+</Section>
+
+<Section id="architecture" titleKey="sections.architecture.title">
+  <T k="sections.architecture.body" />
+
+  <ModelArchitectureGraph
+    registryId="model.gpt-2"
+    assetId="architectureGraph"
+  />
+</Section>
+
+<Section id="important-modules" titleKey="sections.importantModules.title">
+  <T k="sections.importantModules.body" />
+  <ModelModuleList registryId="model.gpt-2" />
+</Section>
+
+<Section id="training" titleKey="sections.training.title">
+  <T k="sections.training.body" />
+  <ModelTrainingSummary registryId="model.gpt-2" />
+</Section>
+
+<Section id="practical-notes" titleKey="sections.practicalNotes.title">
+  <T k="sections.practicalNotes.body" />
+</Section>
+
+<Section id="related" titleKey="sections.related.title">
+  <DerivedRelatedDocs
+    registryId="model.gpt-2"
+    groups={[
+      "same-model-family",
+      "shared-modules",
+      "shared-training-regimes",
+      "shared-tags",
+      "curated-related"
+    ]}
+  />
+</Section>
+
+<Section id="tags" titleKey="sections.tags.title">
+  <TagPillList registryId="model.gpt-2" showDescriptions />
+</Section>
+
+<Section id="references" titleKey="sections.references.title">
+  <CitationList registryId="model.gpt-2" />
+</Section>
diff --git a/src/content/registry/graphs/gpt-2-architecture.json b/src/content/registry/graphs/gpt-2-architecture.json
new file mode 100644
index 00000000..ddebfd91
--- /dev/null
+++ b/src/content/registry/graphs/gpt-2-architecture.json
@@ -0,0 +1,293 @@
+{
+  "id": "graph.gpt-2-architecture",
+  "slug": "gpt-2-architecture",
+  "kind": "graph",
+  "defaultTitleKey": "title",
+  "defaultSummaryKey": "description",
+  "aliases": ["gpt-2 architecture graph"],
+  "tags": ["foundations", "model-family"],
+  "relatedIds": [],
+  "citationIds": ["citation.gpt-2-report"],
+  "status": "published",
+  "createdAt": "2026-06-20T00:00:00.000Z",
+  "updatedAt": "2026-06-20T00:00:00.000Z",
+  "subjectId": "model.gpt-2",
+  "graphType": "model-architecture",
+  "rootNodeId": "output-probabilities",
+  "layout": "vertical-expandable",
+  "defaultExpandedDepth": 1,
+  "supportedRenderers": ["react-flow", "vertical-svg"],
+  "nodes": [
+    {
+      "id": "output-probabilities",
+      "labelKey": "graph.nodes.outputProbabilities.label",
+      "summaryKey": "graph.nodes.outputProbabilities.summary",
+      "registryId": "concept.autoregressive-generation",
+      "moduleKind": "output",
+      "position": { "x": 290, "y": 0 },
+      "size": { "width": 170, "height": 62 },
+      "visualRole": "architecture-io",
+      "childNodeIds": []
+    },
+    {
+      "id": "softmax",
+      "labelKey": "graph.nodes.softmax.label",
+      "summaryKey": "graph.nodes.softmax.summary",
+      "registryId": "concept.softmax",
+      "moduleKind": "output",
+      "position": { "x": 300, "y": 92 },
+      "size": { "width": 150, "height": 44 },
+      "visualRole": "architecture-softmax",
+      "childNodeIds": []
+    },
+    {
+      "id": "linear",
+      "labelKey": "graph.nodes.linear.label",
+      "summaryKey": "graph.nodes.linear.summary",
+      "moduleKind": "projection",
+      "position": { "x": 300, "y": 164 },
+      "size": { "width": 150, "height": 44 },
+      "visualRole": "architecture-linear",
+      "childNodeIds": []
+    },
+    {
+      "id": "decoder-stack",
+      "labelKey": "graph.nodes.decoderStack.label",
+      "summaryKey": "graph.nodes.decoderStack.summary",
+      "registryId": "concept.transformer-architecture",
+      "moduleKind": "block",
+      "position": { "x": 190, "y": 246 },
+      "size": { "width": 370, "height": 410 },
+      "visualRole": "group-container",
+      "zIndex": 0,
+      "childNodeIds": []
+    },
+    {
+      "id": "attention-sublayer",
+      "labelKey": "graph.nodes.attentionSublayer.label",
+      "summaryKey": "graph.nodes.attentionSublayer.summary",
+      "moduleKind": "block",
+      "position": { "x": 222, "y": 290 },
+      "size": { "width": 305, "height": 150 },
+      "visualRole": "group-container",
+      "zIndex": 1,
+      "childNodeIds": []
+    },
+    {
+      "id": "ffn-sublayer",
+      "labelKey": "graph.nodes.ffnSublayer.label",
+      "summaryKey": "graph.nodes.ffnSublayer.summary",
+      "moduleKind": "block",
+      "position": { "x": 222, "y": 456 },
+      "size": { "width": 305, "height": 150 },
+      "visualRole": "group-container",
+      "zIndex": 1,
+      "childNodeIds": []
+    },
+    {
+      "id": "repeat-marker",
+      "labelKey": "graph.nodes.repeatMarker.label",
+      "summaryKey": "graph.nodes.repeatMarker.summary",
+      "moduleKind": "other",
+      "position": { "x": 576, "y": 402 },
+      "size": { "width": 48, "height": 120 },
+      "visualRole": "repeat-label",
+      "zIndex": 2,
+      "childNodeIds": []
+    },
+    {
+      "id": "add-norm-attention",
+      "labelKey": "graph.nodes.addNormAttention.label",
+      "summaryKey": "graph.nodes.addNormAttention.summary",
+      "registryId": "module.layer-norm",
+      "moduleKind": "normalization",
+      "position": { "x": 275, "y": 308 },
+      "size": { "width": 200, "height": 44 },
+      "visualRole": "architecture-add-norm",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "masked-mha",
+      "labelKey": "graph.nodes.maskedMha.label",
+      "summaryKey": "graph.nodes.maskedMha.summary",
+      "registryId": "module.multi-head-attention",
+      "moduleKind": "attention",
+      "position": { "x": 260, "y": 366 },
+      "size": { "width": 230, "height": 82 },
+      "visualRole": "architecture-attention",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "add-norm-ffn",
+      "labelKey": "graph.nodes.addNormFfn.label",
+      "summaryKey": "graph.nodes.addNormFfn.summary",
+      "registryId": "module.layer-norm",
+      "moduleKind": "normalization",
+      "position": { "x": 275, "y": 474 },
+      "size": { "width": 200, "height": 44 },
+      "visualRole": "architecture-add-norm",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "feed-forward",
+      "labelKey": "graph.nodes.feedForward.label",
+      "summaryKey": "graph.nodes.feedForward.summary",
+      "registryId": "module.feed-forward-network",
+      "moduleKind": "feed-forward",
+      "position": { "x": 260, "y": 532 },
+      "size": { "width": 230, "height": 82 },
+      "visualRole": "architecture-feed-forward",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "position-add",
+      "labelKey": "graph.nodes.positionAdd.label",
+      "summaryKey": "graph.nodes.positionAdd.summary",
+      "registryId": "concept.embedding",
+      "moduleKind": "other",
+      "position": { "x": 353, "y": 682 },
+      "size": { "width": 46, "height": 46 },
+      "visualRole": "operator-circle",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "learned-positions",
+      "labelKey": "graph.nodes.learnedPositions.label",
+      "summaryKey": "graph.nodes.learnedPositions.summary",
+      "registryId": "module.learned-positional-embeddings",
+      "moduleKind": "embedding",
+      "position": { "x": 68, "y": 780 },
+      "size": { "width": 190, "height": 80 },
+      "visualRole": "architecture-embedding",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "input-embedding",
+      "labelKey": "graph.nodes.inputEmbedding.label",
+      "summaryKey": "graph.nodes.inputEmbedding.summary",
+      "registryId": "concept.embedding",
+      "moduleKind": "embedding",
+      "position": { "x": 273, "y": 780 },
+      "size": { "width": 205, "height": 62 },
+      "visualRole": "architecture-embedding",
+      "zIndex": 3,
+      "childNodeIds": []
+    },
+    {
+      "id": "input-tokens",
+      "labelKey": "graph.nodes.inputTokens.label",
+      "summaryKey": "graph.nodes.inputTokens.summary",
+      "registryId": "concept.token",
+      "moduleKind": "input",
+      "position": { "x": 291, "y": 880 },
+      "size": { "width": 170, "height": 60 },
+      "visualRole": "architecture-io",
+      "zIndex": 3,
+      "childNodeIds": []
+    }
+  ],
+  "edges": [
+    {
+      "id": "softmax-to-output-probabilities",
+      "source": "softmax",
+      "target": "output-probabilities",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "linear-to-softmax",
+      "source": "linear",
+      "target": "softmax",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "add-norm-ffn-to-linear",
+      "source": "add-norm-ffn",
+      "target": "linear",
+      "edgeKind": "control-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "feed-forward-to-add-norm-ffn",
+      "source": "feed-forward",
+      "target": "add-norm-ffn",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "add-norm-attention-to-feed-forward",
+      "source": "add-norm-attention",
+      "target": "feed-forward",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "add-norm-attention-to-add-norm-ffn",
+      "source": "add-norm-ffn",
+      "target": "add-norm-attention",
+      "edgeKind": "residual",
+      "sourceHandleSide": "right",
+      "targetHandleSide": "right"
+    },
+    {
+      "id": "masked-mha-to-add-norm-attention",
+      "source": "masked-mha",
+      "target": "add-norm-attention",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "position-add-to-masked-mha",
+      "source": "position-add",
+      "target": "masked-mha",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "position-add-to-add-norm-attention",
+      "source": "position-add",
+      "target": "add-norm-attention",
+      "edgeKind": "residual",
+      "sourceHandleSide": "right",
+      "targetHandleSide": "left"
+    },
+    {
+      "id": "learned-positions-to-position-add",
+      "source": "learned-positions",
+      "target": "position-add",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "left"
+    },
+    {
+      "id": "input-embedding-to-position-add",
+      "source": "input-embedding",
+      "target": "position-add",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    },
+    {
+      "id": "input-tokens-to-input-embedding",
+      "source": "input-tokens",
+      "target": "input-embedding",
+      "edgeKind": "data-flow",
+      "sourceHandleSide": "top",
+      "targetHandleSide": "bottom"
+    }
+  ]
+}
diff --git a/src/features/models/components/ModelTrainingSummary.test.tsx b/src/features/models/components/ModelTrainingSummary.test.tsx
new file mode 100644
index 00000000..515a6dc7
--- /dev/null
+++ b/src/features/models/components/ModelTrainingSummary.test.tsx
@@ -0,0 +1,24 @@
+import { describe, expect, test } from "bun:test";
+import { renderToStaticMarkup } from "react-dom/server";
+import { ModelTrainingSummary } from "@/features/models/components/ModelTrainingSummary";
+
+describe("ModelTrainingSummary", () => {
+  test("omits empty training and paper placeholders when a model has no linked records", () => {
+    const html = renderToStaticMarkup(
+      <ModelTrainingSummary registryId="model.gpt-2" />,
+    );
+
+    expect(html).toBe("");
+  });
+
+  test("renders only populated training sections for models with linked records", () => {
+    const html = renderToStaticMarkup(
+      <ModelTrainingSummary registryId="model.deepseek-v4-pro" />,
+    );
+
+    expect(html).toContain("Training regimes");
+    expect(html).toContain("Linked papers");
+    expect(html).not.toContain("No training regimes listed yet.");
+    expect(html).not.toContain("No linked paper pages listed yet.");
+  });
+});
diff --git a/src/features/models/components/ModelTrainingSummary.tsx b/src/features/models/components/ModelTrainingSummary.tsx
index c32432a5..5c535fba 100644
--- a/src/features/models/components/ModelTrainingSummary.tsx
+++ b/src/features/models/components/ModelTrainingSummary.tsx
@@ -7,26 +7,34 @@ export function ModelTrainingSummary({ registryId }: { registryId: string }) {
     return null;
   }
 
+  const hasTrainingRegimes = record.trainingRegimeIds.length > 0;
+  const hasLinkedPapers = record.paperIds.length > 0;
+
+  if (!hasTrainingRegimes && !hasLinkedPapers) {
+    return null;
+  }
+
   return (
     <div className="my-4 space-y-4">
-      <section>
-        <h3 className="mb-2 text-sm font-medium text-muted-foreground">
-          Training regimes
-        </h3>
-        <RegistryLinkList
-          registryIds={record.trainingRegimeIds}
-          emptyLabel="No training regimes listed yet."
-        />
-      </section>
-      <section>
-        <h3 className="mb-2 text-sm font-medium text-muted-foreground">
-          Linked papers
-        </h3>
-        <RegistryLinkList
-          registryIds={record.paperIds}
-          emptyLabel="No linked paper pages listed yet."
-        />
-      </section>
+      {hasTrainingRegimes ? (
+        <section>
+          <h3 className="mb-2 text-sm font-medium text-muted-foreground">
+            Training regimes
+          </h3>
+          <RegistryLinkList
+            registryIds={record.trainingRegimeIds}
+            emptyLabel=""
+          />
+        </section>
+      ) : null}
+      {hasLinkedPapers ? (
+        <section>
+          <h3 className="mb-2 text-sm font-medium text-muted-foreground">
+            Linked papers
+          </h3>
+          <RegistryLinkList registryIds={record.paperIds} emptyLabel="" />
+        </section>
+      ) : null}
     </div>
   );
 }
diff --git a/src/lib/content/content-reconciliation-attention-tag.test.ts b/src/lib/content/content-reconciliation-attention-tag.test.ts
index edf9da16..65a8cac6 100644
--- a/src/lib/content/content-reconciliation-attention-tag.test.ts
+++ b/src/lib/content/content-reconciliation-attention-tag.test.ts
@@ -90,6 +90,7 @@ describe("Phase 2/3 reconciliation attention tag landing (US-007)", () => {
     expect(modelGroup?.resources.map((resource) => resource.url)).toEqual([
       "/docs/models/deepseek-v4-flash",
       "/docs/models/deepseek-v4-pro",
+      "/docs/models/gpt-2",
       "/docs/models/gpt-3",
     ]);
 
diff --git a/src/lib/content/gpt-2-model-page.test.tsx b/src/lib/content/gpt-2-model-page.test.tsx
new file mode 100644
index 00000000..7f233d7c
--- /dev/null
+++ b/src/lib/content/gpt-2-model-page.test.tsx
@@ -0,0 +1,56 @@
+import { describe, expect, test } from "bun:test";
+import { createElement } from "react";
+import { renderToStaticMarkup } from "react-dom/server";
+import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
+import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
+import { loadModelPage } from "@/lib/content/model-page";
+
+describe("gpt-2 model page", () => {
+  test("derived related docs keep core GPT-2 learning paths visible without duplicate module links", () => {
+    const html = renderToStaticMarkup(
+      <DerivedRelatedDocs
+        registryId="model.gpt-2"
+        groups={[
+          "same-model-family",
+          "shared-modules",
+          "shared-training-regimes",
+          "shared-tags",
+          "curated-related",
+        ]}
+      />,
+    );
+
+    expect(html).toContain('data-testid="derived-related-docs"');
+    expect(html).toContain('href="/docs/concepts/transformer-architecture"');
+    expect(html).toContain("Tokenizer overview");
+    expect(html).toContain('data-planned="true"');
+    expect(html).toContain('href="/docs/modules/byte-level-tokenization"');
+    expect(html).toContain('href="/docs/modules/multi-head-attention"');
+    expect(
+      html.match(/href="\/docs\/modules\/learned-positional-embeddings"/g) ??
+        [],
+    ).toHaveLength(1);
+  });
+
+  test("page renders the registry-backed architecture graph and standard related-docs block", async () => {
+    const page = await loadModelPage("gpt-2");
+    const html = renderToStaticMarkup(
+      createElement(ModulePageProviders, {
+        messages: page.messages,
+        assets: page.assets,
+        // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing
+        children: page.content,
+      }),
+    );
+
+    expect(page.messages.title).toBe("GPT-2");
+    expect(html).toContain('data-page-asset="architectureGraph"');
+    expect(html).toContain('data-graph-id="graph.gpt-2-architecture"');
+    expect(html).toContain('data-react-flow-graph="true"');
+    expect(html).toContain('data-testid="derived-related-docs"');
+    expect(html).toContain('href="/docs/modules/byte-level-tokenization"');
+    expect(html).toContain("Masked");
+    expect(html).toContain("Multi-Head");
+    expect(html).not.toContain("Missing graph record");
+  });
+});
diff --git a/src/lib/content/graph-registry-runtime.generated.ts b/src/lib/content/graph-registry-runtime.generated.ts
index f003fadb..bcd0b192 100644
--- a/src/lib/content/graph-registry-runtime.generated.ts
+++ b/src/lib/content/graph-registry-runtime.generated.ts
@@ -15,6 +15,7 @@ import dpoTrainingFlowGraphRecord from "@/content/registry/graphs/dpo-training-f
 import expertParallelOverlapSystemFlowGraphRecord from "@/content/registry/graphs/expert-parallel-overlap-system-flow.json";
 import feedForwardNetworkFamilyOverviewGraphRecord from "@/content/registry/graphs/feed-forward-network-family-overview.json";
 import fp4QuantizationAwareTrainingTrainingFlowGraphRecord from "@/content/registry/graphs/fp4-quantization-aware-training-training-flow.json";
+import gpt2ArchitectureGraphRecord from "@/content/registry/graphs/gpt-2-architecture.json";
 import gpt3ArchitectureGraphRecord from "@/content/registry/graphs/gpt-3-architecture.json";
 import groupNormComputeFlowGraphRecord from "@/content/registry/graphs/group-norm-compute-flow.json";
 import groupedQueryAttentionComputeFlowGraphRecord from "@/content/registry/graphs/grouped-query-attention-compute-flow.json";
@@ -67,6 +68,7 @@ export const graphRecords: GraphRecord[] = [
   graphRecordSchema.parse(expertParallelOverlapSystemFlowGraphRecord),
   graphRecordSchema.parse(feedForwardNetworkFamilyOverviewGraphRecord),
   graphRecordSchema.parse(fp4QuantizationAwareTrainingTrainingFlowGraphRecord),
+  graphRecordSchema.parse(gpt2ArchitectureGraphRecord),
   graphRecordSchema.parse(gpt3ArchitectureGraphRecord),
   graphRecordSchema.parse(groupNormComputeFlowGraphRecord),
   graphRecordSchema.parse(groupedQueryAttentionComputeFlowGraphRecord),
diff --git a/src/lib/content/graph-registry-runtime.test.ts b/src/lib/content/graph-registry-runtime.test.ts
index 97a26ab1..584a0c42 100644
--- a/src/lib/content/graph-registry-runtime.test.ts
+++ b/src/lib/content/graph-registry-runtime.test.ts
@@ -161,7 +161,7 @@ describe("graph-registry-runtime", () => {
   test("lists all bundled graph records", () => {
     const records = listGraphRecords();
 
-    expect(records.length).toBe(49);
+    expect(records.length).toBe(50);
     expect(records.map((record) => record.id)).toContain(
       "graph.bpe-compute-flow",
     );
@@ -180,6 +180,9 @@ describe("graph-registry-runtime", () => {
     expect(records.map((record) => record.id)).toContain(
       "graph.deepseek-v4-flash-architecture",
     );
+    expect(records.map((record) => record.id)).toContain(
+      "graph.gpt-2-architecture",
+    );
     expect(records.map((record) => record.id)).toContain(
       "graph.deepseek-v4-pro-architecture",
     );
diff --git a/src/lib/content/published-docs-registry-manifest.ts b/src/lib/content/published-docs-registry-manifest.ts
index b462163e..cb180118 100644
--- a/src/lib/content/published-docs-registry-manifest.ts
+++ b/src/lib/content/published-docs-registry-manifest.ts
@@ -58,6 +58,14 @@ export const GENERATED_PUBLISHED_DOCS_ENTRIES = [
     pageKind: "model",
     section: "models",
   },
+  {
+    registryId: "model.gpt-2",
+    slug: "gpt-2",
+    docsSlug: "models/gpt-2",
+    url: "/docs/models/gpt-2",
+    pageKind: "model",
+    section: "models",
+  },
   {
     registryId: "model.gpt-3",
     slug: "gpt-3",
@@ -1113,6 +1121,7 @@ export const GENERATED_PUBLISHED_DOCS_REGISTRY_IDS = [
   "concept.yarn",
   "model.deepseek-v4-flash",
   "model.deepseek-v4-pro",
+  "model.gpt-2",
   "model.gpt-3",
   "module.absolute-positional-embeddings",
   "module.alibi",
diff --git a/src/lib/source.test.ts b/src/lib/source.test.ts
index cf6b7843..c506dac8 100644
--- a/src/lib/source.test.ts
+++ b/src/lib/source.test.ts
@@ -120,6 +120,7 @@ const CONCEPT_INDEX_URLS = [
 const MODEL_INDEX_URLS = [
   "/docs/models/deepseek-v4-flash",
   "/docs/models/deepseek-v4-pro",
+  "/docs/models/gpt-2",
   "/docs/models/gpt-3",
 ] as const;
 
diff --git a/src/tests/content/attention-tag-landing.test.ts b/src/tests/content/attention-tag-landing.test.ts
index e93e2eef..1d0923a4 100644
--- a/src/tests/content/attention-tag-landing.test.ts
+++ b/src/tests/content/attention-tag-landing.test.ts
@@ -71,6 +71,7 @@ describe("attention tag landing resources", () => {
     expect(modelGroup?.resources.map((resource) => resource.url)).toEqual([
       "/docs/models/deepseek-v4-flash",
       "/docs/models/deepseek-v4-pro",
+      "/docs/models/gpt-2",
       "/docs/models/gpt-3",
     ]);
 

From 907215a970ad9df4e9c0dd2560dd17a971f441a0 Mon Sep 17 00:00:00 2001
From: aabdi <andreas.abdi@outlook.com>
Date: Sat, 20 Jun 2026 01:46:49 +0700
Subject: [PATCH 3/6] feat: [gpt-2-model-page-003] - [Make GPT-2 discoverable
 through reader navigation]

---
 src/lib/content/gpt-2-model-page.test.tsx | 40 +++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/src/lib/content/gpt-2-model-page.test.tsx b/src/lib/content/gpt-2-model-page.test.tsx
index 7f233d7c..845e749c 100644
--- a/src/lib/content/gpt-2-model-page.test.tsx
+++ b/src/lib/content/gpt-2-model-page.test.tsx
@@ -1,9 +1,15 @@
 import { describe, expect, test } from "bun:test";
 import { createElement } from "react";
 import { renderToStaticMarkup } from "react-dom/server";
+import TagLandingPage from "@/app/(site)/tags/[slug]/page";
 import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
 import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
 import { loadModelPage } from "@/lib/content/model-page";
+import { docsSearchApi } from "@/lib/search/search-server";
+
+function pageBaseUrl(url: string): string {
+  return url.split("#")[0] ?? url;
+}
 
 describe("gpt-2 model page", () => {
   test("derived related docs keep core GPT-2 learning paths visible without duplicate module links", () => {
@@ -53,4 +59,38 @@ describe("gpt-2 model page", () => {
     expect(html).toContain("Multi-Head");
     expect(html).not.toContain("Missing graph record");
   });
+
+  test.each([
+    "GPT-2",
+    "gpt2",
+  ] as const)("search ranks the canonical gpt-2 page first for %s", async (query) => {
+    const results = await docsSearchApi.search(query);
+
+    expect(results.length).toBeGreaterThan(0);
+    expect(pageBaseUrl(results[0]?.url ?? "")).toBe("/docs/models/gpt-2");
+  });
+
+  test.each([
+    "decoder-only transformer",
+    "byte-level tokenization model",
+  ] as const)("search keeps the canonical gpt-2 page discoverable for %s", async (query) => {
+    const results = await docsSearchApi.search(query);
+
+    expect(results.length).toBeGreaterThan(0);
+    expect(
+      results.some(
+        (result) => pageBaseUrl(result.url) === "/docs/models/gpt-2",
+      ),
+    ).toBe(true);
+  });
+
+  test("neighboring attention tag landing routes readers into the gpt-2 page", async () => {
+    const page = await TagLandingPage({
+      params: Promise.resolve({ slug: "attention" }),
+    });
+    const html = renderToStaticMarkup(page);
+
+    expect(html).toContain('href="/docs/models/gpt-2"');
+    expect(html).toContain("GPT-2");
+  });
 });

From b1a289459208683358c44636c687330a1948da6e Mon Sep 17 00:00:00 2001
From: aabdi <andreas.abdi@outlook.com>
Date: Sat, 20 Jun 2026 01:50:11 +0700
Subject: [PATCH 4/6] feat: [gpt-2-model-page-004] - [Add focused validation
 for the GPT-2 model-page contract]

---
 src/lib/content/gpt-2-model-page.test.tsx | 52 +++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/lib/content/gpt-2-model-page.test.tsx b/src/lib/content/gpt-2-model-page.test.tsx
index 845e749c..107f9617 100644
--- a/src/lib/content/gpt-2-model-page.test.tsx
+++ b/src/lib/content/gpt-2-model-page.test.tsx
@@ -5,6 +5,10 @@ import TagLandingPage from "@/app/(site)/tags/[slug]/page";
 import { DerivedRelatedDocs } from "@/features/docs/components/DerivedRelatedDocs";
 import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders";
 import { loadModelPage } from "@/lib/content/model-page";
+import { loadPublishedDocsPages } from "@/lib/content/pages";
+import { loadRegistry } from "@/lib/content/registry";
+import { getModelById } from "@/lib/content/registry-runtime";
+import { buildSearchDocuments } from "@/lib/search/build-documents";
 import { docsSearchApi } from "@/lib/search/search-server";
 
 function pageBaseUrl(url: string): string {
@@ -12,6 +16,54 @@ function pageBaseUrl(url: string): string {
 }
 
 describe("gpt-2 model page", () => {
+  test("canonical bundle keeps the published route, registry record, English messages, and search document aligned", async () => {
+    const record = getModelById("model.gpt-2");
+    if (!record) {
+      throw new Error("expected model.gpt-2 in registry runtime");
+    }
+
+    const page = await loadModelPage("gpt-2");
+    const pages = await loadPublishedDocsPages("en");
+    const registry = await loadRegistry();
+    const documents = buildSearchDocuments(pages, registry);
+    const gpt2Document = documents.find(
+      (document) => document.url === "/docs/models/gpt-2",
+    );
+
+    expect(page.frontmatter.kind).toBe("model");
+    expect(page.frontmatter.status).toBe("published");
+    expect(page.frontmatter.registryId).toBe(record.id);
+    expect(page.messages.title).toBe("GPT-2");
+    expect(page.messages.description).toContain("decoder-only");
+    expect(page.messages.openingSummary).toContain(
+      "Generative Pre-trained Transformer 2",
+    );
+    expect(page.assets.architectureGraph).toMatchObject({
+      type: "graph",
+      graphId: "graph.gpt-2-architecture",
+    });
+    expect(
+      pages.some((publishedPage) => publishedPage.url === "/docs/models/gpt-2"),
+    ).toBe(true);
+    expect(gpt2Document?.kind).toBe("model");
+    expect(gpt2Document?.aliases).toEqual(
+      expect.arrayContaining([
+        "GPT-2",
+        "Generative Pre-trained Transformer 2",
+        "gpt2",
+      ]),
+    );
+    expect(gpt2Document?.tags).toEqual(
+      expect.arrayContaining(["attention", "tokenization"]),
+    );
+    expect(gpt2Document?.relatedIds).toEqual(
+      expect.arrayContaining([
+        "concept.transformer-architecture",
+        "module.byte-level-tokenization",
+      ]),
+    );
+  });
+
   test("derived related docs keep core GPT-2 learning paths visible without duplicate module links", () => {
     const html = renderToStaticMarkup(
       <DerivedRelatedDocs

From 75d15527ff2b9389d812a1d80ec83df397d920ac Mon Sep 17 00:00:00 2001
From: aabdi <andreas.abdi@outlook.com>
Date: Mon, 22 Jun 2026 12:08:11 +0700
Subject: [PATCH 5/6] chore: retrigger ci for mergeability


From 331cf04340d240d71bce72b48d4c65f8134997e5 Mon Sep 17 00:00:00 2001
From: aabdi <andreas.abdi@outlook.com>
Date: Mon, 22 Jun 2026 12:10:59 +0700
Subject: [PATCH 6/6] chore: retrigger ci for mergeability