From 299b4071eb1097d595b0d25e524327f87ec1e0db Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 09:43:18 +0700 Subject: [PATCH 1/9] feat: [cross-attention-module-page-001] - [Register cross-attention as a first-class attention module] --- .../registry/modules/cross-attention.json | 56 +++++++++++++++++++ src/lib/content/baseline-records.test.ts | 35 ++++++++++++ .../content/cross-attention-discovery.test.ts | 40 +++++++++++++ src/lib/content/registry-runtime.test.ts | 32 +++++++++++ 4 files changed, 163 insertions(+) create mode 100644 src/content/registry/modules/cross-attention.json create mode 100644 src/lib/content/cross-attention-discovery.test.ts diff --git a/src/content/registry/modules/cross-attention.json b/src/content/registry/modules/cross-attention.json new file mode 100644 index 00000000..4aaeadcf --- /dev/null +++ b/src/content/registry/modules/cross-attention.json @@ -0,0 +1,56 @@ +{ + "id": "module.cross-attention", + "slug": "cross-attention", + "kind": "module", + "defaultTitleKey": "title", + "defaultSummaryKey": "description", + "aliases": [ + "cross attention", + "cross-attention", + "encoder-decoder attention", + "encoder decoder attention" + ], + "tags": ["attention"], + "relatedIds": [ + "module.attention", + "module.multi-head-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model" + ], + "citationIds": ["citation.attention-is-all-you-need"], + "status": "published", + "createdAt": "2026-06-21T00:00:00.000Z", + "updatedAt": "2026-06-21T00:00:00.000Z", + "releaseDate": "2017-06-12", + "authors": [ + "Ashish Vaswani", + "Noam Shazeer", + "Niki Parmar", + "Jakob Uszkoreit", + "Llion Jones", + "Aidan N. 
Gomez", + "Lukasz Kaiser", + "Illia Polosukhin" + ], + "sourceId": "citation.attention-is-all-you-need", + "moduleType": "attention", + "moduleFamily": "attention", + "conceptType": "attention-variant", + "variantGroup": "attention-memory-sources", + "sidebarGrouping": { + "modules": "attention-foundations" + }, + "optimizes": [ + "cross-source-conditioning", + "multistream-context-fusion", + "encoder-decoder-context-bridging" + ], + "exampleModelIds": [], + "improvesOnIds": [], + "tradeoffIds": [], + "usedByModelIds": [], + "introducedByPaperIds": [], + "mathLevel": "light" +} diff --git a/src/lib/content/baseline-records.test.ts b/src/lib/content/baseline-records.test.ts index d79725c4..ba5b2368 100644 --- a/src/lib/content/baseline-records.test.ts +++ b/src/lib/content/baseline-records.test.ts @@ -196,6 +196,41 @@ describe("Phase 1 baseline registry records", () => { expect(module.optimizes.length).toBeGreaterThan(0); }); + test("cross-attention module JSON passes moduleRecordSchema", async () => { + const module = await readRegistryJson( + "modules/cross-attention.json", + moduleRecordSchema, + ); + + expect(module.id).toBe("module.cross-attention"); + expect(module.kind).toBe("module"); + expect(module.status).toBe("published"); + expect(module.moduleType).toBe("attention"); + expect(module.moduleFamily).toBe("attention"); + expect(module.tags).toEqual(["attention"]); + expect(module.aliases).toEqual( + expect.arrayContaining([ + "cross attention", + "cross-attention", + "encoder-decoder attention", + "encoder decoder attention", + ]), + ); + expect(module.variantGroup).toBe("attention-memory-sources"); + expect(module.conceptType).toBe("attention-variant"); + expect(module.relatedIds).toEqual([ + "module.attention", + "module.multi-head-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model", + ]); + 
expect(module.citationIds).toContain("citation.attention-is-all-you-need"); + expect(module.sidebarGrouping?.modules).toBe("attention-foundations"); + expect(module.optimizes.length).toBeGreaterThan(0); + }); + test("attention tag JSON passes tagRecordSchema", async () => { const tag = await readRegistryJson("tags/attention.json", tagRecordSchema); diff --git a/src/lib/content/cross-attention-discovery.test.ts b/src/lib/content/cross-attention-discovery.test.ts new file mode 100644 index 00000000..a8eddef6 --- /dev/null +++ b/src/lib/content/cross-attention-discovery.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, test } from "bun:test"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { + getModuleById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; + +describe("cross-attention discovery wiring", () => { + test("cross-attention derives nearby published related docs in registry order", () => { + const source = getModuleById("module.cross-attention"); + if (!source) { + throw new Error("expected module.cross-attention in registry runtime"); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + expect(items.map((item) => item.registryId)).toEqual([ + "module.attention", + "module.multi-head-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model", + ]); + expect(items.map((item) => item.href)).toEqual([ + "/docs/modules/attention", + "/docs/modules/multi-head-attention", + "/docs/modules/bidirectional-attention", + "/docs/concepts/transformer-architecture", + "/docs/glossary/encoder-decoder", + "/docs/glossary/multimodal-model", + ]); + expect(items.every((item) => item.isPlanned === false)).toBe(true); + }); +}); diff --git 
a/src/lib/content/registry-runtime.test.ts b/src/lib/content/registry-runtime.test.ts index efc6dfe4..8cc3d046 100644 --- a/src/lib/content/registry-runtime.test.ts +++ b/src/lib/content/registry-runtime.test.ts @@ -170,6 +170,29 @@ describe("registry-runtime", () => { expect(record?.citationIds).toEqual(["citation.kudo-sentencepiece"]); }); + test("getModuleById returns cross-attention with nearby architecture links", () => { + const record = getModuleById("module.cross-attention"); + expect(record?.slug).toBe("cross-attention"); + expect(record?.tags).toEqual(["attention"]); + expect(record?.aliases).toEqual( + expect.arrayContaining([ + "cross attention", + "cross-attention", + "encoder-decoder attention", + "encoder decoder attention", + ]), + ); + expect(record?.relatedIds).toEqual([ + "module.attention", + "module.multi-head-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model", + ]); + expect(record?.variantGroup).toBe("attention-memory-sources"); + }); + test("getRegistryTags returns tags for a known module", () => { expect(getRegistryTags("module.grouped-query-attention")).toEqual([ "attention", @@ -195,6 +218,10 @@ describe("registry-runtime", () => { expect(getRegistryTags("module.causal-attention")).toEqual(["attention"]); }); + test("getRegistryTags returns tags for cross-attention", () => { + expect(getRegistryTags("module.cross-attention")).toEqual(["attention"]); + }); + test("getRegistryTags returns tags for a known concept", () => { expect(getRegistryTags("concept.token")).toEqual([ "attention", @@ -241,8 +268,14 @@ describe("registry-runtime", () => { ]); }); + test("getRegistryCitationIds returns citations for cross-attention", () => { + 
expect(getRegistryCitationIds("module.cross-attention")).toEqual([ + "citation.attention-is-all-you-need", + ]); + }); + test("getRegistryCitationIds returns citations for causal attention", () => { expect(getRegistryCitationIds("module.causal-attention")).toEqual([ "citation.attention-is-all-you-need", ]); }); From b7370155cb1cbcaa6f321eddc00874a04cb43dae Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 09:52:34 +0700 Subject: [PATCH 2/9] feat: [cross-attention-module-page-002] - [Publish the canonical cross-attention page with the required teaching aids] --- .../docs/modules/cross-attention/assets.json | 25 +++ .../modules/cross-attention/messages/en.json | 190 ++++++++++++++++++ .../docs/modules/cross-attention/page.mdx | 85 ++++++++ .../cross-attention-memory-pattern.json | 130 ++++++++++++ .../tables/cross-attention-comparison.json | 58 ++++++ .../cross-attention-module-page.test.ts | 104 ++++++++++ .../generated/table-registry.generated.ts | 3 + 7 files changed, 595 insertions(+) create mode 100644 src/content/docs/modules/cross-attention/assets.json create mode 100644 src/content/docs/modules/cross-attention/messages/en.json create mode 100644 src/content/docs/modules/cross-attention/page.mdx create mode 100644 src/content/registry/graphs/cross-attention-memory-pattern.json create mode 100644 src/content/registry/tables/cross-attention-comparison.json create mode 100644 src/lib/content/cross-attention-module-page.test.ts diff --git a/src/content/docs/modules/cross-attention/assets.json b/src/content/docs/modules/cross-attention/assets.json new file mode 100644 index 00000000..1cfd85cb --- /dev/null +++ b/src/content/docs/modules/cross-attention/assets.json @@ -0,0 +1,25 @@ +{ + "computeFlow": { + "type": "attention-variant-graph", + "defaultVariantId": "cross", + "variants": [ + { + "variantId": "self", + "graphId": "graph.multi-head-attention-time-pattern", + "labelKey": "assets.computeFlow.variants.self.label" + }, + { + "variantId": "cross", + 
"graphId": "graph.cross-attention-memory-pattern", + "labelKey": 
"assets.computeFlow.variants.cross.label" + } + ], + "webRenderer": "react-flow", + "printRenderer": "mermaid", + "altKey": "assets.computeFlow.alt" + }, + "comparisonTable": { + "type": "table", + "tableId": "table.cross-attention-comparison" + } +} diff --git a/src/content/docs/modules/cross-attention/messages/en.json b/src/content/docs/modules/cross-attention/messages/en.json new file mode 100644 index 00000000..12b0455c --- /dev/null +++ b/src/content/docs/modules/cross-attention/messages/en.json @@ -0,0 +1,190 @@ +{ + "title": "Cross-Attention", + "description": "An attention pattern where queries come from one stream while keys and values come from a different memory source.", + "openingSummary": "Cross-attention is the attention pattern that lets one sequence ask questions of a different sequence or memory bank, which is why decoder layers can read encoder outputs and multimodal models can fuse text with image or audio features.", + "sections": { + "whatItIs": { + "title": "What It Is", + "body": "Cross-attention is an attention variant in which the query vectors come from one stream, but the key and value vectors come from somewhere else. The model still performs a weighted lookup, but it no longer reads only from the same sequence that produced the query." + }, + "whyItExists": { + "title": "Why It Exists", + "body": "Self-attention is enough when one sequence only needs to mix information inside itself. Cross-attention solves the different problem of conditioning one stream on information stored elsewhere, such as a decoder reading an encoder memory or a text stack reading image features." + }, + "howItWorks": { + "title": "How It Works", + "body": "A target stream produces queries, while a separate source stream produces keys and values. Each query scores the external memory slots, turns those scores into weights with softmax, and blends the matching value vectors into a context vector for the target stream. 
The critical difference from self-attention is the memory source: the target tokens ask, but a different representation answers." + }, + "mathOrComputeSchema": { + "title": "Math Or Compute Schema", + "body": "The formulas below contrast self-attention with cross-attention. The weighted lookup is still scaled dot-product attention, but cross-attention changes which hidden states create the queries versus the keys and values." + }, + "comparedToNearbyModules": { + "title": "Compared To Nearby Modules", + "body": "Compared with the broader attention overview, cross-attention is one specific memory-source pattern. Compared with multi-head attention, it can still use multiple heads but changes where keys and values come from. Compared with causal and bidirectional attention, the main contrast is not the mask direction but the fact that the model reads from a different sequence." + }, + "exampleArchitectures": { + "title": "Example Architectures", + "body": "Cross-attention appears in encoder-decoder Transformers where decoder states read encoder outputs, in multimodal models where text queries image or audio features, and in retrieval-style systems where the active stream reads an external memory representation." + }, + "limitationsAndTradeoffs": { + "title": "Limitations And Tradeoffs", + "body": "Cross-attention adds another memory interface, which means extra projections, more tensors to keep available, and more places where weak source representations can hurt the result. If the external memory is noisy or badly aligned with the target stream, the lookup can pull in the wrong evidence." + }, + "whyItStillMatters": { + "title": "Why It Still Matters", + "body": "Many important model designs depend on one stream reading another without collapsing both into one shared sequence. A dedicated cross-attention page makes that bridge mechanism clear instead of leaving it implied inside larger architecture diagrams." 
+ }, + "related": { + "title": "Related" + }, + "tags": { + "title": "Tags" + }, + "references": { + "title": "References" + } + }, + "assets": { + "computeFlow": { + "alt": "Self-attention versus cross-attention memory source comparison", + "variants": { + "self": { + "label": "Self-attention" + }, + "cross": { + "label": "Cross-attention" + } + } + }, + "comparisonTable": {} + }, + "tables": { + "comparison": { + "columns": { + "cross": { + "title": "Cross-Attention" + }, + "attention": { + "title": "Attention Overview" + }, + "mha": { + "title": "Multi-Head Attention" + }, + "bidirectional": { + "title": "Bidirectional Attention" + } + }, + "dimensions": { + "querySource": "Where queries come from", + "memorySource": "Where keys and values come from", + "mainUse": "Main use context" + }, + "values": { + "cross": { + "querySource": "The active target stream that needs outside information", + "memorySource": "A different source sequence or memory bank", + "mainUse": "Conditioning one stream on another, such as decoder-on-encoder or text-on-image" + }, + "attention": { + "querySource": "Depends on the attention pattern being discussed", + "memorySource": "May be the same sequence or a separate memory depending on the design", + "mainUse": "General weighted lookup across Transformer modules" + }, + "mha": { + "querySource": "The same sequence that also produces keys and values in standard self-attention", + "memorySource": "The same sequence, split across multiple heads", + "mainUse": "Baseline dense attention inside Transformer blocks" + }, + "bidirectional": { + "querySource": "The visible sequence being encoded", + "memorySource": "The same visible sequence with left and right context available", + "mainUse": "Encoder-style full-context understanding" + } + } + } + }, + "math": { + "selfAttentionSchema": { + "label": "Self-attention", + "formula": "\\mathrm{Attention}(Q(X), K(X), V(X)) = \\mathrm{softmax}\\!\\left(\\frac{Q(X) K(X)^{\\top}}{\\sqrt{d_k}}\\right) 
V(X)", + "variableDefinitions": { + "x": { + "term": "X", + "definition": "Hidden states from the same source sequence." + }, + "q": { + "term": "Q(X)", + "definition": "Query projection of X." + }, + "k": { + "term": "K(X)", + "definition": "Key projection of X." + }, + "v": { + "term": "V(X)", + "definition": "Value projection of X." + }, + "dk": { + "term": "d_k", + "definition": "Key dimension per head." + } + } + }, + "crossAttentionSchema": { + "label": "Cross-attention", + "formula": "\\mathrm{Attention}(Q(Y), K(X), V(X)) = \\mathrm{softmax}\\!\\left(\\frac{Q(Y) K(X)^{\\top}}{\\sqrt{d_k}}\\right) V(X)", + "variableDefinitions": { + "x": { + "term": "X", + "definition": "Hidden states from the external memory source." + }, + "y": { + "term": "Y", + "definition": "Hidden states from the active target stream." + }, + "q": { + "term": "Q(Y)", + "definition": "Query projection of Y." + }, + "k": { + "term": "K(X)", + "definition": "Key projection of X." + }, + "v": { + "term": "V(X)", + "definition": "Value projection of X." + }, + "dk": { + "term": "d_k", + "definition": "Key dimension per head." + } + } + } + }, + "graph": { + "nodes": { + "timeCurrentQuery": { + "label": "q_t" + }, + "timeKv0": { + "label": "KV^X_0" + }, + "timeKv1": { + "label": "KV^X_1" + }, + "timeKvEllipsis": { + "label": "\\cdots" + }, + "timeKvT3": { + "label": "KV^X_{s-3}" + }, + "timeKvT2": { + "label": "KV^X_{s-2}" + }, + "timeKvT1": { + "label": "KV^X_{s-1}" + } + } + } +} diff --git a/src/content/docs/modules/cross-attention/page.mdx b/src/content/docs/modules/cross-attention/page.mdx new file mode 100644 index 00000000..34a5e29f --- /dev/null +++ b/src/content/docs/modules/cross-attention/page.mdx @@ -0,0 +1,85 @@ +--- +title: Cross-Attention +description: How cross-attention lets one sequence query a different memory source instead of reading only from itself. 
+kind: "module" +registryId: "module.cross-attention" +messageNamespace: "local" +assetNamespace: "local" +status: "published" +tags: + - attention +aliases: + - "cross attention" + - "cross-attention" + - "encoder-decoder attention" +updatedAt: "2026-06-21" +--- + +import { CitationList } from "@/features/docs/components/CitationList"; +import { RelatedDocs } from "@/features/docs/components/RelatedDocs"; +import { Section } from "@/features/docs/components/Section"; +import { T } from "@/features/docs/components/T"; +import { TagPillList } from "@/features/docs/components/TagPillList"; +import { ModuleComparisonTable } from "@/features/models/components/ModuleComparisonTable"; +import { ModuleGraph } from "@/features/models/components/ModuleGraph"; +import { ModuleAtAGlance } from "@/features/models/components/ModuleAtAGlance"; +import { ModuleAttentionSchemaComparison } from "@/features/models/components/ModuleAttentionSchemaComparison"; +import { ModelsUsingModule } from "@/features/models/components/ModelsUsingModule"; + + + + +
+ +
+ +
+ +
+ +
+ + + +
+ +
+ + +
+ +
+ + + +
+ +
+ + +
+ +
+ +
+ +
+ +
+ + + +
+ +
+ +
+ +
diff --git a/src/content/registry/graphs/cross-attention-memory-pattern.json b/src/content/registry/graphs/cross-attention-memory-pattern.json new file mode 100644 index 00000000..37559962 --- /dev/null +++ b/src/content/registry/graphs/cross-attention-memory-pattern.json @@ -0,0 +1,130 @@ +{ + "id": "graph.cross-attention-memory-pattern", + "slug": "cross-attention-memory-pattern", + "kind": "graph", + "defaultTitleKey": "title", + "defaultSummaryKey": "description", + "aliases": [], + "tags": ["attention"], + "relatedIds": [], + "citationIds": [], + "status": "published", + "createdAt": "2026-06-21T00:00:00.000Z", + "updatedAt": "2026-06-21T00:00:00.000Z", + "subjectId": "module.cross-attention", + "graphType": "attention-pattern", + "rootNodeId": "cross-time-current-query", + "layout": "vertical-expandable", + "defaultExpandedDepth": 1, + "supportedRenderers": ["react-flow", "mermaid"], + "nodes": [ + { + "id": "cross-time-current-query", + "labelKey": "graph.nodes.timeCurrentQuery.label", + "moduleKind": "attention", + "position": { "x": 360, "y": 0 }, + "headCountRole": "query", + "visualRole": "summary-node", + "childNodeIds": [ + "cross-time-kv-0", + "cross-time-kv-1", + "cross-time-kv-ellipsis", + "cross-time-kv-s-3", + "cross-time-kv-s-2", + "cross-time-kv-s-1" + ] + }, + { + "id": "cross-time-kv-0", + "labelKey": "graph.nodes.timeKv0.label", + "moduleKind": "cache", + "position": { "x": 0, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-1", + "labelKey": "graph.nodes.timeKv1.label", + "moduleKind": "cache", + "position": { "x": 140, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-ellipsis", + "labelKey": "graph.nodes.timeKvEllipsis.label", + "moduleKind": "cache", + "position": { "x": 280, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": 
"cross-time-kv-s-3", + "labelKey": "graph.nodes.timeKvT3.label", + "moduleKind": "cache", + "position": { "x": 420, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-s-2", + "labelKey": "graph.nodes.timeKvT2.label", + "moduleKind": "cache", + "position": { "x": 560, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-s-1", + "labelKey": "graph.nodes.timeKvT1.label", + "moduleKind": "cache", + "position": { "x": 700, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + } + ], + "edges": [ + { + "id": "cross-time-query-to-kv-0", + "source": "cross-time-current-query", + "target": "cross-time-kv-0", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-1", + "source": "cross-time-current-query", + "target": "cross-time-kv-1", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-ellipsis", + "source": "cross-time-current-query", + "target": "cross-time-kv-ellipsis", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-s-3", + "source": "cross-time-current-query", + "target": "cross-time-kv-s-3", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-s-2", + "source": "cross-time-current-query", + "target": "cross-time-kv-s-2", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-s-1", + "source": "cross-time-current-query", + "target": "cross-time-kv-s-1", + "edgeKind": "data-flow" + } + ] +} diff --git a/src/content/registry/tables/cross-attention-comparison.json b/src/content/registry/tables/cross-attention-comparison.json new file mode 100644 index 00000000..d376c350 --- /dev/null +++ b/src/content/registry/tables/cross-attention-comparison.json @@ -0,0 +1,58 @@ +{ + "id": "table.cross-attention-comparison", + "subjectId": "module.cross-attention", + "columns": [ + { + "moduleId": "module.cross-attention", + "titleKey": 
"tables.comparison.columns.cross.title" + }, + { + "moduleId": "module.attention", + "titleKey": "tables.comparison.columns.attention.title" + }, + { + "moduleId": "module.multi-head-attention", + "titleKey": "tables.comparison.columns.mha.title" + }, + { + "moduleId": "module.bidirectional-attention", + "titleKey": "tables.comparison.columns.bidirectional.title" + } + ], + "dimensions": [ + { + "id": "querySource", + "labelKey": "tables.comparison.dimensions.querySource" + }, + { + "id": "memorySource", + "labelKey": "tables.comparison.dimensions.memorySource" + }, + { + "id": "mainUse", + "labelKey": "tables.comparison.dimensions.mainUse" + } + ], + "valueKeysByModuleId": { + "module.cross-attention": { + "querySource": "tables.comparison.values.cross.querySource", + "memorySource": "tables.comparison.values.cross.memorySource", + "mainUse": "tables.comparison.values.cross.mainUse" + }, + "module.attention": { + "querySource": "tables.comparison.values.attention.querySource", + "memorySource": "tables.comparison.values.attention.memorySource", + "mainUse": "tables.comparison.values.attention.mainUse" + }, + "module.multi-head-attention": { + "querySource": "tables.comparison.values.mha.querySource", + "memorySource": "tables.comparison.values.mha.memorySource", + "mainUse": "tables.comparison.values.mha.mainUse" + }, + "module.bidirectional-attention": { + "querySource": "tables.comparison.values.bidirectional.querySource", + "memorySource": "tables.comparison.values.bidirectional.memorySource", + "mainUse": "tables.comparison.values.bidirectional.mainUse" + } + } +} diff --git a/src/lib/content/cross-attention-module-page.test.ts b/src/lib/content/cross-attention-module-page.test.ts new file mode 100644 index 00000000..20424bd3 --- /dev/null +++ b/src/lib/content/cross-attention-module-page.test.ts @@ -0,0 +1,104 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { 
createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { + parsePageAssetConfig, + validatePageAssetReferences, +} from "@/lib/content/assets"; +import { MODULES_DOCS_ROOT } from "@/lib/content/content-paths"; +import { expectGlossaryBodyOmitsTitleHeading } from "@/lib/content/glossary-test-helpers"; +import { loadModulePage } from "@/lib/content/module-page"; +import { pageMessagesSchema } from "@/lib/content/schemas"; + +const pageDir = join(MODULES_DOCS_ROOT, "cross-attention"); +const messagesPath = join(pageDir, "messages/en.json"); +const assetsPath = join(pageDir, "assets.json"); + +describe("cross-attention page messages", () => { + test("includes the localized fields required by the module template", () => { + const messages = pageMessagesSchema.parse( + JSON.parse(readFileSync(messagesPath, "utf8")), + ); + + expect(messages.title).toBe("Cross-Attention"); + expect(messages.openingSummary?.length).toBeGreaterThan(0); + expect(messages.problemStatement).toBeUndefined(); + expect(messages.coreIdea).toBeUndefined(); + expect( + messages.sections?.mathOrComputeSchema?.body?.length, + ).toBeGreaterThan(0); + expect(messages.math?.selfAttentionSchema?.label).toBe("Self-attention"); + expect(messages.math?.crossAttentionSchema?.label).toBe("Cross-attention"); + expect(messages.math?.crossAttentionSchema?.formula).toContain("Q(Y)"); + expect(messages.math?.crossAttentionSchema?.formula).toContain("K(X)"); + }); +}); + +describe("loadModulePage cross-attention", () => { + test("renders the canonical module structure with separate-memory teaching aids", async () => { + const page = await loadModulePage("cross-attention"); + + expect(page.frontmatter.registryId).toBe("module.cross-attention"); + expect(page.frontmatter.messageNamespace).toBe("local"); + expect(page.frontmatter.assetNamespace).toBe("local"); + 
expect(page.messages.title).toBe("Cross-Attention"); + + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: page.messages, + assets: page.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: page.content, + }), + ); + + expectGlossaryBodyOmitsTitleHeading(html, page.messages.title); + expect(html).toContain("At a glance"); + expect(html).toContain("Math Or Compute Schema"); + expect(html).toContain("Compared To Nearby Modules"); + expect(html).toContain("external memory slots"); + expect(html).toContain('data-attention-variant-comparison="true"'); + expect(html).toContain('data-attention-variant-active="cross"'); + expect(html).toContain('data-attention-variant-option="self"'); + expect(html).toContain('data-attention-variant-option="cross"'); + expect(html).toContain( + 'data-graph-id="graph.cross-attention-memory-pattern"', + ); + expect(html).toContain('data-graph-node-id="cross-time-kv-s-2"'); + expect(html).toContain('data-table-id="table.cross-attention-comparison"'); + expect(html).toContain("Where keys and values come from"); + expect(html).toContain('href="/docs/glossary/encoder-decoder"'); + expect(html).toContain('href="/docs/glossary/multimodal-model"'); + expect(html).toContain('data-testid="curated-related-docs"'); + expect((html.match(/data-testid="tag-pill-list"/g) ?? 
[]).length).toBe(1); + expect(html).not.toContain("Reader Shortcut"); + expect(html).not.toContain('aria-label="Module metadata"'); + }); +}); + +describe("cross-attention page assets", () => { + test("resolve graph and table assets with message-backed copy", () => { + const messages = pageMessagesSchema.parse( + JSON.parse(readFileSync(messagesPath, "utf8")), + ); + const assets = parsePageAssetConfig( + JSON.parse(readFileSync(assetsPath, "utf8")), + ); + + expect(assets.computeFlow.type).toBe("attention-variant-graph"); + if (assets.computeFlow.type === "attention-variant-graph") { + expect(assets.computeFlow.defaultVariantId).toBe("cross"); + expect( + assets.computeFlow.variants.map((variant) => variant.graphId), + ).toEqual([ + "graph.multi-head-attention-time-pattern", + "graph.cross-attention-memory-pattern", + ]); + } + expect(assets.comparisonTable.type).toBe("table"); + expect(validatePageAssetReferences(assets, messages)).toEqual([]); + }); +}); diff --git a/src/lib/content/generated/table-registry.generated.ts b/src/lib/content/generated/table-registry.generated.ts index 6997843c..42887488 100644 --- a/src/lib/content/generated/table-registry.generated.ts +++ b/src/lib/content/generated/table-registry.generated.ts @@ -6,6 +6,7 @@ import bidirectionalAttentionComparisonTableRecord from "@/content/registry/tabl import bpeComparisonTableRecord from "@/content/registry/tables/bpe-comparison.json"; import byteLevelTokenizationComparisonTableRecord from "@/content/registry/tables/byte-level-tokenization-comparison.json"; import causalAttentionComparisonTableRecord from "@/content/registry/tables/causal-attention-comparison.json"; +import crossAttentionComparisonTableRecord from "@/content/registry/tables/cross-attention-comparison.json"; import feedForwardNetworkComparisonTableRecord from "@/content/registry/tables/feed-forward-network-comparison.json"; import geluComparisonTableRecord from "@/content/registry/tables/gelu-comparison.json"; import 
groupNormComparisonTableRecord from "@/content/registry/tables/group-norm-comparison.json"; @@ -36,6 +37,7 @@ export const generatedTableRegistrySourceFiles = [ "bpe-comparison.json", "byte-level-tokenization-comparison.json", "causal-attention-comparison.json", + "cross-attention-comparison.json", "feed-forward-network-comparison.json", "gelu-comparison.json", "group-norm-comparison.json", @@ -67,6 +69,7 @@ export const generatedTableRegistryPayloads = [ bpeComparisonTableRecord, byteLevelTokenizationComparisonTableRecord, causalAttentionComparisonTableRecord, + crossAttentionComparisonTableRecord, feedForwardNetworkComparisonTableRecord, geluComparisonTableRecord, groupNormComparisonTableRecord, From fea148865782f367f853da4437a0252a1e8b0e89 Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 09:57:57 +0700 Subject: [PATCH 3/9] feat: [cross-attention-module-page-003] - [Compare cross-attention against nearby attention variants and link onward paths] --- .../docs/modules/cross-attention/messages/en.json | 2 +- src/lib/content/cross-attention-module-page.test.ts | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/content/docs/modules/cross-attention/messages/en.json b/src/content/docs/modules/cross-attention/messages/en.json index 12b0455c..05496db1 100644 --- a/src/content/docs/modules/cross-attention/messages/en.json +++ b/src/content/docs/modules/cross-attention/messages/en.json @@ -21,7 +21,7 @@ }, "comparedToNearbyModules": { "title": "Compared To Nearby Modules", - "body": "Compared with the broader attention overview, cross-attention is one specific memory-source pattern. Compared with multi-head attention, it can still use multiple heads but changes where keys and values come from. Compared with causal and bidirectional attention, the main contrast is not the mask direction but the fact that the model reads from a different sequence." 
+ "body": "Compared with the broader attention overview, cross-attention is one specific memory-source pattern rather than the whole family. Compared with multi-head attention, it can still use multiple heads but changes where keys and values come from. Compared with causal attention, the key difference is not next-token masking: causal attention usually still reads the same growing sequence, while cross-attention reads a different memory and can appear inside a causal decoder block. Compared with bidirectional attention, cross-attention again changes the memory source rather than simply opening left and right context inside one sequence." }, "exampleArchitectures": { "title": "Example Architectures", diff --git a/src/lib/content/cross-attention-module-page.test.ts b/src/lib/content/cross-attention-module-page.test.ts index 20424bd3..eda7f329 100644 --- a/src/lib/content/cross-attention-module-page.test.ts +++ b/src/lib/content/cross-attention-module-page.test.ts @@ -60,6 +60,12 @@ describe("loadModulePage cross-attention", () => { expect(html).toContain("Math Or Compute Schema"); expect(html).toContain("Compared To Nearby Modules"); expect(html).toContain("external memory slots"); + expect(html).toContain( + "usually still reads the same growing sequence, while", + ); + expect(html).toContain( + "again changes the memory source rather than simply opening left and right context", + ); expect(html).toContain('data-attention-variant-comparison="true"'); expect(html).toContain('data-attention-variant-active="cross"'); expect(html).toContain('data-attention-variant-option="self"'); @@ -74,6 +80,10 @@ describe("loadModulePage cross-attention", () => { expect(html).toContain('href="/docs/glossary/multimodal-model"'); expect(html).toContain('data-testid="curated-related-docs"'); expect((html.match(/data-testid="tag-pill-list"/g) ?? 
[]).length).toBe(1); + expect(html).toContain('href="/tags/attention"'); + expect(html).toContain('data-testid="citation-list"'); + expect(html).toContain("Vaswani, Ashish"); + expect(html).toContain('href="https://arxiv.org/abs/1706.03762"'); expect(html).not.toContain("Reader Shortcut"); expect(html).not.toContain('aria-label="Module metadata"'); }); From e9e0128450bcbd9e5e66747bd18702eb86d2d7c0 Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 10:01:11 +0700 Subject: [PATCH 4/9] feat: [cross-attention-module-page-004] - [Add focused validation for the page contract and discovery behavior] --- .../cross-attention-module-page.test.ts | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/lib/content/cross-attention-module-page.test.ts b/src/lib/content/cross-attention-module-page.test.ts index eda7f329..9d406ff2 100644 --- a/src/lib/content/cross-attention-module-page.test.ts +++ b/src/lib/content/cross-attention-module-page.test.ts @@ -11,7 +11,12 @@ import { import { MODULES_DOCS_ROOT } from "@/lib/content/content-paths"; import { expectGlossaryBodyOmitsTitleHeading } from "@/lib/content/glossary-test-helpers"; import { loadModulePage } from "@/lib/content/module-page"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { getModuleById } from "@/lib/content/registry-runtime"; import { pageMessagesSchema } from "@/lib/content/schemas"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; const pageDir = join(MODULES_DOCS_ROOT, "cross-attention"); const messagesPath = join(pageDir, "messages/en.json"); @@ -112,3 +117,41 @@ describe("cross-attention page assets", () => { expect(validatePageAssetReferences(assets, messages)).toEqual([]); }); }); + +describe("cross-attention published discovery contract", () => { + test("keeps the canonical route 
discoverable through the published docs bundle and search documents", async () => { + const record = getModuleById("module.cross-attention"); + if (!record) { + throw new Error("expected module.cross-attention in registry runtime"); + } + + expect(record.status).toBe("published"); + expect(PUBLISHED_DOCS_REGISTRY_IDS.has(record.id)).toBe(true); + + const pages = await loadPublishedDocsPages("en"); + const registry = await loadRegistry(); + const documents = buildSearchDocuments(pages, registry); + const document = documents.find( + (entry) => entry.url === "/docs/modules/cross-attention", + ); + + expect(document).toBeDefined(); + expect(document?.kind).toBe("module"); + expect(document?.aliases).toEqual( + expect.arrayContaining([ + "cross attention", + "cross-attention", + "encoder-decoder attention", + ]), + ); + expect(document?.tags).toContain("attention"); + expect(document?.relatedIds).toEqual( + expect.arrayContaining([ + "module.attention", + "module.multi-head-attention", + "concept.encoder-decoder", + "concept.multimodal-model", + ]), + ); + }); +}); From 5eeefb32d9c3ad9d03c98d06191f58831df92c54 Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 10:27:31 +0700 Subject: [PATCH 5/9] feat: [cross-attention-module-page-002] - [Publish the canonical cross-attention page with the required teaching aids] --- src/app/docs/docs-slug-renderer.tsx | 7 +++ .../docs/modules/cross-attention/assets.json | 3 +- .../modules/cross-attention/messages/en.json | 1 + .../docs/components/DocsFoldedSummary.tsx | 15 +++++++ .../cross-attention-module-page.test.ts | 15 +++++++ .../content/graph-registry-runtime.test.ts | 31 +++++++++++++ ...ery-attention-module-shell-chrome.test.tsx | 9 ++-- src/lib/content/module-shell-render.tsx | 8 ++++ .../page-shell-folded-summary.test.tsx | 7 +-- src/lib/source.test.ts | 45 +++++++++++++++++++ .../content/attention-tag-landing.test.ts | 1 + 11 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 
src/features/docs/components/DocsFoldedSummary.tsx diff --git a/src/app/docs/docs-slug-renderer.tsx b/src/app/docs/docs-slug-renderer.tsx index 437a0027..3fafa73c 100644 --- a/src/app/docs/docs-slug-renderer.tsx +++ b/src/app/docs/docs-slug-renderer.tsx @@ -7,6 +7,7 @@ import { import { createRelativeLink } from "fumadocs-ui/mdx"; import { notFound } from "next/navigation"; import { DocsAutoLinkedDescription } from "@/features/docs/components/DocsAutoLinkedDescription"; +import { DocsFoldedSummary } from "@/features/docs/components/DocsFoldedSummary"; import { DocsPageBreadcrumb } from "@/features/docs/components/DocsPageBreadcrumb"; import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; import { @@ -75,6 +76,12 @@ async function renderLocalDocsPage( /> {loadedPage.messages.title} {description} + {localRef.section !== "glossary" && + loadedPage.messages.openingSummary?.length ? ( + + {loadedPage.messages.openingSummary} + + ) : null}
{loadedPage.content} diff --git a/src/content/docs/modules/cross-attention/assets.json b/src/content/docs/modules/cross-attention/assets.json index 1cfd85cb..595f7fa4 100644 --- a/src/content/docs/modules/cross-attention/assets.json +++ b/src/content/docs/modules/cross-attention/assets.json @@ -16,7 +16,8 @@ ], "webRenderer": "react-flow", "printRenderer": "mermaid", - "altKey": "assets.computeFlow.alt" + "altKey": "assets.computeFlow.alt", + "captionKey": "assets.computeFlow.caption" }, "comparisonTable": { "type": "table", diff --git a/src/content/docs/modules/cross-attention/messages/en.json b/src/content/docs/modules/cross-attention/messages/en.json index 05496db1..13400ff1 100644 --- a/src/content/docs/modules/cross-attention/messages/en.json +++ b/src/content/docs/modules/cross-attention/messages/en.json @@ -48,6 +48,7 @@ "assets": { "computeFlow": { "alt": "Self-attention versus cross-attention memory source comparison", + "caption": "Cross-attention keeps the query on the active target stream while keys and values stay on a separate memory source, unlike self-attention where all three come from the same sequence.", "variants": { "self": { "label": "Self-attention" diff --git a/src/features/docs/components/DocsFoldedSummary.tsx b/src/features/docs/components/DocsFoldedSummary.tsx new file mode 100644 index 00000000..eaf58af4 --- /dev/null +++ b/src/features/docs/components/DocsFoldedSummary.tsx @@ -0,0 +1,15 @@ +import type { ReactNode } from "react"; + +export function DocsFoldedSummary({ children }: { children: ReactNode }) { + return ( +
+

{children}

+
+ ); +} diff --git a/src/lib/content/cross-attention-module-page.test.ts b/src/lib/content/cross-attention-module-page.test.ts index 9d406ff2..25beb8a7 100644 --- a/src/lib/content/cross-attention-module-page.test.ts +++ b/src/lib/content/cross-attention-module-page.test.ts @@ -11,6 +11,7 @@ import { import { MODULES_DOCS_ROOT } from "@/lib/content/content-paths"; import { expectGlossaryBodyOmitsTitleHeading } from "@/lib/content/glossary-test-helpers"; import { loadModulePage } from "@/lib/content/module-page"; +import { renderModuleDocsShell } from "@/lib/content/module-shell-render"; import { loadPublishedDocsPages } from "@/lib/content/pages"; import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; import { loadRegistry } from "@/lib/content/registry"; @@ -78,6 +79,9 @@ describe("loadModulePage cross-attention", () => { expect(html).toContain( 'data-graph-id="graph.cross-attention-memory-pattern"', ); + expect(html).toContain( + "Cross-attention keeps the query on the active target stream while keys and values stay on a separate memory source", + ); expect(html).toContain('data-graph-node-id="cross-time-kv-s-2"'); expect(html).toContain('data-table-id="table.cross-attention-comparison"'); expect(html).toContain("Where keys and values come from"); @@ -92,6 +96,16 @@ describe("loadModulePage cross-attention", () => { expect(html).not.toContain("Reader Shortcut"); expect(html).not.toContain('aria-label="Module metadata"'); }); + + test("renders the opening summary in the shared docs shell", async () => { + const page = await loadModulePage("cross-attention"); + const html = renderModuleDocsShell(page); + + expect(html).toContain('data-testid="folded-summary"'); + expect(html).toContain( + "lets one sequence ask questions of a different sequence or memory bank", + ); + }); }); describe("cross-attention page assets", () => { @@ -106,6 +120,7 @@ describe("cross-attention page assets", () => { 
expect(assets.computeFlow.type).toBe("attention-variant-graph"); if (assets.computeFlow.type === "attention-variant-graph") { expect(assets.computeFlow.defaultVariantId).toBe("cross"); + expect(assets.computeFlow.captionKey).toBe("assets.computeFlow.caption"); expect( assets.computeFlow.variants.map((variant) => variant.graphId), ).toEqual([ diff --git a/src/lib/content/graph-registry-runtime.test.ts b/src/lib/content/graph-registry-runtime.test.ts index 3731bc7c..b8c02638 100644 --- a/src/lib/content/graph-registry-runtime.test.ts +++ b/src/lib/content/graph-registry-runtime.test.ts @@ -45,6 +45,37 @@ describe("graph-registry-runtime", () => { expect( records.find((record) => record.id === CANONICAL_GRAPH_ID)?.subjectId, ).toBe("model.gpt-3"); + expect(records.length).toBe(50); + expect(records.map((record) => record.id)).toContain( + "graph.bpe-compute-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.sentencepiece-compute-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.byte-level-tokenization-compute-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.deepseek-v4-contribution", + ); + expect(records.map((record) => record.id)).toContain( + "graph.bidirectional-attention-time-pattern", + ); + expect(records.map((record) => record.id)).toContain( + "graph.deepseek-v4-flash-architecture", + ); + expect(records.map((record) => record.id)).toContain( + "graph.deepseek-v4-pro-architecture", + ); + expect(records.map((record) => record.id)).toContain( + "graph.expert-parallel-overlap-system-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.routing-system-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.dpo-training-flow", + ); }); test("uses registered records for lookup without adding override-only records to the bundled listing", () => { diff --git a/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx 
b/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx index 503e5473..55c39d91 100644 --- a/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx +++ b/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx @@ -59,7 +59,7 @@ describe("grouped-query-attention module shell chrome", () => { expect(raw).not.toMatch(/]*\/>\s*\n\s* { + test("/docs/modules/grouped-query-attention renders one shell title, folded summary, and At a glance before the first content section", async () => { const loadedPage = await loadLocalDocsPage({ section: "modules", slug: "grouped-query-attention", @@ -76,11 +76,12 @@ describe("grouped-query-attention module shell chrome", () => { ).toBe(1); expectGlossaryBodyOmitsTitleHeading(articleHtml, loadedPage.messages.title); expect(html).not.toContain('aria-label="Module metadata"'); - expect(html).not.toContain('data-testid="folded-summary"'); - expect(html).not.toContain('data-opening-summary="folded"'); + expect(html).toContain('data-testid="folded-summary"'); + expect(html).toContain('data-opening-summary="folded"'); expect(html).toContain('aria-label="At a glance"'); expect(assertGroupedQueryAttentionTitleConvergence(html)).toBeNull(); + const foldedSummaryIndex = html.indexOf('data-testid="folded-summary"'); const atAGlanceIndex = html.indexOf('aria-label="At a glance"'); const whatItIsIndex = html.indexOf('id="what-it-is"'); @@ -88,7 +89,9 @@ describe("grouped-query-attention module shell chrome", () => { expect(plainHtml).toContain( "Grouped-query attention (GQA) is an attention variant", ); + expect(foldedSummaryIndex).toBeGreaterThanOrEqual(0); expect(atAGlanceIndex).toBeGreaterThanOrEqual(0); + expect(atAGlanceIndex).toBeGreaterThan(foldedSummaryIndex); expect(whatItIsIndex).toBeGreaterThan(atAGlanceIndex); }); }); diff --git a/src/lib/content/module-shell-render.tsx b/src/lib/content/module-shell-render.tsx index c0b0cd22..cd15f3ee 100644 --- a/src/lib/content/module-shell-render.tsx +++ 
b/src/lib/content/module-shell-render.tsx @@ -2,6 +2,7 @@ import { DocsDescription, DocsTitle } from "fumadocs-ui/layouts/docs/page"; import type { ReactNode } from "react"; import { createElement } from "react"; import { renderToStaticMarkup } from "react-dom/server"; +import { DocsFoldedSummary } from "@/features/docs/components/DocsFoldedSummary"; import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; import type { LoadedModulePage } from "@/lib/content/module-page-load"; @@ -16,6 +17,13 @@ export function renderModuleDocsShell( null, createElement(DocsTitle, null, loadedPage.messages.title), createElement(DocsDescription, null, loadedPage.messages.description), + loadedPage.messages.openingSummary + ? createElement( + DocsFoldedSummary, + null, + loadedPage.messages.openingSummary, + ) + : null, createElement(ModulePageProviders, { messages: loadedPage.messages, assets: loadedPage.assets, diff --git a/src/lib/content/page-shell-folded-summary.test.tsx b/src/lib/content/page-shell-folded-summary.test.tsx index bb86534e..1d6788c1 100644 --- a/src/lib/content/page-shell-folded-summary.test.tsx +++ b/src/lib/content/page-shell-folded-summary.test.tsx @@ -4,16 +4,17 @@ import { renderModuleDocsShell } from "@/lib/content/module-shell-render"; describe("page shell opening layout", () => { for (const slug of ["grouped-query-attention", "attention"] as const) { - test(`${slug} module renders At a glance before the first content section without a folded summary block`, async () => { + test(`${slug} module renders the shell folded summary before At a glance and the first content section`, async () => { const page = await loadModulePage(slug); const html = renderModuleDocsShell(page); + const foldedSummaryIndex = html.indexOf('data-testid="folded-summary"'); const atAGlanceIndex = html.indexOf('aria-label="At a glance"'); const whatItIsIndex = html.indexOf('id="what-it-is"'); - expect(html).not.toContain('data-testid="folded-summary"'); - 
expect(html).not.toContain('data-opening-summary="folded"'); + expect(foldedSummaryIndex).toBeGreaterThanOrEqual(0); if (atAGlanceIndex >= 0) { + expect(foldedSummaryIndex).toBeLessThan(atAGlanceIndex); expect(atAGlanceIndex).toBeLessThan(whatItIsIndex); } expect(whatItIsIndex).toBeGreaterThanOrEqual(0); diff --git a/src/lib/source.test.ts b/src/lib/source.test.ts index c7e0b375..2be5c276 100644 --- a/src/lib/source.test.ts +++ b/src/lib/source.test.ts @@ -40,6 +40,51 @@ type GroupedSectionConfig
= { ) => SidebarGroupIdBySection[Section] | undefined; }; +const MODULE_INDEX_URLS = [ + "/docs/modules/absolute-positional-embeddings", + "/docs/modules/alibi", + "/docs/modules/attention", + "/docs/modules/batch-norm", + "/docs/modules/bpe", + "/docs/modules/bidirectional-attention", + "/docs/modules/byte-level-tokenization", + "/docs/modules/compressed-sparse-attention", + "/docs/modules/cross-attention", + "/docs/modules/deepseekmoe", + "/docs/modules/feed-forward-network", + "/docs/modules/group-norm", + "/docs/modules/grouped-query-attention", + "/docs/modules/heavily-compressed-attention", + "/docs/modules/layer-norm", + "/docs/modules/leaky-relu", + "/docs/modules/learned-positional-embeddings", + "/docs/modules/linear-attention", + "/docs/modules/longrope", + "/docs/modules/manifold-constrained-hyper-connections", + "/docs/modules/mixture-of-experts", + "/docs/modules/multi-head-attention", + "/docs/modules/multi-head-latent-attention", + "/docs/modules/multi-query-attention", + "/docs/modules/nope", + "/docs/modules/ntk-aware-rope-scaling", + "/docs/modules/positional-interpolation", + "/docs/modules/qk-norm", + "/docs/modules/relu", + "/docs/modules/relative-position-bias", + "/docs/modules/rmsnorm", + "/docs/modules/rope", + "/docs/modules/sentencepiece", + "/docs/modules/silu", + "/docs/modules/sinusoidal-positional-embeddings", + "/docs/modules/sliding-window-attention", + "/docs/modules/sparse-attention", + "/docs/modules/standard-ffn", + "/docs/modules/superhot-rope", + "/docs/modules/swiglu", + "/docs/modules/t5-relative-position-bias", + "/docs/modules/yarn", +] as const; + const GROUPED_SECTION_CONFIGS = { glossary: { section: "glossary", diff --git a/src/tests/content/attention-tag-landing.test.ts b/src/tests/content/attention-tag-landing.test.ts index 3554c151..5daac17a 100644 --- a/src/tests/content/attention-tag-landing.test.ts +++ b/src/tests/content/attention-tag-landing.test.ts @@ -17,6 +17,7 @@ const REPRESENTATIVE_ATTENTION_GROUP_URLS = 
{ model: ["/docs/models/gpt-3"], module: [ "/docs/modules/attention", + "/docs/modules/cross-attention", "/docs/modules/grouped-query-attention", "/docs/modules/local-attention", "/docs/modules/sliding-window-attention", From e5f07c06c55b281cb8a075cfbc5a50e44139cceb Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 10:32:51 +0700 Subject: [PATCH 6/9] feat: [cross-attention-module-page-004] - [Add focused validation for the page contract and discovery behavior] --- src/lib/content/registry-runtime.test.ts | 7 ++-- src/lib/source.test.ts | 45 ------------------------ 2 files changed, 4 insertions(+), 48 deletions(-) diff --git a/src/lib/content/registry-runtime.test.ts b/src/lib/content/registry-runtime.test.ts index 8cc3d046..33e493d6 100644 --- a/src/lib/content/registry-runtime.test.ts +++ b/src/lib/content/registry-runtime.test.ts @@ -268,13 +268,14 @@ describe("registry-runtime", () => { ]); }); -<<<<<<< HEAD test("getRegistryCitationIds returns citations for causal attention", () => { expect(getRegistryCitationIds("module.causal-attention")).toEqual([ -======= + "citation.attention-is-all-you-need", + ]); + }); + test("getRegistryCitationIds returns citations for cross-attention", () => { expect(getRegistryCitationIds("module.cross-attention")).toEqual([ ->>>>>>> 3bc1d31f (feat: [cross-attention-module-page-001] - [Register cross-attention as a first-class attention module]) "citation.attention-is-all-you-need", ]); }); diff --git a/src/lib/source.test.ts b/src/lib/source.test.ts index 2be5c276..c7e0b375 100644 --- a/src/lib/source.test.ts +++ b/src/lib/source.test.ts @@ -40,51 +40,6 @@ type GroupedSectionConfig
= { ) => SidebarGroupIdBySection[Section] | undefined; }; -const MODULE_INDEX_URLS = [ - "/docs/modules/absolute-positional-embeddings", - "/docs/modules/alibi", - "/docs/modules/attention", - "/docs/modules/batch-norm", - "/docs/modules/bpe", - "/docs/modules/bidirectional-attention", - "/docs/modules/byte-level-tokenization", - "/docs/modules/compressed-sparse-attention", - "/docs/modules/cross-attention", - "/docs/modules/deepseekmoe", - "/docs/modules/feed-forward-network", - "/docs/modules/group-norm", - "/docs/modules/grouped-query-attention", - "/docs/modules/heavily-compressed-attention", - "/docs/modules/layer-norm", - "/docs/modules/leaky-relu", - "/docs/modules/learned-positional-embeddings", - "/docs/modules/linear-attention", - "/docs/modules/longrope", - "/docs/modules/manifold-constrained-hyper-connections", - "/docs/modules/mixture-of-experts", - "/docs/modules/multi-head-attention", - "/docs/modules/multi-head-latent-attention", - "/docs/modules/multi-query-attention", - "/docs/modules/nope", - "/docs/modules/ntk-aware-rope-scaling", - "/docs/modules/positional-interpolation", - "/docs/modules/qk-norm", - "/docs/modules/relu", - "/docs/modules/relative-position-bias", - "/docs/modules/rmsnorm", - "/docs/modules/rope", - "/docs/modules/sentencepiece", - "/docs/modules/silu", - "/docs/modules/sinusoidal-positional-embeddings", - "/docs/modules/sliding-window-attention", - "/docs/modules/sparse-attention", - "/docs/modules/standard-ffn", - "/docs/modules/superhot-rope", - "/docs/modules/swiglu", - "/docs/modules/t5-relative-position-bias", - "/docs/modules/yarn", -] as const; - const GROUPED_SECTION_CONFIGS = { glossary: { section: "glossary", From a0239e42f111cf044bd02316f52be7226e49b7b8 Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 10:40:32 +0700 Subject: [PATCH 7/9] feat: [cross-attention-module-page-004] - [Add focused validation for the page contract and discovery behavior] --- src/app/docs/docs-slug-renderer.tsx | 10 +++++++++- 
src/lib/content/graph-registry-runtime.test.ts | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/app/docs/docs-slug-renderer.tsx b/src/app/docs/docs-slug-renderer.tsx index 3fafa73c..11cbd884 100644 --- a/src/app/docs/docs-slug-renderer.tsx +++ b/src/app/docs/docs-slug-renderer.tsx @@ -18,9 +18,13 @@ import { isDocsPageShippedForLocale } from "@/lib/content/pages"; import { loadUiMessages } from "@/lib/content/ui-messages"; import { defaultLocale, type SiteLocale } from "@/lib/i18n/locale-routing"; import { localizedRouteAlternates } from "@/lib/i18n/route-locale"; -import { source } from "@/lib/source"; import { getMDXComponents } from "../../../mdx-components"; +async function loadDocsSource() { + const { source } = await import("@/lib/source"); + return source; +} + function buildDocsPageAlternates(docsSlug: string) { const alternates = localizedRouteAlternates({ surface: "docs-page", @@ -47,6 +51,7 @@ async function renderLocalDocsPage( return null; } + const source = await loadDocsSource(); const page = source.getPage(slug); if (!page) { return null; @@ -106,6 +111,7 @@ export async function renderDocsSlugPage( return localPage; } + const source = await loadDocsSource(); const page = source.getPage(slug); if (!page) { @@ -152,6 +158,7 @@ export async function buildDocsPageMetadata( const localRef = parseLocalDocsPageRef(slug); if (localRef) { + const source = await loadDocsSource(); const page = source.getPage(slug); if (page && docsSlug) { const loadedPage = await loadLocalDocsPage(localRef, locale); @@ -163,6 +170,7 @@ export async function buildDocsPageMetadata( } } + const source = await loadDocsSource(); const page = source.getPage(slug); if (!page) { diff --git a/src/lib/content/graph-registry-runtime.test.ts b/src/lib/content/graph-registry-runtime.test.ts index b8c02638..54af3985 100644 --- a/src/lib/content/graph-registry-runtime.test.ts +++ b/src/lib/content/graph-registry-runtime.test.ts @@ -45,10 +45,12 @@ 
describe("graph-registry-runtime", () => { expect( records.find((record) => record.id === CANONICAL_GRAPH_ID)?.subjectId, ).toBe("model.gpt-3"); - expect(records.length).toBe(50); expect(records.map((record) => record.id)).toContain( "graph.bpe-compute-flow", ); + expect(records.map((record) => record.id)).toContain( + "graph.cross-attention-memory-pattern", + ); expect(records.map((record) => record.id)).toContain( "graph.sentencepiece-compute-flow", ); From 8c2d6060f942a2cab5b48fc8943a27c3973229c5 Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 10:56:56 +0700 Subject: [PATCH 8/9] feat: [cross-attention-module-page-001] - [Register cross-attention as a first-class attention module] --- src/content/registry/modules/cross-attention.json | 1 + src/lib/content/cross-attention-discovery.test.ts | 2 ++ src/lib/content/registry-runtime.test.ts | 1 + 3 files changed, 4 insertions(+) diff --git a/src/content/registry/modules/cross-attention.json b/src/content/registry/modules/cross-attention.json index 4aaeadcf..c8897659 100644 --- a/src/content/registry/modules/cross-attention.json +++ b/src/content/registry/modules/cross-attention.json @@ -14,6 +14,7 @@ "relatedIds": [ "module.attention", "module.multi-head-attention", + "module.causal-attention", "module.bidirectional-attention", "concept.transformer-architecture", "concept.encoder-decoder", diff --git a/src/lib/content/cross-attention-discovery.test.ts b/src/lib/content/cross-attention-discovery.test.ts index a8eddef6..b83dffd7 100644 --- a/src/lib/content/cross-attention-discovery.test.ts +++ b/src/lib/content/cross-attention-discovery.test.ts @@ -22,6 +22,7 @@ describe("cross-attention discovery wiring", () => { expect(items.map((item) => item.registryId)).toEqual([ "module.attention", "module.multi-head-attention", + "module.causal-attention", "module.bidirectional-attention", "concept.transformer-architecture", "concept.encoder-decoder", @@ -30,6 +31,7 @@ describe("cross-attention discovery wiring", () 
=> { expect(items.map((item) => item.href)).toEqual([ "/docs/modules/attention", "/docs/modules/multi-head-attention", + "/docs/modules/causal-attention", "/docs/modules/bidirectional-attention", "/docs/concepts/transformer-architecture", "/docs/glossary/encoder-decoder", diff --git a/src/lib/content/registry-runtime.test.ts b/src/lib/content/registry-runtime.test.ts index 33e493d6..5042a29c 100644 --- a/src/lib/content/registry-runtime.test.ts +++ b/src/lib/content/registry-runtime.test.ts @@ -185,6 +185,7 @@ describe("registry-runtime", () => { expect(record?.relatedIds).toEqual([ "module.attention", "module.multi-head-attention", + "module.causal-attention", "module.bidirectional-attention", "concept.transformer-architecture", "concept.encoder-decoder", From 347a4a24000b900955fe8cf4c5a026ffc62aabc0 Mon Sep 17 00:00:00 2001 From: aabdi Date: Sun, 21 Jun 2026 13:23:50 +0700 Subject: [PATCH 9/9] feat: [cross-attention-module-page-003] - [Compare cross-attention against nearby attention variants and link onward paths] --- .../docs/modules/cross-attention/messages/en.json | 8 ++++++++ .../registry/tables/cross-attention-comparison.json | 9 +++++++++ src/lib/content/baseline-records.test.ts | 1 + src/lib/content/cross-attention-module-page.test.ts | 7 +++++++ 4 files changed, 25 insertions(+) diff --git a/src/content/docs/modules/cross-attention/messages/en.json b/src/content/docs/modules/cross-attention/messages/en.json index 13400ff1..e026f0d1 100644 --- a/src/content/docs/modules/cross-attention/messages/en.json +++ b/src/content/docs/modules/cross-attention/messages/en.json @@ -72,6 +72,9 @@ "mha": { "title": "Multi-Head Attention" }, + "causal": { + "title": "Causal Attention" + }, "bidirectional": { "title": "Bidirectional Attention" } @@ -97,6 +100,11 @@ "memorySource": "The same sequence, split across multiple heads", "mainUse": "Baseline dense attention inside Transformer blocks" }, + "causal": { + "querySource": "The active generated prefix up to the current 
token", + "memorySource": "The same growing sequence, but future positions are masked out", + "mainUse": "Autoregressive decoding where each token predicts the next one without seeing future tokens" + }, "bidirectional": { "querySource": "The visible sequence being encoded", "memorySource": "The same visible sequence with left and right context available", diff --git a/src/content/registry/tables/cross-attention-comparison.json b/src/content/registry/tables/cross-attention-comparison.json index d376c350..7c3bcefe 100644 --- a/src/content/registry/tables/cross-attention-comparison.json +++ b/src/content/registry/tables/cross-attention-comparison.json @@ -14,6 +14,10 @@ "moduleId": "module.multi-head-attention", "titleKey": "tables.comparison.columns.mha.title" }, + { + "moduleId": "module.causal-attention", + "titleKey": "tables.comparison.columns.causal.title" + }, { "moduleId": "module.bidirectional-attention", "titleKey": "tables.comparison.columns.bidirectional.title" @@ -49,6 +53,11 @@ "memorySource": "tables.comparison.values.mha.memorySource", "mainUse": "tables.comparison.values.mha.mainUse" }, + "module.causal-attention": { + "querySource": "tables.comparison.values.causal.querySource", + "memorySource": "tables.comparison.values.causal.memorySource", + "mainUse": "tables.comparison.values.causal.mainUse" + }, "module.bidirectional-attention": { "querySource": "tables.comparison.values.bidirectional.querySource", "memorySource": "tables.comparison.values.bidirectional.memorySource", diff --git a/src/lib/content/baseline-records.test.ts b/src/lib/content/baseline-records.test.ts index ba5b2368..963b2676 100644 --- a/src/lib/content/baseline-records.test.ts +++ b/src/lib/content/baseline-records.test.ts @@ -221,6 +221,7 @@ describe("Phase 1 baseline registry records", () => { expect(module.relatedIds).toEqual([ "module.attention", "module.multi-head-attention", + "module.causal-attention", "module.bidirectional-attention", "concept.transformer-architecture", 
"concept.encoder-decoder", diff --git a/src/lib/content/cross-attention-module-page.test.ts b/src/lib/content/cross-attention-module-page.test.ts index 25beb8a7..5fa29cb8 100644 --- a/src/lib/content/cross-attention-module-page.test.ts +++ b/src/lib/content/cross-attention-module-page.test.ts @@ -84,7 +84,11 @@ describe("loadModulePage cross-attention", () => { ); expect(html).toContain('data-graph-node-id="cross-time-kv-s-2"'); expect(html).toContain('data-table-id="table.cross-attention-comparison"'); + expect(html).toContain("Causal Attention"); expect(html).toContain("Where keys and values come from"); + expect(html).toContain( + "The same growing sequence, but future positions are masked out", + ); expect(html).toContain('href="/docs/glossary/encoder-decoder"'); expect(html).toContain('href="/docs/glossary/multimodal-model"'); expect(html).toContain('data-testid="curated-related-docs"'); @@ -130,6 +134,9 @@ describe("cross-attention page assets", () => { } expect(assets.comparisonTable.type).toBe("table"); expect(validatePageAssetReferences(assets, messages)).toEqual([]); + expect(messages.tables?.comparison?.columns?.causal?.title).toBe( + "Causal Attention", + ); }); });