diff --git a/src/app/docs/docs-slug-renderer.tsx b/src/app/docs/docs-slug-renderer.tsx index 437a0027..11cbd884 100644 --- a/src/app/docs/docs-slug-renderer.tsx +++ b/src/app/docs/docs-slug-renderer.tsx @@ -7,6 +7,7 @@ import { import { createRelativeLink } from "fumadocs-ui/mdx"; import { notFound } from "next/navigation"; import { DocsAutoLinkedDescription } from "@/features/docs/components/DocsAutoLinkedDescription"; +import { DocsFoldedSummary } from "@/features/docs/components/DocsFoldedSummary"; import { DocsPageBreadcrumb } from "@/features/docs/components/DocsPageBreadcrumb"; import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; import { @@ -17,9 +18,13 @@ import { isDocsPageShippedForLocale } from "@/lib/content/pages"; import { loadUiMessages } from "@/lib/content/ui-messages"; import { defaultLocale, type SiteLocale } from "@/lib/i18n/locale-routing"; import { localizedRouteAlternates } from "@/lib/i18n/route-locale"; -import { source } from "@/lib/source"; import { getMDXComponents } from "../../../mdx-components"; +async function loadDocsSource() { + const { source } = await import("@/lib/source"); + return source; +} + function buildDocsPageAlternates(docsSlug: string) { const alternates = localizedRouteAlternates({ surface: "docs-page", @@ -46,6 +51,7 @@ async function renderLocalDocsPage( return null; } + const source = await loadDocsSource(); const page = source.getPage(slug); if (!page) { return null; @@ -75,6 +81,12 @@ async function renderLocalDocsPage( /> {loadedPage.messages.title} {description} + {localRef.section !== "glossary" && + loadedPage.messages.openingSummary?.length ? ( + + {loadedPage.messages.openingSummary} + + ) : null}
{loadedPage.content} @@ -99,6 +111,7 @@ export async function renderDocsSlugPage( return localPage; } + const source = await loadDocsSource(); const page = source.getPage(slug); if (!page) { @@ -145,6 +158,7 @@ export async function buildDocsPageMetadata( const localRef = parseLocalDocsPageRef(slug); if (localRef) { + const source = await loadDocsSource(); const page = source.getPage(slug); if (page && docsSlug) { const loadedPage = await loadLocalDocsPage(localRef, locale); @@ -156,6 +170,7 @@ export async function buildDocsPageMetadata( } } + const source = await loadDocsSource(); const page = source.getPage(slug); if (!page) { diff --git a/src/content/docs/modules/cross-attention/assets.json b/src/content/docs/modules/cross-attention/assets.json new file mode 100644 index 00000000..595f7fa4 --- /dev/null +++ b/src/content/docs/modules/cross-attention/assets.json @@ -0,0 +1,26 @@ +{ + "computeFlow": { + "type": "attention-variant-graph", + "defaultVariantId": "cross", + "variants": [ + { + "variantId": "self", + "graphId": "graph.multi-head-attention-time-pattern", + "labelKey": "assets.computeFlow.variants.self.label" + }, + { + "variantId": "cross", + "graphId": "graph.cross-attention-memory-pattern", + "labelKey": "assets.computeFlow.variants.cross.label" + } + ], + "webRenderer": "react-flow", + "printRenderer": "mermaid", + "altKey": "assets.computeFlow.alt", + "captionKey": "assets.computeFlow.caption" + }, + "comparisonTable": { + "type": "table", + "tableId": "table.cross-attention-comparison" + } +} diff --git a/src/content/docs/modules/cross-attention/messages/en.json b/src/content/docs/modules/cross-attention/messages/en.json new file mode 100644 index 00000000..e026f0d1 --- /dev/null +++ b/src/content/docs/modules/cross-attention/messages/en.json @@ -0,0 +1,199 @@ +{ + "title": "Cross-Attention", + "description": "An attention pattern where queries come from one stream while keys and values come from a different memory source.", + "openingSummary": "Cross-attention is the attention pattern that lets one sequence ask questions of a different sequence or memory bank, which is why decoder layers can read encoder outputs and multimodal models can fuse text with image or audio features.", + "sections": { + "whatItIs": { + "title": "What It Is", + "body": "Cross-attention is an attention variant in which the query vectors come from one stream, but the key and value vectors come from somewhere else. The model still performs a weighted lookup, but it no longer reads only from the same sequence that produced the query." + }, + "whyItExists": { + "title": "Why It Exists", + "body": "Self-attention is enough when one sequence only needs to mix information inside itself. Cross-attention solves the different problem of conditioning one stream on information stored elsewhere, such as a decoder reading an encoder memory or a text stack reading image features." + }, + "howItWorks": { + "title": "How It Works", + "body": "A target stream produces queries, while a separate source stream produces keys and values. Each query scores the external memory slots, turns those scores into weights with softmax, and blends the matching value vectors into a context vector for the target stream. The critical difference from self-attention is the memory source: the target tokens ask, but a different representation answers." + }, + "mathOrComputeSchema": { + "title": "Math Or Compute Schema", + "body": "The formulas below contrast self-attention with cross-attention. The weighted lookup is still scaled dot-product attention, but cross-attention changes which hidden states create the queries versus the keys and values." + }, + "comparedToNearbyModules": { + "title": "Compared To Nearby Modules", + "body": "Compared with the broader attention overview, cross-attention is one specific memory-source pattern rather than the whole family. Compared with multi-head attention, it can still use multiple heads but changes where keys and values come from. Compared with causal attention, the key difference is not next-token masking: causal attention usually still reads the same growing sequence, while cross-attention reads a different memory and can appear inside a causal decoder block. Compared with bidirectional attention, cross-attention again changes the memory source rather than simply opening left and right context inside one sequence." + }, + "exampleArchitectures": { + "title": "Example Architectures", + "body": "Cross-attention appears in encoder-decoder Transformers where decoder states read encoder outputs, in multimodal models where text queries image or audio features, and in retrieval-style systems where the active stream reads an external memory representation." + }, + "limitationsAndTradeoffs": { + "title": "Limitations And Tradeoffs", + "body": "Cross-attention adds another memory interface, which means extra projections, more tensors to keep available, and more places where weak source representations can hurt the result. If the external memory is noisy or badly aligned with the target stream, the lookup can pull in the wrong evidence." + }, + "whyItStillMatters": { + "title": "Why It Still Matters", + "body": "Many important model designs depend on one stream reading another without collapsing both into one shared sequence. A dedicated cross-attention page makes that bridge mechanism clear instead of leaving it implied inside larger architecture diagrams." + }, + "related": { + "title": "Related" + }, + "tags": { + "title": "Tags" + }, + "references": { + "title": "References" + } + }, + "assets": { + "computeFlow": { + "alt": "Self-attention versus cross-attention memory source comparison", + "caption": "Cross-attention keeps the query on the active target stream while keys and values stay on a separate memory source, unlike self-attention where all three come from the same sequence.", + "variants": { + "self": { + "label": "Self-attention" + }, + "cross": { + "label": "Cross-attention" + } + } + }, + "comparisonTable": {} + }, + "tables": { + "comparison": { + "columns": { + "cross": { + "title": "Cross-Attention" + }, + "attention": { + "title": "Attention Overview" + }, + "mha": { + "title": "Multi-Head Attention" + }, + "causal": { + "title": "Causal Attention" + }, + "bidirectional": { + "title": "Bidirectional Attention" + } + }, + "dimensions": { + "querySource": "Where queries come from", + "memorySource": "Where keys and values come from", + "mainUse": "Main use context" + }, + "values": { + "cross": { + "querySource": "The active target stream that needs outside information", + "memorySource": "A different source sequence or memory bank", + "mainUse": "Conditioning one stream on another, such as decoder-on-encoder or text-on-image" + }, + "attention": { + "querySource": "Depends on the attention pattern being discussed", + "memorySource": "May be the same sequence or a separate memory depending on the design", + "mainUse": "General weighted lookup across Transformer modules" + }, + "mha": { + "querySource": "The same sequence that also produces keys and values in standard self-attention", + "memorySource": "The same sequence, split across multiple heads", + "mainUse": "Baseline dense attention inside Transformer blocks" + }, + "causal": { + "querySource": "The active generated prefix up to the current token", + "memorySource": "The same growing sequence, but future positions are masked out", + "mainUse": "Autoregressive decoding where each token predicts the next one without seeing future tokens" + }, + "bidirectional": { + "querySource": "The visible sequence being encoded", + "memorySource": "The same visible sequence with left and right context available", + "mainUse": "Encoder-style full-context understanding" + } + } + } + }, + "math": { + "selfAttentionSchema": { + "label": "Self-attention", + "formula": "\\mathrm{Attention}(Q(X), K(X), V(X)) = \\mathrm{softmax}\\!\\left(\\frac{Q(X) K(X)^{\\top}}{\\sqrt{d_k}}\\right) V(X)", + "variableDefinitions": { + "x": { + "term": "X", + "definition": "Hidden states from the same source sequence." + }, + "q": { + "term": "Q(X)", + "definition": "Query projection of X." + }, + "k": { + "term": "K(X)", + "definition": "Key projection of X." + }, + "v": { + "term": "V(X)", + "definition": "Value projection of X." + }, + "dk": { + "term": "d_k", + "definition": "Key dimension per head." + } + } + }, + "crossAttentionSchema": { + "label": "Cross-attention", + "formula": "\\mathrm{Attention}(Q(Y), K(X), V(X)) = \\mathrm{softmax}\\!\\left(\\frac{Q(Y) K(X)^{\\top}}{\\sqrt{d_k}}\\right) V(X)", + "variableDefinitions": { + "x": { + "term": "X", + "definition": "Hidden states from the external memory source." + }, + "y": { + "term": "Y", + "definition": "Hidden states from the active target stream." + }, + "q": { + "term": "Q(Y)", + "definition": "Query projection of Y." + }, + "k": { + "term": "K(X)", + "definition": "Key projection of X." + }, + "v": { + "term": "V(X)", + "definition": "Value projection of X." + }, + "dk": { + "term": "d_k", + "definition": "Key dimension per head." + } + } + } + }, + "graph": { + "nodes": { + "timeCurrentQuery": { + "label": "q_t" + }, + "timeKv0": { + "label": "KV^X_0" + }, + "timeKv1": { + "label": "KV^X_1" + }, + "timeKvEllipsis": { + "label": "\\cdots" + }, + "timeKvT3": { + "label": "KV^X_{s-3}" + }, + "timeKvT2": { + "label": "KV^X_{s-2}" + }, + "timeKvT1": { + "label": "KV^X_{s-1}" + } + } + } +} diff --git a/src/content/docs/modules/cross-attention/page.mdx b/src/content/docs/modules/cross-attention/page.mdx new file mode 100644 index 00000000..34a5e29f --- /dev/null +++ b/src/content/docs/modules/cross-attention/page.mdx @@ -0,0 +1,85 @@ +--- +title: Cross-Attention +description: How cross-attention lets one sequence query a different memory source instead of reading only from itself. +kind: "module" +registryId: "module.cross-attention" +messageNamespace: "local" +assetNamespace: "local" +status: "published" +tags: + - attention +aliases: + - "cross attention" + - "cross-attention" + - "encoder-decoder attention" +updatedAt: "2026-06-21" +--- + +import { CitationList } from "@/features/docs/components/CitationList"; +import { RelatedDocs } from "@/features/docs/components/RelatedDocs"; +import { Section } from "@/features/docs/components/Section"; +import { T } from "@/features/docs/components/T"; +import { TagPillList } from "@/features/docs/components/TagPillList"; +import { ModuleComparisonTable } from "@/features/models/components/ModuleComparisonTable"; +import { ModuleGraph } from "@/features/models/components/ModuleGraph"; +import { ModuleAtAGlance } from "@/features/models/components/ModuleAtAGlance"; +import { ModuleAttentionSchemaComparison } from "@/features/models/components/ModuleAttentionSchemaComparison"; +import { ModelsUsingModule } from "@/features/models/components/ModelsUsingModule"; + + + + +
+ +
+ +
+ +
+ +
+ + + +
+ +
+ + +
+ +
+ + + +
+ +
+ + +
+ +
+ +
+ +
+ +
+ + + +
+ +
+ +
+ +
diff --git a/src/content/registry/graphs/cross-attention-memory-pattern.json b/src/content/registry/graphs/cross-attention-memory-pattern.json new file mode 100644 index 00000000..37559962 --- /dev/null +++ b/src/content/registry/graphs/cross-attention-memory-pattern.json @@ -0,0 +1,130 @@ +{ + "id": "graph.cross-attention-memory-pattern", + "slug": "cross-attention-memory-pattern", + "kind": "graph", + "defaultTitleKey": "title", + "defaultSummaryKey": "description", + "aliases": [], + "tags": ["attention"], + "relatedIds": [], + "citationIds": [], + "status": "published", + "createdAt": "2026-06-21T00:00:00.000Z", + "updatedAt": "2026-06-21T00:00:00.000Z", + "subjectId": "module.cross-attention", + "graphType": "attention-pattern", + "rootNodeId": "cross-time-current-query", + "layout": "vertical-expandable", + "defaultExpandedDepth": 1, + "supportedRenderers": ["react-flow", "mermaid"], + "nodes": [ + { + "id": "cross-time-current-query", + "labelKey": "graph.nodes.timeCurrentQuery.label", + "moduleKind": "attention", + "position": { "x": 360, "y": 0 }, + "headCountRole": "query", + "visualRole": "summary-node", + "childNodeIds": [ + "cross-time-kv-0", + "cross-time-kv-1", + "cross-time-kv-ellipsis", + "cross-time-kv-s-3", + "cross-time-kv-s-2", + "cross-time-kv-s-1" + ] + }, + { + "id": "cross-time-kv-0", + "labelKey": "graph.nodes.timeKv0.label", + "moduleKind": "cache", + "position": { "x": 0, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-1", + "labelKey": "graph.nodes.timeKv1.label", + "moduleKind": "cache", + "position": { "x": 140, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-ellipsis", + "labelKey": "graph.nodes.timeKvEllipsis.label", + "moduleKind": "cache", + "position": { "x": 280, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-s-3", + "labelKey": "graph.nodes.timeKvT3.label", + "moduleKind": "cache", + "position": { "x": 420, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-s-2", + "labelKey": "graph.nodes.timeKvT2.label", + "moduleKind": "cache", + "position": { "x": 560, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + }, + { + "id": "cross-time-kv-s-1", + "labelKey": "graph.nodes.timeKvT1.label", + "moduleKind": "cache", + "position": { "x": 700, "y": 220 }, + "headCountRole": "kv", + "visualRole": "timeline-node", + "childNodeIds": [] + } + ], + "edges": [ + { + "id": "cross-time-query-to-kv-0", + "source": "cross-time-current-query", + "target": "cross-time-kv-0", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-1", + "source": "cross-time-current-query", + "target": "cross-time-kv-1", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-ellipsis", + "source": "cross-time-current-query", + "target": "cross-time-kv-ellipsis", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-s-3", + "source": "cross-time-current-query", + "target": "cross-time-kv-s-3", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-s-2", + "source": "cross-time-current-query", + "target": "cross-time-kv-s-2", + "edgeKind": "data-flow" + }, + { + "id": "cross-time-query-to-kv-s-1", + "source": "cross-time-current-query", + "target": "cross-time-kv-s-1", + "edgeKind": "data-flow" + } + ] +} diff --git a/src/content/registry/modules/cross-attention.json b/src/content/registry/modules/cross-attention.json new file mode 100644 index 00000000..c8897659 --- /dev/null +++ b/src/content/registry/modules/cross-attention.json @@ -0,0 +1,57 @@ +{ + "id": "module.cross-attention", + "slug": "cross-attention", + "kind": "module", + "defaultTitleKey": "title", + "defaultSummaryKey": "description", + "aliases": [ + "cross attention", + "cross-attention", + "encoder-decoder attention", + "encoder decoder attention" + ], + "tags": ["attention"], + "relatedIds": [ + "module.attention", + "module.multi-head-attention", + "module.causal-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model" + ], + "citationIds": ["citation.attention-is-all-you-need"], + "status": "published", + "createdAt": "2026-06-21T00:00:00.000Z", + "updatedAt": "2026-06-21T00:00:00.000Z", + "releaseDate": "2017-06-12", + "authors": [ + "Ashish Vaswani", + "Noam Shazeer", + "Niki Parmar", + "Jakob Uszkoreit", + "Llion Jones", + "Aidan N. Gomez", + "Lukasz Kaiser", + "Illia Polosukhin" + ], + "sourceId": "citation.attention-is-all-you-need", + "moduleType": "attention", + "moduleFamily": "attention", + "conceptType": "attention-variant", + "variantGroup": "attention-memory-sources", + "sidebarGrouping": { + "modules": "attention-foundations" + }, + "optimizes": [ + "cross-source-conditioning", + "multistream-context-fusion", + "encoder-decoder-context-bridging" + ], + "exampleModelIds": [], + "improvesOnIds": [], + "tradeoffIds": [], + "usedByModelIds": [], + "introducedByPaperIds": [], + "mathLevel": "light" +} diff --git a/src/content/registry/tables/cross-attention-comparison.json b/src/content/registry/tables/cross-attention-comparison.json new file mode 100644 index 00000000..7c3bcefe --- /dev/null +++ b/src/content/registry/tables/cross-attention-comparison.json @@ -0,0 +1,67 @@ +{ + "id": "table.cross-attention-comparison", + "subjectId": "module.cross-attention", + "columns": [ + { + "moduleId": "module.cross-attention", + "titleKey": "tables.comparison.columns.cross.title" + }, + { + "moduleId": "module.attention", + "titleKey": "tables.comparison.columns.attention.title" + }, + { + "moduleId": "module.multi-head-attention", + "titleKey": "tables.comparison.columns.mha.title" + }, + { + "moduleId": "module.causal-attention", + "titleKey": "tables.comparison.columns.causal.title" + }, + { + "moduleId": "module.bidirectional-attention", + "titleKey": "tables.comparison.columns.bidirectional.title" + } + ], + "dimensions": [ + { + "id": "querySource", + "labelKey": "tables.comparison.dimensions.querySource" + }, + { + "id": "memorySource", + "labelKey": "tables.comparison.dimensions.memorySource" + }, + { + "id": "mainUse", + "labelKey": "tables.comparison.dimensions.mainUse" + } + ], + "valueKeysByModuleId": { + "module.cross-attention": { + "querySource": "tables.comparison.values.cross.querySource", + "memorySource": "tables.comparison.values.cross.memorySource", + "mainUse": "tables.comparison.values.cross.mainUse" + }, + "module.attention": { + "querySource": "tables.comparison.values.attention.querySource", + "memorySource": "tables.comparison.values.attention.memorySource", + "mainUse": "tables.comparison.values.attention.mainUse" + }, + "module.multi-head-attention": { + "querySource": "tables.comparison.values.mha.querySource", + "memorySource": "tables.comparison.values.mha.memorySource", + "mainUse": "tables.comparison.values.mha.mainUse" + }, + "module.causal-attention": { + "querySource": "tables.comparison.values.causal.querySource", + "memorySource": "tables.comparison.values.causal.memorySource", + "mainUse": "tables.comparison.values.causal.mainUse" + }, + "module.bidirectional-attention": { + "querySource": "tables.comparison.values.bidirectional.querySource", + "memorySource": "tables.comparison.values.bidirectional.memorySource", + "mainUse": "tables.comparison.values.bidirectional.mainUse" + } + } +} diff --git a/src/features/docs/components/DocsFoldedSummary.tsx b/src/features/docs/components/DocsFoldedSummary.tsx new file mode 100644 index 00000000..eaf58af4 --- /dev/null +++ b/src/features/docs/components/DocsFoldedSummary.tsx @@ -0,0 +1,15 @@ +import type { ReactNode } from "react"; + +export function DocsFoldedSummary({ children }: { children: ReactNode }) { + return ( +
+

{children}

+
+ ); +} diff --git a/src/lib/content/baseline-records.test.ts b/src/lib/content/baseline-records.test.ts index d79725c4..963b2676 100644 --- a/src/lib/content/baseline-records.test.ts +++ b/src/lib/content/baseline-records.test.ts @@ -196,6 +196,42 @@ describe("Phase 1 baseline registry records", () => { expect(module.optimizes.length).toBeGreaterThan(0); }); + test("cross-attention module JSON passes moduleRecordSchema", async () => { + const module = await readRegistryJson( + "modules/cross-attention.json", + moduleRecordSchema, + ); + + expect(module.id).toBe("module.cross-attention"); + expect(module.kind).toBe("module"); + expect(module.status).toBe("published"); + expect(module.moduleType).toBe("attention"); + expect(module.moduleFamily).toBe("attention"); + expect(module.tags).toEqual(["attention"]); + expect(module.aliases).toEqual( + expect.arrayContaining([ + "cross attention", + "cross-attention", + "encoder-decoder attention", + "encoder decoder attention", + ]), + ); + expect(module.variantGroup).toBe("attention-memory-sources"); + expect(module.conceptType).toBe("attention-variant"); + expect(module.relatedIds).toEqual([ + "module.attention", + "module.multi-head-attention", + "module.causal-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model", + ]); + expect(module.citationIds).toContain("citation.attention-is-all-you-need"); + expect(module.sidebarGrouping?.modules).toBe("attention-foundations"); + expect(module.optimizes.length).toBeGreaterThan(0); + }); + test("attention tag JSON passes tagRecordSchema", async () => { const tag = await readRegistryJson("tags/attention.json", tagRecordSchema); diff --git a/src/lib/content/cross-attention-discovery.test.ts b/src/lib/content/cross-attention-discovery.test.ts new file mode 100644 index 00000000..b83dffd7 --- /dev/null +++ b/src/lib/content/cross-attention-discovery.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, test } from "bun:test"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { + getModuleById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; + +describe("cross-attention discovery wiring", () => { + test("cross-attention derives nearby published related docs in registry order", () => { + const source = getModuleById("module.cross-attention"); + if (!source) { + throw new Error("expected module.cross-attention in registry runtime"); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + expect(items.map((item) => item.registryId)).toEqual([ + "module.attention", + "module.multi-head-attention", + "module.causal-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model", + ]); + expect(items.map((item) => item.href)).toEqual([ + "/docs/modules/attention", + "/docs/modules/multi-head-attention", + "/docs/modules/causal-attention", + "/docs/modules/bidirectional-attention", + "/docs/concepts/transformer-architecture", + "/docs/glossary/encoder-decoder", + "/docs/glossary/multimodal-model", + ]); + expect(items.every((item) => item.isPlanned === false)).toBe(true); + }); +}); diff --git a/src/lib/content/cross-attention-module-page.test.ts b/src/lib/content/cross-attention-module-page.test.ts new file mode 100644 index 00000000..5fa29cb8 --- /dev/null +++ b/src/lib/content/cross-attention-module-page.test.ts @@ -0,0 +1,179 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { + parsePageAssetConfig, + validatePageAssetReferences, +} from "@/lib/content/assets"; +import { MODULES_DOCS_ROOT } from "@/lib/content/content-paths"; +import { expectGlossaryBodyOmitsTitleHeading } from "@/lib/content/glossary-test-helpers"; +import { loadModulePage } from "@/lib/content/module-page"; +import { renderModuleDocsShell } from "@/lib/content/module-shell-render"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { getModuleById } from "@/lib/content/registry-runtime"; +import { pageMessagesSchema } from "@/lib/content/schemas"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; + +const pageDir = join(MODULES_DOCS_ROOT, "cross-attention"); +const messagesPath = join(pageDir, "messages/en.json"); +const assetsPath = join(pageDir, "assets.json"); + +describe("cross-attention page messages", () => { + test("includes the localized fields required by the module template", () => { + const messages = pageMessagesSchema.parse( + JSON.parse(readFileSync(messagesPath, "utf8")), + ); + + expect(messages.title).toBe("Cross-Attention"); + expect(messages.openingSummary?.length).toBeGreaterThan(0); + expect(messages.problemStatement).toBeUndefined(); + expect(messages.coreIdea).toBeUndefined(); + expect( + messages.sections?.mathOrComputeSchema?.body?.length, + ).toBeGreaterThan(0); + expect(messages.math?.selfAttentionSchema?.label).toBe("Self-attention"); + expect(messages.math?.crossAttentionSchema?.label).toBe("Cross-attention"); + expect(messages.math?.crossAttentionSchema?.formula).toContain("Q(Y)"); + expect(messages.math?.crossAttentionSchema?.formula).toContain("K(X)"); + }); +}); + +describe("loadModulePage cross-attention", () => { + test("renders the canonical module structure with separate-memory teaching aids", async () => { + const page = await loadModulePage("cross-attention"); + + expect(page.frontmatter.registryId).toBe("module.cross-attention"); + expect(page.frontmatter.messageNamespace).toBe("local"); + expect(page.frontmatter.assetNamespace).toBe("local"); + expect(page.messages.title).toBe("Cross-Attention"); + + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: page.messages, + assets: page.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: page.content, + }), + ); + + expectGlossaryBodyOmitsTitleHeading(html, page.messages.title); + expect(html).toContain("At a glance"); + expect(html).toContain("Math Or Compute Schema"); + expect(html).toContain("Compared To Nearby Modules"); + expect(html).toContain("external memory slots"); + expect(html).toContain( + "usually still reads the same growing sequence, while", + ); + expect(html).toContain( + "again changes the memory source rather than simply opening left and right context", + ); + expect(html).toContain('data-attention-variant-comparison="true"'); + expect(html).toContain('data-attention-variant-active="cross"'); + expect(html).toContain('data-attention-variant-option="self"'); + expect(html).toContain('data-attention-variant-option="cross"'); + expect(html).toContain( + 'data-graph-id="graph.cross-attention-memory-pattern"', + ); + expect(html).toContain( + "Cross-attention keeps the query on the active target stream while keys and values stay on a separate memory source", + ); + expect(html).toContain('data-graph-node-id="cross-time-kv-s-2"'); + expect(html).toContain('data-table-id="table.cross-attention-comparison"'); + expect(html).toContain("Causal Attention"); + expect(html).toContain("Where keys and values come from"); + expect(html).toContain( + "The same growing sequence, but future positions are masked out", + ); + expect(html).toContain('href="/docs/glossary/encoder-decoder"'); + expect(html).toContain('href="/docs/glossary/multimodal-model"'); + expect(html).toContain('data-testid="curated-related-docs"'); + expect((html.match(/data-testid="tag-pill-list"/g) ?? []).length).toBe(1); + expect(html).toContain('href="/tags/attention"'); + expect(html).toContain('data-testid="citation-list"'); + expect(html).toContain("Vaswani, Ashish"); + expect(html).toContain('href="https://arxiv.org/abs/1706.03762"'); + expect(html).not.toContain("Reader Shortcut"); + expect(html).not.toContain('aria-label="Module metadata"'); + }); + + test("renders the opening summary in the shared docs shell", async () => { + const page = await loadModulePage("cross-attention"); + const html = renderModuleDocsShell(page); + + expect(html).toContain('data-testid="folded-summary"'); + expect(html).toContain( + "lets one sequence ask questions of a different sequence or memory bank", + ); + }); +}); + +describe("cross-attention page assets", () => { + test("resolve graph and table assets with message-backed copy", () => { + const messages = pageMessagesSchema.parse( + JSON.parse(readFileSync(messagesPath, "utf8")), + ); + const assets = parsePageAssetConfig( + JSON.parse(readFileSync(assetsPath, "utf8")), + ); + + expect(assets.computeFlow.type).toBe("attention-variant-graph"); + if (assets.computeFlow.type === "attention-variant-graph") { + expect(assets.computeFlow.defaultVariantId).toBe("cross"); + expect(assets.computeFlow.captionKey).toBe("assets.computeFlow.caption"); + expect( + assets.computeFlow.variants.map((variant) => variant.graphId), + ).toEqual([ + "graph.multi-head-attention-time-pattern", + "graph.cross-attention-memory-pattern", + ]); + } + expect(assets.comparisonTable.type).toBe("table"); + expect(validatePageAssetReferences(assets, messages)).toEqual([]); + expect(messages.tables?.comparison?.columns?.causal?.title).toBe( + "Causal Attention", + ); + }); +}); + +describe("cross-attention published discovery contract", () => { + test("keeps the canonical route discoverable through the published docs bundle and search documents", async () => { + const record = getModuleById("module.cross-attention"); + if (!record) { + throw new Error("expected module.cross-attention in registry runtime"); + } + + expect(record.status).toBe("published"); + expect(PUBLISHED_DOCS_REGISTRY_IDS.has(record.id)).toBe(true); + + const pages = await loadPublishedDocsPages("en"); + const registry = await loadRegistry(); + const documents = buildSearchDocuments(pages, registry); + const document = documents.find( + (entry) => entry.url === "/docs/modules/cross-attention", + ); + + expect(document).toBeDefined(); + expect(document?.kind).toBe("module"); + expect(document?.aliases).toEqual( + expect.arrayContaining([ + "cross attention", + "cross-attention", + "encoder-decoder attention", + ]), + ); + expect(document?.tags).toContain("attention"); + expect(document?.relatedIds).toEqual( + expect.arrayContaining([ + "module.attention", + "module.multi-head-attention", + "concept.encoder-decoder", + "concept.multimodal-model", + ]), + ); + }); +}); diff --git a/src/lib/content/generated/table-registry.generated.ts b/src/lib/content/generated/table-registry.generated.ts index 6997843c..42887488 100644 --- a/src/lib/content/generated/table-registry.generated.ts +++ b/src/lib/content/generated/table-registry.generated.ts @@ -6,6 +6,7 @@ import bidirectionalAttentionComparisonTableRecord from "@/content/registry/tabl import bpeComparisonTableRecord from "@/content/registry/tables/bpe-comparison.json"; import byteLevelTokenizationComparisonTableRecord from "@/content/registry/tables/byte-level-tokenization-comparison.json"; import causalAttentionComparisonTableRecord from "@/content/registry/tables/causal-attention-comparison.json"; +import crossAttentionComparisonTableRecord from "@/content/registry/tables/cross-attention-comparison.json"; import feedForwardNetworkComparisonTableRecord from "@/content/registry/tables/feed-forward-network-comparison.json"; import geluComparisonTableRecord from "@/content/registry/tables/gelu-comparison.json"; import groupNormComparisonTableRecord from "@/content/registry/tables/group-norm-comparison.json"; @@ -36,6 +37,7 @@ export const generatedTableRegistrySourceFiles = [ "bpe-comparison.json", "byte-level-tokenization-comparison.json", "causal-attention-comparison.json", + "cross-attention-comparison.json", "feed-forward-network-comparison.json", "gelu-comparison.json", "group-norm-comparison.json", @@ -67,6 +69,7 @@ export const generatedTableRegistryPayloads = [ bpeComparisonTableRecord, byteLevelTokenizationComparisonTableRecord, causalAttentionComparisonTableRecord, + crossAttentionComparisonTableRecord, feedForwardNetworkComparisonTableRecord, geluComparisonTableRecord, groupNormComparisonTableRecord, diff --git a/src/lib/content/graph-registry-runtime.test.ts b/src/lib/content/graph-registry-runtime.test.ts index 3731bc7c..54af3985 100644 --- a/src/lib/content/graph-registry-runtime.test.ts +++ b/src/lib/content/graph-registry-runtime.test.ts @@ -45,6 +45,39 @@ describe("graph-registry-runtime", () => { expect( records.find((record) => record.id === CANONICAL_GRAPH_ID)?.subjectId, ).toBe("model.gpt-3"); + expect(records.map((record) => record.id)).toContain( + "graph.bpe-compute-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.cross-attention-memory-pattern", + ); + expect(records.map((record) => record.id)).toContain( + "graph.sentencepiece-compute-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.byte-level-tokenization-compute-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.deepseek-v4-contribution", + ); + expect(records.map((record) => record.id)).toContain( + "graph.bidirectional-attention-time-pattern", + ); + expect(records.map((record) => record.id)).toContain( + "graph.deepseek-v4-flash-architecture", + ); + expect(records.map((record) => record.id)).toContain( + "graph.deepseek-v4-pro-architecture", + ); + expect(records.map((record) => record.id)).toContain( + "graph.expert-parallel-overlap-system-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.routing-system-flow", + ); + expect(records.map((record) => record.id)).toContain( + "graph.dpo-training-flow", + ); }); test("uses registered records for lookup without adding override-only records to the bundled listing", () => { diff --git a/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx b/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx index 503e5473..55c39d91 100644 --- a/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx +++ b/src/lib/content/grouped-query-attention-module-shell-chrome.test.tsx @@ -59,7 +59,7 @@ describe("grouped-query-attention module shell chrome", () => { expect(raw).not.toMatch(/]*\/>\s*\n\s* { + test("/docs/modules/grouped-query-attention renders one shell title, folded summary, and At a glance before the first content section", async () => { const loadedPage = await loadLocalDocsPage({ section: "modules", slug: "grouped-query-attention", @@ -76,11 +76,12 @@ describe("grouped-query-attention module shell chrome", () => { ).toBe(1); expectGlossaryBodyOmitsTitleHeading(articleHtml, loadedPage.messages.title); expect(html).not.toContain('aria-label="Module metadata"'); - expect(html).not.toContain('data-testid="folded-summary"'); - expect(html).not.toContain('data-opening-summary="folded"'); + expect(html).toContain('data-testid="folded-summary"'); + expect(html).toContain('data-opening-summary="folded"'); expect(html).toContain('aria-label="At a glance"'); expect(assertGroupedQueryAttentionTitleConvergence(html)).toBeNull(); + const foldedSummaryIndex = html.indexOf('data-testid="folded-summary"'); const atAGlanceIndex = html.indexOf('aria-label="At a glance"'); const whatItIsIndex = html.indexOf('id="what-it-is"'); @@ -88,7 +89,9 @@ describe("grouped-query-attention module shell chrome", () => { expect(plainHtml).toContain( "Grouped-query attention (GQA) is an attention variant", ); + expect(foldedSummaryIndex).toBeGreaterThanOrEqual(0); expect(atAGlanceIndex).toBeGreaterThanOrEqual(0); + expect(atAGlanceIndex).toBeGreaterThan(foldedSummaryIndex); expect(whatItIsIndex).toBeGreaterThan(atAGlanceIndex); }); }); diff --git a/src/lib/content/module-shell-render.tsx b/src/lib/content/module-shell-render.tsx index c0b0cd22..cd15f3ee 100644 --- a/src/lib/content/module-shell-render.tsx +++ b/src/lib/content/module-shell-render.tsx @@ -2,6 +2,7 @@ import { DocsDescription, DocsTitle } from "fumadocs-ui/layouts/docs/page"; import type { ReactNode } from "react"; import { createElement } from "react"; import { renderToStaticMarkup } from "react-dom/server"; +import { DocsFoldedSummary } from "@/features/docs/components/DocsFoldedSummary"; import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; import type { LoadedModulePage } from "@/lib/content/module-page-load"; @@ -16,6 +17,13 @@ export function renderModuleDocsShell( null, createElement(DocsTitle, null, loadedPage.messages.title), createElement(DocsDescription, null, loadedPage.messages.description), + loadedPage.messages.openingSummary + ? createElement( + DocsFoldedSummary, + null, + loadedPage.messages.openingSummary, + ) + : null, createElement(ModulePageProviders, { messages: loadedPage.messages, assets: loadedPage.assets, diff --git a/src/lib/content/page-shell-folded-summary.test.tsx b/src/lib/content/page-shell-folded-summary.test.tsx index bb86534e..1d6788c1 100644 --- a/src/lib/content/page-shell-folded-summary.test.tsx +++ b/src/lib/content/page-shell-folded-summary.test.tsx @@ -4,16 +4,17 @@ import { renderModuleDocsShell } from "@/lib/content/module-shell-render"; describe("page shell opening layout", () => { for (const slug of ["grouped-query-attention", "attention"] as const) { - test(`${slug} module renders At a glance before the first content section without a folded summary block`, async () => { + test(`${slug} module renders the shell folded summary before At a glance and the first content section`, async () => { const page = await loadModulePage(slug); const html = renderModuleDocsShell(page); + const foldedSummaryIndex = html.indexOf('data-testid="folded-summary"'); const atAGlanceIndex = html.indexOf('aria-label="At a glance"'); const whatItIsIndex = html.indexOf('id="what-it-is"'); - expect(html).not.toContain('data-testid="folded-summary"'); - expect(html).not.toContain('data-opening-summary="folded"'); + expect(foldedSummaryIndex).toBeGreaterThanOrEqual(0); if (atAGlanceIndex >= 0) { + expect(foldedSummaryIndex).toBeLessThan(atAGlanceIndex); expect(atAGlanceIndex).toBeLessThan(whatItIsIndex); } expect(whatItIsIndex).toBeGreaterThanOrEqual(0); diff --git a/src/lib/content/registry-runtime.test.ts b/src/lib/content/registry-runtime.test.ts index efc6dfe4..5042a29c 100644 --- a/src/lib/content/registry-runtime.test.ts +++ b/src/lib/content/registry-runtime.test.ts @@ -170,6 +170,30 @@ describe("registry-runtime", () => { expect(record?.citationIds).toEqual(["citation.kudo-sentencepiece"]); }); + test("getModuleById returns cross-attention with nearby architecture links", () => { + const record = getModuleById("module.cross-attention"); + expect(record?.slug).toBe("cross-attention"); + expect(record?.tags).toEqual(["attention"]); + expect(record?.aliases).toEqual( + expect.arrayContaining([ + "cross attention", + "cross-attention", + "encoder-decoder attention", + "encoder decoder attention", + ]), + ); + expect(record?.relatedIds).toEqual([ + "module.attention", + "module.multi-head-attention", + "module.causal-attention", + "module.bidirectional-attention", + "concept.transformer-architecture", + "concept.encoder-decoder", + "concept.multimodal-model", + ]); + expect(record?.variantGroup).toBe("attention-memory-sources"); + }); + test("getRegistryTags returns tags for a known module", () => { expect(getRegistryTags("module.grouped-query-attention")).toEqual([ "attention", @@ -195,6 +219,10 @@ describe("registry-runtime", () => { expect(getRegistryTags("module.causal-attention")).toEqual(["attention"]); }); + test("getRegistryTags returns tags for cross-attention", () => { + expect(getRegistryTags("module.cross-attention")).toEqual(["attention"]); + }); + test("getRegistryTags returns tags for a known concept", () => { expect(getRegistryTags("concept.token")).toEqual([ "attention", @@ -247,6 +275,12 @@ describe("registry-runtime", () => { ]); }); + test("getRegistryCitationIds returns citations for cross-attention", () => { + expect(getRegistryCitationIds("module.cross-attention")).toEqual([ + "citation.attention-is-all-you-need", + ]); + }); + test("MHA and MQA modules link attention overview and sibling variants", () => { expect(getModuleById("module.multi-head-attention")?.relatedIds).toEqual([ "module.attention", diff --git a/src/tests/content/attention-tag-landing.test.ts b/src/tests/content/attention-tag-landing.test.ts index 3554c151..5daac17a 100644 --- a/src/tests/content/attention-tag-landing.test.ts +++ b/src/tests/content/attention-tag-landing.test.ts @@ -17,6 +17,7 @@ const REPRESENTATIVE_ATTENTION_GROUP_URLS = { model: ["/docs/models/gpt-3"], module: [ "/docs/modules/attention", + "/docs/modules/cross-attention", "/docs/modules/grouped-query-attention", "/docs/modules/local-attention", "/docs/modules/sliding-window-attention",