diff --git a/src/content/docs/glossary/activation/messages/en.json b/src/content/docs/glossary/activation/messages/en.json index 2b157cdf..2ccf83eb 100644 --- a/src/content/docs/glossary/activation/messages/en.json +++ b/src/content/docs/glossary/activation/messages/en.json @@ -8,7 +8,7 @@ }, "whyItMatters": { "title": "Why It Matters", - "body": "Activations determine what information reaches the next layer and what backpropagation will differentiate. Memory planners track activation tensors during training because they are large but temporary, unlike checkpointed parameters." + "body": "Activations determine what information reaches the next layer and what backpropagation will differentiate. Memory planners track activation tensors during training because they are large but temporary, unlike checkpointed parameters. This page is the broad foundation for the activation concept; pages like ReLU, LeakyReLU, and SiLU zoom in on specific FFN activation choices, while SwiGLU shows how using an activation as a gate leads to a different FFN block shape." }, "simpleExample": { "title": "Simple Example", @@ -20,7 +20,7 @@ }, "commonConfusions": { "title": "Common Confusions", - "body": "A hidden activation is not the same as softmax: softmax turns vocabulary logits into a probability vector at the output head, while activations are internal layer outputs that may never be normalized across the vocabulary. Saying a “ReLU activation” refers to the nonlinearity applied inside a layer, not to the softmax step at decode time." + "body": "A hidden activation is not the same as softmax: softmax turns vocabulary logits into a probability vector at the output head, while activations are internal layer outputs that may never be normalized across the vocabulary. Saying a “ReLU activation” refers to one specific nonlinearity that shapes FFN activations, not to the softmax step at decode time. 
ReLU, LeakyReLU, and SiLU are specific activation choices; SwiGLU goes one step further and changes the FFN into a gated two-branch block." }, "related": { "title": "Related Concepts And Modules" diff --git a/src/content/docs/glossary/feed-forward-network/messages/en.json b/src/content/docs/glossary/feed-forward-network/messages/en.json index 00752b8a..572d1ac7 100644 --- a/src/content/docs/glossary/feed-forward-network/messages/en.json +++ b/src/content/docs/glossary/feed-forward-network/messages/en.json @@ -8,7 +8,7 @@ }, "whyItMatters": { "title": "Why It Matters", - "body": "Attention decides what each position can read from the sequence; the FFN decides how to transform what was read into richer features. Most transformer blocks alternate these two steps, so recognizing the FFN slot helps you read architecture diagrams and spot when a model swaps a dense MLP for a mixture-of-experts layer while keeping the block shape." + "body": "Attention decides what each position can read from the sequence; the FFN decides how to transform what was read into richer features. Most transformer blocks alternate these two steps, so recognizing the FFN slot helps you read architecture diagrams and spot when a model swaps a dense MLP for a mixture-of-experts layer while keeping the block shape. This page is the broad map of that slot; nearby pages like Standard FFN, ReLU, SiLU, and SwiGLU zoom in on one default block shape or one activation-driven variant inside it." }, "simpleExample": { "title": "Simple Example", @@ -16,7 +16,7 @@ }, "commonConfusions": { "title": "Common Confusions", - "body": "The FFN is not attention: it does not look at other tokens. It is also not the language-model head at the stack top—that head maps final hidden states to vocabulary logits. A mixture-of-experts layer replaces the dense FFN with routed expert MLPs but still sits in the same block slot after attention." + "body": "The FFN is not attention: it does not look at other tokens. 
It is also not the language-model head at the stack top—that head maps final hidden states to vocabulary logits. Standard FFN is the default dense version of this slot, while ReLU, LeakyReLU, and SiLU name different nonlinearities that can live inside it. SwiGLU changes the internal FFN shape with gating, and a mixture-of-experts layer replaces one shared dense path with routed expert MLPs while still sitting in the same block slot after attention." }, "related": { "title": "Related Concepts And Modules" diff --git a/src/content/docs/glossary/leaky-relu/messages/en.json b/src/content/docs/glossary/leaky-relu/messages/en.json index d993d4f9..285927a6 100644 --- a/src/content/docs/glossary/leaky-relu/messages/en.json +++ b/src/content/docs/glossary/leaky-relu/messages/en.json @@ -1,22 +1,22 @@ { "title": "LeakyReLU", - "description": "A ReLU-style activation that keeps a small negative slope instead of zeroing every negative FFN value.", + "description": "An FFN activation like ReLU that still lets a small negative signal pass instead of clamping all negative values to zero.", "sections": { "whatItIs": { "title": "What It Is", - "body": "LeakyReLU is a small change to ReLU. Positive hidden values pass through as usual, but negative values are multiplied by a small constant such as 0.01 instead of becoming zero. In a standard FFN after attention, that means a token can keep a weak negative signal rather than shutting the feature off completely." + "body": "LeakyReLU is a ReLU-style activation with a small slope on the negative side. Inside an FFN, positive hidden values still pass through normally, but negative values are shrunk rather than cut off entirely. The result is still a simple per-token hidden transform in the same FFN slot after attention." }, "whyItMatters": { "title": "Why It Matters", - "body": "The main reason to use LeakyReLU is to avoid losing all gradient signal on the negative side. 
A dense FFN with plain ReLU can leave some hidden units inactive for long stretches if they keep landing below zero. LeakyReLU softens that cutoff, so papers sometimes use it when they want ReLU-like behavior with a less brittle negative branch." + "body": "This variant changes one specific behavior relative to ReLU: negative responses do not disappear completely. That makes LeakyReLU useful as a comparison point when you want to understand what changes if an FFN keeps a faint negative signal alive instead of throwing it away. It stays a dense FFN choice, not a routing or architecture change." }, "simpleExample": { "title": "Simple Example", - "body": "If the hidden values are [-2.0, 0.7, 3.4] and the leak factor is 0.01, LeakyReLU produces [-0.02, 0.7, 3.4]. The FFN still runs the same expand then activate then project recipe; only the activation rule changes." + "body": "Suppose an FFN hidden vector contains -2, -0.3, 0.1, and 4. A LeakyReLU with a small negative slope might turn that into something like -0.02, -0.003, 0.1, and 4 before the block projects back down. The big idea is that the negative side still contributes a little instead of becoming exact zero." }, "commonConfusions": { "title": "Common Confusions", - "body": "LeakyReLU is not a gated FFN like SwiGLU and it is not a sparse router like mixture of experts. It stays inside the same dense standard FFN block. It also does not make negative values equally important as positive ones; the negative side is still much smaller, just not forced to zero." + "body": "LeakyReLU is still an activation inside a standard FFN, not a separate expert path or a new transformer layer. It differs from plain ReLU only in what happens to negative values. It also does not mean the model is using a gated FFN such as SwiGLU, because gating changes the FFN shape while LeakyReLU only changes the nonlinearity inside the usual dense path." 
}, "related": { "title": "Related Concepts And Modules" @@ -28,5 +28,5 @@ "title": "References" } }, - "openingSummary": "LeakyReLU keeps a small negative slope inside an FFN instead of clamping every negative hidden value to zero, so weak signals can still flow through the block." + "openingSummary": "LeakyReLU is a small variation on ReLU: it keeps the same FFN role, but negative hidden values leak through at a reduced scale instead of dropping to zero." } diff --git a/src/content/docs/glossary/leaky-relu/page.mdx b/src/content/docs/glossary/leaky-relu/page.mdx index 36eab020..eab4df02 100644 --- a/src/content/docs/glossary/leaky-relu/page.mdx +++ b/src/content/docs/glossary/leaky-relu/page.mdx @@ -1,6 +1,6 @@ --- title: LeakyReLU -description: A ReLU-style activation that keeps a small negative slope instead of zeroing every negative FFN value. +description: An FFN activation like ReLU that still lets a small negative signal pass instead of clamping all negative values to zero. kind: "glossary" registryId: "concept.leaky-relu" messageNamespace: "local" @@ -9,9 +9,9 @@ status: "published" tags: - foundations aliases: + - "LeakyReLU" - "leaky ReLU" - "leaky rectified linear unit" - - "Leaky ReLU activation" updatedAt: "2026-06-18" --- diff --git a/src/content/docs/glossary/mixture-of-experts/messages/en.json b/src/content/docs/glossary/mixture-of-experts/messages/en.json index 18f47c1a..41b3e79c 100644 --- a/src/content/docs/glossary/mixture-of-experts/messages/en.json +++ b/src/content/docs/glossary/mixture-of-experts/messages/en.json @@ -8,7 +8,7 @@ }, "whyItMatters": { "title": "Why It Matters", - "body": "MoE increases total parameter count while keeping compute per token roughly flat, because only a few experts activate per step. That tradeoff matters when scaling language models: you can add capacity without multiplying FLOPs for every position. 
Capacity limits, load balancing, and routing noise also shape training stability, so recognizing MoE helps you read model cards that list expert counts and top-k routing." + "body": "MoE increases total parameter count while keeping compute per token roughly flat, because only a few experts activate per step. That tradeoff matters when scaling language models: you can add capacity without multiplying FLOPs for every position. Capacity limits, load balancing, and routing noise also shape training stability, so recognizing MoE helps you read model cards that list expert counts and top-k routing. It also helps to see where MoE sits relative to the rest of the FFN family: it replaces the default dense block in the usual transformer slot rather than adding a whole new stage elsewhere." }, "simpleExample": { "title": "Simple Example", @@ -16,7 +16,7 @@ }, "commonConfusions": { "title": "Common Confusions", - "body": "MoE is not a model ensemble: ensembles combine separate full models at inference, while MoE keeps one shared stack and only sparsely activates internal experts. MoE is also not the same as a dense FFN—both sit after attention in the block, but dense FFN runs one MLP for every token whereas MoE selects a small subset. Finally, total expert parameters are not all used on every forward pass; active compute tracks top-k, not the full expert pool." + "body": "MoE is not a model ensemble: ensembles combine separate full models at inference, while MoE keeps one shared stack and only sparsely activates internal experts. MoE is also not the same as a dense FFN—both sit after attention in the block, but dense FFN runs one MLP for every token whereas MoE selects a small subset. SwiGLU is another FFN variant, but it keeps one shared gated dense block rather than routing tokens across experts. Finally, total expert parameters are not all used on every forward pass; active compute tracks top-k, not the full expert pool." 
}, "related": { "title": "Related Concepts And Modules" diff --git a/src/content/docs/glossary/relu/messages/en.json b/src/content/docs/glossary/relu/messages/en.json index 0ede63ea..66f9cb40 100644 --- a/src/content/docs/glossary/relu/messages/en.json +++ b/src/content/docs/glossary/relu/messages/en.json @@ -1,22 +1,22 @@ { "title": "ReLU", - "description": "A pointwise activation that keeps positive FFN values and clamps negative ones to zero.", + "description": "A simple activation that keeps positive values and turns negative values into zero inside many FFN hidden layers.", "sections": { "whatItIs": { "title": "What It Is", - "body": "ReLU stands for rectified linear unit. Inside a standard FFN, it looks at each hidden value after the expand layer and applies a simple rule: keep positive numbers, replace negative numbers with zero. The FFN stays in the same slot after attention, but this activation changes which hidden features stay active before the block projects back to model width." + "body": "ReLU stands for rectified linear unit. Inside a standard FFN, it leaves positive hidden values alone and replaces negative ones with zero before the next projection. That gives the FFN a simple nonlinearity, so the block can reshape features instead of behaving like one big linear map." }, "whyItMatters": { "title": "Why It Matters", - "body": "ReLU became popular because the rule is cheap to compute and easy to reason about. When a token's hidden feature is positive, the feature passes through unchanged; when it is negative, the feature drops out for that step. That sharp gate can help a dense FFN learn sparse feature patterns, which is why ReLU remains a useful baseline when papers compare newer activations." + "body": "ReLU is one of the easiest activation choices to understand, so it often acts as the baseline when people compare FFN variants. 
In a transformer block, the attention sublayer mixes information across tokens, then the FFN with ReLU transforms each token's hidden state on its own. If you know what ReLU changes, later pages on smoother or gated variants are easier to read." }, "simpleExample": { "title": "Simple Example", - "body": "Suppose an FFN hidden vector contains [-2.1, 0.7, 3.4]. ReLU turns it into [0, 0.7, 3.4]. The token stays in the same per-position feed-forward path; only the hidden values change before the next projection." + "body": "Imagine an FFN hidden vector with values like -2, -0.3, 0.1, and 4. ReLU turns that into 0, 0, 0.1, and 4 before the FFN projects the vector back to model width. In plain terms, the layer stops carrying the negative hidden responses forward but keeps the positive ones." }, "commonConfusions": { "title": "Common Confusions", - "body": "ReLU is only the activation step inside an FFN, not the whole feed-forward block. It also is not the same as LeakyReLU or SiLU, which keep some information for negative values instead of forcing every negative entry to zero. In transformer papers, seeing ReLU usually tells you how the hidden FFN state is shaped, not that the model changed its attention or residual path." + "body": "ReLU is an activation choice inside an FFN, not a separate transformer block. Swapping in ReLU does not move the feed-forward slot or turn the model into a mixture-of-experts layer. It is also not the same as LeakyReLU: plain ReLU cuts negative values all the way to zero, while LeakyReLU lets a small negative signal keep flowing." }, "related": { "title": "Related Concepts And Modules" @@ -28,5 +28,5 @@ "title": "References" } }, - "openingSummary": "ReLU is a simple FFN activation that zeroes negative hidden values, giving transformer MLPs a cheap nonlinearity that decides which features remain active." 
+ "openingSummary": "ReLU is the simplest common FFN activation: after the hidden projection, it keeps positive values, zeros out negative ones, and then the block projects the result back down." } diff --git a/src/content/docs/glossary/relu/page.mdx b/src/content/docs/glossary/relu/page.mdx index 1357181b..9cb80335 100644 --- a/src/content/docs/glossary/relu/page.mdx +++ b/src/content/docs/glossary/relu/page.mdx @@ -1,6 +1,6 @@ --- title: ReLU -description: A pointwise activation that keeps positive FFN values and clamps negative ones to zero. +description: A simple activation that keeps positive values and turns negative values into zero inside many FFN hidden layers. kind: "glossary" registryId: "concept.relu" messageNamespace: "local" @@ -9,9 +9,9 @@ status: "published" tags: - foundations aliases: + - "ReLU" - "rectified linear unit" - - "ReLU activation" - - "rectifier" + - "relu activation" updatedAt: "2026-06-18" --- diff --git a/src/content/docs/glossary/silu/messages/en.json b/src/content/docs/glossary/silu/messages/en.json index d96fd6c3..d8f8cad1 100644 --- a/src/content/docs/glossary/silu/messages/en.json +++ b/src/content/docs/glossary/silu/messages/en.json @@ -1,22 +1,22 @@ { "title": "SiLU", - "description": "A smooth activation that multiplies each FFN hidden value by its sigmoid gate.", + "description": "A smooth FFN activation that scales each hidden value by a soft gate based on that same value.", "sections": { "whatItIs": { "title": "What It Is", - "body": "SiLU stands for sigmoid linear unit. For each hidden value x inside an FFN, it outputs x multiplied by sigmoid(x). Large positive values pass through strongly, values near zero are softened, and negative values are reduced smoothly instead of being cut off sharply. Many modern transformer blocks use SiLU because it fits well with wide FFN layers after attention." + "body": "SiLU stands for sigmoid linear unit. 
Inside an FFN, it multiplies each hidden value by a soft gate computed from that same value, so the response changes smoothly instead of snapping at zero the way ReLU does. The FFN still sits in the same transformer slot after attention and still transforms each token on its own." }, "whyItMatters": { "title": "Why It Matters", - "body": "Compared with ReLU, SiLU changes hidden states more gradually. That smoother shape often works well in large language model FFNs, where small changes in hidden values can matter across many layers. SiLU also matters because SwiGLU builds on the same nonlinearity: the gate branch in a SwiGLU block uses SiLU before it scales the value branch." + "body": "SiLU is a useful bridge between simple activations and more modern gated FFN designs. It keeps the dense FFN shape of a standard FFN, but it gives the hidden transform a smoother response that many recent architectures build on. If you understand SiLU, the jump to SwiGLU is much easier because SwiGLU reuses the same smooth gating idea inside a two-branch FFN." }, "simpleExample": { "title": "Simple Example", - "body": "If one hidden value is 3, sigmoid(3) is close to 0.95, so SiLU keeps most of that signal. If another value is -2, sigmoid(-2) is small, so the output stays negative but shrinks in magnitude. The token still follows the same standard FFN slot; SiLU only changes how the expanded hidden features are filtered." + "body": "Imagine an FFN hidden value is slightly negative. ReLU would cut it to zero, but SiLU usually leaves a small negative output because the gate fades it down instead of shutting it off completely. Large positive values still pass through strongly, so the FFN can keep strong positive evidence while treating weaker values more gently." }, "commonConfusions": { "title": "Common Confusions", - "body": "SiLU is an activation, not a full gated FFN by itself. 
SwiGLU uses SiLU as part of a larger two-branch feed-forward design, while plain SiLU can also appear inside an otherwise standard FFN. SiLU is also not the same as softmax or sigmoid output heads; it is an internal hidden-state transform." + "body": "SiLU is still just an activation choice inside a dense FFN, not a separate expert-routing layer and not a gated FFN by itself. A model using SiLU can still have a standard FFN block shape. SwiGLU goes further by adding a second branch that gates the main branch, while mixture-of-experts changes which FFN path a token uses." }, "related": { "title": "Related Concepts And Modules" @@ -28,5 +28,5 @@ "title": "References" } }, - "openingSummary": "SiLU smoothly gates each FFN hidden value with its own sigmoid score, giving transformer MLPs a softer activation than ReLU and setting up the gate used in SwiGLU blocks." + "openingSummary": "SiLU is a smooth FFN activation: it softly gates each hidden value using that value itself, so the dense block keeps the same slot as a standard FFN but changes how hidden responses flow through it." } diff --git a/src/content/docs/glossary/silu/page.mdx b/src/content/docs/glossary/silu/page.mdx index 49dc968a..9f50314b 100644 --- a/src/content/docs/glossary/silu/page.mdx +++ b/src/content/docs/glossary/silu/page.mdx @@ -1,6 +1,6 @@ --- title: SiLU -description: A smooth activation that multiplies each FFN hidden value by its sigmoid gate. +description: A smooth FFN activation that scales each hidden value by a soft gate based on that same value. 
kind: "glossary" registryId: "concept.silu" messageNamespace: "local" @@ -9,9 +9,9 @@ status: "published" tags: - foundations aliases: + - "SiLU" - "sigmoid linear unit" - - "Swish" - - "SiLU activation" + - "swish" updatedAt: "2026-06-18" --- diff --git a/src/content/docs/glossary/standard-ffn/messages/en.json b/src/content/docs/glossary/standard-ffn/messages/en.json index 26739cb7..dbffbf0d 100644 --- a/src/content/docs/glossary/standard-ffn/messages/en.json +++ b/src/content/docs/glossary/standard-ffn/messages/en.json @@ -1,22 +1,22 @@ { "title": "Standard FFN", - "description": "The default dense transformer MLP that expands, activates, and projects each token state after attention.", + "description": "The default dense MLP inside a transformer block, where every token passes through the same expand, activate, and project path after attention.", "sections": { "whatItIs": { "title": "What It Is", - "body": "A standard FFN is the default dense feed-forward block used in most transformer layers. After attention updates a token's hidden state, the standard FFN sends that vector through one wide linear layer, applies a pointwise activation, and projects it back to the model width. Every token position uses the same weights, and every token goes through the same dense path." + "body": "A standard FFN is the plain dense feed-forward block used in many transformer layers. After attention updates each token, the block sends that token's hidden vector through one larger projection, an activation such as ReLU or GELU, and one projection back to model width. Every token position runs the same weights, so the block changes features within a token rather than mixing information across tokens." }, "whyItMatters": { "title": "Why It Matters", - "body": "The broader feed-forward-network idea includes several designs that live in the same block slot. Standard FFN names the baseline most readers see in transformer diagrams and papers: expand, activate, project. 
Once you know that baseline, it becomes easier to notice when a paper swaps in a gated form such as SwiGLU or a sparse replacement such as mixture of experts." + "body": "This is the baseline block that later FFN variants modify. If a paper says it uses a dense MLP, a plain FFN, or the default transformer feed-forward block, it usually means this pattern. Knowing the baseline makes it easier to spot what changes when a model swaps in a routed mixture-of-experts layer or a gated design such as SwiGLU." }, "simpleExample": { "title": "Simple Example", - "body": "Suppose a transformer uses hidden size 4096 and FFN width 16384. A token vector leaves attention, multiplies by a 4096-to-16384 matrix, passes through an activation such as ReLU, GELU, or SiLU, then multiplies by a 16384-to-4096 matrix before returning to the residual stream. The path is dense because every hidden unit in the wide layer participates for every token." + "body": "Picture one decoder block. Attention lets the token for \"bank\" read nearby words and decide whether the sentence is about finance or a river. The standard FFN then refines only that token's updated vector: expand it to a wider hidden space, apply the activation, and project it back down before the residual stream continues to the next block." }, "commonConfusions": { "title": "Common Confusions", - "body": "Standard FFN is not the name for every feed-forward design. It is one common member of the larger FFN family. It also is not mixture of experts: MoE keeps the same transformer slot but routes each token through only a few experts instead of one dense MLP. The activation inside the block, such as ReLU, is only one step inside the standard FFN rather than the whole component." + "body": "Standard FFN is narrower than the broad feed-forward-network concept page: that broader page explains the transformer slot, while this page names the default dense version of that slot. 
It is also not a mixture-of-experts layer, because MoE routes tokens through a few expert MLPs instead of one shared dense path. Finally, changing the activation inside the block does not move the block elsewhere in the transformer; ReLU, SiLU, and gated variants still sit after attention in the same slot." }, "related": { "title": "Related Concepts And Modules" @@ -28,5 +28,5 @@ "title": "References" } }, - "openingSummary": "A standard FFN is the default dense MLP after attention in a transformer block, using an expand-activate-project path so each token gets the same per-position transformation." + "openingSummary": "A standard FFN is the default dense MLP that runs after attention in each transformer block, giving every token the same expand, activate, and project path before the model moves on." } diff --git a/src/content/docs/glossary/standard-ffn/page.mdx b/src/content/docs/glossary/standard-ffn/page.mdx index ef6e4e28..c34970c6 100644 --- a/src/content/docs/glossary/standard-ffn/page.mdx +++ b/src/content/docs/glossary/standard-ffn/page.mdx @@ -1,6 +1,6 @@ --- title: Standard FFN -description: The default dense transformer MLP that expands, activates, and projects each token state after attention. +description: The default dense MLP inside a transformer block, where every token passes through the same expand, activate, and project path after attention. 
kind: "glossary" registryId: "concept.standard-ffn" messageNamespace: "local" @@ -9,9 +9,9 @@ status: "published" tags: - foundations aliases: + - "standard FFN" - "dense FFN" - "dense MLP block" - - "standard feed-forward network" updatedAt: "2026-06-18" --- diff --git a/src/content/docs/glossary/swiglu/messages/en.json b/src/content/docs/glossary/swiglu/messages/en.json index ca266328..b58c2549 100644 --- a/src/content/docs/glossary/swiglu/messages/en.json +++ b/src/content/docs/glossary/swiglu/messages/en.json @@ -1,22 +1,22 @@ { "title": "SwiGLU", - "description": "A gated FFN design that splits the hidden expansion into value and gate branches, then uses SiLU to scale the result.", + "description": "A gated FFN variant that uses a SiLU-based gate to control a second hidden branch before projecting back to model width.", "sections": { "whatItIs": { "title": "What It Is", - "body": "SwiGLU is a feed-forward variant used in many modern language models. Instead of one expanded hidden branch, the FFN creates two branches after attention: a value branch and a gate branch. The gate branch passes through a SiLU activation, then multiplies the value branch element by element before the block projects back to model width." + "body": "SwiGLU is a gated FFN variant. Instead of one hidden branch followed by one activation, the block creates two hidden branches after the input projection: one branch carries candidate values, and the other branch uses a SiLU-style gate to decide how much of those values should pass through. After that gating step, the FFN projects the result back to model width in the same transformer slot after attention." }, "whyItMatters": { "title": "Why It Matters", - "body": "A standard FFN uses one activation on one expanded vector. SwiGLU changes the block shape by adding a separate learned gate, which lets the FFN decide more precisely which hidden features should stay strong for each token. 
That usually makes it a better comparison to standard FFN than to mixture of experts: SwiGLU is still one dense per-token block, while MoE changes the slot into sparse routed experts." + "body": "SwiGLU matters because it changes the shape of the FFN block, not just which activation the block applies. A standard FFN usually follows an expand, activate, and project pattern with one hidden path. SwiGLU keeps the same role inside the transformer block, but it replaces that single hidden path with a two-branch gated design. Many modern LLMs use this pattern because it gives the FFN a richer way to control hidden features." }, "simpleExample": { "title": "Simple Example", - "body": "Imagine an FFN expands a token into two vectors of the same width. One vector carries candidate features. The second goes through SiLU and becomes a gate between roughly zero and the original scale. If one gate entry is small, the matching feature in the value branch is suppressed before projection. If the gate entry is large, that feature passes through more strongly." + "body": "Picture one branch proposing hidden features like possible facts to keep, while the SiLU-gated branch acts like a soft valve that decides how open each feature should be. If the gate is nearly closed, that candidate feature is damped before the output projection. If the gate is more open, the feature survives more strongly. The important point is that the FFN is still a per-token transform, just with a gated internal path." }, "commonConfusions": { "title": "Common Confusions", - "body": "SwiGLU is not just another name for SiLU. SiLU is the activation on the gate branch, while SwiGLU is the full two-branch FFN pattern built around that activation. SwiGLU also is not mixture of experts: every token still uses the same dense block, so there is no top-k router choosing different experts." + "body": "SwiGLU is not the same thing as plain SiLU. 
SiLU is an activation that can live inside a standard FFN, while SwiGLU is a gated FFN structure that uses a SiLU-like gate across two branches. It is also not a mixture-of-experts layer: mixture-of-experts routes tokens across different FFN experts, but SwiGLU keeps one shared FFN block and only changes its internal shape." }, "related": { "title": "Related Concepts And Modules" @@ -28,5 +28,5 @@ "title": "References" } }, - "openingSummary": "SwiGLU splits the FFN expansion into value and gate branches, uses SiLU on the gate, and multiplies them together before projection so the block can choose which hidden features stay strong." + "openingSummary": "SwiGLU is a gated FFN: after attention, the block splits into two hidden branches, uses a SiLU-based gate to filter one branch with the other, and then projects the result back down in the same slot where a standard FFN would sit." } diff --git a/src/content/docs/glossary/swiglu/page.mdx b/src/content/docs/glossary/swiglu/page.mdx index 2ac894c5..eb200f22 100644 --- a/src/content/docs/glossary/swiglu/page.mdx +++ b/src/content/docs/glossary/swiglu/page.mdx @@ -1,6 +1,6 @@ --- title: SwiGLU -description: A gated FFN design that splits the hidden expansion into value and gate branches, then uses SiLU to scale the result. +description: A gated FFN variant that uses a SiLU-based gate to control a second hidden branch before projecting back to model width. 
kind: "glossary" registryId: "concept.swiglu" messageNamespace: "local" @@ -9,9 +9,10 @@ status: "published" tags: - foundations aliases: + - "SwiGLU" + - "Swi GLU" + - "Swish GLU" - "swish gated linear unit" - - "SiLU-gated FFN" - - "SwiGLU FFN" updatedAt: "2026-06-18" --- diff --git a/src/content/registry/concepts/activation.json b/src/content/registry/concepts/activation.json index 0aacb8e0..4a27d390 100644 --- a/src/content/registry/concepts/activation.json +++ b/src/content/registry/concepts/activation.json @@ -6,7 +6,16 @@ "defaultSummaryKey": "description", "aliases": ["activations", "hidden activation", "layer output"], "tags": ["token-to-probability-chain", "foundations"], - "relatedIds": ["concept.computational-graph", "concept.quantization"], + "relatedIds": [ + "concept.feed-forward-network", + "concept.standard-ffn", + "concept.relu", + "concept.leaky-relu", + "concept.silu", + "concept.swiglu", + "concept.computational-graph", + "concept.quantization" + ], "citationIds": [], "status": "published", "createdAt": "2026-06-04T00:00:00.000Z", diff --git a/src/content/registry/concepts/feed-forward-network.json b/src/content/registry/concepts/feed-forward-network.json index 3b78411c..86785636 100644 --- a/src/content/registry/concepts/feed-forward-network.json +++ b/src/content/registry/concepts/feed-forward-network.json @@ -8,14 +8,18 @@ "tags": ["foundations"], "relatedIds": [ "concept.transformer-architecture", + "concept.activation", "concept.standard-ffn", - "concept.mixture-of-experts", - "concept.activation" + "concept.relu", + "concept.leaky-relu", + "concept.silu", + "concept.swiglu", + "concept.mixture-of-experts" ], "citationIds": [], "status": "published", "createdAt": "2026-06-08T00:00:00.000Z", - "updatedAt": "2026-06-18T00:00:00.000Z", + "updatedAt": "2026-06-08T00:00:00.000Z", "conceptType": "general", "prerequisiteIds": [], "explainsIds": [] diff --git a/src/content/registry/concepts/leaky-relu.json 
b/src/content/registry/concepts/leaky-relu.json index e2f7521b..e1f992a6 100644 --- a/src/content/registry/concepts/leaky-relu.json +++ b/src/content/registry/concepts/leaky-relu.json @@ -4,18 +4,13 @@ "kind": "concept", "defaultTitleKey": "title", "defaultSummaryKey": "description", - "aliases": [ - "leaky ReLU", - "leaky rectified linear unit", - "Leaky ReLU activation" - ], + "aliases": ["LeakyReLU", "leaky ReLU", "leaky rectified linear unit"], "tags": ["foundations"], "relatedIds": [ "concept.activation", "concept.feed-forward-network", "concept.standard-ffn", - "concept.relu", - "concept.silu" + "concept.relu" ], "citationIds": [], "status": "published", diff --git a/src/content/registry/concepts/mixture-of-experts.json b/src/content/registry/concepts/mixture-of-experts.json index b71a435e..3db5cb7c 100644 --- a/src/content/registry/concepts/mixture-of-experts.json +++ b/src/content/registry/concepts/mixture-of-experts.json @@ -9,6 +9,8 @@ "relatedIds": [ "concept.feed-forward-network", "concept.standard-ffn", + "concept.swiglu", + "concept.activation", "concept.transformer-architecture" ], "citationIds": [], diff --git a/src/content/registry/concepts/relu.json b/src/content/registry/concepts/relu.json index d0721ae5..cda2bc9c 100644 --- a/src/content/registry/concepts/relu.json +++ b/src/content/registry/concepts/relu.json @@ -4,14 +4,13 @@ "kind": "concept", "defaultTitleKey": "title", "defaultSummaryKey": "description", - "aliases": ["rectified linear unit", "ReLU activation", "rectifier"], + "aliases": ["ReLU", "rectified linear unit", "relu activation"], "tags": ["foundations"], "relatedIds": [ "concept.activation", "concept.feed-forward-network", "concept.standard-ffn", - "concept.leaky-relu", - "concept.silu" + "concept.leaky-relu" ], "citationIds": [], "status": "published", diff --git a/src/content/registry/concepts/silu.json b/src/content/registry/concepts/silu.json index 5fa025d8..bd95e4c0 100644 --- a/src/content/registry/concepts/silu.json +++ 
b/src/content/registry/concepts/silu.json @@ -4,13 +4,12 @@ "kind": "concept", "defaultTitleKey": "title", "defaultSummaryKey": "description", - "aliases": ["sigmoid linear unit", "Swish", "SiLU activation"], + "aliases": ["SiLU", "sigmoid linear unit", "swish"], "tags": ["foundations"], "relatedIds": [ "concept.activation", "concept.feed-forward-network", "concept.standard-ffn", - "concept.relu", "concept.swiglu" ], "citationIds": [], diff --git a/src/content/registry/concepts/standard-ffn.json b/src/content/registry/concepts/standard-ffn.json index ecf49c0c..7890cd74 100644 --- a/src/content/registry/concepts/standard-ffn.json +++ b/src/content/registry/concepts/standard-ffn.json @@ -4,7 +4,7 @@ "kind": "concept", "defaultTitleKey": "title", "defaultSummaryKey": "description", - "aliases": ["dense FFN", "dense MLP block", "standard feed-forward network"], + "aliases": ["standard FFN", "dense FFN", "dense MLP block"], "tags": ["foundations"], "relatedIds": [ "concept.feed-forward-network", diff --git a/src/content/registry/concepts/swiglu.json b/src/content/registry/concepts/swiglu.json index 2c38bf25..d25f76d9 100644 --- a/src/content/registry/concepts/swiglu.json +++ b/src/content/registry/concepts/swiglu.json @@ -4,14 +4,14 @@ "kind": "concept", "defaultTitleKey": "title", "defaultSummaryKey": "description", - "aliases": ["swish gated linear unit", "SiLU-gated FFN", "SwiGLU FFN"], + "aliases": ["SwiGLU", "Swi GLU", "Swish GLU", "swish gated linear unit"], "tags": ["foundations"], "relatedIds": [ + "concept.activation", "concept.feed-forward-network", "concept.standard-ffn", - "concept.mixture-of-experts", "concept.silu", - "concept.activation" + "concept.mixture-of-experts" ], "citationIds": [], "status": "published", diff --git a/src/lib/content/activation-family-glossary.test.ts b/src/lib/content/activation-family-glossary.test.ts deleted file mode 100644 index ed553c81..00000000 --- a/src/lib/content/activation-family-glossary.test.ts +++ /dev/null @@ -1,235 
+0,0 @@ -import { describe, expect, test } from "bun:test"; -import { readFileSync } from "node:fs"; -import { join } from "node:path"; -import { createElement } from "react"; -import { renderToStaticMarkup } from "react-dom/server"; -import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; -import { - LEAKY_RELU_GLOSSARY_PAGE_DIR, - RELU_GLOSSARY_PAGE_DIR, - SILU_GLOSSARY_PAGE_DIR, - SWIGLU_GLOSSARY_PAGE_DIR, -} from "@/lib/content/content-paths"; -import { loadGlossaryPage } from "@/lib/content/glossary-page"; -import { - expectGlossaryPresentationConvergence, - expectHtmlToContainProse, -} from "@/lib/content/glossary-test-helpers"; -import { loadPublishedDocsPages } from "@/lib/content/pages"; -import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; -import { loadRegistry } from "@/lib/content/registry"; -import { - getConceptById, - listRelatedRegistryRecords, -} from "@/lib/content/registry-runtime"; -import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; -import { pageMessagesSchema } from "@/lib/content/schemas"; -import { buildSearchDocuments } from "@/lib/search/build-documents"; - -const PAGE_CASES = [ - { - slug: "relu", - registryId: "concept.relu", - title: "ReLU", - pageDir: RELU_GLOSSARY_PAGE_DIR, - aliases: ["rectified linear unit", "ReLU activation", "rectifier"], - relatedIds: [ - "concept.activation", - "concept.feed-forward-network", - "concept.standard-ffn", - "concept.leaky-relu", - "concept.silu", - ], - hrefs: [ - "/docs/glossary/activation", - "/docs/glossary/feed-forward-network", - "/docs/glossary/standard-ffn", - "/docs/glossary/leaky-relu", - "/docs/glossary/silu", - ], - messageNeedles: ["positive", "zero", "attention"], - renderNeedle: "keep positive numbers", - searchQuery: "ReLU", - }, - { - slug: "leaky-relu", - registryId: "concept.leaky-relu", - title: "LeakyReLU", - pageDir: LEAKY_RELU_GLOSSARY_PAGE_DIR, - aliases: [ - "leaky ReLU", - "leaky 
rectified linear unit", - "Leaky ReLU activation", - ], - relatedIds: [ - "concept.activation", - "concept.feed-forward-network", - "concept.standard-ffn", - "concept.relu", - "concept.silu", - ], - hrefs: [ - "/docs/glossary/activation", - "/docs/glossary/feed-forward-network", - "/docs/glossary/standard-ffn", - "/docs/glossary/relu", - "/docs/glossary/silu", - ], - messageNeedles: ["small constant", "negative", "standard ffn"], - renderNeedle: "small constant such as 0.01", - searchQuery: "LeakyReLU", - }, - { - slug: "silu", - registryId: "concept.silu", - title: "SiLU", - pageDir: SILU_GLOSSARY_PAGE_DIR, - aliases: ["sigmoid linear unit", "Swish", "SiLU activation"], - relatedIds: [ - "concept.activation", - "concept.feed-forward-network", - "concept.standard-ffn", - "concept.relu", - "concept.swiglu", - ], - hrefs: [ - "/docs/glossary/activation", - "/docs/glossary/feed-forward-network", - "/docs/glossary/standard-ffn", - "/docs/glossary/relu", - "/docs/glossary/swiglu", - ], - messageNeedles: ["sigmoid", "smooth", "swiglu"], - renderNeedle: "sigmoid linear unit", - searchQuery: "SiLU", - }, - { - slug: "swiglu", - registryId: "concept.swiglu", - title: "SwiGLU", - pageDir: SWIGLU_GLOSSARY_PAGE_DIR, - aliases: ["swish gated linear unit", "SiLU-gated FFN", "SwiGLU FFN"], - relatedIds: [ - "concept.feed-forward-network", - "concept.standard-ffn", - "concept.mixture-of-experts", - "concept.silu", - "concept.activation", - ], - hrefs: [ - "/docs/glossary/feed-forward-network", - "/docs/glossary/standard-ffn", - "/docs/glossary/mixture-of-experts", - "/docs/glossary/silu", - "/docs/glossary/activation", - ], - messageNeedles: ["gate", "silu", "mixture of experts"], - renderNeedle: "two branches after attention", - searchQuery: "SwiGLU", - }, -] as const; - -describe("Phase 3 activation-family glossary pages (US-002)", () => { - for (const testCase of PAGE_CASES) { - test(`${testCase.title} registry record is published with aliases, tags, and curated related ids`, 
() => { - const record = getConceptById(testCase.registryId); - expect(record?.status).toBe("published"); - expect(record?.aliases).toEqual([...testCase.aliases]); - expect(record?.tags).toEqual(["foundations"]); - expect(record?.relatedIds).toEqual([...testCase.relatedIds]); - expect(PUBLISHED_DOCS_REGISTRY_IDS.has(testCase.registryId)).toBe(true); - }); - - test(`${testCase.title} curated related links resolve to published FFN-family pages`, () => { - const source = getConceptById(testCase.registryId); - if (!source) { - throw new Error(`expected ${testCase.registryId} in registry`); - } - - const items = deriveCuratedRelatedItems( - source, - listRelatedRegistryRecords(), - PUBLISHED_DOCS_REGISTRY_IDS, - ); - - for (const href of testCase.hrefs) { - expect( - items.some((item) => item.href === href && !item.isPlanned), - ).toBe(true); - } - }); - - test(`${testCase.title} messages explain the intended FFN behavior in plain language`, () => { - const messages = pageMessagesSchema.parse( - JSON.parse( - readFileSync(join(testCase.pageDir, "messages/en.json"), "utf8"), - ), - ); - - expect(messages.title).toBe(testCase.title); - expect(messages.openingSummary?.length).toBeGreaterThan(0); - const combinedBody = [ - messages.sections?.whatItIs.body, - messages.sections?.whyItMatters.body, - messages.sections?.commonConfusions.body, - ] - .join(" ") - .toLowerCase(); - - for (const needle of testCase.messageNeedles) { - expect(combinedBody).toContain(needle); - } - }); - - test(`${testCase.title} page renders glossary sections, tags, and FFN-family links`, async () => { - const page = await loadGlossaryPage(testCase.slug); - - expect(page.frontmatter.kind).toBe("glossary"); - expect(page.frontmatter.status).toBe("published"); - expect(page.frontmatter.registryId).toBe(testCase.registryId); - - const html = renderToStaticMarkup( - createElement(ModulePageProviders, { - messages: page.messages, - assets: page.assets, - // biome-ignore lint/correctness/noChildrenProp: 
third createElement arg conflicts with strict props typing - children: page.content, - }), - ); - - expectGlossaryPresentationConvergence(html, { - title: testCase.title, - }); - expect(html).toContain("What It Is"); - expect(html).toContain("Common Confusions"); - expectHtmlToContainProse(html, testCase.renderNeedle); - for (const href of testCase.hrefs) { - expect(html).toContain(`href="${href}"`); - } - expect(html).toContain('href="/tags/foundations"'); - expect(html).toContain('data-testid="tag-pill-list"'); - expect(html).toContain('data-testid="curated-related-docs"'); - expect(html).not.toContain("Phase"); - expect(html).not.toContain("Reader Shortcut"); - }); - - test(`${testCase.title} search index records the glossary page and preserves aliases`, async () => { - const registry = await loadRegistry(); - const pages = await loadPublishedDocsPages("en"); - const documents = buildSearchDocuments(pages, registry); - - const document = documents.find( - (entry) => entry.url === `/docs/glossary/${testCase.slug}`, - ); - expect(document?.title).toBe(testCase.title); - expect(document?.kind).toBe("glossary"); - expect(document?.facets.kind).toBe("glossary"); - expect(document?.aliases).toEqual( - expect.arrayContaining(testCase.aliases), - ); - expect(document?.bodyText.length ?? 0).toBeGreaterThan(50); - expect(document?.headings.length ?? 0).toBeGreaterThan(0); - expect(testCase.searchQuery.length).toBeGreaterThan(0); - }); - } -}); diff --git a/src/lib/content/content-paths.ts b/src/lib/content/content-paths.ts index 407e590e..134b4147 100644 --- a/src/lib/content/content-paths.ts +++ b/src/lib/content/content-paths.ts @@ -164,6 +164,21 @@ export const STANDARD_FFN_GLOSSARY_PAGE_DIR = join( "standard-ffn", ); +/** Phase 3 ReLU glossary page directory. */ +export const RELU_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "relu"); + +/** Phase 3 LeakyReLU glossary page directory. 
*/ +export const LEAKY_RELU_GLOSSARY_PAGE_DIR = join( + GLOSSARY_DOCS_ROOT, + "leaky-relu", +); + +/** Phase 3 SiLU glossary page directory. */ +export const SILU_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "silu"); + +/** Phase 3 SwiGLU glossary page directory. */ +export const SWIGLU_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "swiglu"); + /** Phase 3 mixture of experts glossary page directory. */ export const MIXTURE_OF_EXPERTS_GLOSSARY_PAGE_DIR = join( GLOSSARY_DOCS_ROOT, @@ -194,21 +209,6 @@ export const LAYER_NORM_GLOSSARY_PAGE_DIR = join( "layer-norm", ); -/** Phase 3 ReLU glossary page directory. */ -export const RELU_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "relu"); - -/** Phase 3 LeakyReLU glossary page directory. */ -export const LEAKY_RELU_GLOSSARY_PAGE_DIR = join( - GLOSSARY_DOCS_ROOT, - "leaky-relu", -); - -/** Phase 3 SiLU glossary page directory. */ -export const SILU_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "silu"); - -/** Phase 3 SwiGLU glossary page directory. */ -export const SWIGLU_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "swiglu"); - /** Phase 3 RMSNorm glossary page directory. */ export const RMSNORM_GLOSSARY_PAGE_DIR = join(GLOSSARY_DOCS_ROOT, "rmsnorm"); diff --git a/src/lib/content/content-reconciliation-browse-index.test.ts b/src/lib/content/content-reconciliation-browse-index.test.ts index 75a56aea..17ea5239 100644 --- a/src/lib/content/content-reconciliation-browse-index.test.ts +++ b/src/lib/content/content-reconciliation-browse-index.test.ts @@ -2,23 +2,108 @@ import { describe, expect, test } from "bun:test"; import { renderToStaticMarkup } from "react-dom/server"; import ArchitectureIndexPage from "@/app/(site)/docs/architecture/page"; import GlossaryIndexPage from "@/app/(site)/docs/glossary/page"; +import { loadPublishedArchitectureEntries } from "@/lib/content/architecture"; +import { loadPublishedGlossaryEntries } from "@/lib/content/glossary"; + +/** Batch 017 glossary pages reconciled in Phase 2/3 (see prd.md). 
*/ +const BATCH_017_GLOSSARY_URLS = [ + "/docs/glossary/transformer", + "/docs/glossary/diffusion-model", + "/docs/glossary/multimodal-model", + "/docs/glossary/world-model", + "/docs/glossary/feed-forward-network", + "/docs/glossary/mixture-of-experts", + "/docs/glossary/normalization", + "/docs/glossary/layer-norm", + "/docs/glossary/rmsnorm", + "/docs/glossary/residual-connection", + "/docs/glossary/rope", + "/docs/glossary/alibi", + "/docs/glossary/context-window", + "/docs/glossary/standard-ffn", + "/docs/glossary/relu", + "/docs/glossary/leaky-relu", + "/docs/glossary/silu", + "/docs/glossary/swiglu", +] as const; + +/** Batch 017 architecture-related concept pages reconciled in Phase 2/3. */ +const BATCH_017_ARCHITECTURE_URLS = [ + "/docs/concepts/transformer-architecture", + "/docs/concepts/positional-encodings", + "/docs/concepts/context-extension", + "/docs/concepts/why-long-context-is-hard", + "/docs/glossary/transformer", + "/docs/glossary/diffusion-model", + "/docs/glossary/multimodal-model", + "/docs/glossary/world-model", + "/docs/glossary/feed-forward-network", + "/docs/glossary/mixture-of-experts", + "/docs/glossary/normalization", + "/docs/glossary/layer-norm", + "/docs/glossary/rmsnorm", + "/docs/glossary/residual-connection", + "/docs/glossary/rope", + "/docs/glossary/alibi", + "/docs/glossary/context-window", + "/docs/glossary/standard-ffn", + "/docs/glossary/relu", + "/docs/glossary/leaky-relu", + "/docs/glossary/silu", + "/docs/glossary/swiglu", +] as const; describe("Phase 2/3 reconciliation browse indexes (US-004)", () => { - test("glossary and architecture index pages render FFN, norm, and residual links without list bullets", async () => { + test("glossary index lists every batch 017 glossary page sorted by localized title", async () => { + const entries = await loadPublishedGlossaryEntries("en"); + const entryByUrl = new Map(entries.map((entry) => [entry.url, entry])); + + for (const url of BATCH_017_GLOSSARY_URLS) { + const entry = 
entryByUrl.get(url); + expect(entry).toBeDefined(); + expect(entry?.title.length).toBeGreaterThan(0); + expect(entry?.summary.length).toBeGreaterThan(0); + } + + for (let index = 1; index < entries.length; index += 1) { + expect( + entries[index - 1].title.localeCompare(entries[index].title, "en", { + sensitivity: "base", + }), + ).toBeLessThanOrEqual(0); + } + }); + + test("architecture index lists batch 017 foundations and concept pages sorted by title", async () => { + const entries = await loadPublishedArchitectureEntries("en"); + const entryByUrl = new Map(entries.map((entry) => [entry.url, entry])); + + for (const url of BATCH_017_ARCHITECTURE_URLS) { + const entry = entryByUrl.get(url); + expect(entry).toBeDefined(); + expect(entry?.title.length).toBeGreaterThan(0); + expect(entry?.summary.length).toBeGreaterThan(0); + } + + for (let index = 1; index < entries.length; index += 1) { + expect( + entries[index - 1].title.localeCompare(entries[index].title, "en", { + sensitivity: "base", + }), + ).toBeLessThanOrEqual(0); + } + }); + + test("glossary and architecture index pages render bulletless title-plus-summary lists", async () => { const glossaryHtml = renderToStaticMarkup(await GlossaryIndexPage()); const architectureHtml = renderToStaticMarkup( await ArchitectureIndexPage(), ); for (const url of [ - "/docs/glossary/feed-forward-network", - "/docs/glossary/standard-ffn", - "/docs/glossary/mixture-of-experts", - "/docs/glossary/normalization", - "/docs/glossary/layer-norm", - "/docs/glossary/qk-norm", - "/docs/glossary/residual-connection", - "/docs/glossary/skip-connection", + "/docs/glossary/transformer", + "/docs/glossary/rope", + "/docs/glossary/context-window", ]) { expect(glossaryHtml).toContain(`href="${url}"`); } @@ -27,8 +112,7 @@ describe("Phase 2/3 reconciliation browse indexes (US-004)", () => { "/docs/concepts/transformer-architecture", "/docs/concepts/positional-encodings", "/docs/glossary/feed-forward-network", - 
"/docs/glossary/normalization", - "/docs/glossary/residual-connection", + "/docs/glossary/world-model", ]) { expect(architectureHtml).toContain(`href="${url}"`); } diff --git a/src/lib/content/content-reconciliation-foundational-tags.test.ts b/src/lib/content/content-reconciliation-foundational-tags.test.ts index cdcfb2dd..cfd20577 100644 --- a/src/lib/content/content-reconciliation-foundational-tags.test.ts +++ b/src/lib/content/content-reconciliation-foundational-tags.test.ts @@ -2,6 +2,192 @@ import { describe, expect, test } from "bun:test"; import { renderToStaticMarkup } from "react-dom/server"; import TagLandingPage from "@/app/(site)/tags/[slug]/page"; import TagsIndexPage from "@/app/(site)/tags/page"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { loadRegistry } from "@/lib/content/registry"; +import { + loadTagLandingContext, + loadTagResourceEntries, + loadTagResourceGroups, +} from "@/lib/content/tag-resources"; +import { loadPublishedTagIndexEntries } from "@/lib/content/tags"; +import { loadUiMessages } from "@/lib/content/ui-messages"; + +const FOUNDATIONAL_TAG_SLUGS = [ + "foundations", + "taxonomy", + "model-family", + "token-to-probability-chain", +] as const; + +/** Batch 017 pages expected on each foundational tag landing (see prd.md). 
*/ +const BATCH_017_TAG_URLS = { + foundations: [ + "/docs/concepts/context-extension", + "/docs/concepts/positional-encodings", + "/docs/concepts/transformer-architecture", + "/docs/concepts/why-long-context-is-hard", + "/docs/glossary/alibi", + "/docs/glossary/context-window", + "/docs/glossary/feed-forward-network", + "/docs/glossary/layer-norm", + "/docs/glossary/leaky-relu", + "/docs/glossary/mixture-of-experts", + "/docs/glossary/normalization", + "/docs/glossary/relu", + "/docs/glossary/residual-connection", + "/docs/glossary/rmsnorm", + "/docs/glossary/silu", + "/docs/glossary/rope", + "/docs/glossary/standard-ffn", + "/docs/glossary/swiglu", + ], + taxonomy: [ + "/docs/concepts/transformer-architecture", + "/docs/glossary/diffusion-model", + "/docs/glossary/multimodal-model", + "/docs/glossary/transformer", + "/docs/glossary/world-model", + ], + "model-family": [ + "/docs/glossary/diffusion-model", + "/docs/glossary/multimodal-model", + "/docs/glossary/transformer", + "/docs/glossary/world-model", + ], + "token-to-probability-chain": [], +} as const; + +function pageMatchesTag( + page: Awaited>[number], + tagSlug: string, + indexes: Awaited>, +): boolean { + if (page.frontmatter.tags.includes(tagSlug)) { + return true; + } + const record = indexes.byId.get(page.frontmatter.registryId); + return record?.tags.includes(tagSlug) ?? 
false; +} + +describe("Phase 2/3 reconciliation foundational tags (US-006)", () => { + test("foundational tag records expose localized title and summary", async () => { + const messages = await loadUiMessages(); + + const foundations = await loadTagLandingContext( + "foundations", + messages, + "en", + ); + expect(foundations?.title).toBe("Foundations"); + expect(foundations?.summary.length).toBeGreaterThan(0); + expect(foundations?.categoryLabel).toBe("Architecture"); + + const taxonomy = await loadTagLandingContext("taxonomy", messages, "en"); + expect(taxonomy?.title).toBe("Taxonomy"); + expect(taxonomy?.summary.length).toBeGreaterThan(0); + + const modelFamily = await loadTagLandingContext( + "model-family", + messages, + "en", + ); + expect(modelFamily?.title).toBe("Model family"); + expect(modelFamily?.summary).toContain( + "Published model-family glossary pages", + ); + expect(modelFamily?.categoryLabel).toBe("Model family"); + + const tokenChain = await loadTagLandingContext( + "token-to-probability-chain", + messages, + "en", + ); + expect(tokenChain?.title).toBe("Token-to-probability chain"); + expect(tokenChain?.summary.length).toBeGreaterThan(0); + }); + + test("foundational tag landings include every batch 017 associated page", async () => { + const messages = await loadUiMessages(); + + for (const tagSlug of FOUNDATIONAL_TAG_SLUGS) { + const entries = await loadTagResourceEntries(tagSlug, "en"); + const urls = new Set(entries.map((entry) => entry.url)); + + for (const url of BATCH_017_TAG_URLS[tagSlug]) { + expect(urls).toContain(url); + } + } + + const modelFamilyGroups = await loadTagResourceGroups( + "model-family", + messages, + "en", + ); + expect(modelFamilyGroups).toHaveLength(1); + expect(modelFamilyGroups[0]?.kind).toBe("glossary"); + expect(modelFamilyGroups[0]?.kindLabel).toBe("Glossary"); + expect( + modelFamilyGroups[0]?.resources.map((resource) => resource.url), + ).toEqual([ + "/docs/glossary/diffusion-model", + 
"/docs/glossary/multimodal-model", + "/docs/glossary/transformer", + "/docs/glossary/world-model", + ]); + }); + + test("foundational tag landings omit empty kind groups and sort resources by title", async () => { + const messages = await loadUiMessages(); + + for (const tagSlug of FOUNDATIONAL_TAG_SLUGS) { + const groups = await loadTagResourceGroups(tagSlug, messages, "en"); + expect(groups.every((group) => group.resources.length > 0)).toBe(true); + + for (const group of groups) { + const titles = group.resources.map((resource) => resource.title); + const sorted = [...titles].sort((a, b) => + a.localeCompare(b, "en", { sensitivity: "base" }), + ); + expect(titles).toEqual(sorted); + } + } + }); + + test("published pages with foundational tags resolve through registry or frontmatter", async () => { + const pages = await loadPublishedDocsPages("en"); + const indexes = await loadRegistry(); + + for (const tagSlug of FOUNDATIONAL_TAG_SLUGS) { + const taggedPages = pages.filter((page) => + pageMatchesTag(page, tagSlug, indexes), + ); + const entryUrls = new Set( + (await loadTagResourceEntries(tagSlug, "en")).map((entry) => entry.url), + ); + + for (const page of taggedPages) { + expect(entryUrls).toContain(page.url); + } + } + }); + + test("tags index links foundational tags with accurate descriptions", async () => { + const messages = await loadUiMessages(); + const entries = await loadPublishedTagIndexEntries(messages, "en"); + + for (const slug of FOUNDATIONAL_TAG_SLUGS) { + const entry = entries.find((candidate) => candidate.slug === slug); + expect(entry).toBeDefined(); + expect(entry?.url).toBe(`/tags/${slug}`); + expect(entry?.summary.length).toBeGreaterThan(0); + } + + const modelFamily = entries.find((entry) => entry.slug === "model-family"); + expect(modelFamily?.summary).toContain( + "Published model-family glossary pages", + ); + }); +}); describe("Phase 2/3 reconciliation foundational tag page render (US-006)", () => { test("foundations landing lists 
batch 017 resources grouped by kind", async () => { @@ -15,10 +201,6 @@ describe("Phase 2/3 reconciliation foundational tag page render (US-006)", () => expect(html).toContain("Concept"); expect(html).toContain('href="/docs/glossary/rope"'); expect(html).toContain('href="/docs/glossary/context-window"'); - expect(html).toContain('href="/docs/glossary/feed-forward-network"'); - expect(html).toContain('href="/docs/glossary/standard-ffn"'); - expect(html).toContain('href="/docs/glossary/normalization"'); - expect(html).toContain('href="/docs/glossary/skip-connection"'); expect(html).toContain('href="/docs/concepts/transformer-architecture"'); expect(html).toContain('href="/search?tag=foundations"'); expect(html).toContain("list-none"); diff --git a/src/lib/content/content-reconciliation-search-documents.test.ts b/src/lib/content/content-reconciliation-search-documents.test.ts new file mode 100644 index 00000000..99752b6e --- /dev/null +++ b/src/lib/content/content-reconciliation-search-documents.test.ts @@ -0,0 +1,178 @@ +import { describe, expect, test } from "bun:test"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { loadRegistry } from "@/lib/content/registry"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; + +/** Batch 017 pages reconciled in Phase 2/3 (see prd.md). 
*/ +const BATCH_017_DOCS_URLS = [ + "/docs/glossary/transformer", + "/docs/glossary/diffusion-model", + "/docs/glossary/multimodal-model", + "/docs/glossary/world-model", + "/docs/modules/attention", + "/docs/modules/multi-head-attention", + "/docs/modules/multi-query-attention", + "/docs/modules/multi-head-latent-attention", + "/docs/modules/sparse-attention", + "/docs/modules/sliding-window-attention", + "/docs/modules/linear-attention", + "/docs/concepts/transformer-architecture", + "/docs/glossary/feed-forward-network", + "/docs/glossary/mixture-of-experts", + "/docs/glossary/normalization", + "/docs/glossary/layer-norm", + "/docs/glossary/rmsnorm", + "/docs/glossary/residual-connection", + "/docs/concepts/positional-encodings", + "/docs/glossary/rope", + "/docs/glossary/alibi", + "/docs/glossary/context-window", + "/docs/concepts/context-extension", + "/docs/concepts/why-long-context-is-hard", + "/docs/glossary/silu", + "/docs/glossary/swiglu", +] as const; + +const BATCH_017_GLOSSARY_URLS = BATCH_017_DOCS_URLS.filter((url) => + url.startsWith("/docs/glossary/"), +); +const BATCH_017_CONCEPT_URLS = BATCH_017_DOCS_URLS.filter((url) => + url.startsWith("/docs/concepts/"), +); +const BATCH_017_MODULE_URLS = BATCH_017_DOCS_URLS.filter((url) => + url.startsWith("/docs/modules/"), +); + +const EXPECTED_ATTENTION_MODULE_URLS = [ + "/docs/modules/attention", + "/docs/modules/multi-head-attention", + "/docs/modules/multi-query-attention", + "/docs/modules/multi-head-latent-attention", + "/docs/modules/sparse-attention", + "/docs/modules/sliding-window-attention", + "/docs/modules/linear-attention", +] as const; + +const ALIAS_EXPECTATIONS = [ + { url: "/docs/modules/multi-head-attention", alias: "MHA" }, + { url: "/docs/modules/multi-query-attention", alias: "MQA" }, + { url: "/docs/modules/sparse-attention", alias: "sparse attention" }, + { url: "/docs/glossary/rope", alias: "RoPE" }, + { url: "/docs/glossary/context-window", alias: "context length" }, + { url: 
"/docs/glossary/silu", alias: "swish" }, + { url: "/docs/glossary/swiglu", alias: "Swi GLU" }, +] as const; + +describe("Phase 2/3 reconciliation search documents (US-009)", () => { + test("buildSearchDocuments emits one document per batch 017 page with registry-backed fields", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + const byUrl = new Map( + documents.map((document) => [document.url, document]), + ); + + for (const url of BATCH_017_DOCS_URLS) { + const document = byUrl.get(url); + const page = pages.find((entry) => entry.url === url); + + expect(document).toBeDefined(); + expect(page).toBeDefined(); + expect(document?.registryId).toBe(page?.frontmatter.registryId); + expect(document?.title).toBe(page?.messages.title); + expect(document?.description).toBe(page?.messages.description); + expect(document?.description.length).toBeGreaterThan(0); + expect(document?.aliases.length).toBeGreaterThan(0); + expect(document?.tags.length).toBeGreaterThan(0); + expect(document?.bodyText.length).toBeGreaterThan(50); + expect(document?.headings.length).toBeGreaterThan(0); + } + }); + + test("batch 017 glossary, concept, and module pages index with matching kind facets", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + const byUrl = new Map( + documents.map((document) => [document.url, document]), + ); + + for (const url of BATCH_017_GLOSSARY_URLS) { + const document = byUrl.get(url); + expect(document?.kind).toBe("glossary"); + expect(document?.facets.kind).toBe("glossary"); + } + + for (const url of BATCH_017_CONCEPT_URLS) { + const document = byUrl.get(url); + expect(document?.kind).toBe("concept"); + expect(document?.facets.kind).toBe("concept"); + } + + for (const url of BATCH_017_MODULE_URLS) { + const document = byUrl.get(url); + 
expect(document?.kind).toBe("module"); + expect(document?.facets.kind).toBe("module"); + } + }); + + test("batch 017 attention modules include moduleType attention and variantGroup when set", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + const byUrl = new Map( + documents.map((document) => [document.url, document]), + ); + + for (const url of EXPECTED_ATTENTION_MODULE_URLS) { + const document = byUrl.get(url); + expect(document?.facets.moduleType).toBe("attention"); + expect(document?.tags).toEqual(expect.arrayContaining(["attention"])); + } + + expect( + byUrl.get("/docs/modules/multi-head-attention")?.facets.variantGroup, + ).toBe("attention-head-sharing"); + expect( + byUrl.get("/docs/modules/multi-query-attention")?.facets.variantGroup, + ).toBe("attention-head-sharing"); + expect( + byUrl.get("/docs/modules/multi-head-latent-attention")?.facets + .variantGroup, + ).toBe("attention-head-sharing"); + expect( + byUrl.get("/docs/modules/sparse-attention")?.facets.variantGroup, + ).toBe("sparse-patterns"); + expect( + byUrl.get("/docs/modules/sliding-window-attention")?.facets.variantGroup, + ).toBe("attention-locality"); + expect( + byUrl.get("/docs/modules/linear-attention")?.facets.variantGroup, + ).toBe("subquadratic-attention"); + }); + + test("model-family and transformer-component pages include representative registry aliases", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + const byUrl = new Map( + documents.map((document) => [document.url, document]), + ); + + for (const { url, alias } of ALIAS_EXPECTATIONS) { + const document = byUrl.get(url); + expect(document?.aliases).toEqual(expect.arrayContaining([alias])); + } + + for (const url of [ + "/docs/glossary/transformer", + "/docs/glossary/diffusion-model", + 
"/docs/glossary/multimodal-model", + "/docs/glossary/world-model", + ] as const) { + const document = byUrl.get(url); + expect(document?.kind).toBe("glossary"); + expect(document?.tags).toEqual(expect.arrayContaining(["model-family"])); + } + }); +}); diff --git a/src/lib/content/content-reconciliation-search-queries.test.tsx b/src/lib/content/content-reconciliation-search-queries.test.tsx index ade9758a..487b5fc6 100644 --- a/src/lib/content/content-reconciliation-search-queries.test.tsx +++ b/src/lib/content/content-reconciliation-search-queries.test.tsx @@ -17,21 +17,8 @@ const MULTI_QUERY_ATTENTION_URL = "/docs/modules/multi-query-attention"; const SPARSE_ATTENTION_URL = "/docs/modules/sparse-attention"; const ROPE_GLOSSARY_URL = "/docs/glossary/rope"; const CONTEXT_WINDOW_GLOSSARY_URL = "/docs/glossary/context-window"; -const FEED_FORWARD_NETWORK_GLOSSARY_URL = "/docs/glossary/feed-forward-network"; -const STANDARD_FFN_GLOSSARY_URL = "/docs/glossary/standard-ffn"; -const MIXTURE_OF_EXPERTS_GLOSSARY_URL = "/docs/glossary/mixture-of-experts"; -const RELU_GLOSSARY_URL = "/docs/glossary/relu"; -const LEAKY_RELU_GLOSSARY_URL = "/docs/glossary/leaky-relu"; const SILU_GLOSSARY_URL = "/docs/glossary/silu"; const SWIGLU_GLOSSARY_URL = "/docs/glossary/swiglu"; -const NORMALIZATION_GLOSSARY_URL = "/docs/glossary/normalization"; -const LAYER_NORM_GLOSSARY_URL = "/docs/glossary/layer-norm"; -const BATCH_NORM_GLOSSARY_URL = "/docs/glossary/batch-norm"; -const GROUP_NORM_GLOSSARY_URL = "/docs/glossary/group-norm"; -const RMSNORM_GLOSSARY_URL = "/docs/glossary/rmsnorm"; -const QK_NORM_GLOSSARY_URL = "/docs/glossary/qk-norm"; -const RESIDUAL_CONNECTION_GLOSSARY_URL = "/docs/glossary/residual-connection"; -const SKIP_CONNECTION_GLOSSARY_URL = "/docs/glossary/skip-connection"; const ATTENTION_MODULE_QUERIES = [ { query: "MHA", url: MULTI_HEAD_ATTENTION_URL }, @@ -46,69 +33,8 @@ const GLOSSARY_CANONICAL_QUERIES = [ url: CONTEXT_WINDOW_GLOSSARY_URL, kind: "glossary" as const, }, - 
{ - query: "feed-forward network", - url: FEED_FORWARD_NETWORK_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "standard FFN", - url: STANDARD_FFN_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "mixture of experts", - url: MIXTURE_OF_EXPERTS_GLOSSARY_URL, - kind: "glossary" as const, - }, - { query: "ReLU", url: RELU_GLOSSARY_URL, kind: "glossary" as const }, - { - query: "LeakyReLU", - url: LEAKY_RELU_GLOSSARY_URL, - kind: "glossary" as const, - }, { query: "SiLU", url: SILU_GLOSSARY_URL, kind: "glossary" as const }, { query: "SwiGLU", url: SWIGLU_GLOSSARY_URL, kind: "glossary" as const }, - { - query: "normalization", - url: NORMALIZATION_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "layer norm", - url: LAYER_NORM_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "batch norm", - url: BATCH_NORM_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "group norm", - url: GROUP_NORM_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "RMSNorm", - url: RMSNORM_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "QK norm", - url: QK_NORM_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "residual connection", - url: RESIDUAL_CONNECTION_GLOSSARY_URL, - kind: "glossary" as const, - }, - { - query: "skip connection", - url: SKIP_CONNECTION_GLOSSARY_URL, - kind: "glossary" as const, - }, ] as const; function resultsIncludeUrl( @@ -183,21 +109,8 @@ describe("Phase 2/3 reconciliation search UI kind labels (US-010)", () => { [SPARSE_ATTENTION_URL, "module", "Module"], [ROPE_GLOSSARY_URL, "glossary", "Glossary"], [CONTEXT_WINDOW_GLOSSARY_URL, "glossary", "Glossary"], - [FEED_FORWARD_NETWORK_GLOSSARY_URL, "glossary", "Glossary"], - [STANDARD_FFN_GLOSSARY_URL, "glossary", "Glossary"], - [MIXTURE_OF_EXPERTS_GLOSSARY_URL, "glossary", "Glossary"], - [RELU_GLOSSARY_URL, "glossary", "Glossary"], - [LEAKY_RELU_GLOSSARY_URL, "glossary", "Glossary"], [SILU_GLOSSARY_URL, "glossary", "Glossary"], 
[SWIGLU_GLOSSARY_URL, "glossary", "Glossary"], - [NORMALIZATION_GLOSSARY_URL, "glossary", "Glossary"], - [LAYER_NORM_GLOSSARY_URL, "glossary", "Glossary"], - [BATCH_NORM_GLOSSARY_URL, "glossary", "Glossary"], - [GROUP_NORM_GLOSSARY_URL, "glossary", "Glossary"], - [RMSNORM_GLOSSARY_URL, "glossary", "Glossary"], - [QK_NORM_GLOSSARY_URL, "glossary", "Glossary"], - [RESIDUAL_CONNECTION_GLOSSARY_URL, "glossary", "Glossary"], - [SKIP_CONNECTION_GLOSSARY_URL, "glossary", "Glossary"], ] as const)("SearchResultMetaDetails shows localized %s kind for %s", async (url, kind, label) => { const messages = await loadUiMessages(); const metaByUrl = searchResultMetaMapToRecord( diff --git a/src/lib/content/content-reconciliation-single-title.test.ts b/src/lib/content/content-reconciliation-single-title.test.ts index 351acccd..42ccac60 100644 --- a/src/lib/content/content-reconciliation-single-title.test.ts +++ b/src/lib/content/content-reconciliation-single-title.test.ts @@ -29,26 +29,19 @@ const BATCH_017_DOCS_URLS = [ "/docs/modules/linear-attention", "/docs/concepts/transformer-architecture", "/docs/glossary/feed-forward-network", - "/docs/glossary/batch-norm", - "/docs/glossary/group-norm", - "/docs/glossary/standard-ffn", "/docs/glossary/mixture-of-experts", - "/docs/glossary/relu", - "/docs/glossary/leaky-relu", - "/docs/glossary/silu", - "/docs/glossary/swiglu", "/docs/glossary/normalization", - "/docs/glossary/qk-norm", "/docs/glossary/layer-norm", "/docs/glossary/rmsnorm", "/docs/glossary/residual-connection", - "/docs/glossary/skip-connection", "/docs/concepts/positional-encodings", "/docs/glossary/rope", "/docs/glossary/alibi", "/docs/glossary/context-window", "/docs/concepts/context-extension", "/docs/concepts/why-long-context-is-hard", + "/docs/glossary/silu", + "/docs/glossary/swiglu", ] as const; const SPOT_CHECK_URLS = [ @@ -58,11 +51,6 @@ const SPOT_CHECK_URLS = [ "/docs/glossary/context-window", ] as const; -const BATCH_017_DOCS_URL_GROUPS = [ - 
BATCH_017_DOCS_URLS.slice(0, 12), - BATCH_017_DOCS_URLS.slice(12), -] as const; - function parseDocsUrl(url: string): { section: "concepts" | "glossary" | "modules"; slug: string; @@ -101,28 +89,6 @@ function extractArticleHtml(html: string, registryId: string): string { return extractModuleArticleHtml(html, registryId); } -async function expectSingleShellOwnedPrimaryTitle(url: string): Promise { - const { section, slug } = parseDocsUrl(url); - const loadedPage = await loadLocalDocsPage({ section, slug }); - const html = renderReconciledDocsShell(section, loadedPage); - const articleHtml = extractArticleHtml( - html, - loadedPage.frontmatter.registryId, - ); - - expect(articleHtml.length).toBeGreaterThan(0); - expect(countH1BlocksContaining(html, loadedPage.messages.title)).toBe(1); - expectGlossaryBodyOmitsTitleHeading(articleHtml, loadedPage.messages.title); - expectGlossaryBodyOmitsShellDescription( - articleHtml, - loadedPage.messages.description, - ); - - if (section === "glossary" || section === "concepts") { - expectGlossaryOmitsOpeningSummary(html); - } -} - describe("Phase 2/3 reconciliation single primary title (US-005)", () => { if (process.env[VERIFY_COVERAGE_SUBPROCESS_ENV] === "1") { test("skips shell title convergence during coverage subprocess rerun", () => {}); @@ -147,17 +113,38 @@ describe("Phase 2/3 reconciliation single primary title (US-005)", () => { } }); - for (const [index, urls] of BATCH_017_DOCS_URL_GROUPS.entries()) { - test( - `batch 017 title convergence group ${index + 1} renders exactly one shell-owned primary title`, - async () => { - for (const url of urls) { - await expectSingleShellOwnedPrimaryTitle(url); + test( + "every batch 017 page renders exactly one shell-owned primary title", + async () => { + for (const url of BATCH_017_DOCS_URLS) { + const { section, slug } = parseDocsUrl(url); + const loadedPage = await loadLocalDocsPage({ section, slug }); + const html = renderReconciledDocsShell(section, loadedPage); + const 
articleHtml = extractArticleHtml( + html, + loadedPage.frontmatter.registryId, + ); + + expect(articleHtml.length).toBeGreaterThan(0); + expect(countH1BlocksContaining(html, loadedPage.messages.title)).toBe( + 1, + ); + expectGlossaryBodyOmitsTitleHeading( + articleHtml, + loadedPage.messages.title, + ); + expectGlossaryBodyOmitsShellDescription( + articleHtml, + loadedPage.messages.description, + ); + + if (section === "glossary" || section === "concepts") { + expectGlossaryOmitsOpeningSummary(html); } - }, - { timeout: 15_000 }, - ); - } + } + }, + { timeout: 10_000 }, + ); test("spot-check pages keep glossary or module shell title patterns", async () => { for (const url of SPOT_CHECK_URLS) { diff --git a/src/lib/content/content-reconciliation-source-discovery.test.ts b/src/lib/content/content-reconciliation-source-discovery.test.ts new file mode 100644 index 00000000..c04d2581 --- /dev/null +++ b/src/lib/content/content-reconciliation-source-discovery.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, test } from "bun:test"; +import { existsSync } from "node:fs"; +import { join } from "node:path"; +import { DOCS_ROOT } from "@/lib/content/content-paths"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { source } from "@/lib/source"; + +/** Batch 017 pages reconciled in Phase 2/3 (see prd.md). 
*/ +const BATCH_017_DOCS_URLS = [ + "/docs/glossary/transformer", + "/docs/glossary/diffusion-model", + "/docs/glossary/multimodal-model", + "/docs/glossary/world-model", + "/docs/modules/attention", + "/docs/modules/multi-head-attention", + "/docs/modules/multi-query-attention", + "/docs/modules/multi-head-latent-attention", + "/docs/modules/sparse-attention", + "/docs/modules/sliding-window-attention", + "/docs/modules/linear-attention", + "/docs/concepts/transformer-architecture", + "/docs/glossary/feed-forward-network", + "/docs/glossary/mixture-of-experts", + "/docs/glossary/normalization", + "/docs/glossary/layer-norm", + "/docs/glossary/rmsnorm", + "/docs/glossary/residual-connection", + "/docs/concepts/positional-encodings", + "/docs/glossary/rope", + "/docs/glossary/alibi", + "/docs/glossary/context-window", + "/docs/concepts/context-extension", + "/docs/concepts/why-long-context-is-hard", + "/docs/glossary/silu", + "/docs/glossary/swiglu", +] as const; + +function docsSlugFromUrl(url: string): string[] { + return url.replace("/docs/", "").split("/"); +} + +describe("Phase 2/3 reconciliation source discovery (US-002)", () => { + test("loadPublishedDocsPages includes every batch 017 glossary, concept, and module URL", async () => { + const pages = await loadPublishedDocsPages("en"); + const urls = new Set(pages.map((page) => page.url)); + + for (const url of BATCH_017_DOCS_URLS) { + expect(urls).toContain(url); + } + }); + + test("Fumadocs source resolves every batch 017 page slug", () => { + for (const url of BATCH_017_DOCS_URLS) { + const slug = docsSlugFromUrl(url); + const page = source.getPage(slug); + expect(page).toBeDefined(); + expect(page?.url).toBe(url); + } + }); + + test("Fumadocs generateParams includes every batch 017 slug path", () => { + const slugPaths = new Set( + source.generateParams().map((entry) => entry.slug.join("/")), + ); + + for (const url of BATCH_017_DOCS_URLS) { + const slugPath = url.replace("/docs/", ""); + 
expect(slugPaths.has(slugPath)).toBe(true); + } + }); + + test("every published docs registry id has a routable MDX bundle on disk", async () => { + const pages = await loadPublishedDocsPages("en"); + const pageByRegistryId = new Map( + pages.map((page) => [page.frontmatter.registryId, page]), + ); + + for (const registryId of PUBLISHED_DOCS_REGISTRY_IDS) { + const page = pageByRegistryId.get(registryId); + expect(page).toBeDefined(); + expect(existsSync(join(page?.pageDir ?? "", "page.mdx"))).toBe(true); + } + }); + + test("published module and concept registry records with docs pages resolve through source", async () => { + const indexes = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const pageByRegistryId = new Map( + pages.map((page) => [page.frontmatter.registryId, page]), + ); + + for (const record of indexes.byId.values()) { + if (record.status !== "published") { + continue; + } + if (record.kind !== "module" && record.kind !== "concept") { + continue; + } + if (!PUBLISHED_DOCS_REGISTRY_IDS.has(record.id)) { + continue; + } + + const page = pageByRegistryId.get(record.id); + expect(page).toBeDefined(); + + const slug = page?.docsSlug.split("/") ?? 
[]; + expect(source.getPage(slug)).toBeDefined(); + expect(page?.url.startsWith("/docs/")).toBe(true); + expect(page?.pageDir.startsWith(DOCS_ROOT)).toBe(true); + } + }); + + test("every published docs page resolves through Fumadocs source and generateParams", async () => { + const pages = await loadPublishedDocsPages("en"); + const slugPaths = new Set( + source.generateParams().map((entry) => entry.slug.join("/")), + ); + + for (const page of pages) { + const slug = page.docsSlug.split("/"); + expect(source.getPage(slug)).toBeDefined(); + expect(slugPaths.has(page.docsSlug)).toBe(true); + } + }); +}); diff --git a/src/lib/content/feed-forward-network-glossary.test.ts b/src/lib/content/feed-forward-network-glossary.test.ts index e611dd34..2122c22a 100644 --- a/src/lib/content/feed-forward-network-glossary.test.ts +++ b/src/lib/content/feed-forward-network-glossary.test.ts @@ -36,16 +36,20 @@ describe("Phase 3 feed-forward network glossary page (US-002)", () => { expect(record?.tags).toEqual(["foundations"]); expect(record?.relatedIds).toEqual([ "concept.transformer-architecture", + "concept.activation", "concept.standard-ffn", + "concept.relu", + "concept.leaky-relu", + "concept.silu", + "concept.swiglu", "concept.mixture-of-experts", - "concept.activation", ]); expect( PUBLISHED_DOCS_REGISTRY_IDS.has("concept.feed-forward-network"), ).toBe(true); }); - test("curated related links transformer architecture, standard FFN, mixture of experts, and activation", () => { + test("curated related links connect the broad FFN overview to architecture and nearby family variants", () => { const source = getConceptById("concept.feed-forward-network"); if (!source) { throw new Error("expected concept.feed-forward-network in registry"); @@ -69,17 +73,35 @@ describe("Phase 3 feed-forward network glossary page (US-002)", () => { expect(standardFfn?.href).toBe("/docs/glossary/standard-ffn"); expect(standardFfn?.isPlanned).toBe(false); - const moe = items.find( - (item) => 
item.registryId === "concept.mixture-of-experts", - ); - expect(moe?.href).toBe("/docs/glossary/mixture-of-experts"); - expect(moe?.isPlanned).toBe(false); - const activation = items.find( (item) => item.registryId === "concept.activation", ); expect(activation?.href).toBe("/docs/glossary/activation"); expect(activation?.isPlanned).toBe(false); + + const relu = items.find((item) => item.registryId === "concept.relu"); + expect(relu?.href).toBe("/docs/glossary/relu"); + expect(relu?.isPlanned).toBe(false); + + const leakyRelu = items.find( + (item) => item.registryId === "concept.leaky-relu", + ); + expect(leakyRelu?.href).toBe("/docs/glossary/leaky-relu"); + expect(leakyRelu?.isPlanned).toBe(false); + + const silu = items.find((item) => item.registryId === "concept.silu"); + expect(silu?.href).toBe("/docs/glossary/silu"); + expect(silu?.isPlanned).toBe(false); + + const swiglu = items.find((item) => item.registryId === "concept.swiglu"); + expect(swiglu?.href).toBe("/docs/glossary/swiglu"); + expect(swiglu?.isPlanned).toBe(false); + + const moe = items.find( + (item) => item.registryId === "concept.mixture-of-experts", + ); + expect(moe?.href).toBe("/docs/glossary/mixture-of-experts"); + expect(moe?.isPlanned).toBe(false); }); test("messages describe per-position FFN role after attention", () => { @@ -100,7 +122,7 @@ describe("Phase 3 feed-forward network glossary page (US-002)", () => { ); }); - test("page renders glossary sections, tag pills, and FFN-family related links", async () => { + test("page renders glossary sections, tag pills, and FFN family related links", async () => { const page = await loadGlossaryPage("feed-forward-network"); expect(page.frontmatter.kind).toBe("glossary"); @@ -123,9 +145,13 @@ describe("Phase 3 feed-forward network glossary page (US-002)", () => { expect(html).toContain("Why It Matters"); expectHtmlToContainProse(html, "two-layer perceptron"); expect(html).toContain('href="/docs/concepts/transformer-architecture"'); + 
expect(html).toContain('href="/docs/glossary/activation"'); expect(html).toContain('href="/docs/glossary/standard-ffn"'); + expect(html).toContain('href="/docs/glossary/relu"'); + expect(html).toContain('href="/docs/glossary/leaky-relu"'); + expect(html).toContain('href="/docs/glossary/silu"'); + expect(html).toContain('href="/docs/glossary/swiglu"'); expect(html).toContain('href="/docs/glossary/mixture-of-experts"'); - expect(html).toContain('href="/docs/glossary/activation"'); expect(html).toContain('href="/tags/foundations"'); expect(html).toContain('data-testid="tag-pill-list"'); expect(html).toContain('data-testid="curated-related-docs"'); diff --git a/src/lib/content/ffn-variant-family-navigation.test.ts b/src/lib/content/ffn-variant-family-navigation.test.ts new file mode 100644 index 00000000..b5270a90 --- /dev/null +++ b/src/lib/content/ffn-variant-family-navigation.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, test } from "bun:test"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { + getConceptById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; +import { pageBaseUrl } from "@/lib/search/collapse-search-results-to-page-hits"; +import { docsSearchApi } from "@/lib/search/search-server"; + +const FFN_FAMILY_IDS = [ + "concept.feed-forward-network", + "concept.activation", + "concept.standard-ffn", + "concept.relu", + "concept.leaky-relu", + "concept.silu", + "concept.swiglu", + "concept.mixture-of-experts", +] as const; + +const FFN_FAMILY_ID_SET = new Set(FFN_FAMILY_IDS); + +const SEARCH_EXPECTATIONS = [ + { query: "standard FFN", url: "/docs/glossary/standard-ffn" }, + { query: "ReLU", url: "/docs/glossary/relu" }, + { query: "LeakyReLU", url: "/docs/glossary/leaky-relu" }, + { query: "SiLU", url: "/docs/glossary/silu" }, + { query: "SwiGLU", url: "/docs/glossary/swiglu" }, +] as const; + 
+describe("FFN variant family navigation and search (US-004)", () => { + test("every FFN family page exposes at least one published in-family related doc link", () => { + const candidates = listRelatedRegistryRecords(); + + for (const registryId of FFN_FAMILY_IDS) { + const source = getConceptById(registryId); + expect(source).toBeDefined(); + if (!source) { + continue; + } + + const familyItems = deriveCuratedRelatedItems( + source, + candidates, + PUBLISHED_DOCS_REGISTRY_IDS, + ).filter((item) => FFN_FAMILY_ID_SET.has(item.registryId)); + + expect(familyItems.length).toBeGreaterThan(0); + + for (const item of familyItems) { + expect(item.isPlanned).toBe(false); + expect(item.href).toMatch(/^\/docs\//); + } + } + }); + + test("activation and feed-forward-network act as broad entry pages that can reach the whole FFN family cluster", () => { + const queue = ["concept.activation", "concept.feed-forward-network"]; + const visited = new Set(); + + while (queue.length > 0) { + const registryId = queue.shift(); + if (!registryId || visited.has(registryId)) { + continue; + } + + visited.add(registryId); + const source = getConceptById(registryId); + if (!source) { + continue; + } + + for (const relatedId of source.relatedIds) { + if (FFN_FAMILY_ID_SET.has(relatedId) && !visited.has(relatedId)) { + queue.push(relatedId); + } + } + } + + expect(visited).toEqual(new Set(FFN_FAMILY_IDS)); + }); + + test("representative FFN family searches return the canonical page first", async () => { + for (const { query, url } of SEARCH_EXPECTATIONS) { + const results = await docsSearchApi.search(query); + expect(pageBaseUrl(results[0]?.url ?? 
"")).toBe(url); + } + }); +}); diff --git a/src/lib/content/mixture-of-experts-glossary.test.ts b/src/lib/content/mixture-of-experts-glossary.test.ts index d5f7029c..d738861a 100644 --- a/src/lib/content/mixture-of-experts-glossary.test.ts +++ b/src/lib/content/mixture-of-experts-glossary.test.ts @@ -37,6 +37,8 @@ describe("Phase 3 mixture of experts glossary page (US-003)", () => { expect(record?.relatedIds).toEqual([ "concept.feed-forward-network", "concept.standard-ffn", + "concept.swiglu", + "concept.activation", "concept.transformer-architecture", ]); expect(PUBLISHED_DOCS_REGISTRY_IDS.has("concept.mixture-of-experts")).toBe( @@ -44,7 +46,7 @@ describe("Phase 3 mixture of experts glossary page (US-003)", () => { ); }); - test("curated related links feed-forward network, standard FFN, and transformer architecture", () => { + test("curated related links contrast MoE with the dense and gated FFN family", () => { const source = getConceptById("concept.mixture-of-experts"); if (!source) { throw new Error("expected concept.mixture-of-experts in registry"); @@ -68,6 +70,16 @@ describe("Phase 3 mixture of experts glossary page (US-003)", () => { expect(standardFfn?.href).toBe("/docs/glossary/standard-ffn"); expect(standardFfn?.isPlanned).toBe(false); + const swiglu = items.find((item) => item.registryId === "concept.swiglu"); + expect(swiglu?.href).toBe("/docs/glossary/swiglu"); + expect(swiglu?.isPlanned).toBe(false); + + const activation = items.find( + (item) => item.registryId === "concept.activation", + ); + expect(activation?.href).toBe("/docs/glossary/activation"); + expect(activation?.isPlanned).toBe(false); + const architecture = items.find( (item) => item.registryId === "concept.transformer-architecture", ); @@ -119,6 +131,8 @@ describe("Phase 3 mixture of experts glossary page (US-003)", () => { expectHtmlToContainProse(html, "gating network"); expect(html).toContain('href="/docs/glossary/feed-forward-network"'); 
expect(html).toContain('href="/docs/glossary/standard-ffn"'); + expect(html).toContain('href="/docs/glossary/swiglu"'); + expect(html).toContain('href="/docs/glossary/activation"'); expect(html).toContain('href="/docs/concepts/transformer-architecture"'); expect(html).toContain('href="/tags/foundations"'); expect(html).toContain('data-testid="tag-pill-list"'); diff --git a/src/lib/content/phase-2-3-reconciliation-convergence.ts b/src/lib/content/phase-2-3-reconciliation-convergence.ts index 266414a8..247e9348 100644 --- a/src/lib/content/phase-2-3-reconciliation-convergence.ts +++ b/src/lib/content/phase-2-3-reconciliation-convergence.ts @@ -72,6 +72,8 @@ export const BATCH_017_DOCS_URLS = [ "/docs/glossary/context-window", "/docs/concepts/context-extension", "/docs/concepts/why-long-context-is-hard", + "/docs/glossary/silu", + "/docs/glossary/swiglu", ] as const; const MODEL_FAMILY_REGISTRY_IDS = [ @@ -199,6 +201,16 @@ const REPRESENTATIVE_SEARCH_QUERY_EXPECTATIONS = [ firstUrl: "/docs/glossary/context-window", firstKind: "glossary" as const, }, + { + query: "SiLU", + firstUrl: "/docs/glossary/silu", + firstKind: "glossary" as const, + }, + { + query: "SwiGLU", + firstUrl: "/docs/glossary/swiglu", + firstKind: "glossary" as const, + }, ] as const; function docsSlugFromUrl(url: string): string[] { diff --git a/src/lib/content/published-docs-registry-ids.ts b/src/lib/content/published-docs-registry-ids.ts index fa01a4d1..31006866 100644 --- a/src/lib/content/published-docs-registry-ids.ts +++ b/src/lib/content/published-docs-registry-ids.ts @@ -83,6 +83,10 @@ export const PUBLISHED_DOCS_REGISTRY_IDS = new Set([ "concept.transformer-architecture", "concept.feed-forward-network", "concept.standard-ffn", + "concept.relu", + "concept.leaky-relu", + "concept.silu", + "concept.swiglu", "concept.mixture-of-experts", "concept.relu", "concept.leaky-relu", diff --git a/src/lib/content/relu-leaky-relu-glossary.test.ts b/src/lib/content/relu-leaky-relu-glossary.test.ts new file 
mode 100644 index 00000000..6af1e7c4 --- /dev/null +++ b/src/lib/content/relu-leaky-relu-glossary.test.ts @@ -0,0 +1,228 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { + LEAKY_RELU_GLOSSARY_PAGE_DIR, + RELU_GLOSSARY_PAGE_DIR, +} from "@/lib/content/content-paths"; +import { loadPublishedGlossaryEntries } from "@/lib/content/glossary"; +import { loadGlossaryPage } from "@/lib/content/glossary-page"; +import { + expectGlossaryPresentationConvergence, + expectHtmlToContainProse, +} from "@/lib/content/glossary-test-helpers"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { + getConceptById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; +import { pageMessagesSchema } from "@/lib/content/schemas"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; +import { pageBaseUrl } from "@/lib/search/collapse-search-results-to-page-hits"; +import { docsSearchApi } from "@/lib/search/search-server"; + +const RELU_GLOSSARY_URL = "/docs/glossary/relu"; +const LEAKY_RELU_GLOSSARY_URL = "/docs/glossary/leaky-relu"; + +const pageFixtures = [ + { + registryId: "concept.relu", + title: "ReLU", + url: RELU_GLOSSARY_URL, + pageDir: RELU_GLOSSARY_PAGE_DIR, + aliases: ["ReLU", "rectified linear unit", "relu activation"], + relatedIds: [ + "concept.activation", + "concept.feed-forward-network", + "concept.standard-ffn", + "concept.leaky-relu", + ], + prose: "keeps the positive ones", + comparisonTerm: "leakyrelu", + }, + { + registryId: 
"concept.leaky-relu", + title: "LeakyReLU", + url: LEAKY_RELU_GLOSSARY_URL, + pageDir: LEAKY_RELU_GLOSSARY_PAGE_DIR, + aliases: ["LeakyReLU", "leaky ReLU", "leaky rectified linear unit"], + relatedIds: [ + "concept.activation", + "concept.feed-forward-network", + "concept.standard-ffn", + "concept.relu", + ], + prose: "negative signal", + comparisonTerm: "relu", + }, +] as const; + +describe("FFN pages from phase 3 ReLU and LeakyReLU glossary pages (US-002)", () => { + for (const fixture of pageFixtures) { + test(`${fixture.title} registry record is published with aliases and curated related ids`, () => { + const { registryId, aliases, relatedIds } = fixture; + const record = getConceptById(registryId); + expect(record?.status).toBe("published"); + expect(record?.aliases).toEqual([...aliases]); + expect(record?.tags).toEqual(["foundations"]); + expect(record?.relatedIds).toEqual([...relatedIds]); + expect(PUBLISHED_DOCS_REGISTRY_IDS.has(registryId)).toBe(true); + }); + + test(`${fixture.title} curated related links activation, feed-forward network, standard FFN, and nearby variant`, () => { + const { registryId, relatedIds } = fixture; + const source = getConceptById(registryId); + if (!source) { + throw new Error(`expected ${registryId} in registry`); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + for (const relatedId of relatedIds) { + const item = items.find( + (candidate) => candidate.registryId === relatedId, + ); + expect(item?.isPlanned).toBe(false); + } + + expect( + items.find((item) => item.registryId === "concept.activation")?.href, + ).toBe("/docs/glossary/activation"); + expect( + items.find((item) => item.registryId === "concept.feed-forward-network") + ?.href, + ).toBe("/docs/glossary/feed-forward-network"); + expect( + items.find((item) => item.registryId === "concept.standard-ffn")?.href, + ).toBe("/docs/glossary/standard-ffn"); + }); + } + + test("messages explain 
ReLU zeroing and LeakyReLU's small negative slope", () => { + const reluMessages = pageMessagesSchema.parse( + JSON.parse( + readFileSync(join(RELU_GLOSSARY_PAGE_DIR, "messages/en.json"), "utf8"), + ), + ); + const leakyReluMessages = pageMessagesSchema.parse( + JSON.parse( + readFileSync( + join(LEAKY_RELU_GLOSSARY_PAGE_DIR, "messages/en.json"), + "utf8", + ), + ), + ); + + expect(reluMessages.sections?.whatItIs.body?.toLowerCase()).toContain( + "zero", + ); + expect(reluMessages.sections?.whatItIs.body?.toLowerCase()).toContain( + "ffn", + ); + expect( + reluMessages.sections?.commonConfusions.body?.toLowerCase(), + ).toContain("leakyrelu"); + + expect(leakyReluMessages.sections?.whatItIs.body?.toLowerCase()).toContain( + "slope", + ); + expect( + leakyReluMessages.sections?.whyItMatters.body?.toLowerCase(), + ).toContain("negative"); + expect( + leakyReluMessages.sections?.commonConfusions.body?.toLowerCase(), + ).toContain("swiglu"); + }); + + for (const fixture of pageFixtures) { + test(`${fixture.title} page renders required sections, tag pill, and FFN-family related links`, async () => { + const { registryId, title, url, prose, comparisonTerm } = fixture; + const slug = url.split("/").at(-1); + if (!slug) { + throw new Error(`missing slug for ${url}`); + } + + const page = await loadGlossaryPage(slug); + + expect(page.frontmatter.kind).toBe("glossary"); + expect(page.frontmatter.status).toBe("published"); + expect(page.frontmatter.registryId).toBe(registryId); + + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: page.messages, + assets: page.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: page.content, + }), + ); + + expectGlossaryPresentationConvergence(html, { + title, + }); + expect(html).toContain("What It Is"); + expect(html).toContain("Common Confusions"); + expectHtmlToContainProse(html, prose); + 
expect(html).toContain('href="/docs/glossary/activation"'); + expect(html).toContain('href="/docs/glossary/feed-forward-network"'); + expect(html).toContain('href="/docs/glossary/standard-ffn"'); + expect(html).toContain('href="/tags/foundations"'); + expect(html).toContain('data-testid="tag-pill-list"'); + expect(html).toContain('data-testid="curated-related-docs"'); + expect(html.toLowerCase()).toContain(comparisonTerm); + expect(html).not.toContain("Phase"); + expect(html).not.toContain("Reader Shortcut"); + }); + } + + test("glossary browse index includes ReLU and LeakyReLU with summaries", async () => { + const entries = await loadPublishedGlossaryEntries("en"); + const entryByUrl = new Map(entries.map((entry) => [entry.url, entry])); + + const relu = entryByUrl.get(RELU_GLOSSARY_URL); + expect(relu?.title).toBe("ReLU"); + expect(relu?.summary.length).toBeGreaterThan(0); + + const leakyRelu = entryByUrl.get(LEAKY_RELU_GLOSSARY_URL); + expect(leakyRelu?.title).toBe("LeakyReLU"); + expect(leakyRelu?.summary.length).toBeGreaterThan(0); + }); + + test("search documents and canonical queries resolve to the ReLU-family pages", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + + const relu = documents.find((entry) => entry.url === RELU_GLOSSARY_URL); + expect(relu?.kind).toBe("glossary"); + expect(relu?.aliases).toEqual( + expect.arrayContaining(["ReLU", "rectified linear unit"]), + ); + + const leakyRelu = documents.find( + (entry) => entry.url === LEAKY_RELU_GLOSSARY_URL, + ); + expect(leakyRelu?.kind).toBe("glossary"); + expect(leakyRelu?.aliases).toEqual( + expect.arrayContaining(["LeakyReLU", "leaky ReLU"]), + ); + + const reluResults = await docsSearchApi.search("ReLU"); + expect(pageBaseUrl(reluResults[0]?.url ?? 
"")).toBe(RELU_GLOSSARY_URL); + + const leakyReluResults = await docsSearchApi.search("LeakyReLU"); + expect(pageBaseUrl(leakyReluResults[0]?.url ?? "")).toBe( + LEAKY_RELU_GLOSSARY_URL, + ); + }); +}); diff --git a/src/lib/content/silu-swiglu-glossary.test.ts b/src/lib/content/silu-swiglu-glossary.test.ts new file mode 100644 index 00000000..6e01d98e --- /dev/null +++ b/src/lib/content/silu-swiglu-glossary.test.ts @@ -0,0 +1,228 @@ +import { describe, expect, test } from "bun:test"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { createElement } from "react"; +import { renderToStaticMarkup } from "react-dom/server"; +import { ModulePageProviders } from "@/features/docs/components/ModulePageProviders"; +import { + SILU_GLOSSARY_PAGE_DIR, + SWIGLU_GLOSSARY_PAGE_DIR, +} from "@/lib/content/content-paths"; +import { loadPublishedGlossaryEntries } from "@/lib/content/glossary"; +import { loadGlossaryPage } from "@/lib/content/glossary-page"; +import { + expectGlossaryPresentationConvergence, + expectHtmlToContainProse, +} from "@/lib/content/glossary-test-helpers"; +import { loadPublishedDocsPages } from "@/lib/content/pages"; +import { PUBLISHED_DOCS_REGISTRY_IDS } from "@/lib/content/published-docs-registry-ids"; +import { loadRegistry } from "@/lib/content/registry"; +import { + getConceptById, + listRelatedRegistryRecords, +} from "@/lib/content/registry-runtime"; +import { deriveCuratedRelatedItems } from "@/lib/content/related-docs"; +import { pageMessagesSchema } from "@/lib/content/schemas"; +import { buildSearchDocuments } from "@/lib/search/build-documents"; +import { pageBaseUrl } from "@/lib/search/collapse-search-results-to-page-hits"; +import { docsSearchApi } from "@/lib/search/search-server"; + +const SILU_GLOSSARY_URL = "/docs/glossary/silu"; +const SWIGLU_GLOSSARY_URL = "/docs/glossary/swiglu"; + +const pageFixtures = [ + { + registryId: "concept.silu", + title: "SiLU", + url: SILU_GLOSSARY_URL, + pageDir: 
SILU_GLOSSARY_PAGE_DIR, + aliases: ["SiLU", "sigmoid linear unit", "swish"], + relatedIds: [ + "concept.activation", + "concept.feed-forward-network", + "concept.standard-ffn", + "concept.swiglu", + ], + prose: "soft gate", + comparisonTerm: "swiglu", + }, + { + registryId: "concept.swiglu", + title: "SwiGLU", + url: SWIGLU_GLOSSARY_URL, + pageDir: SWIGLU_GLOSSARY_PAGE_DIR, + aliases: ["SwiGLU", "Swi GLU", "Swish GLU", "swish gated linear unit"], + relatedIds: [ + "concept.activation", + "concept.feed-forward-network", + "concept.standard-ffn", + "concept.silu", + "concept.mixture-of-experts", + ], + prose: "two hidden branches", + comparisonTerm: "mixture-of-experts", + }, +] as const; + +describe("FFN pages from phase 3 SiLU and SwiGLU glossary pages (US-003)", () => { + for (const fixture of pageFixtures) { + test(`${fixture.title} registry record is published with aliases and curated related ids`, () => { + const { registryId, aliases, relatedIds } = fixture; + const record = getConceptById(registryId); + expect(record?.status).toBe("published"); + expect(record?.aliases).toEqual([...aliases]); + expect(record?.tags).toEqual(["foundations"]); + expect(record?.relatedIds).toEqual([...relatedIds]); + expect(PUBLISHED_DOCS_REGISTRY_IDS.has(registryId)).toBe(true); + }); + + test(`${fixture.title} curated related links connect FFN foundations and nearby variants`, () => { + const { registryId, relatedIds } = fixture; + const source = getConceptById(registryId); + if (!source) { + throw new Error(`expected ${registryId} in registry`); + } + + const items = deriveCuratedRelatedItems( + source, + listRelatedRegistryRecords(), + PUBLISHED_DOCS_REGISTRY_IDS, + ); + + for (const relatedId of relatedIds) { + const item = items.find( + (candidate) => candidate.registryId === relatedId, + ); + expect(item?.isPlanned).toBe(false); + } + + expect( + items.find((item) => item.registryId === "concept.activation")?.href, + ).toBe("/docs/glossary/activation"); + expect( + 
items.find((item) => item.registryId === "concept.feed-forward-network") + ?.href, + ).toBe("/docs/glossary/feed-forward-network"); + expect( + items.find((item) => item.registryId === "concept.standard-ffn")?.href, + ).toBe("/docs/glossary/standard-ffn"); + }); + } + + test("messages explain SiLU's smooth gate and SwiGLU's gated FFN structure", () => { + const siluMessages = pageMessagesSchema.parse( + JSON.parse( + readFileSync(join(SILU_GLOSSARY_PAGE_DIR, "messages/en.json"), "utf8"), + ), + ); + const swigluMessages = pageMessagesSchema.parse( + JSON.parse( + readFileSync( + join(SWIGLU_GLOSSARY_PAGE_DIR, "messages/en.json"), + "utf8", + ), + ), + ); + + expect(siluMessages.sections?.whatItIs.body?.toLowerCase()).toContain( + "smooth", + ); + expect(siluMessages.sections?.whatItIs.body?.toLowerCase()).toContain( + "ffn", + ); + expect( + siluMessages.sections?.commonConfusions.body?.toLowerCase(), + ).toContain("swiglu"); + + expect(swigluMessages.sections?.whatItIs.body?.toLowerCase()).toContain( + "two hidden branches", + ); + expect(swigluMessages.sections?.whyItMatters.body?.toLowerCase()).toContain( + "standard ffn", + ); + expect( + swigluMessages.sections?.commonConfusions.body?.toLowerCase(), + ).toContain("mixture-of-experts"); + expect( + swigluMessages.sections?.commonConfusions.body?.toLowerCase(), + ).toContain("silu"); + }); + + for (const fixture of pageFixtures) { + test(`${fixture.title} page renders required sections, tag pill, and FFN-family related links`, async () => { + const { registryId, title, url, prose, comparisonTerm } = fixture; + const slug = url.split("/").at(-1); + if (!slug) { + throw new Error(`missing slug for ${url}`); + } + + const page = await loadGlossaryPage(slug); + + expect(page.frontmatter.kind).toBe("glossary"); + expect(page.frontmatter.status).toBe("published"); + expect(page.frontmatter.registryId).toBe(registryId); + + const html = renderToStaticMarkup( + createElement(ModulePageProviders, { + messages: 
page.messages, + assets: page.assets, + // biome-ignore lint/correctness/noChildrenProp: third createElement arg conflicts with strict props typing + children: page.content, + }), + ); + + expectGlossaryPresentationConvergence(html, { + title, + }); + expect(html).toContain("What It Is"); + expect(html).toContain("Common Confusions"); + expectHtmlToContainProse(html, prose); + expect(html).toContain('href="/docs/glossary/activation"'); + expect(html).toContain('href="/docs/glossary/feed-forward-network"'); + expect(html).toContain('href="/docs/glossary/standard-ffn"'); + expect(html).toContain('href="/tags/foundations"'); + expect(html).toContain('data-testid="tag-pill-list"'); + expect(html).toContain('data-testid="curated-related-docs"'); + expect(html.toLowerCase()).toContain(comparisonTerm); + expect(html).not.toContain("Phase"); + expect(html).not.toContain("Reader Shortcut"); + }); + } + + test("glossary browse index includes SiLU and SwiGLU with summaries", async () => { + const entries = await loadPublishedGlossaryEntries("en"); + const entryByUrl = new Map(entries.map((entry) => [entry.url, entry])); + + const silu = entryByUrl.get(SILU_GLOSSARY_URL); + expect(silu?.title).toBe("SiLU"); + expect(silu?.summary.length).toBeGreaterThan(0); + + const swiglu = entryByUrl.get(SWIGLU_GLOSSARY_URL); + expect(swiglu?.title).toBe("SwiGLU"); + expect(swiglu?.summary.length).toBeGreaterThan(0); + }); + + test("search documents and canonical queries resolve to the SiLU-family pages", async () => { + const registry = await loadRegistry(); + const pages = await loadPublishedDocsPages("en"); + const documents = buildSearchDocuments(pages, registry); + + const silu = documents.find((entry) => entry.url === SILU_GLOSSARY_URL); + expect(silu?.kind).toBe("glossary"); + expect(silu?.aliases).toEqual( + expect.arrayContaining(["SiLU", "sigmoid linear unit", "swish"]), + ); + + const swiglu = documents.find((entry) => entry.url === SWIGLU_GLOSSARY_URL); + 
expect(swiglu?.kind).toBe("glossary"); + expect(swiglu?.aliases).toEqual( + expect.arrayContaining(["SwiGLU", "Swi GLU", "Swish GLU"]), + ); + + const siluResults = await docsSearchApi.search("SiLU"); + expect(pageBaseUrl(siluResults[0]?.url ?? "")).toBe(SILU_GLOSSARY_URL); + + const swigluResults = await docsSearchApi.search("SwiGLU"); + expect(pageBaseUrl(swigluResults[0]?.url ?? "")).toBe(SWIGLU_GLOSSARY_URL); + }); +}); diff --git a/src/lib/content/standard-ffn-glossary.test.ts b/src/lib/content/standard-ffn-glossary.test.ts index 7103042f..ef4b6db7 100644 --- a/src/lib/content/standard-ffn-glossary.test.ts +++ b/src/lib/content/standard-ffn-glossary.test.ts @@ -24,14 +24,14 @@ import { buildSearchDocuments } from "@/lib/search/build-documents"; const pageDir = STANDARD_FFN_GLOSSARY_PAGE_DIR; const messagesPath = join(pageDir, "messages/en.json"); -describe("Phase 3 standard FFN glossary page (US-001)", () => { - test("registry record is published with aliases, tags, and curated related ids", () => { +describe("FFN pages from phase 3 standard FFN glossary page (US-001)", () => { + test("registry record is published with dense FFN aliases and curated related ids", () => { const record = getConceptById("concept.standard-ffn"); expect(record?.status).toBe("published"); expect(record?.aliases).toEqual([ + "standard FFN", "dense FFN", "dense MLP block", - "standard feed-forward network", ]); expect(record?.tags).toEqual(["foundations"]); expect(record?.relatedIds).toEqual([ @@ -42,7 +42,7 @@ describe("Phase 3 standard FFN glossary page (US-001)", () => { expect(PUBLISHED_DOCS_REGISTRY_IDS.has("concept.standard-ffn")).toBe(true); }); - test("curated related links feed-forward network, mixture of experts, and activation", () => { + test("curated related links broad FFN, mixture of experts, and activation", () => { const source = getConceptById("concept.standard-ffn"); if (!source) { throw new Error("expected concept.standard-ffn in registry"); @@ -73,7 +73,7 @@ 
describe("Phase 3 standard FFN glossary page (US-001)", () => { expect(activation?.isPlanned).toBe(false); }); - test("messages explain dense expand-activate-project baseline and contrast nearby variants", () => { + test("messages explain the dense expand activate project path after attention", () => { const messages = pageMessagesSchema.parse( JSON.parse(readFileSync(messagesPath, "utf8")), ); @@ -83,16 +83,18 @@ describe("Phase 3 standard FFN glossary page (US-001)", () => { expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain( "attention", ); - expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain("dense"); - expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain( - "expand", + expect(messages.sections?.whatItIs.body?.toLowerCase()).toContain( + "projection", + ); + expect(messages.sections?.commonConfusions.body?.toLowerCase()).toContain( + "mixture-of-experts", ); - expect(messages.sections?.whyItMatters.body?.toLowerCase()).toContain( - "mixture of experts", + expect(messages.sections?.commonConfusions.body?.toLowerCase()).toContain( + "relu", ); }); - test("page renders glossary sections, tags, and nearby FFN-family links", async () => { + test("page renders baseline FFN explanation and curated related links", async () => { const page = await loadGlossaryPage("standard-ffn"); expect(page.frontmatter.kind).toBe("glossary"); @@ -113,7 +115,7 @@ describe("Phase 3 standard FFN glossary page (US-001)", () => { }); expect(html).toContain("What It Is"); expect(html).toContain("Common Confusions"); - expectHtmlToContainProse(html, "expand, activate, project"); + expectHtmlToContainProse(html, "plain dense feed-forward block"); expect(html).toContain('href="/docs/glossary/feed-forward-network"'); expect(html).toContain('href="/docs/glossary/mixture-of-experts"'); expect(html).toContain('href="/docs/glossary/activation"'); @@ -124,7 +126,7 @@ describe("Phase 3 standard FFN glossary page (US-001)", () => { 
expect(html).not.toContain("Reader Shortcut"); }); - test("search index records standard FFN with glossary kind", async () => { + test("search index records Standard FFN with glossary kind and exact aliases", async () => { const registry = await loadRegistry(); const pages = await loadPublishedDocsPages("en"); const documents = buildSearchDocuments(pages, registry); @@ -135,7 +137,7 @@ describe("Phase 3 standard FFN glossary page (US-001)", () => { expect(document?.kind).toBe("glossary"); expect(document?.facets.kind).toBe("glossary"); expect(document?.aliases).toEqual( - expect.arrayContaining(["dense FFN", "standard feed-forward network"]), + expect.arrayContaining(["standard FFN", "dense FFN"]), ); }); }); diff --git a/src/lib/content/token-to-probability-chain-parameter-activation-graph.test.tsx b/src/lib/content/token-to-probability-chain-parameter-activation-graph.test.tsx index 7b171011..91a02d39 100644 --- a/src/lib/content/token-to-probability-chain-parameter-activation-graph.test.tsx +++ b/src/lib/content/token-to-probability-chain-parameter-activation-graph.test.tsx @@ -29,6 +29,12 @@ describe("Phase 2 parameter, activation, and computational graph glossary pages expect(parameter?.prerequisiteIds).toContain("concept.softmax"); expect(parameter?.relatedIds).toContain("concept.activation"); expect(activation?.prerequisiteIds).toContain("concept.parameter"); + expect(activation?.relatedIds).toContain("concept.feed-forward-network"); + expect(activation?.relatedIds).toContain("concept.standard-ffn"); + expect(activation?.relatedIds).toContain("concept.relu"); + expect(activation?.relatedIds).toContain("concept.leaky-relu"); + expect(activation?.relatedIds).toContain("concept.silu"); + expect(activation?.relatedIds).toContain("concept.swiglu"); expect(activation?.relatedIds).toContain("concept.computational-graph"); expect(graph?.prerequisiteIds).toContain("concept.activation"); expect(graph?.relatedIds).toContain("concept.gradient"); @@ -55,7 +61,7 @@ 
describe("Phase 2 parameter, activation, and computational graph glossary pages expect(html).toContain(DERIVED_RELATED_DOC_GROUP_LABELS[CURATED_RELATED]); }); - test("activation page distinguishes activations from softmax and links to computational graph", async () => { + test("activation page distinguishes activations from softmax and links into the FFN variant family", async () => { const page = await loadGlossaryPage("activation"); expect(page.frontmatter.status).toBe("published"); @@ -71,6 +77,15 @@ describe("Phase 2 parameter, activation, and computational graph glossary pages expect(html).toContain("Activation"); expect(html).toContain("What It Is"); expect(html).toContain("not the same as softmax"); + expect(html).toContain(">ReLU<"); + expect(html).toContain(">LeakyReLU<"); + expect(html).toContain(">SiLU<"); + expect(html).toContain('href="/docs/glossary/feed-forward-network"'); + expect(html).toContain('href="/docs/glossary/standard-ffn"'); + expect(html).toContain('href="/docs/glossary/relu"'); + expect(html).toContain('href="/docs/glossary/leaky-relu"'); + expect(html).toContain('href="/docs/glossary/silu"'); + expect(html).toContain('href="/docs/glossary/swiglu"'); expect(html).toContain('href="/docs/glossary/computational-graph"'); expect(html).toContain(DERIVED_RELATED_DOC_GROUP_LABELS[CURATED_RELATED]); }); diff --git a/src/lib/source.test.ts b/src/lib/source.test.ts index 4996c894..7456dae3 100644 --- a/src/lib/source.test.ts +++ b/src/lib/source.test.ts @@ -39,12 +39,14 @@ const GLOSSARY_INDEX_URLS = [ "/docs/glossary/skip-connection", "/docs/glossary/rope", "/docs/glossary/rmsnorm", + "/docs/glossary/silu", "/docs/glossary/overfitting", "/docs/glossary/patch", "/docs/glossary/perplexity", "/docs/glossary/prefill", "/docs/glossary/prefill-decode-split", "/docs/glossary/qk-norm", + "/docs/glossary/relu", "/docs/glossary/greedy-decoding", "/docs/glossary/top-k-sampling", "/docs/glossary/top-p-sampling", @@ -65,8 +67,6 @@ const GLOSSARY_INDEX_URLS = 
[ "/docs/glossary/sampling-overview", "/docs/glossary/parameter", "/docs/glossary/activation", - "/docs/glossary/relu", - "/docs/glossary/silu", "/docs/glossary/standard-ffn", "/docs/glossary/swiglu", "/docs/glossary/computational-graph",