diff --git a/README.md b/README.md
index 0db8d28..1e111f4 100644
--- a/README.md
+++ b/README.md
@@ -107,6 +107,9 @@ result = (
       #    Passes BEFORE this point apply to the parent rearrangement;
       #    passes AFTER apply per-descendant. So each clone shares the
       #    same V(D)J recombination but accumulates its own SHM + errors.
+      #    NOTE: expand_clones is the legacy fixed-size *star* model. For
+      #    real clonal trees and repertoires, see "Clonal lineages &
+      #    repertoires" below (clonal_lineage / clonal_repertoire).
       .expand_clones(n_clones=50, per_clone=20)
       # 3. Somatic hypermutation per descendant — S5F context-dependent
       #    model at 5% per-base rate (matches memory-B-cell SHM).
@@ -145,6 +148,41 @@ result[0]["experiment_id"], result[0]["tissue"]           # ('exp001', 'peripher
 result.to_tsv("repertoire.tsv")
 ```
 
+## Clonal lineages & repertoires
+
+GenAIRR models clonal structure three ways. Pick by what you need:
+
+| Method | Model | Use for |
+|---|---|---|
+| **`clonal_lineage`** | BCR affinity-maturation **trees** — generation-synchronous birth–death, per-division S5F SHM, optional affinity selection, sample + genotype-collapse | B-cell lineage trees with **ground truth** (Newick/FASTA per clone), benchmarking lineage/clone inference, ML training data |
+| **`clonal_repertoire`** | Non-tree **abundance** repertoires — heavy-tailed clone sizes (power-law/lognormal) + unexpanded-singleton fraction, reads through library-prep, collapsed to `duplicate_count` | **TCR** repertoires (no SHM) and flat-BCR abundance; clone-calling benchmarks |
+| `expand_clones` *(deprecated)* | Fixed-size **star**: one founder + `per_clone` independent descendants | legacy; kept working, but prefer the two above |
+
+```python
+import GenAIRR as ga
+
+# BCR clonal lineage trees — affinity maturation, with ground truth
+bcr = (ga.Experiment.on("human_igh").recombine()
+       .clonal_lineage(n_clones=50, max_generations=6, n_sample=30,
+                       rate=0.01, selection_strength=10.0)   # 0 = neutral
+       .sequencing_errors(rate=0.001)
+       .run_records(seed=0))
+bcr.records              # per-cell AIRR records: clone_id, lineage_node_id, lineage_affinity, ...
+bcr.lineage_trees[0].to_newick()   # ground-truth tree (branch length = per-edge mutations)
+
+# TCR clone-size repertoire — proliferated rearrangements, no SHM, abundance
+tcr = (ga.Experiment.on("human_tcrb").allow_curatable_refdata().recombine()
+       .clonal_repertoire(n_clones=200, size_distribution="power_law",
+                          exponent=2.0, max_size=500, unexpanded_fraction=0.5)
+       .sequencing_errors(rate=0.005)
+       .run_records(seed=0))
+tcr.records              # AIRR records: clone_id (membership) + duplicate_count (abundance)
+```
+
+`clonal_lineage` is BCR-only (it applies S5F SHM and rejects TCR loci); `clonal_repertoire` covers TCR and flat BCR. See the guides:
+[Clonal lineage trees](https://mutejester.github.io/GenAIRR/guides/clonal-lineage.html) ·
+[Clonal repertoires (TCR & abundance)](https://mutejester.github.io/GenAIRR/guides/clonal-repertoire.html).
+
 Other feature flags worth knowing:
 
 | Step | What it does |
@@ -620,7 +658,7 @@ The full documentation site is at **[mutejester.github.io/GenAIRR](https://mutej
 
 - **Learn** — [Lesson 1: V(D)J recombination](https://mutejester.github.io/GenAIRR/lesson-1.html) · [Lesson 2: Pipeline scrubber](https://mutejester.github.io/GenAIRR/lesson-2.html) · [Lesson 3: S5F SHM](https://mutejester.github.io/GenAIRR/lesson-3.html) · [Lesson 4: Sequencing artifacts](https://mutejester.github.io/GenAIRR/lesson-4.html) · [Lesson 5: Ground-truth payoff](https://mutejester.github.io/GenAIRR/lesson-5.html)
 - **Concepts** — [Simulation Pipeline](https://mutejester.github.io/GenAIRR/concept-pipeline.html) · [Persistent IR](https://mutejester.github.io/GenAIRR/concept-persistent-ir.html) · [Contracts](https://mutejester.github.io/GenAIRR/concept-contracts.html) · [AIRR Record](https://mutejester.github.io/GenAIRR/concept-airr-record.html) · [Live Calls](https://mutejester.github.io/GenAIRR/concept-live-call.html)
-- **Guides** — [Build a config](https://mutejester.github.io/GenAIRR/guide-build-config.html) · [Productive sampling](https://mutejester.github.io/GenAIRR/guide-productive.html) · [Clonal families](https://mutejester.github.io/GenAIRR/guide-clonal-families.html) · [Export](https://mutejester.github.io/GenAIRR/guide-export.html) · [Replay](https://mutejester.github.io/GenAIRR/guide-replay.html) · [Reproduce a dataset](https://mutejester.github.io/GenAIRR/guide-reproduce.html) · [Compare SHM models](https://mutejester.github.io/GenAIRR/guide-compare-shm.html) · [Tune corruption](https://mutejester.github.io/GenAIRR/guide-tune-corruption.html)
+- **Guides** — [Build a config](https://mutejester.github.io/GenAIRR/guide-build-config.html) · [Productive sampling](https://mutejester.github.io/GenAIRR/guide-productive.html) · [Clonal lineage trees](https://mutejester.github.io/GenAIRR/guides/clonal-lineage.html) · [Clonal repertoires](https://mutejester.github.io/GenAIRR/guides/clonal-repertoire.html) · [Export](https://mutejester.github.io/GenAIRR/guide-export.html) · [Replay](https://mutejester.github.io/GenAIRR/guide-replay.html) · [Reproduce a dataset](https://mutejester.github.io/GenAIRR/guide-reproduce.html) · [Compare SHM models](https://mutejester.github.io/GenAIRR/guide-compare-shm.html) · [Tune corruption](https://mutejester.github.io/GenAIRR/guide-tune-corruption.html)
 - **Reference** — [API + Configs + AIRR fields](https://mutejester.github.io/GenAIRR/reference.html)
 
 ---
diff --git a/engine_rs/src/lib.rs b/engine_rs/src/lib.rs
index 3a6de57..2c385ed 100644
--- a/engine_rs/src/lib.rs
+++ b/engine_rs/src/lib.rs
@@ -46,6 +46,7 @@ pub mod event;
 pub mod feasibility;
 pub mod ir;
 pub mod junction;
+pub mod lineage;
 pub mod live_call;
 pub mod pass;
 pub mod passes;
diff --git a/engine_rs/src/lineage/affinity.rs b/engine_rs/src/lineage/affinity.rs
new file mode 100644
index 0000000..f136ed3
--- /dev/null
+++ b/engine_rs/src/lineage/affinity.rs
@@ -0,0 +1,261 @@
+//! Affinity model for clonal selection: BLOSUM62-weighted, region-weighted
+//! amino-acid distance to a target antigen, and target generation.
+
+use crate::ir::{compute_codon_rail, Simulation};
+use crate::rng::Rng;
+
+/// 20 standard amino acids in the BLOSUM62 matrix's row/column order.
+const AA_ORDER: &[u8; 20] = b"ARNDCQEGHILKMFPSTWYV";
+
+/// BLOSUM62 substitution score for two amino-acid bytes (uppercase one-letter
+/// codes). Unknown / non-standard residues (including `*` and `X`) score as a
+/// strong mismatch so they contribute maximally to distance.
+pub fn blosum62(a: u8, b: u8) -> i8 {
+    fn idx(aa: u8) -> Option<usize> {
+        AA_ORDER.iter().position(|&x| x == aa)
+    }
+    // Canonical BLOSUM62, rows/cols in AA_ORDER.
+    const M: [[i8; 20]; 20] = [
+        // A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V
+        [ 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0],
+        [-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3],
+        [-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3],
+        [-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3],
+        [ 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1],
+        [-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2],
+        [-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2],
+        [ 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3],
+        [-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3],
+        [-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3],
+        [-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1],
+        [-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2],
+        [-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1],
+        [-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1],
+        [-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2],
+        [ 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2],
+        [ 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0],
+        [-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3],
+        [-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1],
+        [ 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4],
+    ];
+    match (idx(a), idx(b)) {
+        (Some(i), Some(j)) => M[i][j],
+        _ => -4,
+    }
+}
+
+/// Per-position substitution cost from BLOSUM62: `S(a,a) - S(a,b)`, which is 0
+/// when `b == a` and grows as the substitution becomes less favorable.
+fn substitution_cost(a: u8, b: u8) -> f64 {
+    (blosum62(a, a) as f64 - blosum62(a, b) as f64).max(0.0)
+}
+
+/// Region-weighted BLOSUM62 amino-acid distance between two aa sequences.
+/// `weights` is per amino-acid position; positions beyond the shorter sequence
+/// are ignored, and weights shorter than the sequences default to 1.0.
+pub fn weighted_aa_distance(a: &[u8], b: &[u8], weights: &[f64]) -> f64 {
+    let n = a.len().min(b.len());
+    let mut d = 0.0;
+    for i in 0..n {
+        let w = weights.get(i).copied().unwrap_or(1.0);
+        d += w * substitution_cost(a[i], b[i]);
+    }
+    d
+}
+
+/// Generate a "mature" target: the naive aa sequence with up to `m` random
+/// single-residue substitutions to a different standard amino acid.
+/// Deterministic for the given `rng` state.
+pub fn make_mature_target(naive_aa: &[u8], m: u32, rng: &mut Rng) -> Vec<u8> {
+    let mut target = naive_aa.to_vec();
+    if target.is_empty() {
+        return target;
+    }
+    for _ in 0..m {
+        // Unbiased uniform draws (Lemire `range_u32`), matching the engine's
+        // RNG convention rather than `% len` modulo bias.
+        let pos = rng.range_u32(target.len() as u32) as usize;
+        let cur = target[pos];
+        // Pick a standard amino acid uniformly among the 19 != cur (no skew).
+        let pick = match AA_ORDER.iter().position(|&x| x == cur) {
+            Some(ci) => {
+                let r = rng.range_u32(19) as usize;
+                AA_ORDER[if r >= ci { r + 1 } else { r }]
+            }
+            None => AA_ORDER[rng.range_u32(20) as usize],
+        };
+        target[pos] = pick;
+    }
+    target
+}
+
+/// Translate a `Simulation` to its amino-acid sequence by concatenating the
+/// in-frame translation of each region (respecting each region's frame phase),
+/// in biological order. Used to score a cell's affinity to a target antigen.
+pub fn sim_to_aa(sim: &Simulation) -> Vec<u8> {
+    let mut aa = Vec::new();
+    for region in sim.sequence.regions.iter() {
+        let rail = compute_codon_rail(region, &sim.pool);
+        aa.extend_from_slice(&rail.amino_acids);
+    }
+    aa
+}
+
+/// Affinity-selection model: maps a cell's amino-acid sequence to an affinity in
+/// (0, 1] (1 = identical to target) and to a fitness multiplier for its offspring
+/// rate. `selection_strength == 0` makes fitness identically 1 (neutral).
+#[derive(Clone, Debug)]
+pub struct AffinityModel {
+    target_aa: Vec<u8>,
+    aa_weights: Vec<f64>,
+    beta: f64,
+    selection_strength: f64,
+    /// Affinity of the founder; fitness is measured relative to this baseline so
+    /// the founder has fitness ~1 and cells that improve on it exceed 1.
+    founder_affinity: f64,
+}
+
+impl AffinityModel {
+    /// Build a model. `founder_aa` is the naive founder's aa sequence; its
+    /// affinity becomes the fitness baseline.
+    pub fn new(
+        target_aa: Vec<u8>,
+        aa_weights: Vec<f64>,
+        beta: f64,
+        selection_strength: f64,
+        founder_aa: &[u8],
+    ) -> Self {
+        let founder_affinity =
+            (-beta * weighted_aa_distance(founder_aa, &target_aa, &aa_weights)).exp();
+        Self {
+            target_aa,
+            aa_weights,
+            beta,
+            selection_strength,
+            founder_affinity,
+        }
+    }
+
+    /// Affinity in (0, 1]: `exp(-beta * region-weighted BLOSUM distance to target)`.
+    pub fn affinity_value(&self, aa: &[u8]) -> f64 {
+        (-self.beta * weighted_aa_distance(aa, &self.target_aa, &self.aa_weights)).exp()
+    }
+
+    /// Fitness from an already-computed affinity value (avoids re-translation).
+    pub fn fitness_from_affinity(&self, affinity_value: f64) -> f64 {
+        (1.0 + self.selection_strength * (affinity_value - self.founder_affinity)).max(0.0)
+    }
+
+    /// Fitness multiplier for offspring rate: `1 + strength * (affinity - founder_affinity)`,
+    /// clamped at 0. `selection_strength == 0` ⇒ always 1 (neutral).
+    pub fn fitness(&self, aa: &[u8]) -> f64 {
+        self.fitness_from_affinity(self.affinity_value(aa))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ir::{Nucleotide, NucHandle, Region, Segment, Simulation};
+    use crate::rng::Rng;
+
+    fn aa_founder(seq: &[u8]) -> Simulation {
+        let mut sim = Simulation::new();
+        for (i, b) in seq.iter().enumerate() {
+            let (next, _) = sim.with_nucleotide_pushed(
+                Nucleotide::germline(*b, i as u16, Segment::V));
+            sim = next;
+        }
+        sim.with_region_added(Region::new(Segment::V, NucHandle::new(0), NucHandle::new(seq.len() as u32)))
+    }
+
+    #[test]
+    fn sim_to_aa_translates_in_frame() {
+        // 8 'A' bases -> codons AAA, AAA (last 2 ignored) -> "KK" (Lys, Lys)
+        let sim = aa_founder(b"AAAAAAAA");
+        assert_eq!(sim_to_aa(&sim), b"KK".to_vec());
+    }
+
+    #[test]
+    fn affinity_value_is_one_at_target() {
+        let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 1.0, b"KK");
+        assert!((m.affinity_value(b"KK") - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn affinity_value_decreases_with_distance() {
+        let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 1.0, b"KK");
+        let near = m.affinity_value(b"KK");
+        let far = m.affinity_value(b"WW");
+        assert!(far < near, "far {far} should be < near {near}");
+        assert!(far > 0.0);
+    }
+
+    #[test]
+    fn fitness_is_neutral_when_strength_zero() {
+        // selection_strength = 0 => fitness == 1.0 for ANY sequence
+        let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 0.0, b"NN");
+        assert!((m.fitness(b"KK") - 1.0).abs() < 1e-9);
+        assert!((m.fitness(b"WW") - 1.0).abs() < 1e-9);
+        assert!((m.fitness(b"NN") - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn fitness_rewards_closer_than_founder_penalizes_farther() {
+        // founder = "NN" (far from target "KK"); strength 1.
+        let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 1.0, b"NN");
+        // a cell AT the target is fitter than the founder baseline (fitness > 1)
+        assert!(m.fitness(b"KK") > 1.0);
+        // a cell equal to the founder has fitness exactly 1 (baseline)
+        assert!((m.fitness(b"NN") - 1.0).abs() < 1e-9);
+        // fitness never goes negative
+        assert!(m.fitness(b"WWWWWWWW") >= 0.0);
+    }
+
+    #[test]
+    fn blosum62_known_entries() {
+        assert_eq!(blosum62(b'A', b'A'), 4);
+        assert_eq!(blosum62(b'W', b'W'), 11);
+        assert_eq!(blosum62(b'C', b'C'), 9);
+        assert_eq!(blosum62(b'A', b'R'), -1);
+        assert_eq!(blosum62(b'W', b'C'), -2);
+        assert_eq!(blosum62(b'D', b'E'), 2);
+        assert_eq!(blosum62(b'R', b'A'), blosum62(b'A', b'R'));
+        let _ = blosum62(b'*', b'A');
+        let _ = blosum62(b'X', b'X');
+    }
+
+    #[test]
+    fn weighted_distance_zero_for_identical() {
+        let a = b"ACDEW";
+        let w = vec![1.0; 5];
+        assert!(weighted_aa_distance(a, a, &w).abs() < 1e-9);
+    }
+
+    #[test]
+    fn weighted_distance_increases_with_substitutions_and_respects_weights() {
+        let a = b"AAAAA";
+        let b = b"AAAAW";
+        let flat = vec![1.0; 5];
+        let d_flat = weighted_aa_distance(a, b, &flat);
+        assert!(d_flat > 0.0);
+        let mut heavy = vec![1.0; 5];
+        heavy[4] = 10.0;
+        assert!(weighted_aa_distance(a, b, &heavy) > d_flat);
+        let mut zero = vec![1.0; 5];
+        zero[4] = 0.0;
+        assert!(weighted_aa_distance(a, b, &zero).abs() < 1e-9);
+    }
+
+    #[test]
+    fn mature_target_applies_m_substitutions_deterministically() {
+        let naive = b"ACDEFGHIKLMNPQRSTVWY".to_vec();
+        let mut rng = Rng::new(42);
+        let t = make_mature_target(&naive, 3, &mut rng);
+        assert_eq!(t.len(), naive.len());
+        let diffs = naive.iter().zip(&t).filter(|(a, b)| a != b).count();
+        assert!(diffs >= 1 && diffs <= 3, "expected up to 3 aa substitutions, got {diffs}");
+        let mut rng2 = Rng::new(42);
+        assert_eq!(make_mature_target(&naive, 3, &mut rng2), t);
+    }
+}
diff --git a/engine_rs/src/lineage/branching.rs b/engine_rs/src/lineage/branching.rs
new file mode 100644
index 0000000..6c4c0a6
--- /dev/null
+++ b/engine_rs/src/lineage/branching.rs
@@ -0,0 +1,424 @@
+//! Generation-synchronous clonal branching loop.
+
+use crate::ir::Simulation;
+use crate::pass::{Pass, PassContext};
+use crate::rng::Rng;
+use crate::trace::Trace;
+
+use super::affinity::{sim_to_aa, AffinityModel};
+use super::poisson::poisson_sample;
+use super::tree::{LineageNode, LineageTree};
+
+/// Parameters controlling one clonal family's growth (neutral mode).
+#[derive(Clone, Debug)]
+pub struct BranchingParams {
+    /// Base expected offspring per cell per generation (neutral λ).
+    pub lambda_base: f64,
+    /// Expected mutations per cell division. NOTE: currently inert — per-division
+    /// mutation count is driven by the injected `Pass` mutator; this field is
+    /// reserved for a later task and is not read yet.
+    pub lambda_mut: f64,
+    /// Maximum number of generations to grow.
+    pub max_generations: u32,
+    /// Carrying capacity: the live population per generation is capped at this,
+    /// and it is the pivot for logistic offspring-rate damping (the effective
+    /// rate falls to zero as the live set approaches `n_max`).
+    pub n_max: u32,
+    /// Number of cells to sample at the end (used in a later task).
+    pub n_sample: u32,
+    /// Master seed for the whole family (deterministic).
+    pub seed: u64,
+}
+
+/// Extract a node's genotype (pool bases) from its `Simulation`.
+fn genotype_of(sim: &Simulation) -> Vec<u8> {
+    sim.pool.as_slice().iter().map(|n| n.base).collect()
+}
+
+/// Apply one division's mutations: run `mutator` against `parent` with a fresh,
+/// deterministic per-division RNG seeded by `child_seed`. Returns the mutated child.
+fn mutate_child(parent: &Simulation, mutator: &dyn Pass, child_seed: u64) -> Simulation {
+    let mut trace = Trace::new();
+    let mut rng = Rng::new(child_seed);
+    let mut ctx = PassContext {
+        replay_cursor: None,
+        trace: &mut trace,
+        rng: &mut rng,
+        pass_index: 0,
+        refdata: None,
+        contracts: None,
+        feasibility: None,
+        reference_index: None,
+        event_log_sink: None,
+    };
+    mutator.execute(parent, &mut ctx)
+}
+
+/// Core generation-synchronous growth. With `mutator = None`, children are exact
+/// clones (and NO extra RNG is consumed). With `Some(m)`, each division draws a
+/// deterministic sub-seed and applies `m`. With `affinity = Some(model)`, each
+/// cell's offspring rate is modulated by the model's fitness; `None` leaves the
+/// rate unchanged (byte-identical to the pre-affinity path). Returns
+/// (tree, peak_live_population, sims_arena, final_live) where sims_arena[node.id]
+/// is the Simulation for that node, and `final_live` is the set of node ids that
+/// are ALIVE when growth stopped — the cells produced in the final completed
+/// generation (every cell is replaced by its offspring each generation, so the
+/// living population at sampling time is exactly this set). It is empty when the
+/// family went extinct (a cell drew 0 offspring and left no progeny).
+fn grow_core(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: Option<&dyn Pass>,
+    affinity: Option<&AffinityModel>,
+) -> (LineageTree, usize, Vec<Simulation>, Vec<u32>) {
+    let mut nodes: Vec<LineageNode> = Vec::new();
+    let mut sims: Vec<Simulation> = Vec::new();
+    let mut rng = Rng::new(params.seed);
+
+    let root_affinity = affinity
+        .map(|m| m.affinity_value(&sim_to_aa(founder)))
+        .unwrap_or(0.0);
+
+    nodes.push(LineageNode {
+        id: 0,
+        parent_id: None,
+        generation: 0,
+        genotype: genotype_of(founder),
+        mutations_from_parent: 0,
+        affinity: root_affinity,
+        abundance: 0,
+        observed: false,
+    });
+    sims.push(founder.clone());
+
+    let mut live: Vec<u32> = vec![0];
+    let mut next_id: u32 = 1;
+    let mut peak_live: usize = live.len();
+
+    for gen in 1..=params.max_generations {
+        if live.is_empty() {
+            break; // lineage went extinct (every cell drew 0 offspring)
+        }
+        // Logistic carrying-capacity damping: as the live population approaches
+        // n_max, the effective offspring rate falls toward zero (plateau).
+        let live_frac = (live.len() as f64) / (params.n_max as f64);
+        let eff_lambda = params.lambda_base * (1.0 - live_frac).max(0.0);
+
+        let mut next_live: Vec<u32> = Vec::new();
+        'generation: for &parent_id in &live {
+            let cell_lambda = match affinity {
+                Some(m) => eff_lambda * m.fitness_from_affinity(nodes[parent_id as usize].affinity),
+                None => eff_lambda,
+            };
+            let k = poisson_sample(&mut rng, cell_lambda);
+            for _ in 0..k {
+                // Hard cap: a Poisson draw can still overshoot near saturation.
+                if next_live.len() >= params.n_max as usize {
+                    break 'generation;
+                }
+                let parent_mut_count = sims[parent_id as usize].mutation_count;
+                let child_sim = match mutator {
+                    Some(m) => {
+                        let child_seed = rng.next_u64();
+                        mutate_child(&sims[parent_id as usize], m, child_seed)
+                    }
+                    None => sims[parent_id as usize].clone(),
+                };
+                let muts = child_sim
+                    .mutation_count
+                    .saturating_sub(parent_mut_count);
+                let child_affinity = affinity
+                    .map(|m| m.affinity_value(&sim_to_aa(&child_sim)))
+                    .unwrap_or(0.0);
+                nodes.push(LineageNode {
+                    id: next_id,
+                    parent_id: Some(parent_id),
+                    generation: gen,
+                    genotype: genotype_of(&child_sim),
+                    mutations_from_parent: muts,
+                    affinity: child_affinity,
+                    abundance: 0,
+                    observed: false,
+                });
+                sims.push(child_sim);
+                next_live.push(next_id);
+                next_id += 1;
+            }
+        }
+        live = next_live;
+        if live.len() > peak_live {
+            peak_live = live.len();
+        }
+    }
+
+    // `live` now holds the cells alive when growth stopped: either the last
+    // completed generation's offspring (ran to max_generations) or empty (the
+    // population went extinct — every live cell drew 0 offspring). This is the
+    // living population sampling must draw from.
+    (LineageTree { nodes }, peak_live, sims, live)
+}
+
+/// Grow a full clonal lineage with per-division mutation via `mutator`.
+/// Deterministic for `params.seed`.
+pub fn grow_lineage(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: &dyn Pass,
+) -> LineageTree {
+    grow_core(founder, params, Some(mutator), None).0
+}
+
+/// Grow the lineage TOPOLOGY only (children are exact clones of their parent
+/// `Simulation`; no mutation — a later task layers mutation on top).
+pub fn grow_topology(founder: &Simulation, params: &BranchingParams) -> LineageTree {
+    grow_core(founder, params, None, None).0
+}
+
+/// Like `grow_topology`, but also returns the final living node ids (cells alive
+/// when growth stopped). Empty when the family went extinct.
+#[cfg(test)]
+pub fn grow_topology_with_live(
+    founder: &Simulation,
+    params: &BranchingParams,
+) -> (LineageTree, Vec<u32>) {
+    let (tree, _peak, _sims, live) = grow_core(founder, params, None, None);
+    (tree, live)
+}
+
+/// Grow a clonal lineage with per-division mutation AND affinity selection.
+/// Deterministic for `params.seed`.
+pub fn grow_lineage_with_affinity(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: &dyn Pass,
+    model: &AffinityModel,
+) -> LineageTree {
+    grow_core(founder, params, Some(mutator), Some(model)).0
+}
+
+/// Grow + mutate a lineage and ALSO return the per-node `Simulation` arena
+/// (index == node id) and the final living node ids (cells alive when growth
+/// stopped; empty when the family went extinct), for building per-node AIRR
+/// `Outcome`s and sampling the living population.
+pub fn grow_lineage_retaining_sims(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: &dyn Pass,
+    affinity: Option<&AffinityModel>,
+) -> (LineageTree, Vec<Simulation>, Vec<u32>) {
+    let (tree, _peak, sims, live) = grow_core(founder, params, Some(mutator), affinity);
+    (tree, sims, live)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::dist::{EmpiricalLengthDist, UniformBase};
+    use crate::ir::{Nucleotide, NucHandle, Region, Segment, Simulation};
+    use crate::passes::UniformMutationPass;
+
+    /// Minimal founder: a 4-base V region "AAAA".
+    fn founder() -> Simulation {
+        let mut sim = Simulation::new();
+        for (i, b) in b"AAAA".iter().enumerate() {
+            let (next, _) = sim.with_nucleotide_pushed(
+                Nucleotide::germline(*b, i as u16, Segment::V),
+            );
+            sim = next;
+        }
+        sim.with_region_added(Region::new(Segment::V, NucHandle::new(0), NucHandle::new(4)))
+    }
+
+    fn neutral_params() -> BranchingParams {
+        BranchingParams {
+            lambda_base: 1.5,
+            lambda_mut: 0.0,
+            max_generations: 5,
+            n_max: 1000,
+            n_sample: 10,
+            seed: 42,
+        }
+    }
+
+    #[test]
+    fn grows_root_and_children_with_monotonic_generations() {
+        let tree = grow_topology(&founder(), &neutral_params());
+        assert_eq!(tree.nodes.iter().filter(|n| n.parent_id.is_none()).count(), 1);
+        let root = tree.root();
+        assert_eq!(root.id, 0);
+        assert_eq!(root.generation, 0);
+        for n in &tree.nodes {
+            if let Some(pid) = n.parent_id {
+                let parent = tree.get(pid).unwrap();
+                assert_eq!(n.generation, parent.generation + 1);
+            }
+        }
+        assert!(tree.len() > 1, "tree did not grow: {} nodes", tree.len());
+    }
+
+    #[test]
+    fn growth_is_deterministic_for_a_seed() {
+        let a = grow_topology(&founder(), &neutral_params());
+        let b = grow_topology(&founder(), &neutral_params());
+        assert_eq!(a.len(), b.len());
+        for (x, y) in a.nodes.iter().zip(b.nodes.iter()) {
+            assert_eq!(x.id, y.id);
+            assert_eq!(x.parent_id, y.parent_id);
+            assert_eq!(x.generation, y.generation);
+        }
+    }
+
+    #[test]
+    fn carrying_capacity_bounds_live_population() {
+        let params = BranchingParams {
+            lambda_base: 4.0,      // would explode unbounded
+            lambda_mut: 0.0,
+            max_generations: 40,
+            n_max: 200,
+            n_sample: 10,
+            seed: 7,
+        };
+        let (_tree, peak_live, _sims, _live) = grow_core(&founder(), &params, None, None);
+        // hard cap: live population never exceeds n_max
+        assert!(peak_live <= params.n_max as usize,
+            "peak live {peak_live} exceeded n_max {}", params.n_max);
+        // damping plateau (equilibrium ~ (1 - 1/lambda_base) * n_max = 150) is
+        // comfortably above half capacity — confirms it grew, not died early
+        assert!(peak_live > (params.n_max as usize) / 2,
+            "peak live {peak_live} did not approach capacity");
+    }
+
+    #[test]
+    fn final_live_is_empty_when_founder_never_divides() {
+        // lambda_base = 0 => the founder draws 0 offspring => after generation 1
+        // the live set is empty and the loop breaks. The final living population
+        // is empty (the clone went extinct).
+        let params = BranchingParams { lambda_base: 0.0, ..neutral_params() };
+        let (_tree, live) = grow_topology_with_live(&founder(), &params);
+        assert!(live.is_empty(), "expected extinct (empty) final-live set, got {:?}", live);
+    }
+
+    #[test]
+    fn final_live_is_the_max_generation_set() {
+        // In a grown family, every node in the final-live set is at the maximum
+        // generation reached, and the set is non-empty. A single founder has a
+        // real chance of immediate extinction at lambda_base ~1.5, so scan seeds
+        // for one that grows (the invariant under test is about non-extinct runs).
+        let mut grew = false;
+        for seed in 0..50u64 {
+            let params = BranchingParams { seed, ..neutral_params() };
+            let (tree, live) = grow_topology_with_live(&founder(), &params);
+            if live.is_empty() {
+                continue; // extinct for this seed; try another
+            }
+            grew = true;
+            let max_gen = tree.nodes.iter().map(|n| n.generation).max().unwrap();
+            for &id in &live {
+                assert_eq!(tree.get(id).unwrap().generation, max_gen,
+                    "final-live node {id} not at max generation {max_gen} (seed {seed})");
+            }
+        }
+        assert!(grew, "no seed in 0..50 produced a surviving family");
+    }
+
+    /// A mutator that applies exactly 2 substitutions per division.
+    fn two_mut_mutator() -> UniformMutationPass {
+        UniformMutationPass::new(
+            Box::new(EmpiricalLengthDist::from_pairs(vec![(2, 1.0)])),
+            Box::new(UniformBase),
+        )
+    }
+
+    #[test]
+    fn children_accumulate_mutations_and_branch_lengths() {
+        let params = BranchingParams {
+            lambda_base: 1.2, lambda_mut: 0.0, max_generations: 4,
+            n_max: 500, n_sample: 10, seed: 11,
+        };
+        let mutator = two_mut_mutator();
+        let tree = grow_lineage(&founder(), &params, &mutator);
+
+        assert_eq!(tree.root().mutations_from_parent, 0);
+
+        let mut saw_mutated_child = false;
+        for n in &tree.nodes {
+            if n.parent_id.is_some() {
+                // <= 2 (not == 2): on the 4-base "AAAA" founder the two uniform
+                // substitutions can draw the same position, yielding 1 net change.
+                assert!(n.mutations_from_parent <= 2);
+                if n.mutations_from_parent > 0 {
+                    saw_mutated_child = true;
+                    let parent = tree.get(n.parent_id.unwrap()).unwrap();
+                    assert_ne!(n.genotype, parent.genotype);
+                }
+            }
+        }
+        assert!(saw_mutated_child, "no child accumulated any mutation");
+    }
+
+    #[test]
+    fn mutated_growth_is_deterministic() {
+        let params = BranchingParams {
+            lambda_base: 1.2, lambda_mut: 0.0, max_generations: 4,
+            n_max: 500, n_sample: 10, seed: 11,
+        };
+        let a = grow_lineage(&founder(), &params, &two_mut_mutator());
+        let b = grow_lineage(&founder(), &params, &two_mut_mutator());
+        assert_eq!(a.len(), b.len());
+        for (x, y) in a.nodes.iter().zip(b.nodes.iter()) {
+            assert_eq!(x.genotype, y.genotype);
+            assert_eq!(x.mutations_from_parent, y.mutations_from_parent);
+        }
+    }
+
+    #[test]
+    fn affinity_run_populates_node_affinities_and_root_matches() {
+        use crate::lineage::affinity::{sim_to_aa, AffinityModel};
+        let f = founder();
+        let founder_aa = sim_to_aa(&f);
+        let w = vec![1.0; founder_aa.len().max(1)];
+        let model = AffinityModel::new(b"W".to_vec(), w, 1.0, 1.0, &founder_aa);
+        let params = BranchingParams { lambda_base:1.2, lambda_mut:0.0, max_generations:4, n_max:500, n_sample:10, seed:11 };
+        let tree = grow_lineage_with_affinity(&f, &params, &two_mut_mutator(), &model);
+        assert!((tree.root().affinity - model.affinity_value(&founder_aa)).abs() < 1e-9);
+        for n in &tree.nodes {
+            assert!(n.affinity > 0.0 && n.affinity <= 1.0 + 1e-9, "affinity out of range: {}", n.affinity);
+        }
+    }
+
+    #[test]
+    fn affinity_strength_zero_matches_neutral_topology() {
+        use crate::lineage::affinity::{sim_to_aa, AffinityModel};
+        let f = founder();
+        let founder_aa = sim_to_aa(&f);
+        let w = vec![1.0; founder_aa.len().max(1)];
+        // selection_strength = 0 => fitness identically 1 => identical topology/genotypes to neutral
+        let model = AffinityModel::new(b"W".to_vec(), w, 1.0, 0.0, &founder_aa);
+        let params = BranchingParams { lambda_base:1.2, lambda_mut:0.0, max_generations:4, n_max:500, n_sample:10, seed:11 };
+        let with_aff = grow_lineage_with_affinity(&f, &params, &two_mut_mutator(), &model);
+        let neutral = grow_lineage(&f, &params, &two_mut_mutator());
+        assert_eq!(with_aff.len(), neutral.len());
+        for (a, b) in with_aff.nodes.iter().zip(neutral.nodes.iter()) {
+            assert_eq!(a.genotype, b.genotype);
+            assert_eq!(a.parent_id, b.parent_id);
+            assert_eq!(a.generation, b.generation);
+            assert_eq!(a.mutations_from_parent, b.mutations_from_parent);
+        }
+    }
+
+    #[test]
+    fn affinity_growth_is_deterministic() {
+        use crate::lineage::affinity::{sim_to_aa, AffinityModel};
+        let f = founder();
+        let founder_aa = sim_to_aa(&f);
+        let mk = || AffinityModel::new(b"W".to_vec(), vec![1.0; founder_aa.len().max(1)], 1.0, 2.0, &founder_aa);
+        let params = BranchingParams { lambda_base:1.5, lambda_mut:0.0, max_generations:5, n_max:500, n_sample:10, seed:21 };
+        let a = grow_lineage_with_affinity(&f, &params, &two_mut_mutator(), &mk());
+        let b = grow_lineage_with_affinity(&f, &params, &two_mut_mutator(), &mk());
+        assert_eq!(a.len(), b.len());
+        for (x, y) in a.nodes.iter().zip(b.nodes.iter()) {
+            assert_eq!(x.genotype, y.genotype);
+            assert!((x.affinity - y.affinity).abs() < 1e-12);
+        }
+    }
+}
diff --git a/engine_rs/src/lineage/clone_size.rs b/engine_rs/src/lineage/clone_size.rs
new file mode 100644
index 0000000..0361b8c
--- /dev/null
+++ b/engine_rs/src/lineage/clone_size.rs
@@ -0,0 +1,170 @@
+//! Clone-size distributions for repertoire composition. T-cell (and the
+//! singleton tail of B-cell) repertoires have heavy-tailed clone-size
+//! distributions; this module samples integer clone sizes >= 1.
+
+use crate::rng::Rng;
+
+/// A heavy-tailed clone-size distribution. Sizes are integers in `[1, x_max]`.
+#[derive(Clone, Debug)]
+pub enum CloneSizeDist {
+    /// Truncated discrete power law P(size=k) ∝ k^(-exponent), k in [1, x_max].
+    /// `exponent` ~2–3 is typical for TCR repertoires.
+    PowerLaw { exponent: f64, x_max: u32 },
+    /// Log-normal sizes: round(exp(mu + sigma*Z)), clamped to [1, x_max].
+    LogNormal { mu: f64, sigma: f64, x_max: u32 },
+}
+
+impl Default for CloneSizeDist {
+    fn default() -> Self {
+        CloneSizeDist::PowerLaw {
+            exponent: 2.0,
+            x_max: 100_000,
+        }
+    }
+}
+
+/// Standard-normal variate via Box–Muller using two uniforms from `rng`.
+fn next_standard_normal(rng: &mut Rng) -> f64 {
+    let mut u1 = rng.next_f64();
+    if u1 < 1e-300 {
+        u1 = 1e-300;
+    }
+    let u2 = rng.next_f64();
+    (-2.0 * u1.ln()).sqrt() * (std::f64::consts::TAU * u2).cos()
+}
+
+/// Sample one clone size (>= 1) from `dist`, deterministic for the `rng` state.
+pub fn sample_clone_size(rng: &mut Rng, dist: &CloneSizeDist) -> u32 {
+    match *dist {
+        CloneSizeDist::PowerLaw { exponent, x_max } => {
+            let x_max = x_max.max(1) as f64;
+            let u = rng.next_f64();
+            let x = if (exponent - 1.0).abs() < 1e-9 {
+                x_max.powf(u)
+            } else {
+                let a = 1.0 - exponent;
+                (1.0 + u * (x_max.powf(a) - 1.0)).powf(1.0 / a)
+            };
+            (x.round() as i64).clamp(1, x_max as i64) as u32
+        }
+        CloneSizeDist::LogNormal { mu, sigma, x_max } => {
+            let z = next_standard_normal(rng);
+            let x = (mu + sigma * z).exp();
+            (x.round() as i64).clamp(1, x_max.max(1) as i64) as u32
+        }
+    }
+}
+
+/// Draw `n_clones` clone sizes from `dist`. A fraction `unexpanded_fraction`
+/// (in [0,1]) of the clones are forced to be unexpanded singletons (size 1);
+/// the deterministic forced count is `round(n_clones * unexpanded_fraction)`,
+/// placed at the front of the returned vector. The remainder are drawn from
+/// `dist` (and may themselves be 1, so the realized singleton count can exceed
+/// the forced minimum — matching real repertoires). Deterministic for the `rng`
+/// state. `unexpanded_fraction` is clamped to [0,1].
+pub fn sample_repertoire_sizes(
+    rng: &mut Rng,
+    n_clones: u32,
+    dist: &CloneSizeDist,
+    unexpanded_fraction: f64,
+) -> Vec<u32> {
+    let frac = unexpanded_fraction.clamp(0.0, 1.0);
+    let n = n_clones as usize;
+    let forced_singletons = (((n as f64) * frac).round() as usize).min(n);
+    let mut sizes = Vec::with_capacity(n);
+    for _ in 0..forced_singletons {
+        sizes.push(1u32);
+    }
+    for _ in forced_singletons..n {
+        sizes.push(sample_clone_size(rng, dist));
+    }
+    sizes
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::rng::Rng;
+
+    #[test]
+    fn sizes_are_at_least_one_and_within_max() {
+        let mut rng = Rng::new(1);
+        let d = CloneSizeDist::PowerLaw { exponent: 2.0, x_max: 1000 };
+        for _ in 0..1000 {
+            let s = sample_clone_size(&mut rng, &d);
+            assert!(s >= 1 && s <= 1000, "size {s} out of [1,1000]");
+        }
+    }
+
+    #[test]
+    fn power_law_is_deterministic() {
+        let d = CloneSizeDist::PowerLaw { exponent: 2.0, x_max: 1000 };
+        let mut a = Rng::new(42);
+        let mut b = Rng::new(42);
+        let xs: Vec<u32> = (0..100).map(|_| sample_clone_size(&mut a, &d)).collect();
+        let ys: Vec<u32> = (0..100).map(|_| sample_clone_size(&mut b, &d)).collect();
+        assert_eq!(xs, ys);
+    }
+
+    #[test]
+    fn power_law_is_heavy_tailed_mostly_small_some_large() {
+        let mut rng = Rng::new(7);
+        let d = CloneSizeDist::PowerLaw { exponent: 2.0, x_max: 100_000 };
+        let n = 20_000;
+        let sizes: Vec<u32> = (0..n).map(|_| sample_clone_size(&mut rng, &d)).collect();
+        let ones = sizes.iter().filter(|&&s| s == 1).count();
+        let big = sizes.iter().filter(|&&s| s >= 100).count();
+        assert!(ones > n / 3, "expected many singletons, got {ones}/{n}");
+        assert!(big > 0, "expected some large clones in the tail");
+        let max = *sizes.iter().max().unwrap();
+        assert!(max > 50, "tail did not reach large sizes (max {max})");
+    }
+
+    #[test]
+    fn lognormal_is_deterministic_and_in_range() {
+        let d = CloneSizeDist::LogNormal { mu: 1.0, sigma: 1.0, x_max: 10_000 };
+        let mut a = Rng::new(9);
+        let mut b = Rng::new(9);
+        let xs: Vec<u32> = (0..100).map(|_| sample_clone_size(&mut a, &d)).collect();
+        let ys: Vec<u32> = (0..100).map(|_| sample_clone_size(&mut b, &d)).collect();
+        assert_eq!(xs, ys);
+        for &s in &xs {
+            assert!(s >= 1 && s <= 10_000);
+        }
+    }
+
+    #[test]
+    fn repertoire_sizes_respects_count_and_min_one() {
+        let mut rng = Rng::new(3);
+        let d = CloneSizeDist::PowerLaw { exponent: 2.0, x_max: 1000 };
+        let sizes = sample_repertoire_sizes(&mut rng, 500, &d, 0.0);
+        assert_eq!(sizes.len(), 500);
+        assert!(sizes.iter().all(|&s| s >= 1));
+    }
+
+    #[test]
+    fn unexpanded_fraction_forces_singletons() {
+        let mut rng = Rng::new(5);
+        let d = CloneSizeDist::PowerLaw { exponent: 2.0, x_max: 1000 };
+        let sizes = sample_repertoire_sizes(&mut rng, 1000, &d, 0.4);
+        assert_eq!(sizes.len(), 1000);
+        let ones = sizes.iter().filter(|&&s| s == 1).count();
+        assert!(ones >= 400, "expected >=400 singletons, got {ones}");
+    }
+
+    #[test]
+    fn unexpanded_fraction_one_is_all_singletons() {
+        let mut rng = Rng::new(8);
+        let d = CloneSizeDist::default();
+        let sizes = sample_repertoire_sizes(&mut rng, 100, &d, 1.0);
+        assert!(sizes.iter().all(|&s| s == 1));
+    }
+
+    #[test]
+    fn repertoire_is_deterministic() {
+        let d = CloneSizeDist::PowerLaw { exponent: 2.0, x_max: 1000 };
+        let a = sample_repertoire_sizes(&mut Rng::new(11), 200, &d, 0.3);
+        let b = sample_repertoire_sizes(&mut Rng::new(11), 200, &d, 0.3);
+        assert_eq!(a, b);
+    }
+}
diff --git a/engine_rs/src/lineage/export.rs b/engine_rs/src/lineage/export.rs
new file mode 100644
index 0000000..f94bce2
--- /dev/null
+++ b/engine_rs/src/lineage/export.rs
@@ -0,0 +1,203 @@
+//! Ground-truth export for a `LineageTree`: node-table TSV, FASTA of all node
+//! (ancestral + observed) sequences, and a Newick tree with per-edge mutation
+//! counts as branch lengths. Consumed by lineage-inference benchmark tools.
+
+use std::fmt::Write as _;
+
+use super::tree::LineageTree;
+
+/// Tab-separated node table, one row per node plus a header. `parent_id` is
+/// `NA` for the root. Columns:
+/// `node_id, parent_id, generation, mutations_from_parent, abundance, observed, affinity, sequence`.
+pub fn to_node_table_tsv(tree: &LineageTree) -> String {
+    let mut out = String::new();
+    out.push_str(
+        "node_id\tparent_id\tgeneration\tmutations_from_parent\tabundance\tobserved\taffinity\tsequence\n",
+    );
+    for n in &tree.nodes {
+        let parent = match n.parent_id {
+            Some(p) => p.to_string(),
+            None => "NA".to_string(),
+        };
+        let seq = String::from_utf8_lossy(&n.genotype);
+        let _ = writeln!(
+            out,
+            "{}\t{}\t{}\t{}\t{}\t{}\t{:.4}\t{}",
+            n.id, parent, n.generation, n.mutations_from_parent, n.abundance, n.observed, n.affinity, seq
+        );
+    }
+    out
+}
+
+/// FASTA of every node (ancestral + observed). Header carries node id,
+/// generation, abundance, and observed flag so downstream tools can map a
+/// record back to its ground-truth node.
+pub fn to_fasta(tree: &LineageTree) -> String {
+    let mut out = String::new();
+    for n in &tree.nodes {
+        let seq = String::from_utf8_lossy(&n.genotype);
+        let _ = writeln!(
+            out,
+            ">node{}|gen={}|abundance={}|observed={}",
+            n.id, n.generation, n.abundance, n.observed
+        );
+        let _ = writeln!(out, "{}", seq);
+    }
+    out
+}
+
+/// Recursively render the subtree rooted at `id` in Newick form, with the edge
+/// to this node labelled by its `mutations_from_parent` count as branch length.
+/// Recursion depth is bounded by the number of generations (small), not node
+/// count, so this is safe from stack overflow at realistic family sizes.
+fn newick_subtree(tree: &LineageTree, id: u32) -> String {
+    let node = tree.get(id).expect("newick: node id out of range");
+    let children = tree.children_of(id);
+    let label = format!("node{id}");
+    if children.is_empty() {
+        format!("{label}:{}", node.mutations_from_parent)
+    } else {
+        let inner: Vec<String> = children
+            .iter()
+            .map(|c| newick_subtree(tree, c.id))
+            .collect();
+        format!("({}){label}:{}", inner.join(","), node.mutations_from_parent)
+    }
+}
+
+/// Newick string for the whole tree. Branch lengths are per-edge mutation
+/// counts. The founder is the named Newick root (its children are wrapped in
+/// parentheses, its label follows) and carries no branch length, since it is
+/// the origin. Always terminated with `;`. This is standard rooted Newick
+/// (e.g. `((node3:1)node1:1,node2:1)node0;`) — no phantom outer node — so
+/// ete3 / dendropy / Bio.Phylo parse `node0` as the actual root.
+///
+/// Precondition: the tree has exactly one root (one node with `parent_id ==
+/// None`); call [`LineageTree::validate`] first if the tree was hand-built.
+pub fn to_newick(tree: &LineageTree) -> String {
+    let root = tree.root();
+    let children = tree.children_of(root.id);
+    let root_label = format!("node{}", root.id);
+    let body = if children.is_empty() {
+        root_label
+    } else {
+        let inner: Vec<String> = children
+            .iter()
+            .map(|c| newick_subtree(tree, c.id))
+            .collect();
+        format!("({}){root_label}", inner.join(","))
+    };
+    format!("{body};")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::dist::{EmpiricalLengthDist, UniformBase};
+    use crate::ir::{Nucleotide, NucHandle, Region, Segment, Simulation};
+    use crate::lineage::{simulate_family, BranchingParams};
+    use crate::lineage::tree::{LineageNode, LineageTree};
+    use crate::passes::UniformMutationPass;
+
+    fn grown_founder() -> Simulation {
+        let mut sim = Simulation::new();
+        for (i, b) in b"AAAAAAAA".iter().enumerate() {
+            let (next, _) = sim.with_nucleotide_pushed(
+                Nucleotide::germline(*b, i as u16, Segment::V));
+            sim = next;
+        }
+        sim.with_region_added(Region::new(Segment::V, NucHandle::new(0), NucHandle::new(8)))
+    }
+
+    #[test]
+    fn exports_a_grown_family_consistently() {
+        let params = BranchingParams {
+            lambda_base: 1.5, lambda_mut: 0.0, max_generations: 6,
+            n_max: 300, n_sample: 20, seed: 2024,
+        };
+        let mutator = UniformMutationPass::new(
+            Box::new(EmpiricalLengthDist::from_pairs(vec![(1, 1.0)])),
+            Box::new(UniformBase),
+        );
+        let tree = simulate_family(&grown_founder(), &params, &mutator);
+        assert!(tree.validate().is_ok());
+
+        let tsv = to_node_table_tsv(&tree);
+        let fasta = to_fasta(&tree);
+        let nwk = to_newick(&tree);
+
+        assert_eq!(tsv.lines().count(), tree.len() + 1);
+        assert_eq!(fasta.lines().count(), tree.len() * 2);
+        assert!(nwk.ends_with(';'));
+        assert_eq!(nwk.matches('(').count(), nwk.matches(')').count());
+        for n in &tree.nodes {
+            assert!(nwk.contains(&format!("node{}", n.id)),
+                "newick missing node{}", n.id);
+        }
+    }
+
+    // root(0,"AAAA") -> 1("AAAC", abundance 2, observed), 2("AAAG"); 1 -> 3("ATAC", abundance 1, observed)
+    fn sample_tree() -> LineageTree {
+        LineageTree {
+            nodes: vec![
+                LineageNode { id: 0, parent_id: None,    generation: 0, genotype: b"AAAA".to_vec(), mutations_from_parent: 0, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 1, parent_id: Some(0), generation: 1, genotype: b"AAAC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 2, observed: true },
+                LineageNode { id: 2, parent_id: Some(0), generation: 1, genotype: b"AAAG".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 3, parent_id: Some(1), generation: 2, genotype: b"ATAC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 1, observed: true },
+            ],
+        }
+    }
+
+    #[test]
+    fn fasta_emits_every_node_with_metadata_header() {
+        let fasta = to_fasta(&sample_tree());
+        let lines: Vec<&str> = fasta.lines().collect();
+        assert_eq!(lines.len(), 8); // 4 nodes => header+seq each
+        assert_eq!(lines[0], ">node0|gen=0|abundance=0|observed=false");
+        assert_eq!(lines[1], "AAAA");
+        assert_eq!(lines[2], ">node1|gen=1|abundance=2|observed=true");
+        assert_eq!(lines[3], "AAAC");
+        assert!(fasta.contains(">node3|gen=2|abundance=1|observed=true"));
+    }
+
+    #[test]
+    fn node_table_tsv_has_header_and_one_row_per_node() {
+        let tsv = to_node_table_tsv(&sample_tree());
+        let lines: Vec<&str> = tsv.lines().collect();
+        assert_eq!(lines.len(), 5); // 1 header + 4 nodes
+        assert_eq!(
+            lines[0],
+            "node_id\tparent_id\tgeneration\tmutations_from_parent\tabundance\tobserved\taffinity\tsequence"
+        );
+        assert_eq!(lines[1], "0\tNA\t0\t0\t0\tfalse\t0.0000\tAAAA");
+        assert_eq!(lines[2], "1\t0\t1\t1\t2\ttrue\t0.0000\tAAAC");
+        assert_eq!(lines[3], "2\t0\t1\t1\t0\tfalse\t0.0000\tAAAG");
+        assert_eq!(lines[4], "3\t1\t2\t1\t1\ttrue\t0.0000\tATAC");
+    }
+
+    #[test]
+    fn newick_encodes_topology_and_branch_lengths() {
+        let nwk = to_newick(&sample_tree());
+        assert!(nwk.ends_with(';'), "newick must end with ';': {nwk}");
+        let opens = nwk.matches('(').count();
+        let closes = nwk.matches(')').count();
+        assert_eq!(opens, closes, "unbalanced parens: {nwk}");
+        assert!(nwk.contains("node1:1"));
+        assert!(nwk.contains("node2:1"));
+        assert!(nwk.contains("node3:1"));
+        assert!(nwk.contains("node0"));
+        assert!(!nwk.contains("node0:"), "root must not have a branch length: {nwk}");
+        assert_eq!(nwk, "((node3:1)node1:1,node2:1)node0;");
+    }
+
+    #[test]
+    fn newick_single_node_tree() {
+        let tree = LineageTree {
+            nodes: vec![LineageNode {
+                id: 0, parent_id: None, generation: 0, genotype: b"AAAA".to_vec(),
+                mutations_from_parent: 0, affinity: 0.0, abundance: 1, observed: true,
+            }],
+        };
+        assert_eq!(to_newick(&tree), "node0;");
+    }
+}
diff --git a/engine_rs/src/lineage/family.rs b/engine_rs/src/lineage/family.rs
new file mode 100644
index 0000000..06b61be
--- /dev/null
+++ b/engine_rs/src/lineage/family.rs
@@ -0,0 +1,181 @@
+//! One-call clonal family simulation: grow a lineage and sample it.
+
+use crate::ir::Simulation;
+use crate::pass::Pass;
+use crate::rng::Rng;
+
+use super::affinity::AffinityModel;
+use super::branching::{grow_lineage_retaining_sims, BranchingParams};
+use super::sampling::sample_and_collapse;
+use super::tree::LineageTree;
+
+/// Salt mixed into `params.seed` to give sampling an independent RNG stream
+/// from growth, while staying deterministic for a given seed.
+const SAMPLE_SEED_SALT: u64 = 0x5341_4D50_4C45_0001; // "SAMPLE\0\1"
+
+/// Grow a clonal family from `founder` and sample observed cells.
+///
+/// Deterministic for `params.seed`. Growth and sampling use separate RNG
+/// streams (the sampling stream is `params.seed ^ SAMPLE_SEED_SALT`), so the
+/// whole family reproduces byte-for-byte across runs.
+pub fn simulate_family(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: &dyn Pass,
+) -> LineageTree {
+    let (mut tree, _sims, live) = grow_lineage_retaining_sims(founder, params, mutator, None);
+    let mut sample_rng = Rng::new(params.seed ^ SAMPLE_SEED_SALT);
+    sample_and_collapse(&mut tree, &live, params.n_sample, &mut sample_rng);
+    tree
+}
+
+/// Grow + sample a clonal family with affinity selection.
+pub fn simulate_family_with_affinity(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: &dyn Pass,
+    model: &AffinityModel,
+) -> LineageTree {
+    let (mut tree, _sims, live) =
+        grow_lineage_retaining_sims(founder, params, mutator, Some(model));
+    let mut sample_rng = Rng::new(params.seed ^ SAMPLE_SEED_SALT);
+    sample_and_collapse(&mut tree, &live, params.n_sample, &mut sample_rng);
+    tree
+}
+
+/// Grow + sample a family, returning the tree AND the per-node Simulation arena.
+/// Index in the arena equals the node id (arena[node.id] is the Simulation for
+/// that node). Only observed (sampled) nodes are useful for AIRR projection; the
+/// arena is full-tree and never trimmed.
+pub fn simulate_family_sims(
+    founder: &Simulation,
+    params: &BranchingParams,
+    mutator: &dyn Pass,
+    affinity: Option<&AffinityModel>,
+) -> (LineageTree, Vec<Simulation>) {
+    let (mut tree, sims, live) = grow_lineage_retaining_sims(founder, params, mutator, affinity);
+    let mut sample_rng = Rng::new(params.seed ^ SAMPLE_SEED_SALT);
+    sample_and_collapse(&mut tree, &live, params.n_sample, &mut sample_rng);
+    (tree, sims)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::dist::{EmpiricalLengthDist, UniformBase};
+    use crate::ir::{Nucleotide, NucHandle, Region, Segment, Simulation};
+    use crate::lineage::BranchingParams;
+    use crate::passes::UniformMutationPass;
+
+    fn founder() -> Simulation {
+        let mut sim = Simulation::new();
+        for (i, b) in b"AAAAAAAA".iter().enumerate() {
+            let (next, _) = sim.with_nucleotide_pushed(
+                Nucleotide::germline(*b, i as u16, Segment::V));
+            sim = next;
+        }
+        sim.with_region_added(Region::new(Segment::V, NucHandle::new(0), NucHandle::new(8)))
+    }
+
+    #[test]
+    fn simulate_family_returns_valid_sampled_tree() {
+        let params = BranchingParams {
+            lambda_base: 1.5, lambda_mut: 0.0, max_generations: 6,
+            n_max: 300, n_sample: 20, seed: 2024,
+        };
+        let mutator = UniformMutationPass::new(
+            Box::new(EmpiricalLengthDist::from_pairs(vec![(1, 1.0)])),
+            Box::new(UniformBase),
+        );
+        let tree = simulate_family(&founder(), &params, &mutator);
+
+        assert!(tree.validate().is_ok(), "invalid tree: {:?}", tree.validate());
+        let total_abundance: u32 = tree.nodes.iter().map(|n| n.abundance).sum();
+        // abundance equals n_sample unless the family went extinct early
+        assert!(total_abundance == params.n_sample || total_abundance == 0);
+
+        let tree2 = simulate_family(&founder(), &params,
+            &UniformMutationPass::new(
+                Box::new(EmpiricalLengthDist::from_pairs(vec![(1, 1.0)])),
+                Box::new(UniformBase)));
+        assert_eq!(tree.len(), tree2.len());
+    }
+
+    #[test]
+    fn extinct_founder_yields_no_observed_cells() {
+        // lambda_base = 0 => founder never divides => the living final population
+        // is empty (extinct). An extinct clone must NOT be sampled: no observed node.
+        let params = BranchingParams {
+            lambda_base: 0.0, lambda_mut: 0.0, max_generations: 6,
+            n_max: 300, n_sample: 20, seed: 2024,
+        };
+        let mutator = UniformMutationPass::new(
+            Box::new(EmpiricalLengthDist::from_pairs(vec![(1, 1.0)])),
+            Box::new(UniformBase),
+        );
+        let (tree, _sims) = simulate_family_sims(&founder(), &params, &mutator, None);
+        assert!(
+            tree.nodes.iter().all(|n| !n.observed && n.abundance == 0),
+            "extinct clone was sampled: {} observed nodes",
+            tree.nodes.iter().filter(|n| n.observed).count()
+        );
+    }
+
+    #[test]
+    fn observed_cells_are_all_at_the_final_living_generation() {
+        // In a grown family, every OBSERVED node must be a member of the final
+        // living generation (the maximum generation reached) — never an
+        // early-terminal/dead cell that drew 0 offspring at a lower generation.
+        // A single founder can go extinct, so scan seeds for one that grows.
+        let mutator = || UniformMutationPass::new(
+            Box::new(EmpiricalLengthDist::from_pairs(vec![(1, 1.0)])),
+            Box::new(UniformBase),
+        );
+        let mut grew = false;
+        for seed in 0..50u64 {
+            let params = BranchingParams {
+                lambda_base: 1.6, lambda_mut: 0.0, max_generations: 6,
+                n_max: 300, n_sample: 20, seed,
+            };
+            let (tree, _sims) = simulate_family_sims(&founder(), &params, &mutator(), None);
+            let observed: Vec<&_> = tree.nodes.iter().filter(|n| n.observed).collect();
+            if observed.is_empty() {
+                continue; // extinct for this seed
+            }
+            grew = true;
+            let max_gen = tree.nodes.iter().map(|n| n.generation).max().unwrap();
+            for n in &observed {
+                assert_eq!(
+                    n.generation, max_gen,
+                    "observed node {} at generation {} is not at the final generation {} (seed {seed})",
+                    n.id, n.generation, max_gen
+                );
+            }
+        }
+        assert!(grew, "no seed in 0..50 produced a surviving family");
+    }
+
+    #[test]
+    fn simulate_family_sims_arena_length_matches_tree_and_observed_nodes_have_nonempty_pool() {
+        let params = BranchingParams {
+            lambda_base: 1.5, lambda_mut: 0.0, max_generations: 6,
+            n_max: 300, n_sample: 20, seed: 9999,
+        };
+        let mutator = UniformMutationPass::new(
+            Box::new(EmpiricalLengthDist::from_pairs(vec![(1, 1.0)])),
+            Box::new(UniformBase),
+        );
+        let (tree, sims) = simulate_family_sims(&founder(), &params, &mutator, None);
+
+        // Arena length must equal tree length (one entry per node).
+        assert_eq!(sims.len(), tree.len(),
+            "arena len {} != tree len {}", sims.len(), tree.len());
+
+        // Every observed node's sim must have a non-empty pool.
+        for node in tree.nodes.iter().filter(|n| n.observed) {
+            let sim = &sims[node.id as usize];
+            assert!(!sim.pool.is_empty(),
+                "observed node {} has empty pool", node.id);
+        }
+    }
+}
diff --git a/engine_rs/src/lineage/mod.rs b/engine_rs/src/lineage/mod.rs
new file mode 100644
index 0000000..5f4a753
--- /dev/null
+++ b/engine_rs/src/lineage/mod.rs
@@ -0,0 +1,41 @@
+//! Clonal lineage simulation: grow a real mutation tree from one founder
+//! `Simulation` via a generation-synchronous birth–death process.
+//!
+//! ```ignore
+//! use genairr_engine::lineage::{simulate_family, BranchingParams};
+//! use genairr_engine::passes::UniformMutationPass;
+//! // build a founder Simulation + a mutator (S5F in production), then:
+//! let params = BranchingParams {
+//!     lambda_base: 1.5, lambda_mut: 0.0, max_generations: 12,
+//!     n_max: 1000, n_sample: 60, seed: 42,
+//! };
+//! let tree = simulate_family(&founder, &params, &mutator);
+//! assert!(tree.validate().is_ok());
+//! ```
+//!
+//! The mutator is any `Pass`; production wires the S5F pass, tests use
+//! `UniformMutationPass` (no reference cartridge required). Affinity-driven
+//! selection (BLOSUM-weighted distance to a target antigen → fitness-modulated
+//! offspring) lives in [`affinity`] and is used via [`simulate_family_with_affinity`].
+//!
+//! Ground-truth export (Newick / FASTA / node-table TSV) lives in [`export`].
+//! Heavy-tailed clone-size distributions + repertoire composition (the TCR core
+//! and the singleton tail of BCR repertoires) live in [`clone_size`].
+
+pub mod tree;
+pub mod poisson;
+pub mod branching;
+pub mod sampling;
+pub mod family;
+pub mod export;
+pub mod affinity;
+pub mod clone_size;
+
+pub use clone_size::{sample_clone_size, sample_repertoire_sizes, CloneSizeDist};
+
+pub use affinity::{sim_to_aa, AffinityModel};
+pub use tree::{LineageNode, LineageTree};
+pub use branching::{grow_lineage, grow_lineage_retaining_sims, grow_lineage_with_affinity, grow_topology, BranchingParams};
+pub use sampling::sample_and_collapse;
+pub use family::{simulate_family, simulate_family_sims, simulate_family_with_affinity};
+pub use export::{to_fasta, to_newick, to_node_table_tsv};
diff --git a/engine_rs/src/lineage/poisson.rs b/engine_rs/src/lineage/poisson.rs
new file mode 100644
index 0000000..e428767
--- /dev/null
+++ b/engine_rs/src/lineage/poisson.rs
@@ -0,0 +1,68 @@
+//! Self-contained Poisson sampler over the engine `Rng` (Knuth's algorithm).
+
+use crate::rng::Rng;
+
+/// Draw a Poisson(`lambda`) variate using Knuth's multiplicative method.
+///
+/// Deterministic for a given `Rng` state. Non-finite (NaN / ±inf) or
+/// non-positive `lambda` always returns 0 — the `is_finite` guard matches the
+/// engine's other Poisson sampler (`passes::count_source`) so an infinite
+/// lambda cannot drive the loop to a pathological underflow-termination.
+/// Suitable for the small lambdas used in clonal branching (offspring ~1.5,
+/// mutations ~<1). Not optimized for very large lambda.
+pub fn poisson_sample(rng: &mut Rng, lambda: f64) -> u32 {
+    if !(lambda.is_finite() && lambda > 0.0) {
+        return 0;
+    }
+    let l = (-lambda).exp();
+    let mut k: u32 = 0;
+    let mut p: f64 = 1.0;
+    loop {
+        k += 1;
+        p *= rng.next_f64();
+        if p <= l {
+            return k - 1;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::rng::Rng;
+
+    #[test]
+    fn poisson_zero_lambda_is_always_zero() {
+        let mut rng = Rng::new(7);
+        for _ in 0..100 {
+            assert_eq!(poisson_sample(&mut rng, 0.0), 0);
+        }
+    }
+
+    #[test]
+    fn poisson_is_deterministic_for_a_seed() {
+        let mut a = Rng::new(123);
+        let mut b = Rng::new(123);
+        let xs: Vec<u32> = (0..50).map(|_| poisson_sample(&mut a, 1.5)).collect();
+        let ys: Vec<u32> = (0..50).map(|_| poisson_sample(&mut b, 1.5)).collect();
+        assert_eq!(xs, ys);
+    }
+
+    #[test]
+    fn poisson_nonfinite_lambda_is_zero() {
+        let mut rng = Rng::new(3);
+        assert_eq!(poisson_sample(&mut rng, f64::NAN), 0);
+        assert_eq!(poisson_sample(&mut rng, f64::INFINITY), 0);
+        assert_eq!(poisson_sample(&mut rng, -1.0), 0);
+    }
+
+    #[test]
+    fn poisson_mean_is_near_lambda() {
+        let mut rng = Rng::new(99);
+        let n = 20_000u32;
+        let lambda = 1.5;
+        let total: u64 = (0..n).map(|_| poisson_sample(&mut rng, lambda) as u64).sum();
+        let mean = total as f64 / n as f64;
+        assert!((mean - lambda).abs() < 0.1, "mean {mean} not near {lambda}");
+    }
+}
diff --git a/engine_rs/src/lineage/sampling.rs b/engine_rs/src/lineage/sampling.rs
new file mode 100644
index 0000000..a2e1991
--- /dev/null
+++ b/engine_rs/src/lineage/sampling.rs
@@ -0,0 +1,118 @@
+//! Sampling observed cells from a grown lineage and collapsing identical
+//! genotypes into abundance-bearing observed nodes.
+
+use std::collections::HashMap;
+
+use crate::rng::Rng;
+
+use super::tree::LineageTree;
+
+/// Sample `n_sample` cells uniformly (with replacement) from `sampleable_ids`
+/// (the LIVING population at sampling time — the final-generation cells, NOT all
+/// tree leaves) and collapse identical genotypes: the first cell *drawn* carrying
+/// a given genotype becomes the observed representative and accumulates the
+/// abundance; later draws of the same genotype fold into it. Mutates `tree` in
+/// place.
+///
+/// If `sampleable_ids` is empty (an extinct family — no living cells), nothing is
+/// marked observed: an unobserved extinct clone yields zero records.
+pub fn sample_and_collapse(
+    tree: &mut LineageTree,
+    sampleable_ids: &[u32],
+    n_sample: u32,
+    rng: &mut Rng,
+) {
+    if sampleable_ids.is_empty() || n_sample == 0 {
+        return;
+    }
+
+    // genotype -> representative node id (first cell drawn with that genotype)
+    let mut rep_by_genotype: HashMap<Vec<u8>, u32> = HashMap::new();
+    // representative node id -> accumulated abundance
+    let mut abundance: HashMap<u32, u32> = HashMap::new();
+
+    for _ in 0..n_sample {
+        // Unbiased uniform index (Lemire), matching the engine's RNG convention;
+        // avoids the modulo bias of `next_u64() % len`.
+        let idx = rng.range_u32(sampleable_ids.len() as u32) as usize;
+        let cell_id = sampleable_ids[idx];
+        let genotype = tree.get(cell_id).unwrap().genotype.clone();
+        let rep = *rep_by_genotype.entry(genotype).or_insert(cell_id);
+        *abundance.entry(rep).or_insert(0) += 1;
+    }
+
+    for (id, count) in abundance {
+        // Write-back relies on the `node.id == index into nodes` invariant
+        // (held by construction in the branching loop); guard it in debug builds.
+        debug_assert_eq!(
+            tree.nodes[id as usize].id, id,
+            "id-index invariant violated during sample write-back"
+        );
+        let node = &mut tree.nodes[id as usize];
+        node.abundance = count;
+        node.observed = true;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::lineage::tree::{LineageNode, LineageTree};
+    use crate::rng::Rng;
+
+    // Tree with duplicate genotypes among leaves to exercise collapse.
+    fn tree_with_dupes() -> LineageTree {
+        LineageTree {
+            nodes: vec![
+                LineageNode { id: 0, parent_id: None,    generation: 0, genotype: b"AAAA".to_vec(), mutations_from_parent: 0, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 1, parent_id: Some(0), generation: 1, genotype: b"AAAC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 2, parent_id: Some(0), generation: 1, genotype: b"AAAC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 3, parent_id: Some(1), generation: 2, genotype: b"AAGC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+            ],
+        }
+    }
+
+    // Sampleable set with duplicate genotypes among the living cells (nodes 1 & 2
+    // share "AAAC") to exercise genotype-collapse.
+    fn sampleable() -> Vec<u32> {
+        vec![1, 2, 3]
+    }
+
+    #[test]
+    fn sampling_sets_abundances_summing_to_n_sample() {
+        let mut tree = tree_with_dupes();
+        let mut rng = Rng::new(5);
+        let n_sample = 3;
+        sample_and_collapse(&mut tree, &sampleable(), n_sample, &mut rng);
+
+        let total: u32 = tree.nodes.iter().map(|n| n.abundance).sum();
+        assert_eq!(total, n_sample);
+
+        assert!(tree.nodes.iter().any(|n| n.observed));
+        for n in &tree.nodes {
+            assert_eq!(n.observed, n.abundance > 0);
+        }
+    }
+
+    #[test]
+    fn identical_genotypes_collapse_into_one_observed_node() {
+        let mut tree = tree_with_dupes();
+        let mut rng = Rng::new(1);
+        sample_and_collapse(&mut tree, &sampleable(), 3, &mut rng);
+        use std::collections::HashSet;
+        let mut seen: HashSet<Vec<u8>> = HashSet::new();
+        for n in tree.nodes.iter().filter(|n| n.observed) {
+            assert!(seen.insert(n.genotype.clone()),
+                "genotype observed in more than one node (collapse failed)");
+        }
+    }
+
+    #[test]
+    fn empty_sampleable_set_marks_nothing_observed() {
+        // An extinct family (no living cells) yields zero observed cells.
+        let mut tree = tree_with_dupes();
+        let mut rng = Rng::new(3);
+        sample_and_collapse(&mut tree, &[], 5, &mut rng);
+        assert!(tree.nodes.iter().all(|n| !n.observed && n.abundance == 0));
+    }
+}
diff --git a/engine_rs/src/lineage/tree.rs b/engine_rs/src/lineage/tree.rs
new file mode 100644
index 0000000..188a3f5
--- /dev/null
+++ b/engine_rs/src/lineage/tree.rs
@@ -0,0 +1,191 @@
+//! Clonal lineage tree data structures.
+//!
+//! A `LineageTree` is a flat arena of `LineageNode`s addressed by `id`. The
+//! root is the founder (naive rearrangement); every other node is a somatic
+//! descendant produced by one cell division. `genotype` is the node's
+//! nucleotide sequence (pool bases) captured at creation, used for
+//! genotype-collapse and downstream export.
+
+/// One cell in a clonal lineage.
+#[derive(Clone, Debug, PartialEq)]
+pub struct LineageNode {
+    /// Stable index of this node within `LineageTree::nodes` (also its arena position).
+    pub id: u32,
+    /// Parent node id; `None` only for the root founder.
+    pub parent_id: Option<u32>,
+    /// Generation depth from the root (root == 0).
+    pub generation: u32,
+    /// Nucleotide bases of this cell's `Simulation` pool at creation time.
+    pub genotype: Vec<u8>,
+    /// Mutations introduced on the edge from the parent to this node
+    /// (the number of substitutions on the parent→child edge; 0 for the root).
+    pub mutations_from_parent: u32,
+    /// Affinity of this cell to the target antigen (0.0 = neutral / not computed).
+    pub affinity: f64,
+    /// Observation count after sampling + genotype-collapse. 0 until sampled.
+    pub abundance: u32,
+    /// Whether this node was observed: the representative node a sampled
+    /// genotype collapses onto. (Internal-ancestor observation is a later concern.)
+    pub observed: bool,
+}
+
+/// A clonal lineage as a flat arena of nodes (node `id` == index into `nodes`).
+#[derive(Clone, Debug, Default)]
+pub struct LineageTree {
+    pub nodes: Vec<LineageNode>,
+}
+
+impl LineageTree {
+    /// Total node count (founder + all descendants).
+    pub fn len(&self) -> usize {
+        self.nodes.len()
+    }
+
+    /// True when the tree has no nodes.
+    pub fn is_empty(&self) -> bool {
+        self.nodes.is_empty()
+    }
+
+    /// The founder node (the unique node with no parent).
+    ///
+    /// Panics if the tree has no root — call only on a grown tree.
+    pub fn root(&self) -> &LineageNode {
+        self.nodes
+            .iter()
+            .find(|n| n.parent_id.is_none())
+            .expect("LineageTree has no root node")
+    }
+
+    /// Node by id, or `None` if out of range.
+    pub fn get(&self, id: u32) -> Option<&LineageNode> {
+        self.nodes.get(id as usize)
+    }
+
+    /// Direct children of `id`, in ascending id order.
+    pub fn children_of(&self, id: u32) -> Vec<&LineageNode> {
+        let mut v: Vec<&LineageNode> = self.nodes
+            .iter()
+            .filter(|n| n.parent_id == Some(id))
+            .collect();
+        v.sort_unstable_by_key(|n| n.id);
+        v
+    }
+
+    /// Check structural invariants: `node.id == index` for every node; exactly
+    /// one root; every non-root parent exists; child generation == parent
+    /// generation + 1 (acyclicity then follows from the strictly-increasing
+    /// generation along any parent chain, since `get(pid)` resolves ids as
+    /// arena indices).
+    pub fn validate(&self) -> Result<(), String> {
+        if self.nodes.is_empty() {
+            return Err("empty lineage tree".to_string());
+        }
+        // id == index invariant: relied upon by id-based lookups/write-backs.
+        for (idx, n) in self.nodes.iter().enumerate() {
+            if n.id as usize != idx {
+                return Err(format!(
+                    "node at index {idx} has id {} (id-index mismatch)",
+                    n.id
+                ));
+            }
+        }
+        let roots = self.nodes.iter().filter(|n| n.parent_id.is_none()).count();
+        if roots != 1 {
+            return Err(format!("expected exactly 1 root, found {roots}"));
+        }
+        for n in &self.nodes {
+            if let Some(pid) = n.parent_id {
+                let parent = self
+                    .get(pid)
+                    .ok_or_else(|| format!("node {} references missing parent {pid}", n.id))?;
+                if n.generation != parent.generation + 1 {
+                    return Err(format!(
+                        "node {} generation {} != parent {} generation {} + 1",
+                        n.id, n.generation, parent.id, parent.generation
+                    ));
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Nodes with no children (tips), in ascending id order.
+    ///
+    /// O(n): a single pass marks which ids are parents (relying on the
+    /// `id == index` invariant, same as the rest of the module), then filters.
+    /// Avoids the previous O(n²) of calling `children_of` per node.
+    pub fn leaves(&self) -> Vec<&LineageNode> {
+        let mut has_child = vec![false; self.nodes.len()];
+        for n in &self.nodes {
+            if let Some(p) = n.parent_id {
+                if (p as usize) < has_child.len() {
+                    has_child[p as usize] = true;
+                }
+            }
+        }
+        // Arena order == ascending id under the id==index invariant.
+        self.nodes
+            .iter()
+            .filter(|n| !has_child.get(n.id as usize).copied().unwrap_or(false))
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Build a tiny tree by hand: root(0) -> {1, 2}; 1 -> {3}
+    fn hand_tree() -> LineageTree {
+        LineageTree {
+            nodes: vec![
+                LineageNode { id: 0, parent_id: None,    generation: 0, genotype: b"AAAA".to_vec(), mutations_from_parent: 0, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 1, parent_id: Some(0), generation: 1, genotype: b"AAAC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 2, parent_id: Some(0), generation: 1, genotype: b"AAAG".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+                LineageNode { id: 3, parent_id: Some(1), generation: 2, genotype: b"AATC".to_vec(), mutations_from_parent: 1, affinity: 0.0, abundance: 0, observed: false },
+            ],
+        }
+    }
+
+    #[test]
+    fn validate_accepts_well_formed_tree() {
+        assert!(hand_tree().validate().is_ok());
+    }
+
+    #[test]
+    fn validate_rejects_two_roots() {
+        let mut t = hand_tree();
+        t.nodes[1].parent_id = None; // second root
+        assert!(t.validate().is_err());
+    }
+
+    #[test]
+    fn validate_rejects_nonmonotonic_generation() {
+        let mut t = hand_tree();
+        t.nodes[3].generation = 1; // child not parent.gen + 1
+        assert!(t.validate().is_err());
+    }
+
+    #[test]
+    fn validate_rejects_id_index_mismatch() {
+        let mut t = hand_tree();
+        t.nodes[2].id = 99; // id no longer equals its arena index
+        assert!(t.validate().is_err());
+    }
+
+    #[test]
+    fn tree_accessors_report_structure() {
+        let t = hand_tree();
+        assert_eq!(t.root().id, 0);
+        assert_eq!(t.len(), 4);
+
+        let root_children: Vec<u32> = t.children_of(0).iter().map(|n| n.id).collect();
+        assert_eq!(root_children, vec![1, 2]);
+
+        let leaves: Vec<u32> = t.leaves().iter().map(|n| n.id).collect();
+        assert_eq!(leaves, vec![2, 3]); // nodes with no children
+
+        assert_eq!(t.get(3).unwrap().parent_id, Some(1));
+        assert!(t.get(99).is_none());
+    }
+}
diff --git a/engine_rs/src/live_call/mod.rs b/engine_rs/src/live_call/mod.rs
index 7b1d535..474d0c0 100644
--- a/engine_rs/src/live_call/mod.rs
+++ b/engine_rs/src/live_call/mod.rs
@@ -28,7 +28,7 @@ pub use reference_index::{
     BaseBitsets, BaseEvidence, BoundaryIndex, IndexedAllele, KmerHit, KmerIndex,
     ReferenceMatchIndex, SegmentRefIndex, DEFAULT_REFERENCE_KMER_LEN,
 };
-pub use refresh_hook::LiveCallRefreshHook;
+pub use refresh_hook::{refresh_live_calls, LiveCallRefreshHook};
 pub use refresh_plan::{LiveCallRefreshPlan, LiveCallRefreshStep};
 
 fn assert_live_segment(segment: Segment) {
diff --git a/engine_rs/src/live_call/refresh_hook.rs b/engine_rs/src/live_call/refresh_hook.rs
index 64b750c..90fa844 100644
--- a/engine_rs/src/live_call/refresh_hook.rs
+++ b/engine_rs/src/live_call/refresh_hook.rs
@@ -98,6 +98,26 @@ impl EffectHook for LiveCallRefreshHook {
     }
 }
 
+/// Public, unconditional V/D/J live-call refresh for a simulation
+/// whose pool was edited outside the compiled runtime (e.g. the
+/// clonal-lineage branching loop, which mutates child sims with a
+/// bare `Pass::execute` and so never runs `LiveCallRefreshHook`).
+///
+/// Unlike [`refresh_segments_for_edit`], this recomputes the call
+/// for *every* assignable segment from scratch against the current
+/// pool, regardless of the dirty log — the caller may have applied
+/// many independent edits whose dirty windows were never threaded.
+/// The dirty log is drained so the returned sim is in a clean state.
+pub fn refresh_live_calls(
+    mut sim: Simulation,
+    reference_index: &ReferenceMatchIndex,
+) -> Simulation {
+    for &segment in Segment::assignable() {
+        sim = with_assembled_segment_live_call(&sim, reference_index, segment);
+    }
+    drain_dirty_windows(sim)
+}
+
 /// Refresh V/D/J live calls after a base-edit pass, using dirty
 /// windows stamped by the pass's `DirtySignalObserver` to skip
 /// segments whose region doesn't overlap any dirty position.
diff --git a/engine_rs/src/python/lineage.rs b/engine_rs/src/python/lineage.rs
new file mode 100644
index 0000000..c1c7ba9
--- /dev/null
+++ b/engine_rs/src/python/lineage.rs
@@ -0,0 +1,612 @@
+//! PyO3 bindings for the clonal lineage engine: `LineageNode`, `LineageTree`,
+//! and the `simulate_lineage` entry point.
+
+use pyo3::prelude::*;
+use pyo3::types::PyDict;
+
+use crate::airr_record::build_airr_record;
+use crate::event::{EventRecord, StateSummary, TraceSpan};
+use crate::ir::{NucHandle, SimulationEvent};
+use crate::ir::Simulation;
+use crate::lineage::export::{to_fasta, to_newick, to_node_table_tsv};
+use crate::lineage::tree::{LineageNode, LineageTree};
+use crate::lineage::{simulate_family, simulate_family_sims, simulate_family_with_affinity, sim_to_aa, AffinityModel, BranchingParams};
+use crate::lineage::affinity::make_mature_target;
+use crate::live_call::{refresh_live_calls, ReferenceMatchIndex};
+use crate::pass::{Outcome, PassCompileEffect};
+use crate::rng::Rng;
+use crate::passes::S5FMutationPass;
+use crate::s5f::S5FKernel;
+
+use super::outcome::{PyOutcome, airr_record_to_pydict};
+use super::refdata::PyRefDataConfig;
+use super::simulation::PySimulation;
+
+/// One node of a clonal lineage tree (read-only view).
+#[pyclass(name = "LineageNode", module = "GenAIRR._engine", frozen)]
+pub struct PyLineageNode {
+    pub(crate) inner: LineageNode,
+}
+
+#[pymethods]
+impl PyLineageNode {
+    #[getter]
+    fn id(&self) -> u32 {
+        self.inner.id
+    }
+    #[getter]
+    fn parent_id(&self) -> Option<u32> {
+        self.inner.parent_id
+    }
+    #[getter]
+    fn generation(&self) -> u32 {
+        self.inner.generation
+    }
+    #[getter]
+    fn mutations_from_parent(&self) -> u32 {
+        self.inner.mutations_from_parent
+    }
+    #[getter]
+    fn abundance(&self) -> u32 {
+        self.inner.abundance
+    }
+    #[getter]
+    fn observed(&self) -> bool {
+        self.inner.observed
+    }
+    #[getter]
+    fn affinity(&self) -> f64 {
+        self.inner.affinity
+    }
+    /// Nucleotide sequence (pool bases) as a string.
+    #[getter]
+    fn sequence(&self) -> String {
+        String::from_utf8_lossy(&self.inner.genotype).into_owned()
+    }
+
+    fn __repr__(&self) -> String {
+        format!(
+            "LineageNode(id={}, parent_id={:?}, generation={}, abundance={}, observed={})",
+            self.inner.id,
+            self.inner.parent_id,
+            self.inner.generation,
+            self.inner.abundance,
+            self.inner.observed
+        )
+    }
+}
+
+/// A clonal lineage tree (read-only view) with ground-truth export.
+#[pyclass(name = "LineageTree", module = "GenAIRR._engine", frozen)]
+pub struct PyLineageTree {
+    pub(crate) inner: LineageTree,
+}
+
+impl PyLineageTree {
+    pub(crate) fn new(inner: LineageTree) -> Self {
+        Self { inner }
+    }
+}
+
+#[pymethods]
+impl PyLineageTree {
+    fn __len__(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// All nodes (founder + descendants), in arena order (ascending id).
+    fn nodes(&self) -> Vec<PyLineageNode> {
+        self.inner
+            .nodes
+            .iter()
+            .cloned()
+            .map(|inner| PyLineageNode { inner })
+            .collect()
+    }
+
+    /// Validate structural invariants; raises ValueError if malformed.
+    fn validate(&self) -> PyResult<()> {
+        self.inner
+            .validate()
+            .map_err(pyo3::exceptions::PyValueError::new_err)
+    }
+
+    /// Standard rooted Newick string (branch length = per-edge mutation count).
+    fn to_newick(&self) -> String {
+        to_newick(&self.inner)
+    }
+
+    /// FASTA of every node (ancestral + observed) sequence.
+    fn to_fasta(&self) -> String {
+        to_fasta(&self.inner)
+    }
+
+    /// Tab-separated node table.
+    fn to_node_table_tsv(&self) -> String {
+        to_node_table_tsv(&self.inner)
+    }
+}
+
+/// Grow + sample a clonal lineage family from `founder` using an S5F mutator
+/// built from the supplied kernel tables. Returns the ground-truth tree.
+///
+/// `mutability` must have 1024 entries and `substitution` 4096 (the S5F 5-mer
+/// kernel); all values must be finite and non-negative. `rate` is the per-base
+/// SHM rate in [0, 1]. Determinism is keyed on `seed`. `lambda_mut` is reserved
+/// for a future plan and currently has no effect (mutations are driven by the
+/// S5F `rate`).
+#[pyfunction]
+#[pyo3(signature = (
+    founder, mutability, substitution, rate,
+    lambda_base, lambda_mut, max_generations, n_max, n_sample, seed,
+    selection_strength=0.0, beta=1.0, target_aa=None, mature_substitutions=5
+))]
+#[allow(clippy::too_many_arguments)]
+pub(crate) fn simulate_lineage(
+    founder: &PySimulation,
+    mutability: Vec<f64>,
+    substitution: Vec<f64>,
+    rate: f64,
+    lambda_base: f64,
+    lambda_mut: f64,
+    max_generations: u32,
+    n_max: u32,
+    n_sample: u32,
+    seed: u64,
+    selection_strength: f64,
+    beta: f64,
+    target_aa: Option<String>,
+    mature_substitutions: u32,
+) -> PyResult<PyLineageTree> {
+    use pyo3::exceptions::PyValueError;
+
+    if mutability.len() != 1024 {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: mutability must have 1024 entries, got {}",
+            mutability.len()
+        )));
+    }
+    if substitution.len() != 4096 {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: substitution must have 4096 entries, got {}",
+            substitution.len()
+        )));
+    }
+    if !(rate.is_finite() && (0.0..=1.0).contains(&rate)) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: rate must be in [0.0, 1.0], got {rate}"
+        )));
+    }
+    if mutability.iter().any(|&m| !m.is_finite() || m < 0.0) {
+        return Err(PyValueError::new_err(
+            "simulate_lineage: mutability values must be finite and non-negative",
+        ));
+    }
+    if substitution.iter().any(|&s| !s.is_finite() || s < 0.0) {
+        return Err(PyValueError::new_err(
+            "simulate_lineage: substitution values must be finite and non-negative",
+        ));
+    }
+    if n_max == 0 {
+        return Err(PyValueError::new_err("simulate_lineage: n_max must be > 0"));
+    }
+    if n_sample == 0 {
+        return Err(PyValueError::new_err("simulate_lineage: n_sample must be > 0"));
+    }
+    if !(lambda_base.is_finite() && lambda_base >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: lambda_base must be finite and >= 0, got {lambda_base}"
+        )));
+    }
+    if !(lambda_mut.is_finite() && lambda_mut >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: lambda_mut must be finite and >= 0, got {lambda_mut}"
+        )));
+    }
+    // Cap generations: `to_newick` recurses to a depth equal to the generation
+    // count, so an unbounded value could overflow the stack across the FFI
+    // boundary. 1000 is far beyond any biological germinal-center reaction.
+    if max_generations > 1000 {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: max_generations must be <= 1000, got {max_generations}"
+        )));
+    }
+
+    // Lengths and value ranges are now guaranteed, so S5FKernel::new cannot panic.
+    let kernel = S5FKernel::new(mutability, substitution);
+    let mutator = S5FMutationPass::new_rate(kernel, rate);
+    let params = BranchingParams {
+        lambda_base,
+        lambda_mut,
+        max_generations,
+        n_max,
+        n_sample,
+        seed,
+    };
+
+    // Validate affinity params.
+    if !(beta.is_finite() && beta >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: beta must be finite and >= 0, got {beta}"
+        )));
+    }
+    if !(selection_strength.is_finite() && selection_strength >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_lineage: selection_strength must be finite and >= 0, got {selection_strength}"
+        )));
+    }
+
+    // Neutral fast path: no selection and no explicit target → byte-identical to
+    // the pre-affinity behavior.
+    if selection_strength == 0.0 && target_aa.is_none() {
+        let tree = simulate_family(&founder.inner, &params, &mutator);
+        return Ok(PyLineageTree::new(tree));
+    }
+
+    // Build the affinity model. Uniform per-position weights for v1 (the model
+    // accepts arbitrary weights; CDR3 region-weighting is a follow-up).
+    let founder_aa = sim_to_aa(&founder.inner);
+    let target = match target_aa {
+        Some(s) => s.into_bytes(),
+        None => {
+            // Deterministic auto "mature" target derived from the seed.
+            let mut trng = Rng::new(seed ^ 0x7461_7267_6574_0001); // "target\0\1"
+            make_mature_target(&founder_aa, mature_substitutions, &mut trng)
+        }
+    };
+    let weights = vec![1.0; founder_aa.len()];
+    let model = AffinityModel::new(target, weights, beta, selection_strength, &founder_aa);
+    let tree = simulate_family_with_affinity(&founder.inner, &params, &mutator, &model);
+    Ok(PyLineageTree::new(tree))
+}
+
+/// A grown clonal family: the lineage tree plus per-node AIRR-projectable
+/// `Outcome`s (only observed nodes carry one).
+#[pyclass(name = "FamilyOutcome", module = "GenAIRR._engine")]
+pub struct PyFamilyOutcome {
+    tree: LineageTree,
+    node_outcomes: Vec<Option<Outcome>>, // index == node id
+}
+
+#[pymethods]
+impl PyFamilyOutcome {
+    /// The ground-truth lineage tree.
+    fn tree(&self) -> PyLineageTree {
+        PyLineageTree::new(self.tree.clone())
+    }
+
+    /// Per-node Outcomes aligned with `tree().nodes()`; None for unsampled nodes.
+    fn node_outcomes(&self) -> Vec<Option<PyOutcome>> {
+        self.node_outcomes
+            .iter()
+            .cloned()
+            .map(|o: Option<Outcome>| o.map(PyOutcome::new))
+            .collect()
+    }
+
+    /// Observed nodes' Outcomes only (abundance > 0), in node-id order.
+    fn observed_outcomes(&self) -> Vec<PyOutcome> {
+        self.node_outcomes
+            .iter()
+            .cloned()
+            .flatten()
+            .map(PyOutcome::new)
+            .collect()
+    }
+
+    /// Build AIRR Rearrangement record dicts for every observed node,
+    /// in node-id order (aligned with `observed_outcomes()`).
+    ///
+    /// Each node `Outcome` now carries a synthesized `MUTATE_S5F`
+    /// `EventRecord` encoding every pool position where
+    /// `base != germline` as a `BaseChanged` event, and
+    /// `mutation_count` is set to the number of such events.
+    /// `build_airr_record` therefore produces correct per-segment
+    /// and V-subregion mutation counts natively, without any manual
+    /// field overwrite.
+    fn airr_records<'py>(
+        &self,
+        py: Python<'py>,
+        refdata: &PyRefDataConfig,
+    ) -> PyResult<Vec<Bound<'py, PyDict>>> {
+        let mut results = Vec::new();
+        for (node_id, outcome) in self.node_outcomes.iter().enumerate() {
+            let Some(outcome) = outcome else { continue };
+            let sequence_id = format!("node{}", node_id);
+            let rec = build_airr_record(outcome, refdata.inner(), &sequence_id);
+            let dict = airr_record_to_pydict(py, &rec)?;
+            results.push(dict);
+        }
+        Ok(results)
+    }
+}
+
+// ──────────────────────────────────────────────────────────────────
+// SHM event synthesis helper
+// ──────────────────────────────────────────────────────────────────
+
+/// Synthesize a single `EventRecord` that encodes all net SHM
+/// mutations visible in `sim.pool` (positions where `base !=
+/// germline`) as `SimulationEvent::BaseChanged` events.
+///
+/// The record is tagged with `address::MUTATE_S5F` as its pass
+/// name so the AIRR builder's event-loop filter picks it up and
+/// the per-segment / per-subregion mutation counts it produces are
+/// identical to the pool-scan. The returned count equals the number
+/// of synthesized events; callers should set `sim.mutation_count`
+/// to this value before wrapping `sim` in an `Outcome`.
+fn synthesize_shm_event_record(sim: &Simulation) -> (EventRecord, u32) {
+    let mut events: Vec<SimulationEvent> = Vec::new();
+    for (i, nuc) in sim.pool.as_slice().iter().enumerate() {
+        if nuc.base == nuc.germline {
+            continue;
+        }
+        events.push(SimulationEvent::BaseChanged {
+            handle: NucHandle::new(i as u32),
+            old_base: nuc.germline,
+            new_base: nuc.base,
+            segment: nuc.segment,
+            germline_pos: nuc.germline_pos.get(),
+        });
+    }
+    let net = events.len() as u32;
+    let summary = StateSummary::from_simulation(sim);
+    let record = EventRecord::pass_committed(
+        0,
+        crate::address::MUTATE_S5F,
+        vec![PassCompileEffect::EditBases],
+        TraceSpan::new(0, 0),
+        summary,
+        summary,
+        events,
+    );
+    (record, net)
+}
+
+
+/// Merge a per-node lineage `Outcome` with a corruption `Outcome` that was
+/// run *from* the lineage node's final simulation.
+///
+/// `base` is a lineage node `Outcome`: its `trace` is the founder
+/// recombination trace, its `events` is the synthesized SHM `EventRecord`,
+/// and its final simulation is post-SHM (`mutation_count` = net SHM).
+///
+/// `corruption` is the corruption plan run from `base`'s final simulation:
+/// its `trace` holds the corruption choices, its `events` holds the
+/// corruption `EventRecord`s, and its final simulation is the corrupted
+/// molecule (SHM `mutation_count` is preserved by the corruption passes).
+///
+/// The merged `Outcome` carries the corrupted final simulation (so sequence
+/// + quality bytes reflect the artefacts) but the *concatenation* of both
+/// traces and both event ledgers, so the AIRR builder derives BOTH the SHM
+/// per-segment counts (from the synthesized SHM event) AND the corruption
+/// counters (`n_quality_errors`, `n_pcr_errors`, `n_indels`, …) from the
+/// corruption events / trace.
+#[pyfunction]
+pub(crate) fn merge_lineage_corruption(base: &PyOutcome, corruption: &PyOutcome) -> PyOutcome {
+    let mut trace = base.inner.trace.clone();
+    trace.append_delta(corruption.inner.trace.clone());
+
+    let mut events = base.inner.events.clone();
+    events.extend(corruption.inner.events.iter().cloned());
+
+    PyOutcome::new(Outcome {
+        revisions: corruption.inner.revisions.clone(),
+        pass_names: corruption.inner.pass_names.clone(),
+        trace,
+        events,
+    })
+}
+
+/// Grow + sample a clonal lineage family from `founder` (a full `Outcome`) using
+/// an S5F mutator, returning a `FamilyOutcome` with per-node AIRR-projectable
+/// `Outcome`s for every observed (sampled) node.
+#[pyfunction]
+#[pyo3(signature = (
+    founder, refdata, mutability, substitution, rate,
+    lambda_base, lambda_mut, max_generations, n_max, n_sample, seed,
+    selection_strength=0.0, beta=1.0, target_aa=None, mature_substitutions=5
+))]
+#[allow(clippy::too_many_arguments)]
+pub(crate) fn simulate_family_outcomes(
+    founder: &PyOutcome,
+    refdata: &PyRefDataConfig,
+    mutability: Vec<f64>,
+    substitution: Vec<f64>,
+    rate: f64,
+    lambda_base: f64,
+    lambda_mut: f64,
+    max_generations: u32,
+    n_max: u32,
+    n_sample: u32,
+    seed: u64,
+    selection_strength: f64,
+    beta: f64,
+    target_aa: Option<String>,
+    mature_substitutions: u32,
+) -> PyResult<PyFamilyOutcome> {
+    use pyo3::exceptions::PyValueError;
+
+    if mutability.len() != 1024 {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: mutability must have 1024 entries, got {}",
+            mutability.len()
+        )));
+    }
+    if substitution.len() != 4096 {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: substitution must have 4096 entries, got {}",
+            substitution.len()
+        )));
+    }
+    if !(rate.is_finite() && (0.0..=1.0).contains(&rate)) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: rate must be in [0.0, 1.0], got {rate}"
+        )));
+    }
+    if mutability.iter().any(|&m| !m.is_finite() || m < 0.0) {
+        return Err(PyValueError::new_err(
+            "simulate_family_outcomes: mutability values must be finite and non-negative",
+        ));
+    }
+    if substitution.iter().any(|&s| !s.is_finite() || s < 0.0) {
+        return Err(PyValueError::new_err(
+            "simulate_family_outcomes: substitution values must be finite and non-negative",
+        ));
+    }
+    if n_max == 0 {
+        return Err(PyValueError::new_err("simulate_family_outcomes: n_max must be > 0"));
+    }
+    if n_sample == 0 {
+        return Err(PyValueError::new_err("simulate_family_outcomes: n_sample must be > 0"));
+    }
+    if !(lambda_base.is_finite() && lambda_base >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: lambda_base must be finite and >= 0, got {lambda_base}"
+        )));
+    }
+    if !(lambda_mut.is_finite() && lambda_mut >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: lambda_mut must be finite and >= 0, got {lambda_mut}"
+        )));
+    }
+    if max_generations > 1000 {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: max_generations must be <= 1000, got {max_generations}"
+        )));
+    }
+    if !(beta.is_finite() && beta >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: beta must be finite and >= 0, got {beta}"
+        )));
+    }
+    if !(selection_strength.is_finite() && selection_strength >= 0.0) {
+        return Err(PyValueError::new_err(format!(
+            "simulate_family_outcomes: selection_strength must be finite and >= 0, got {selection_strength}"
+        )));
+    }
+
+    let founder_sim = founder.inner.final_simulation().clone();
+    let founder_trace = founder.inner.trace.clone();
+
+    let kernel = S5FKernel::new(mutability, substitution);
+    let mutator = S5FMutationPass::new_rate(kernel, rate);
+    let params = BranchingParams {
+        lambda_base,
+        lambda_mut,
+        max_generations,
+        n_max,
+        n_sample,
+        seed,
+    };
+
+    // Optional affinity model (same logic as simulate_lineage's non-neutral path).
+    let model: Option<AffinityModel> = if selection_strength == 0.0 && target_aa.is_none() {
+        None
+    } else {
+        let founder_aa = sim_to_aa(&founder_sim);
+        let target = match target_aa {
+            Some(s) => s.into_bytes(),
+            None => {
+                let mut trng = Rng::new(seed ^ 0x7461_7267_6574_0001);
+                make_mature_target(&founder_aa, mature_substitutions, &mut trng)
+            }
+        };
+        let weights = vec![1.0; founder_aa.len()];
+        Some(AffinityModel::new(target, weights, beta, selection_strength, &founder_aa))
+    };
+
+    let (tree, sims) = simulate_family_sims(&founder_sim, &params, &mutator, model.as_ref());
+
+    // The branching loop mutates child sims with a bare `Pass::execute`
+    // (no `LiveCallRefreshHook`), so each node sim's `segment_calls`
+    // sidecar still reflects the FOUNDER's alignment, not its own
+    // mutated pool. Recompute the V/D/J live calls from each observed
+    // node's pool before building its Outcome; otherwise the
+    // synthesized record carries stale v/d/j calls and validate_record
+    // false-fails with AlleleCallTieSetMismatch under heavy SHM.
+    let reference_index = ReferenceMatchIndex::build(refdata.inner());
+
+    let node_outcomes: Vec<Option<Outcome>> = tree
+        .nodes
+        .iter()
+        .map(|n| {
+            if n.observed {
+                let refreshed = refresh_live_calls(sims[n.id as usize].clone(), &reference_index);
+                let s = &refreshed;
+                // Synthesize a MUTATE_S5F EventRecord encoding every
+                // pool position where base != germline as a
+                // BaseChanged event.  This makes the Outcome
+                // self-consistent: build_airr_record reads these
+                // events to derive per-segment / per-subregion
+                // mutation counts, and the net count is stored in
+                // mutation_count so the NMutationsMismatch validator
+                // check passes.
+                let (event_record, net) = synthesize_shm_event_record(s);
+                let s2 = s.with_mutation_count(net);
+                Some(Outcome {
+                    revisions: vec![s2],
+                    pass_names: Vec::new(),
+                    trace: founder_trace.clone(),
+                    events: vec![event_record],
+                })
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    Ok(PyFamilyOutcome { tree, node_outcomes })
+}
+
+/// Draw `n_clones` clone sizes (>=1) from a heavy-tailed distribution, with a
+/// fraction `unexpanded_fraction` forced to size 1. Deterministic for `seed`.
+/// `kind` is "power_law" or "lognormal".
+#[pyfunction]
+#[pyo3(signature = (n_clones, seed, kind="power_law", exponent=2.0, mu=1.0, sigma=1.0, max_size=100_000, unexpanded_fraction=0.0))]
+#[allow(clippy::too_many_arguments)]
+pub(crate) fn sample_clone_sizes(
+    n_clones: u32,
+    seed: u64,
+    kind: &str,
+    exponent: f64,
+    mu: f64,
+    sigma: f64,
+    max_size: u32,
+    unexpanded_fraction: f64,
+) -> PyResult<Vec<u32>> {
+    use pyo3::exceptions::PyValueError;
+    if max_size == 0 {
+        return Err(PyValueError::new_err("sample_clone_sizes: max_size must be > 0"));
+    }
+    if !(unexpanded_fraction.is_finite() && (0.0..=1.0).contains(&unexpanded_fraction)) {
+        return Err(PyValueError::new_err(
+            "sample_clone_sizes: unexpanded_fraction must be in [0.0, 1.0]",
+        ));
+    }
+    let dist = match kind {
+        "power_law" => {
+            if !(exponent.is_finite() && exponent > 0.0) {
+                return Err(PyValueError::new_err(
+                    "sample_clone_sizes: power_law exponent must be finite and > 0",
+                ));
+            }
+            crate::lineage::CloneSizeDist::PowerLaw { exponent, x_max: max_size }
+        }
+        "lognormal" => {
+            if !(mu.is_finite() && sigma.is_finite() && sigma >= 0.0) {
+                return Err(PyValueError::new_err(
+                    "sample_clone_sizes: lognormal mu/sigma must be finite and sigma >= 0",
+                ));
+            }
+            crate::lineage::CloneSizeDist::LogNormal { mu, sigma, x_max: max_size }
+        }
+        other => {
+            return Err(PyValueError::new_err(format!(
+                "sample_clone_sizes: unknown kind {other:?}; use \"power_law\" or \"lognormal\""
+            )));
+        }
+    };
+    let mut rng = crate::rng::Rng::new(seed);
+    Ok(crate::lineage::sample_repertoire_sizes(&mut rng, n_clones, &dist, unexpanded_fraction))
+}
diff --git a/engine_rs/src/python/mod.rs b/engine_rs/src/python/mod.rs
index fbb9e43..8186fdb 100644
--- a/engine_rs/src/python/mod.rs
+++ b/engine_rs/src/python/mod.rs
@@ -28,6 +28,7 @@ use pyo3::prelude::*;
 pub mod compiled;
 pub mod contract;
 pub mod event;
+pub mod lineage;
 pub mod outcome;
 pub mod plan;
 pub mod refdata;
@@ -47,6 +48,13 @@ pub(crate) fn register(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<event::PyStateSummary>()?;
     m.add_class::<event::PyEventRecord>()?;
     m.add_class::<outcome::PyOutcome>()?;
+    m.add_class::<lineage::PyLineageNode>()?;
+    m.add_class::<lineage::PyLineageTree>()?;
+    m.add_class::<lineage::PyFamilyOutcome>()?;
+    m.add_function(pyo3::wrap_pyfunction!(lineage::simulate_lineage, m)?)?;
+    m.add_function(pyo3::wrap_pyfunction!(lineage::simulate_family_outcomes, m)?)?;
+    m.add_function(pyo3::wrap_pyfunction!(lineage::merge_lineage_corruption, m)?)?;
+    m.add_function(pyo3::wrap_pyfunction!(lineage::sample_clone_sizes, m)?)?;
     m.add_class::<refdata::PyAllele>()?;
     m.add_class::<refdata::PyRefDataConfig>()?;
     m.add_class::<plan::PyPassPlan>()?;
diff --git a/engine_rs/src/python/outcome.rs b/engine_rs/src/python/outcome.rs
index 943b62f..34c0e8e 100644
--- a/engine_rs/src/python/outcome.rs
+++ b/engine_rs/src/python/outcome.rs
@@ -225,7 +225,7 @@ impl PyOutcome {
 /// `Option<i64>` becomes `int | None`, `Option<f64>` becomes `float
 /// | None`, `Option<bool>` becomes `bool | None`. Strings and the
 /// few `bool`/`i64`/`f64` non-optional fields go through directly.
-fn airr_record_to_pydict<'py>(py: Python<'py>, rec: &AirrRecord) -> PyResult<Bound<'py, PyDict>> {
+pub(crate) fn airr_record_to_pydict<'py>(py: Python<'py>, rec: &AirrRecord) -> PyResult<Bound<'py, PyDict>> {
     let dict = PyDict::new_bound(py);
 
     // AIRR metadata
diff --git a/mkdocs.yml b/mkdocs.yml
index f4470f8..d864b06 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -204,7 +204,9 @@ nav:
       - Biology map: guides/biology-map.md
       - Recombination + junction biology: guides/recombination-junction.md
       - D inversion + receptor revision: guides/recombination-editing.md
-      - Clonal families: guides/clonal-families.md
+      - Clonal simulation overview: guides/clonal-families.md
+      - Clonal lineage trees: guides/clonal-lineage.md
+      - Clonal repertoires (TCR & abundance): guides/clonal-repertoire.md
       - Junction N/P additions: guides/junction-additions.md
       - Targeted SHM rates: guides/shm-targeting.md
       - Corruption + sequencing artefacts: guides/corruption-sequencing.md
diff --git a/site_docs/assets/clonal-lineage-detection.png b/site_docs/assets/clonal-lineage-detection.png
new file mode 100644
index 0000000..1f07577
Binary files /dev/null and b/site_docs/assets/clonal-lineage-detection.png differ
diff --git a/site_docs/concepts/airr-record.md b/site_docs/concepts/airr-record.md
index 87eb982..0960592 100644
--- a/site_docs/concepts/airr-record.md
+++ b/site_docs/concepts/airr-record.md
@@ -59,7 +59,7 @@ The canonical groups, in the order they appear on the record:
 11. D-inversion provenance
 12. Receptor-revision provenance
 13. Read layout (paired-end)
-14. Clonal stamping (when expand_clones runs)
+14. Clonal stamping (when a clonal workflow runs)
 15. Truth columns (when `expose_provenance=True`)
 
 The remaining sections walk through each.
@@ -127,7 +127,7 @@ load-bearing in two cases:
 
 - **Family validation** uses them to confirm every descendant of
   a clonal family shares the recombination-time identity (see
-  [Clonal families](../guides/clonal-families.md)).
+  [Clonal simulation overview](../guides/clonal-families.md)).
 - **Aligner benchmarking** treats `v_call` as the aligner's
   prediction and `truth_*_call` as the ground truth, even when
   both came from the same simulator — the column split makes the
@@ -343,20 +343,25 @@ for the quality model and the layout coordinates.
 
 ## Clonal fields
 
-Two fields stamped Python-side when `.expand_clones(n_clones=...,
-per_clone=...)` runs:
-
-| Field | Type | Meaning |
-|---|---|---|
-| `clone_id` | int | Identifier of the clonal family this record belongs to (0-based) |
-| `parent_id` | int | Identifier of the ancestor outcome; equals `clone_id` for the records expanded from that ancestor |
-
-On non-clonal runs (no `expand_clones`), both fields are
-**absent** from the record dict. Don't write code that assumes
-they're always present; the family validators (`validate_families`,
-`validate_families_with_parents`) are safe no-ops on non-clonal
-batches because they handle the absent case explicitly. See
-[Clonal families](../guides/clonal-families.md).
+All modern clonal workflows stamp `clone_id`, but the rest of the
+surface depends on which clonal model produced the record:
+
+| Field | Type | Produced by | Meaning |
+|---|---|---|---|
+| `clone_id` | int | `clonal_lineage`, `clonal_repertoire`, `expand_clones` | Planted clone / family label (0-based) |
+| `duplicate_count` | int | `clonal_lineage`, `clonal_repertoire` | AIRR-standard abundance after genotype collapse |
+| `parent_id` | int | legacy `expand_clones` | Identifier of the ancestor `Outcome`; equals `clone_id` for records expanded from that ancestor |
+| `lineage_node_id` | int | `clonal_lineage` | Node id of the observed cell in the ground-truth lineage tree |
+| `lineage_parent_id` | int | `clonal_lineage` | Parent node id in the lineage tree (`-1` for founder) |
+| `lineage_generation` | int | `clonal_lineage` | Generation depth of the observed cell |
+| `lineage_abundance` | int | `clonal_lineage` | Observation count after final-cell sampling and genotype collapse |
+| `lineage_affinity` | float | `clonal_lineage` | Sequence-distance proxy to the target; 0 only when no affinity model is active |
+
+On non-clonal runs, these fields are **absent** from the record dict. Don't write
+code that assumes they're always present; check with `"clone_id" in rec`.
+`validate_families()` is a safe no-op on non-clonal batches because it handles
+the absent case explicitly. See
+[Clonal simulation overview](../guides/clonal-families.md).
 
 ## Validation
 
@@ -369,10 +374,9 @@ The records page composes cleanly with the validation surface:
 - **`validate_families(refdata=None)`** groups records by
   `clone_id` and asserts each family agrees on `truth_v_call`,
   `truth_d_call`, `truth_j_call` (when present).
-- **`validate_families_with_parents(refdata)`** compares each
-  descendant against its actual parent `Outcome` — catches a
-  whole class of family-stamping bugs the cheaper validator
-  misses.
+- **`validate_families_with_parents(refdata)`** is for legacy
+  `expand_clones` results with `result.parents`; it compares each
+  descendant against its actual parent `Outcome`.
 
 The full validation picture lives at the
 [Validation hub](../validation/index.md).
@@ -423,10 +427,11 @@ appear only when `expose_provenance=True` is passed to
 `run_records(...)`. Without the flag, the columns are absent
 entirely — not `None`-valued, absent.
 
-**Expecting `clone_id` on non-clonal records.** Without
-`.expand_clones(...)`, neither `clone_id` nor `parent_id` is
-stamped on the record dict at all. Check for presence with
-`"clone_id" in rec`, not `rec.get("clone_id") is not None`.
+**Expecting `clone_id` on non-clonal records.** Without a clonal
+workflow (`clonal_lineage`, `clonal_repertoire`, or legacy
+`expand_clones`), clonal fields are not stamped on the record dict at
+all. Check for presence with `"clone_id" in rec`, not
+`rec.get("clone_id") is not None`.
 
 ## Where to go next
 
@@ -438,7 +443,7 @@ stamped on the record dict at all. Check for presence with
   — the SHM counters in depth.
 - **[Corruption and sequencing artefacts](../guides/corruption-sequencing.md)**
   — the artefact counters in depth.
-- **[Clonal families](../guides/clonal-families.md)** — the
-  `clone_id` / `parent_id` surface and family validation.
+- **[Clonal simulation overview](../guides/clonal-families.md)** —
+  `clone_id`, `duplicate_count`, lineage metadata, and family validation.
 - **[Validation hub](../validation/index.md)** — re-deriving every
   field from the underlying `Outcome`.
diff --git a/site_docs/demo.md b/site_docs/demo.md
index 57fc53f..33810ef 100644
--- a/site_docs/demo.md
+++ b/site_docs/demo.md
@@ -287,10 +287,14 @@ ValueError: replay_from_trace_file: pass plan signature mismatch.
 
 ---
 
-## 4. Clonal families
+## 4. Legacy fixed-size clonal families
 
 One parent recombination can fork into many independently mutated
-descendants.
+descendants. This demo uses legacy `expand_clones` for the simple
+fixed-size star shape; for new clone benchmarks, see
+[`clonal_lineage`](guides/clonal-lineage.md) for BCR trees and
+[`clonal_repertoire`](guides/clonal-repertoire.md) for TCR / abundance
+repertoires.
 
 <div class="demo-run-card" markdown="1">
 <div class="demo-cell-label">Code</div>
diff --git a/site_docs/guides/biology-map.md b/site_docs/guides/biology-map.md
index 372a0bb..6d4b703 100644
--- a/site_docs/guides/biology-map.md
+++ b/site_docs/guides/biology-map.md
@@ -37,7 +37,9 @@ better starting point.
 | **End-loss (5′ / 3′)** | `.end_loss_5prime(length=...)`, `.end_loss_3prime(length=...)` (or `primer_trim_*prime` aliases) | Library / sequencing artefact — descendant | `end_loss_5_length`, `end_loss_3_length` | [Corruption + sequencing artefacts](corruption-sequencing.md) |
 | **Random strand orientation** | `.random_strand_orientation(prob=0.5)` | Read layout — descendant | `rev_comp` | [Corruption + sequencing artefacts](corruption-sequencing.md) |
 | **Paired-end layout** | `.paired_end(r1_length=..., insert_size=...)` | Read layout — descendant | `read_layout`, `r1_sequence`, `r2_sequence`, `r1_start`, `r1_end`, `r2_start`, `r2_end`, `insert_size` | [Paired-end reads and FASTQ](paired-end-fastq.md) |
-| **Clonal expansion** | `.expand_clones(n_clones=..., per_clone=...)` | Ancestor / descendant fork | `clone_id`, `parent_id` (stamped Python-side) | [Clonal families](clonal-families.md) |
+| **BCR lineage trees** | `.clonal_lineage(...)` | BCR affinity-maturation tree | `clone_id`, `lineage_*`, `duplicate_count`, `result.lineage_trees` | [Clonal lineage trees](clonal-lineage.md) |
+| **TCR / flat-BCR clone-size repertoires** | `.clonal_repertoire(...)` | Non-tree clonal abundance | `clone_id`, `duplicate_count` | [Clonal repertoires](clonal-repertoire.md) |
+| **Legacy fixed-size clonal stars** | `.expand_clones(n_clones=..., per_clone=...)` | Ancestor / descendant fork | `clone_id`, `parent_id`, `result.parents` | [Clonal simulation overview](clonal-families.md) |
 | **Contamination** | `.contaminate(prob=...)` | Library / sequencing artefact — descendant | `is_contaminant` | [Experiment builder](experiment-builder.md) |
 | **Sample metadata** | `.with_metadata(**fields)` | Bookkeeping — post-run | Arbitrary user-stamped columns | [Experiment builder](experiment-builder.md) |
 
@@ -56,24 +58,32 @@ insertion. Productivity constraints (`productive_only`,
 `.receptor_revision()` edit the just-recombined molecule. Each
 can fire at most once per record.
 
-**3. Ancestor / descendant fork (clonal pipelines only).**
-`.expand_clones()` partitions the pipeline. Everything before
-the fork runs once per ancestor; everything after fires per
-descendant.
+**3. Clonal structure (optional).** Choose one clonal surface:
+`clonal_lineage()` for BCR trees, `clonal_repertoire()` for TCR /
+flat-BCR clone-size repertoires, or legacy `expand_clones()` for a
+fixed-size star. For flat forks, everything before the fork runs once
+per clone and everything after fires per emitted copy. For
+`clonal_lineage`, the tree growth and SHM happen inside the lineage
+engine.
 
 **4. Biology — descendant phase.** `.mutate(...)` accumulates
-biological SHM on top of recombination. On clonal pipelines
-this fires *after* `expand_clones`; SHM is per-descendant, not
-shared across the family.
+biological SHM on top of recombination. On flat clonal pipelines
+(`clonal_repertoire` / `expand_clones`) this fires after the fork so
+SHM is per copy, not shared across the clone. TCR refdata rejects
+`.mutate(...)`. `clonal_lineage` has its own tree-internal SHM rate.
 
 **5. Library / sequencing artefacts + read layout —
 descendant phase.** All corruption passes (`pcr_amplify`,
 `sequencing_errors`, `ambiguous_base_calls`, `polymerase_indels`,
 `end_loss_5prime`, `end_loss_3prime`, `random_strand_orientation`,
 `contaminate`) plus the read-layout projection
-(`paired_end`). On clonal pipelines these must come after
-`.expand_clones()`; calling any of them *before* the fork raises
-`ValueError`.
+(`paired_end`). On flat clonal pipelines these must come after the
+fork; calling any of them before `clonal_repertoire()` or
+`expand_clones()` raises `ValueError`. `clonal_lineage` accepts the
+corruption passes after the fork but not `paired_end` yet. With
+`clonal_repertoire`, paired-end records remain abundance-collapsed:
+`duplicate_count` carries copy number and FASTQ export does not expand
+it into repeated read pairs.
 
 **Per-batch bookkeeping** (`.with_metadata(...)`) stamps the
 result after every other stage has run.
@@ -84,8 +94,8 @@ The two main ordering invariants:
   mutation; the corruption passes model the wet lab. Reversing
   the order would model SHM mutating an already-corrupted
   sequence, which doesn't match reality.
-- **All descendant-phase passes follow `expand_clones`.** That's
-  what makes them per-descendant. Putting them earlier would
+- **All descendant/read-phase passes follow flat clonal forks.**
+  That's what makes them per emitted copy. Putting them earlier would
   share their effects across the whole family.
 
 ## Cartridge-controlled vs Experiment-controlled
@@ -135,7 +145,8 @@ The `Experiment` DSL carries the experimental design:
 - **Targeting overrides** — `segment_rates`, `v_subregion_rates`
   on `mutate(...)`; per-experiment `trim(v_3=..., d_5=..., ...)`
   distributions that override the cartridge defaults.
-- **Clonal structure** — `expand_clones(n_clones, per_clone)`.
+- **Clonal structure** — `clonal_lineage(...)`,
+  `clonal_repertoire(...)`, or legacy `expand_clones(...)`.
 - **Read layout** — `paired_end(...)`,
   `random_strand_orientation(...)`.
 - **Run-time flags** — `strict`, `expose_provenance`,
@@ -170,10 +181,10 @@ priors.
 - **Paired-end geometry** — when `read_layout == "paired_end"`,
   R1/R2 coordinates checked against `insert_size`; reads are
   consistent with their parent assembled sequence.
-- **Family invariants** — `validate_families` and
-  `validate_families_with_parents` assert each `clone_id` group
-  agrees on `truth_v_call` / `truth_d_call` / `truth_j_call`.
-  The parent-aware form additionally compares descendants
+- **Family invariants** — `validate_families` groups by `clone_id`
+  and, when truth columns are present, asserts each group agrees on
+  `truth_v_call` / `truth_d_call` / `truth_j_call`. The parent-aware
+  form additionally compares legacy `expand_clones` descendants
   against their actual parent `Outcome`.
 
 **NOT validated:**
diff --git a/site_docs/guides/clonal-families.md b/site_docs/guides/clonal-families.md
index ba6847a..d1c4693 100644
--- a/site_docs/guides/clonal-families.md
+++ b/site_docs/guides/clonal-families.md
@@ -1,314 +1,298 @@
-# Generate clonal families
-
-<p class="lead">A clonal family is one parent recombination plus
-many descendants that share the parent's V(D)J truth but diverge
-through somatic hypermutation, library prep, and sequencing
-artefacts. <code>expand_clones</code> is the DSL marker that turns
-a flat pipeline into a clonal one — and once it's in the chain,
-the API rejects misordered steps so you can't accidentally collapse
-SHM diversity or split a recombination decision across descendants.</p>
-
-## What clonal simulation means
-
-In real biology, a clonal family is the lineage descended from a
-single B cell whose V(D)J recombination has happened. Every cell
-in that lineage shares the same V/D/J allele choices, the same
-trims, the same NP bases, the same D orientation — those are
-*recombination-time* decisions, frozen at fork. What diverges
-between cousins is somatic hypermutation (each B cell mutates
-independently), library prep artefacts (PCR errors don't cross
-between reads), and sequencing geometry (R1/R2 windows are per-read).
-
-GenAIRR models that with a single DSL marker, `expand_clones`. The
-methods you append BEFORE it run *once per clone* (and that
-outcome is shared across every descendant). The methods you
-append AFTER run *once per descendant* (independent draws per
-read).
-
-## A minimal clonal example
+# Clonal simulation overview
 
-```python
-import GenAIRR as ga
+<p class="lead">GenAIRR has three clonal surfaces. Use
+<code>clonal_lineage</code> when you need B-cell affinity-maturation trees,
+<code>clonal_repertoire</code> when you need TCR or flat-BCR clone-size /
+abundance repertoires, and legacy <code>expand_clones</code> only when you
+need the older fixed-size star model. All three stamp planted clone labels so
+AIRR clone-calling and ML benchmarks can compare inferred groups against the
+truth the simulator created.</p>
 
-result = (
-    ga.Experiment.on("human_igh")
-      .recombine()                                          # ancestor phase
-      .expand_clones(n_clones=5, per_clone=10)              # fork
-      .mutate(model="s5f", rate=0.02)                       # descendant phase
-      .run_records(seed=1)
-)
+## Choose the right clonal model
 
-print(len(result))            # 50  = 5 clones × 10 descendants
-print(result[0]["clone_id"], result[0]["parent_id"])    # 0 0
-print(result[10]["clone_id"])                            # 1
-```
+| Use case | DSL | Biology | Output truth |
+|---|---|---|---|
+| **BCR lineage reconstruction / affinity maturation** | [`clonal_lineage(...)`](clonal-lineage.md) | Generation-synchronous B-cell tree, per-division S5F SHM, optional sequence-distance selection, final live-cell sampling | AIRR records with `clone_id`, `lineage_*`, `duplicate_count`; one `LineageTree` per clone |
+| **TCR clone-size / abundance benchmark** | [`clonal_repertoire(...)`](clonal-repertoire.md) | One rearranged T cell copied to a heavy-tailed clone size; no SHM; optional per-read technical noise | AIRR records with `clone_id` and AIRR `duplicate_count` |
+| **Flat BCR abundance without genealogy** | [`clonal_repertoire(...)`](clonal-repertoire.md) | One BCR rearrangement copied to a heavy-tailed size; optional flat post-fork SHM per copy | AIRR records with `clone_id` and `duplicate_count` |
+| **Legacy fixed-size star** | `expand_clones(...)` | One parent rearrangement and a fixed `per_clone` descendant count | AIRR records with `clone_id`, `parent_id`; parent `Outcome`s on `result.parents` |
 
-`expand_clones(n_clones=5, per_clone=10)` produces exactly 5 × 10 =
-50 records. Note that `run_records(seed=1)` doesn't take an `n`
-argument — the clonal pipeline computes its own total. You can
-pass `n=50` as a consistency check, but any other value (including
-`n=100`) raises `ValueError` at run time. The pipeline knows how
-many records it produces; you don't override it.
+`expand_clones` is still supported for old scripts, but new clone-related
+benchmarks should usually start with `clonal_lineage` or `clonal_repertoire`.
+Those two encode the distinction AIRR users usually care about: BCR lineages are
+mutation trees, while TCR clones are abundance groups with technical read noise.
 
-## Ancestor vs descendant phase
+## Shared DSL shape
 
-The DSL partitions steps into two phases:
+Clonal workflows all start by creating one founder rearrangement per clone:
 
-**Ancestor-phase** — runs once per clonal parent. Use for any
-mechanism whose decision must be inherited by every descendant
-of a family.
+```python
+ga.Experiment.on("human_igh").recombine()
+```
 
-| Pass | Why ancestor |
-|---|---|
-| `.recombine()` | V/D/J allele choices + trims + NP define the family's identity |
-| `.invert_d(prob=...)` | D orientation is a recombination-time event; the family shares it |
-| `.receptor_revision(prob=...)` | V replacement happens once during B-cell development; the family shares the post-revision V |
+Everything before the clonal fork runs once per clone. Everything after a flat
+fork (`clonal_repertoire` or `expand_clones`) runs once per emitted read/copy.
+`clonal_lineage` is different: it grows the SHM tree internally, then optional
+library-prep / sequencing artefact passes run once per observed cell.
 
-**Descendant-phase** — runs independently per descendant. Use for
-mechanisms that should vary within a family.
+Only one clonal fork is allowed in a pipeline:
 
-| Pass | Why descendant |
-|---|---|
-| `.mutate(...)` | Each memory B cell mutates independently |
-| `.pcr_amplify(...)` | PCR errors are read-specific |
-| `.polymerase_indels(...)` | Same — per-read library artefacts |
-| `.ambiguous_base_calls(...)` | Per-read N-injection |
-| `.sequencing_errors(...)` | Per-read quality artefacts |
-| `.end_loss_5prime(...)`, `.end_loss_3prime(...)` | Per-read 5'/3' adapter loss |
-| `.random_strand_orientation(...)` | Per-read strand decision |
-| `.paired_end(...)` | R1/R2 windows are per-read |
-
-The DSL enforces ordering at chain time — not at compile time, not
-silently at run time. If you call a descendant-phase method
-*before* `expand_clones`, the next call to `expand_clones` raises:
-
-```text
-ValueError: mutate must be called after expand_clones();
-SHM is descendant-specific in GenAIRR's current clonal model.
-Move mutate(...) after expand_clones(...).
+```python
+.clonal_lineage(...)
+.clonal_repertoire(...)
+.expand_clones(...)
 ```
 
-If you call an ancestor-phase method (`invert_d` / `receptor_revision`)
-*after* `expand_clones`, the offending method itself raises with
-the symmetric message:
+are mutually exclusive.
 
-```text
-ValueError: invert_d must be called before expand_clones();
-D inversion is a recombination-time decision and must be inherited
-by all clone descendants. Move the invert_d(...) call before
-expand_clones(...).
-```
+`run_records(n=...)` is not the way to set clone output size for modern clonal
+models. Use the clonal parameters instead:
 
-A second call to `expand_clones` raises `expand_clones() can only
-be called once per pipeline`.
-
-In practice the rule of thumb is: anything biology fixed once per
-cell goes before the fork, anything happening per read goes after.
-The DSL catches violations the moment you write them.
+| Model | Output-size knobs |
+|---|---|
+| `clonal_lineage` | `n_clones`, `max_generations`, `n_max`, `n_sample`, extinction/survival, genotype collapse |
+| `clonal_repertoire` | `n_clones`, `size_distribution`, `max_size`, `unexpanded_fraction`, genotype collapse |
+| `expand_clones` | `n_clones × per_clone` fixed descendants |
 
-## Reading clone IDs and parents
+## BCR lineage trees
 
-Every descendant record carries two integer fields that wire it
-back to its clonal family:
+Use `clonal_lineage` when you need a real B-cell genealogy:
 
 ```python
-rec = result[0]
-
-rec["clone_id"]    # 0  — family identity; every descendant of clone 0 carries 0
-rec["parent_id"]   # 0  — addressing index into result.parents
-```
+import GenAIRR as ga
 
-**Today `clone_id == parent_id` by construction** — the two are
-stamped separately because they carry distinct semantics
-(`clone_id` is the family identity, `parent_id` is the lookup
-index into the parents collection), so a future change to the
-addressing scheme can move one without retrofitting the other.
-Treat them as a pair; address downstream joins by `clone_id`.
+result = (
+    ga.Experiment.on("human_igh")
+      .recombine()
+      .clonal_lineage(
+          n_clones=20,
+          max_generations=6,
+          n_max=300,
+          n_sample=30,
+          rate=0.01,
+          lambda_base=1.6,
+          selection_strength=10.0,
+      )
+      .sequencing_errors(rate=0.005)
+      .run_records(seed=0, validate_records=True)
+)
 
-The parent outcomes live separately on the result:
+rec = result.records[0]
+print(rec["clone_id"], rec["lineage_node_id"], rec["lineage_generation"])
+print(rec["lineage_abundance"], rec["duplicate_count"])
 
-```python
-result.parents                       # list of length n_clones
-len(result.parents)                  # 5
-
-parent_outcome = result.parents[rec["parent_id"]]
-# Outcome carrying the pre-fork plan's full trace + event ledger
-parent_outcome.final_simulation()    # post-recombine IR
-parent_outcome.trace()               # pre-fork addressed-choice trace
-parent_outcome.events()              # pre-fork event ledger
+tree = result.lineage_trees[rec["clone_id"]]
+newick = tree.to_newick()
+node_table = tree.to_node_table_tsv()
 ```
 
-The flat `result.outcomes` list continues to hold exactly one
-entry per descendant record (length `n_clones × per_clone`);
-parents are exposed on the separate `.parents` collection so
-clonal consumers get extra information without changing the
-descendant-list shape.
+What happens:
 
-!!! info "Non-clonal results"
-    `result.parents` is `None` when `expand_clones` is NOT in the
-    pipeline. Record dicts from non-clonal runs also have
-    `record.get("clone_id") is None` and `record.get("parent_id")
-    is None`. Code that may receive either shape can safely use
-    `result.parents or []` and `record.get("clone_id")` without
-    branching on clonal-ness.
+1. `recombine()` creates one naive BCR founder per clone.
+2. The Rust lineage engine grows a tree for `max_generations`, with live-cell
+   carrying capacity `n_max`.
+3. Each child receives per-division S5F SHM at `rate`.
+4. If selection is enabled, offspring rates are modulated by a BLOSUM62
+   sequence-distance proxy, not a physical antigen-binding model.
+5. `n_sample` cells are sampled from the living final-generation population and
+   identical genotypes are collapsed into `lineage_abundance` /
+   `duplicate_count`.
 
-## Validation
+`clonal_lineage` is BCR-only. Calling it on TCR refdata raises `ValueError`
+because T cells do not somatically hypermutate.
 
-The validation hub covers the layers in depth; here's the clonal
-slice. Three calls compose for the strongest gate:
+Deep dive: [Clonal lineage trees](clonal-lineage.md).
+
+## TCR and flat-BCR abundance repertoires
+
+Use `clonal_repertoire` when the clone truth is membership and abundance, not a
+lineage tree:
 
 ```python
-# 1. Per-record AIRR consistency.
-record_report = result.validate_records(refdata)
-assert record_report, record_report.summary()
+import GenAIRR as ga
 
-# 2. Within-family invariants (no refdata required).
-family_report = result.validate_families()
-assert family_report, family_report.summary()
+result = (
+    ga.Experiment.on("human_tcrb")
+      .allow_curatable_refdata()
+      .recombine()
+      .clonal_repertoire(
+          n_clones=200,
+          size_distribution="power_law",
+          exponent=2.0,
+          max_size=500,
+          unexpanded_fraction=0.5,
+      )
+      .sequencing_errors(rate=0.005)
+      .run_records(seed=0, validate_records=True)
+)
 
-# 3. Parent-aware checks: descendant truth fields agree with their parent.
-full_report = result.validate_families_with_parents(refdata)
-assert full_report, full_report.summary()
+for rec in result.records[:5]:
+    print(rec["clone_id"], rec["duplicate_count"], rec["v_call"], rec["j_call"])
 ```
 
-What each adds:
-
-- **`validate_families()`** groups records by `clone_id` and asserts
-  the recombination-time truth fields (`truth_v_call`, `truth_d_call`,
-  `truth_j_call`) are invariant across every descendant of a clone.
-  No refdata required; works on records-only results (e.g.
-  round-tripped from TSV). On a non-clonal batch (no record carries
-  a non-null `clone_id`) it's a safe no-op and returns ok with
-  `family_count == 0` — you don't need to branch on whether the
-  pipeline was clonal.
-- **`validate_families_with_parents(refdata)`** adds structural
-  checks (`ParentsMissing` / `ParentIdMissing` / `ParentIdOutOfRange`)
-  and value-comparison checks against the actual parent `Outcome`
-  on `result.parents` (`ParentDInvertedMismatch`,
-  `ParentOriginalVCallMismatch`, `ParentTruthVCallMismatch`,
-  etc.). It does NOT re-run the within-family checks
-  `validate_families` does; it's a sibling validator, not a
-  superset.
-
-The runtime opt-in `run_records(..., validate_records=True)` runs
-the per-record validator *and* the field-level family checks — but
-NOT the parent-aware checks. If you want
-`validate_families_with_parents` in CI, call it explicitly:
+What happens:
 
-```python
-result = exp.run_records(seed=42, validate_records=True)
-assert result.validate_families_with_parents(refdata), "parent mismatch"
-```
+1. `recombine()` creates one rearrangement per clone.
+2. A clone size is drawn from a heavy-tailed distribution: rounded continuous
+   power-law by default, or log-normal.
+3. That many copies pass through post-fork per-read passes such as
+   `sequencing_errors`, `pcr_amplify`, `polymerase_indels`, `end_loss_*`,
+   `ambiguous_base_calls`, `random_strand_orientation`, or `paired_end`.
+4. Identical output sequences collapse into one AIRR record whose
+   `duplicate_count` is the represented abundance.
 
-## Common workflows
+For TCR, do not add `.mutate(...)`; the API rejects it. TCR within-clone sequence
+diversity should come from technical artefact passes only. For flat BCR
+abundance, you may add post-fork `.mutate(...)`, but that is independent SHM off
+the founder, not a tree.
 
-A few patterns that come up repeatedly with clonal output.
+Deep dive: [Clonal repertoires](clonal-repertoire.md).
 
-**Clone-level SHM benchmark.** Hold the parent V(D)J fixed and
-let SHM diverge:
+## Legacy fixed-size stars
+
+`expand_clones` remains available for old fixed-size star benchmarks:
 
 ```python
 result = (
     ga.Experiment.on("human_igh")
       .recombine()
-      .expand_clones(n_clones=100, per_clone=50)
-      .mutate(model="s5f", rate=0.05)
-      .run_records(seed=42)
+      .expand_clones(n_clones=5, per_clone=10)
+      .mutate(model="s5f", rate=0.02)
+      .run_records(seed=1)
 )
 
-# Per-clone SHM distribution:
-import pandas as pd
-df = pd.DataFrame(result.records)
-df.groupby("clone_id")["n_mutations"].describe()
+print(len(result.records))                         # 50
+print(result.records[0]["clone_id"])               # 0
+print(result.records[0]["parent_id"])              # 0
+parent = result.parents[result.records[0]["parent_id"]]
 ```
 
-**Parent / descendant comparison.** Pull the parent IR for each
-descendant and compare against the post-SHM sequence:
+`expand_clones` records carry `parent_id` and `result.parents` because the old
+star model keeps an explicit parent `Outcome`. Modern `clonal_repertoire` does
+not expose `parents`; it collapses abundance into records. `clonal_lineage`
+exposes lineage truth through `lineage_*` fields and `result.lineage_trees`
+instead.
+
+## Output fields by model
+
+| Field / object | `clonal_lineage` | `clonal_repertoire` | `expand_clones` |
+|---|---|---|---|
+| `clone_id` | Yes: planted family label | Yes: planted clone label | Yes: planted family label |
+| `duplicate_count` | Yes: alias of `lineage_abundance` after final-cell sampling | Yes: collapsed abundance | No standard abundance field in the legacy star model |
+| `lineage_node_id` / `lineage_parent_id` / `lineage_generation` | Yes | No | No |
+| `lineage_abundance` / `lineage_affinity` | Yes | No | No |
+| `parent_id` | No; use `lineage_parent_id` for tree parent | No | Yes |
+| `result.parents` | No | No | Yes |
+| `result.lineage_trees` | Yes | No | No |
+| `result.outcomes` | One per observed record | One per emitted/collapsed record | One per descendant record |
+
+For external clone-calling tools, keep the planted label under a non-AIRR name so
+it does not collide with the tool's inferred `clone_id`:
 
 ```python
-for rec in result.records:
-    parent = result.parents[rec["parent_id"]]
-    parent_seq = parent.final_simulation().bases()
-    # rec["sequence"] is the post-SHM descendant; parent_seq is
-    # the pre-fork assembled IR.
+import pandas as pd
+
+df = pd.DataFrame(result.records).rename(columns={"clone_id": "true_clone_id"})
+df.to_csv("repertoire.tsv", sep="\t", index=False)
 ```
 
-**Export records with clone IDs.** Every export format carries the
-`clone_id` and `parent_id` columns as-is — they ship with the
-record dict:
+`duplicate_count` is the AIRR-standard abundance column consumed by
+abundance-aware workflows. `clonal_repertoire` and `clonal_lineage` emit it
+directly.
+
+## Validation
+
+Use record validation on every clonal workflow:
 
 ```python
-result.to_tsv("repertoire.tsv")            # clone_id + parent_id columns included
-df = result.to_dataframe()                 # same in the DataFrame
-result.to_fasta("seqs.fa", prefix="seq")   # headers include sequence_id; clone IDs in TSV
+result = exp.run_records(seed=42, validate_records=True)
 ```
 
-**Paired FASTQ from clonal output.** `paired_end` is descendant-
-phase, so it composes with `expand_clones` naturally:
+or explicitly:
 
 ```python
-result = (
-    ga.Experiment.on("human_igh")
-      .recombine()
-      .expand_clones(n_clones=50, per_clone=20)
-      .mutate(model="s5f", rate=0.05)
-      .paired_end(r1_length=150, insert_size=300)
-      .run_records(seed=1)
-)
+record_report = result.validate_records(refdata)
+assert record_report, record_report.summary()
+```
+
+Family validation is records-only and works across all clonal models that carry
+`clone_id`:
 
-result.to_paired_fastq("reads_R1.fastq", "reads_R2.fastq")
-# 1,000 R1 records + 1,000 R2 records, sequence_ids match
+```python
+family_report = result.validate_families()
+assert family_report, family_report.summary()
 ```
 
+Currently `validate_families()` checks that every record in a clonal batch has a
+`clone_id` and, when `truth_v_call` / `truth_d_call` / `truth_j_call` are present
+from `expose_provenance=True`, that those truth calls are invariant within each
+clone. It does not validate lineage topology, clone-size priors, or biological
+realism.
+
+`validate_families_with_parents(refdata)` is specific to legacy
+`expand_clones`, because it compares descendant records against
+`result.parents`. For `clonal_lineage`, validate the tree objects directly with
+`tree.validate()` and use the exported Newick/FASTA/node table for lineage-tool
+scoring.
+
+## Ordering rules
+
+Ancestor-phase steps go before the clonal fork:
+
+| Step | Why |
+|---|---|
+| `.recombine()` | Defines the clone's V/D/J, trims, NP sequence, and junction |
+| `.invert_d(...)` | Recombination-time D orientation; inherited by the clone |
+| `.receptor_revision(...)` | Recombination/development-time V replacement; inherited by the clone |
+| `.productive_only()` / `.restrict_alleles(...)` | Constraints on the founder draw |
+
+Descendant/read-phase steps go after a flat fork (`clonal_repertoire` or
+`expand_clones`):
+
+| Step | Notes |
+|---|---|
+| `.mutate(...)` | BCR-only flat SHM; not allowed on TCR |
+| `.pcr_amplify(...)`, `.sequencing_errors(...)`, `.polymerase_indels(...)` | Per-read technical artefacts |
+| `.ambiguous_base_calls(...)`, `.end_loss_*prime(...)`, `.random_strand_orientation(...)` | Per-read observation artefacts |
+| `.paired_end(...)` | Supported after legacy `expand_clones`; accepted after `clonal_repertoire` with abundance-collapse caveats; not yet supported after `clonal_lineage` |
+
+For `clonal_lineage`, do not add `.mutate(...)` afterward: SHM is internal to the
+tree and controlled by `clonal_lineage(rate=...)`. Library-prep and sequencing
+artefact passes may follow; `paired_end` is still a future addition for lineage
+output.
+
+When `paired_end` follows `clonal_repertoire`, records are still collapsed by
+assembled `sequence` and carry `duplicate_count`. TSV/DataFrame output preserves
+that abundance. FASTQ exporters do not expand `duplicate_count` back into multiple
+read pairs, so use this path for paired fields on collapsed records, not for exact
+per-copy paired FASTQ depth.
+
 ## Common mistakes
 
-A handful of issues that show up with clonal pipelines.
-
-**Calling `invert_d()` after `expand_clones()`.** D orientation is
-a recombination-time decision; it must be inherited by every
-descendant. `invert_d` rejects this immediately at chain time with
-"D inversion is a recombination-time decision and must be
-inherited by all clone descendants." Move it before
-`expand_clones`. Same goes for `receptor_revision`.
-
-**Putting `.paired_end()` before `expand_clones()`.** R1/R2 windows
-are per-read; placing `paired_end` in the ancestor phase would
-share one R1/R2 window across the whole family. `expand_clones`
-scans the prior steps when called and rejects descendant-phase
-methods that appear before it: "paired_end must be called after
-expand_clones(); it is descendant-specific and must be sampled
-independently for each clone member."
-
-**Expecting child traces to include the full parent trace.** A
-descendant `Outcome.trace()` carries only the post-fork plan's
-addressed choices — SHM substitutions, PCR errors, indels, etc.
-The pre-fork plan's trace (recombination choices, NP bases, D
-inversion) lives on `result.parents[i].trace()` instead. They're
-two separate addressing namespaces because the pre-fork plan ran
-once per parent and the post-fork plan ran once per descendant.
-
-**Expecting mutation-distance aggregation fields today.** The
-clonal validator's per-clone mutation-distance distribution and
-pre-SHM junction invariance checks are deliberately deferred
-(future-slice scope). Today's `validate_families` checks the
-recombination-time truth fields are invariant within a clone; it
-does NOT verify that descendant SHM counts cluster around a
-biologically plausible distribution. If you need that, compute it
-yourself from `df.groupby("clone_id")["n_mutations"]`.
+**Using `clonal_lineage` for TCR.** TCR clones do not SHM. Use
+`clonal_repertoire` for TCR clone-size and abundance benchmarks.
+
+**Expecting exact discrete Zipf from `clonal_repertoire`.** The default
+`power_law` sampler is a rounded continuous inverse-CDF draw. It is heavy-tailed
+and Zipf-like, but not an exact discrete Zipf PMF.
+
+**Expecting `parent_id` on every clonal model.** `parent_id` belongs to legacy
+`expand_clones`. Use `lineage_parent_id` for BCR lineage-tree parentage, and use
+`clone_id` + `duplicate_count` for `clonal_repertoire`.
+
+**Using `n=` with modern clonal models.** `clonal_lineage` and
+`clonal_repertoire` compute record counts from their own parameters and genotype
+collapse. Passing `n` to `run_records` raises.
 
 ## Where to go next
 
-- **[Validation & reproducibility](../validation/index.md)** — the
-  hub explaining the three validation layers and how the runtime
-  opt-in composes with `validate_families`.
-- **[SHM and mutation targeting](shm-targeting.md)** — what runs
-  per descendant in the SHM pass.
-- **[Export the results](../getting-started/export-results.md)** —
-  TSV / DataFrame / paired FASTQ formats; `clone_id` ships on
-  every record.
-- **[The Experiment builder](experiment-builder.md)** — the full
-  control-panel page including the canonical ancestor-phase /
-  descendant-phase rule.
-- For the engine-side mechanics of the plan split and the
-  pre-fork / post-fork compile path, see the contributor audit
-  [`docs/clonal_plan_split_design.md`](https://github.com/MuteJester/GenAIRR/blob/master/docs/clonal_plan_split_design.md).
+- **[Clonal lineage trees](clonal-lineage.md)** — full BCR lineage model,
+  affinity-selection proxy, tree exporters, and Change-O validation example.
+- **[Clonal repertoires](clonal-repertoire.md)** — TCR and flat-BCR abundance
+  model, clone-size parameters, `duplicate_count`, and tool export notes.
+- **[Validation & reproducibility](../validation/index.md)** — record and family
+  validation layers.
+- **[Corruption + sequencing artefacts](corruption-sequencing.md)** — technical
+  noise passes that compose with clonal workflows.
+- **[Paired-end reads and FASTQ](paired-end-fastq.md)** — paired-end output;
+  currently available for non-lineage clonal workflows.
diff --git a/site_docs/guides/clonal-lineage.md b/site_docs/guides/clonal-lineage.md
new file mode 100644
index 0000000..98a802d
--- /dev/null
+++ b/site_docs/guides/clonal-lineage.md
@@ -0,0 +1,436 @@
+# Clonal lineage trees (affinity maturation)
+
+<p class="lead">Where <code>expand_clones</code> produces a <em>star</em> — one founder and many independent descendants —
+<code>clonal_lineage</code> grows a real <em>tree</em>: a generation-by-generation
+birth–death process in which cells divide, somatically hypermutate, are selected
+for antigen affinity, and are finally sampled. The output is a set of
+per-cell AIRR records <em>plus</em> the ground-truth lineage tree (topology,
+ancestral sequences, abundances) — the kind of object B-cell lineage-inference tools
+(GCtree, IgPhyML, dowser, Change-O) are built to reconstruct. This page explains
+precisely how it works under the hood; nothing here is a black box.</p>
+
+## Why a tree, not a star
+
+A clonal family in vivo is the progeny of one naive B cell that has entered a
+germinal center. Inside that germinal center the cell **divides**, its B-cell
+receptor **somatically hypermutates** a few bases per division, and cells whose
+mutated receptor **binds the antigen better** are preferentially expanded
+(affinity maturation). The result is a genealogy with internal ancestors,
+unequal branch lengths, and selective sweeps — a tree.
+
+The older `expand_clones` collapses all of that into a star: it takes the founder
+recombination and draws `per_clone` independent descendants directly off it. That
+is fine for "many reads that share a V(D)J truth", but it has no genealogy, no
+generations, no selection, and no ancestral nodes — so it cannot serve as ground
+truth for lineage reconstruction. `clonal_lineage` adds the missing biology.
+
+> **`clonal_lineage` is BCR-only.** T cells do **not** somatically hypermutate, and
+> `clonal_lineage` applies S5F SHM, so calling it on a TCR locus raises a clear
+> `ValueError`. A TCR "clone" is one rearrangement proliferated to many identical
+> copies; the meaningful quantity is the **clone-size distribution**, not a mutation
+> tree. For **TCR and flat clonal repertoires**, use
+> [`clonal_repertoire`](clonal-repertoire.md) — it draws a heavy-tailed clone size
+> per clone and emits `clone_id` + `duplicate_count` (see
+> [Clone-size distributions](#clone-size-distributions-tcr-and-repertoire-mix)).
+
+## Quick start
+
+```python
+import GenAIRR as ga
+
+result = (
+    ga.Experiment.on("human_igh")
+      .recombine()                       # the founder: one V(D)J recombination per clone
+      .clonal_lineage(
+          n_clones=20,                   # grow 20 independent families
+          max_generations=6,             # germinal-center rounds
+          n_max=300,                     # per-generation living-population carrying capacity
+          n_sample=30,                   # cells sampled per family at the end
+          rate=0.01,                     # per-base S5F SHM rate, per division
+          lambda_base=1.6,               # mean offspring per cell per generation
+          selection_strength=10.0,       # 0 = neutral; >0 = affinity selection
+      )
+      .run_records(seed=0)
+)
+
+# Per-cell AIRR records, tagged with clone + lineage metadata:
+for rec in result.records[:3]:
+    print(rec["clone_id"], rec["lineage_node_id"], rec["lineage_generation"],
+          rec["lineage_abundance"], rec["lineage_affinity"], rec["v_call"])
+
+# Ground-truth trees, one per clone:
+tree = result.lineage_trees[0]
+tree.validate()                  # structural invariants (raises if malformed)
+newick = tree.to_newick()        # true topology, branch length = per-edge mutations
+fasta  = tree.to_fasta()         # every node's sequence (ancestral + observed)
+table  = tree.to_node_table_tsv()
+```
+
+## How it works under the hood
+
+Each clone is grown independently by a generation-synchronous birth–death process
+in the Rust engine. The whole loop is deterministic for a given `seed`.
+
+```mermaid
+flowchart TB
+    A["Founder: one V(D)J recombination<br/>(the pre-fork plan, run once per clone)"] --> B["Generation g = 1..max_generations"]
+    B --> C["For each live cell:<br/>offspring k ~ Poisson(λ_eff)"]
+    C --> D["λ_eff = lambda_base<br/>× carrying-capacity damping<br/>× affinity fitness(cell)"]
+    C --> E["Each child = parent + per-division S5F mutations"]
+    E --> F["Child affinity = exp(−β · weighted aa distance to target)"]
+    F --> B
+    B --> G["Stop at max_generations / extinction / capacity"]
+    G --> H["Sample n_sample cells from the final population"]
+    H --> I["Collapse identical genotypes → abundances<br/>(observed nodes)"]
+    I --> J["Project each observed cell → AIRR record<br/>+ emit ground-truth tree"]
+```
+
+### 1. The founder
+
+`recombine()` (everything before `clonal_lineage` in the chain) is the **per-clone**
+phase: it runs once to produce the naive rearrangement — the V/D/J allele picks,
+trims, NP bases, junction. That founder `Simulation` (and its recombination trace)
+is the root of the tree. Clone *c* uses seed `seed + c × 1_000_000`, so families
+are independent and reproducible.
+
+### 2. Generation-synchronous birth–death
+
+Growth proceeds in discrete generations. In each generation every currently-live
+cell produces a number of offspring drawn from a Poisson distribution:
+
+```
+k ~ Poisson(λ_eff)
+```
+
+`k = 0` means the cell leaves no progeny (it becomes a tip); `k ≥ 1` creates `k`
+children for the next generation. `λ_eff` is the base offspring rate `lambda_base`
+modulated by two factors below.
+
+### 3. Carrying capacity (logistic damping)
+
+A germinal center is population-bounded, so the effective rate is damped as the
+live population `P` approaches `n_max`:
+
+```
+λ_eff = lambda_base × max(0, 1 − P / n_max)
+```
+
+Near saturation `λ_eff → 0` and growth plateaus instead of exploding. A hard cap
+also prevents the live set from exceeding `n_max` even on a lucky Poisson draw.
+
+### 4. Per-division somatic hypermutation (S5F)
+
+Every child is a clone of its parent's `Simulation` with a fresh round of somatic
+hypermutation applied. GenAIRR reuses its **context-sensitive S5F** engine (the
+same one behind `mutate(model="s5f")`): mutations are drawn from the 5-mer
+mutability/substitution kernel (`s5f_model`, default `"hh_s5f"`) at per-base rate
+`rate`, applied one at a time with the sequence context re-evaluated between
+mutations. Because SHM only substitutes bases in place, each cell keeps the
+founder's V/D/J assignments and region map — its germline ancestry stays intact,
+which is what lets every node be projected to a correct AIRR record (below).
+
+The branch length stored on each edge is the **realized** number of substitutions
+introduced on that division.
+
+### 5. Affinity selection
+
+This is what turns a neutral tree into affinity maturation. Each cell has an
+**affinity** to a target *sequence*:
+
+```
+affinity = exp(−beta · weighted_aa_distance(cell, target))
+```
+
+> **What "affinity" means here — read this.** This is a **sequence-distance
+> proxy**, not a physically modeled antigen-binding affinity. It is a BLOSUM62
+> substitution-aware amino-acid distance from the cell's translated receptor to a
+> **target amino-acid sequence**, mapped through `exp(−β · distance)`. There is
+> **no Kd, no antigen concentration, no biophysical binding model** anywhere in
+> the computation. Treat it as a tunable **selection pressure that pulls the
+> lineage toward a target sequence** — the closer a cell's receptor gets to the
+> target, the higher its "affinity" and the faster it divides.
+
+`weighted_aa_distance` is a **BLOSUM62 substitution-aware** amino-acid distance
+between the cell's translated receptor and the target (region weights let CDRs be
+emphasized; v1 uses uniform weights, with CDR3-weighting as a planned refinement).
+`affinity` is 1.0 at the target sequence and decays toward 0 as the cell diverges.
+
+The target is either supplied by you (`target_aa=...`, a target amino-acid
+sequence) or auto-generated as a "mature" target — the founder's amino-acid
+sequence with `mature_substitutions` random residue changes (the standard
+benchmark convention).
+
+Affinity feeds back into the offspring rate through a **fitness multiplier**:
+
+```
+fitness = max(0, 1 + selection_strength · (affinity − founder_affinity))
+λ_eff   = lambda_base × carrying_capacity_damping × fitness
+```
+
+`founder_affinity` is the affinity of the naive founder, so the founder has
+fitness ≈ 1, cells that improve on it divide faster, and worse cells divide
+slower — producing selective sweeps. **`selection_strength = 0` makes `fitness ≡ 1`,
+i.e. a neutral tree** (byte-identical to growing with no selection at all).
+
+> **Calibration note.** `exp(−beta · distance)` can underflow to ~0 for long
+> receptor sequences at large `beta`, flattening selection. If you supply a full
+> antigen `target_aa`, keep `beta` small (e.g. `1e-3`) or use the auto target.
+
+### 6. Sampling and genotype collapse
+
+When growth stops (at `max_generations` or capacity), `n_sample` cells are sampled
+from the **LIVING final-generation population** — the cells that are alive when
+growth stops. Cells with **identical genotypes** are then **collapsed** into
+**observed cells**: the first cell seen for a genotype becomes the observed
+representative and accumulates an **abundance** count (surfaced as both
+`lineage_abundance` and the AIRR-standard `duplicate_count`) — so abundance-aware
+tools (GCtree, Change-O, SCOPer, dowser) get observed cells with multiplicities.
+The observed cells are the ones that become AIRR records.
+
+Because sampling draws from the **living** population, an **extinct clone** — one
+whose founder draws 0 offspring — has no living cells and therefore yields **zero
+observed cells and zero records**. A single founder at `lambda_base ≈ 1.5` goes
+extinct roughly 25 % of the time. By default (`allow_extinction=False`) each
+requested clone is **conditioned on survival**: an extinct family is re-grown with
+a fresh deterministic sub-seed (up to a bounded number of attempts) so you reliably
+get all `n_clones` families back. Determinism is preserved — the same top-level
+`seed` always reproduces the same result. Set `allow_extinction=True` to accept
+extinction instead: extinct clones are skipped and you get **fewer** than `n_clones`
+families.
+
+The full genealogy — including every unobserved **internal ancestor** — is still
+emitted in the `LineageTree` (and its Newick/FASTA), so ancestral-sequence
+reconstruction can be scored against truth; note, however, that observed/sampled
+nodes are always tips, not internal ancestors (direct sampling of internal
+ancestors is a future addition).
+
+## What you get back
+
+### Per-cell AIRR records
+
+`result.records` is a list of full AIRR Rearrangement dicts, one per *observed*
+(genotype-collapsed) cell. Each carries the founder's recombination provenance
+(`v_call`, `d_call`, `j_call`, junction, …) — correct because the node's `Outcome`
+reuses the founder's recombination trace — plus its own mutated `sequence`. Mutation
+counts (`n_mutations`, `n_v_mutations`, …, and the IMGT-subregion counters) are
+recomputed **from the cell's sequence vs. germline** — these are **net differences
+from germline** (accumulated across all divisions from founder to leaf). Because
+identical genotypes are collapsed before sampling, the number of records per clone
+is ≤ `n_sample`; the `lineage_abundance` field (mirrored by the AIRR-standard
+`duplicate_count`) accounts for the collapsed copies.
+
+> **Branch lengths vs. record `n_mutations`.** Newick branch lengths
+> (as returned by `to_newick()`) count the **per-division substitution events**
+> along each edge — re-mutations of a site that was already mutated count again.
+> The record field `n_mutations` is the **net difference from germline** at the
+> leaf (a site mutated and then back-mutated is not counted). As a result,
+> summing branch lengths root→leaf will generally exceed a leaf's `n_mutations`.
+> Both quantities are standard and correct: branch lengths track evolutionary
+> distance along an edge (as tools like GCtree and IgPhyML expect), while
+> `n_mutations` tracks the observable deviation from germline (as AIRR requires).
+
+Consistency check: `n_v + n_d + n_j + n_np == n_mutations` holds for every record.
+
+Lineage metadata stamped on every record:
+
+| Field | Meaning |
+|---|---|
+| `clone_id` | Which family (0 … n_clones−1) — the ground-truth clone label |
+| `lineage_node_id` | The cell's node id within its tree |
+| `lineage_parent_id` | Parent node id (−1 for the founder) |
+| `lineage_generation` | Generation depth (founder = 0) |
+| `lineage_abundance` | Observation count after genotype collapse |
+| `duplicate_count` | AIRR-standard alias of `lineage_abundance` (read by Change-O / SCOPer / dowser) |
+| `lineage_affinity` | Sequence-distance proxy to the target (see [§5](#5-affinity-selection)). `0` **only** when no target is in play — fully neutral mode (`target_aa=None` **and** `selection_strength=0`). If a `target_aa` is supplied (or selection is on), affinities are computed and reported even when `selection_strength=0` |
+
+### Ground-truth lineage trees
+
+`result.lineage_trees` is one `LineageTree` per clone. Each tree exposes the full
+genealogy (every node, not just observed ones) and three exporters consumed by
+inference tools:
+
+- `to_newick()` — standard rooted Newick; the founder is the root, branch lengths
+  are per-edge mutation counts. (`((node3:1)node1:1,node2:1)node0;`)
+- `to_fasta()` — every node's sequence, ancestral and observed, so
+  ancestral-sequence-reconstruction can be scored against truth.
+- `to_node_table_tsv()` — `node_id, parent_id, generation, mutations_from_parent,
+  abundance, observed, affinity, sequence`.
+- `nodes()` / `validate()` — node access and a structural-invariant check.
+
+## Library-prep & sequencing artefacts
+
+Library-prep and sequencing passes can follow `clonal_lineage` — they run
+**independently on each observed cell**, so every read picks up its own noise:
+
+```python
+result = (ga.Experiment.on("human_igh").recombine()
+          .clonal_lineage(n_clones=10, n_sample=30, rate=0.01, selection_strength=10)
+          .sequencing_errors(rate=0.005)
+          .pcr_amplify(rate=0.002)
+          .run_records(seed=0))
+```
+
+Each observed cell's post-SHM sequence is passed through the corruption plan with
+its own seed, and the resulting artefacts are merged back onto the cell's record.
+The founder's recombination provenance (`v_call`, `d_call`, `j_call`, trims,
+junction) **and** the per-segment SHM counts are preserved; the record additionally
+reports the artefact counters (`n_quality_errors`, `n_pcr_errors`, `n_indels`, …).
+Supported passes are the per-read library-prep / sequencing artefact set also
+used by `clonal_repertoire` and legacy `expand_clones`:
+`sequencing_errors`, `pcr_amplify`, `polymerase_indels`, `end_loss_*`,
+`ambiguous_base_calls`, `random_strand_orientation`.
+
+`mutate` is **not** allowed after `clonal_lineage` — SHM is internal to the lineage
+engine (set it via `clonal_lineage(rate=...)`). `paired_end` is **not** allowed yet
+either (the read layout is not wired through the per-cell corruption merge — a future
+addition).
+
+Validation works on lineage results too: `run_records(..., validate_records=True)`
+runs the per-record postcondition check and the clonal-family consistency check
+(by `clone_id`), with or without a corruption pass. `run_records(...,
+expose_provenance=True)` adds `truth_v_call` / `truth_d_call` / `truth_j_call`
+columns from the founder assignments, and `result.outcomes` carries the per-record
+`Outcome` objects index-aligned with `result.records`.
+
+## Clone-size distributions (TCR and repertoire mix)
+
+> **For TCR, use [`clonal_repertoire`](clonal-repertoire.md).** `clonal_lineage`
+> itself is **BCR-only** — it still rejects TCR loci. The heavy-tailed clone-size
+> model described below is now exposed as a fluent DSL workflow via
+> [`clonal_repertoire`](clonal-repertoire.md); that is the TCR (and flat-BCR-abundance)
+> path. This section explains the model; the dedicated guide is the place to drive it.
+
+Real repertoires are not uniform: a few clones are huge, most are singletons.
+[`clonal_repertoire`](clonal-repertoire.md) draws **clone sizes** from a
+heavy-tailed distribution (rounded power-law / Zipf-like by default, log-normal
+optional) with a controllable **unexpanded fraction** (size-1, never-expanded
+clones). For TCR —
+which has no SHM — a clone is simply one rearrangement at copy-number `size`, with
+within-clone variation coming only from the post-fork sequencing/PCR-error passes;
+identical reads collapse into AIRR records carrying `clone_id` + `duplicate_count`.
+That mixes large expanded families with a realistic singleton tail. See the
+[Clonal repertoires guide](clonal-repertoire.md) for the full workflow.
+
+## Determinism
+
+Everything is keyed on `seed`. Clone *c* grows from `seed + c × 1_000_000`;
+within a clone, generations, divisions, mutations, and sampling all draw from
+seeded RNG streams (growth and sampling use separate streams so they don't
+interfere). Re-running with the same seed reproduces the trees and records
+byte-for-byte.
+
+## Parameters
+
+| Parameter | Default | Meaning |
+|---|---|---|
+| `n_clones` | — | Number of independent families to grow |
+| `max_generations` | 10 | Germinal-center rounds (≤ 1000) |
+| `n_max` | 1000 | **Per-generation LIVING-population carrying capacity** — the live population each generation is capped at this. It is **not** a hard cap on total cells per clone; the tree can contain more total nodes across generations |
+| `n_sample` | 50 | Cells sampled per family at the end; records per clone ≤ this (genotype-collapsed) |
+| `rate` | 0.05 | Per-base S5F SHM rate, per division |
+| `lambda_base` | 1.5 | Mean offspring per cell per generation |
+| `selection_strength` | 0.0 | `0` = neutral drift (`fitness ≡ 1`); set `> 0` for affinity selection. Note `0` makes selection neutral but does **not** force `lineage_affinity` to 0 — affinities are still computed/reported whenever a `target_aa` is supplied |
+| `beta` | 1.0 | Affinity steepness in `exp(−beta·distance)` |
+| `target_aa` | `None` | Target amino-acid sequence (a full translated receptor) used as the selection target (BLOSUM62-weighted distance, position-wise; only the overlapping prefix is scored when lengths differ). A **sequence-distance proxy**, not a biophysical antigen. `None` ⇒ auto "mature" target |
+| `mature_substitutions` | 5 | aa substitutions for the auto target |
+| `s5f_model` | `"hh_s5f"` | Bundled S5F kernel |
+| `allow_extinction` | `False` | `False` ⇒ condition each clone on survival (retry extinct founders with fresh deterministic sub-seeds), so you reliably get `n_clones` families. `True` ⇒ accept extinction and skip extinct clones, producing fewer families |
+
+## Clone recovery: what we actually ran
+
+The point of planting ground-truth clones is that **other people's tools can find
+them**. Two clusterers were actually run against the planted labels and both
+recover them perfectly (adjusted Rand index = 1.0) at realistic SHM:
+
+1. **Immcantation Change-O `DefineClones`** at its **default** junction-distance
+   threshold (0.16) recovers the planted clones exactly.
+2. An **in-repo, implementation-independent standard-heuristic clusterer**
+   (V/J + junction-length + single-linkage) recovers them just as cleanly.
+
+The export formats are **designed to feed** the broader B-cell lineage ecosystem —
+tree-based tools like **GCtree, IgPhyML, and dowser** consume the
+Newick/FASTA/node-table exports, and abundance-aware clustering tools like
+**SCOPer** read the AIRR TSV with `duplicate_count`. Those tools were **not** run
+as part of this validation; the claim here is scoped to the two clusterers above
+and to format compatibility, not to having executed the full ecosystem.
+
+![GenAIRR clonal_lineage clones are recovered by Change-O at default settings](../assets/clonal-lineage-detection.png)
+
+*30 clones simulated with `clonal_lineage` (human IGH, realistic SHM). **(A)** the
+repertoire has a realistic spread of clone sizes. **(B)** within-clone junction
+distances sit far below Change-O's default 0.16 threshold while between-clone
+distances are ~1.0 — the planted clones are cleanly separable. **(C)** Change-O at
+its default threshold recovers all 30 planted clones (adjusted Rand index = 1.0,
+precision = recall = 1.0).*
+
+### Reproduce it: run Change-O on a simulated repertoire
+
+```python
+import GenAIRR as ga
+import pandas as pd
+
+result = (ga.Experiment.on("human_igh").recombine()
+          .clonal_lineage(n_clones=30, max_generations=3, n_max=300,
+                          n_sample=20, rate=0.008, lambda_base=1.6,
+                          selection_strength=0.3)
+          .run_records(seed=42))
+
+# Write an AIRR-format TSV. Keep the ground-truth clone label under a NON-AIRR
+# name so it doesn't collide with the tool's inferred `clone_id` column.
+df = pd.DataFrame(result.records)
+df = df.rename(columns={"clone_id": "true_clone_id"})
+df.to_csv("repertoire.tsv", sep="\t", index=False)
+```
+
+```bash
+# Immcantation Change-O (pip install changeo) infers clones from junctions:
+DefineClones.py -d repertoire.tsv --format airr \
+    --act set --model ham --norm len --dist 0.16 -o clones.tsv
+```
+
+Comparing Change-O's inferred `clone_id` against the planted `true_clone_id`
+(e.g. with `sklearn.metrics.adjusted_rand_score`) gives **ARI = 1.0** — a perfect
+match.
+
+### What the validation shows across SHM regimes
+
+- **Perfect precision, always.** Across every tool and threshold tested, two
+  *different* planted clones are never merged. The per-clone founding
+  rearrangements are distinct, so the planted signal is unambiguous.
+- **Full recovery at a matched threshold.** Detection is a function of how mutated
+  the lineage is versus the caller's distance cutoff. At realistic SHM, the default
+  0.16 cutoff recovers everything; for deeply matured lineages (e.g. `rate=0.05`,
+  6 generations → ~21 % SHM) you raise the cutoff (a threshold sweep climbs from
+  ARI 0.26 at 0.16 → 0.91 at 0.30 → 1.0 at 0.45). This mirrors how these tools
+  behave on real data and is **not** a property of the simulator.
+- **Independent of any one tool.** The same recovery holds for the in-repo
+  implementation-independent V/J + junction-length + single-linkage clusterer — so
+  the signal is not an artefact of Change-O's specific model. The exported
+  Newick/FASTA/AIRR-TSV (with `duplicate_count`) are **designed to feed** tree-based
+  and abundance-aware methods (GCtree, IgPhyML, dowser, SCOPer) directly; those
+  downstream tools were not run here.
+
+In short: the two clusterers we ran recover the planted clones exactly, and the
+export formats are built to hand the same ground truth to the wider ecosystem.
+
+## Relationship to `expand_clones`
+
+`expand_clones` (the star model) is **deprecated** but still works — it remains
+useful for "many reads sharing one V(D)J truth" without a genealogy.
+
+`clonal_lineage` is **not a drop-in replacement.** It grows real
+affinity-maturation trees rather than a flat star, so the surface differs:
+
+- **Different parameters.** There is no `per_clone`; the number of observed records
+  depends on `n_sample`, genotype collapse, and selection (not a fixed
+  `n_clones × per_clone` product). SHM is internal (`rate=...`), not a separate
+  `mutate` step.
+- **Different return shape.** `clonal_lineage` returns a
+  `SimulationResultWithLineages` with per-clone `.lineage_trees` (Newick / FASTA /
+  node-table exporters) alongside the per-cell records.
+
+What *does* carry over: the same per-read library-prep / sequencing passes
+(`sequencing_errors`, `pcr_amplify`, …) can follow `clonal_lineage` exactly as they
+follow other clonal workflows, applied independently per observed cell (see
+[Library-prep & sequencing artefacts](#library-prep-sequencing-artefacts)). And
+`run_records(..., validate_records=True)` is supported on lineage results too.
diff --git a/site_docs/guides/clonal-repertoire.md b/site_docs/guides/clonal-repertoire.md
new file mode 100644
index 0000000..a209d4f
--- /dev/null
+++ b/site_docs/guides/clonal-repertoire.md
@@ -0,0 +1,216 @@
+# Clonal repertoires (TCR & abundance)
+
+<p class="lead">Where <code>clonal_lineage</code> grows BCR affinity-maturation <em>trees</em>, <code>clonal_repertoire</code> builds
+a <em>non-tree</em> clonal repertoire: each clone is one rearrangement proliferated
+to a clone <strong>size</strong> drawn from a heavy-tailed distribution, and those
+copies are emitted as reads through the library-prep / sequencing passes. Identical
+reads collapse into AIRR records carrying the AIRR-standard <code>duplicate_count</code>.
+It is the model for <strong>TCR</strong> repertoires (T cells don't somatically
+hypermutate) and for <strong>flat BCR</strong> clonal abundance — the modern
+replacement for the deprecated <code>expand_clones</code>, with realistic clone
+sizes instead of a fixed per-clone count.</p>
+
+## What it is & when to use it
+
+A real repertoire is a population of clones with wildly uneven sizes: a few huge
+expanded clones and a long tail of singletons. `clonal_repertoire` reproduces that
+structure. For each of `n_clones` clones it:
+
+1. runs the pre-fork plan (`recombine()`) **once** to fix the clone's
+   V/D/J + trim + NP backbone — the single rearrangement that defines the clone;
+2. draws a **size** from a heavy-tailed clone-size distribution (rounded
+   power-law / Zipf-like by default, log-normal optional), with a controllable
+   **unexpanded-singleton fraction**;
+3. emits that many **reads** through the post-fork library-prep / sequencing passes,
+   so reads diverge only by technical noise;
+4. **genotype-collapses** identical reads into AIRR records, each carrying a
+   `clone_id` (ground-truth clone label) and a `duplicate_count` (abundance).
+
+Reach for it when you want a repertoire whose **ground truth is clone membership +
+abundance** — the input clone-callers and abundance-aware tools expect — rather than
+a per-clone mutation genealogy.
+
+### How it compares
+
+| | What it models | Ground truth | Loci |
+|---|---|---|---|
+| **`clonal_repertoire`** | Non-tree clonal abundance; one rearrangement × N copies + technical noise | `clone_id` + `duplicate_count` | **TCR** and flat **BCR** |
+| [`clonal_lineage`](clonal-lineage.md) | BCR affinity-maturation **trees** (per-division SHM, selection) | Lineage tree + per-cell records | **BCR only** |
+| `expand_clones` *(deprecated)* | Star: fixed `per_clone` count, **no** size distribution | `clone_id` + `parent_id` | BCR / TCR |
+
+`clonal_repertoire` is the modern replacement for flat clonal expansion: instead of
+`expand_clones`' fixed `per_clone` count, every clone draws a realistic heavy-tailed
+size. For BCR **lineage trees** (genealogy, ancestral sequences, selection), use
+[`clonal_lineage`](clonal-lineage.md) instead.
+
+## The biology
+
+A T-cell clone is the progeny of one rearranged T cell proliferated to many
+**identical** copies. T cells do **not** somatically hypermutate, so all the
+sequence diversity you observe *within* a TCR clone is **technical** — PCR and
+sequencing error introduced during library prep — not biological. That is exactly
+the shape `clonal_repertoire` produces: one rearrangement per clone, `size` copies,
+divergence only through the post-fork sequencing passes.
+
+Clone **sizes** are empirically heavy-tailed — TCR clone-size distributions are
+approximately **power-law** (a handful of enormous clones, a long tail of
+singletons). The default `size_distribution="power_law"` (with `exponent≈2`)
+captures that, and `unexpanded_fraction` sets the share of clones forced to be
+**never-expanded singletons** (size 1) — the resting naive cells that were never
+clonally expanded.
+
+For **BCR** the same flat model is useful when you want clonal abundance without a
+genealogy. SHM is optional and applied flat across the clone's copies via a
+post-fork `.mutate()` (see below) — there is no mutation tree.
+
+## Quick start (TCR)
+
+```python
+import GenAIRR as ga
+
+result = (ga.Experiment.on("human_tcrb").allow_curatable_refdata().recombine()
+          .clonal_repertoire(n_clones=200, size_distribution="power_law",
+                             exponent=2.0, max_size=500, unexpanded_fraction=0.5)
+          .sequencing_errors(rate=0.005)         # per-read technical noise
+          .run_records(seed=0))
+
+# Each record carries a ground-truth clone label + an abundance count:
+for rec in result.records[:5]:
+    print(rec["clone_id"], rec["duplicate_count"], rec["v_call"], rec["j_call"])
+
+# T cells don't hypermutate — every record has zero SHM:
+assert all(rec.get("n_mutations", 0) == 0 for rec in result.records)
+```
+
+`allow_curatable_refdata()` is the usual opt-in for sampling from the bundled TCR
+catalogue (which includes pseudogene/ORF alleles). `sequencing_errors(rate=...)` is
+the per-read technical-noise pass — `rate` is a per-base error probability in
+`[0, 1]` (drawn as `count ~ Poisson(rate × read_len)` per read). `pcr_amplify(rate=...)`
+has the same shape; any of the post-fork library-prep passes
+(`sequencing_errors`, `pcr_amplify`, `polymerase_indels`, `end_loss_*`,
+`ambiguous_base_calls`, `random_strand_orientation`) can follow the fork.
+
+## Quick start (flat BCR with SHM)
+
+The same model works for BCR. Optionally add a post-fork `.mutate()` to apply flat
+SHM independently to each copy (this is **not** a lineage tree — it's per-read
+mutation off the shared founder):
+
+```python
+import GenAIRR as ga
+
+result = (ga.Experiment.on("human_igh").recombine()
+          .clonal_repertoire(n_clones=100, max_size=300, unexpanded_fraction=0.3)
+          .mutate(model="s5f", rate=0.01)        # flat SHM on each copy
+          .sequencing_errors(rate=0.005)
+          .run_records(seed=0))
+```
+
+With **no** post-fork passes at all, every copy of a clone is identical, so the
+clone collapses to a **single** record whose `duplicate_count` equals the drawn
+size:
+
+```python
+r = (ga.Experiment.on("human_igh").recombine()
+     .clonal_repertoire(n_clones=10, max_size=100)
+     .run_records(seed=1))
+
+from collections import Counter
+per_clone = Counter(rec["clone_id"] for rec in r.records)
+assert all(c == 1 for c in per_clone.values())     # one record per clone
+assert all(rec["duplicate_count"] >= 1 for rec in r.records)
+```
+
+> **`mutate` is BCR-only.** A post-fork `.mutate()` on a **TCR** experiment is
+> rejected by `mutate`'s own TCR guard — T cells don't hypermutate. On TCR, leave
+> SHM out; the post-fork sequencing passes provide all within-clone variation.
+
+## How it works
+
+```mermaid
+flowchart TB
+    A["For each clone c = 0..n_clones:<br/>recombine() once → founder rearrangement"] --> B["Draw size_c from the clone-size distribution<br/>(deterministic for seed)"]
+    B --> C{"Post-fork passes?"}
+    C -->|"no"| D["1 record, duplicate_count = size_c"]
+    C -->|"yes"| E["Emit size_c reads through the<br/>library-prep / sequencing passes<br/>(each read its own seed)"]
+    E --> F["Collapse identical sequences →<br/>records with duplicate_count"]
+    D --> G["Stamp clone_id = c"]
+    F --> G
+```
+
+Per clone, the engine draws the size, runs the founder recombination once, then
+plays `size` reads through the post-fork plan (each with its own derived seed) and
+collapses by emitted `sequence`. `clone_id` is the ground-truth clone index;
+`duplicate_count` is the post-collapse abundance.
+
+**Determinism.** Everything is keyed on `seed`: clone sizes come from a seeded draw,
+clone *c* recombines from `seed + c × 1_000_000`, and each read within a clone draws
+from a derived sub-seed. Re-running with the same `seed` reproduces the records and
+`duplicate_count`s byte-for-byte.
+
+**Read-count cost.** When post-fork passes are present, the **total reads simulated
+is roughly the sum of the drawn clone sizes** (before collapse). A heavy tail with a
+large `max_size` can therefore blow up runtime — keep `max_size` modest (the default
+is 1000) and remember a few clones near `max_size` dominate the cost.
+
+## Parameters
+
+| Parameter | Default | Meaning |
+|---|---|---|
+| `n_clones` | — | Number of clones to simulate (positive int). Sets the number of distinct `clone_id`s. |
+| `size_distribution` | `"power_law"` | Clone-size law: `"power_law"` (Zipf-like, heavy-tailed) or `"lognormal"`. |
+| `exponent` | `2.0` | Power-law exponent (`> 0`), used when `size_distribution="power_law"`. Higher ⇒ steeper tail / more singletons; `~2–3` is typical for TCR. |
+| `mu` | `1.0` | Log-normal location parameter, used when `size_distribution="lognormal"`. |
+| `sigma` | `1.0` | Log-normal scale (`>= 0`), used when `size_distribution="lognormal"`. Larger ⇒ heavier tail. |
+| `max_size` | `1000` | Upper clamp on any clone size. Bounds runtime (total reads ≈ Σ sizes when post-fork passes are present) — keep it modest. |
+| `unexpanded_fraction` | `0.0` | Fraction of clones **forced** to size 1 (never-expanded singletons), in `[0, 1]`. The forced count is `round(n_clones × fraction)`. |
+
+> **Singletons come from two places.** The power-law size is drawn by a **continuous
+> inverse-CDF** and then **rounded** to an integer in `[1, max_size]`, so even with
+> `unexpanded_fraction=0` a large share of clones round to size 1 (for `exponent=2`
+> the singleton share is **~1/3**). `unexpanded_fraction` adds a *forced* floor of
+> size-1 clones **on top of** that natural tail, so raise it when you want an even
+> larger never-expanded population. Because the power-law is a continuous draw that
+> is rounded — not an exact discrete Zipf PMF — the realized distribution is an
+> approximation of true discrete Zipf.
+
+## Ground truth & tooling
+
+Each record carries the two ground-truth fields the downstream ecosystem consumes:
+
+- **`clone_id`** — the planted **clone membership** label (which rearrangement this
+  read descends from).
+- **`duplicate_count`** — the **abundance** of the collapsed record (AIRR-standard
+  field name).
+
+Write the records to an AIRR TSV (keeping the ground-truth label under a non-AIRR
+column name so it doesn't collide with a tool's inferred `clone_id`):
+
+```python
+import pandas as pd
+df = pd.DataFrame(result.records).rename(columns={"clone_id": "true_clone_id"})
+df.to_csv("repertoire.tsv", sep="\t", index=False)
+```
+
+The output format is **what clone-callers and abundance-aware tools consume** —
+Change-O `DefineClones` and SCOPer read AIRR TSV with `duplicate_count`, and TCR
+tools like tcrdist read V/J + junction. You can score a tool's inferred clusters
+against the planted `true_clone_id` (e.g. with `sklearn.metrics.adjusted_rand_score`).
+We frame the records as **format-compatible** with these tools; this page does **not**
+claim they were validated against `clonal_repertoire` output here.
+
+## Limitations
+
+- **No mutation tree.** `clonal_repertoire` is a flat, non-tree model — there are no
+  ancestral nodes, generations, or selection. For a BCR genealogy (Newick / FASTA /
+  node table, affinity maturation), use [`clonal_lineage`](clonal-lineage.md).
+  A post-fork `.mutate()` on BCR applies *flat* SHM per copy, not a lineage.
+- **Power-law is continuous-rounded, not exact discrete Zipf.** Sizes come from a
+  continuous inverse-CDF rounded to integers; this approximates true discrete Zipf
+  rather than matching its PMF exactly.
+- **Cost scales with Σ sizes.** With post-fork passes present, the simulator emits
+  roughly the sum of the drawn sizes in reads before collapse. Large clones combined
+  with heavy post-fork passes mean many reads — keep `max_size` modest.
+- **One fork per pipeline.** `clonal_repertoire`, `expand_clones`, and
+  `clonal_lineage` are mutually exclusive in a single chain; descendant-phase steps
+  (e.g. `.mutate()`, the corruption passes) must come **after** the fork.
diff --git a/site_docs/guides/corruption-sequencing.md b/site_docs/guides/corruption-sequencing.md
index 20679c8..93c75d8 100644
--- a/site_docs/guides/corruption-sequencing.md
+++ b/site_docs/guides/corruption-sequencing.md
@@ -253,18 +253,18 @@ A few facts to know:
   reflect the strand decision automatically. Don't apply a
   second flip downstream.
 
-## Ordering with clonal families
+## Ordering with clonal workflows
 
 **All seven corruption passes are descendant-phase.** They model
-per-read artefacts — every descendant of a clone gets independent
-PCR errors, independent indel events, independent end-loss
-draws, etc.
+per-read artefacts — every emitted clone member or observed lineage
+cell gets independent PCR errors, independent indel events,
+independent end-loss draws, etc.
 
 ```python
 result = (
     ga.Experiment.on("human_igh")
       .recombine()
-      .expand_clones(n_clones=10, per_clone=20)
+      .clonal_repertoire(n_clones=10, max_size=50)
       .mutate(model="s5f", rate=0.03)        # biology — descendant-phase
       .pcr_amplify(count=(0, 3))             # corruption — descendant-phase
       .polymerase_indels(count=(0, 2))
@@ -278,9 +278,12 @@ result = (
 )
 ```
 
-Calling any of them *before* `.expand_clones(...)` raises
-`ValueError` at chain time. See [Clonal families](clonal-families.md)
-for the ancestor / descendant phase discipline in full.
+Calling corruption before a flat fork (`clonal_repertoire` or legacy
+`expand_clones`) raises `ValueError` at chain time because the artefact would be
+shared by the whole clone. Corruption may also follow `clonal_lineage`; in that
+case it is applied independently to each observed sampled cell. See
+[Clonal simulation overview](clonal-families.md) for the phase discipline in
+full.
 
 ## Per-platform calibrated profiles
 
@@ -402,12 +405,11 @@ separate passes, separate fields, separate biology stages. The
 [Recombination + junction biology](recombination-junction.md#trims-vs-end-loss)
 guide has the side-by-side comparison.
 
-**Putting corruption before `.expand_clones()`.** All seven
-corruption passes are descendant-phase — they're per-read
-artefacts that need to vary within a clonal family. The DSL
-rejects this at chain time with the uniform message: "<method>
-must be called after expand_clones(); it is descendant-specific
-and must be sampled independently for each clone member."
+**Putting corruption before a clonal fork.** All seven corruption
+passes are descendant-phase — they're per-read artefacts that need to
+vary within a clonal family. The DSL rejects this before
+`clonal_repertoire()` or `expand_clones()` with a message naming the
+offending method and telling you to move it after the fork.
 
 **Reverse-complementing R2 again after random strand orientation.**
 When `.random_strand_orientation(...)` is in the pipeline,
@@ -430,8 +432,8 @@ count them yourself with `rec["sequence"].count("N")`.
   partition.
 - **[Paired-end reads and FASTQ](paired-end-fastq.md)** — the
   read-layout projection that usually sits alongside corruption.
-- **[Clonal families](clonal-families.md)** — the
-  ancestor / descendant phase discipline that gates every
+- **[Clonal simulation overview](clonal-families.md)** — the
+  clonal model chooser and phase discipline that gates every
   corruption pass.
 - **[Recombination + junction biology](recombination-junction.md)**
   — the trim vs end-loss boundary in detail.
diff --git a/site_docs/guides/experiment-builder.md b/site_docs/guides/experiment-builder.md
index 7bdc1a1..d3d1916 100644
--- a/site_docs/guides/experiment-builder.md
+++ b/site_docs/guides/experiment-builder.md
@@ -86,7 +86,7 @@ pass actually does:
 | **Recombination-stage mechanisms** | `.invert_d(prob=...)`, `.receptor_revision(prob=...)` | D in reverse-complement orientation; post-recombine V replacement |
 | **Constraints** | `.productive_only()`, `.restrict_alleles(v=[...], d=[...], j=[...])` | Constrain the sample space (productive triad; allele subsetting) |
 | **Biological mutation** | `.mutate(model="s5f"\|"uniform", rate=..., segment_rates={...}, v_subregion_rates={...})` | Somatic hypermutation per descendant |
-| **Clonal structure** | `.expand_clones(n_clones=..., per_clone=...)` | Fork: passes before run once per parent, passes after run per descendant |
+| **Clonal structure** | `.clonal_lineage(...)`, `.clonal_repertoire(...)`, legacy `.expand_clones(...)` | BCR trees, TCR / flat-BCR abundance repertoires, or fixed-size star families |
 | **Library / sequencing corruption** | `.pcr_amplify(count=...)`, `.polymerase_indels(count=...)`, `.ambiguous_base_calls(count=...)`, `.sequencing_errors(count=...)`, `.end_loss_5prime(length=...)`, `.end_loss_3prime(length=...)` | Library-prep + sequencer artefacts |
 | **Read layout** | `.paired_end(r1_length=..., r2_length=..., insert_size=...)`, `.random_strand_orientation(prob=...)` | R1/R2 windows; strand flips |
 | **Bookkeeping** | `.with_metadata(experiment_id=..., tissue=...)`, `.contaminate(prob=...)` | Stamp user fields onto every record; inject background contaminants |
@@ -101,9 +101,16 @@ controls how much N is added and what bases get drawn; the
 ## Order matters
 
 GenAIRR's API rejects pipelines whose steps biologically cannot
-compose. The single most important ordering rule is the **clonal
-fork**: `expand_clones(...)` partitions the pipeline into an
-ancestor phase and a descendant phase.
+compose. The clonal methods are the main ordering boundary:
+
+| Method | Use it for | Post-fork behavior |
+|---|---|---|
+| `clonal_lineage(...)` | BCR affinity-maturation trees | SHM is internal to the tree; optional library-prep / sequencing artefacts run once per observed cell |
+| `clonal_repertoire(...)` | TCR or flat-BCR abundance repertoires | Copies are emitted through post-fork per-read passes and collapsed into `duplicate_count` |
+| `expand_clones(...)` | Legacy fixed-size star families | Fixed `n_clones × per_clone` descendants |
+
+For flat clonal models (`clonal_repertoire` and `expand_clones`), steps before
+the fork run once per clone; steps after run once per emitted read/copy.
 
 ```python
 exp = (
@@ -113,7 +120,7 @@ exp = (
       .invert_d(prob=0.05)
       .receptor_revision(prob=0.05)
       # ──── fork ─────────────────────────────────────────────
-      .expand_clones(n_clones=50, per_clone=20)
+      .clonal_repertoire(n_clones=50, max_size=100, unexpanded_fraction=0.3)
       # ──── descendant phase (runs ONCE per descendant) ──────
       .mutate(model="s5f", rate=0.05)
       .pcr_amplify(count=(0, 3))
@@ -122,34 +129,30 @@ exp = (
 )
 
 result = exp.run_records(seed=42)
-# Output: n_clones × per_clone = 1000 records. Every clone shares
-# the same V(D)J recombination + D orientation + receptor revision;
-# every descendant within a clone has independent SHM, PCR errors,
-# end-loss, and R1/R2 windows.
+# Each clone shares the same V(D)J recombination + D orientation +
+# receptor revision; each emitted read has independent SHM, PCR errors,
+# end-loss, and R1/R2 windows. Identical reads collapse into duplicate_count.
 ```
 
-**Ancestor-phase passes** (anything before `expand_clones`) run once
-per clonal parent and propagate to every descendant. Use them for
-the V(D)J recombination event itself, the recombination-stage
-mechanisms (D inversion, receptor revision), and any constraint
-that pertains to the parent (`productive_only`, `restrict_alleles`).
-
-**Descendant-phase passes** (anything after `expand_clones`) run
-independently per descendant. Use them for somatic hypermutation
-(every memory B cell mutates independently), all library /
-sequencer corruption (PCR errors don't share across descendants),
-and read layout (R1/R2 windows are per-read).
-
-Putting an SHM pass *before* `expand_clones` would mean every
-descendant of every clone shares the same mutations — biologically
-wrong. Putting `invert_d` *after* `expand_clones` would mean
-descendants of the same parent see different D orientations —
-also wrong. The DSL raises `ValueError` at chain time when these
-orderings are violated; you don't have to remember the rules
-exhaustively, the API does.
-
-If `expand_clones` is absent, every pass is "descendant phase" and
-runs per-record (since each record is its own one-off lineage).
+**Ancestor-phase passes** (anything before a flat clonal fork) run once per
+clonal parent and propagate to every emitted copy. Use them for the V(D)J
+recombination event itself, recombination-stage mechanisms (D inversion,
+receptor revision), and constraints on the founder draw (`productive_only`,
+`restrict_alleles`).
+
+**Descendant/read-phase passes** (anything after a flat clonal fork) run
+independently per emitted copy. Use them for BCR flat SHM, all library /
+sequencer corruption, and read layout. TCR rejects `.mutate(...)`; T cells do
+not SHM. If `paired_end` follows `clonal_repertoire`, remember the result is
+still abundance-collapsed by assembled sequence: `duplicate_count` carries copy
+number, and FASTQ export does not expand it into multiple read pairs.
+
+`clonal_lineage` handles its own tree-internal SHM through
+`clonal_lineage(rate=...)`; do not add `.mutate(...)` after it. Library-prep and
+sequencing artefacts may follow lineage output, but `paired_end` is not wired
+through `clonal_lineage` yet.
+
+If no clonal method is present, every pass runs per record.
 
 ## Choosing counts vs rates
 
@@ -260,7 +263,9 @@ seed produces the same draws, which is how golden tests work.
 | Biology → API surface lookup | [Biology map](biology-map.md) |
 | Per-segment + per-V-subregion SHM targeting | [Targeted SHM rates](shm-targeting.md) |
 | R1/R2 windows + insert sizes + FASTQ output | [Paired-end reads and FASTQ](paired-end-fastq.md) |
-| Forking into clonal families | [Clonal families](clonal-families.md) |
+| Choosing a clonal model | [Clonal simulation overview](clonal-families.md) |
+| BCR lineage trees | [Clonal lineage trees](clonal-lineage.md) |
+| TCR / flat-BCR abundance repertoires | [Clonal repertoires](clonal-repertoire.md) |
 | Authoring or tuning a custom reference cartridge | [Reference cartridge](../concepts/reference-cartridge.md) |
 | Confirming output integrity post-run | [`validate_records`](../validation/validate-records.md) |
 | Per-record AIRR field catalogue | [Your first AIRR record](../getting-started/first-airr-record.md) |
diff --git a/site_docs/guides/index.md b/site_docs/guides/index.md
index b17a2d6..1c38e39 100644
--- a/site_docs/guides/index.md
+++ b/site_docs/guides/index.md
@@ -21,8 +21,14 @@ specific biological mechanisms or pipeline patterns.</p>
 - **[SHM and mutation targeting](shm-targeting.md)** — uniform
   vs S5F, per-segment and per-V-subregion rates, counter
   partitions.
-- **[Clonal families](clonal-families.md)** — `expand_clones`,
-  the ancestor / descendant phase split, family validation.
+- **[Clonal simulation overview](clonal-families.md)** — choose
+  between BCR lineage trees, TCR / flat-BCR clone-size repertoires,
+  and legacy fixed-size stars.
+- **[Clonal lineage trees](clonal-lineage.md)** — BCR
+  affinity-maturation trees with SHM, selection, sampling, and
+  ground-truth exports.
+- **[Clonal repertoires](clonal-repertoire.md)** — TCR and flat-BCR
+  abundance models with heavy-tailed clone sizes and `duplicate_count`.
 
 ## Library + sequencing
 
diff --git a/site_docs/guides/paired-end-fastq.md b/site_docs/guides/paired-end-fastq.md
index 11608a5..146ee76 100644
--- a/site_docs/guides/paired-end-fastq.md
+++ b/site_docs/guides/paired-end-fastq.md
@@ -178,31 +178,37 @@ few interactions are worth knowing:
   at projection time, exactly once; the writer never applies a
   second flip.
 
-## Clonal families
+## Clonal workflows
 
 `paired_end` is a descendant-phase pass — R1/R2 windows are
-per-read, so each clone member gets its own independent layout
-draw. If you put `.paired_end(...)` before `.expand_clones(...)`,
-the DSL raises at chain time. The right order:
+per-read, so each emitted clone member gets its own independent layout
+draw. It is fully supported after legacy `expand_clones(...)`, and
+accepted after `clonal_repertoire(...)` with one important caveat:
+`clonal_repertoire` collapses identical assembled sequences into one
+record with `duplicate_count`, and FASTQ export does not expand that
+abundance back into multiple read pairs. `clonal_lineage(...)` does
+not support paired-end projection yet. If you put `.paired_end(...)`
+before a flat clonal fork, the DSL raises at chain time. The right
+order for a collapsed clonal-repertoire record surface:
 
 ```python
 result = (
     ga.Experiment.on("human_igh")
       .recombine()
-      .expand_clones(n_clones=50, per_clone=20)
+      .clonal_repertoire(n_clones=50, max_size=100)
       .mutate(model="s5f", rate=0.05)
       .paired_end(r1_length=150, insert_size=300)
       .run_records(seed=1)
 )
 
 result.to_paired_fastq("reads_R1.fastq", "reads_R2.fastq")
-# 1,000 paired records (50 clones × 20 descendants). Each
-# descendant has its own R1 / R2 windows, possibly drawn
-# from a different insert_size.
+# Each emitted read has its own R1 / R2 windows, possibly drawn from
+# a different insert_size; identical reads may have collapsed into
+# duplicate_count before export.
 ```
 
-See [Clonal families](clonal-families.md) for the ancestor / descendant
-phase rules in full.
+See [Clonal simulation overview](clonal-families.md) for the clonal model
+chooser and phase rules in full.
 
 ## Validation
 
@@ -242,12 +248,14 @@ discipline.
 
 A handful of issues that show up repeatedly with paired-end.
 
-**Calling `.paired_end()` before `.expand_clones()`.** R1/R2
-windows are per-read, so `paired_end` is descendant-phase. The
-DSL rejects this at chain time: "paired_end must be called after
-expand_clones(); it is descendant-specific and must be sampled
-independently for each clone member." Move `.paired_end(...)`
-after `.expand_clones(...)`.
+**Calling `.paired_end()` before a flat clonal fork.** R1/R2 windows
+are per-read, so `paired_end` is descendant-phase. Move
+`.paired_end(...)` after `clonal_repertoire(...)` or legacy
+`expand_clones(...)`. For exact per-copy paired FASTQ depth today,
+use legacy `expand_clones`; `clonal_repertoire` is abundance-collapsed
+and exposes copy number through `duplicate_count`. `clonal_lineage(...)`
+currently rejects `.paired_end(...)` even after the fork; paired-end
+lineage output is a future addition.
 
 **Expecting two AIRR rows per molecule.** There's still one row
 per record — paired-end is a layered projection, not a record
@@ -278,8 +286,8 @@ instead.
 - **[Validation & reproducibility](../validation/index.md)** — the
   validator that checks paired-end geometry, plus the
   reproducibility model.
-- **[Clonal families](clonal-families.md)** — the ancestor /
-  descendant phase rules and why `paired_end` must come after
-  `expand_clones`.
+- **[Clonal simulation overview](clonal-families.md)** — the clonal
+  model chooser and why `paired_end` must come after flat clonal
+  forks.
 - **[The Experiment builder](experiment-builder.md)** — where
   `paired_end` sits in the full DSL pipeline.
diff --git a/site_docs/guides/recombination-editing.md b/site_docs/guides/recombination-editing.md
index 1ffab07..6a62598 100644
--- a/site_docs/guides/recombination-editing.md
+++ b/site_docs/guides/recombination-editing.md
@@ -65,7 +65,7 @@ print(sum(1 for r in result.records if r["d_inverted"]))
 - **Single call per pipeline.** A second `.invert_d(...)` raises
   `ValueError: invert_d already configured on this experiment;
   v1 accepts at most one inversion step per pipeline`.
-- **Must come before `.expand_clones(...)`** (ancestor-phase). See
+- **Must come before clonal forks** (ancestor-phase). See
   [Clonal placement](#clonal-placement) below.
 
 ### The `d_inverted` AIRR field
@@ -148,7 +148,7 @@ print(sum(1 for r in result.records if r["receptor_revision_applied"]))
 - **Single call per pipeline.** A second call raises
   `ValueError: receptor_revision already configured on this
   experiment`.
-- **Must come before `.expand_clones(...)`** (ancestor-phase).
+- **Must come before clonal forks** (ancestor-phase).
 - The compile path also checks that the cartridge has a V pool
   in refdata — required because the replacement allele draws from
   it.
@@ -209,24 +209,25 @@ after V assembly).
 
 ## Clonal placement
 
-Both methods must be configured BEFORE `.expand_clones(...)` —
+Both methods must be configured BEFORE a clonal fork
+(`clonal_lineage`, `clonal_repertoire`, or legacy `expand_clones`) —
 they're recombination-time decisions that the family inherits.
 The DSL enforces this at chain time with two symmetric guards:
 
 ```python
-# WRONG — invert_d after expand_clones raises ValueError
+# WRONG — invert_d after a clonal fork raises ValueError
 ga.Experiment.on("human_igh") \
    .recombine() \
-   .expand_clones(n_clones=10, per_clone=20) \
+   .clonal_repertoire(n_clones=10, max_size=20) \
    .invert_d(prob=0.05)
-# ValueError: invert_d must be called before expand_clones();
+# ValueError: invert_d must be called before the clonal fork;
 # D inversion is a recombination-time decision and must be
 # inherited by all clone descendants. Move the invert_d(...)
-# call before expand_clones(...).
+# call before clonal_lineage(...), clonal_repertoire(...), or expand_clones(...).
 ```
 
 Symmetric message for `receptor_revision` placed after
-`expand_clones`. The right order is:
+a clonal fork. The right order is:
 
 ```python
 result = (
@@ -234,7 +235,7 @@ result = (
       .recombine()
       .invert_d(prob=0.05)              # ancestor phase
       .receptor_revision(prob=0.02)     # ancestor phase
-      .expand_clones(n_clones=10, per_clone=20)
+      .clonal_repertoire(n_clones=10, max_size=20)
       .mutate(model="s5f", rate=0.03)   # descendant phase
       .run_records(seed=1)
 )
@@ -248,7 +249,7 @@ checks (via the `truth_*_call` fields when provenance exposure is
 on; the parent-aware validator also compares `d_inverted` and
 `original_v_call` against the parent Outcome).
 
-See [Clonal families](clonal-families.md) for the full
+See [Clonal simulation overview](clonal-families.md) for the full
 ancestor-vs-descendant phase discipline.
 
 ## Validation and replay
@@ -329,8 +330,8 @@ re-RCs the bytes before comparing, do it on
 `rec["sequence"][d_sequence_start:d_sequence_end]` — don't
 remap the coordinates.
 
-**Putting `.invert_d()` or `.receptor_revision()` after
-`.expand_clones()`.** The DSL rejects this immediately at chain
+**Putting `.invert_d()` or `.receptor_revision()` after a clonal
+fork.** The DSL rejects this immediately at chain
 time with the symmetric message above. Both decisions must be
 shared across every descendant of a clonal family; the API will
 not let you accidentally split them.
@@ -340,7 +341,7 @@ not let you accidentally split them.
 - **[Recombination and junction biology](recombination-junction.md)**
   — what `.recombine()` produces in the first place, before
   either of these mechanisms run.
-- **[Clonal families](clonal-families.md)** — the ancestor-vs-
+- **[Clonal simulation overview](clonal-families.md)** — the ancestor-vs-
   descendant phase discipline both mechanisms participate in.
 - **[Validation & reproducibility](../validation/index.md)** —
   the validator's issue catalogue (including the three issue
diff --git a/site_docs/guides/recombination-junction.md b/site_docs/guides/recombination-junction.md
index 0cb4193..146ee32 100644
--- a/site_docs/guides/recombination-junction.md
+++ b/site_docs/guides/recombination-junction.md
@@ -23,10 +23,11 @@ analysis would call "the receptor":
 | **Assembled regions** | The final structural regions (`V`, `NP1`, `D`, `NP2`, `J`) carrying their coordinates on the record |
 | **Junction** | The canonical V Cys → J W/F + 3 window exposed on `junction` / `junction_aa` / `junction_length` |
 
-Everything before `.mutate()`, `.expand_clones()`, or any
-corruption pass is recombination's responsibility. After the
-pass, the molecule is "finished" in the recombination-biology
-sense — SHM and library prep happen on top of it.
+Everything before `.mutate()`, a clonal fork (`clonal_lineage`,
+`clonal_repertoire`, legacy `expand_clones`), or any corruption pass is
+recombination's responsibility. After the pass, the molecule is "finished" in
+the recombination-biology sense — SHM, clonal expansion, and library prep happen
+on top of it.
 
 ## A minimal recombination
 
diff --git a/site_docs/index.md b/site_docs/index.md
index 96037ac..c6c28fd 100644
--- a/site_docs/index.md
+++ b/site_docs/index.md
@@ -81,9 +81,9 @@ an aligner.</p>
 
 <div class="cap-card cap-clone" markdown>
   <p class="cap-num">// Lineage · 03</p>
-  <h3>Expand clones</h3>
-  <p>Parent rearrangement forks N descendants. SHM accumulates
-  per-descendant after the fork.</p>
+  <h3>Simulate clones</h3>
+  <p>BCR lineage trees, TCR clone-size repertoires, and flat
+  abundance benchmarks with planted clone IDs.</p>
 </div>
 
 <div class="cap-card cap-corrupt" markdown>
@@ -112,17 +112,18 @@ an aligner.</p>
 ```python
 import GenAIRR as ga
 
+# Grow real BCR clonal lineage trees — affinity maturation, with ground truth
 result = (
     ga.Experiment.on("human_igh")
       .recombine()
-      .expand_clones(n_clones=50, per_clone=20)
-      .mutate(rate=0.05)
-      .pcr_amplify(count=(0, 3))
-      .ambiguous_base_calls(count=(0, 2))
-      .productive_only().run_records(seed=42)
+      .clonal_lineage(n_clones=50, max_generations=6, n_sample=30,
+                      rate=0.01, selection_strength=10.0)
+      .sequencing_errors(rate=0.001)
+      .run_records(seed=42)
 )
 
-result.to_tsv("repertoire.tsv")
+result.to_tsv("repertoire.tsv")                 # per-cell AIRR records (clone_id, lineage_*)
+newick = result.lineage_trees[0].to_newick()    # ground-truth lineage tree per clone
 ```
 
 ## Install. One command. No compiler.
@@ -145,10 +146,10 @@ your task.
 
 | If you want to ... | Start here |
 |---|---|
-| **Simulate sequences** | [Quick start](getting-started/quick-start.md) → [The Experiment builder](guides/experiment-builder.md) |
+| **Simulate sequences** | [Quick start](getting-started/quick-start.md) → [The Experiment builder](guides/experiment-builder.md) → [API reference](reference/index.md) |
 | **Build a reference cartridge** | [Reference cartridge concept](concepts/reference-cartridge.md) → [Build a reference cartridge](guides/build-reference-cartridge.md) |
-| **Get reproducible / validated output** | [Validation hub](validation/index.md) → [Trace, replay, reproducibility](guides/trace-replay.md) |
-| **Understand the biological mechanisms** | [Recombination + junction biology](guides/recombination-junction.md), [SHM and mutation targeting](guides/shm-targeting.md), [Corruption + sequencing artefacts](guides/corruption-sequencing.md), [Clonal families](guides/clonal-families.md) |
+| **Get reproducible / validated output** | [Validation hub](validation/index.md) → [Validate AIRR records](validation/validate-records.md) → [Trace, replay, reproducibility](guides/trace-replay.md) |
+| **Understand the biological mechanisms** | [Recombination + junction biology](guides/recombination-junction.md), [SHM and mutation targeting](guides/shm-targeting.md), [Corruption + sequencing artefacts](guides/corruption-sequencing.md), [Clonal simulation overview](guides/clonal-families.md) |
 | **Contribute to GenAIRR** | [Architecture (Contributor)](architecture/index.md) |
 
 The **[Choose your path](learn.md)** page expands each of these
diff --git a/site_docs/learn.md b/site_docs/learn.md
index bfc7f5b..9e5536a 100644
--- a/site_docs/learn.md
+++ b/site_docs/learn.md
@@ -73,10 +73,16 @@ where the v1 boundary sits.
 4. **[SHM and mutation targeting](guides/shm-targeting.md)** —
    uniform vs S5F, per-segment and per-V-subregion rates,
    counter partitions.
-5. **[Clonal families](guides/clonal-families.md)** — ancestor /
-   descendant phase discipline, `clone_id` / `parent_id`, family
-   validation.
-6. **[Corruption + sequencing artefacts](guides/corruption-sequencing.md)**
+5. **[Clonal simulation overview](guides/clonal-families.md)** —
+   choose `clonal_lineage` for BCR trees, `clonal_repertoire` for
+   TCR / abundance repertoires, or legacy `expand_clones`.
+6. **[Clonal lineage trees](guides/clonal-lineage.md)** — BCR SHM
+   trees, selection, final-cell sampling, lineage metadata, and
+   tree exports.
+7. **[Clonal repertoires](guides/clonal-repertoire.md)** — TCR and
+   flat-BCR clone sizes, `duplicate_count`, and AIRR clone-caller
+   export.
+8. **[Corruption + sequencing artefacts](guides/corruption-sequencing.md)**
    — the observation-stage mechanisms (PCR, sequencing errors,
    indels, end-loss, N corruption, strand).
 
diff --git a/site_docs/reference/experiment.md b/site_docs/reference/experiment.md
index 6578b25..074eb33 100644
--- a/site_docs/reference/experiment.md
+++ b/site_docs/reference/experiment.md
@@ -5,21 +5,23 @@ builder. Every method returns the same <code>Experiment</code>
 extended by one pipeline stage; the pipeline runs when you call
 <code>.run_records(...)</code>, <code>.run(...)</code>, or
 <code>.compile().run(...)</code>. For the conceptual walk-through —
-which methods are ancestor-phase vs descendant-phase, how clonal
-expansion partitions the pipeline, how compile reuse works — see
+which methods are ancestor-phase vs descendant-phase, which clonal
+model to choose, how compile reuse works — see
 the <a href="../guides/experiment-builder.md">Experiment builder
 guide</a>. The reference below catalogues the public surface.</p>
 
 ## Common methods
 
-The six methods you'll reach for in 90 % of real pipelines:
+The methods you'll reach for in most real pipelines:
 
 | Method | Purpose |
 |---|---|
 | `.recombine(...)` | Add the V(D)J recombination pass — the foundational ancestor-phase mechanism |
 | `.productive_only()` | Constrain sampling so only productive rearrangements survive |
 | `.mutate(...)` | Apply biological SHM (uniform or S5F) on top of recombination |
-| `.expand_clones(...)` | Partition the pipeline into ancestor + descendant phases for clonal families |
+| `.clonal_lineage(...)` | Grow BCR affinity-maturation lineage trees |
+| `.clonal_repertoire(...)` | Generate TCR / flat-BCR abundance repertoires with clone sizes and `duplicate_count` |
+| `.expand_clones(...)` | Legacy fixed-size clonal star model |
 | `.paired_end(...)` | Project assembled sequences as paired R1 / R2 reads |
 | `.run_records(...)` | Compile + run + return a `SimulationResult` |
 
@@ -43,6 +45,8 @@ pilot for the wider generated-reference effort.
         - recombine
         - productive_only
         - mutate
+        - clonal_lineage
+        - clonal_repertoire
         - expand_clones
         - paired_end
         - run_records
diff --git a/site_docs/reference/index.md b/site_docs/reference/index.md
index 17883a7..c666399 100644
--- a/site_docs/reference/index.md
+++ b/site_docs/reference/index.md
@@ -86,9 +86,9 @@ access).
 | Symbol | Purpose | Guide |
 |---|---|---|
 | `ValidationReport` | Per-record AIRR-validator output | [`validate_records`](../validation/validate-records.md) |
-| `FamilyValidationReport` | Family-validator output | [Clonal families](../guides/clonal-families.md) |
+| `FamilyValidationReport` | Family-validator output | [Clonal simulation overview](../guides/clonal-families.md) |
 | `RecordValidationFailedError` | Raised when strict record validation fails | [`validate_records`](../validation/validate-records.md) |
-| `FamilyValidationFailedError` | Raised when strict family validation fails | [Clonal families](../guides/clonal-families.md) |
+| `FamilyValidationFailedError` | Raised when strict family validation fails | [Clonal simulation overview](../guides/clonal-families.md) |
 | `StrictSamplingError` | Raised under `strict=True` on empty admissible support | [Validation hub](../validation/index.md#strict-vs-permissive) |
 | `productive` | Convenience contract bundle for productive sampling | [Recombination biology](../guides/recombination-junction.md#productivity) |
 
@@ -105,7 +105,7 @@ is called.
 | **Recombination** | `.recombine()`, `.invert_d(prob=...)`, `.receptor_revision(prob=...)` | Ancestor-phase mechanisms |
 | **Constraints** | `.productive_only()`, `.restrict_alleles(v=..., d=..., j=...)` | Constrain the sample space |
 | **Biological mutation** | `.mutate(model="s5f"\|"uniform", rate=..., count=..., segment_rates=..., v_subregion_rates=...)` | The only pass that increments `n_mutations` |
-| **Clonal structure** | `.expand_clones(n_clones=..., per_clone=...)` | Marks the ancestor/descendant fork |
+| **Clonal structure** | `.clonal_lineage(...)`, `.clonal_repertoire(...)`, legacy `.expand_clones(...)` | BCR trees, TCR / flat-BCR abundance repertoires, or fixed-size star families |
 | **Trims override** | `.trim(v_3=..., d_5=..., d_3=..., j_5=..., enabled=...)` | Per-experiment trim distribution overrides |
 | **Library / sequencer artefacts** | `.pcr_amplify(...)`, `.polymerase_indels(...)`, `.ambiguous_base_calls(...)`, `.sequencing_errors(...)`, `.end_loss_5prime(...)`, `.end_loss_3prime(...)` | All descendant-phase |
 | **Read layout** | `.paired_end(r1_length=..., r2_length=..., insert_size=...)`, `.random_strand_orientation(prob=...)` | Per-read projection |
@@ -125,7 +125,7 @@ record dicts plus the underlying `Outcome` objects.
 | Surface | Methods / properties | Notes |
 |---|---|---|
 | **List-like access** | `len(result)`, `result[i]`, `result[a:b]`, `for rec in result:` | One AIRR dict per element |
-| **Underlying state** | `.records`, `.outcomes`, `.parents` | `outcomes` is `None` when built from records only; `parents` is `None` on non-clonal results |
+| **Underlying state** | `.records`, `.outcomes`, `.parents`, `.lineage_trees` | `outcomes` is `None` when built from records only; `parents` exists only for legacy `expand_clones`; `lineage_trees` exists on lineage results |
 | **Validation** | `.validate_records(refdata)`, `.validate_families()`, `.validate_families_with_parents(refdata)` | See [Validation hub](../validation/index.md) |
 | **Export** | `.to_tsv(path, *, airr_strict=False)`, `.to_csv(path, *, airr_strict=False)`, `.to_fasta(path, *, prefix="seq")`, `.to_fastq(path, *, quality="illumina", **kw)`, `.to_paired_fastq(r1, r2, *, quality="illumina", overwrite=False, **kw)`, `.to_dataframe(*, airr_strict=False)` | See [Export the results](../getting-started/export-results.md) |
 | **Construction** | `SimulationResult.from_outcomes(outcomes, refdata, *, id_prefix="seq", expose_provenance=False)` | Build a result from Rust `Outcome` objects + refdata; `expose_provenance=True` injects `truth_*_call` columns |
diff --git a/site_docs/reference/simulation-result.md b/site_docs/reference/simulation-result.md
index 789fffd..3ca7dd6 100644
--- a/site_docs/reference/simulation-result.md
+++ b/site_docs/reference/simulation-result.md
@@ -3,8 +3,10 @@
 <p class="lead"><code>SimulationResult</code> is the output wrapper
 returned by <code>Experiment.run_records(...)</code>. It holds the
 list of AIRR record dicts, the underlying engine <code>Outcome</code>
-objects (for trace / replay / validation), and the parent
-<code>Outcome</code>s when the pipeline produced clonal families.
+objects (for trace / replay / validation), legacy parent
+<code>Outcome</code>s when <code>expand_clones</code> produced fixed-size
+families, and lineage trees when <code>clonal_lineage</code> produced BCR
+tree output.
 Treat it as a list-like view of records plus the typed validators
 and export helpers below.</p>
 
@@ -23,6 +25,11 @@ The eight methods you'll reach for in real pipelines:
 | `.to_fasta(path, *, prefix="seq")` | Write assembled sequences as FASTA |
 | `.to_fastq(...)` / `.to_paired_fastq(...)` | Write FASTQ; paired-end requires `read_layout="paired_end"` |
 
+`clonal_repertoire` returns ordinary `SimulationResult` records with
+`clone_id` and `duplicate_count`. `clonal_lineage` returns
+`SimulationResultWithLineages`, a subclass that adds `.lineage_trees`.
+Legacy `expand_clones` returns `SimulationResult` with `.parents`.
+
 ### FASTQ exports (prose only)
 
 The two FASTQ-emitting methods are documented in
diff --git a/site_docs/validation/index.md b/site_docs/validation/index.md
index 15358b4..1332fbf 100644
--- a/site_docs/validation/index.md
+++ b/site_docs/validation/index.md
@@ -61,8 +61,9 @@ one-liner.
 
 ### Family validation
 
-For workloads using `expand_clones(...)`, two sibling validators
-check family-level invariants:
+For workloads that stamp `clone_id` (`clonal_lineage(...)`,
+`clonal_repertoire(...)`, or legacy `expand_clones(...)`), family
+validation checks clone-level record consistency:
 
 ```python
 family_report = result.validate_families()
@@ -73,16 +74,19 @@ full_report = result.validate_families_with_parents(refdata)
 assert full_report, full_report.summary()
 ```
 
-`validate_families` checks within-family invariants alone — every
-descendant of a clone shares its V(D)J recombination, every
-descendant's `clone_id` matches the parent's, no descendant has a
-SHM count below the parent's, and the per-clone size matches what
-`expand_clones` was asked for. No refdata required.
+`validate_families` is records-only. It checks that a clonal batch is
+not mixed with non-clonal records and, when `truth_v_call`,
+`truth_d_call`, and `truth_j_call` are present, that those
+recombination-time truth calls are invariant within each `clone_id`
+group. No refdata required.
 
-`validate_families_with_parents(refdata)` adds full per-record
-validation on every parent before checking family invariants —
-it's the strongest gate. Use it in release-tier CI when the
-pipeline ships clonal output.
+`validate_families_with_parents(refdata)` is the stronger
+legacy-star diagnostic for `expand_clones(...)` outputs, where
+`result.parents` exists. It compares descendant records against the
+actual parent `Outcome`. Modern `clonal_repertoire` and
+`clonal_lineage` do not expose `result.parents`; validate their
+records with `validate_records`, use `validate_families` for
+`clone_id` grouping, and validate lineage trees with `tree.validate()`.
 
 ### Runtime opt-in
 
@@ -108,8 +112,8 @@ hot loops.
 | Allele calls (`v_call` / `d_call` / `j_call` matches an independent walker) | `validate_records` |
 | Junction + productive triad (junction content, `vj_in_frame`, `stop_codon`, `productive` predicate identification) | `validate_records` |
 | Paired-end geometry (R1/R2 windows, R2 reverse-complement, `insert_size`) | `validate_records` (when `paired_end()` is in the pipeline) |
-| Family size, parent/descendant `clone_id` match, descendant `n_mutations ≥ parent's` | `validate_families` |
-| Each family's *parent* validates as a standalone record | `validate_families_with_parents` |
+| Clonal batch is consistently stamped with `clone_id`; truth V/D/J calls are invariant within clone when truth columns are present | `validate_families` |
+| Legacy `expand_clones` descendants agree with their parent `Outcome` | `validate_families_with_parents` |
 
 The three layers are independent — `validate_records` doesn't
 require a clonal structure; `validate_families` doesn't require
@@ -308,24 +312,25 @@ the `failures` list is JSON-serialisable for downstream tooling.
 
 ### Clonal output
 
-Stack record + family validation when the pipeline ships clonal
-families:
+Stack record + family validation when the pipeline ships clonal records:
 
 ```python
 result = (
     exp
     .recombine()
-    .expand_clones(n_clones=50, per_clone=20)
-    .mutate(model="s5f", rate=0.05)
-    .run_records(n=1000, seed=42)
+    .clonal_repertoire(n_clones=50, max_size=100)
+    .sequencing_errors(rate=0.001)
+    .run_records(seed=42, expose_provenance=True)
 )
 
 assert result.validate_records(refdata), "AIRR record divergence"
-assert result.validate_families_with_parents(refdata), "Family invariant divergence"
+assert result.validate_families(), "Family invariant divergence"
 ```
 
-The two layers catch different things and don't substitute for
-each other.
+For legacy `expand_clones(...)`, add
+`result.validate_families_with_parents(refdata)`. For
+`clonal_lineage(...)`, also call `tree.validate()` on each
+`result.lineage_trees` entry when you need topology checks.
 
 ## When validation is not enough
 
diff --git a/site_docs/validation/validate-records.md b/site_docs/validation/validate-records.md
index aadce33..09b1113 100644
--- a/site_docs/validation/validate-records.md
+++ b/site_docs/validation/validate-records.md
@@ -182,39 +182,40 @@ and the insert size matches.
 
 ## Family validation
 
-For workloads using `expand_clones(...)` to generate clonal
-families, two sibling validators check family-level consistency:
+For workloads that stamp `clone_id` (`clonal_lineage(...)`,
+`clonal_repertoire(...)`, or legacy `expand_clones(...)`), family
+validation checks clone-level consistency:
 
 ```python
 result = (
     ga.Experiment.on("human_igh")
       .recombine()
-      .expand_clones(n_clones=50, per_clone=20)
-      .mutate(rate=0.05)
-      .run_records(n=1000, seed=42)
+      .clonal_repertoire(n_clones=50, max_size=100)
+      .mutate(rate=0.01)
+      .run_records(seed=42, expose_provenance=True)
 )
 
-family_report = result.validate_families(refdata)
+family_report = result.validate_families()
 assert family_report, family_report.summary()
 ```
 
-`validate_families` checks within-family invariants: every
-descendant of a clone shares its V(D)J recombination, every
-descendant's `clone_id` matches the parent, no descendant has a
-SHM count below the parent's, and the per-clone size matches what
-`expand_clones` was asked for.
+`validate_families` is records-only. It checks that a clonal batch is
+not mixed with non-clonal records and, when `truth_v_call`,
+`truth_d_call`, and `truth_j_call` are present, that those
+recombination-time truth calls are invariant within each `clone_id`
+group.
 
-When you also want to check that each family's *parent* record
-itself validates against the cartridge (in addition to family
-invariants), use the parent-aware variant:
+For legacy `expand_clones(...)`, you can also compare each
+descendant against its actual parent `Outcome`:
 
 ```python
 report = result.validate_families_with_parents(refdata)
 ```
 
-This is the most expensive of the three — it runs the per-record
-validator on every parent before checking family invariants — but
-it's the strongest gate. Use it in release-tier CI.
+`clonal_repertoire` and `clonal_lineage` do not expose
+`result.parents`, so `validate_families_with_parents` is not their
+primary validator. For `clonal_lineage`, validate the tree objects
+directly with `tree.validate()` when topology matters.
 
 ## What `validate_records` does NOT do
 
diff --git a/src/GenAIRR/_compiled.py b/src/GenAIRR/_compiled.py
index e6f8bf8..9c53d6d 100644
--- a/src/GenAIRR/_compiled.py
+++ b/src/GenAIRR/_compiled.py
@@ -24,7 +24,7 @@
     _describe_step_sequence,
     _format_active_contracts,
 )
-from ._pipeline_ir import _ClonalForkStep
+from ._pipeline_ir import _ClonalForkStep, _LineageForkStep, _RepertoireForkStep
 
 if TYPE_CHECKING:
     from .dataconfig import DataConfig
@@ -500,3 +500,383 @@ def __repr__(self) -> str:
             f"<CompiledClonalExperiment n_clones={self._fork.n_clones} "
             f"size={self._fork.size} chain={self._refdata.chain_type}>"
         )
+
+
+class CompiledLineageExperiment:
+    """A compiled experiment that grows BCR affinity-maturation lineage trees.
+
+    Wraps a pre-fork :class:`GenAIRR._engine.CompiledSimulator` (founder
+    recombination) and a :class:`~GenAIRR._pipeline_ir._LineageForkStep`
+    (lineage parameters). :meth:`run_records` grows one lineage tree per
+    clone via the Rust ``simulate_family_outcomes`` kernel and returns a
+    :class:`~GenAIRR.result.SimulationResultWithLineages` whose records are
+    per-observed-node AIRR dicts with lineage metadata.
+    """
+
+    __slots__ = (
+        "_pre",
+        "_step",
+        "_refdata",
+        "_post",
+        "_post_steps",
+        "_dataconfig",
+        "_metadata",
+    )
+
+    def __init__(
+        self,
+        pre_simulator: "_engine.CompiledSimulator",
+        step: "_LineageForkStep",
+        refdata: "_engine.RefDataConfig",
+        post_simulator: Optional["_engine.CompiledSimulator"] = None,
+        post_steps: Sequence[Any] = (),
+        dataconfig: Optional["DataConfig"] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        self._pre = pre_simulator
+        self._step = step
+        self._refdata = refdata
+        # Per-observed-cell library-prep / sequencing corruption plan.
+        # ``None`` keeps the pristine-read path (no artefacts applied).
+        self._post = post_simulator
+        self._post_steps: Tuple[Any, ...] = tuple(post_steps)
+        self._dataconfig = dataconfig
+        self._metadata = dict(metadata) if metadata else {}
+
+    @property
+    def refdata(self) -> "_engine.RefDataConfig":
+        return self._refdata
+
+    def run_records(
+        self,
+        *,
+        seed: int = 0,
+        strict: bool = False,
+        expose_provenance: bool = False,
+        validate_records: bool = False,
+    ) -> "SimulationResultWithLineages":
+        """Grow lineage trees and return per-observed-node AIRR records.
+
+        Each clone is seeded at ``seed + clone_idx * 1_000_000`` so
+        independent clones are reproducible and non-overlapping.
+
+        The returned :class:`SimulationResultWithLineages` carries the
+        per-record ``Outcome`` that produced each AIRR record on its
+        ``.outcomes`` attribute (index-aligned with ``.records``). On the
+        pristine-read path these are the per-observed-node SHM
+        ``Outcome`` objects (``fam.observed_outcomes()``); on the
+        corruption path they are the per-node merged
+        recombination+SHM+artefact ``Outcome`` objects. Since each node
+        ``Outcome`` is self-consistent, ``result.validate_records(refdata)``
+        passes.
+
+        ``expose_provenance=True`` appends ``truth_v_call`` /
+        ``truth_d_call`` / ``truth_j_call`` columns to each record from
+        the founder allele assignments carried on the per-record
+        ``Outcome``.
+
+        ``validate_records=True`` runs
+        :meth:`SimulationResult.validate_records` (per-record
+        postcondition) and :meth:`SimulationResult.validate_families`
+        (clonal-family consistency by ``clone_id``) on the freshly built
+        batch, raising the matching validation error on any failure.
+        """
+        from GenAIRR import _engine as _eng
+        from ._airr_record import outcome_to_airr_record
+        from ._s5f_loader import load_builtin_s5f_kernel
+        from .result import SimulationResultWithLineages, _inject_truth_columns
+
+        step = self._step
+        mutability, substitution = load_builtin_s5f_kernel(step.s5f_model)
+
+        records: List[Dict[str, Any]] = []
+        # Per-record source ``Outcome`` objects, index-aligned with
+        # ``records`` so ``result.validate_records`` can re-derive each
+        # record from the engine state that produced it.
+        outcomes: List["_engine.Outcome"] = []
+        lineage_trees = []
+
+        # Bound on founder-survival retries. Sampling draws from the LIVING
+        # final-generation population, so an extinct founder (drew 0 offspring)
+        # yields zero observed cells. With ``allow_extinction=False`` (default)
+        # we condition each clone on survival by re-growing the family with a
+        # fresh deterministic sub-seed until it survives or we exhaust the
+        # bound. The sub-seed offset (a prime times the attempt index) keeps the
+        # retries deterministic — same top-level seed => same result.
+        _MAX_SURVIVAL_ATTEMPTS = 50
+        _SUBSEED_STRIDE = 7_919  # prime; keeps retry sub-seeds well-separated
+        allow_extinction = getattr(step, "allow_extinction", False)
+
+        for clone_idx in range(step.n_clones):
+            clone_seed = int(seed) + clone_idx * 1_000_000
+            fam = None
+            tree = None
+            for attempt in range(_MAX_SURVIVAL_ATTEMPTS):
+                family_seed = clone_seed + attempt * _SUBSEED_STRIDE
+                # _pre.run() returns a single Outcome (not a list) — mirror
+                # CompiledClonalExperiment which calls self._pre.run(seed=...).
+                founder = self._pre.run(seed=family_seed, strict=strict)
+                candidate = _eng.simulate_family_outcomes(
+                    founder,
+                    self._refdata,
+                    mutability,
+                    substitution,
+                    step.rate,
+                    step.lambda_base,
+                    0.0,      # lambda_mut: positional slot 6 (inert; hardcoded)
+                    step.max_generations,
+                    step.n_max,
+                    step.n_sample,
+                    family_seed,
+                    selection_strength=step.selection_strength,
+                    beta=step.beta,
+                    target_aa=step.target_aa,
+                    mature_substitutions=step.mature_substitutions,
+                )
+                survived = len(candidate.observed_outcomes()) > 0
+                if survived or allow_extinction:
+                    fam = candidate
+                    tree = candidate.tree()
+                    break
+                # extinct + survival required => retry with next sub-seed
+            else:
+                # Exhausted the retry bound without a surviving family.
+                raise ValueError(
+                    f"clonal_lineage: clone {clone_idx} went extinct on every "
+                    f"one of {_MAX_SURVIVAL_ATTEMPTS} survival attempts (each "
+                    "founder drew 0 offspring). Increase lambda_base (offspring "
+                    "Poisson mean) and/or max_generations so families reliably "
+                    "survive, or pass allow_extinction=True to accept extinct "
+                    "clones (producing fewer than n_clones families)."
+                )
+
+            if fam is None:
+                # allow_extinction=True and the final attempt was extinct:
+                # skip this clone (it contributes no observed cells/records).
+                continue
+
+            lineage_trees.append(tree)
+            if len(fam.observed_outcomes()) == 0:
+                # allow_extinction=True and this family is extinct: keep its
+                # (empty) tree for export parity but emit no records.
+                continue
+
+            if self._post is None:
+                # Pristine-read path: project each observed node's
+                # synthesized SHM Outcome straight to an AIRR record.
+                # ``observed_outcomes()`` is index-aligned with both the
+                # observed-node list and ``airr_records()``.
+                observed_nodes = [n for n in tree.nodes() if n.observed]
+                recs = fam.airr_records(self._refdata)
+                observed_outcomes = fam.observed_outcomes()
+                for node, rec, node_outcome in zip(
+                    observed_nodes, recs, observed_outcomes
+                ):
+                    self._stamp_lineage_metadata(rec, clone_idx, node)
+                    if expose_provenance and node_outcome is not None:
+                        _inject_truth_columns(node_outcome, self._refdata, rec)
+                    records.append(rec)
+                    outcomes.append(node_outcome)
+                continue
+
+            # Corruption path: per observed node, run the library-prep /
+            # sequencing corruption plan FROM the node's post-SHM
+            # simulation, then merge the founder-recombination + SHM
+            # provenance with the corruption trace / events so the AIRR
+            # record reports trims, v/d/j, SHM counts AND artefact
+            # counters (n_quality_errors, n_pcr_errors, n_indels, …).
+            node_outcomes = fam.node_outcomes()
+            for node, base_outcome in zip(tree.nodes(), node_outcomes):
+                if base_outcome is None:
+                    continue
+                node_seed = clone_seed + 1 + node.id
+                corruption_outcome = self._post.run_from(
+                    base_outcome.final_simulation(), node_seed, strict=strict
+                )
+                merged = _eng.merge_lineage_corruption(
+                    base_outcome, corruption_outcome
+                )
+                rec = outcome_to_airr_record(
+                    merged,
+                    self._refdata,
+                    sequence_id=f"clone{clone_idx}_node{node.id}",
+                )
+                self._stamp_lineage_metadata(rec, clone_idx, node)
+                if expose_provenance:
+                    _inject_truth_columns(merged, self._refdata, rec)
+                records.append(rec)
+                outcomes.append(merged)
+
+        result = SimulationResultWithLineages(
+            records, outcomes=outcomes, lineage_trees=lineage_trees
+        )
+        if validate_records:
+            from ._validation import (
+                _raise_on_family_validation_failure,
+                _raise_on_validation_failure,
+            )
+
+            _raise_on_validation_failure(result.validate_records(self._refdata))
+            _raise_on_family_validation_failure(result.validate_families())
+        return result
+
+    @staticmethod
+    def _stamp_lineage_metadata(rec: Dict[str, Any], clone_idx: int, node) -> None:
+        """Stamp clone + lineage-node provenance onto an AIRR record."""
+        rec["clone_id"] = clone_idx
+        rec["lineage_node_id"] = node.id
+        rec["lineage_parent_id"] = (
+            node.parent_id if node.parent_id is not None else -1
+        )
+        rec["lineage_generation"] = node.generation
+        rec["lineage_abundance"] = node.abundance
+        # AIRR-standard abundance field that abundance-aware tools (Change-O /
+        # SCOPer / dowser) read. Mirror lineage_abundance so the genotype-
+        # collapsed observed cell carries its represented count both ways.
+        rec["duplicate_count"] = node.abundance
+        rec["lineage_affinity"] = node.affinity
+        rec["sequence_id"] = f"clone{clone_idx}_node{node.id}"
+
+    def __repr__(self) -> str:
+        return (
+            f"<CompiledLineageExperiment n_clones={self._step.n_clones} "
+            f"max_gen={self._step.max_generations} chain={self._refdata.chain_type}>"
+        )
+
+
+class CompiledRepertoireExperiment:
+    """A compiled non-tree clonal-repertoire experiment.
+
+    Wraps a pre-fork :class:`GenAIRR._engine.CompiledSimulator` (the
+    founder recombination, run once per clone) and an optional
+    post-fork simulator (the per-read library-prep / sequencing passes).
+    Per clone a size is drawn from a heavy-tailed distribution via
+    ``_engine.sample_clone_sizes``; that many reads are emitted through
+    the post-fork passes and identical reads are genotype-collapsed into
+    AIRR records carrying a standard ``duplicate_count``.
+
+    When there are no post-fork passes every read of a clone is
+    identical, so the clone collapses to a single record whose
+    ``duplicate_count`` equals the drawn size (the no-corruption
+    shortcut). Every record carries an integer ``clone_id``.
+    """
+
+    __slots__ = (
+        "_pre",
+        "_post",
+        "_step",
+        "_refdata",
+        "_dataconfig",
+        "_metadata",
+    )
+
+    def __init__(
+        self,
+        pre_simulator: "_engine.CompiledSimulator",
+        post_simulator: Optional["_engine.CompiledSimulator"],
+        step: "_RepertoireForkStep",
+        refdata: "_engine.RefDataConfig",
+        dataconfig: Optional["DataConfig"] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        self._pre = pre_simulator
+        self._post = post_simulator
+        self._step = step
+        self._refdata = refdata
+        self._dataconfig = dataconfig
+        self._metadata = dict(metadata) if metadata else {}
+
+    @property
+    def refdata(self) -> "_engine.RefDataConfig":
+        return self._refdata
+
+    def run_records(
+        self,
+        *,
+        seed: int = 0,
+        strict: bool = False,
+        validate_records: bool = False,
+        expose_provenance: bool = False,
+    ) -> "SimulationResult":
+        """Draw per-clone sizes, emit reads through the post-fork passes,
+        collapse identical reads, and return a :class:`SimulationResult`
+        whose record dicts carry ``clone_id`` and ``duplicate_count``.
+        """
+        import GenAIRR._engine as _engine
+
+        from ._airr_record import outcome_to_airr_record
+        from .result import SimulationResult, _inject_truth_columns
+
+        step = self._step
+        sizes = _engine.sample_clone_sizes(
+            step.n_clones,
+            int(seed),
+            kind=step.size_distribution,
+            exponent=step.exponent,
+            mu=step.mu,
+            sigma=step.sigma,
+            max_size=step.max_size,
+            unexpanded_fraction=step.unexpanded_fraction,
+        )
+        records: List[Dict[str, Any]] = []
+        outcomes: List["_engine.Outcome"] = []
+        for clone_idx, size in enumerate(sizes):
+            clone_seed = int(seed) + clone_idx * 1_000_000
+            founder = self._pre.run(seed=clone_seed, strict=strict)
+            founder_sim = founder.final_simulation()
+            if self._post is None:
+                # No post-fork passes: all ``size`` copies are
+                # identical -> one record with duplicate_count = size.
+                rec = outcome_to_airr_record(
+                    founder,
+                    self._refdata,
+                    sequence_id=f"clone{clone_idx}_read0",
+                )
+                rec["clone_id"] = clone_idx
+                rec["duplicate_count"] = int(size)
+                if expose_provenance:
+                    _inject_truth_columns(founder, self._refdata, rec)
+                records.append(rec)
+                outcomes.append(founder)
+                continue
+            # Post-fork passes present: simulate ``size`` reads and
+            # collapse by emitted sequence.
+            by_seq: Dict[str, list] = {}
+            for read_idx in range(int(size)):
+                desc = self._post.run_from(
+                    founder_sim, clone_seed + 1 + read_idx, strict=strict
+                )
+                rec = outcome_to_airr_record(
+                    desc,
+                    self._refdata,
+                    sequence_id=f"clone{clone_idx}_read{read_idx}",
+                )
+                key = rec["sequence"]
+                if key in by_seq:
+                    by_seq[key][0] += 1
+                else:
+                    by_seq[key] = [1, desc, rec]
+            for count, desc, rec in by_seq.values():
+                rec["clone_id"] = clone_idx
+                rec["duplicate_count"] = count
+                if expose_provenance:
+                    _inject_truth_columns(desc, self._refdata, rec)
+                records.append(rec)
+                outcomes.append(desc)
+        result = SimulationResult(records, outcomes=outcomes)
+        if validate_records:
+            from ._validation import (
+                _raise_on_family_validation_failure,
+                _raise_on_validation_failure,
+            )
+
+            _raise_on_validation_failure(result.validate_records(self._refdata))
+            _raise_on_family_validation_failure(result.validate_families())
+        return result
+
+    def __repr__(self) -> str:
+        return (
+            f"<CompiledRepertoireExperiment n_clones={self._step.n_clones} "
+            f"size_distribution={self._step.size_distribution!r} "
+            f"chain={self._refdata.chain_type}>"
+        )
diff --git a/src/GenAIRR/_pipeline_ir.py b/src/GenAIRR/_pipeline_ir.py
index 0a69856..ac7b6c7 100644
--- a/src/GenAIRR/_pipeline_ir.py
+++ b/src/GenAIRR/_pipeline_ir.py
@@ -188,6 +188,54 @@ class _ClonalForkStep:
     size: int
 
 
+@dataclass(frozen=True)
+class _RepertoireForkStep:
+    """Marks a non-tree clonal-repertoire fork.
+
+    Generalizes :class:`_ClonalForkStep`: instead of a fixed
+    ``per_clone`` size, each clone draws a size from a heavy-tailed
+    distribution (with an unexpanded-singleton fraction). That many
+    reads pass through the post-fork library-prep / sequencing passes
+    and identical reads collapse into AIRR records carrying a standard
+    ``duplicate_count``.
+
+    Causes :meth:`Experiment.compile` to return a
+    :class:`~GenAIRR._compiled.CompiledRepertoireExperiment`.
+    """
+
+    n_clones: int
+    size_distribution: str
+    exponent: float
+    mu: float
+    sigma: float
+    max_size: int
+    unexpanded_fraction: float
+
+
+@dataclass(frozen=True)
+class _LineageForkStep:
+    """Marks an affinity-maturation lineage fork.
+
+    Causes :meth:`Experiment.compile` to return a
+    :class:`~GenAIRR._compiled.CompiledLineageExperiment` that grows BCR
+    clonal lineage trees via the Rust ``simulate_family_outcomes`` kernel
+    and returns per-observed-node AIRR records with lineage metadata.
+    """
+
+    n_clones: int
+    max_generations: int
+    n_max: int
+    n_sample: int
+    rate: float
+    lambda_base: float
+    selection_strength: float
+    beta: float
+    target_aa: Optional[str]
+    mature_substitutions: int
+    s5f_model: str
+    allow_extinction: bool = False
+
+
 @dataclass(frozen=True)
 class _PairedEndStep:
     """One ``paired_end(r1_length=…, r2_length=…, insert_size=…)``
diff --git a/src/GenAIRR/experiment.py b/src/GenAIRR/experiment.py
index eb34e74..88cee7a 100644
--- a/src/GenAIRR/experiment.py
+++ b/src/GenAIRR/experiment.py
@@ -49,7 +49,12 @@
     _lower_recombine,
     lower_step,
 )
-from ._compiled import CompiledClonalExperiment, CompiledExperiment
+from ._compiled import (
+    CompiledClonalExperiment,
+    CompiledExperiment,
+    CompiledLineageExperiment,
+    CompiledRepertoireExperiment,
+)
 from ._refdata_resolver import (
     _CONFIG_ALIASES,
     ExperimentInput,
@@ -69,10 +74,12 @@
     _ClonalForkStep,
     _CorruptStep,
     _InvertDStep,
+    _LineageForkStep,
     _MutateStep,
     _PairedEndStep,
-    _ReceptorRevisionStep,
     _RecombineStep,
+    _ReceptorRevisionStep,
+    _RepertoireForkStep,
 )
 
 
@@ -330,8 +337,8 @@ def _coerce(label_for_error: str, value: object) -> float:
 # collapses descendant diversity (every clone member shares an
 # identical effect because the pass ran once on the parent IR).
 #
-# :meth:`Experiment.expand_clones` scans the already-appended step
-# list against this table; the first match is rejected with a
+# The clonal fork methods scan the already-appended step list against
+# this table; the first match is rejected with a
 # message naming the offending DSL method and the canonical fix.
 #
 # Each entry is ``(predicate, dsl_method_name)`` where the predicate
@@ -343,9 +350,9 @@ def _descendant_phase_step_classifier(step):
     """Return the DSL method name a descendant-phase ``step`` came
     from, or ``None`` if ``step`` is not a descendant-phase step.
 
-    Single source of truth for the unified guard in
-    :meth:`Experiment.expand_clones`. Adding a new descendant-phase
-    DSL method means appending a clause here (and adding the
+    Single source of truth for the unified guard in the flat clonal
+    fork methods. Adding a new descendant-phase DSL method means
+    appending a clause here (and adding the
     companion spec test in
     ``tests/test_clonal_descendant_phase_guards.py``).
     """
@@ -968,6 +975,13 @@ def expand_clones(
         records automatically. Passing ``n`` is allowed only when
         ``n == n_clones * per_clone``.
 
+        .. deprecated::
+            Use :meth:`clonal_lineage` for BCR affinity-maturation
+            trees, or :meth:`clonal_repertoire` for TCR / flat-BCR
+            abundance repertoires with clone-size distributions.
+            ``expand_clones`` remains supported for fixed-size flat
+            star expansion.
+
         Constraints:
         - Both ``n_clones`` and ``per_clone`` must be positive ints.
         - At most one expansion per pipeline; calling this method
@@ -980,15 +994,28 @@ def expand_clones(
         shares the same recombination provenance (V allele, trim,
         NP bases) and only diverges through the post-fork passes.
         """
+        warnings.warn(
+            "Experiment.expand_clones() is deprecated. Use "
+            "Experiment.clonal_lineage() for BCR affinity-maturation trees "
+            "(internal SHM, lineage_trees, no per_clone) or "
+            "Experiment.clonal_repertoire() for TCR / flat-BCR abundance "
+            "repertoires with clone-size distributions and duplicate_count. "
+            "Neither is a drop-in replacement for fixed per_clone output; "
+            "expand_clones() remains supported for legacy fixed-size flat "
+            "star expansion.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if not isinstance(n_clones, int) or isinstance(n_clones, bool) or n_clones < 1:
             raise ValueError(
                 f"n_clones must be a positive int, got {n_clones!r}"
             )
         if not isinstance(per_clone, int) or isinstance(per_clone, bool) or per_clone < 1:
             raise ValueError(f"per_clone must be a positive int, got {per_clone!r}")
-        if any(isinstance(s, _ClonalForkStep) for s in self._steps):
+        if self._has_clonal_fork():
             raise ValueError(
-                "expand_clones() can only be called once per pipeline"
+                "expand_clones() / clonal_lineage() / clonal_repertoire() "
+                "can only be called once per pipeline"
             )
         # Unified descendant-phase ordering guard. Scan the appended
         # step list for any step that came from a descendant-phase
@@ -1023,6 +1050,373 @@ def expand_clones(
         self._steps.append(_ClonalForkStep(n_clones=n_clones, size=per_clone))
         return self
 
+    def clonal_repertoire(
+        self,
+        *,
+        n_clones: int,
+        size_distribution: str = "power_law",
+        exponent: float = 2.0,
+        mu: float = 1.0,
+        sigma: float = 1.0,
+        max_size: int = 1000,
+        unexpanded_fraction: float = 0.0,
+    ) -> "Experiment":
+        """Expand the pipeline into a non-tree clonal **repertoire**.
+
+        This is the non-tree clonal model (contrast with
+        :meth:`clonal_lineage`, which grows real affinity-maturation
+        lineage *trees*). It generalizes the deprecated
+        :meth:`expand_clones`: instead of a fixed ``per_clone`` size,
+        each clone draws a size from a heavy-tailed distribution (with
+        an unexpanded-singleton fraction). That many reads pass through
+        the post-fork library-prep / sequencing passes, and identical
+        reads are genotype-collapsed into AIRR records carrying a
+        standard ``duplicate_count`` that records abundance.
+
+        Like :meth:`expand_clones`, this call marks the per-clone /
+        per-read boundary: steps appended *before* it run **once per
+        clone** (typically just :meth:`recombine`, establishing the
+        clonal V/D/J + trim + NP backbone); steps appended *after* it
+        run **once per read** (the library-prep / sequencing passes
+        that introduce per-read divergence).
+
+        Concrete shape::
+
+            exp = (Experiment.on("human_igh")
+                   .recombine()
+                   .clonal_repertoire(n_clones=200, max_size=500,
+                                      unexpanded_fraction=0.3)
+                   .sequencing_errors(rate=0.005))
+            result = exp.run_records(seed=0)
+            # Each record carries a `clone_id` and a `duplicate_count`.
+
+        Parameters:
+        - ``n_clones`` — number of clones (positive int).
+        - ``size_distribution`` — ``"power_law"`` or ``"lognormal"``.
+        - ``exponent`` — power-law exponent (``> 0``; used when
+          ``size_distribution="power_law"``).
+        - ``mu`` / ``sigma`` — lognormal parameters (``sigma >= 0``;
+          used when ``size_distribution="lognormal"``).
+        - ``max_size`` — clamps the largest clone. Because the total
+          number of reads simulated is roughly the **sum** of the drawn
+          sizes when post-fork passes are present, keep ``max_size``
+          modest to bound runtime.
+        - ``unexpanded_fraction`` — fraction of clones forced to size 1
+          (unexpanded singletons), in ``[0, 1]``.
+
+        TCR works out of the box (no SHM): the reads diverge only
+        through the post-fork sequencing passes. Note that ``mutate``
+        after this call is rejected on TCR by :meth:`mutate`'s own TCR
+        guard; on BCR an optional post-fork ``mutate`` adds flat SHM.
+
+        **No-corruption shortcut:** a clone with no post-fork passes
+        emits identical copies, so it collapses to a single record whose
+        ``duplicate_count`` equals the drawn size.
+
+        Constraints:
+        - At most one fork per pipeline — calling this when an
+          :meth:`expand_clones`, :meth:`clonal_lineage`, or
+          :meth:`clonal_repertoire` fork is already present raises
+          ``ValueError``.
+        - The same descendant-phase ordering guard as
+          :meth:`expand_clones` applies: a descendant-phase step (e.g.
+          ``.mutate()``) appended *before* this call is rejected;
+          :meth:`recombine` is fine.
+        """
+        if not isinstance(n_clones, int) or isinstance(n_clones, bool) or n_clones < 1:
+            raise ValueError(
+                f"n_clones must be a positive int, got {n_clones!r}"
+            )
+        if size_distribution not in ("power_law", "lognormal"):
+            raise ValueError(
+                "size_distribution must be 'power_law' or 'lognormal', got "
+                f"{size_distribution!r}"
+            )
+        if not (isinstance(exponent, (int, float)) and exponent > 0):
+            raise ValueError(f"exponent must be > 0, got {exponent!r}")
+        if not (isinstance(sigma, (int, float)) and sigma >= 0):
+            raise ValueError(f"sigma must be >= 0, got {sigma!r}")
+        if not isinstance(max_size, int) or isinstance(max_size, bool) or max_size < 1:
+            raise ValueError(f"max_size must be a positive int, got {max_size!r}")
+        if not (
+            isinstance(unexpanded_fraction, (int, float))
+            and 0.0 <= unexpanded_fraction <= 1.0
+        ):
+            raise ValueError(
+                "unexpanded_fraction must be in [0, 1], got "
+                f"{unexpanded_fraction!r}"
+            )
+        if any(
+            isinstance(s, (_ClonalForkStep, _RepertoireForkStep, _LineageForkStep))
+            for s in self._steps
+        ):
+            raise ValueError(
+                "clonal_repertoire() / expand_clones() / clonal_lineage() "
+                "can only be called once per pipeline"
+            )
+        # Same descendant-phase ordering guard as expand_clones: a
+        # descendant-phase step (mutate / corruption / paired_end)
+        # appended before the fork is rejected.
+        for step in self._steps:
+            offending_method = _descendant_phase_step_classifier(step)
+            if offending_method is None:
+                continue
+            if offending_method == "mutate":
+                detail = (
+                    "SHM is descendant-specific in GenAIRR's current "
+                    "clonal model"
+                )
+            else:
+                detail = (
+                    "it is descendant-specific and must be sampled "
+                    "independently for each read"
+                )
+            raise ValueError(
+                f"{offending_method} must be called after "
+                f"clonal_repertoire(); {detail}. Move "
+                f"{offending_method}(...) after clonal_repertoire(...)."
+            )
+        self._steps.append(
+            _RepertoireForkStep(
+                n_clones=n_clones,
+                size_distribution=size_distribution,
+                exponent=float(exponent),
+                mu=float(mu),
+                sigma=float(sigma),
+                max_size=max_size,
+                unexpanded_fraction=float(unexpanded_fraction),
+            )
+        )
+        return self
+
+    def clonal_lineage(
+        self,
+        *,
+        n_clones: int,
+        max_generations: int = 10,
+        n_max: int = 1000,
+        n_sample: int = 50,
+        rate: float = 0.05,
+        lambda_base: float = 1.5,
+        selection_strength: float = 0.0,
+        beta: float = 1.0,
+        target_aa: Optional[str] = None,
+        mature_substitutions: int = 5,
+        s5f_model: str = "hh_s5f",
+        allow_extinction: bool = False,
+    ) -> "Experiment":
+        """Grow BCR lineage trees (neutral by default; set ``selection_strength > 0``
+        and optionally ``target_aa`` to enable affinity maturation).
+
+        Each clone gets its own lineage tree produced by the Rust
+        ``simulate_family_outcomes`` kernel. The returned
+        :class:`~GenAIRR.result.SimulationResultWithLineages` carries:
+
+        - ``.records`` — one AIRR dict per *observed* (genotype-collapsed) cell,
+          tagged with ``clone_id``, ``lineage_node_id``, ``lineage_parent_id``,
+          ``lineage_generation``, ``lineage_abundance``, and
+          ``lineage_affinity``. Because identical genotypes are collapsed before
+          sampling, the number of records per clone is ≤ ``n_sample``; the
+          ``lineage_abundance`` field accounts for how many sampled cells were
+          represented by each observed record. Mutation counts (``n_mutations``,
+          ``n_v_mutations``, …) are pool-derived and self-consistent.
+        - ``.lineage_trees`` — one :class:`~GenAIRR._engine.LineageTree`
+          per clone for ground-truth export (Newick, FASTA, node table TSV).
+
+        Parameters
+        ----------
+        n_clones:
+            Number of independent clonal lineages to grow.
+        max_generations:
+            Maximum depth of the lineage tree (≤ 1000).
+        n_max:
+            Per-generation LIVING-population carrying capacity: the live
+            population per generation is capped at this (the tree can contain
+            more total nodes across generations). It is NOT a hard cap on the
+            total number of cells per clone.
+        n_sample:
+            Number of cells to sample as observed leaves. Records returned
+            per clone are ≤ ``n_sample`` because identical genotypes are
+            collapsed (duplicates are counted in ``lineage_abundance``).
+        rate:
+            Per-base SHM rate for within-lineage mutations.
+        lambda_base:
+            Poisson mean for offspring count at affinity 0.
+        selection_strength:
+            Selection pressure; ``0.0`` = neutral drift (fitness is 1.0 for
+            every cell). This disables selection but does not force
+            ``lineage_affinity`` to 0 when a target sequence is supplied.
+            Set ``> 0`` to enable affinity maturation; combine with
+            ``target_aa`` for a fixed sequence target.
+        beta:
+            Scaling factor for the affinity term in ``exp(−beta·distance)``.
+        target_aa:
+            Amino-acid sequence of the full receptor used to compute
+            per-cell affinity via a BLOSUM62-weighted distance (compared
+            position-wise against the cell's translated receptor; only
+            the overlapping prefix is scored when lengths differ). Must be
+            a non-empty string of standard amino-acid letters
+            (``ACDEFGHIKLMNPQRSTVWY``). When ``None``, an auto target is
+            generated from the founder by applying ``mature_substitutions``
+            random residue changes whenever selection is enabled. In fully
+            neutral mode (``selection_strength=0`` and ``target_aa=None``),
+            no affinity model is built and ``lineage_affinity`` is 0.
+        mature_substitutions:
+            Number of amino-acid substitutions used to build the auto
+            target (when ``target_aa`` is ``None``).
+        s5f_model:
+            Bundled S5F kernel name for within-lineage mutation context
+            (``"hh_s5f"``, ``"hkl_s5f"``, …).
+        allow_extinction:
+            Sampling draws from the LIVING final-generation population, so a
+            founder that draws 0 offspring goes extinct and yields zero
+            observed cells/records. With ``allow_extinction=False`` (default)
+            each requested clone is conditioned on survival: an extinct family
+            is retried with a fresh deterministic sub-seed (up to a bounded
+            number of attempts) so you reliably get ``n_clones`` families. With
+            ``allow_extinction=True`` extinction is accepted and the extinct
+            clone is skipped, producing fewer families than ``n_clones``.
+
+        **BCR-only guard:** ``clonal_lineage`` applies S5F somatic
+        hypermutation, which is a B-cell process. Calling it on a TCR-configured
+        experiment raises ``ValueError`` (immunoglobulin / BCR loci only). TCR
+        clone-size primitives exist in the engine but are not yet exposed as a
+        DSL workflow.
+        """
+        import math
+        import warnings
+
+        # --- BCR-only guard (mirror mutate()'s TCR rejection) ---
+        # clonal_lineage applies S5F somatic hypermutation, a B-cell process,
+        # so it must reject TCR loci. ``_is_tcr_refdata`` inspects the first V
+        # allele name prefix (TR* => TCR, IG* => BCR) on the already-bound
+        # refdata, exactly as mutate() does. Firing here, at call time and
+        # before compile(), guarantees the clear BCR-only message instead of a
+        # downstream cartridge / compile error.
+        if self._is_tcr_refdata():
+            locus = self._refdata.v_allele(0).name if self._refdata.v_pool_size() else "?"
+            raise ValueError(
+                "clonal_lineage models B-cell somatic hypermutation and "
+                "supports immunoglobulin (BCR) loci only; the locus "
+                f"'{locus}' is a TCR locus. (TCR clone-size simulation is not "
+                "yet exposed in the DSL.)"
+            )
+        # --- allow_extinction ---
+        if not isinstance(allow_extinction, bool):
+            raise ValueError(
+                f"allow_extinction must be a bool, got {allow_extinction!r}"
+            )
+
+        # --- n_clones ---
+        if isinstance(n_clones, bool) or not isinstance(n_clones, int) or n_clones < 1:
+            raise ValueError(f"n_clones must be a positive int, got {n_clones!r}")
+        # --- max_generations ---
+        if (
+            isinstance(max_generations, bool)
+            or not isinstance(max_generations, int)
+            or max_generations < 1
+            or max_generations > 1000
+        ):
+            raise ValueError(
+                f"max_generations must be a positive int <= 1000, got {max_generations!r}"
+            )
+        # --- n_max ---
+        if isinstance(n_max, bool) or not isinstance(n_max, int) or n_max < 1:
+            raise ValueError(f"n_max must be a positive int, got {n_max!r}")
+        # --- n_sample ---
+        if isinstance(n_sample, bool) or not isinstance(n_sample, int) or n_sample < 1:
+            raise ValueError(f"n_sample must be a positive int, got {n_sample!r}")
+        # --- rate ---
+        if not isinstance(rate, (int, float)) or not math.isfinite(rate) or rate < 0 or rate > 1:
+            raise ValueError(f"rate must be a float in [0, 1], got {rate!r}")
+        # --- lambda_base ---
+        if not isinstance(lambda_base, (int, float)) or not math.isfinite(lambda_base) or lambda_base < 0:
+            raise ValueError(f"lambda_base must be a finite non-negative float, got {lambda_base!r}")
+        # --- beta ---
+        if not isinstance(beta, (int, float)) or not math.isfinite(beta) or beta < 0:
+            raise ValueError(f"beta must be a finite non-negative float, got {beta!r}")
+        # --- selection_strength ---
+        if not isinstance(selection_strength, (int, float)) or not math.isfinite(selection_strength) or selection_strength < 0:
+            raise ValueError(
+                f"selection_strength must be a finite non-negative float, got {selection_strength!r}"
+            )
+        # --- mature_substitutions ---
+        if (
+            isinstance(mature_substitutions, bool)
+            or not isinstance(mature_substitutions, int)
+            or mature_substitutions < 0
+        ):
+            raise ValueError(
+                f"mature_substitutions must be a non-negative int, got {mature_substitutions!r}"
+            )
+        # --- target_aa ---
+        _VALID_AA = set("ACDEFGHIKLMNPQRSTVWY")
+        if target_aa is not None:
+            if not isinstance(target_aa, str) or len(target_aa) == 0:
+                raise ValueError(
+                    "target_aa must be a non-empty amino-acid string "
+                    "(letters from ACDEFGHIKLMNPQRSTVWY)"
+                )
+            target_aa = target_aa.upper()
+            invalid = set(target_aa) - _VALID_AA
+            if invalid:
+                raise ValueError(
+                    f"target_aa contains invalid characters {sorted(invalid)!r}; "
+                    "only standard amino-acid letters (ACDEFGHIKLMNPQRSTVWY) are allowed"
+                )
+            if len(target_aa) < 30:
+                warnings.warn(
+                    f"target_aa has length {len(target_aa)}, which is shorter than a typical "
+                    "receptor sequence (~300+ aa). If this is an epitope sequence rather than "
+                    "the full receptor, affinity scoring will be based only on the overlapping "
+                    "prefix — consider supplying the full translated receptor instead.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+        # --- s5f_model (validate at call time, not at run time) ---
+        from GenAIRR._s5f_loader import _BUILTIN_S5F_MODELS
+        _s5f_key = s5f_model.lower().strip()
+        if _s5f_key not in _BUILTIN_S5F_MODELS:
+            avail = ", ".join(f'"{k}"' for k in sorted(_BUILTIN_S5F_MODELS))
+            raise ValueError(
+                f"Unknown s5f_model {s5f_model!r}. Available: {avail}"
+            )
+        # --- reject duplicate fork steps ---
+        for s in self._steps:
+            if isinstance(s, (_ClonalForkStep, _RepertoireForkStep, _LineageForkStep)):
+                raise ValueError(
+                    "clonal_lineage() / expand_clones() / clonal_repertoire() "
+                    "can only be called once per pipeline"
+                )
+        # --- descendant-phase guard (same as expand_clones) ---
+        for step in self._steps:
+            offending_method = _descendant_phase_step_classifier(step)
+            if offending_method is None:
+                continue
+            raise ValueError(
+                f"{offending_method} must be called after "
+                f"clonal_lineage(); it is descendant-specific and must be sampled "
+                f"independently for each clone member. Move "
+                f"{offending_method}(...) after clonal_lineage(...)."
+            )
+        self._steps.append(
+            _LineageForkStep(
+                n_clones=n_clones,
+                max_generations=max_generations,
+                n_max=n_max,
+                n_sample=n_sample,
+                rate=rate,
+                lambda_base=lambda_base,
+                selection_strength=selection_strength,
+                beta=beta,
+                target_aa=target_aa,
+                mature_substitutions=mature_substitutions,
+                s5f_model=s5f_model,
+                allow_extinction=allow_extinction,
+            )
+        )
+        return self
+
     def mutate(
         self,
         *,
@@ -1181,17 +1575,20 @@ def _check_v_subregion_rates_satisfiable(
         )
 
     def _has_clonal_fork(self) -> bool:
-        """Whether :meth:`expand_clones` has already been appended.
+        """Whether any clonal fork has already been appended.
 
         Used by the DSL ordering guards on :meth:`invert_d`,
-        :meth:`receptor_revision`, and :meth:`expand_clones` itself.
-        Each step lowers into either the pre-fork (per-clone) or
-        the post-fork (per-descendant) plan; misordered calls used
-        to silently lower into the wrong half and produce records
-        with empty / default fields. The guards reject those
-        configurations at the DSL boundary.
+        :meth:`receptor_revision`, and the clonal fork methods.
+        Each fork has a pre-fork parent/founder phase; recombination-time
+        mechanisms must be inherited by every descendant, emitted copy,
+        or lineage node. Misordered calls used to lower into the wrong
+        half and produce records with empty / default fields. The guards
+        reject those configurations at the DSL boundary.
         """
-        return any(isinstance(s, _ClonalForkStep) for s in self._steps)
+        return any(
+            isinstance(s, (_ClonalForkStep, _RepertoireForkStep, _LineageForkStep))
+            for s in self._steps
+        )
 
     def _is_tcr_refdata(self) -> bool:
         """Detect whether the bound refdata is a TCR locus.
@@ -1659,10 +2056,11 @@ def invert_d(self, *, prob: float = 0.05) -> "Experiment":
         # even at prob=1.0. Reject at the DSL boundary instead.
         if self._has_clonal_fork():
             raise ValueError(
-                "invert_d must be called before expand_clones(); D "
+                "invert_d must be called before the clonal fork; D "
                 "inversion is a recombination-time decision and must "
                 "be inherited by all clone descendants. Move the "
-                "invert_d(...) call before expand_clones(...)."
+                "invert_d(...) call before clonal_lineage(...), "
+                "clonal_repertoire(...), or expand_clones(...)."
             )
         if not isinstance(prob, (int, float)):
             raise ValueError(
@@ -1757,10 +2155,11 @@ def receptor_revision(self, *, prob: float = 0.05) -> "Experiment":
         if self._has_clonal_fork():
             raise ValueError(
                 "receptor_revision must be called before "
-                "expand_clones(); receptor revision is a "
+                "the clonal fork; receptor revision is a "
                 "recombination-time decision and must be inherited by "
                 "all clone descendants. Move the "
-                "receptor_revision(...) call before expand_clones(...)."
+                "receptor_revision(...) call before clonal_lineage(...), "
+                "clonal_repertoire(...), or expand_clones(...)."
             )
         if not isinstance(prob, (int, float)):
             raise ValueError(
@@ -2227,6 +2626,124 @@ def compile(self, *, allow_curatable_refdata: Optional[bool] = None):
                 metadata=self._metadata,
             )
 
+        # if a `_RepertoireForkStep` is present, split the step list
+        # at it and compile the pre-fork (per-clone) simulator plus an
+        # optional post-fork (per-read) simulator, mirroring the
+        # `_ClonalForkStep` branch. Per-clone sizes are drawn at run
+        # time from the heavy-tailed distribution; identical reads are
+        # collapsed into `duplicate_count`-carrying records.
+        repertoire_idx = next(
+            (
+                i
+                for i, s in enumerate(self._steps)
+                if isinstance(s, _RepertoireForkStep)
+            ),
+            None,
+        )
+        if repertoire_idx is not None:
+            repertoire_step: _RepertoireForkStep = self._steps[repertoire_idx]
+            pre_steps = self._steps[:repertoire_idx]
+            post_steps = self._steps[repertoire_idx + 1 :]
+            pre_simulator = self._build_simulator(
+                pre_steps,
+                contracts,
+                any_lock,
+                replace_fn=_replace,
+                allow_curatable_refdata=allow_curatable_refdata,
+            )
+            # post_steps may be empty — that's the pure-copy case
+            # (each clone collapses to one record with
+            # duplicate_count = size). When present, the post-fork
+            # plan inherits the parent's V/D/J/NP backbone, so
+            # any_lock=False and no recombination facts on this side
+            # (same as the clonal branch).
+            post_simulator = None
+            if post_steps:
+                post_simulator = self._build_simulator(
+                    post_steps,
+                    contracts,
+                    any_lock=False,
+                    replace_fn=_replace,
+                    allow_curatable_refdata=allow_curatable_refdata,
+                )
+            return CompiledRepertoireExperiment(
+                pre_simulator,
+                post_simulator,
+                repertoire_step,
+                self._refdata,
+                dataconfig=self._dataconfig,
+                metadata=self._metadata,
+            )
+
+        # if a `_LineageForkStep` is present, compile a
+        # CompiledLineageExperiment: pre-fork steps (recombine) become
+        # the founder simulator; post-fork steps must be empty (the
+        # lineage engine handles mutation internally).
+        lineage_idx = next(
+            (i for i, s in enumerate(self._steps) if isinstance(s, _LineageForkStep)),
+            None,
+        )
+        if lineage_idx is not None:
+            lineage_step: _LineageForkStep = self._steps[lineage_idx]
+            pre_steps = self._steps[:lineage_idx]
+            post_steps = self._steps[lineage_idx + 1:]
+            # Steps after clonal_lineage() are per-observed-cell
+            # library-prep / sequencing artefact passes (the same
+            # post-fork set expand_clones() allows). SHM is internal
+            # to the lineage engine, so .mutate() is rejected; the
+            # paired-end read layout is not yet wired through the
+            # per-cell corruption merge, so reject it for now too.
+            for s in post_steps:
+                if isinstance(s, _MutateStep):
+                    raise ValueError(
+                        "SHM is internal to clonal_lineage; do not add "
+                        ".mutate() after it. Set the within-lineage SHM rate "
+                        "via clonal_lineage(rate=...)."
+                    )
+                if isinstance(s, _PairedEndStep):
+                    raise ValueError(
+                        "paired_end not yet supported with clonal_lineage; "
+                        "apply per-read library-prep passes (sequencing_errors, "
+                        "pcr_amplify, polymerase_indels, end_loss_*, "
+                        "ambiguous_base_calls, random_strand_orientation) instead."
+                    )
+                if not isinstance(s, _CorruptStep):
+                    raise ValueError(
+                        "Only per-read library-prep / sequencing artefact passes "
+                        "may follow clonal_lineage(); got "
+                        f"{type(s).__name__}."
+                    )
+            pre_simulator = self._build_simulator(
+                pre_steps,
+                contracts,
+                any_lock,
+                replace_fn=_replace,
+                allow_curatable_refdata=allow_curatable_refdata,
+            )
+            # Build the per-cell corruption simulator from the
+            # post-fork steps, mirroring the clonal branch's
+            # post_simulator (no recombination facts on this side, so
+            # any_lock=False and the analyzer skips the productive
+            # precondition check).
+            post_simulator = None
+            if post_steps:
+                post_simulator = self._build_simulator(
+                    post_steps,
+                    contracts,
+                    any_lock=False,
+                    replace_fn=_replace,
+                    allow_curatable_refdata=allow_curatable_refdata,
+                )
+            return CompiledLineageExperiment(
+                pre_simulator,
+                lineage_step,
+                self._refdata,
+                post_simulator=post_simulator,
+                post_steps=tuple(post_steps),
+                dataconfig=self._dataconfig,
+                metadata=self._metadata,
+            )
+
         simulator = self._build_simulator(
             self._steps,
             contracts,
@@ -2374,6 +2891,33 @@ def run_records(
                 expose_provenance=expose_provenance,
                 validate_records=validate_records,
             )
+        elif isinstance(compiled, CompiledRepertoireExperiment):
+            if n is not None:
+                raise ValueError(
+                    "The 'n' parameter is not supported for clonal_repertoire "
+                    "experiments. The number of records depends on the per-clone "
+                    "sizes drawn from the heavy-tailed distribution and the "
+                    "read-collapse, not a fixed product."
+                )
+            result = compiled.run_records(
+                seed=seed,
+                strict=strict,
+                expose_provenance=expose_provenance,
+                validate_records=validate_records,
+            )
+        elif isinstance(compiled, CompiledLineageExperiment):
+            if n is not None:
+                raise ValueError(
+                    "The 'n' parameter is not supported for clonal_lineage experiments. "
+                    "The number of observed records depends on the lineage trees "
+                    "grown from n_clones / n_sample / selection, not a fixed product."
+                )
+            result = compiled.run_records(
+                seed=seed,
+                strict=strict,
+                expose_provenance=expose_provenance,
+                validate_records=validate_records,
+            )
         else:
             result = compiled.run_records(
                 n=1 if n is None else n,
@@ -2539,4 +3083,3 @@ def _steps_with_locks_resolved(self) -> List[Any]:
 
     def __repr__(self) -> str:
         return f"<Experiment chain={self.chain_type} steps={self.step_count}>"
-
diff --git a/src/GenAIRR/result.py b/src/GenAIRR/result.py
index f5d8054..b2baabc 100644
--- a/src/GenAIRR/result.py
+++ b/src/GenAIRR/result.py
@@ -1318,3 +1318,38 @@ def _write_delimited(
                 # don't carry literal ``"None"`` strings.
                 row = {k: ("" if v is None else v) for k, v in source.items()}
                 writer.writerow(row)
+
+
+class SimulationResultWithLineages(SimulationResult):
+    """A :class:`SimulationResult` that also carries per-clone lineage trees.
+
+    Produced by :meth:`CompiledLineageExperiment.run_records`. Adds a
+    ``.lineage_trees`` property that exposes the raw
+    :class:`~GenAIRR._engine.LineageTree` objects (one per clone) for
+    ground-truth export via ``.to_newick()``, ``.to_fasta()``, and
+    ``.to_node_table_tsv()``.
+    """
+
+    __slots__ = ("_lineage_trees",)
+
+    def __init__(
+        self,
+        records: "Sequence[Dict[str, Any]]",
+        outcomes: "Optional[Sequence]" = None,
+        parents: "Optional[Sequence]" = None,
+        lineage_trees: "Optional[Sequence]" = None,
+    ) -> None:
+        super().__init__(records, outcomes, parents)
+        self._lineage_trees: "Optional[List]" = (
+            list(lineage_trees) if lineage_trees is not None else None
+        )
+
+    @property
+    def lineage_trees(self) -> "Optional[List]":
+        """Per-clone :class:`~GenAIRR._engine.LineageTree` objects, or ``None``.
+
+        Each tree supports ``.validate()``, ``.to_newick()``,
+        ``.to_fasta()``, and ``.to_node_table_tsv()`` for ground-truth
+        export and downstream phylogenetic analysis.
+        """
+        return self._lineage_trees
diff --git a/tests/test_clonal_dsl_ordering_guards.py b/tests/test_clonal_dsl_ordering_guards.py
index a7a0452..44a8042 100644
--- a/tests/test_clonal_dsl_ordering_guards.py
+++ b/tests/test_clonal_dsl_ordering_guards.py
@@ -70,7 +70,7 @@ def test_invert_d_after_expand_clones_raises() -> None:
         exp.invert_d(prob=1.0)
     msg = str(exc_info.value)
     # Must name BOTH the correct ordering and the biological reason.
-    assert "invert_d must be called before expand_clones" in msg, msg
+    assert "invert_d must be called before the clonal fork" in msg, msg
     assert "recombination" in msg.lower(), msg
     assert "inherited" in msg, msg
 
@@ -93,7 +93,7 @@ def test_receptor_revision_after_expand_clones_raises() -> None:
         exp.receptor_revision(prob=1.0)
     msg = str(exc_info.value)
     assert (
-        "receptor_revision must be called before expand_clones" in msg
+        "receptor_revision must be called before the clonal fork" in msg
     ), msg
     assert "recombination" in msg.lower(), msg
     assert "inherited" in msg, msg
@@ -225,7 +225,7 @@ def test_error_message_invert_d_names_recombination_time() -> None:
         exp.invert_d(prob=0.5)
     msg = str(exc_info.value)
     # The correct ordering.
-    assert "before expand_clones" in msg
+    assert "before the clonal fork" in msg
     # The biological reason — pin the phrase that names the
     # mechanism.
     assert re.search(r"recombination[- ]?time", msg.lower())
@@ -242,7 +242,7 @@ def test_error_message_receptor_revision_names_recombination_time() -> None:
     with pytest.raises(ValueError) as exc_info:
         exp.receptor_revision(prob=0.5)
     msg = str(exc_info.value)
-    assert "before expand_clones" in msg
+    assert "before the clonal fork" in msg
     assert re.search(r"recombination[- ]?time", msg.lower())
     assert "Move" in msg or "move" in msg
 
@@ -273,7 +273,7 @@ def test_pin_guarded_invert_d_post_fork_drop_cannot_reach_runtime() -> None:
     is rejected at the DSL boundary so the silent runtime
     behaviour is structurally unreachable. Pin the rejection
     rather than the broken runtime output."""
-    with pytest.raises(ValueError, match="before expand_clones"):
+    with pytest.raises(ValueError, match="before the clonal fork"):
         (
             ga.Experiment.on("human_igh")
             .recombine()
@@ -284,7 +284,7 @@ def test_pin_guarded_invert_d_post_fork_drop_cannot_reach_runtime() -> None:
 
 def test_pin_guarded_receptor_revision_post_fork_drop_cannot_reach_runtime() -> None:
     """Bug B's silent drop closed at the DSL boundary."""
-    with pytest.raises(ValueError, match="before expand_clones"):
+    with pytest.raises(ValueError, match="before the clonal fork"):
         (
             ga.Experiment.on("human_igh")
             .recombine()
@@ -349,3 +349,38 @@ def test_has_clonal_fork_helper_reflects_pipeline_state() -> None:
     assert base._has_clonal_fork() is False
     forked = base.expand_clones(n_clones=1, per_clone=1)
     assert forked._has_clonal_fork() is True
+
+
+# ──────────────────────────────────────────────────────────────────
+# A pipeline may contain at most one clonal fork — stacking any two
+# of expand_clones / clonal_lineage / clonal_repertoire raises a
+# clear "once per pipeline" error at DSL time (not a confusing
+# downstream TypeError at compile()).
+# ──────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "first, second",
+    [
+        ("clonal_repertoire", "clonal_lineage"),
+        ("clonal_lineage", "clonal_repertoire"),
+        ("expand_clones", "clonal_lineage"),
+        ("expand_clones", "clonal_repertoire"),
+        ("clonal_lineage", "expand_clones"),
+        ("clonal_repertoire", "expand_clones"),
+    ],
+)
+def test_two_clonal_forks_rejected_once_per_pipeline(first: str, second: str) -> None:
+    """Any second clonal fork is rejected with the canonical
+    "once per pipeline" message, regardless of which fork came
+    first."""
+    kwargs = {
+        "expand_clones": dict(n_clones=1, per_clone=2),
+        "clonal_lineage": dict(n_clones=1),
+        "clonal_repertoire": dict(n_clones=1),
+    }
+    exp = getattr(
+        ga.Experiment.on("human_igh").recombine(), first
+    )(**kwargs[first])
+    with pytest.raises(ValueError, match="once per pipeline"):
+        getattr(exp, second)(**kwargs[second])
diff --git a/tests/test_clonal_lineage_corruption.py b/tests/test_clonal_lineage_corruption.py
new file mode 100644
index 0000000..1de6e27
--- /dev/null
+++ b/tests/test_clonal_lineage_corruption.py
@@ -0,0 +1,76 @@
+"""Library-prep / sequencing artefact passes applied to clonal_lineage reads.
+
+``clonal_lineage`` produces per-observed-cell AIRR records. Prior to this
+slice the compiler rejected ALL steps following the lineage fork, so the
+reads were pristine — no sequencing / PCR / indel noise, a regression vs
+``expand_clones``. These tests pin the corruption-aware path: each observed
+cell now gets independent corruption, and its record reports BOTH the
+founder recombination provenance (trims, v/d/j) AND the SHM counts AND the
+corruption counts (``n_quality_errors`` etc.).
+"""
+import pytest
+
+import GenAIRR as ga
+
+
+def test_clonal_lineage_with_corruption_applies_artefacts():
+    exp = (
+        ga.Experiment.on("human_igh")
+        .recombine()
+        .clonal_lineage(
+            n_clones=3, max_generations=4, n_max=200, n_sample=20, rate=0.02
+        )
+        .sequencing_errors(rate=0.05)
+    )
+    result = exp.run_records(seed=0)
+    assert len(result.records) > 0
+    # Corruption was actually applied to at least one observed cell.
+    assert any(r.get("n_quality_errors", 0) > 0 for r in result.records)
+    for r in result.records:
+        # Founder recombination provenance survives the merge.
+        assert r["v_call"]
+        assert r["clone_id"] in (0, 1, 2)
+        assert "lineage_node_id" in r
+        # SHM per-segment counts stay self-consistent after corruption merge.
+        assert r["n_mutations"] == (
+            r["n_v_mutations"]
+            + r["n_d_mutations"]
+            + r["n_j_mutations"]
+            + r["n_np_mutations"]
+        )
+
+
+def test_clonal_lineage_rejects_mutate_after():
+    with pytest.raises(Exception):
+        (
+            ga.Experiment.on("human_igh")
+            .recombine()
+            .clonal_lineage(n_clones=2, n_sample=5)
+            .mutate(rate=0.05)
+            .compile()
+        )
+
+
+def test_clonal_lineage_rejects_paired_end_after():
+    with pytest.raises(Exception):
+        (
+            ga.Experiment.on("human_igh")
+            .recombine()
+            .clonal_lineage(n_clones=2, n_sample=5)
+            .paired_end(r1_length=150, insert_size=300)
+            .compile()
+        )
+
+
+def test_clonal_lineage_without_corruption_still_works():
+    result = (
+        ga.Experiment.on("human_igh")
+        .recombine()
+        .clonal_lineage(n_clones=2, n_sample=10, rate=0.02)
+        # seed chosen so the families survive: sampling draws from the living
+        # final generation, so an all-extinct seed (e.g. 0) yields zero records.
+        .run_records(seed=1)
+    )
+    assert len(result.records) > 0
+    # No corruption pass → no quality errors stamped.
+    assert all(r.get("n_quality_errors", 0) == 0 for r in result.records)
diff --git a/tests/test_clonal_lineage_dsl.py b/tests/test_clonal_lineage_dsl.py
new file mode 100644
index 0000000..5642792
--- /dev/null
+++ b/tests/test_clonal_lineage_dsl.py
@@ -0,0 +1,57 @@
+import pytest
+import GenAIRR as ga
+
+
+def _exp(**kw):
+    base = dict(n_clones=3, max_generations=6, n_max=200, n_sample=20,
+                rate=0.05, lambda_base=1.6)
+    base.update(kw)
+    return ga.Experiment.on("human_igh").recombine().clonal_lineage(**base)
+
+
+def test_clonal_lineage_runs_and_tags_records():
+    result = _exp().run_records(seed=0)
+    # Sampling draws from the LIVING final-generation population, so an extinct
+    # clone (founder drew 0 offspring) contributes no records. By default the
+    # founder-survival guard retries extinct clones with fresh deterministic
+    # sub-seeds, so every requested clone survives and all clone_ids are present.
+    assert len(result.records) > 0
+    cids = {r["clone_id"] for r in result.records}
+    assert cids == {0, 1, 2}
+    for r in result.records:
+        assert r["v_call"]            # real recombination provenance
+        assert r["sequence"]
+        assert "lineage_node_id" in r
+        assert "lineage_generation" in r
+        assert "lineage_abundance" in r
+        assert "lineage_affinity" in r
+        # mutation counts are self-consistent (pool-derived)
+        per_seg = (r["n_v_mutations"] + r["n_d_mutations"]
+                   + r["n_j_mutations"] + r["n_np_mutations"])
+        assert r["n_mutations"] == per_seg
+
+
+def test_clonal_lineage_exposes_per_clone_trees():
+    result = _exp(n_clones=2).run_records(seed=1)
+    trees = result.lineage_trees
+    assert trees is not None and len(trees) == 2
+    for t in trees:
+        t.validate()                  # raises if malformed
+        assert t.to_newick().endswith(";")
+        assert t.to_fasta().startswith(">node")
+
+
+def test_clonal_lineage_selection_raises_mean_affinity():
+    neutral = _exp(n_clones=2, selection_strength=0.0, target_aa="A" * 100,
+                   beta=0.001, max_generations=10, n_sample=40).run_records(seed=5)
+    selected = _exp(n_clones=2, selection_strength=50.0, target_aa="A" * 100,
+                    beta=0.001, max_generations=10, n_sample=40).run_records(seed=5)
+    def mean_aff(res):
+        a = [r["lineage_affinity"] for r in res.records]
+        return sum(a) / max(1, len(a))
+    assert mean_aff(selected) > mean_aff(neutral)
+
+
+def test_clonal_lineage_validation_rejects_bad_args():
+    with pytest.raises((ValueError, TypeError)):
+        ga.Experiment.on("human_igh").recombine().clonal_lineage(n_clones=0)
diff --git a/tests/test_clonal_lineage_validation.py b/tests/test_clonal_lineage_validation.py
new file mode 100644
index 0000000..4ae3267
--- /dev/null
+++ b/tests/test_clonal_lineage_validation.py
@@ -0,0 +1,294 @@
+"""Tests for clonal_lineage DSL validation (Fix 1–4)."""
+import pytest
+import GenAIRR as ga
+
+
+def _base_exp(**kw):
+    defaults = dict(n_clones=2, max_generations=4, n_max=100, n_sample=10,
+                    rate=0.03, lambda_base=1.5)
+    defaults.update(kw)
+    return ga.Experiment.on("human_igh").recombine().clonal_lineage(**defaults)
+
+
+# ---------------------------------------------------------------------------
+# Fix 1: lambda_mut is removed — passing it must be a TypeError
+# ---------------------------------------------------------------------------
+
+def test_lambda_mut_removed_raises_typeerror():
+    """lambda_mut is no longer a parameter; passing it must raise TypeError."""
+    with pytest.raises(TypeError):
+        _base_exp(lambda_mut=0.1)
+
+
+# ---------------------------------------------------------------------------
+# Fix 3: target_aa validation
+# ---------------------------------------------------------------------------
+
+def test_target_aa_empty_string_raises():
+    with pytest.raises(ValueError, match="non-empty amino-acid"):
+        _base_exp(target_aa="")
+
+
+def test_target_aa_invalid_chars_raises():
+    with pytest.raises(ValueError, match="invalid characters"):
+        _base_exp(target_aa="not-an-aa-$$")
+
+
+def test_target_aa_invalid_chars_mixed_raises():
+    with pytest.raises(ValueError, match="invalid characters"):
+        _base_exp(target_aa="ACDEFG1HIKLM")
+
+
+def test_target_aa_non_aa_chars_raises():
+    """Characters outside the 20 standard AAs (X, B, Z, U, O, numbers) must be rejected."""
+    with pytest.raises(ValueError):
+        _base_exp(target_aa="XBZUOACDEFGHIKL")
+    with pytest.raises(ValueError):
+        _base_exp(target_aa="ACDEFG0HIKLMN")
+
+
+def test_target_aa_none_is_valid():
+    """None (auto-target) must be accepted without error."""
+    exp = _base_exp(target_aa=None)
+    assert exp is not None
+
+
+def test_target_aa_valid_long_string():
+    """A realistic receptor-length AA string must be accepted."""
+    aa = "ACDEFGHIKLMNPQRSTVWY" * 20   # 400 aa — long enough for no warning
+    exp = _base_exp(target_aa=aa)
+    assert exp is not None
+
+
+def test_target_aa_short_triggers_warning(recwarn):
+    """A very short target_aa (< 30 aa) must emit a UserWarning."""
+    aa = "ACDEFGHIKLMN"  # 12 aa — below the 30-aa soft threshold
+    import warnings
+    with warnings.catch_warnings(record=True) as caught:
+        warnings.simplefilter("always")
+        _base_exp(target_aa=aa)
+    user_warnings = [w for w in caught if issubclass(w.category, UserWarning)]
+    assert len(user_warnings) == 1
+    assert "receptor" in str(user_warnings[0].message).lower()
+
+
+# ---------------------------------------------------------------------------
+# Fix 3: s5f_model validation at call time
+# ---------------------------------------------------------------------------
+
+def test_s5f_model_bogus_raises_at_call_time():
+    """A typo in s5f_model must error at clonal_lineage() call time."""
+    with pytest.raises(ValueError, match="Unknown s5f_model"):
+        _base_exp(s5f_model="bogus")
+
+
+def test_s5f_model_valid_accepted():
+    for model in ("hh_s5f", "hkl_s5f", "hh_s5f_60", "hh_s5f_opposite"):
+        exp = _base_exp(s5f_model=model)
+        assert exp is not None
+
+
+# ---------------------------------------------------------------------------
+# Fix 4: run_records kwargs on the lineage path
+# ---------------------------------------------------------------------------
+
+def _compiled_lineage():
+    return _base_exp()
+
+
+def test_n_not_none_raises_for_lineage():
+    """Record count is not a fixed product for lineage; passing n is rejected."""
+    with pytest.raises(ValueError):
+        _compiled_lineage().run_records(n=5)
+
+
+def test_seed_and_strict_still_work_for_lineage():
+    """seed and strict must work."""
+    result = _compiled_lineage().run_records(seed=42, strict=False)
+    assert len(result.records) > 0
+
+
+# ---------------------------------------------------------------------------
+# validate_records / expose_provenance now SUPPORTED on the lineage path
+# ---------------------------------------------------------------------------
+
+def _lineage_exp(**kw):
+    """A clonal_lineage experiment that reliably yields observed records."""
+    defaults = dict(
+        n_clones=2, max_generations=6, n_max=300, n_sample=15,
+        rate=0.05, lambda_base=1.5, selection_strength=0.0, target_aa=None,
+    )
+    defaults.update(kw)
+    return ga.Experiment.on("human_igh").recombine().clonal_lineage(**defaults)
+
+
+# seed chosen so the (single-founder) families survive: sampling now draws from
+# the living final generation, so an all-extinct seed yields zero records.
+_SURVIVING_SEED = 1
+
+
+def test_validate_records_true_no_corruption_does_not_raise():
+    result = _lineage_exp().run_records(seed=_SURVIVING_SEED, validate_records=True)
+    assert len(result.records) > 0
+
+
+def test_validate_records_true_with_corruption_does_not_raise():
+    exp = _lineage_exp().sequencing_errors(rate=0.02)
+    result = exp.run_records(seed=_SURVIVING_SEED, validate_records=True)
+    assert len(result.records) > 0
+
+
+def test_outcomes_are_retained_and_index_aligned():
+    result = _lineage_exp().run_records(seed=0)
+    assert result.outcomes is not None
+    assert len(result.outcomes) == len(result.records)
+
+
+def test_outcomes_retained_with_corruption():
+    result = _lineage_exp().sequencing_errors(rate=0.02).run_records(seed=0)
+    assert result.outcomes is not None
+    assert len(result.outcomes) == len(result.records)
+
+
+def test_validate_records_called_directly_is_clean_no_corruption():
+    exp = _lineage_exp()
+    refdata = exp.compile().refdata
+    result = exp.run_records(seed=0)
+    report = result.validate_records(refdata)
+    assert report.ok, report.summary()
+
+
+def test_validate_records_called_directly_is_clean_with_corruption():
+    exp = _lineage_exp().sequencing_errors(rate=0.02)
+    refdata = exp.compile().refdata
+    result = exp.run_records(seed=0)
+    report = result.validate_records(refdata)
+    assert report.ok, report.summary()
+
+
+def test_expose_provenance_adds_truth_v_call_no_corruption():
+    result = _lineage_exp().run_records(seed=_SURVIVING_SEED, expose_provenance=True)
+    assert len(result.records) > 0
+    for rec in result.records:
+        assert "truth_v_call" in rec
+        assert rec["truth_v_call"]
+
+
+def test_expose_provenance_adds_truth_v_call_with_corruption():
+    exp = _lineage_exp().sequencing_errors(rate=0.02)
+    result = exp.run_records(seed=_SURVIVING_SEED, expose_provenance=True)
+    assert len(result.records) > 0
+    for rec in result.records:
+        assert "truth_v_call" in rec
+        assert rec["truth_v_call"]
+
+
+# ---------------------------------------------------------------------------
+# TCR guard: clonal_lineage models B-cell SHM => BCR/Ig only
+# ---------------------------------------------------------------------------
+
+def test_clonal_lineage_rejects_tcr_locus():
+    """clonal_lineage applies S5F SHM (a B-cell process) so it must reject
+    TCR loci with a clear BCR-only message — mirroring mutate()'s TCR guard.
+    The guard must fire at clonal_lineage() call time (before compile) so the
+    error is the BCR-only message, not a cartridge/compile error.
+    """
+    with pytest.raises(ValueError) as excinfo:
+        (
+            ga.Experiment.on("human_tcrb")
+            .allow_curatable_refdata()
+            .recombine()
+            .clonal_lineage(n_clones=2, n_sample=10)
+        )
+    msg = str(excinfo.value)
+    assert "BCR" in msg
+    assert "TCR" in msg
+
+
+def test_clonal_lineage_allows_bcr_locus():
+    """Sanity: the BCR-only guard does not fire on an Ig locus."""
+    exp = ga.Experiment.on("human_igh").recombine().clonal_lineage(
+        n_clones=1, n_sample=5
+    )
+    assert exp is not None
+
+
+# ---------------------------------------------------------------------------
+# Founder-survival guard: every requested clone yields a surviving family
+# ---------------------------------------------------------------------------
+
+def test_survival_guard_yields_all_clones_by_default():
+    """By default (allow_extinction=False) every requested clone survives via
+    deterministic retry, so clone_ids == {0..n_clones-1} — even at a
+    lambda_base where a single founder goes extinct ~25% of the time.
+    """
+    result = (
+        ga.Experiment.on("human_igh")
+        .recombine()
+        .clonal_lineage(n_clones=5, max_generations=6, n_max=200,
+                        n_sample=15, rate=0.05, lambda_base=1.5)
+        .run_records(seed=0)
+    )
+    cids = {r["clone_id"] for r in result.records}
+    assert cids == {0, 1, 2, 3, 4}
+
+
+def test_survival_guard_is_deterministic():
+    """Same top-level seed => same result, even with retries."""
+    def run():
+        return (
+            ga.Experiment.on("human_igh")
+            .recombine()
+            .clonal_lineage(n_clones=5, max_generations=6, n_max=200,
+                            n_sample=15, rate=0.05, lambda_base=1.5)
+            .run_records(seed=0)
+        )
+    a = run().records
+    b = run().records
+    assert len(a) == len(b)
+    assert [r["sequence"] for r in a] == [r["sequence"] for r in b]
+    assert [r["clone_id"] for r in a] == [r["clone_id"] for r in b]
+
+
+def test_allow_extinction_true_allows_missing_clones():
+    """allow_extinction=True accepts extinction and skips extinct clones, so
+    clone_ids is a (possibly proper) subset of the requested range.
+    """
+    result = (
+        ga.Experiment.on("human_igh")
+        .recombine()
+        .clonal_lineage(n_clones=5, max_generations=6, n_max=200,
+                        n_sample=15, rate=0.05, lambda_base=1.5,
+                        allow_extinction=True)
+        .run_records(seed=0)
+    )
+    cids = {r["clone_id"] for r in result.records}
+    assert cids <= {0, 1, 2, 3, 4}
+
+
+# ---------------------------------------------------------------------------
+# AIRR-standard duplicate_count mirrors lineage_abundance
+# ---------------------------------------------------------------------------
+
+def test_records_emit_duplicate_count_equal_to_abundance():
+    result = _base_exp(n_clones=3, lambda_base=1.6).run_records(seed=0)
+    assert len(result.records) > 0
+    for rec in result.records:
+        assert "duplicate_count" in rec
+        assert rec["duplicate_count"] == rec["lineage_abundance"]
+
+
+# ---------------------------------------------------------------------------
+# Smoke test: a valid call still works end-to-end
+# ---------------------------------------------------------------------------
+
+def test_valid_clonal_lineage_call_works():
+    result = _base_exp(
+        n_clones=2,
+        selection_strength=0.0,
+        target_aa=None,
+    ).run_records(seed=7)
+    assert len(result.records) > 0
+    for rec in result.records:
+        assert "lineage_affinity" in rec
+        assert "clone_id" in rec
diff --git a/tests/test_clonal_plan_split_contract.py b/tests/test_clonal_plan_split_contract.py
index b65d1b9..6d4a45c 100644
--- a/tests/test_clonal_plan_split_contract.py
+++ b/tests/test_clonal_plan_split_contract.py
@@ -188,7 +188,10 @@ def test_pin_scaffold_ancestor_phase_post_fork_rejects(method) -> None:
         .recombine()
         .expand_clones(n_clones=1, per_clone=2)
     )
-    with pytest.raises(ValueError, match="before expand_clones"):
+    # Message generalized to cover all three clonal forks
+    # (clonal_lineage / clonal_repertoire / expand_clones); the
+    # rejection behaviour is unchanged.
+    with pytest.raises(ValueError, match="before the clonal fork"):
         _ANCESTOR_PHASE_BUILDERS[method](exp)
 
 
diff --git a/tests/test_clonal_repertoire.py b/tests/test_clonal_repertoire.py
new file mode 100644
index 0000000..a774b3a
--- /dev/null
+++ b/tests/test_clonal_repertoire.py
@@ -0,0 +1,123 @@
+"""Tests for the non-tree clonal repertoire DSL (``clonal_repertoire``).
+
+``clonal_repertoire`` generalizes the deprecated ``expand_clones``: per
+clone a size is drawn from a heavy-tailed distribution (with an
+unexpanded-singleton fraction), that many reads are emitted through the
+post-fork library-prep / sequencing passes, and identical reads are
+genotype-collapsed into AIRR records carrying ``duplicate_count``.
+"""
+
+from collections import Counter
+
+import pytest
+
+import GenAIRR as ga
+
+
+def test_bcr_repertoire_with_corruption_has_clone_ids_and_duplicate_count():
+    r = (ga.Experiment.on("human_igh").recombine()
+         .clonal_repertoire(n_clones=20, max_size=50, unexpanded_fraction=0.3)
+         .sequencing_errors(rate=0.005)
+         .run_records(seed=0))
+    assert len(r.records) > 0
+    assert all("duplicate_count" in rec and rec["duplicate_count"] >= 1 for rec in r.records)
+    assert all("clone_id" in rec for rec in r.records)
+    per_clone = Counter(rec["clone_id"] for rec in r.records)
+    assert min(per_clone.values()) >= 1
+
+
+def test_pure_copy_no_postfork_one_record_per_clone():
+    r = (ga.Experiment.on("human_igh").recombine()
+         .clonal_repertoire(n_clones=10, max_size=100).run_records(seed=1))
+    # no post-fork passes -> each clone collapses to ONE record with
+    # duplicate_count = size
+    per_clone = Counter(rec["clone_id"] for rec in r.records)
+    assert all(c == 1 for c in per_clone.values())
+    assert all(rec["duplicate_count"] >= 1 for rec in r.records)
+
+
+def test_tcr_repertoire_runs_no_shm():
+    r = (ga.Experiment.on("human_tcrb").allow_curatable_refdata().recombine()
+         .clonal_repertoire(n_clones=10, max_size=50).run_records(seed=0))
+    assert len(r.records) > 0
+    assert all(rec.get("n_mutations", 0) == 0 for rec in r.records)
+    assert all("duplicate_count" in rec for rec in r.records)
+
+
+def test_tcr_repertoire_rejects_mutate():
+    with pytest.raises(Exception):
+        (ga.Experiment.on("human_tcrb").allow_curatable_refdata().recombine()
+         .clonal_repertoire(n_clones=5, max_size=10).mutate(rate=0.05).compile())
+
+
+def test_determinism():
+    mk = lambda: (ga.Experiment.on("human_igh").recombine()
+                  .clonal_repertoire(n_clones=8, max_size=40).run_records(seed=3))
+    a, b = mk(), mk()
+    assert [r["sequence"] for r in a.records] == [r["sequence"] for r in b.records]
+    assert [r["duplicate_count"] for r in a.records] == [r["duplicate_count"] for r in b.records]
+
+
+def test_validation_works():
+    r = (ga.Experiment.on("human_igh").recombine()
+         .clonal_repertoire(n_clones=5, max_size=30).sequencing_errors(rate=0.003)
+         .run_records(seed=0, validate_records=True))
+    assert len(r.records) > 0
+
+
+def test_clonal_repertoire_rejects_premature_mutate():
+    # .mutate() before clonal_repertoire is a descendant-phase step and
+    # must be rejected (SHM is descendant-specific).
+    with pytest.raises(ValueError):
+        (ga.Experiment.on("human_igh").recombine()
+         .mutate(rate=0.05).clonal_repertoire(n_clones=5, max_size=10))
+
+
+def test_clonal_repertoire_rejects_double_fork():
+    with pytest.raises(ValueError):
+        (ga.Experiment.on("human_igh").recombine()
+         .clonal_repertoire(n_clones=5, max_size=10)
+         .clonal_repertoire(n_clones=3, max_size=5))
+
+
+@pytest.mark.parametrize("method", ["invert_d", "receptor_revision"])
+def test_recombination_time_edits_reject_after_clonal_repertoire(method):
+    exp = (
+        ga.Experiment.on("human_igh")
+        .recombine()
+        .clonal_repertoire(n_clones=5, max_size=10)
+    )
+    with pytest.raises(ValueError, match="before the clonal fork"):
+        getattr(exp, method)(prob=0.05)
+
+
+@pytest.mark.parametrize("method", ["invert_d", "receptor_revision"])
+def test_recombination_time_edits_reject_after_clonal_lineage(method):
+    exp = (
+        ga.Experiment.on("human_igh")
+        .recombine()
+        .clonal_lineage(n_clones=1, max_generations=2, n_sample=2)
+    )
+    with pytest.raises(ValueError, match="before the clonal fork"):
+        getattr(exp, method)(prob=0.05)
+
+
+def test_clonal_repertoire_validates_args():
+    base = ga.Experiment.on("human_igh").recombine()
+    with pytest.raises(ValueError):
+        base.clonal_repertoire(n_clones=0)
+    with pytest.raises(ValueError):
+        ga.Experiment.on("human_igh").recombine().clonal_repertoire(
+            n_clones=5, size_distribution="bogus")
+    with pytest.raises(ValueError):
+        ga.Experiment.on("human_igh").recombine().clonal_repertoire(
+            n_clones=5, exponent=0.0)
+    with pytest.raises(ValueError):
+        ga.Experiment.on("human_igh").recombine().clonal_repertoire(
+            n_clones=5, sigma=-1.0)
+    with pytest.raises(ValueError):
+        ga.Experiment.on("human_igh").recombine().clonal_repertoire(
+            n_clones=5, max_size=0)
+    with pytest.raises(ValueError):
+        ga.Experiment.on("human_igh").recombine().clonal_repertoire(
+            n_clones=5, unexpanded_fraction=1.5)
diff --git a/tests/test_clone_size_sampler.py b/tests/test_clone_size_sampler.py
new file mode 100644
index 0000000..becb4ca
--- /dev/null
+++ b/tests/test_clone_size_sampler.py
@@ -0,0 +1,29 @@
+from GenAIRR import _engine
+
+
+def test_sample_clone_sizes_basic():
+    sizes = _engine.sample_clone_sizes(500, 0)           # defaults: power_law exp 2
+    assert len(sizes) == 500
+    assert all(s >= 1 for s in sizes)
+    assert max(sizes) > 1                                 # heavy tail reaches >1
+
+
+def test_sample_clone_sizes_deterministic():
+    a = _engine.sample_clone_sizes(200, 7, kind="power_law", exponent=2.0)
+    b = _engine.sample_clone_sizes(200, 7, kind="power_law", exponent=2.0)
+    assert a == b
+
+
+def test_unexpanded_fraction_forces_singletons():
+    sizes = _engine.sample_clone_sizes(1000, 3, unexpanded_fraction=0.4)
+    assert sum(1 for s in sizes if s == 1) >= 400
+
+
+def test_lognormal_and_validation():
+    import pytest
+    sizes = _engine.sample_clone_sizes(100, 1, kind="lognormal", mu=1.0, sigma=1.0, max_size=10000)
+    assert len(sizes) == 100 and all(1 <= s <= 10000 for s in sizes)
+    with pytest.raises(ValueError):
+        _engine.sample_clone_sizes(10, 0, kind="bogus")
+    with pytest.raises(ValueError):
+        _engine.sample_clone_sizes(10, 0, unexpanded_fraction=2.0)
diff --git a/tests/test_docs_website_contract.py b/tests/test_docs_website_contract.py
index 1078e0b..f6c9712 100644
--- a/tests/test_docs_website_contract.py
+++ b/tests/test_docs_website_contract.py
@@ -42,6 +42,7 @@
 _AUDIT_DOC = _REPO_ROOT / "audit-docs" / "docs_website_audit.md"
 _README = _REPO_ROOT / "README.md"
 _WEBSITE = _REPO_ROOT / "website"
+_SITE_DOCS = _REPO_ROOT / "site_docs"
 _DOCS = _REPO_ROOT / "docs"
 _OLD_DOCS = _REPO_ROOT / "_old_docs"
 _DEPLOY_WORKFLOW = _REPO_ROOT / ".github" / "workflows" / "deploy-docs.yml"
@@ -302,9 +303,16 @@ def test_pin_present_readme_hosted_doc_urls_resolve_to_live_website_pages() -> N
             continue
         # Strip query string / fragment if present.
         path = path.split("?", 1)[0].split("#", 1)[0]
-        target = _WEBSITE / path
-        if not target.is_file():
-            missing.append((url, str(target)))
+        # The live site is the MkDocs build of `site_docs/` (deploy-docs.yml).
+        # A hosted `<p>.html` URL resolves if EITHER it exists as a flat page
+        # in the preserved `website/` rollback tree, OR it maps to a MkDocs
+        # source page `site_docs/<p>.md` (use_directory_urls: false ⇒
+        # `foo/bar.md` → `foo/bar.html`).
+        resolves = (_WEBSITE / path).is_file()
+        if not resolves and path.endswith(".html"):
+            resolves = (_SITE_DOCS / (path[:-len(".html")] + ".md")).is_file()
+        if not resolves:
+            missing.append((url, str(_WEBSITE / path)))
     assert not missing, (
         "README hosted-doc URLs do not resolve to live website "
         "pages:\n"
diff --git a/tests/test_expand_clones_deprecation.py b/tests/test_expand_clones_deprecation.py
new file mode 100644
index 0000000..5ef1532
--- /dev/null
+++ b/tests/test_expand_clones_deprecation.py
@@ -0,0 +1,32 @@
+"""Deprecation contract for ``Experiment.expand_clones``.
+
+Verifies that:
+
+1. Calling ``expand_clones()`` emits a ``DeprecationWarning`` that
+   mentions ``clonal_lineage``.
+2. Despite the warning, ``expand_clones()`` continues to produce the
+   correct star-topology clonal output (n_clones * per_clone records
+   with the expected ``clone_id`` values).
+"""
+
+import warnings
+
+import pytest
+
+import GenAIRR as ga
+
+
+def test_expand_clones_emits_deprecation_warning_but_still_works():
+    exp = ga.Experiment.on("human_igh").recombine()
+    with pytest.warns(DeprecationWarning, match="clonal_lineage"):
+        exp = exp.expand_clones(n_clones=2, per_clone=3)
+    # Pipeline continues to work after the deprecation warning.
+    exp = exp.mutate(count=5)
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", DeprecationWarning)
+        result = exp.run_records(seed=0)
+    # 2 clones × 3 descendants = 6 records.
+    assert len(result) == 6
+    # Exactly the expected clone IDs are present.
+    clone_ids = {r["clone_id"] for r in result}
+    assert clone_ids == {0, 1}
diff --git a/tests/test_lineage_engine.py b/tests/test_lineage_engine.py
new file mode 100644
index 0000000..b8f4aba
--- /dev/null
+++ b/tests/test_lineage_engine.py
@@ -0,0 +1,207 @@
+import pytest
+import GenAIRR as ga
+from GenAIRR import _engine
+from GenAIRR._s5f_loader import load_builtin_s5f_kernel
+
+S5F_MODEL = "hh_s5f"
+
+
+def _founder():
+    compiled = ga.Experiment.on("human_igh").recombine().compile()
+    outcome = compiled.run(n=1, seed=0)[0]
+    return outcome.final_simulation()
+
+
+def _kernel():
+    return load_builtin_s5f_kernel(S5F_MODEL)
+
+
+def test_simulate_lineage_produces_valid_tree():
+    founder = _founder()
+    mut, sub = _kernel()
+    tree = _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 2024)
+    tree.validate()
+    assert len(tree) >= 1
+    nodes = tree.nodes()
+    assert nodes[0].parent_id is None
+    assert nodes[0].generation == 0
+    assert any(n.mutations_from_parent > 0 for n in nodes) or len(tree) == 1
+
+
+def test_simulate_lineage_exports_are_wellformed():
+    founder = _founder()
+    mut, sub = _kernel()
+    tree = _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 7)
+    nwk = tree.to_newick()
+    assert nwk.endswith(";")
+    assert nwk.count("(") == nwk.count(")")
+    assert tree.to_fasta().startswith(">node")
+    tsv = tree.to_node_table_tsv()
+    assert tsv.splitlines()[0].startswith("node_id\t")
+    assert len(tsv.splitlines()) == len(tree) + 1
+
+
+def test_simulate_lineage_is_deterministic():
+    founder = _founder()
+    mut, sub = _kernel()
+    a = _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 99)
+    b = _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 99)
+    assert a.to_newick() == b.to_newick()
+    assert a.to_fasta() == b.to_fasta()
+
+
+def test_simulate_lineage_rejects_bad_kernel():
+    founder = _founder()
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(founder, [0.1] * 10, [0.1] * 10, 0.05, 1.5, 0.0, 8, 500, 30, 0)
+
+
+def test_simulate_lineage_rejects_nonfinite_or_negative_kernel():
+    founder = _founder()
+    _mut, sub = _kernel()
+    # NaN in the mutability table must raise (not panic across the FFI boundary).
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(
+            founder, [float("nan")] * 1024, sub, 0.05, 1.5, 0.0, 8, 500, 30, 0
+        )
+    # Negative value in the mutability table must raise too.
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(
+            founder, [-1.0] + [0.0] * 1023, sub, 0.05, 1.5, 0.0, 8, 500, 30, 0
+        )
+
+
+def test_simulate_lineage_rejects_zero_n_sample():
+    founder = _founder()
+    mut, sub = _kernel()
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 0, 0)
+
+
+def test_simulate_lineage_rejects_nonfinite_lambda():
+    founder = _founder()
+    mut, sub = _kernel()
+    # NaN/inf/negative lambda_base must raise, not silently return a founder-only tree.
+    for bad in (float("nan"), float("inf"), -1.0):
+        with pytest.raises(ValueError):
+            _engine.simulate_lineage(founder, mut, sub, 0.05, bad, 0.0, 8, 500, 30, 0)
+    # lambda_mut is validated too.
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, float("nan"), 8, 500, 30, 0)
+
+
+def test_simulate_lineage_rejects_excessive_max_generations():
+    founder = _founder()
+    mut, sub = _kernel()
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 5000, 500, 30, 0)
+
+
+def test_affinity_node_getter_and_neutral_default():
+    founder = _founder()
+    mut, sub = _kernel()
+    # Default call (no affinity args) is the neutral path; affinity getter exists.
+    tree = _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 2024)
+    nodes = tree.nodes()
+    assert all(hasattr(n, "affinity") for n in nodes)
+    # neutral path leaves affinity at 0.0
+    assert all(n.affinity == 0.0 for n in nodes)
+
+
+def test_affinity_selection_raises_mean_affinity():
+    founder = _founder()
+    mut, sub = _kernel()
+    founder_aa_len = 100  # target length need not match; distance uses min length
+    # Use "A" (alanine) target with small beta so exp(-beta*d) stays in (0,1)
+    # and doesn't underflow. "W"*100 + beta=1.0 collapses to denorm for IgH
+    # sequences, making neutral and selected indistinguishable.
+    target = "A" * founder_aa_len  # a fixed explicit target both runs share
+    common = dict(target_aa=target, beta=0.001, mature_substitutions=5)
+    # neutral: selection_strength = 0 (affinities populated but not selected on)
+    neutral = _engine.simulate_lineage(
+        founder, mut, sub, 0.1, 1.6, 0.0, 12, 800, 60, 4242,
+        selection_strength=0.0, **common,
+    )
+    # strong selection toward the same target, same seed
+    selected = _engine.simulate_lineage(
+        founder, mut, sub, 0.1, 1.6, 0.0, 12, 800, 60, 4242,
+        selection_strength=50.0, **common,
+    )
+
+    def mean_aff(tree):
+        ns = tree.nodes()
+        return sum(n.affinity for n in ns) / max(1, len(ns))
+
+    # selection should enrich high-affinity cells → higher mean affinity
+    assert mean_aff(selected) > mean_aff(neutral), (
+        f"selected {mean_aff(selected)} !> neutral {mean_aff(neutral)}"
+    )
+    # affinities are populated (in (0,1]) when a model is active
+    assert all(0.0 < n.affinity <= 1.0 + 1e-9 for n in selected.nodes())
+
+
+def test_affinity_auto_target_is_deterministic():
+    founder = _founder()
+    mut, sub = _kernel()
+    a = _engine.simulate_lineage(founder, mut, sub, 0.1, 1.5, 0.0, 10, 500, 40, 7,
+                                 selection_strength=5.0)
+    b = _engine.simulate_lineage(founder, mut, sub, 0.1, 1.5, 0.0, 10, 500, 40, 7,
+                                 selection_strength=5.0)
+    assert a.to_newick() == b.to_newick()
+    assert [n.affinity for n in a.nodes()] == [n.affinity for n in b.nodes()]
+
+
+def test_simulate_family_outcomes_extinct_founder_yields_no_observed():
+    """lambda_base=0 => founder never divides => extinct => zero observed cells.
+
+    Sampling must draw from the LIVING final-generation population, not all tree
+    leaves. An extinct clone (no living cells) is not observed, so it contributes
+    no node/observed Outcomes.
+    """
+    c = ga.Experiment.on("human_igh").recombine().compile()
+    founder = c.run(n=1, seed=0)[0]  # full Outcome (not just the Simulation)
+    refdata = c.refdata
+    mut, sub = _kernel()
+    fam = _engine.simulate_family_outcomes(
+        founder, refdata, mut, sub, 0.05, 0.0, 0.0, 6, 300, 30, 2024
+    )
+    assert len(fam.observed_outcomes()) == 0
+    assert all(o is None for o in fam.node_outcomes())
+    assert all(not n.observed and n.abundance == 0 for n in fam.tree().nodes())
+
+
+def test_simulate_family_outcomes_healthy_lambda_produces_observed():
+    """A healthy lambda_base produces observed cells whose abundances sum to n_sample.
+
+    Scans seeds because a single founder can go extinct even at a healthy rate.
+    """
+    c = ga.Experiment.on("human_igh").recombine().compile()
+    founder = c.run(n=1, seed=0)[0]  # full Outcome (not just the Simulation)
+    refdata = c.refdata
+    mut, sub = _kernel()
+    n_sample = 30
+    for seed in range(20):
+        fam = _engine.simulate_family_outcomes(
+            founder, refdata, mut, sub, 0.05, 1.6, 0.0, 6, 300, n_sample, seed
+        )
+        observed = [n for n in fam.tree().nodes() if n.observed]
+        if not observed:
+            continue  # extinct for this seed
+        assert len(fam.observed_outcomes()) == len(observed)
+        assert sum(n.abundance for n in observed) == n_sample
+        max_gen = max(n.generation for n in fam.tree().nodes())
+        assert all(n.generation == max_gen for n in observed)
+        break
+    else:
+        raise AssertionError("no seed in 0..20 produced a surviving family")
+
+
+def test_affinity_rejects_bad_params():
+    founder = _founder()
+    mut, sub = _kernel()
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 0,
+                                 selection_strength=float("nan"))
+    with pytest.raises(ValueError):
+        _engine.simulate_lineage(founder, mut, sub, 0.05, 1.5, 0.0, 8, 500, 30, 0,
+                                 beta=-1.0)
diff --git a/tests/test_lineage_mutation_counts.py b/tests/test_lineage_mutation_counts.py
new file mode 100644
index 0000000..ae6771b
--- /dev/null
+++ b/tests/test_lineage_mutation_counts.py
@@ -0,0 +1,30 @@
+import GenAIRR as ga
+from GenAIRR import _engine
+from GenAIRR._s5f_loader import load_builtin_s5f_kernel
+
+
+def _founder_and_refdata():
+    compiled = ga.Experiment.on("human_igh").recombine().compile()
+    return compiled.run(n=1, seed=0)[0], compiled.refdata
+
+
+def test_lineage_record_mutation_counts_are_consistent():
+    founder, refdata = _founder_and_refdata()
+    mut, sub = load_builtin_s5f_kernel("hh_s5f")
+    fam = _engine.simulate_family_outcomes(founder, refdata, mut, sub, 0.1, 1.6, 0.0, 8, 300, 30, 7)
+    recs = fam.airr_records(refdata)   # NEW method (Task 1b)
+    assert len(recs) >= 1
+    saw_mutated = False
+    for r in recs:
+        per_seg = r["n_v_mutations"] + r["n_d_mutations"] + r["n_j_mutations"] + r["n_np_mutations"]
+        assert r["n_mutations"] == per_seg, (
+            f"n_mutations {r['n_mutations']} != sum per-segment {per_seg}"
+        )
+        v_sub = (r["n_fwr1_mutations"] + r["n_cdr1_mutations"] + r["n_fwr2_mutations"]
+                 + r["n_cdr2_mutations"] + r["n_fwr3_mutations"] + r["n_v_unannotated_mutations"])
+        assert r["n_v_mutations"] == v_sub, (
+            f"n_v_mutations {r['n_v_mutations']} != sum subregion {v_sub}"
+        )
+        if r["n_mutations"] > 0:
+            saw_mutated = True
+    assert saw_mutated, "expected at least one mutated node record"
diff --git a/tests/test_lineage_outcome_validation.py b/tests/test_lineage_outcome_validation.py
new file mode 100644
index 0000000..9e98ca8
--- /dev/null
+++ b/tests/test_lineage_outcome_validation.py
@@ -0,0 +1,102 @@
+"""
+Correctness test: per-node Outcomes from simulate_family_outcomes must be
+self-consistent so that native build_airr_record + validate_record produce
+correct mutation-count fields and pass mutation-count invariants.
+
+Bug fixed: node Outcomes were built with an empty event ledger but a
+nonzero Simulation.mutation_count, so validate_record would false-fail the
+mutation-count sum invariant on mutated nodes.
+
+Note: AlleleCallTieSetMismatch issues on D can still appear for SHM-mutated
+nodes (SHM changes D-segment match scores, making formerly-unique calls
+ambiguous). These are a biological reality of the lineage path, not caused
+by the event-ledger bug, and are not checked here.
+"""
+import GenAIRR as ga
+from GenAIRR import _engine
+from GenAIRR._s5f_loader import load_builtin_s5f_kernel
+
+# Mutation-count issue kinds that this fix must eliminate.
+_MUTATION_COUNT_KINDS = {
+    "NMutationsMismatch",
+    "NVMutationsMismatch",
+    "NDMutationsMismatch",
+    "NJMutationsMismatch",
+    "NNpMutationsMismatch",
+    "MutationCountSumMismatch",
+    "NFwr1MutationsMismatch",
+    "NCdr1MutationsMismatch",
+    "NFwr2MutationsMismatch",
+    "NCdr2MutationsMismatch",
+    "NFwr3MutationsMismatch",
+    "NVUnannotatedMutationsMismatch",
+    "VSubregionMutationCountSumMismatch",
+}
+
+
+def _founder_refdata():
+    c = ga.Experiment.on("human_igh").recombine().compile()
+    return c.run(n=1, seed=0)[0], c.refdata
+
+
+def test_node_outcomes_no_mutation_count_issues():
+    """After the fix, no mutation-count validation issues on any lineage node."""
+    founder, refdata = _founder_refdata()
+    mut, sub = load_builtin_s5f_kernel("hh_s5f")
+    fam = _engine.simulate_family_outcomes(founder, refdata, mut, sub, 0.05, 1.6, 0.0, 6, 300, 30, 7)
+    saw_mut = False
+    for o in fam.observed_outcomes():
+        issues = o.validate_record(refdata)
+        mut_issues = [i for i in issues if i["kind"] in _MUTATION_COUNT_KINDS]
+        assert len(mut_issues) == 0, f"mutation-count validate_record issues: {mut_issues}"
+        rec = o.airr_record(refdata)
+        if rec["n_mutations"] > 0:
+            saw_mut = True
+            per_seg = (rec["n_v_mutations"] + rec["n_d_mutations"]
+                       + rec["n_j_mutations"] + rec["n_np_mutations"])
+            assert rec["n_mutations"] == per_seg, (
+                f"n_mutations {rec['n_mutations']} != per-segment sum {per_seg}"
+            )
+            v_sub = (rec["n_fwr1_mutations"] + rec["n_cdr1_mutations"]
+                     + rec["n_fwr2_mutations"] + rec["n_cdr2_mutations"]
+                     + rec["n_fwr3_mutations"] + rec["n_v_unannotated_mutations"])
+            assert rec["n_v_mutations"] == v_sub, (
+                f"n_v_mutations {rec['n_v_mutations']} != subregion sum {v_sub}"
+            )
+    assert saw_mut, "expected at least one mutated observed node"
+
+
+def test_node_outcomes_validate_clean_under_heavy_shm():
+    """Heavy SHM lineage: every observed node Outcome must validate cleanly.
+
+    Regression for the stale live-call cache bug: node sims carried the
+    founder's allele calls instead of the mutated sequence's, so under heavy
+    SHM validate_record reported AlleleCallTieSetMismatch. After refreshing
+    the live-call cache on each observed node sim, this must be clean.
+    """
+    founder, refdata = _founder_refdata()
+    mut, sub = load_builtin_s5f_kernel("hh_s5f")
+    fam = _engine.simulate_family_outcomes(founder, refdata, mut, sub, 0.05, 1.6, 0.0, 6, 300, 30, 7)
+    issues_total = 0
+    for o in fam.observed_outcomes():
+        issues_total += len(o.validate_record(refdata))
+    assert issues_total == 0, f"validate_record reported {issues_total} issues under heavy SHM"
+
+
+def test_airr_records_mutation_counts_consistent():
+    """PyFamilyOutcome.airr_records() produces consistent mutation counts."""
+    founder, refdata = _founder_refdata()
+    mut, sub = load_builtin_s5f_kernel("hh_s5f")
+    fam = _engine.simulate_family_outcomes(founder, refdata, mut, sub, 0.05, 1.6, 0.0, 6, 300, 30, 7)
+    recs = fam.airr_records(refdata)
+    assert len(recs) >= 1
+    saw_mut = False
+    for rec in recs:
+        per_seg = (rec["n_v_mutations"] + rec["n_d_mutations"]
+                   + rec["n_j_mutations"] + rec["n_np_mutations"])
+        assert rec["n_mutations"] == per_seg, (
+            f"n_mutations {rec['n_mutations']} != per-segment sum {per_seg}"
+        )
+        if rec["n_mutations"] > 0:
+            saw_mut = True
+    assert saw_mut, "expected at least one mutated node record"