Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
7d845db
feat(lineage): tree data structures (LineageNode, LineageTree)
MuteJester Jun 15, 2026
4a3610d
refactor(lineage): enforce child/leaf id ordering, fix doc accuracy
MuteJester Jun 15, 2026
64965e8
feat(lineage): deterministic Poisson sampler
MuteJester Jun 15, 2026
c9ef362
feat(lineage): neutral generation-synchronous branching topology
MuteJester Jun 15, 2026
30558f5
fix(lineage): exit generation loop cleanly when n_max cap is hit
MuteJester Jun 15, 2026
18f5a80
feat(lineage): logistic carrying-capacity damping with peak tracking
MuteJester Jun 15, 2026
a693171
docs(lineage): clarify carrying-capacity cap rationale and n_max doc
MuteJester Jun 15, 2026
ea76cc4
feat(lineage): per-division mutation via pluggable Pass mutator
MuteJester Jun 15, 2026
d89f546
refactor(lineage): drop unused peak wrapper; test calls grow_core dir…
MuteJester Jun 15, 2026
a68d665
feat(lineage): sample + genotype-collapse into abundances
MuteJester Jun 15, 2026
ce93585
refactor(lineage): debug-assert id-index invariant on sample write-back
MuteJester Jun 15, 2026
35328f2
feat(lineage): tree validator + simulate_family one-call entry
MuteJester Jun 15, 2026
39edfab
feat(lineage): validate enforces id-index invariant
MuteJester Jun 15, 2026
9d2ed0a
docs(lineage): module usage example
MuteJester Jun 15, 2026
a574a07
docs(lineage): clarify representative-draw semantics and inert lambda…
MuteJester Jun 15, 2026
114accc
feat(lineage): node-table TSV export
MuteJester Jun 15, 2026
b95cba8
feat(lineage): FASTA export of all node sequences
MuteJester Jun 15, 2026
af7f018
feat(lineage): Newick export with per-edge mutation branch lengths
MuteJester Jun 15, 2026
e339f2a
fix(lineage): emit standard rooted Newick (founder is the root, no ph…
MuteJester Jun 15, 2026
1d89290
test(lineage): export a grown family end-to-end; module doc
MuteJester Jun 15, 2026
1bcca64
docs(lineage): to_newick precondition note; assert node-2 TSV row
MuteJester Jun 15, 2026
0852140
feat(lineage): PyO3 LineageNode/LineageTree bindings
MuteJester Jun 15, 2026
efdea3a
feat(lineage): simulate_lineage PyO3 entry point
MuteJester Jun 15, 2026
463b7ad
test(lineage): end-to-end Python simulate_lineage
MuteJester Jun 15, 2026
668e1c4
fix(lineage): guard simulate_lineage against non-finite/negative kern…
MuteJester Jun 15, 2026
90a3f54
feat(lineage): BLOSUM62 weighted aa-distance + mature-target generation
MuteJester Jun 15, 2026
9724132
review: validate lambda/max_generations, harden poisson finiteness, O…
MuteJester Jun 15, 2026
f6481f0
feat(lineage): add per-node affinity field + node-table column
MuteJester Jun 15, 2026
d201334
feat(lineage): AffinityModel (affinity_value/fitness) + sim_to_aa tra…
MuteJester Jun 15, 2026
723343b
feat(lineage): affinity-modulated offspring in grow_core + family/aff…
MuteJester Jun 15, 2026
fa45128
feat(lineage): expose affinity selection in simulate_lineage (Python)
MuteJester Jun 15, 2026
264c8d8
feat(lineage): heavy-tailed clone-size distributions (power-law, logn…
MuteJester Jun 15, 2026
01b1f4b
feat(lineage): repertoire composition with unexpanded-singleton fraction
MuteJester Jun 15, 2026
260a254
docs(lineage): refresh module doc for affinity + clone_size
MuteJester Jun 15, 2026
8965c43
feat(lineage): per-node Outcomes via simulate_family_outcomes (full A…
MuteJester Jun 15, 2026
3fdc8f3
fix(lineage): pool-derived per-segment mutation counts on node AIRR r…
MuteJester Jun 15, 2026
a2377a2
feat(lineage): Experiment.clonal_lineage DSL with per-node AIRR recor…
MuteJester Jun 15, 2026
51ac130
feat(lineage): deprecate expand_clones in favor of clonal_lineage (ke…
MuteJester Jun 15, 2026
8135854
docs(site): detailed clonal lineage guide (how the engine grows real …
MuteJester Jun 15, 2026
e98d500
docs(site): add detection figure + Change-O worked example to clonal …
MuteJester Jun 15, 2026
186e495
fix(lineage): remove dead lambda_mut, validate target_aa/s5f_model, c…
MuteJester Jun 15, 2026
449c7fb
fix(lineage): self-consistent node Outcomes (synthesize SHM events + …
MuteJester Jun 15, 2026
4d7b2c2
fix(lineage): refresh live-call cache on node sims so v/d/j calls mat…
MuteJester Jun 15, 2026
11bc982
feat(lineage): apply post-fork corruption to clonal_lineage reads (pe…
MuteJester Jun 15, 2026
58de262
feat(lineage): validate_records/expose_provenance support + honest ex…
MuteJester Jun 16, 2026
07d6054
polish(lineage): unbiased mature-target RNG + uniform residue pick; h…
MuteJester Jun 16, 2026
648729a
fix(lineage): sample the living final-generation population, not all …
MuteJester Jun 16, 2026
7307267
fix(lineage): BCR-only guard, founder-survival retry, duplicate_count…
MuteJester Jun 16, 2026
f596dd0
docs(site): regenerate detection figure from corrected engine (final-…
MuteJester Jun 16, 2026
01b6d52
docs(lineage): correct selection_strength/target_aa affinity-reportin…
MuteJester Jun 16, 2026
592dbe8
feat(lineage): expose sample_clone_sizes to Python (clone-size distri…
MuteJester Jun 16, 2026
8a2e951
feat(repertoire): clonal_repertoire DSL (heavy-tailed clone sizes, TC…
MuteJester Jun 16, 2026
aae6113
docs(site): clonal_repertoire guide (TCR & abundance) + point TCR not…
MuteJester Jun 16, 2026
9a6e272
docs(site): homepage advertises BCR clonal_lineage (flagship, ground-…
MuteJester Jun 16, 2026
e6726f6
test(clonal): update plan-split pin to generalized multi-fork rejecti…
MuteJester Jun 16, 2026
8ed5fbd
docs(readme): add Clonal lineages & repertoires section (clonal_linea…
MuteJester Jun 16, 2026
a6746c5
docs: link new clonal guides at live mkdocs URLs; fix README-URL cont…
MuteJester Jun 16, 2026
28fcd44
docs+test: mkdocs internal-link/claim polish; clonal-fork ordering-gu…
MuteJester Jun 16, 2026
9af5485
fix(clonal): reject any second clonal fork (expand_clones/clonal_line…
MuteJester Jun 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ result = (
# Passes BEFORE this point apply to the parent rearrangement;
# passes AFTER apply per-descendant. So each clone shares the
# same V(D)J recombination but accumulates its own SHM + errors.
# NOTE: expand_clones is the legacy fixed-size *star* model. For
# real clonal trees and repertoires, see "Clonal lineages &
# repertoires" below (clonal_lineage / clonal_repertoire).
.expand_clones(n_clones=50, per_clone=20)
# 3. Somatic hypermutation per descendant — S5F context-dependent
# model at 5% per-base rate (matches memory-B-cell SHM).
Expand Down Expand Up @@ -145,6 +148,41 @@ result[0]["experiment_id"], result[0]["tissue"] # ('exp001', 'peripher
result.to_tsv("repertoire.tsv")
```

## Clonal lineages & repertoires

GenAIRR models clonal structure three ways. Pick by what you need:

| Method | Model | Use for |
|---|---|---|
| **`clonal_lineage`** | BCR affinity-maturation **trees** — generation-synchronous birth–death, per-division S5F SHM, optional affinity selection, sample + genotype-collapse | B-cell lineage trees with **ground truth** (Newick/FASTA per clone), benchmarking lineage/clone inference, ML training data |
| **`clonal_repertoire`** | Non-tree **abundance** repertoires — heavy-tailed clone sizes (power-law/lognormal) + unexpanded-singleton fraction, reads through library-prep, collapsed to `duplicate_count` | **TCR** repertoires (no SHM) and flat-BCR abundance; clone-calling benchmarks |
| `expand_clones` *(deprecated)* | Fixed-size **star**: one founder + `per_clone` independent descendants | legacy; kept working, but prefer the two above |

```python
import GenAIRR as ga

# BCR clonal lineage trees — affinity maturation, with ground truth
bcr = (ga.Experiment.on("human_igh").recombine()
.clonal_lineage(n_clones=50, max_generations=6, n_sample=30,
rate=0.01, selection_strength=10.0) # 0 = neutral
.sequencing_errors(rate=0.001)
.run_records(seed=0))
bcr.records # per-cell AIRR records: clone_id, lineage_node_id, lineage_affinity, ...
bcr.lineage_trees[0].to_newick() # ground-truth tree (branch length = per-edge mutations)

# TCR clone-size repertoire — proliferated rearrangements, no SHM, abundance
tcr = (ga.Experiment.on("human_tcrb").allow_curatable_refdata().recombine()
.clonal_repertoire(n_clones=200, size_distribution="power_law",
exponent=2.0, max_size=500, unexpanded_fraction=0.5)
.sequencing_errors(rate=0.005)
.run_records(seed=0))
tcr.records # AIRR records: clone_id (membership) + duplicate_count (abundance)
```

`clonal_lineage` is BCR-only (it applies S5F SHM and rejects TCR loci); `clonal_repertoire` covers TCR and flat BCR. See the guides:
[Clonal lineage trees](https://mutejester.github.io/GenAIRR/guides/clonal-lineage.html) ·
[Clonal repertoires (TCR & abundance)](https://mutejester.github.io/GenAIRR/guides/clonal-repertoire.html).

Other feature flags worth knowing:

| Step | What it does |
Expand Down Expand Up @@ -620,7 +658,7 @@ The full documentation site is at **[mutejester.github.io/GenAIRR](https://mutej

- **Learn** — [Lesson 1: V(D)J recombination](https://mutejester.github.io/GenAIRR/lesson-1.html) · [Lesson 2: Pipeline scrubber](https://mutejester.github.io/GenAIRR/lesson-2.html) · [Lesson 3: S5F SHM](https://mutejester.github.io/GenAIRR/lesson-3.html) · [Lesson 4: Sequencing artifacts](https://mutejester.github.io/GenAIRR/lesson-4.html) · [Lesson 5: Ground-truth payoff](https://mutejester.github.io/GenAIRR/lesson-5.html)
- **Concepts** — [Simulation Pipeline](https://mutejester.github.io/GenAIRR/concept-pipeline.html) · [Persistent IR](https://mutejester.github.io/GenAIRR/concept-persistent-ir.html) · [Contracts](https://mutejester.github.io/GenAIRR/concept-contracts.html) · [AIRR Record](https://mutejester.github.io/GenAIRR/concept-airr-record.html) · [Live Calls](https://mutejester.github.io/GenAIRR/concept-live-call.html)
- **Guides** — [Build a config](https://mutejester.github.io/GenAIRR/guide-build-config.html) · [Productive sampling](https://mutejester.github.io/GenAIRR/guide-productive.html) · [Clonal families](https://mutejester.github.io/GenAIRR/guide-clonal-families.html) · [Export](https://mutejester.github.io/GenAIRR/guide-export.html) · [Replay](https://mutejester.github.io/GenAIRR/guide-replay.html) · [Reproduce a dataset](https://mutejester.github.io/GenAIRR/guide-reproduce.html) · [Compare SHM models](https://mutejester.github.io/GenAIRR/guide-compare-shm.html) · [Tune corruption](https://mutejester.github.io/GenAIRR/guide-tune-corruption.html)
- **Guides** — [Build a config](https://mutejester.github.io/GenAIRR/guide-build-config.html) · [Productive sampling](https://mutejester.github.io/GenAIRR/guide-productive.html) · [Clonal lineage trees](https://mutejester.github.io/GenAIRR/guides/clonal-lineage.html) · [Clonal repertoires](https://mutejester.github.io/GenAIRR/guides/clonal-repertoire.html) · [Export](https://mutejester.github.io/GenAIRR/guide-export.html) · [Replay](https://mutejester.github.io/GenAIRR/guide-replay.html) · [Reproduce a dataset](https://mutejester.github.io/GenAIRR/guide-reproduce.html) · [Compare SHM models](https://mutejester.github.io/GenAIRR/guide-compare-shm.html) · [Tune corruption](https://mutejester.github.io/GenAIRR/guide-tune-corruption.html)
- **Reference** — [API + Configs + AIRR fields](https://mutejester.github.io/GenAIRR/reference.html)

---
Expand Down
1 change: 1 addition & 0 deletions engine_rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub mod event;
pub mod feasibility;
pub mod ir;
pub mod junction;
pub mod lineage;
pub mod live_call;
pub mod pass;
pub mod passes;
Expand Down
261 changes: 261 additions & 0 deletions engine_rs/src/lineage/affinity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
//! Affinity model for clonal selection: BLOSUM62-weighted, region-weighted
//! amino-acid distance to a target antigen, and target generation.

use crate::ir::{compute_codon_rail, Simulation};
use crate::rng::Rng;

/// 20 standard amino acids in the BLOSUM62 matrix's row/column order.
const AA_ORDER: &[u8; 20] = b"ARNDCQEGHILKMFPSTWYV";

/// BLOSUM62 substitution score for two amino-acid bytes (uppercase one-letter
/// codes). Unknown / non-standard residues (including `*` and `X`) score as a
/// strong mismatch so they contribute maximally to distance.
pub fn blosum62(a: u8, b: u8) -> i8 {
fn idx(aa: u8) -> Option<usize> {
AA_ORDER.iter().position(|&x| x == aa)
}
// Canonical BLOSUM62, rows/cols in AA_ORDER.
const M: [[i8; 20]; 20] = [
// A R N D C Q E G H I L K M F P S T W Y V
[ 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0],
[-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3],
[-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3],
[-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3],
[ 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1],
[-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2],
[-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2],
[ 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3],
[-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3],
[-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3],
[-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1],
[-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2],
[-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1],
[-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1],
[-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2],
[ 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2],
[ 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0],
[-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3],
[-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1],
[ 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4],
];
match (idx(a), idx(b)) {
(Some(i), Some(j)) => M[i][j],
_ => -4,
}
}

/// Per-position substitution cost from BLOSUM62: `S(a,a) - S(a,b)`, which is 0
/// when `b == a` and grows as the substitution becomes less favorable.
fn substitution_cost(a: u8, b: u8) -> f64 {
(blosum62(a, a) as f64 - blosum62(a, b) as f64).max(0.0)
}

/// Region-weighted BLOSUM62 amino-acid distance between two aa sequences.
/// `weights` is per amino-acid position; positions beyond the shorter sequence
/// are ignored, and weights shorter than the sequences default to 1.0.
pub fn weighted_aa_distance(a: &[u8], b: &[u8], weights: &[f64]) -> f64 {
let n = a.len().min(b.len());
let mut d = 0.0;
for i in 0..n {
let w = weights.get(i).copied().unwrap_or(1.0);
d += w * substitution_cost(a[i], b[i]);
}
d
}

/// Generate a "mature" target: the naive aa sequence with up to `m` random
/// single-residue substitutions to a different standard amino acid.
/// Deterministic for the given `rng` state.
pub fn make_mature_target(naive_aa: &[u8], m: u32, rng: &mut Rng) -> Vec<u8> {
let mut target = naive_aa.to_vec();
if target.is_empty() {
return target;
}
for _ in 0..m {
// Unbiased uniform draws (Lemire `range_u32`), matching the engine's
// RNG convention rather than `% len` modulo bias.
let pos = rng.range_u32(target.len() as u32) as usize;
let cur = target[pos];
// Pick a standard amino acid uniformly among the 19 != cur (no skew).
let pick = match AA_ORDER.iter().position(|&x| x == cur) {
Some(ci) => {
let r = rng.range_u32(19) as usize;
AA_ORDER[if r >= ci { r + 1 } else { r }]
}
None => AA_ORDER[rng.range_u32(20) as usize],
};
target[pos] = pick;
}
target
}

/// Translate a `Simulation` to its amino-acid sequence by concatenating the
/// in-frame translation of each region (respecting each region's frame phase),
/// in biological order. Used to score a cell's affinity to a target antigen.
pub fn sim_to_aa(sim: &Simulation) -> Vec<u8> {
let mut aa = Vec::new();
for region in sim.sequence.regions.iter() {
let rail = compute_codon_rail(region, &sim.pool);
aa.extend_from_slice(&rail.amino_acids);
}
aa
}

/// Affinity-selection model: maps a cell's amino-acid sequence to an affinity in
/// (0, 1] (1 = identical to target) and to a fitness multiplier for its offspring
/// rate. `selection_strength == 0` makes fitness identically 1 (neutral).
#[derive(Clone, Debug)]
pub struct AffinityModel {
target_aa: Vec<u8>,
aa_weights: Vec<f64>,
beta: f64,
selection_strength: f64,
/// Affinity of the founder; fitness is measured relative to this baseline so
/// the founder has fitness ~1 and cells that improve on it exceed 1.
founder_affinity: f64,
}

impl AffinityModel {
/// Build a model. `founder_aa` is the naive founder's aa sequence; its
/// affinity becomes the fitness baseline.
pub fn new(
target_aa: Vec<u8>,
aa_weights: Vec<f64>,
beta: f64,
selection_strength: f64,
founder_aa: &[u8],
) -> Self {
let founder_affinity =
(-beta * weighted_aa_distance(founder_aa, &target_aa, &aa_weights)).exp();
Self {
target_aa,
aa_weights,
beta,
selection_strength,
founder_affinity,
}
}

/// Affinity in (0, 1]: `exp(-beta * region-weighted BLOSUM distance to target)`.
pub fn affinity_value(&self, aa: &[u8]) -> f64 {
(-self.beta * weighted_aa_distance(aa, &self.target_aa, &self.aa_weights)).exp()
}

/// Fitness from an already-computed affinity value (avoids re-translation).
pub fn fitness_from_affinity(&self, affinity_value: f64) -> f64 {
(1.0 + self.selection_strength * (affinity_value - self.founder_affinity)).max(0.0)
}

/// Fitness multiplier for offspring rate: `1 + strength * (affinity - founder_affinity)`,
/// clamped at 0. `selection_strength == 0` ⇒ always 1 (neutral).
pub fn fitness(&self, aa: &[u8]) -> f64 {
self.fitness_from_affinity(self.affinity_value(aa))
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::ir::{Nucleotide, NucHandle, Region, Segment, Simulation};
use crate::rng::Rng;

fn aa_founder(seq: &[u8]) -> Simulation {
let mut sim = Simulation::new();
for (i, b) in seq.iter().enumerate() {
let (next, _) = sim.with_nucleotide_pushed(
Nucleotide::germline(*b, i as u16, Segment::V));
sim = next;
}
sim.with_region_added(Region::new(Segment::V, NucHandle::new(0), NucHandle::new(seq.len() as u32)))
}

#[test]
fn sim_to_aa_translates_in_frame() {
// 8 'A' bases -> codons AAA, AAA (last 2 ignored) -> "KK" (Lys, Lys)
let sim = aa_founder(b"AAAAAAAA");
assert_eq!(sim_to_aa(&sim), b"KK".to_vec());
}

#[test]
fn affinity_value_is_one_at_target() {
let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 1.0, b"KK");
assert!((m.affinity_value(b"KK") - 1.0).abs() < 1e-9);
}

#[test]
fn affinity_value_decreases_with_distance() {
let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 1.0, b"KK");
let near = m.affinity_value(b"KK");
let far = m.affinity_value(b"WW");
assert!(far < near, "far {far} should be < near {near}");
assert!(far > 0.0);
}

#[test]
fn fitness_is_neutral_when_strength_zero() {
// selection_strength = 0 => fitness == 1.0 for ANY sequence
let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 0.0, b"NN");
assert!((m.fitness(b"KK") - 1.0).abs() < 1e-9);
assert!((m.fitness(b"WW") - 1.0).abs() < 1e-9);
assert!((m.fitness(b"NN") - 1.0).abs() < 1e-9);
}

#[test]
fn fitness_rewards_closer_than_founder_penalizes_farther() {
// founder = "NN" (far from target "KK"); strength 1.
let m = AffinityModel::new(b"KK".to_vec(), vec![1.0; 2], 1.0, 1.0, b"NN");
// a cell AT the target is fitter than the founder baseline (fitness > 1)
assert!(m.fitness(b"KK") > 1.0);
// a cell equal to the founder has fitness exactly 1 (baseline)
assert!((m.fitness(b"NN") - 1.0).abs() < 1e-9);
// fitness never goes negative
assert!(m.fitness(b"WWWWWWWW") >= 0.0);
}

#[test]
fn blosum62_known_entries() {
assert_eq!(blosum62(b'A', b'A'), 4);
assert_eq!(blosum62(b'W', b'W'), 11);
assert_eq!(blosum62(b'C', b'C'), 9);
assert_eq!(blosum62(b'A', b'R'), -1);
assert_eq!(blosum62(b'W', b'C'), -2);
assert_eq!(blosum62(b'D', b'E'), 2);
assert_eq!(blosum62(b'R', b'A'), blosum62(b'A', b'R'));
let _ = blosum62(b'*', b'A');
let _ = blosum62(b'X', b'X');
}

#[test]
fn weighted_distance_zero_for_identical() {
let a = b"ACDEW";
let w = vec![1.0; 5];
assert!(weighted_aa_distance(a, a, &w).abs() < 1e-9);
}

#[test]
fn weighted_distance_increases_with_substitutions_and_respects_weights() {
let a = b"AAAAA";
let b = b"AAAAW";
let flat = vec![1.0; 5];
let d_flat = weighted_aa_distance(a, b, &flat);
assert!(d_flat > 0.0);
let mut heavy = vec![1.0; 5];
heavy[4] = 10.0;
assert!(weighted_aa_distance(a, b, &heavy) > d_flat);
let mut zero = vec![1.0; 5];
zero[4] = 0.0;
assert!(weighted_aa_distance(a, b, &zero).abs() < 1e-9);
}

#[test]
fn mature_target_applies_m_substitutions_deterministically() {
let naive = b"ACDEFGHIKLMNPQRSTVWY".to_vec();
let mut rng = Rng::new(42);
let t = make_mature_target(&naive, 3, &mut rng);
assert_eq!(t.len(), naive.len());
let diffs = naive.iter().zip(&t).filter(|(a, b)| a != b).count();
assert!(diffs >= 1 && diffs <= 3, "expected up to 3 aa substitutions, got {diffs}");
let mut rng2 = Rng::new(42);
assert_eq!(make_mature_target(&naive, 3, &mut rng2), t);
}
}
Loading
Loading