diff --git a/app/src/explore/fasta-prep-client.ts b/app/src/explore/fasta-prep-client.ts
index 86bc593b..f9387908 100644
--- a/app/src/explore/fasta-prep-client.ts
+++ b/app/src/explore/fasta-prep-client.ts
@@ -83,7 +83,13 @@ function describeBundleFailure(status: number): string {
 }
 
 /** @public */
-export type FastaPrepStage = 'queued' | 'embedding' | 'projecting' | 'annotating' | 'bundling';
+export type FastaPrepStage =
+  | 'queued'
+  | 'embedding'
+  | 'projecting'
+  | 'annotating'
+  | 'bundling'
+  | 'computing_statistics';
 
 /** @public */
 export interface FastaPrepOptions {
diff --git a/app/src/explore/runtime.ts b/app/src/explore/runtime.ts
index 68b07fb6..5171ec0b 100644
--- a/app/src/explore/runtime.ts
+++ b/app/src/explore/runtime.ts
@@ -191,8 +191,18 @@ export async function initializeExploreRuntime(): Promise<ExploreController> {
               lastProgress = 70;
               overlayController.update(true, lastProgress, 'Preparing FASTA…', 'Projecting…');
             } else if (stage === 'bundling') {
+              stopCreep();
               lastProgress = 90;
               overlayController.update(true, lastProgress, 'Preparing FASTA…', 'Bundling…');
+            } else if (stage === 'computing_statistics') {
+              stopCreep();
+              lastProgress = 95;
+              overlayController.update(
+                true,
+                lastProgress,
+                'Preparing FASTA…',
+                'Computing statistics…',
+              );
             }
           },
         });
diff --git a/openspec/changes/add-projection-statistics/.openspec.yaml b/openspec/changes/add-projection-statistics/.openspec.yaml
new file mode 100644
index 00000000..a4ac4d78
--- /dev/null
+++ b/openspec/changes/add-projection-statistics/.openspec.yaml
@@ -0,0 +1,2 @@
+schema: spec-driven
+created: 2026-06-23
diff --git a/openspec/changes/add-projection-statistics/design.md b/openspec/changes/add-projection-statistics/design.md
new file mode 100644
index 00000000..e85da105
--- /dev/null
+++ b/openspec/changes/add-projection-statistics/design.md
@@ -0,0 +1,266 @@
+## Context
+
+ProtSpace prepares data with the `protspace` Python package (`embed → project → annotate → bundle`)
+and renders the resulting `.parquetbundle` in the `protspace_web` frontend. The bundle is Apache
+Parquet tables concatenated with a `---PARQUET_DELIMITER---` separator:
+
+1. `selected_annotations` (in-memory Arrow key `protein_annotations`) — identifier + annotations
+2. `projections_metadata` — `projection_name` (e.g. `UMAP_2`), `dimensions`, `info_json` (reducer
+   params incl. the distance `metric`)
+3. `projections_data` — long table of `(projection_name, identifier, x, y, z)`
+4. `settings` (optional) — annotation styles / display config
+
+The bundle **does not retain the high-dimensional embeddings** — only reduced coordinates. Bundle
+I/O lives in `data/io/bundle.py`: `write_bundle(tables, path, settings=None)`,
+`read_bundle → (parts[:3], settings)` (a **2-tuple**, unpacked as `_, settings = read_bundle(...)`
+at two sites in `utils/add_annotation_style.py`), `extract_bundle_to_dir`, and
+`replace_settings_in_bundle` (used by `protspace style`, rebuilds `parts[:3] + settings`). All hard-
+fail outside 3–4 parts. The frontend reader
+(`packages/core/src/components/data-loader/utils/bundle.ts`) throws unless 2 or 3 delimiters and
+keys off **delimiter count** (not part contents); `cli/app.py:_register_commands()` registers
+commands from a **hardcoded list** (no auto-discovery).
+
+Issue #219 (from #216) asks for statistics to interpret a projection. Owner's metric list: elbow
+(optimal cluster count), silhouette, Davies–Bouldin, Calinski–Harabasz. A prototype,
+`ProtSpaceExtractor` (`~/Downloads/ProtSpaceExtractor_v1.7.4_mod 1.py`), performs proximity/
+neighborhood mining; its distance-to-chord knee algorithm is reused (re-implemented, not imported).
+
+### Decisions taken during brainstorming + two adversarial review rounds
+
+- **Deliverable:** the MVP metric set now, expandable architecture.
+- **Compute home:** the `protspace` core package (shared by CLI + web bundle).
+- **Timing:** baked at prep time; interactivity deferred.
+- **Scope (trimmed):** **per-projection only** — `cluster_validity` (unsupervised KMeans+elbow) +
+  `faithfulness` (kNN-overlap, trustworthiness/continuity vs the embedding). Embedding-space
+  cluster-validity and annotation-feature label sources are deferred (non-breaking expansions).
+- **Metric set:** owner's four **plus** projection-faithfulness (#216's "competitive" framing).
+- **Carriage:** a dedicated fifth parquet part.
+- **Sequencing:** infra-only — baked, **not rendered** (committed follow-up).
+- **Spec home:** this OpenSpec change; engine internals built in the `protspace` PR.
+
+## Goals / Non-Goals
+
+**Goals**
+
+- Per projection: cluster-validity (elbow K + silhouette/DB/CH on the coords) and faithfulness
+  (kNN-overlap, trustworthiness/continuity of the projection vs its source embedding), baked in.
+- A registry where a new **scalar** statistic, label source, or space is a small unit plus a
+  registry entry — no driver rewrite, no bundle schema migration.
+- Backward-compatible carriage across **all** readers/writers, including `protspace style`.
+- Statistics are **secondary**: computing them never fails a prep job and never costs the bundle.
+- Trustworthy, reproducible numbers (seeded; bounded cost; degenerate cases guarded; provenance
+  recorded).
+
+**Non-Goals**
+
+- Embedding-space cluster-validity; annotation-feature label sources; interactive recompute; the
+  full `ProtSpaceExtractor` workflow; frontend rendering; other metric sets. Seams left for the
+  scalar expansions; the rest are explicit future work.
+
+## Decisions
+
+### D1. New `protspace.stats` package, generalized `Statistic` contract, lazy registry
+
+A scalar `(X, labels) -> float` metric is too narrow — the elbow emits a vector + chosen `K`,
+faithfulness needs **two** point sets and **no** labels, and statistics take parameters. So:
+
+```python
+# stats/base.py
+@dataclass
+class StatContext:
+    space_kind: str            # "projection" (MVP); "embedding" later
+    space_name: str            # == projections_metadata.projection_name (joinable)
+    coords: np.ndarray         # (n, d) projection coordinates
+    embedding: np.ndarray | None       # (n, D) source embedding, id-joined to coords
+    embedding_name: str | None         # which embedding set produced this projection
+    high_dim_metric: str               # reducer's own metric (from info_json); cosine fallback
+    ids: list[str]
+    rng_seed: int
+    params: dict               # k, kmax, sample_size, sample_threshold, ...
+
+class Statistic(Protocol):
+    family: str                # "cluster_validity" | "faithfulness"
+    requires_embedding: bool
+    def compute(self, ctx: StatContext) -> list[StatRow]: ...
+```
+
+`stats/__init__.py` holds a lazy `STATISTICS` registry (mirroring `REDUCERS`) and
+`compute_statistics(...)`. **All sklearn imports inside `stats/` are function-local** to preserve the
+package's ~50 ms CLI startup (the registry and `cli/stats.py` registration must not import compute
+modules at module load).
+
+**Honest scope of the registry:** it is the seam for **scalar** statistics, label sources, and
+spaces (new rows, no schema change). The `ProtSpaceExtractor`'s pair/edge/set/graph outputs do
+**not** fit this row model and are **not** claimed to; they get their own future typed bundle parts.
+
+`cluster_validity` is split internally into a `Clusterer` (KMeans+elbow → labels + diagnostics) and
+the sklearn validity scorers that consume those labels, so a future `annotation_feature` grouping or
+`hdbscan` clusterer slots in without touching the scorers.
+
+### D2. Output is one tidy long-format table with split, joinable keys
+
+`statistics.parquet` — **eight** columns, one statistic value per row:
+
+| column        | type   | example                                                                                                           |
+| ------------- | ------ | ----------------------------------------------------------------------------------------------------------------- |
+| `space_kind`  | string | `projection`                                                                                                      |
+| `space_name`  | string | `UMAP_2` (== `projections_metadata.projection_name`)                                                              |
+| `stat_family` | string | `cluster_validity`, `faithfulness`                                                                                |
+| `label_kind`  | string | `kmeans_elbow`, `none`                                                                                            |
+| `metric`      | string | `silhouette`, `davies_bouldin`, `calinski_harabasz`, `n_clusters`, `knn_overlap`, `trustworthiness`, `continuity` |
+| `metric_kind` | string | `validity`, `meta`, `faithfulness`                                                                                |
+| `value`       | double | `0.42`                                                                                                            |
+| `extra_json`  | string | `{"k":15,"metric":"cosine","seed":42,"sampled":false,"knee_confidence":"high","embedding":"prot_t5"}`             |
+
+`metric_kind` is a **column** (not buried in JSON) so consumers can aggregate validity scores without
+folding in `n_clusters` (`metric_kind="meta"`). New scalar statistics add **rows**; a future
+annotation label source carries its column name **inside `extra_json`** (`{"label_name":"family"}`),
+not as a new column — the eight columns stay frozen. `space_name` equals
+`projections_metadata.projection_name` so the table is joinable without string-parsing. Cluster rows
+carry `label_kind="kmeans_elbow"`; faithfulness rows carry `label_kind="none"`.
+
+### D3. Scope = projections only; both statistic families per projection
+
+For each **projection** space:
+
+- **cluster_validity** — `coords` only: KMeans sweep → elbow `K` → silhouette/DB/CH at `K`.
+- **faithfulness** — `coords` + its **source** `embedding`: neighbourhood preservation.
+
+`ReductionPipeline.run` holds `embedding_sets` (high-dim) and `all_reductions` (coords, each knowing
+its `emb_set.name`) together before `save_output`; the driver runs there. **Annotations are not
+needed**, removing the `identifier`/`protein_id`/`sp|…` join hazards. Deferred expansions stay
+non-breaking: `space_kind:"embedding"` and `label_kind:"annotation"` are new **rows**.
+
+### D4. Elbow — distance-to-chord knee, argmax **index** → K, confidence-guarded
+
+Sweep `K ∈ [2, K_max]` (`K_max = min(round(sqrt(n)), 50)`), record inertia, compute the perpendicular
+deviation of each point from the first→last chord of the (normalised) inertia curve, take
+`k_idx = argmax(deviation)`, select `K = k_range[k_idx]`. **Implement the deviation directly** — the
+algorithm originates in `ProtSpaceExtractor._knee_distance_to_chord`, but that function returns the
+curve _y-value_ (a distance cutoff), so we reuse only the argmax-index logic, not its return. The
+prototype's `_knee_median_jump` ensemble half is intentionally **not** used (tuned for sorted-
+distance distributions, not inertia). KMeans runs on **2-D/3-D coords** where the knee is reliable.
+Guards: if normalised max deviation is small (≈ linear), record `knee_confidence:"low"`; also report
+the silhouette-optimal K over the sweep so consumers can cross-check. KMeans uses
+`random_state=rng_seed`, `n_init ≥ 10`. Emit `n_clusters` with `metric_kind="meta"`.
+
+### D5. Faithfulness — neighbourhood preservation, embedding vs projection (corrected)
+
+- **kNN-overlap@k**: mean neighbour-set overlap (embedding vs projection) per point; default
+  `k = 15` (recorded). Built with a neighbour query, `O(n·k)` after the index.
+- **trustworthiness** and **continuity**: `sklearn.manifold.trustworthiness` computes
+  trustworthiness; **continuity is the same function with arguments swapped** (there is no
+  `continuity` in sklearn). `trustworthiness(A, B, metric=m)` applies `m` to `A`. So the high-dim
+  metric must attach to whichever call has the **embedding** as the first arg:
+  - trustworthiness = `trustworthiness(embedding, coords, metric=high_dim_metric)`
+  - continuity = `trustworthiness(coords, embedding, metric=projection_metric)` (Euclidean)
+    The metric is recorded **per row** (trustworthiness and continuity may differ).
+
+**High-dim metric default = the reducer's own metric**, read from
+`projections_metadata.info_json` / `ReducerParams.metric` (Euclidean by default; PCA/MDS are
+Euclidean), falling back to cosine only when unknown — so faithfulness scores the same neighbourhood
+graph the reducer optimised, and cross-method comparison stays valid. Recorded in `extra_json`.
+
+**Cost guard (critical).** `sklearn.manifold.trustworthiness` densifies a full `n×n` distance matrix
+over the high-dim vectors — there is **no** ANN path. Above a threshold (default 5000, shared with
+silhouette), trustworthiness/continuity/kNN-overlap run on a **fixed-seed subsample** derived from
+`(rng_seed, sorted ids)` so **all projections of the same embedding use the identical subset**
+(apples-to-apples); record `sampled`/`sample_size`. Beyond a hard ceiling, skip with a recorded
+`skipped:"n_too_large"` row. This applies on the uncapped CLI `prepare --stats` path, not just the
+1500-capped web path.
+
+**Projection→embedding mapping.** Multi-embedding runs put projections from several embeddings into
+one `projections_data.parquet`, named per embedding (`ProtT5 — UMAP 2`). Each projection's
+faithfulness must use **its own** source embedding. In-process this comes from the reduction's
+`emb_set.name`; the discrete path records the source embedding in provenance and the standalone
+`stats` command accepts multiple `-i` and matches by embedding name, **skipping** projections whose
+embedding isn't supplied (recorded). Embedding rows are aligned to coords by an **id intersection
+join** (not a positional zip — `load_h5` drops NaN rows and `cli/project.py` writes all coords
+against `embedding_sets[0].headers`), asserting/ recording any id-set mismatch.
+
+### D6. Bundle carriage — dedicated fifth part, ALL readers/writers, no arity breaks
+
+Layout `core(3) + settings? + statistics?`. "Empty settings slot" = **zero bytes** (matches the
+existing `if len(parts)==4 and parts[3]` truthiness check). Readers/writers branch on the **fourth
+part's emptiness**, not raw count.
+
+- `data/io/bundle.py`: `write_bundle(tables, path, settings=None, statistics=None)`. **Keep
+  `read_bundle`'s 2-tuple signature** (two `add_annotation_style.py` callers unpack it) and add a
+  separate `read_statistics_from_bundle(path) -> bytes | None`; `extract_bundle_to_dir` writes
+  `STATISTICS_FILENAME` when present. Accept 3–5 parts.
+- **`replace_settings_in_bundle`** (the `protspace style` path) — rewrite with explicit part-count
+  cases so it never drops stats: `len==3 → core+settings`; `len==4 → settings at [3], no stats,
+replace [3]`; `len==5 → stats at [4], emit core + new_settings + parts[4]`, using a zero-byte
+  settings slot when inserting settings ahead of stats. Verify `protspace style` round-trips both a
+  settings+stats and a stats-only (empty-settings) 5-part bundle.
+- `base_processor.save_output` / `create_output`: thread an optional statistics table.
+- `cli/bundle.py`: optional `-s/--statistics`.
+- Frontend `bundle.ts`: accept **4 delimiters** (5 parts); **branch on part-4 byte length** — empty
+  ⇒ settings absent, part 5 = stats; non-empty ⇒ part 4 = settings, part 5 = stats. Guard
+  `extractSettings` to return `null` on a zero-byte buffer **before** `assertValidParquetMagic`.
+  Skip part 5. **Invert** `bundle.test.ts`'s "should reject bundle with 4 delimiters (5 parts)"
+  test; widen the `"Expected 2 or 3 delimiters"` guard; update the layout comment in
+  `packages/utils/src/parquet/bundle-writer.ts`. The frontend _writer_ (`createParquetBundle`) is
+  unchanged and drops stats on re-export — a documented MVP limitation.
+
+**Why a dedicated part, not `settings` JSON:** statistics are results, not display state; `settings`
+is rewritten by `style`/publish and would stale or drop them.
+
+### D7. Wiring
+
+- **`prepare` path (engine, CLI users):** `ReductionPipeline.run` calls `compute_statistics(
+embedding_sets, all_reductions, config)` when enabled; the table is threaded through
+  `create_output`/`save_output` into `write_bundle` in one pass. `cli/prepare.py` adds
+  `--stats/--no-stats` (default on); the faithfulness cost guard (D5) bounds the work.
+- **Discrete path (engine):** `protspace stats -i emb.h5 [-i emb2.h5 ...] -p project_dir
+-o statistics.parquet` — loads the H5 embeddings + a **new projections-parquet loader** that turns
+  `projections_data.parquet` into `{projection_name: (coords ndarray, ids)}`, id-joins each
+  projection to its source embedding (by name + id intersection), runs the driver. **No `-a`.**
+  Register `stats` in `cli/app.py:_register_commands()` (hardcoded list — explicit edit).
+- **Prep service (web):** to make stats truly non-fatal against the single shared
+  `pipeline_timeout_seconds`, **produce the core bundle first** (within the parent budget), then run
+  `protspace stats` as a best-effort step under its **own** `asyncio.timeout(PREP_STATS_TIMEOUT_
+SECONDS)`, catching every failure/timeout **locally** (never reaching the parent handler) and, on
+  success, re-running `bundle` with `-s` to fold in the fifth part. A stats failure/timeout leaves
+  the already-shipped stats-less bundle. Gated by `PREP_STATS` (default on) **and** a one-time
+  `protspace stats --help` feature probe (stale install ⇒ skip). Emits an SSE `computing_statistics`
+  stage and adds it to the frontend `FastaPrepStage` union. The `protspace` floor is raised to the
+  release that introduces `protspace stats` (filled in once the engine PR cuts the version).
+
+### D8. Robustness, determinism, guards
+
+- **Failure isolation:** the driver catches per-statistic exceptions and omits that row; total
+  failure returns an empty report and never raises.
+- **Determinism:** one `rng_seed` (default 42) threads into KMeans (`random_state`, `n_init ≥ 10`)
+  and all subsampling; recorded per row. Faithfulness subsamples a shared id subset across a
+  projection group (D5). Projection-space stats are only as reproducible as the (already-seeded)
+  reducer.
+- **Cost:** silhouette and trustworthiness/continuity are `O(n²)`; both honour the sampling
+  threshold/ceiling (D5). The 1500 web cap keeps the web path cheap; the CLI path relies on the
+  guard.
+- **Degenerate guards:** silhouette needs `2 ≤ K ≤ n−1`; DB/CH require ≥ 2 members per cluster (else
+  omit); `n < 3` → no rows.
+
+## Risks / Trade-offs
+
+- **Faithfulness cost.** The densified `n×n` is the dominant risk; mitigated by the sampling
+  threshold + hard ceiling + skip row, the web cap, and the prep step's own timeout. `--stats`
+  default-on is safe only because of the guard.
+- **Cross-repo coordination.** Engine releases before web consumes it; the frontend reader is
+  backward compatible and independent, and the prep step is version-floored + feature-probed.
+- **Bundle positional fragility.** The zero-byte-settings convention is encoded in `write_bundle`,
+  enforced across all writers (incl. `replace_settings_in_bundle` by part-count case), with both
+  readers branching on part-4 emptiness; covered by round-trip tests in both repos.
+- **No user-visible value yet.** Infra-only; rendering is a committed follow-up; re-export drops the
+  part until then (documented).
+- **Faithfulness metric choices.** `k` and the high-dim metric drive the numbers; both are recorded,
+  and the metric defaults to the reducer's own for validity of cross-method comparison.
+
+## Open Questions
+
+- Default `k` (15), `K_max` (`min(round(sqrt(n)), 50)`), sampling threshold (5000) and hard ceiling —
+  confirm against representative datasets.
+- Engine release version for the `protspace` floor — filled once the engine PR's semantic-release
+  version is cut.
+- Whether to expose parsed statistics on the frontend `BundleExtractionResult` now (forward-compat)
+  or skip part 5 entirely — default skip-but-tolerate.
diff --git a/openspec/changes/add-projection-statistics/proposal.md b/openspec/changes/add-projection-statistics/proposal.md
new file mode 100644
index 00000000..92ec1560
--- /dev/null
+++ b/openspec/changes/add-projection-statistics/proposal.md
@@ -0,0 +1,103 @@
+## Why
+
+ProtSpace renders dimensionality-reduction projections but gives users **no quantitative way to
+judge them**: _Is this projection meaningful? How many clusters are there? Can I trust the geometry,
+or did the reduction distort it?_ Issue #216 catalogued the statistics ProtSpace needs to be
+competitive; this change (issue #219) implements the MVP.
+
+Today the preparation pipeline (`embed → project → annotate → bundle`) ships coordinates and
+annotations with **zero quality metrics**, so interpretation is purely visual. This change adds a
+**projection-statistics** capability computed at prep time and baked into the `.parquetbundle`,
+covering two complementary questions per projection:
+
+- **Cluster structure** — KMeans with an **elbow** estimate of the optimal cluster count, scored by
+  **silhouette**, **Davies–Bouldin**, and **Calinski–Harabasz** (issue #219's metric set).
+- **Projection faithfulness** — **kNN-overlap** and **trustworthiness / continuity** between the
+  original embedding and the projection, i.e. how much the reduction preserved or distorted the
+  neighbourhood structure. These are the metrics that most directly tell a user whether to trust the
+  map (#216's "competitive" framing).
+
+It is built as an **expandable subsystem** so further statistics can be added without rework. The
+honest boundary of that claim (see `design.md`): the registry + tidy long-format table make new
+**scalar** statistics, label sources, and spaces cheap to add (new rows, no schema change); the
+richer pairwise/neighborhood analyses prototyped in the standalone `ProtSpaceExtractor` script
+(query↔reference proximity, cross-method consensus, Top-N mining) are pair/edge/set-shaped and will
+ride **their own future typed bundle parts**, reusing the same registry pattern but not this table.
+
+## What Changes
+
+Two PRs: the engine (`protspace`) first, then this repo (`protspace_web`) consumes it.
+
+- **Engine — `protspace` package (separate PR):** a new `protspace.stats` module with a generalized
+  `Statistic` contract (each statistic declares the inputs it needs — projection coords, embeddings,
+  and/or labels — and returns one or more result rows) and a light registry. MVP statistics, all
+  **per projection**:
+  - `cluster_validity`: KMeans sweep + elbow (knee on the inertia curve) selecting `K`; then
+    silhouette, Davies–Bouldin, Calinski–Harabasz on the projection coordinates at that `K`.
+  - `faithfulness`: kNN-overlap and trustworthiness / continuity between the embedding and the
+    projection.
+    Wired into `ReductionPipeline.run` — the one stage holding embeddings **and** projections —
+    behind a `--stats/--no-stats` flag, and exposed as `protspace stats -i emb.h5 -p project_dir
+-o statistics.parquet` (no annotations needed for the MVP).
+- **Bundle format:** an optional **fifth part** `statistics.parquet`. Layout
+  `core(3) + settings? + statistics?`; when statistics is present without settings, a **zero-byte
+  settings slot** keeps the fifth position unambiguous. **All** bundle readers/writers are updated:
+  `write_bundle`, `read_bundle`, `extract_bundle_to_dir`, **and `replace_settings_in_bundle`** (the
+  `protspace style` path, which today would silently drop a fifth part).
+- **Prep service (`services/protspace-prep`):** the core bundle is **produced first**; a `stats` step
+  then runs `protspace stats` best-effort under its **own nested timeout**, caught locally so it can
+  never reach the parent handler, re-bundling with `-s` on success. On stats failure or timeout the
+  already-shipped stats-less bundle stands (the job never fails for a secondary artifact). The
+  `protspace` dependency floor is raised to the stats-bearing release and the step feature-probes the
+  subcommand before use.
+- **Frontend reader (`@protspace/utils` + core data-loader):** accept a five-part bundle **without
+  error** (the existing test asserting five-part bundles are _rejected_ is inverted). The statistics
+  part is parsed-but-unused; rendering is a committed follow-up, out of scope here.
+
+**Scope (MVP):** per-projection `cluster_validity` (unsupervised/elbow) + `faithfulness`, baked at
+prep time, carried in the bundle, not yet rendered.
+
+**Non-goals (explicit, non-breaking expansions):** embedding-space cluster-validity; annotation-
+feature label sources; interactive / on-demand recompute; the broader `ProtSpaceExtractor` analyses
+(future typed bundle parts); frontend rendering of the statistics. The registries and long-format
+table leave seams for the scalar expansions; the others are acknowledged as new parts/work.
+
+## Capabilities
+
+### New Capabilities
+
+- `projection-statistics`: per-projection cluster-validity and faithfulness statistics computed at
+  preparation time and carried in the `.parquetbundle` as an optional statistics part. Covers the
+  bundle-boundary data contract (a stable tidy long-format table), production by the prep pipeline,
+  the backward-compatible fifth-part layout (including the `protspace style` round-trip),
+  reproducibility and robustness guards, and reader tolerance.
+
+## Impact
+
+- **Upstream (`protspace` repo, separate PR):**
+  - New `src/protspace/stats/` package (generalized `Statistic` contract + registry, cluster-validity
+    statistics, faithfulness statistics, driver).
+  - `data/io/bundle.py`: `write_bundle` / `read_bundle` / `extract_bundle_to_dir` extended to a fifth
+    statistics part (`STATISTICS_FILENAME`), **and `replace_settings_in_bundle` updated to preserve a
+    trailing statistics part** so `protspace style` is non-lossy.
+  - `utils/add_annotation_style.py` (the `style` command) verified/tested against five-part bundles.
+  - `data/processors/base_processor.py` (`create_output`/`save_output`) and `pipeline.py`
+    (`ReductionPipeline.run`) thread an optional statistics table; embeddings (already in memory) feed
+    faithfulness.
+  - `cli/prepare.py` gains `--stats/--no-stats`; new `cli/stats.py`; `cli/bundle.py` gains
+    `-s/--statistics`.
+  - Tests with **known-answer numeric fixtures** (blob separation; faithfulness on identity vs random
+    projections) and a label-permutation alignment test.
+  - No new dependency: scikit-learn (KMeans, silhouette/DB/CH, `manifold.trustworthiness`) is already a
+    core dep; the elbow knee is ported from `ProtSpaceExtractor` (distance-to-chord, **argmax index →
+    K**).
+- **This repo (`protspace_web`):**
+  - `services/protspace-prep/src/protspace_prep/pipeline.py` (+ `config.py`): the `stats` step with its
+    own timeout, failure isolation, version probe, and an SSE `computing_statistics` stage;
+    `services/protspace-prep/tests/`.
+  - `packages/core/src/components/data-loader/utils/bundle.ts` (+ `packages/utils/src/parquet/*`):
+    accept the optional fifth part; **invert** `bundle.test.ts`'s five-part-rejection test; document
+    that frontend re-export (`createParquetBundle`) currently drops the part.
+- **Data-format change:** additive, backward compatible. Existing three- and four-part bundles read
+  and write unchanged.
+- **API / dependencies:** no HTTP API change; no new dependencies.
diff --git a/openspec/changes/add-projection-statistics/specs/projection-statistics/spec.md b/openspec/changes/add-projection-statistics/specs/projection-statistics/spec.md
new file mode 100644
index 00000000..b4873996
--- /dev/null
+++ b/openspec/changes/add-projection-statistics/specs/projection-statistics/spec.md
@@ -0,0 +1,178 @@
+## ADDED Requirements
+
+### Requirement: Per-projection statistics are computed at preparation time
+
+The preparation pipeline SHALL compute, for each projection, both **cluster-validity** and
+**projection-faithfulness** statistics during data preparation and embed them in the produced
+`.parquetbundle` when statistics are enabled (the default). Cluster-validity SHALL include the
+silhouette score, the Davies–Bouldin index, the Calinski–Harabasz index, and the elbow-estimated
+optimal cluster count, and its rows SHALL carry `label_kind` `kmeans_elbow`. Faithfulness SHALL
+include kNN-overlap and trustworthiness/continuity of the projection relative to its source
+embedding.
+
+#### Scenario: Statistics are produced by default
+
+- **WHEN** a bundle is prepared from embeddings with statistics enabled
+- **THEN** the bundle contains a statistics part with, for every projection, cluster-validity rows
+  (silhouette, davies_bouldin, calinski_harabasz, n_clusters) and faithfulness rows (knn_overlap,
+  trustworthiness, continuity)
+
+#### Scenario: Statistics can be disabled
+
+- **WHEN** preparation is run with statistics disabled (`--no-stats` / prep flag off)
+- **THEN** no statistics part is produced and the bundle remains a valid three- or four-part bundle
+
+### Requirement: The elbow estimates the optimal cluster count from the inertia knee
+
+The unsupervised cluster source SHALL sweep KMeans across a bounded range of cluster counts on the
+projection coordinates and select the optimal count using the **index** of the maximum
+perpendicular deviation of the inertia curve from its first-to-last chord (not a distance value). It
+SHALL report the selected count as `n_clusters` with `metric_kind` `meta`, label points accordingly
+so the validity metrics score that clustering, and record a knee-confidence indicator and the
+silhouette-optimal count for cross-checking.
+
+#### Scenario: Elbow recovers a known cluster count
+
+- **WHEN** the data forms `k` well-separated clusters
+- **THEN** the elbow-estimated `n_clusters` is within one of `k` and the silhouette of the selected
+  clustering is high relative to an overlapping-cluster baseline
+
+#### Scenario: No clear knee is flagged, not faked
+
+- **WHEN** the inertia curve is approximately linear (no distinct knee)
+- **THEN** the selected count is still emitted but marked with low knee confidence in `extra_json`
+
+### Requirement: Faithfulness compares embedding and projection neighbourhoods
+
+Faithfulness statistics SHALL take a projection and **its source embedding** as input and measure how
+well the projection preserves each point's neighbourhood. Continuity SHALL be computed as
+trustworthiness with the embedding and projection arguments swapped, and the high-dimensional
+distance metric SHALL be applied to whichever computation has the embedding as its primary input.
+The high-dimensional metric SHALL default to the projection's own reducer metric (Euclidean by
+default), falling back to cosine only when unknown, and SHALL be recorded per row. These statistics
+carry `metric_kind` `faithfulness` and `label_kind` `none`, and SHALL bound their cost on large
+inputs (sampling a shared subset above a threshold, skipping with a recorded marker beyond a hard
+ceiling) since trustworthiness materialises a full pairwise distance matrix.
+
+#### Scenario: A faithful projection scores high
+
+- **WHEN** the projection preserves neighbourhoods (e.g. a near-identity reduction)
+- **THEN** kNN-overlap and trustworthiness are near their maximum
+
+#### Scenario: A distorting projection scores lower
+
+- **WHEN** the projection scrambles neighbourhoods (e.g. a random projection)
+- **THEN** kNN-overlap and trustworthiness are markedly lower, with the neighbourhood size and the
+  per-row distance metric recorded in `extra_json`
+
+#### Scenario: Each projection uses its own embedding
+
+- **WHEN** a run produces projections from more than one embedding
+- **THEN** each projection's faithfulness is computed against the embedding that produced it (matched
+  by name and id-intersection join), and any projection whose embedding is unavailable is skipped
+  with a recorded marker rather than scored against the wrong embedding
+
+#### Scenario: Large inputs are bounded
+
+- **WHEN** the number of points exceeds the sampling threshold
+- **THEN** faithfulness (and silhouette) are computed on a fixed-seed shared subsample with the
+  sample size recorded, and beyond a hard ceiling the statistic is skipped with a recorded marker
+  rather than exhausting memory
+
+### Requirement: Statistics are a stable tidy long-format table with joinable keys
+
+The statistics part SHALL be a tidy long-format table with a fixed eight-column schema: `space_kind`
+(string), `space_name` (string), `stat_family` (string), `label_kind` (string), `metric` (string),
+`metric_kind` (string), `value` (double), and `extra_json` (string), with one statistic value per
+row. `metric_kind` (`validity` | `meta` | `faithfulness`) SHALL be a column so consumers can
+aggregate validity scores without folding in meta rows such as `n_clusters`. `space_name` SHALL equal
+the corresponding `projections_metadata.projection_name` so the table is joinable without string-
+parsing. Adding a new scalar statistic, label source, or space SHALL add **rows** and SHALL NOT
+change the column schema; any per-source attribute (e.g. an annotation column name) SHALL be carried
+inside `extra_json`, not as a new column.
+
+#### Scenario: Each row is self-describing and joinable
+
+- **WHEN** a consumer reads the statistics part
+- **THEN** every row identifies its space (kind + name), family, label kind, metric, and metric kind,
+  carries a numeric value, and its `space_name` matches a `projection_name` in the projections metadata
+
+#### Scenario: Meta rows are separable from validity rows
+
+- **WHEN** a consumer aggregates cluster-validity scores
+- **THEN** it can exclude `n_clusters` by its `metric_kind` `meta` column without parsing `extra_json`
+
+#### Scenario: New statistics do not change the schema
+
+- **WHEN** a later expansion adds an embedding space or an annotation-feature label source
+- **THEN** it appears as additional rows (e.g. `space_kind` `embedding`, or `label_kind` `annotation`
+  with the source column in `extra_json`) under the same eight-column schema
+
+### Requirement: Statistics ride in the bundle as an optional fifth part, backward compatibly
+
+The `.parquetbundle` SHALL carry statistics as an optional fifth parquet part in the positional order
+`core(3) + settings? + statistics?`. When statistics are present but settings are absent, a zero-byte
+settings part SHALL occupy the fourth slot, and all readers and writers SHALL distinguish settings-
+present from settings-absent by the **fourth part's emptiness**, not by raw part count. All bundle
+readers and writers — including `read_bundle` (whose existing return shape is preserved so its callers
+do not break), `extract_bundle_to_dir`, and `replace_settings_in_bundle` (the styling path) — SHALL
+handle the fifth part; three- and four-part bundles SHALL retain their exact current meaning.
+
+#### Scenario: Legacy bundles are unaffected
+
+- **WHEN** a three-part (core only) or four-part (core + settings) bundle is read or rewritten
+- **THEN** it behaves exactly as before, with no statistics part
+
+#### Scenario: Five-part bundle round-trips
+
+- **WHEN** a five-part bundle (core + settings + statistics) is read
+- **THEN** projection, annotation, and settings data load normally and the statistics part is
+  recovered
+
+#### Scenario: Styling preserves statistics for both shapes
+
+- **WHEN** an annotation-styling / settings-replacement operation rewrites a statistics-bearing
+  bundle, whether it has display settings or only a zero-byte settings slot
+- **THEN** the statistics part is preserved (not dropped), with a valid settings part written and the
+  statistics part kept as the fifth
+
+### Requirement: The frontend reader tolerates the statistics part
+
+The frontend bundle reader SHALL load a five-part bundle without error in both the settings+statistics
+and the zero-byte-settings (statistics-only) shapes, distinguishing them by the fourth part's
+emptiness, and treating the statistics part as optional and parsed-but-unused. Its presence or absence
+SHALL NOT affect rendering of projections, annotations, or settings.
+
+#### Scenario: App loads a statistics-bearing bundle
+
+- **WHEN** the app loads a five-part bundle produced by the prep pipeline (with or without settings)
+- **THEN** the projection renders normally with no error and the statistics part is ignored for now
+
+### Requirement: Statistics computation is non-fatal, reproducible, and guarded
+
+Statistics are secondary to the core bundle. In the prep service the **core bundle SHALL be produced
+first**, and computing statistics — under its own bounded timeout caught locally — SHALL NOT fail the
+job, lose the bundle, or consume the budget the bundle needs; a stats failure or timeout SHALL leave
+the already-produced stats-less bundle in place. Computation SHALL be deterministic under a recorded
+seed (KMeans, silhouette sampling, and faithfulness subsampling), and SHALL guard expensive and
+degenerate cases: silhouette and faithfulness SHALL use a bounded shared sample above a size
+threshold and skip beyond a hard ceiling; clusters with fewer than two members SHALL be excluded from
+Davies–Bouldin / Calinski–Harabasz; and inputs too small to score (fewer than three points, a single
+cluster) SHALL yield no row rather than raising. Provenance (seed, neighbourhood size, distance
+metric, sampling, knee confidence, source embedding) SHALL be recorded in `extra_json`.
+
+#### Scenario: A computation failure does not fail the job or lose the bundle
+
+- **WHEN** statistics computation fails or times out in the prep service
+- **THEN** the job still succeeds and the core bundle (produced before the stats step) is delivered
+  without a statistics part
+
+#### Scenario: A stale engine degrades to skip
+
+- **WHEN** the installed `protspace` does not provide the `stats` subcommand
+- **THEN** the prep service detects this via a feature probe and skips statistics rather than failing
+
+#### Scenario: Results are reproducible
+
+- **WHEN** the same input is prepared twice with the same seed and projection
+- **THEN** the statistics values are identical, and each row records the seed used
diff --git a/openspec/changes/add-projection-statistics/tasks.md b/openspec/changes/add-projection-statistics/tasks.md
new file mode 100644
index 00000000..275f1f12
--- /dev/null
+++ b/openspec/changes/add-projection-statistics/tasks.md
@@ -0,0 +1,138 @@
+Two PRs: **A — upstream `protspace` engine** (sections 1–7), then **B — this repo `protspace_web`**
+(sections 8–10). TDD; every statistic ships a **known-answer numeric fixture**, not a "rows exist"
+check. All sklearn imports under `stats/` are **function-local** (preserve ~50 ms CLI startup).
+
+## 1. Engine — `protspace.stats` scaffolding & contract
+
+- [ ] 1.1 `src/protspace/stats/base.py`: `StatContext` (space_kind, space_name, coords, embedding,
+      embedding_name, high_dim_metric, ids, rng_seed, params); `Statistic` Protocol (`family`,
+      `requires_embedding`, `compute(ctx) -> list[StatRow]`); `StatRow` (space_kind, space_name,
+      stat_family, label_kind, metric, **metric_kind**, value, extra: dict); `StatsReport.to_arrow()`
+      → the **8-column** tidy table (`extra` → `extra_json`).
+- [ ] 1.2 `stats/__init__.py`: lazy `STATISTICS` registry (mirroring `REDUCERS`) +
+      `compute_statistics(embedding_sets, reductions, config)` iterating registered statistics per
+      projection, isolating per-statistic failures (catch → omit row). No compute-module imports at
+      module load.
+- [ ] 1.3 Tests: registry lookup; `to_arrow()` 8-column schema; empty-report round-trip; partial
+      report when one statistic raises.
+
+## 2. Engine — cluster_validity (elbow + silhouette/DB/CH)
+
+- [ ] 2.1 `stats/cluster/kmeans_elbow.py`: KMeans sweep `K ∈ [2, min(round(sqrt(n)), 50)]`
+      (`random_state=ctx.rng_seed`, `n_init>=10`); record inertia; select `K` via the **argmax index**
+      of perpendicular deviation from the inertia curve's first→last chord (implement directly;
+      algorithm from `ProtSpaceExtractor._knee_distance_to_chord` but reuse the index, **not** its
+      y-value return; do **not** port `_knee_median_jump`). Return labels at `K` + diagnostics
+      (inertia curve, `knee_confidence`, silhouette-optimal K) in `extra`. Emit `n_clusters` with
+      `metric_kind="meta"`. Cluster rows carry `label_kind="kmeans_elbow"`.
+- [ ] 2.2 `stats/metrics/validity.py`: silhouette (fixed-seed sample above threshold; record
+      `sampled`/`sample_size`), davies_bouldin, calinski_harabasz on the clusterer's labels;
+      `metric_kind="validity"`. Guard `2 ≤ K ≤ n−1`; drop singleton clusters for DB/CH.
+- [ ] 2.3 Register `cluster_validity` in `STATISTICS`.
+- [ ] 2.4 Known-answer tests: `k`-blob → elbow `K ∈ {k−1,k,k+1}`; silhouette `>0.6` (separated) vs
+      `<0.2` (overlapping); n<3 / single cluster → no row; near-linear inertia → `knee_confidence:"low"`.
+
+## 3. Engine — faithfulness (kNN-overlap, trustworthiness/continuity)
+
+- [ ] 3.1 `stats/metrics/faithfulness.py`: kNN-overlap@k (default `k=15`); **trustworthiness =
+      `trustworthiness(embedding, coords, metric=ctx.high_dim_metric)`** and **continuity =
+      `trustworthiness(coords, embedding, metric=<projection euclidean>)`** (no `continuity` in
+      sklearn; args swapped, metric attaches to the embedding-first call). `metric_kind="faithfulness"`,
+      `label_kind="none"`, `requires_embedding=True`. Record the metric **per row** in `extra`.
+- [ ] 3.2 **Cost guard:** above `sample_threshold` (default 5000) subsample a **shared** id subset
+      derived from `(rng_seed, sorted ids)` for all projections of an embedding (record
+      `sampled`/`sample_size`); beyond a hard ceiling emit a `skipped:"n_too_large"` row. (Trustworthiness
+      densifies an `n×n` matrix — no ANN path.)
+- [ ] 3.3 Register in `STATISTICS`.
+- [ ] 3.4 Known-answer tests: near-identity projection → faithfulness ≈ 1.0; random projection →
+      markedly lower; `k`/metric recorded; large-n path samples (and the very-large path skips).
+
+## 4. Engine — driver: mapping, alignment, determinism
+
+- [ ] 4.1 `stats/driver.py`: per projection, build a `StatContext` — select **its source embedding**
+      by `emb_set.name`; set `high_dim_metric` from the reduction's `info_json` (reducer metric;
+      cosine fallback); **id-intersection join** embedding↔coords (not positional zip; `load_h5` drops
+      NaN rows and `cli/project.py` writes coords against `embedding_sets[0].headers`); record id-set
+      mismatches; skip `requires_embedding` statistics when no matching embedding.
+- [ ] 4.2 Tests: label/id-permutation yields identical scores; an H5 with an extra and a dropped id
+      vs `projections_data` is handled by intersection (not mis-paired); multi-embedding run maps each
+      projection to the correct embedding.
+
+## 5. Engine — bundle format (fifth part, ALL readers/writers, no arity break)
+
+- [ ] 5.1 `data/io/bundle.py`: `STATISTICS_FILENAME`; `write_bundle(tables, path, settings=None,
+statistics=None)` emitting `core(3) + settings? + statistics?` with a **zero-byte** settings slot
+      when statistics is given without settings.
+- [ ] 5.2 **Keep `read_bundle`'s 2-tuple** (two `add_annotation_style.py` callers unpack it); add
+      `read_statistics_from_bundle(path) -> bytes | None`; `extract_bundle_to_dir` writes
+      `STATISTICS_FILENAME` when present; accept 3–5 parts (branch on part-4 emptiness). Update the
+      "Expected 3 or 4 parts" message.
+- [ ] 5.3 **Rewrite `replace_settings_in_bundle`** by part-count case (`len==3/4/5`) to preserve a
+      trailing statistics part (zero-byte settings slot when inserting settings ahead of stats); verify
+      `utils/add_annotation_style.py` / `protspace style`.
+- [ ] 5.4 Round-trip tests: 3-, 4-, 5- (settings+stats), 5-empty-settings (stats-only) read back
+      correctly; **`protspace style` preserves stats on BOTH settings+stats and stats-only inputs**;
+      legacy bundles unchanged.
+
+## 6. Engine — pipeline & CLI wiring
+
+- [ ] 6.1 `data/processors/base_processor.py`: `create_output` accepts an optional statistics table;
+      `save_output` passes it to `write_bundle`.
+- [ ] 6.2 `data/processors/pipeline.py`: `stats` flag on `PipelineConfig`; in `ReductionPipeline.run`,
+      call `compute_statistics(embedding_sets, all_reductions, ...)` before `create_output` when
+      enabled; thread the table through.
+- [ ] 6.3 `cli/prepare.py`: add `--stats/--no-stats` (default on, same on/off convention as the
+      existing `--scores`); log it.
+- [ ] 6.4 `cli/stats.py`: `protspace stats -i emb.h5 [-i ...] -p project_dir -o statistics.parquet`
+      (no `-a`). Build a **projections-parquet loader** (`projections_data.parquet` →
+      `{projection_name: (coords ndarray, ids)}`) and id-join each projection to its source embedding.
+      **Add `stats` to `cli/app.py:_register_commands()`** (hardcoded list).
+- [ ] 6.5 `cli/bundle.py`: optional `-s/--statistics`; pass to `write_bundle`. (Hatchling src-layout
+      auto-includes `src/protspace/stats/`; no `[tool.hatch]` change needed.)
+
+## 7. Engine — verification (PR A)
+
+- [ ] 7.1 `uv run pytest tests/ -m "not slow"` green; `uv run ruff check src/ tests/` clean; confirm
+      CLI startup not regressed (lazy imports).
+- [ ] 7.2 End-to-end on a **real, committed fixture** (`data/sizes/phosphatase.h5` does **not** exist;
+      use an existing test H5 or add a small generated fixture): `prepare ... --stats` → 5-part bundle
+      with cluster_validity + faithfulness rows per projection; `protspace stats` standalone matches;
+      `--no-stats` → 3-part bundle; `protspace style` on the 5-part bundle keeps stats.
+- [ ] 7.3 CHANGELOG/version bump (semantic-release); record the released version for the web floor;
+      open the draft PR.
+
+## 8. This repo — prep service stats step (bundle-first, non-fatal, probed)
+
+- [ ] 8.1 `services/protspace-prep/.../config.py`: add `PREP_STATS` (default on) and
+      `PREP_STATS_TIMEOUT_SECONDS` (default 120); raise the `protspace` floor (pyproject + lock) to the
+      release from task 7.3.
+- [ ] 8.2 `services/protspace-prep/.../pipeline.py`: **produce the core bundle first** (within the
+      parent `pipeline_timeout_seconds`), then run `protspace stats -i <resolved h5_files[0]> -p
+<project_dir> -o statistics.parquet` under a **nested** `asyncio.timeout(PREP_STATS_TIMEOUT_
+SECONDS)`, catching failure/timeout **locally** so it never reaches the outer handler; on success
+      re-run `bundle` with `-s`. A one-time `protspace stats --help` probe → skip if absent. Emit an
+      SSE `computing_statistics` stage.
+- [ ] 8.3 Tests (`tests/test_pipeline.py`): stats invoked with the resolved H5 + project dir and folded
+      via `-s`; **a stats failure/timeout still yields the core bundle and a successful job** (and does
+      not consume the parent budget such that bundle is lost); missing-subcommand probe skips; SSE event
+      emitted.
+
+## 9. This repo — frontend reader tolerance
+
+- [ ] 9.1 `packages/core/.../data-loader/utils/bundle.ts`: accept **4 delimiters** (5 parts); branch on
+      **part-4 byte length** (empty ⇒ settings absent, part 5 = stats; non-empty ⇒ part 4 = settings,
+      part 5 = stats); guard `extractSettings` to return null on a zero-byte buffer **before**
+      `assertValidParquetMagic`; skip part 5. Keep 2/3-delimiter behaviour identical.
+- [ ] 9.2 **Invert** `bundle.test.ts`'s "should reject bundle with 4 delimiters (5 parts)" test; update
+      the layout comment in `packages/utils/src/parquet/bundle-writer.ts`; add a `FastaPrepStage` union
+      entry `'computing_statistics'` (and any exhaustive consumer); note `createParquetBundle` re-export
+      drops the stats part (documented limitation).
+- [ ] 9.3 Tests: 5-part (settings+stats) AND 5-part (empty-settings, stats-only) load projections +
+      annotations + settings with no error and ignore statistics; 3/4-part bundles unchanged.
+
+## 10. This repo — verification (PR B)
+
+- [ ] 10.1 `pnpm precommit` green; prep-service `uv run pytest -q` green.
+- [ ] 10.2 Load a real engine-produced 5-part bundle (both settings+stats and stats-only) in the app —
+      renders normally, no console errors, statistics ignored.
+- [ ] 10.3 Open the draft PR; reference issue #219 and the upstream engine PR.
diff --git a/openspec/changes/route-projection-statistics/.openspec.yaml b/openspec/changes/route-projection-statistics/.openspec.yaml
new file mode 100644
index 00000000..fab62b41
--- /dev/null
+++ b/openspec/changes/route-projection-statistics/.openspec.yaml
@@ -0,0 +1,2 @@
+schema: spec-driven
+created: 2026-06-24
diff --git a/openspec/changes/route-projection-statistics/design.md b/openspec/changes/route-projection-statistics/design.md
new file mode 100644
index 00000000..b611a6a8
--- /dev/null
+++ b/openspec/changes/route-projection-statistics/design.md
@@ -0,0 +1,194 @@
+## Context
+
+`add-projection-statistics` shipped two statistic families, computed per projection at prep time and
+baked into the `.parquetbundle` as a single fifth part (`statistics.parquet`, a tidy 8-column
+long-format table). The frontend reader was made to _tolerate_ that part but not render it —
+"frontend rendering" was an explicit committed follow-up.
+
+The `.parquetbundle` is concatenated Parquet tables separated by `---PARQUET_DELIMITER---`:
+
+1. `protein_annotations` — `identifier` + arbitrary annotation columns (categorical or numeric);
+   the frontend's **color-by** control enumerates these columns.
+2. `projections_metadata` — `projection_name`, `dimensions`, `info_json` (reducer params incl. the
+   distance `metric`), `source`.
+3. `projections_data` — long `(projection_name, identifier, x, y, z)`.
+4. `settings` (optional) — annotation **styles** / display config (palettes, pinned values).
+5. `statistics` (optional, from `add-projection-statistics`) — the 8-column stats table.
+
+In his PR #61 review, tsenoner asked us to route each statistic to the part whose **existing
+frontend consumer** matches its **granularity**:
+
+| Statistic                                                           | Granularity    | Destination part                     | Existing consumer it rides            |
+| ------------------------------------------------------------------- | -------------- | ------------------------------------ | ------------------------------------- |
+| KMeans cluster label @ elbow `K`                                    | per protein    | `protein_annotations` (1)            | color-by (categorical)                |
+| per-sample silhouette @ `K`                                         | per protein    | `protein_annotations` (1)            | color-by (continuous)                 |
+| kNN-overlap / trustworthiness / continuity                          | per projection | `projections_metadata.info_json` (2) | projection method/info surface        |
+| silhouette mean / Davies–Bouldin / Calinski–Harabasz / `n_clusters` | per projection | `statistics.parquet` (5)             | (dedicated quality table — new, thin) |
+
+The payoff: the two per-protein columns and the per-method scalars render through machinery that
+already exists, shrinking "frontend rendering" to (at most) a small aggregate-validity table.
+
+## Goals / Non-Goals
+
+**Goals**
+
+- Route each already-computed statistic to its consumer-aligned bundle part (the table above).
+- Surface per-protein outputs through the existing color-by control (membership pre-styled), and
+  faithfulness through the existing projection-info surface, with minimal new frontend code.
+- Preserve every guarantee from `add-projection-statistics`: statistics are **secondary** (the core
+  bundle ships even if they fail), deterministic under a seed, cost-bounded, and backward compatible.
+
+**Non-Goals**
+
+- Changing **which** statistics are computed, the elbow algorithm, or the cost guards — unchanged.
+- A rich statistics dashboard. The aggregate-validity table is minimal and may be a thin follow-up.
+- External/label-based validation (ARI/NMI), embedding-space validity, interactive recompute — still
+  future work, unchanged from the prior change's non-goals.
+
+## Decisions
+
+### D1 — A statistic output declares a destination
+
+Today a statistic returns `StatRow`s that all land in one table. Introduce a **destination** on each
+output: `annotation` (per-protein, carries `identifier`+value), `projection_metadata` (per-method
+scalar), or `statistics_part` (the long table). The carriage layer partitions outputs by destination
+and fans them to the matching bundle part. This keeps the registry/driver shape and lets future
+statistics pick a destination without a new mechanism.
+
+### D2 — Per-protein outputs (cluster membership + per-sample silhouette) → annotations
+
+`cluster_validity` already computes KMeans `labels` at the elbow `K` and discards them; surface them
+as a per-protein **categorical** annotation, plus **per-sample silhouette** (`silhouette_samples`,
+new but cheap, scikit-learn) as a per-protein **numeric** annotation. Both are **per projection** —
+one column pair per reduction, since each reduction has its own elbow `K`.
+
+- **Naming (default):** `cluster_<projection_name>` and `silhouette_<projection_name>` (e.g.
+  `cluster_umap2`). The elbow `K` is recorded in annotation metadata / display label, not baked into
+  the machine column name (keeps names stable if `K` shifts on re-run). **Open for tsenoner** — he
+  wrote "Kx with x variable"; if he wants `K` in the visible name, it goes in the display label.
+- **Auto-style (default yes):** generate a categorical palette for each membership column into the
+  `settings` part so clusters are colored on load; the numeric silhouette column uses the default
+  continuous ramp. This is the concrete "frontend for free" win.
+- **Provenance:** membership/silhouette columns are marked computed (not retrieved biological
+  annotation) so the UI/consumers can distinguish them; the source projection + `K` + seed live in
+  the column metadata.
+
+### D3 — Faithfulness → `projections_metadata.info_json.quality`
+
+kNN-overlap, trustworthiness, continuity are single scalars per projection. Fold them into the
+projection's existing `info_json` under a `quality` object (e.g.
+`info_json.quality = {knn_overlap, trustworthiness, continuity, k, metric, sampled, ...}`). Additive
+(no new metadata column); existing `info_json` consumers ignore unknown keys. This is also the
+**natural compute site**: faithfulness needs the embedding _and_ the projection together, which the
+`prepare`/`project` stage has in hand — so it is written into metadata there, and the standalone
+`stats` command recomputes + merges into `projections_metadata.parquet` for existing projects.
+
+### D4 — Aggregate cluster-validity stays in `statistics.parquet`
+
+silhouette mean, Davies–Bouldin, Calinski–Harabasz, and the `n_clusters` meta row remain the
+8-column tidy table — now the **only** thing in the fifth part. Its schema and the "new scalar = new
+row" property are unchanged; it simply no longer carries per-protein or faithfulness data.
+
+### D5 — Robustness & decoupling preserved via core-first re-enrichment
+
+`add-projection-statistics` guarantees the prep job never fails for statistics and never loses the
+bundle. Under routing, per-protein outputs live in `protein_annotations` (a **core** part), so
+enrichment now rewrites parts 1/2/4/5, not just appends part 5. Preserve the guarantee by keeping the
+**existing shape**: build the core (un-enriched) bundle first within the parent timeout; then a
+best-effort step computes statistics and **re-bundles to a temp file with the routed parts merged
+in**, atomically `os.replace`-ing it over the core bundle on success. On any stats failure/timeout
+the un-enriched core bundle stands. No regression in the safety property; the diff is _which_ parts
+the enrichment rewrites.
+
+### D6 — Backward compatibility
+
+- Annotations: extra computed columns; readers already accept arbitrary annotation columns.
+- `info_json`: an extra `quality` key; readers parse JSON and ignore unknown keys.
+- `statistics.parquet`: same 8-column schema, fewer row families (aggregate only). Readers that
+  tolerate the fifth part are unaffected; consumers that previously expected faithfulness/labels in
+  the part must now read them from metadata/annotations — but the only such consumer is our own
+  not-yet-shipped frontend, so there is no external break. Bundles produced before this change still
+  read; bundles without statistics are unchanged.
+
+## Risks / trade-offs
+
+- **Annotation-table growth.** One membership + one silhouette column **per projection** (6 DR
+  methods → 12 computed columns). Mitigate with the `cluster_*`/`silhouette_*` prefix + computed-
+  provenance flag so the UI can group/collapse them; consider a default of styling only the membership
+  columns. Reviewers should sanity-check color-by UX with ~12 extra columns.
+- **Loss of single-format generality.** The prior design's "all statistics are rows in one tidy
+  table" is deliberately broken up. Accepted: consumer-fit beats storage uniformity here, and it is
+  the maintainer's call.
+- **Tighter coupling of stats into the core bundle.** Mitigated by D5 (core-first re-enrichment);
+  must be covered by a test asserting a stats failure still yields the core bundle.
+- **Frontend assumptions unverified.** This design assumes (a) color-by enumerates all annotation
+  columns including computed ones, (b) there is a projection info/method surface that can show
+  `info_json.quality`, and (c) styles in the `settings` part drive initial coloring. These are
+  **assumptions to verify against the actual frontend** (a review lens below targets exactly this);
+  if (b) does not exist, faithfulness display becomes a small new surface (still cheaper than a full
+  stats panel).
+
+## Migration / sequencing
+
+Engine PR first (routing + per-point outputs + metadata/annotation writers), then the web PR (prep
+re-enrichment + frontend surfacing). Both layer on top of the still-open `add-projection-statistics`
+PRs (#61 / #295); cleanest is to **fold this routing into those PRs before merge** rather than ship
+the opaque fifth part and immediately refactor it — to be confirmed with tsenoner (amend #61/#295 vs
+stacked follow-up PRs).
+
+## Open questions for tsenoner (defaulted, non-blocking)
+
+1. Column naming + whether `K` shows in the visible name; elbow-`K`-only vs a small `K` sweep.
+2. Amend #61/#295 in place vs land them and stack this refactor on top.
+3. Should the aggregate-validity fifth part get a minimal UI now, or stay carried-but-unrendered
+   until a later dashboard change.
+
+## Review outcomes (4-lens fan-out) — revised decisions
+
+A parallel review (frontend-feasibility, engine-feasibility, spec-quality, adversarial) grounded in
+the real code corrected several first-pass assumptions. Revised decisions:
+
+- **D-route (NEW, supersedes part of D3/D5): fan out at bundle-assembly time, not in `protspace
+stats`.** `cli/stats.py` reads only `projections_data`/`projections_metadata` and writes one
+  `statistics.parquet` ("No annotations are needed"); it has no annotations handle and the prep
+  service calls it directly. So `stats` stays a **pure aggregate-only producer**; all routing happens
+  where annotations + metadata + coords coexist — the `prepare`/pipeline path (`pipeline.run`'s
+  `metadata` frame is the join target) and the prep re-bundle. This removes the showstopper of
+  "rewrite the `stats` command into a project rewriter."
+
+- **D-phase (NEW): phase per-protein behind faithfulness+aggregate, and stack on #61/#295.** See
+  proposal "Sequencing". Faithfulness→metadata + aggregate-only fifth part (Phase 1) carry almost no
+  core-bundle risk; the per-protein annotations (Phase 2) are where the typing/UX/coupling cost lives.
+
+- **D2 corrected — typing is not automatic.** `base_processor._create_protein_annotations_table`
+  does `df.fillna("").astype(str)` on the whole frame, and the frontend infers type from **content**
+  (`conversion.ts:inferAnnotationType`). So integer cluster labels would infer as _numeric_ (wrong);
+  membership MUST be written as non-numeric strings (`cluster 0`). Per-point silhouette must survive
+  as clean numeric strings (no `;`/`|`, empty for missing) to infer continuous. `silhouette_samples`
+  is **not** "cheap" — unlike the aggregate mean it has no sampling path; it needs its own hard-ceiling
+  skip guard.
+
+- **D-style corrected — `settings` is a full envelope, not a palette; "colored on load" is false.**
+  The settings part stores `LegendPersistedSettings` per annotation (`maxVisibleValues`, `shapeSize`,
+  `sortMode`, `hiddenValues`, `enableDuplicateStackUI`, `selectedPaletteId`, `categories:{value:
+{color,shape,zOrder}}`), and `sanitizeLegendSettingsEntry` **drops** any incomplete entry. The
+  engine must emit the whole envelope, categories keyed by the exact label strings. Initial color-by
+  is `annotations[0]` and file settings apply only to the _selected_ annotation, so the guarantee is
+  "colored **when selected**"; making a membership column the initial selection is separate frontend
+  work (initial-view / `publishState`).
+
+- **D3 corrected — faithfulness display is small new code, not free.** `projection-metadata.ts`
+  flattens `info_json` one level; a nested `quality` object renders as a raw `JSON.stringify` blob,
+  so the component must be extended to expand `quality` into per-metric rows.
+
+- **Carriage hazard — filename mismatch.** `ArrowReader._load_data` reads
+  `selected_annotations.parquet` but `ArrowReader.save_data` writes `protein_annotations.parquet`;
+  a router that reuses `save_data` would silently lose annotations. Build the augmented annotations
+  table directly and re-bundle via `write_bundle` (which already accepts `statistics=`);
+  `replace_settings_in_bundle` preserves core parts byte-for-byte and **cannot add columns**.
+
+- **Determinism — elbow `K` must be stable**, else membership labels re-bucket between runs (added to
+  the reproducibility scenario).
+
+- **`metric_kind` enum — `faithfulness` value is retired from the fifth part** (now only `validity`
+  / `meta` rows there); the column may still admit it but it is not produced.
diff --git a/openspec/changes/route-projection-statistics/proposal.md b/openspec/changes/route-projection-statistics/proposal.md
new file mode 100644
index 00000000..1e5e4e2d
--- /dev/null
+++ b/openspec/changes/route-projection-statistics/proposal.md
@@ -0,0 +1,134 @@
+## Why
+
+The `add-projection-statistics` change (issue #219, engine PR tsenoner/protspace#61 + web #295)
+computes the right numbers but delivers **all** of them into a single opaque fifth part
+(`statistics.parquet`) that the frontend parses-but-ignores — so nothing is visible to a user, and
+"render the statistics" was left as an open follow-up.
+
+In his review of PR #61, the maintainer (tsenoner) asked us to **place each statistic where the
+frontend already knows how to consume that shape of data**, rather than in one catch-all part:
+
+> - Cluster Elbow Kx → save as an annotation
+> - Cluster Silhouette Kx → save as annotation (x variable)
+> - kNN-overlap + trustworthiness + continuity → metadata per dimensionality-reduction method
+> - silhouette, Davies–Bouldin, Calinski–Harabasz → separate parquet file (as currently)
+
+The insight: the statistics differ in **granularity and consumer**. Per-protein quantities (which
+cluster a point lands in; how well it fits that cluster) are _annotations_ — the color-by machinery
+already renders them. Per-method scalars (how faithful a projection is) are _reducer metadata_ —
+they belong with the projection's other info. Only the per-method **aggregate** validity scalars
+need a dedicated part. Routing this way makes most of the statistics render through existing UI
+surfaces with little or no new frontend code — turning the deferred "frontend rendering" follow-up
+into mostly-free reuse.
+
+This change does **not** alter which statistics are computed (that shipped in
+`add-projection-statistics`); it changes **where they are carried in the bundle and how they reach
+the screen**.
+
+## What Changes
+
+The work spans the engine (`protspace`) and this repo (`protspace_web`), engine first.
+
+- **Engine — `cluster_validity` gains per-point outputs.** The KMeans labels at the elbow `K` are
+  already computed and currently discarded; surface them as a **per-protein cluster-membership**
+  output. Add **per-sample silhouette** (`sklearn.metrics.silhouette_samples`) at that `K` as a
+  second per-protein output. Both are produced **per projection** (each reduction has its own elbow
+  `K`).
+- **Engine — routing at carriage time.** A statistic now declares a **destination** for each output:
+  `annotation` (per-protein, joined on identifier), `projection_metadata` (per-method scalar), or
+  `statistics_part` (the long-format table). The bundle writer fans each output to the matching part:
+  - cluster membership + per-point silhouette → **`protein_annotations`** columns (one pair per
+    projection), with an auto-generated categorical color **style** for the membership column so it
+    is colorable on load;
+  - faithfulness (kNN-overlap, trustworthiness, continuity) → **`projections_metadata`**, folded into
+    each projection's `info_json` under a `quality` key;
+  - aggregate cluster-validity (silhouette mean, Davies–Bouldin, Calinski–Harabasz, `n_clusters`) →
+    **`statistics.parquet`** (the existing fifth part, now carrying only these).
+- **Fan-out happens at bundle-assembly time, not in `protspace stats`.** The standalone `stats`
+  command today reads only projections and writes a single `statistics.parquet` (its docstring: "No
+  annotations are needed") — it has **no annotations handle**, so it stays a pure aggregate-only
+  producer. Routing is done where annotations + metadata + coordinates are already in hand: the
+  `prepare`/pipeline path writes the routed outputs inline, and the prep service's re-bundle step
+  merges them. (Review finding — see `design.md` D-route.)
+- **Prep service (`services/protspace-prep`):** the same **core-bundle-first** shape, but the
+  best-effort enrichment step now rewrites parts **1/2/4/5** (annotations, metadata, settings,
+  statistics), not just appends part 5 — so it re-bundles the full set to a temp file and atomically
+  swaps it in on success. This is a **larger correctness surface** than the prior append-only step
+  (it touches user-visible color-by data), so it is not "free": it needs an explicit test that a
+  successful enrichment equals the core bundle plus exactly the computed columns (no row drops /
+  reordering / retyping). A stats failure still leaves the already-shipped, un-enriched core bundle
+  in place.
+- **Frontend (`@protspace/core` + `@protspace/utils`):** the routed statistics surface through
+  existing consumers — cluster-membership and per-point-silhouette appear in the **color-by**
+  control automatically (membership pre-styled); faithfulness is shown in the projection's
+  **method/info** surface. The aggregate `statistics.parquet` part may render as a small per-
+  projection quality table (the only genuinely new UI; may itself be a thin follow-up).
+
+## Capabilities
+
+### Modified Capabilities
+
+- `projection-statistics`: change the **carriage and delivery** of the already-computed statistics
+  from "one opaque fifth part, parsed-but-unused" to "routed to consumer-aligned bundle parts and
+  surfaced through existing frontend controls". The fifth-part contract narrows to aggregate
+  validity only; per-protein and per-method outputs move to the annotations and projection-metadata
+  parts; the prep robustness/decoupling guarantees are preserved under the new re-enrichment shape.
+
+## Impact
+
+- **Upstream (`protspace` repo, separate PR):**
+  - `stats/base.py`: each `StatRow`/output declares a **destination** (`annotation` |
+    `projection_metadata` | `statistics_part`); `StatsReport` can partition outputs by destination.
+  - `stats/metrics/validity.py`: emit per-protein **cluster membership** (the existing labels) and
+    **per-sample silhouette** (`silhouette_samples`) as `annotation`-destined outputs keyed by
+    identifier; keep the aggregate silhouette/DB/CH + `n_clusters` as `statistics_part` outputs.
+  - `stats/metrics/faithfulness.py`: mark its scalars `projection_metadata`-destined.
+  - `data/io/bundle.py` + `data/processors/base_processor.py`: a **router** that merges
+    annotation-destined outputs into `protein_annotations`, faithfulness into
+    `projections_metadata.info_json.quality`, and aggregate rows into `statistics.parquet`; generate
+    a categorical style for each cluster-membership column (`utils/add_annotation_style.py`).
+  - `cli/stats.py` / `cli/prepare.py`: write the routed outputs (annotations + metadata + statistics)
+    instead of a single statistics parquet.
+  - Tests: per-protein outputs join correctly on identifiers; faithfulness lands in `info_json`;
+    aggregate-only statistics part; styling round-trip.
+- **This repo (`protspace_web`):**
+  - `services/protspace-prep/src/protspace_prep/pipeline.py`: re-enrichment step rewrites parts
+    1/2/4/5 atomically, still core-bundle-first and non-fatal; SSE stage retained.
+  - `packages/core` data-loader + `packages/utils`: read faithfulness from `info_json.quality` and
+    surface it; ensure computed cluster/silhouette annotation columns flow into the color-by control;
+    optional aggregate-validity table.
+- **Data-format change:** additive and backward compatible. Bundles without statistics are
+  unchanged; the annotations and `info_json` additions are extra columns/keys existing readers
+  tolerate; the statistics part keeps its eight-column schema (now aggregate rows only).
+- **API / dependencies:** no HTTP API change; no new dependency (`silhouette_samples` is in the
+  already-present scikit-learn).
+
+## Sequencing & phasing (revised after fan-out review)
+
+The four-lens review converged on **stack, don't amend**: the open engine/web PRs (#61/#295) ship a
+self-contained, green, backward-compatible opaque fifth part with **zero user-visible surface**.
+Folding this correctness-heavy routing refactor into them holds the green infra hostage and makes any
+routing bug block the whole feature. Because the fifth part is unconsumed, landing it and then
+**removing** the per-protein/faithfulness rows in this follow-up is non-breaking.
+
+- **Phase 0 — land #61/#295 as-is** (opaque fifth part; reversible; no migration cost).
+- **Phase 1 — low-risk routing:** faithfulness → `info_json.quality`; narrow the fifth part to
+  aggregate validity only. No core-part (annotations) rewrite, so the prep robustness story is barely
+  touched. Satisfies three of tsenoner's four bullets.
+- **Phase 2 — per-protein annotations** (cluster membership + per-point silhouette + auto-styling +
+  the color-by/provenance UX): the part that pulls statistics into the **core** annotations table,
+  forces categorical/numeric typing through `base_processor`'s `.astype(str)` writer, generates full
+  legend-settings envelopes, and floods the color-by dropdown (one membership + one silhouette column
+  per projection; parameter sweeps / multi-embedding push this to 30–60 columns). Gate it behind a
+  flag and land it once the typing, provenance-grouping, and initial-selection questions are answered.
+
+## Open questions (defaulted now; confirm with tsenoner, do not block)
+
+1. **Annotation column naming / which K.** Default: one membership column `cluster_<projection>` and
+   one silhouette column `silhouette_<projection>` per projection, at the **elbow K only** (not a
+   sweep); the chosen `K` is recorded in annotation metadata and the display label. ("x variable" is
+   read as "the elbow K, which differs per projection".)
+2. **Auto-styling.** Default: **yes** — generate a categorical palette for each membership column so
+   clusters are colored on load.
+3. **Faithfulness placement.** Default: inside `projections_metadata.info_json` under a `quality`
+   sub-object (additive), not a new top-level metadata column.
diff --git a/openspec/changes/route-projection-statistics/specs/projection-statistics/spec.md b/openspec/changes/route-projection-statistics/specs/projection-statistics/spec.md
new file mode 100644
index 00000000..79b34578
--- /dev/null
+++ b/openspec/changes/route-projection-statistics/specs/projection-statistics/spec.md
@@ -0,0 +1,264 @@
+## ADDED Requirements
+
+### Requirement: Statistics are routed to consumer-aligned bundle parts
+
+Each computed statistic SHALL be carried in the bundle part that matches its **granularity and
+consumer**, not in a single catch-all part. Specifically: **per-protein** outputs (cluster
+membership, per-point silhouette) SHALL be carried as columns in `protein_annotations`; **per-method
+faithfulness** scalars SHALL be carried in `projections_metadata` (within each projection's
+`info_json`); and **per-method aggregate cluster-validity** scalars SHALL be carried in the
+`statistics.parquet` part. The routing SHALL be additive and backward compatible: bundles without
+statistics, and pre-routing bundles, SHALL still read; the annotation columns and `info_json`
+additions SHALL be ignorable by existing readers.
+
+#### Scenario: A statistics-bearing bundle routes outputs to three parts
+
+- **WHEN** a bundle is prepared from embeddings with statistics enabled
+- **THEN** for each projection the cluster membership and per-point silhouette appear as
+  `protein_annotations` columns, the faithfulness scalars appear in that projection's
+  `projections_metadata.info_json`, and the aggregate cluster-validity scalars appear in
+  `statistics.parquet`
+
+#### Scenario: A statistic declares its destination
+
+- **WHEN** a statistic produces an output
+- **THEN** the output carries a destination of `annotation`, `projection_metadata`, or
+  `statistics_part`, and the carriage layer fans it to the matching bundle part without the statistic
+  knowing the bundle layout
+
+#### Scenario: Existing readers tolerate the additions
+
+- **WHEN** a reader that predates this change loads a routed bundle
+- **THEN** the extra annotation columns and the `info_json.quality` key are ignored without error,
+  and projections/annotations/settings load normally
+
+#### Scenario: Statistics disabled produces an un-routed bundle
+
+- **WHEN** preparation is run with statistics disabled (`--no-stats` / prep flag off)
+- **THEN** no `cluster_*`/`silhouette_*` annotation columns are added, no `info_json.quality` key is
+  written, no auto-style is generated, and no `statistics.parquet` part is emitted — the bundle is
+  the same shape as a pre-routing core bundle
+
+### Requirement: Cluster membership is delivered as a per-projection annotation
+
+For each projection, the KMeans labelling at the elbow-estimated `K` SHALL be delivered as a
+**categorical per-protein annotation column**, joined to proteins by `identifier`. Membership values
+SHALL be written as **non-numeric category labels** (e.g. `cluster 0`, not `0`) so the frontend's
+content-based type inference (`conversion.ts:inferAnnotationType`) classifies the column as
+categorical rather than as a continuous numeric ramp; the all-string annotation serialization in
+`base_processor._create_protein_annotations_table` (which `.astype(str)`s every column) MUST NOT
+cause membership to be mis-typed. There SHALL be one membership column per projection (each projection
+has its own elbow `K`). The column SHALL be marked as a **computed** annotation (distinct from
+retrieved biological annotations), and its source projection, elbow `K`, and seed SHALL be recorded
+in the column's metadata. A protein absent from a projection SHALL have no membership value for that
+projection rather than a wrong one.
+
+#### Scenario: Membership appears in the color-by control
+
+- **WHEN** the app loads a routed bundle
+- **THEN** each projection's cluster-membership column is selectable in the color-by control and
+  colors points by cluster
+
+#### Scenario: Membership is per projection
+
+- **WHEN** a run produces more than one projection
+- **THEN** each projection contributes its own membership column at its own elbow `K`, and selecting
+  one does not mislabel points of another
+
+### Requirement: Per-point silhouette is delivered as a per-projection annotation
+
+For each projection, the per-sample silhouette values at the elbow `K` (`silhouette_samples`,
+computed over the **full** labelled point set — not the cost-bounded subsample the aggregate mean
+silhouette uses) SHALL be delivered as a **numeric per-protein annotation column**, joined by
+`identifier`, one column per projection, marked computed with source projection / `K` / seed in its
+metadata. The column MUST round-trip as clean numeric strings (no `;`/`|` separators, empty for
+missing) so the frontend infers it as a continuous field. Because `silhouette_samples` has no
+sampling escape hatch and is O(n²)-class, its own cost guard (skip beyond the hard ceiling, recording
+a marker) SHALL apply. Points excluded from silhouette scoring (degenerate `K`, beyond the ceiling,
+or absent from the projection) SHALL carry no value rather than a fabricated one.
+
+#### Scenario: Per-point silhouette is colorable as a continuous field
+
+- **WHEN** the app loads a routed bundle
+- **THEN** the per-point silhouette column is selectable in color-by and renders as a continuous
+  ramp, letting a user see which points fit their cluster well
+
+#### Scenario: Aggregate and per-point silhouette are distinct
+
+- **WHEN** both the per-point silhouette annotation and the aggregate silhouette statistic exist
+- **THEN** the annotation carries one value per protein and the aggregate carries one value per
+  projection in `statistics.parquet`; neither is derived by the consumer from the other
+
+### Requirement: Faithfulness is delivered as per-projection reducer metadata
+
+The faithfulness scalars (kNN-overlap, trustworthiness, continuity) SHALL be carried **per
+projection** inside `projections_metadata.info_json`, under a `quality` object, alongside the
+reducer's other info. Each value SHALL record its neighbourhood size `k`, the high-dimensional
+distance metric used, and any sampling/skip provenance. `info_json` SHALL remain valid JSON whose
+unknown keys are ignorable by existing consumers.
+
+#### Scenario: Faithfulness rides with the projection's other metadata
+
+- **WHEN** a consumer reads a projection's metadata
+- **THEN** `info_json.quality` exposes that projection's kNN-overlap, trustworthiness, and continuity
+  with their `k`, metric, and sampling provenance
+
+#### Scenario: Faithfulness is computed where the embedding is in hand
+
+- **WHEN** statistics are produced during `prepare`/`project` (embedding and projection both present)
+- **THEN** faithfulness is computed there and written into `projections_metadata`; the standalone
+  `stats` path recomputes and merges it into an existing project's `projections_metadata`
+
+#### Scenario: A projection without an available embedding omits faithfulness
+
+- **WHEN** a projection's source embedding is unavailable
+- **THEN** its `info_json.quality` omits faithfulness (no key) rather than recording a wrong value
+
+#### Scenario: Multi-embedding runs route to the correct projection's metadata
+
+- **WHEN** a run produces projections from more than one source embedding
+- **THEN** each projection's `info_json.quality` is computed against the embedding that produced it
+  (matched by name + id-intersection join), never a neighbour's
+
+### Requirement: Cluster-membership annotations are auto-styled
+
+When statistics are enabled, the carriage layer SHALL generate a complete legend-settings entry for
+each cluster-membership column and write it into the bundle's `settings` part so the membership is
+colored **when selected**, without a manual styling step. The generated entry SHALL be a full
+`LegendPersistedSettings` envelope as the frontend loader requires — `maxVisibleValues`, `shapeSize`,
+`sortMode`, `hiddenValues`, `enableDuplicateStackUI`, `selectedPaletteId`, and `categories` keyed by
+the exact membership label strings, each category carrying `color`, `shape`, and `zOrder` — because
+the loader's `sanitizeLegendSettingsEntry` rejects (drops) any entry missing these fields. Because the
+frontend's initial color-by selection is the first annotation column (not necessarily a membership
+column) and file settings apply only to the _selected_ annotation, "colored when selected" is the
+guarantee; making a membership column the **initial** selection is a separate frontend concern (see
+the frontend requirement). Generating styles SHALL NOT remove or overwrite existing user/annotation
+styles, and SHALL be skipped cleanly when statistics are disabled.
+
+#### Scenario: A selected membership column is colored without manual styling
+
+- **WHEN** the app loads a routed bundle with statistics enabled and the user selects a
+  cluster-membership column
+- **THEN** that column already has a complete legend-settings entry (categories colored per label),
+  so clusters are colored with no manual styling step
+
+#### Scenario: Incomplete style envelopes are not emitted
+
+- **WHEN** the carriage layer writes a generated membership style
+- **THEN** it writes the full `LegendPersistedSettings` envelope (not a bare color map), so the
+  loader's sanitizer does not discard the entry
+
+#### Scenario: Auto-styling preserves existing styles
+
+- **WHEN** the bundle already carries annotation styles in its settings part
+- **THEN** the generated cluster styles are added without dropping or altering the pre-existing styles
+
+### Requirement: Settings rewrites preserve routed statistics and generated styles
+
+The `protspace style` / settings-replacement path (`replace_settings_in_bundle`) SHALL preserve a
+routed bundle's statistics carriage across a settings rewrite: the `statistics.parquet` part SHALL be
+retained, and the generated `cluster_*` membership styles SHALL NOT be dropped, when a user re-runs
+styling on a routed bundle. (This replaces the prior change's "Styling preserves statistics for both
+shapes" guarantee, extended to cover the auto-generated cluster styles.)
+
+#### Scenario: Styling a routed bundle keeps statistics and cluster styles
+
+- **WHEN** a settings-replacement / `protspace style` operation rewrites a routed,
+  statistics-bearing bundle
+- **THEN** the statistics part is preserved and the auto-generated cluster-membership styles survive
+  the rewrite (merged with any user styling), rather than being clobbered
+
+## MODIFIED Requirements
+
+### Requirement: Statistics are a stable tidy long-format table with joinable keys
+
+The `statistics.parquet` part SHALL be a tidy long-format table with the fixed eight-column schema
+(`space_kind`, `space_name`, `stat_family`, `label_kind`, `metric`, `metric_kind`, `value`,
+`extra_json`), one statistic value per row, and SHALL now carry **only per-method aggregate
+cluster-validity** — the silhouette mean, Davies–Bouldin, Calinski–Harabasz (`metric_kind`
+`validity`) and the `n_clusters` row (`metric_kind` `meta`). Per-protein outputs and faithfulness
+SHALL NOT appear in this part (they are routed to annotations and projection metadata respectively).
+`space_name` SHALL still equal the corresponding `projections_metadata.projection_name` so the table
+is joinable, and adding a new **aggregate scalar** statistic SHALL add rows, not columns.
+
+#### Scenario: The statistics part holds aggregate validity only
+
+- **WHEN** a consumer reads `statistics.parquet`
+- **THEN** it finds per-projection silhouette/davies_bouldin/calinski_harabasz validity rows and the
+  `n_clusters` meta row, and finds no faithfulness rows and no per-protein rows
+
+#### Scenario: Aggregate rows remain joinable to projections
+
+- **WHEN** a consumer joins the statistics part to projection metadata
+- **THEN** each row's `space_name` matches a `projection_name`, and `metric_kind` `meta` separates
+  `n_clusters` from the `validity` scores
+
+### Requirement: The frontend reader tolerates the statistics part
+
+The frontend SHALL **surface** the routed statistics through existing consumers rather than only
+tolerating an opaque part: cluster-membership and per-point-silhouette annotation columns SHALL be
+available in the color-by control (membership colored via its generated style; silhouette as a
+continuous ramp). Each projection's `info_json.quality` faithfulness SHALL be displayed in the
+projection's method/info surface (`projection-metadata.ts`); because that component currently flattens
+`info_json` only one level (rendering a nested object as a raw JSON blob), it SHALL be extended to
+render the `quality` sub-object as discrete per-metric rows. The aggregate `statistics.parquet` part
+MAY be surfaced as a small per-projection quality table; if not yet surfaced, its presence or absence
+SHALL NOT affect rendering. A bundle lacking any statistics SHALL render exactly as before.
+
+#### Scenario: Routed statistics render through existing controls
+
+- **WHEN** the app loads a routed bundle
+- **THEN** cluster and per-point-silhouette columns appear in color-by, faithfulness is available
+  per projection for display, and the projection renders with no error
+
+#### Scenario: A statistics-less bundle is unaffected
+
+- **WHEN** the app loads a bundle prepared with statistics disabled
+- **THEN** no statistics-derived columns, metadata quality, or table appear, and rendering is
+  identical to pre-change behaviour
+
+### Requirement: Statistics computation is non-fatal, reproducible, and guarded
+
+Statistics remain **secondary** to the core bundle under the new routing. In the prep service the
+**core (un-enriched) bundle SHALL be produced first**; a best-effort enrichment step — under its own
+bounded timeout caught locally — SHALL then recompute the routed statistics and **rewrite the
+affected parts (annotations, projection metadata, settings, and the statistics part) into a temporary
+bundle swapped in atomically on success**. A stats failure or timeout SHALL leave the already-produced
+un-enriched bundle in place and SHALL NOT fail the job or lose the bundle. Computation SHALL remain
+deterministic under a recorded seed and SHALL keep the existing cost and degeneracy guards (bounded
+sampling above a threshold, skip beyond a hard ceiling, singleton-cluster exclusion for DB/CH, no row
+for inputs too small to score). Provenance (seed, neighbourhood size, distance metric, sampling, knee
+confidence, source embedding, elbow `K`) SHALL be recorded with each output.
+
+#### Scenario: Enrichment failure preserves the core bundle
+
+- **WHEN** the enrichment step fails or times out
+- **THEN** the job still succeeds and the un-enriched core bundle (produced first) is delivered, with
+  no statistics columns, metadata quality, or statistics part
+
+#### Scenario: Enrichment is atomic
+
+- **WHEN** the enrichment step is interrupted mid-write
+- **THEN** the original core bundle is never left partially overwritten — the enriched bundle is
+  swapped in only when fully written
+
+#### Scenario: Results are reproducible
+
+- **WHEN** the same input is prepared twice with the same seed and projection
+- **THEN** the selected elbow `K` per projection is identical, and the routed statistics (membership,
+  per-point silhouette, faithfulness, aggregate validity) are identical, with the seed recorded — a
+  stable `K` being required so membership labels do not silently re-bucket between runs
+
+## REMOVED Requirements
+
+### Requirement: Statistics ride in the bundle as an optional fifth part, backward compatibly
+
+**Reason:** Replaced by "Statistics are routed to consumer-aligned bundle parts". Statistics are no
+longer carried solely as a fifth part; the fifth part now holds aggregate validity only, while
+per-protein and per-method outputs are routed to the annotations and projection-metadata parts. The
+backward-compatibility guarantee is carried by the routing requirement; the `protspace style`
+round-trip guarantee is **explicitly re-stated** in the new "Settings rewrites preserve routed
+statistics and generated styles" requirement (not silently dropped). **Migration:** the only consumer
+that read faithfulness/labels from the fifth part is our own not-yet-shipped frontend, so there is no
+external break; landing #61/#295 (the opaque fifth part) first and removing those rows in this
+follow-up is therefore non-breaking (see proposal "Sequencing").
diff --git a/openspec/changes/route-projection-statistics/tasks.md b/openspec/changes/route-projection-statistics/tasks.md
new file mode 100644
index 00000000..f7074d49
--- /dev/null
+++ b/openspec/changes/route-projection-statistics/tasks.md
@@ -0,0 +1,107 @@
+Phased per the fan-out review (see `proposal.md` "Sequencing"). **Stack on #61/#295, don't amend.**
+Routing fans out at **bundle-assembly time** (`prepare`/pipeline + prep re-bundle); `protspace stats`
+stays a pure aggregate-only producer. TDD; every routed output ships a known-answer fixture, not a
+"column exists" check. Engine PRs land before the matching web PRs.
+
+## Phase 0 — land the prior change (no work here)
+
+- [ ] 0.1 Merge engine #61 + web #295 as-is (opaque fifth part). They are green and reversible; this
+      change removes the per-protein/faithfulness rows from that part non-breakingly. Confirm with
+      tsenoner: stack (recommended) vs amend.
+
+## Phase 1 — low-risk routing (engine then web)
+
+Faithfulness → `info_json.quality`; narrow the fifth part to aggregate validity only. **No core
+(annotations) rewrite** — the prep robustness story is barely touched.
+
+### 1A. Engine — destination on outputs + faithfulness to metadata
+
+- [ ] 1A.1 `stats/base.py`: add `destination: str = "statistics_part"` to `StatRow` (default keeps
+      every existing construction valid; `STATS_SCHEMA`/`to_record` unchanged — `destination` is not a
+      column). Add `StatsReport.partition() -> dict[str, list[StatRow]]`; `to_arrow()` serializes only
+      the `statistics_part` bucket.
+- [ ] 1A.2 `stats/metrics/faithfulness.py`: mark its rows `destination="projection_metadata"`.
+- [ ] 1A.3 Carriage: a router (in `data/processors/base_processor.py` / `pipeline.py`) that, **before**
+      `create_output` builds `projections_metadata`, indexes faithfulness rows by `space_name` and
+      injects a `quality` object into the matching `reduction["info"]` dict (so it serializes into
+      `info_json`). Keep `info_json` valid JSON; unknown keys ignorable.
+- [ ] 1A.4 `cli/stats.py` stays aggregate-only (writes `statistics.parquet` with only
+      `validity`/`meta` rows). Confirm the prep `protspace stats … -o statistics.parquet` +
+      `protspace bundle -s` path still works (faithfulness now absent from the part).
+- [ ] 1A.5 Tests: faithfulness lands in `info_json.quality` with `k`/metric/sampling provenance;
+      multi-embedding routes to the correct projection; statistics part has no faithfulness rows;
+      `--no-stats` writes none of it; determinism incl. stable elbow `K`.
+- [ ] 1A.6 Version bump; record the release for the web floor; open engine PR (or fold into #61 if
+      amending).
+
+### 1B. Web — surface faithfulness + prep
+
+- [ ] 1B.1 `packages/core/.../scatter-plot/projection-metadata.ts`: expand `info_json.quality` into
+      discrete per-metric rows (it currently flattens one level → raw JSON blob). Add a test.
+- [ ] 1B.2 `services/protspace-prep`: bump `protspace` floor; the existing core-first + atomic
+      re-bundle step is unchanged in shape (faithfulness rides in metadata via the engine). Verify the
+      SSE stage + non-fatal behavior still hold.
+- [ ] 1B.3 Tests: a bundle with `info_json.quality` renders faithfulness; a stats-less bundle is
+      unchanged; existing readers tolerate the new key.
+
+## Phase 2 — per-protein annotations (flagged)
+
+Cluster membership + per-point silhouette as annotation columns, auto-styled. This is the
+core-bundle-coupling, typing, and color-by-UX work. Gate behind a flag until the open questions land.
+
+### 2A. Engine — per-point outputs
+
+- [ ] 2A.1 `stats/metrics/validity.py`: emit per-protein **cluster membership** (existing elbow-`K`
+      `labels`, already computed) as `destination="annotation"`, keyed by `ctx.ids`, values as
+      **non-numeric strings** (`cluster 0`) so content-based inference is categorical. Emit per-point
+      **`silhouette_samples`** over the **full** labelled set (not the aggregate's subsample) as a
+      numeric `annotation` output with its own hard-ceiling skip guard.
+- [ ] 2A.2 Carriage router: merge `annotation`-destined outputs into the `protein_annotations` Arrow
+      table **without** going through `ArrowReader.save_data` (it writes `protein_annotations.parquet`
+      while readers expect `selected_annotations.parquet` — silent-loss bug) and **outside** the
+      `.astype(str)` path for the numeric silhouette column (preserve float round-trip). One
+      membership + one silhouette column per projection; absent proteins → empty, not fabricated.
+- [ ] 2A.3 Auto-style: generate a **full `LegendPersistedSettings` envelope** per membership column
+      (categories keyed by exact label strings, each with `color`/`shape`/`zOrder`, plus
+      `maxVisibleValues`/`shapeSize`/`sortMode`/`hiddenValues`/`enableDuplicateStackUI`/
+      `selectedPaletteId`), merged into `settings` without clobbering existing styles. Style **only
+      membership** (not silhouette).
+- [ ] 2A.4 `replace_settings_in_bundle` / `protspace style`: preserve the statistics part **and** the
+      generated cluster styles across a settings rewrite (restore + extend the prior "styling preserves
+      statistics" round-trip).
+- [ ] 2A.5 Flag: `--stats-annotations/--no-stats-annotations` (or equivalent) gating Phase-2 output;
+      default per tsenoner.
+- [ ] 2A.6 Known-answer tests: membership joins by identifier across a bundle where annotation
+      id-set ≠ projection id-set ≠ embedding id-set; membership infers **categorical**, silhouette
+      **numeric**, in the actual frontend type-inference rules; incomplete style envelope is rejected
+      by the sanitizer (negative test); style round-trip; flag off → no columns.
+
+### 2B. Web — color-by surfacing + provenance UX
+
+- [ ] 2B.1 Confirm `cluster_*`/`silhouette_*` columns appear in color-by (no allowlist hides them) and
+      silhouette renders as a continuous ramp.
+- [ ] 2B.2 Provenance/grouping: register a "Computed/Statistics" annotation category (does not exist
+      today — `annotation-categories.ts` / `annotation-metadata.ts`) so ~12–60 computed columns don't
+      flood the flat "Other" group. Decide a default/initial selection (initial-view) if "colored on
+      load" is wanted.
+- [ ] 2B.3 Prep re-bundle now rewrites parts 1/2/4/5: add a test that a successful enrichment equals
+      the core annotations **plus exactly** the computed columns (no row drops / reorder / retype), and
+      that a stats failure still ships the un-enriched core bundle. Note the ~2× bundle I/O cost.
+- [ ] 2B.4 Tests: 5-part routed bundle (with computed annotations + styles) loads, colors clusters
+      when selected, ignores nothing erroneously; large-N / sweep bundle doesn't break color-by.
+
+## Verification (each PR)
+
+- [ ] V.1 Engine: `uv run pytest -m "not slow"` + `ruff check` + **`ruff format --check`** green
+      (the CI step that bit #61); CLI startup not regressed (lazy imports).
+- [ ] V.2 Web: `pnpm quality:ci` (`format:check && lint && quality`) green — run `format:check`, not
+      just `quality`; prep `uv run pytest -q` green.
+- [ ] V.3 Load a real engine-produced routed bundle in the app: faithfulness shows per-metric;
+      (Phase 2) clusters color when selected; no console errors.
+
+## Open decisions to resolve before Phase 2 (carry to tsenoner)
+
+- [ ] Column naming + whether `K` shows in the visible label; elbow-`K`-only vs a small sweep.
+- [ ] Initial-selection: should a membership column be the default color-by, or colored-when-selected.
+- [ ] Aggregate fifth-part UI now vs deferred.
+- [ ] Amend #61/#295 vs stack (recommended: stack).
diff --git a/packages/core/src/components/control-bar/annotation-categories.test.ts b/packages/core/src/components/control-bar/annotation-categories.test.ts
new file mode 100644
index 00000000..81581e0b
--- /dev/null
+++ b/packages/core/src/components/control-bar/annotation-categories.test.ts
@@ -0,0 +1,21 @@
+import { describe, it, expect } from 'vitest';
+import { groupAnnotations } from './annotation-categories';
+
+describe('groupAnnotations', () => {
+  it('groups computed cluster_/silhouette_ columns under a Statistics section', () => {
+    const groups = groupAnnotations(['cluster_PCA_2', 'silhouette_PCA_2', 'cluster_UMAP_2']);
+    const stats = groups.find((g) => g.category === 'Statistics');
+    expect(stats).toBeDefined();
+    expect(stats?.annotations).toEqual(['cluster_PCA_2', 'cluster_UMAP_2', 'silhouette_PCA_2']);
+    // computed columns must not leak into the catch-all Other section
+    expect(groups.find((g) => g.category === 'Other')).toBeUndefined();
+  });
+
+  it('keeps non-computed annotations out of the Statistics section', () => {
+    const groups = groupAnnotations(['cluster_PCA_2', 'my_random_label']);
+    const stats = groups.find((g) => g.category === 'Statistics');
+    expect(stats?.annotations).toEqual(['cluster_PCA_2']);
+    const other = groups.find((g) => g.category === 'Other');
+    expect(other?.annotations).toEqual(['my_random_label']);
+  });
+});
diff --git a/packages/core/src/components/control-bar/annotation-categories.ts b/packages/core/src/components/control-bar/annotation-categories.ts
index 36d3e783..c1d26b74 100644
--- a/packages/core/src/components/control-bar/annotation-categories.ts
+++ b/packages/core/src/components/control-bar/annotation-categories.ts
@@ -1,13 +1,30 @@
 import { annotationSource, compareTaxonomyRank, type AnnotationSource } from '@protspace/utils';
 
-/** Dropdown section names — one per annotation source (the predicted flag is shown per-row, not as a group). */
-export type CategoryName = 'Biocentral' | 'InterPro' | 'TED' | 'Taxonomy' | 'UniProt' | 'Other';
+/** Dropdown section names — one per annotation source, plus a Statistics section for
+ * computed columns (the predicted flag is shown per-row, not as a group). */
+export type CategoryName =
+  | 'Biocentral'
+  | 'InterPro'
+  | 'TED'
+  | 'Taxonomy'
+  | 'UniProt'
+  | 'Statistics'
+  | 'Other';
 
 export interface GroupedAnnotation {
   category: CategoryName;
   annotations: string[];
 }
 
+/** Prefixes of computed per-protein statistic columns (route-projection-statistics):
+ * one `cluster_<projection>` + one `silhouette_<projection>` per projection. Grouping
+ * them keeps ~12 computed columns from flooding the catch-all "Other" section. */
+const COMPUTED_STAT_PREFIXES = ['cluster_', 'silhouette_'];
+
+function isComputedStatistic(annotation: string): boolean {
+  return COMPUTED_STAT_PREFIXES.some((prefix) => annotation.startsWith(prefix));
+}
+
 /** Map an annotation's source to its dropdown section. */
 function categoryForSource(source: AnnotationSource): CategoryName {
   switch (source) {
@@ -26,13 +43,15 @@ function categoryForSource(source: AnnotationSource): CategoryName {
   }
 }
 
-// Display order of the dropdown sections (predicted sources first, then the rest).
+// Display order of the dropdown sections (predicted sources first, then the rest;
+// computed Statistics grouped just before the catch-all Other).
 const CATEGORY_ORDER: CategoryName[] = [
   'Biocentral',
   'InterPro',
   'TED',
   'Taxonomy',
   'UniProt',
+  'Statistics',
   'Other',
 ];
 
@@ -52,11 +71,15 @@ export function groupAnnotations(annotations: string[]): GroupedAnnotation[] {
     TED: [],
     Taxonomy: [],
     UniProt: [],
+    Statistics: [],
     Other: [],
   };
 
   for (const annotation of annotations) {
-    categorized[categoryForSource(annotationSource(annotation))].push(annotation);
+    const category = isComputedStatistic(annotation)
+      ? 'Statistics'
+      : categoryForSource(annotationSource(annotation));
+    categorized[category].push(annotation);
   }
 
   for (const category of CATEGORY_ORDER) {
diff --git a/packages/core/src/components/data-loader/utils/bundle-roundtrip.test.ts b/packages/core/src/components/data-loader/utils/bundle-roundtrip.test.ts
index 666357b8..ffdaa717 100644
--- a/packages/core/src/components/data-loader/utils/bundle-roundtrip.test.ts
+++ b/packages/core/src/components/data-loader/utils/bundle-roundtrip.test.ts
@@ -3,7 +3,13 @@ import { readFileSync } from 'fs';
 import { resolve } from 'path';
 import { extractRowsFromParquetBundle } from './bundle';
 import { convertParquetToVisualizationData } from './conversion';
-import { createParquetBundle, countBundleDelimiters, isParquetBundle } from '@protspace/utils';
+import {
+  createParquetBundle,
+  countBundleDelimiters,
+  isParquetBundle,
+  BUNDLE_DELIMITER_BYTES,
+  findBundleDelimiterPositions,
+} from '@protspace/utils';
 
 function loadArrayBuffer(filePath: string): ArrayBuffer {
   const buffer = readFileSync(filePath);
@@ -333,6 +339,93 @@ describe('metadata preservation through round-trip', () => {
   });
 });
 
+describe('5-part statistics bundle parsing', () => {
+  // The frontend writer (createParquetBundle) only emits 3/4 parts, so we splice
+  // real parquet parts into the 5-part shapes the backend produces.
+  const DELIM = BUNDLE_DELIMITER_BYTES;
+
+  function splitParts(buf: ArrayBuffer): Uint8Array[] {
+    const u8 = new Uint8Array(buf);
+    const pos = findBundleDelimiterPositions(u8);
+    const out: Uint8Array[] = [];
+    for (let i = 0; i <= pos.length; i++) {
+      const start = i === 0 ? 0 : pos[i - 1] + DELIM.length;
+      const end = i < pos.length ? pos[i] : u8.length;
+      out.push(u8.subarray(start, end).slice());
+    }
+    return out;
+  }
+
+  function assemble(parts: Uint8Array[]): ArrayBuffer {
+    let total = 0;
+    parts.forEach((p, i) => {
+      total += p.length + (i < parts.length - 1 ? DELIM.length : 0);
+    });
+    const out = new Uint8Array(total);
+    let off = 0;
+    parts.forEach((p, i) => {
+      out.set(p, off);
+      off += p.length;
+      if (i < parts.length - 1) {
+        out.set(DELIM, off);
+        off += DELIM.length;
+      }
+    });
+    return out.buffer;
+  }
+
+  const original = {
+    protein_ids: ['P1', 'P2', 'P3'],
+    projections: [{ name: 'UMAP', data: Float32Array.of(0, 0, 1, 1, 2, 2), dimension: 2 as const }],
+    annotations: {
+      family: {
+        kind: 'categorical' as const,
+        values: ['A', 'B'],
+        colors: ['#1F77B4', '#FF7F0E'],
+        shapes: ['circle', 'circle'],
+      },
+    },
+    annotation_data: { family: [[0], [1], [0]] },
+    annotation_scores: {},
+    annotation_evidence: {},
+  };
+
+  it('parses 5 parts with a zero-byte settings slot (statistics-only) → settings null', async () => {
+    const core = splitParts(createParquetBundle(original)); // [c0, c1, c2]
+    expect(core.length).toBe(3);
+    const stats = core[0]; // part 5 is ignored; any valid parquet bytes
+    const fivePart = assemble([core[0], core[1], core[2], new Uint8Array(0), stats]);
+    expect(countBundleDelimiters(new Uint8Array(fivePart))).toBe(4);
+    const res = await extractRowsFromParquetBundle(fivePart);
+    expect(res.projections.length).toBeGreaterThan(0);
+    expect(res.settings).toBeNull();
+  });
+
+  it('parses 5 parts with settings + statistics → settings parsed, statistics ignored', async () => {
+    const settings = {
+      legendSettings: {
+        family: {
+          maxVisibleValues: 10,
+          shapeSize: 24,
+          sortMode: 'size-desc' as const,
+          hiddenValues: [],
+          categories: {},
+          enableDuplicateStackUI: false,
+          selectedPaletteId: 'kellys',
+        },
+      },
+      exportOptions: {},
+    };
+    const ps = splitParts(createParquetBundle(original, { includeSettings: true, settings }));
+    expect(ps.length).toBe(4); // [c0, c1, c2, settings]
+    const fivePart = assemble([ps[0], ps[1], ps[2], ps[3], ps[0]]);
+    expect(countBundleDelimiters(new Uint8Array(fivePart))).toBe(4);
+    const res = await extractRowsFromParquetBundle(fivePart);
+    expect(res.projections.length).toBeGreaterThan(0);
+    expect(res.settings).not.toBeNull();
+  });
+});
+
 describe('numeric annotation round-trip', () => {
   it('preserves raw numeric annotations through bundle export and import', async () => {
     const original = {
diff --git a/packages/core/src/components/data-loader/utils/bundle.test.ts b/packages/core/src/components/data-loader/utils/bundle.test.ts
index efedb305..05bbfdb1 100644
--- a/packages/core/src/components/data-loader/utils/bundle.test.ts
+++ b/packages/core/src/components/data-loader/utils/bundle.test.ts
@@ -112,15 +112,29 @@ describe('bundle utilities', () => {
       const bundle = createMockBundle(2);
 
       await expect(extractRowsFromParquetBundle(bundle)).rejects.toThrow(
-        /Expected 2 or 3 delimiters/,
+        /Expected 2 to 4 delimiters/,
       );
     });
 
-    it('should reject bundle with 4 delimiters (5 parts)', async () => {
+    it('should accept a 5-part (core+settings+statistics) bundle structurally', async () => {
+      // 4 delimiters (5 parts) is now valid: core + settings + statistics. With
+      // mock (non-parquet) buffers it still fails later at parquet decode — but
+      // NOT at the delimiter-count guard, which is what we assert here.
       const bundle = createMockBundle(5);
+      let message = '';
+      try {
+        await extractRowsFromParquetBundle(bundle);
+      } catch (e) {
+        message = (e as Error).message;
+      }
+      expect(message).not.toMatch(/Expected 2 to 4 delimiters/);
+    });
+
+    it('should reject bundle with 5 delimiters (6 parts)', async () => {
+      const bundle = createMockBundle(6);
 
       await expect(extractRowsFromParquetBundle(bundle)).rejects.toThrow(
-        /Expected 2 or 3 delimiters/,
+        /Expected 2 to 4 delimiters/,
       );
     });
 
@@ -128,7 +142,7 @@ describe('bundle utilities', () => {
       const buffer = createMockParquetBuffer('no delimiter');
 
       await expect(extractRowsFromParquetBundle(buffer)).rejects.toThrow(
-        /Expected 2 or 3 delimiters/,
+        /Expected 2 to 4 delimiters/,
       );
     });
   });
diff --git a/packages/core/src/components/data-loader/utils/bundle.ts b/packages/core/src/components/data-loader/utils/bundle.ts
index 58ec5d03..9b7c1874 100644
--- a/packages/core/src/components/data-loader/utils/bundle.ts
+++ b/packages/core/src/components/data-loader/utils/bundle.ts
@@ -29,9 +29,11 @@ export interface BundleExtractionResult {
 /**
  * Extract rows and optional settings from a parquetbundle.
  *
- * Supports two formats:
+ * Supports:
  * - 2 delimiters (3 parts): Original format without settings
  * - 3 delimiters (4 parts): Extended format with settings
+ * - 4 delimiters (5 parts): With statistics (optional 5th part, ignored here;
+ *   the settings slot may be zero bytes when statistics are present without settings)
  */
 export async function extractRowsFromParquetBundle(
   arrayBuffer: ArrayBuffer,
@@ -39,36 +41,33 @@ export async function extractRowsFromParquetBundle(
   const uint8Array = new Uint8Array(arrayBuffer);
   const delimiterPositions = findBundleDelimiterPositions(uint8Array);
 
-  // Support both 2 delimiters (original) and 3 delimiters (with settings)
-  if (delimiterPositions.length !== 2 && delimiterPositions.length !== 3) {
+  // Supported shapes (by delimiter count):
+  //   2 → 3 parts (core only)
+  //   3 → 4 parts (core + settings)
+  //   4 → 5 parts (core + settings + statistics; the settings slot may be empty)
+  if (delimiterPositions.length < 2 || delimiterPositions.length > 4) {
     throw new Error(
-      `Expected 2 or 3 delimiters in parquetbundle, found ${delimiterPositions.length}`,
+      `Expected 2 to 4 delimiters in parquetbundle, found ${delimiterPositions.length}`,
     );
   }
 
-  const hasSettingsPart = delimiterPositions.length === 3;
-
-  // Extract the three required parts
-  let part1: ArrayBuffer | null = uint8Array.subarray(0, delimiterPositions[0]).slice().buffer;
-  let part2: ArrayBuffer | null = uint8Array
-    .subarray(delimiterPositions[0] + BUNDLE_DELIMITER_BYTES.length, delimiterPositions[1])
-    .slice().buffer;
-
-  let part3: ArrayBuffer | null;
-  let part4: ArrayBuffer | null = null;
-
-  if (hasSettingsPart) {
-    part3 = uint8Array
-      .subarray(delimiterPositions[1] + BUNDLE_DELIMITER_BYTES.length, delimiterPositions[2])
-      .slice().buffer;
-    part4 = uint8Array
-      .subarray(delimiterPositions[2] + BUNDLE_DELIMITER_BYTES.length)
-      .slice().buffer;
-  } else {
-    part3 = uint8Array
-      .subarray(delimiterPositions[1] + BUNDLE_DELIMITER_BYTES.length)
-      .slice().buffer;
-  }
+  const delimLen = BUNDLE_DELIMITER_BYTES.length;
+  const partStart = (i: number): number => (i === 0 ? 0 : delimiterPositions[i - 1] + delimLen);
+  const partEnd = (i: number): number =>
+    i < delimiterPositions.length ? delimiterPositions[i] : uint8Array.length;
+  const slicePart = (i: number): ArrayBuffer =>
+    uint8Array.subarray(partStart(i), partEnd(i)).slice().buffer;
+
+  // Extract the three required core parts.
+  let part1: ArrayBuffer | null = slicePart(0);
+  let part2: ArrayBuffer | null = slicePart(1);
+  let part3: ArrayBuffer | null = slicePart(2);
+
+  // Part 4 is settings (optional). It may be a zero-byte slot when a statistics
+  // part follows without settings — branch on emptiness, not the raw count.
+  // Part 5 is statistics, intentionally ignored for now (rendering is separate).
+  const part4: ArrayBuffer | null =
+    delimiterPositions.length >= 3 && partEnd(3) > partStart(3) ? slicePart(3) : null;
 
   // Validate parquet magic for each part before parsing
   assertValidParquetMagic(part1);
@@ -142,6 +141,10 @@ export async function extractRowsFromParquetBundle(
  */
 async function extractSettings(settingsBuffer: ArrayBuffer): Promise<BundleSettings | null> {
   try {
+    // A zero-byte settings slot (statistics-without-settings) is "no settings".
+    if (settingsBuffer.byteLength === 0) {
+      return null;
+    }
     // Validate parquet magic
     assertValidParquetMagic(settingsBuffer);
 
diff --git a/packages/core/src/components/scatter-plot/projection-metadata-helpers.test.ts b/packages/core/src/components/scatter-plot/projection-metadata-helpers.test.ts
new file mode 100644
index 00000000..5ee6542c
--- /dev/null
+++ b/packages/core/src/components/scatter-plot/projection-metadata-helpers.test.ts
@@ -0,0 +1,57 @@
+import { describe, it, expect } from 'vitest';
+import { buildProjectionMetadataRows } from './projection-metadata-helpers';
+
+describe('buildProjectionMetadataRows', () => {
+  it('returns [] for null or empty metadata', () => {
+    expect(buildProjectionMetadataRows(null)).toEqual([]);
+    expect(buildProjectionMetadataRows({})).toEqual([]);
+  });
+
+  it('skips internal dimension/name fields', () => {
+    const rows = buildProjectionMetadataRows({ name: 'PCA 2', dimensions: 2, foo: 'bar' });
+    const keys = rows.map(([k]) => k);
+    expect(keys).not.toContain('Name');
+    expect(keys).not.toContain('Dimensions');
+    expect(rows).toContainEqual(['Foo', 'bar']);
+  });
+
+  it('flattens info_json one level (existing behavior)', () => {
+    const rows = buildProjectionMetadataRows({
+      info_json: '{"metric":"cosine","n_neighbors":15}',
+    });
+    expect(rows).toContainEqual(['Metric', 'cosine']);
+    expect(rows).toContainEqual(['N Neighbors', '15']);
+  });
+
+  it('expands info_json.quality into discrete per-metric rows', () => {
+    const info = JSON.stringify({
+      metric: 'cosine',
+      quality: {
+        knn_overlap: { value: 0.83, k: 15, metric: 'cosine' },
+        trustworthiness: { value: 0.952, k: 15, metric: 'cosine' },
+        continuity: { value: 0.948, k: 15, metric: 'euclidean' },
+      },
+    });
+    const rows = buildProjectionMetadataRows({ info_json: info });
+    const map = new Map(rows);
+    // NOT a single "Quality" row holding a raw JSON blob
+    expect(map.has('Quality')).toBe(false);
+    expect(map.get('Knn Overlap')).toBe('0.830 (cosine, k=15)');
+    expect(map.get('Trustworthiness')).toBe('0.952 (cosine, k=15)');
+    expect(map.get('Continuity')).toBe('0.948 (euclidean, k=15)');
+  });
+
+  it('renders a skipped faithfulness metric as N/A with the marker', () => {
+    const info = JSON.stringify({
+      quality: { knn_overlap: { value: null, skipped: 'n_too_large', n: 30000 } },
+    });
+    const rows = buildProjectionMetadataRows({ info_json: info });
+    expect(new Map(rows).get('Knn Overlap')).toBe('N/A (skipped: n_too_large)');
+  });
+
+  it('tolerates a flat (scalar) quality value', () => {
+    const info = JSON.stringify({ quality: { knn_overlap: 0.83 } });
+    const rows = buildProjectionMetadataRows({ info_json: info });
+    expect(new Map(rows).get('Knn Overlap')).toBe('0.830');
+  });
+});
diff --git a/packages/core/src/components/scatter-plot/projection-metadata-helpers.ts b/packages/core/src/components/scatter-plot/projection-metadata-helpers.ts
new file mode 100644
index 00000000..b61cee0c
--- /dev/null
+++ b/packages/core/src/components/scatter-plot/projection-metadata-helpers.ts
@@ -0,0 +1,132 @@
+import { NA_DISPLAY } from '@protspace/utils';
+
+/**
+ * Build the display rows for a projection's metadata tooltip.
+ *
+ * Behaviour:
+ * - internal fields (`name`, `dimension(s)`) are skipped;
+ * - JSON-string fields (`info`, `info_json`, `*json*`) are parsed and flattened
+ *   one level into individual rows;
+ * - the `quality` sub-object inside `info_json` (per-projection faithfulness from
+ *   route-projection-statistics) is expanded into one row per metric — each
+ *   showing its value plus compact provenance (distance metric, `k`) — instead of
+ *   rendering as a single raw `JSON.stringify` blob.
+ */
+export function buildProjectionMetadataRows(
+  metadata: Record<string, unknown> | null | undefined,
+): Array<[string, string]> {
+  if (!metadata) return [];
+
+  const rows: Array<[string, string]> = [];
+
+  for (const [key, value] of Object.entries(metadata)) {
+    const lowerKey = key.toLowerCase();
+    if (lowerKey === 'dimension' || lowerKey === 'dimensions' || lowerKey === 'name') {
+      continue;
+    }
+
+    if (isJsonField(lowerKey) && typeof value === 'string') {
+      const parsed = tryParseJson(value);
+      if (isPlainObject(parsed)) {
+        for (const [innerKey, innerValue] of Object.entries(parsed)) {
+          if (innerKey.toLowerCase() === 'quality' && isPlainObject(innerValue)) {
+            rows.push(...expandQuality(innerValue));
+          } else {
+            rows.push([formatMetadataKey(innerKey), formatMetadataValue(innerValue, innerKey)]);
+          }
+        }
+        continue;
+      }
+    }
+
+    rows.push([formatMetadataKey(key), formatMetadataValue(value, key)]);
+  }
+
+  return rows;
+}
+
+/** Expand a faithfulness `quality` object into one display row per metric. */
+function expandQuality(quality: Record<string, unknown>): Array<[string, string]> {
+  return Object.entries(quality).map(([metric, raw]) => [
+    formatMetadataKey(metric),
+    formatQualityValue(raw),
+  ]);
+}
+
+/**
+ * Format one faithfulness metric. Each is `{ value, k, metric, ... }` (engine
+ * Phase 1A); a missing value (the skip case) renders as N/A with its marker, and
+ * a bare scalar (older flat shape) is formatted directly.
+ */
+function formatQualityValue(raw: unknown): string {
+  if (isPlainObject(raw) && 'value' in raw) {
+    const value = raw.value;
+    if (value == null) {
+      const skipped = typeof raw.skipped === 'string' ? raw.skipped : null;
+      return skipped ? `${NA_DISPLAY} (skipped: ${skipped})` : NA_DISPLAY;
+    }
+    const valueStr = formatSingleValue(value, false);
+    const provenance: string[] = [];
+    if (typeof raw.metric === 'string') provenance.push(raw.metric);
+    if (raw.k != null) provenance.push(`k=${formatSingleValue(raw.k, false)}`);
+    return provenance.length > 0 ? `${valueStr} (${provenance.join(', ')})` : valueStr;
+  }
+  if (isPlainObject(raw)) {
+    return JSON.stringify(raw);
+  }
+  return formatSingleValue(raw, false);
+}
+
+function isPlainObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+function isJsonField(key: string): boolean {
+  return key === 'info' || key === 'info_json' || key.includes('json');
+}
+
+function tryParseJson(value: string): unknown {
+  try {
+    return JSON.parse(value);
+  } catch {
+    return null;
+  }
+}
+
+function formatMetadataKey(key: string): string {
+  return key
+    .replace(/_/g, ' ')
+    .replace(/([A-Z])/g, ' $1')
+    .split(' ')
+    .filter((word) => word.length > 0)
+    .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
+    .join(' ');
+}
+
+function formatMetadataValue(value: unknown, key: string): string {
+  if (value == null) return NA_DISPLAY;
+
+  const lowerKey = key.toLowerCase();
+  const isVarianceRatio =
+    lowerKey.includes('explained_variance') || lowerKey.includes('variance_ratio');
+
+  if (Array.isArray(value)) {
+    return value.map((item) => formatSingleValue(item, isVarianceRatio)).join(', ');
+  }
+
+  return formatSingleValue(value, isVarianceRatio);
+}
+
+function formatSingleValue(value: unknown, isVarianceRatio: boolean): string {
+  if (typeof value === 'number') {
+    if (Number.isInteger(value)) return value.toString();
+    return value.toFixed(isVarianceRatio ? 2 : 3);
+  }
+  if (typeof value === 'boolean') {
+    return value ? 'Yes' : 'No';
+  }
+  if (typeof value === 'object' && value !== null) {
+    return JSON.stringify(value);
+  }
+  return String(value);
+}
diff --git a/packages/core/src/components/scatter-plot/projection-metadata.ts b/packages/core/src/components/scatter-plot/projection-metadata.ts
index 0b0f0339..d0db6b3b 100644
--- a/packages/core/src/components/scatter-plot/projection-metadata.ts
+++ b/packages/core/src/components/scatter-plot/projection-metadata.ts
@@ -2,8 +2,8 @@ import { LitElement, html } from 'lit';
 import { property } from 'lit/decorators.js';
 import { customElement } from '../../utils/safe-custom-element';
 import type { Projection } from '@protspace/utils';
-import { NA_DISPLAY } from '@protspace/utils';
 import { projectionMetadataStyles } from './projection-metadata.styles';
+import { buildProjectionMetadataRows } from './projection-metadata-helpers';
 
 @customElement('protspace-projection-metadata')
 class ProtspaceProjectionMetadata extends LitElement {
@@ -12,7 +12,7 @@ class ProtspaceProjectionMetadata extends LitElement {
   static styles = projectionMetadataStyles;
 
   render() {
-    const metadata = this._getProjectionMetadata();
+    const metadata = buildProjectionMetadataRows(this.projection?.metadata);
 
     if (metadata.length === 0) {
       return html``;
@@ -50,109 +50,6 @@ class ProtspaceProjectionMetadata extends LitElement {
       </div>
     `;
   }
-
-  /**
-   * Get formatted projection metadata for display
-   */
-  private _getProjectionMetadata(): Array<[string, string]> {
-    if (!this.projection?.metadata) {
-      return [];
-    }
-
-    const rawMetadata = this.projection.metadata;
-    const processedEntries: Array<[string, unknown]> = [];
-
-    // Filter and process metadata entries
-    for (const [key, value] of Object.entries(rawMetadata)) {
-      // Skip internal fields
-      const lowerKey = key.toLowerCase();
-      if (lowerKey === 'dimension' || lowerKey === 'dimensions' || lowerKey === 'name') {
-        continue;
-      }
-
-      // Parse and flatten JSON fields
-      if (this._isJsonField(lowerKey) && typeof value === 'string') {
-        const parsed = this._tryParseJson(value);
-        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
-          processedEntries.push(...Object.entries(parsed));
-          continue;
-        }
-      }
-
-      processedEntries.push([key, value]);
-    }
-
-    // Format all entries
-    return processedEntries.map(([key, value]) => [
-      this._formatMetadataKey(key),
-      this._formatMetadataValue(value, key),
-    ]);
-  }
-
-  /**
-   * Check if a key indicates a JSON field
-   */
-  private _isJsonField(key: string): boolean {
-    return key === 'info' || key === 'info_json' || key.includes('json');
-  }
-
-  /**
-   * Safely parse JSON string
-   */
-  private _tryParseJson(value: string): unknown {
-    try {
-      return JSON.parse(value);
-    } catch {
-      return null;
-    }
-  }
-
-  /**
-   * Format metadata key to Title Case
-   */
-  private _formatMetadataKey(key: string): string {
-    return key
-      .replace(/_/g, ' ')
-      .replace(/([A-Z])/g, ' $1')
-      .split(' ')
-      .filter((word) => word.length > 0)
-      .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
-      .join(' ');
-  }
-
-  /**
-   * Format metadata value with appropriate precision
-   */
-  private _formatMetadataValue(value: unknown, key: string): string {
-    if (value == null) return NA_DISPLAY;
-
-    const lowerKey = key.toLowerCase();
-    const isVarianceRatio =
-      lowerKey.includes('explained_variance') || lowerKey.includes('variance_ratio');
-
-    if (Array.isArray(value)) {
-      return value.map((item) => this._formatSingleValue(item, isVarianceRatio)).join(', ');
-    }
-
-    return this._formatSingleValue(value, isVarianceRatio);
-  }
-
-  /**
-   * Format a single metadata value
-   */
-  private _formatSingleValue(value: unknown, isVarianceRatio: boolean): string {
-    if (typeof value === 'number') {
-      if (Number.isInteger(value)) return value.toString();
-      return value.toFixed(isVarianceRatio ? 2 : 3);
-    }
-    if (typeof value === 'boolean') {
-      return value ? 'Yes' : 'No';
-    }
-    if (typeof value === 'object' && value !== null) {
-      return JSON.stringify(value);
-    }
-    return String(value);
-  }
 }
 
 declare global {
diff --git a/packages/utils/src/parquet/bundle-writer.ts b/packages/utils/src/parquet/bundle-writer.ts
index a372e7fd..c21e0ae6 100644
--- a/packages/utils/src/parquet/bundle-writer.ts
+++ b/packages/utils/src/parquet/bundle-writer.ts
@@ -9,6 +9,11 @@
  * - Part 3: projections_data.parquet (projection_name, identifier, x, y, z)
  * - Delimiter: ---PARQUET_DELIMITER--- (optional, only if settings included)
  * - Part 4: settings.parquet (optional, settings_json column)
+ * - Part 5: statistics.parquet (optional, written by the backend prep service;
+ *   this writer does not currently emit or preserve it on re-export)
+ *
+ * Readers accept 3–5 parts; when statistics are present without settings, the
+ * settings slot is written as zero bytes so statistics stays the fifth part.
  */
 
 import { parquetWriteBuffer } from 'hyparquet-writer';
diff --git a/services/protspace-prep/src/protspace_prep/config.py b/services/protspace-prep/src/protspace_prep/config.py
index e900d92d..a2113d38 100644
--- a/services/protspace-prep/src/protspace_prep/config.py
+++ b/services/protspace-prep/src/protspace_prep/config.py
@@ -20,6 +20,8 @@ class Settings:
     annotations: str
     sweep_interval_seconds: int
     pipeline_timeout_seconds: int
+    stats_enabled: bool
+    stats_timeout_seconds: int
     log_level: str
     log_json_format: bool
     cors_allowed_origins: tuple[str, ...]
@@ -48,6 +50,8 @@ def load_settings() -> Settings:
         annotations=os.getenv("PREP_ANNOTATIONS", "default"),
         sweep_interval_seconds=int(os.getenv("PREP_SWEEP_INTERVAL_SECONDS", "300")),
         pipeline_timeout_seconds=int(os.getenv("PREP_PIPELINE_TIMEOUT_SECONDS", "420")),
+        stats_enabled=os.getenv("PREP_STATS", "true").lower() in {"1", "true", "yes"},
+        stats_timeout_seconds=int(os.getenv("PREP_STATS_TIMEOUT_SECONDS", "120")),
         log_level=os.getenv("PREP_LOG_LEVEL", "INFO"),
         log_json_format=os.getenv("PREP_LOG_JSON_FORMAT", "false").lower() in {"1", "true", "yes"},
         cors_allowed_origins=_parse_origins(os.getenv("CORS_ALLOWED_ORIGIN", "")),
diff --git a/services/protspace-prep/src/protspace_prep/pipeline.py b/services/protspace-prep/src/protspace_prep/pipeline.py
index 962c4c93..2bd20653 100644
--- a/services/protspace-prep/src/protspace_prep/pipeline.py
+++ b/services/protspace-prep/src/protspace_prep/pipeline.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 import asyncio
 import logging
+import os
 from pathlib import Path
 from typing import Awaitable, Callable, Sequence
 
@@ -179,9 +180,159 @@ async def run_protspace_prepare(
         raise PipelineFailure(
             "protspace bundle reported success but produced no .parquetbundle file."
         )
+
+    # Best-effort projection statistics — AFTER the core bundle exists and OUTSIDE
+    # the pipeline timeout budget, so a stats failure/timeout can never cost the
+    # job or lose the bundle.
+    if settings.stats_enabled:
+        await _maybe_add_statistics(
+            emit,
+            settings,
+            embed_dir=embed_dir,
+            project_dir=project_dir,
+            annotations_path=annotations_path,
+            bundle_path=bundle_path,
+        )
+
     return bundle_path
 
 
+_STATS_CLI_AVAILABLE: bool | None = None
+_STATS_CLI_PROBE_LOCK = asyncio.Lock()
+_STATS_PROBE_TIMEOUT_SECONDS = 10.0
+
+
+async def _stats_cli_available() -> bool:
+    """Bounded one-time probe: is the installed protspace new enough to have ``stats``?
+
+    Caches only DEFINITIVE answers — a clean exit (0 ⇒ present, non-zero / not on
+    PATH ⇒ absent). A transient spawn error or a hung probe returns unavailable for
+    THIS call but is NOT latched, so a later job can retry. The probe is bounded by a
+    hard timeout and kills a hung subprocess, so it can never stall the job or leak a
+    concurrency slot. A lock collapses the first concurrent wave to a single probe.
+    """
+    global _STATS_CLI_AVAILABLE
+    if _STATS_CLI_AVAILABLE is not None:
+        return _STATS_CLI_AVAILABLE
+    async with _STATS_CLI_PROBE_LOCK:
+        if _STATS_CLI_AVAILABLE is not None:  # re-check after acquiring the lock
+            return _STATS_CLI_AVAILABLE
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                "protspace",
+                "stats",
+                "--help",
+                stdout=asyncio.subprocess.DEVNULL,
+                stderr=asyncio.subprocess.DEVNULL,
+            )
+        except FileNotFoundError:
+            _STATS_CLI_AVAILABLE = False  # binary not on PATH: definitively absent
+            return _STATS_CLI_AVAILABLE
+        except Exception:  # noqa: BLE001 - could not spawn; transient, do not latch
+            logger.warning("protspace stats probe could not spawn; will retry next job", exc_info=True)
+            return False
+        try:
+            rc = await asyncio.wait_for(proc.wait(), timeout=_STATS_PROBE_TIMEOUT_SECONDS)
+            _STATS_CLI_AVAILABLE = rc == 0  # definitive
+            return _STATS_CLI_AVAILABLE
+        except asyncio.TimeoutError:
+            proc.kill()
+            try:
+                await asyncio.wait_for(proc.wait(), timeout=5)
+            except asyncio.TimeoutError:
+                pass
+            logger.warning("protspace stats probe timed out; will retry next job")
+            return False  # transient hang, do not latch
+
+
+async def _maybe_add_statistics(
+    emit: EmitFn,
+    settings: Settings,
+    *,
+    embed_dir: Path,
+    project_dir: Path,
+    annotations_path: Path,
+    bundle_path: Path,
+) -> None:
+    """Compute projection statistics and fold them into the bundle (best-effort).
+
+    Runs ``protspace stats`` then re-bundles with ``-s`` under a dedicated
+    timeout, swallowing every failure so the already-shipped bundle stands.
+    """
+    try:
+        if not await _stats_cli_available():
+            logger.info("protspace `stats` subcommand unavailable; skipping statistics")
+            return
+        h5_files = sorted(embed_dir.glob("*.h5"))
+        if not h5_files:
+            return
+        stats_path = bundle_path.parent / "statistics.parquet"
+        # Auto-generated cluster-membership legend styles, written by `stats` and
+        # folded into the re-bundle so clusters are colored when selected.
+        styles_path = bundle_path.parent / "cluster_styles.json"
+        # Re-bundle to a sibling temp file, then atomically rename — a stats timeout
+        # /kill mid-write must never corrupt the already-shipped core bundle. The
+        # ".parquetbundle" suffix is required because `protspace bundle` forces it.
+        tmp_bundle = bundle_path.with_name(bundle_path.stem + ".stats-tmp.parquetbundle")
+        await emit("computing_statistics", {})
+        try:
+            async with asyncio.timeout(settings.stats_timeout_seconds):
+                await _run_step(
+                    "stats",
+                    [
+                        "protspace",
+                        "stats",
+                        "-i",
+                        str(h5_files[0]),
+                        "-p",
+                        str(project_dir),
+                        # Enrich the annotations file in place with per-protein
+                        # cluster-membership + silhouette columns so the re-bundle
+                        # below (`bundle -a`) carries them. Faithfulness is folded
+                        # into projections_metadata by the same command.
+                        "-a",
+                        str(annotations_path),
+                        "--settings-out",
+                        str(styles_path),
+                        "-o",
+                        str(stats_path),
+                    ],
+                )
+                if not stats_path.exists():
+                    return
+                bundle_cmd = [
+                    "protspace",
+                    "bundle",
+                    "-p",
+                    str(project_dir),
+                    "-a",
+                    str(annotations_path),
+                    "-s",
+                    str(stats_path),
+                    "-o",
+                    str(tmp_bundle),
+                ]
+                # Fold the auto-generated cluster styles into the bundle's settings
+                # part when `stats` produced them (older engines may not).
+                if styles_path.exists():
+                    bundle_cmd[-2:-2] = ["--settings", str(styles_path)]
+                await _run_step("bundle", bundle_cmd)
+            # Promote only a fully-written temp bundle (atomic, same filesystem).
+            if tmp_bundle.exists():
+                os.replace(tmp_bundle, bundle_path)
+        finally:
+            tmp_bundle.unlink(missing_ok=True)  # clear any partial temp on failure
+    except asyncio.TimeoutError:
+        logger.warning(
+            "statistics step timed out after %ss; bundle ships without stats",
+            settings.stats_timeout_seconds,
+        )
+    except PipelineFailure as exc:
+        logger.warning("statistics step failed; bundle ships without stats: %s", exc)
+    except Exception as exc:  # noqa: BLE001 - statistics are secondary
+        logger.warning("statistics step error; bundle ships without stats: %s", exc)
+
+
 async def _run_step(name: str, cmd: Sequence[str]) -> None:
     """Run one protspace subcommand. Raise PipelineFailure on non-zero exit.
 
diff --git a/services/protspace-prep/tests/test_pipeline.py b/services/protspace-prep/tests/test_pipeline.py
index ce5ecd2b..7658488c 100644
--- a/services/protspace-prep/tests/test_pipeline.py
+++ b/services/protspace-prep/tests/test_pipeline.py
@@ -350,3 +350,142 @@ def test_classify_failure_matches_503_service_unavailable():
     result = _classify_failure(exc)
     assert result.code == "BIOCENTRAL_UNAVAILABLE"
     assert "Biocentral embedding service is unavailable" in str(result)
+
+
+# --------------------------------------------------------------------------- #
+# Projection statistics step (bundle-first, best-effort, probed)
+# --------------------------------------------------------------------------- #
+
+
+@pytest.fixture(autouse=True)
+def _reset_stats_probe_cache():
+    """Keep the module-global probe cache from leaking across tests."""
+    import protspace_prep.pipeline as ppl
+
+    ppl._STATS_CLI_AVAILABLE = None
+    yield
+    ppl._STATS_CLI_AVAILABLE = None
+
+
+async def test_stats_probe_filenotfound_is_absent_and_latched():
+    import protspace_prep.pipeline as ppl
+
+    async def boom(*args, **kwargs):
+        raise FileNotFoundError
+
+    with patch("asyncio.create_subprocess_exec", new=boom):
+        assert await ppl._stats_cli_available() is False
+    assert ppl._STATS_CLI_AVAILABLE is False  # binary absent → definitive, cached
+
+
+async def test_stats_probe_timeout_is_bounded_and_not_latched(monkeypatch):
+    import protspace_prep.pipeline as ppl
+
+    monkeypatch.setattr(ppl, "_STATS_PROBE_TIMEOUT_SECONDS", 0.05)
+    proc = MagicMock()
+    proc.kill = MagicMock()
+
+    async def _hang():
+        await asyncio.sleep(0.2)
+        return 0
+
+    proc.wait = _hang
+
+    async def fake(*args, **kwargs):
+        return proc
+
+    with patch("asyncio.create_subprocess_exec", new=fake):
+        assert await ppl._stats_cli_available() is False  # bounded, returns quickly
+    assert proc.kill.called  # hung probe was killed
+    assert ppl._STATS_CLI_AVAILABLE is None  # transient hang → NOT latched, retried later
+
+
+def _stats_router(ctx: JobContext, *, stats_returncode: int = 0):
+    """Step router that also writes a statistics parquet on the `stats` step and
+    a distinct bundle on the stats re-bundle (`bundle ... -s`)."""
+    embed_dir = ctx.output_dir / "embed"
+    project_dir = ctx.output_dir / "project"
+    annotations_path = ctx.output_dir / "annotations.parquet"
+    bundle_path = ctx.output_dir / "data.parquetbundle"
+    stats_path = ctx.output_dir / "statistics.parquet"
+
+    async def fake_create(*args, **kwargs):
+        cmd = list(args)
+        if not cmd or cmd[0] != "protspace":
+            return _mock_subprocess(0)
+        step = cmd[1] if len(cmd) > 1 else ""
+        if step == "stats" and "--help" in cmd:
+            return _mock_subprocess(0)  # feature probe
+        if step == "embed":
+            embed_dir.mkdir(parents=True, exist_ok=True)
+            (embed_dir / "prot_t5.h5").write_bytes(b"H5")
+        elif step == "annotate":
+            annotations_path.write_bytes(b"P")
+        elif step == "project":
+            project_dir.mkdir(parents=True, exist_ok=True)
+            (project_dir / "projections_metadata.parquet").write_bytes(b"M")
+            (project_dir / "projections_data.parquet").write_bytes(b"D")
+        elif step == "stats":
+            if stats_returncode != 0:
+                return _mock_subprocess(stats_returncode, [b"ERROR stats boom\n"])
+            stats_path.write_bytes(b"STATS")
+        elif step == "bundle":
+            # Honor the -o target: core bundle → bundle_path; stats re-bundle → temp.
+            out = Path(cmd[cmd.index("-o") + 1])
+            out.write_bytes(b"BUNDLE+STATS" if "-s" in cmd else b"BUNDLE")
+        return _mock_subprocess(0)
+
+    return fake_create
+
+
+async def test_statistics_folds_into_bundle_and_emits_stage(ctx):
+    bundle_path = ctx.output_dir / "data.parquetbundle"
+    emitted = []
+
+    async def emit(stage, payload):
+        emitted.append(stage)
+
+    settings = load_settings()
+    with patch("protspace_prep.pipeline._stats_cli_available", new=AsyncMock(return_value=True)):
+        with patch("asyncio.create_subprocess_exec", new=_stats_router(ctx)):
+            result = await run_protspace_prepare(ctx, emit, settings=settings)
+    assert result == bundle_path
+    assert bundle_path.read_bytes() == b"BUNDLE+STATS"
+    assert (ctx.output_dir / "statistics.parquet").exists()
+    assert "computing_statistics" in emitted
+
+
+async def test_statistics_failure_keeps_core_bundle(ctx):
+    bundle_path = ctx.output_dir / "data.parquetbundle"
+    settings = load_settings()
+    with patch("protspace_prep.pipeline._stats_cli_available", new=AsyncMock(return_value=True)):
+        with patch("asyncio.create_subprocess_exec", new=_stats_router(ctx, stats_returncode=2)):
+            result = await run_protspace_prepare(ctx, AsyncMock(), settings=settings)
+    # Stats failed → job still succeeds with the core (stats-less) bundle.
+    assert result == bundle_path
+    assert bundle_path.read_bytes() == b"BUNDLE"
+
+
+async def test_statistics_skipped_when_subcommand_unavailable(ctx):
+    bundle_path = ctx.output_dir / "data.parquetbundle"
+    settings = load_settings()
+    with patch("protspace_prep.pipeline._stats_cli_available", new=AsyncMock(return_value=False)):
+        with patch("asyncio.create_subprocess_exec", new=_stats_router(ctx)):
+            result = await run_protspace_prepare(ctx, AsyncMock(), settings=settings)
+    assert result == bundle_path
+    assert bundle_path.read_bytes() == b"BUNDLE"
+    assert not (ctx.output_dir / "statistics.parquet").exists()
+
+
+async def test_statistics_disabled_by_settings(ctx):
+    bundle_path = ctx.output_dir / "data.parquetbundle"
+    base = load_settings()
+    fields = {k: getattr(base, k) for k in base.__slots__}
+    fields["stats_enabled"] = False
+    settings = type(base)(**fields)
+    probe = AsyncMock(return_value=True)
+    with patch("protspace_prep.pipeline._stats_cli_available", new=probe):
+        with patch("asyncio.create_subprocess_exec", new=_stats_router(ctx)):
+            await run_protspace_prepare(ctx, AsyncMock(), settings=settings)
+    assert bundle_path.read_bytes() == b"BUNDLE"
+    probe.assert_not_called()