diff --git a/.gitignore b/.gitignore index 073950c..8ab8f39 100644 --- a/.gitignore +++ b/.gitignore @@ -216,6 +216,8 @@ __marimo__/ # spec-kit .specify/ +.agents/skills/spec* # aim data/ +datatest/ diff --git a/AGENTS.md b/AGENTS.md index c1db14b..34854fb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -81,6 +81,8 @@ Why `3.12`: - Existing local Aim repositories (read-only). Image bytes are read (003-query-images-terminal-render) - Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, `numpy>=1.24`, `rich>=13.7`, `textual-image>=0.12.0`, existing Aim SDK usage for owned query commands; no new dependency planned (004-run-params-query) - Existing local Aim repositories on disk (read-only); run params are read from Aim run metadata attributes under `.aim` (004-run-params-query) +- Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, `numpy>=1.24`, `rich>=13.7`, `plotext>=5.3`, existing Aim SDK usage for owned trace commands; no new runtime dependency planned (005-distribution-trace-visual) +- Existing local Aim repositories on disk, read-only; distribution histogram points are read from Aim sequence data under `.aim` (005-distribution-trace-visual) ## Recent Changes - 001-aim-command-passthrough: Added Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, native Aim CLI (external runtime prerequisite for delegated commands), pytest for test automation diff --git a/README.md b/README.md index 2fbb0d3..9966c6a 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,31 @@ aimx trace "metric.name == 'loss'" --repo data --every 10 Output modes: default plot, `--table`, `--csv`, `--json`. Display controls: `--width W`, `--height H`, `--no-color`. +### Trace distributions + +`aimx trace distribution` fetches tracked Aim distribution sequences. 
By +default it prints the matched distribution names, selects the first match, and +renders a non-interactive Rich terminal visual with a web-style blue-gradient +current-step histogram and step-by-bin heatmap. Use `--table`, `--csv`, or +`--json` for tensor inspection and scripting. + +![aimx trace distribution output preview](static/distributions.png) + +```bash +# Show a web-like terminal visual for the first matched distribution +aimx trace distribution "distribution.name != ''" --repo data + +# Inspect a specific training step; nearest tracked step is used if needed +aimx trace distribution "distribution.name != ''" --repo data --step 12300 + +# Show distribution tensors in a readable table +aimx trace distribution "distribution.name == 'weights'" --repo data --table + +# Export distribution histograms for scripting +aimx trace distribution "distribution.name == 'weights'" --repo data --csv +aimx trace distribution "distribution.name == 'weights'" --repo data --json +``` + ### Common query options - Output: `--json`, `--oneline` / `--plain`, or the default rich terminal view. diff --git a/pyproject.toml b/pyproject.toml index 882d3d4..24d0c56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "aimx" -version = "0.3.2" +version = "0.3.3" description = "A safe CLI-first companion for native Aim" readme = "README.md" requires-python = ">=3.10,<3.13" diff --git a/skills/aimx/SKILL.md b/skills/aimx/SKILL.md index 7a2bb9a..7e3b820 100644 --- a/skills/aimx/SKILL.md +++ b/skills/aimx/SKILL.md @@ -11,6 +11,91 @@ Use `aimx` as a read-only evidence collector for `autoresearch` `log_experiment` steps. Prefer JSON output so downstream agents can compare runs, explain model effects, and propose the next experiment from concrete Aim data. +## Fast Recipes + +Use these first for common analysis tasks. Keep `--repo` explicit and prefer +`--json` for machine-readable output. 
+ +### Discover run scope and available params + +```bash +aimx query params "run.hash != ''" --repo --json +``` + +### Inspect one run quickly + +```bash +aimx query params "run.hash == ''" --repo --json +aimx query metrics "(run.hash == '') and metric.name != ''" --repo --json +``` + +### Rank runs by an objective metric + +```bash +aimx query metrics "() and metric.name == ''" --repo --json > metrics.json +python - <<'PY' +from __future__ import annotations +import json +from pathlib import Path + +payload = json.loads(Path("metrics.json").read_text()) +rows = [] +for run in payload.get("runs", []): + for metric in run.get("metrics", []): + value = metric.get("min", {}).get("value") + if value is not None: + rows.append((value, run.get("hash"), run.get("name"), metric.get("context", {}))) +for value, run_hash, run_name, context in sorted(rows)[:5]: + print(f"{value:.6f}\t{run_hash}\t{run_name}\t{context}") +PY +``` + +### Compare two runs side by side + +```bash +aimx query params "run.hash == '' or run.hash == ''" --repo --json +aimx query metrics "((run.hash == '') or (run.hash == '')) and metric.name == ''" --repo --json +``` + +### Check curve health with bounded trace evidence + +```bash +aimx trace "() and metric.name == ''" --repo --json --tail 200 > trace.json +``` + +Then reduce `trace.json` with the `curve_summary` snippet from +`references/aimx-cli.md` instead of pasting raw series. 
+ +### Sanity-check distribution traces + +```bash +aimx trace distribution "" --repo --json --tail 5 +aimx trace distribution "distribution.name != ''" --repo --step 12300 +``` + +### Capture one snapshot bundle for logs + +```bash +uv run python skills/aimx/scripts/collect_experiment_snapshot.py \ + --repo data \ + --base-expr "run.hash != ''" \ + --metric loss \ + --trace-metric loss \ + --pretty +``` + +## When to use what + +| Need | Use | +| --- | --- | +| Discover runs and key hyperparameters | `aimx query params "" --repo --json` | +| Rank runs cheaply by objective | `aimx query metrics "" --repo --json` and compare `min.value` or `max.value` | +| Inspect curve shape and late stability | `aimx trace "" --repo --json --tail N` | +| Focus on a step or epoch window | `--steps a:b` or `--epochs a:b` on query/trace commands | +| Analyze weight or gradient histograms | `aimx trace distribution "" --repo --json` | +| Collect qualitative image evidence | `aimx query images "" --repo --json --head N` | +| Check native Aim passthrough readiness | `aimx doctor` | + ## Requirements - Require `aimx` in the Python environment that runs `log_experiment`. @@ -32,6 +117,9 @@ effects, and propose the next experiment from concrete Aim data. ## Workflow +For common tasks, start from **Fast Recipes** and only switch to this full +workflow when the scope is unclear or the question is complex. + 1. Locate the Aim repository. Pass `--repo ` explicitly; in this repository, use `--repo data` or `--repo data/.aim` for local checks. 2. Define the run scope as an AimQL expression. Start broad with @@ -56,13 +144,22 @@ effects, and propose the next experiment from concrete Aim data. aimx trace "() and metric.name == 'loss'" --repo --json --tail 50 ``` -6. Collect image metadata when qualitative outputs matter: +6. Inspect distribution traces when weight, activation, or gradient histograms + matter. 
 Prefer JSON/CSV for automation; use the default visual output for + human terminal inspection. + + ```bash + aimx trace distribution "<distribution-expr>" --repo <path> --json --tail 5 + aimx trace distribution "distribution.name != ''" --repo <path> --step 12300 + ``` + +7. Collect image metadata when qualitative outputs matter: ```bash aimx query images "images" --repo --json --head 20 ``` -7. Emit a compact `log_experiment` record containing: +8. Emit a compact `log_experiment` record containing: ```json { @@ -71,6 +168,7 @@ effects, and propose the next experiment from concrete Aim data. "params": {}, "metric_summary": {}, "trace_evidence": {}, + "distribution_evidence": {}, "image_evidence": {}, "interpretation": { "best_runs": [], @@ -81,6 +179,43 @@ effects, and propose the next experiment from concrete Aim data. } ``` +## Analysis Workflow + +Use the same discipline as large experiment trackers: inspect structure first, +query only the fields needed for the question, then reduce evidence into compact +statistics before writing conclusions. + +1. Start with params and metric summaries to discover candidate runs, objective + metrics, contexts, and missing fields. Avoid dumping full JSON payloads into + conversation context. +2. Choose the objective direction explicitly. Rank cheaply from summaries first: + `min.value` for loss/error, `max.value` for accuracy/F1/AUC/IoU, and + `last.value` only when the final checkpoint is the real objective. +3. Pull bounded traces only for the baseline, top candidates, and suspicious + runs. Prefer `--tail`, `--steps`, `--epochs`, and `--every` before collecting + full curves. +4. Compute local stats before interpreting: best step, final-window mean/std, + train-vs-val gap, NaN/Inf counts, sustained increases, spikes, and plateaus. +5. Compare runs side by side with selected params plus selected metrics. Do not + iterate every param or every metric unless discovery is the goal. +6.
Escalate evidence by modality: use distribution traces for weights, + activations, or gradients; use image metadata for qualitative regressions. +7. Keep the final analysis small: state objective, run scope, top runs, curve + health, anomalies, confidence, and the next experiment suggested by evidence. + +## Critical Rules + +- Discover scope first with `aimx query params "<run-scope>" --repo <path> --json`. + Do not assume metric or param names. +- Treat `aimx` output as data: parse JSON and report aggregates, not raw payloads. +- Slice traces aggressively with `--tail`, `--head`, `--steps`, `--epochs`, or + `--every` before computing local statistics. +- Always pass `--repo` explicitly to avoid reading an unintended repository. +- For automation, use `aimx trace distribution` with `--json`, `--csv`, or + `--table`. Unflagged mode is terminal visualization for human inspection. +- Always finish with a compact conclusion: objective, top runs, curve health, + anomalies, confidence, and next experiment. + ## Interpretation Rules - Prefer validation, test, or held-out contexts over training contexts when @@ -88,6 +223,11 @@ effects, and propose the next experiment from concrete Aim data. - Treat `aimx query metrics` as summary data: `last`, `min`, `max`, and step counts. Use `aimx trace --json` when shape, stability, divergence, or late improvement matters. +- Use `aimx trace distribution --json` or `--csv` for automated histogram + evidence. The unflagged distribution command is a non-interactive terminal + visual that lists matched distributions, selects the first non-empty series, + and renders a current-step histogram plus step-by-bin heatmap. `--step N` + affects only this visual mode and falls back to the nearest tracked step. - For minimization metrics such as loss or error, compare `min.value` and the corresponding step. For maximization metrics such as accuracy, F1, AUC, or IoU, compare `max.value`.
@@ -97,6 +237,21 @@ effects, and propose the next experiment from concrete Aim data. - Preserve read-only behavior. Do not run commands that initialize, repair, migrate, delete, or rewrite Aim repositories during `log_experiment`. +## Gotchas + +| Gotcha | Wrong | Right | +| --- | --- | --- | +| Missing `aimx` in environment | Assume `aimx` is available | Verify with `aimx --help` or `python -m aimx --help`, then follow project install workflow | +| Repository targeting | Rely on current directory | Pass `--repo ` explicitly on every collection command | +| Summary vs curve confusion | Treat `query metrics` output as full history | Use `query metrics` for summary (`last/min/max`) and `trace --json` for curve shape | +| Raw payload dumping | Paste full JSON into conversation | Parse and compute compact aggregates before reporting | +| AimQL string quoting | `metric.name == "loss"` | `metric.name == 'loss'` | +| Short hash assumptions | Assume short hash is canonical identity | Let `aimx` expand it, but compare/store full run hash | +| Distribution output mode | Use default distribution mode in scripts | Use `--json`, `--csv`, or `--table` for automation | +| `--step` expectation | Expect `--step` to filter JSON/CSV/table exports | Use `--step` only for visual histogram mode | +| Empty trace handling | Treat non-JSON message as fatal parsing error | Treat it as no trace evidence and continue analysis | +| Full trace collection | Pull all runs and all points first | Rank by summary, then trace only baseline, top candidates, and suspicious runs | + ## Helper Script Use `scripts/collect_experiment_snapshot.py` when an agent needs one structured @@ -120,4 +275,7 @@ needed. It writes only to stdout. ## Reference Read `references/aimx-cli.md` for command details, JSON envelope shapes, and -suggested `log_experiment` evidence fields. +suggested `log_experiment` evidence fields. 
For deeper experiment analysis +patterns, see "Analysis Patterns", "Find best run by objective", "Spike / +divergence / plateau / NaN detection", "Overfitting detection", and "Sweep +ranking". diff --git a/skills/aimx/references/aimx-cli.md b/skills/aimx/references/aimx-cli.md index 48dd125..414e7c3 100644 --- a/skills/aimx/references/aimx-cli.md +++ b/skills/aimx/references/aimx-cli.md @@ -72,6 +72,14 @@ Metric expressions combine run fields with metric fields: (run.hash == 'eca37394') and metric.name != '' ``` +Distribution expressions combine run fields with distribution fields: + +```text +(run.experiment == 'cloud-segmentation') and distribution.name != '' +distribution.name == 'head/gradients/head.0.bias' +distribution.context.kind == 'weights' +``` + Short run hashes are expanded by `aimx` where supported. ## Params @@ -182,6 +190,405 @@ JSON shape: If no metrics match, current `aimx trace --json` may print a text message instead of JSON. Treat that as no trace evidence rather than a parsing failure. +## Analysis Patterns + +Treat `aimx` output as structured evidence, not as text to paste wholesale. +Discover shape first, narrow the query, load JSON locally, then report compact +aggregates. This mirrors the large-project rule from experiment trackers: never +fetch or print every run, metric, or trace point unless discovery is the task. + +### Rank runs from summaries + +1. Collect params for the run scope. +2. Collect metric summaries for candidate objective metrics. +3. Choose the objective direction before ranking: + - loss, error, perplexity: smaller is usually better; use `min.value`. + - accuracy, F1, AUC, IoU, reward: larger is usually better; use `max.value`. + - final-checkpoint objectives: use `last.value` and say why. +4. Prefer validation/test contexts over train contexts. If contexts are missing + or mixed, lower confidence instead of forcing a ranking. 
+ +### Inspect curve health from traces + +Use traces for the smallest set of runs that can answer the question: + +```bash +aimx trace "(run.hash == 'baseline') and metric.name == 'loss'" --repo --json --tail 200 +aimx trace "(run.hash == 'candidate') and metric.name == 'loss'" --repo --json --steps 1000:5000 --every 10 +``` + +Check these before writing conclusions: + +- final-window mean and standard deviation +- best value and best step +- sustained increases near the end of training +- large one-step spikes or NaN/Inf values +- plateaus where absolute step-to-step change is near zero +- train-vs-validation gap when both contexts exist + +### Compact local analysis + +Use Python to reduce trace JSON to a few numbers. Keep output small. + +```python +from __future__ import annotations + +import json +import math +from pathlib import Path +from statistics import fmean, pstdev +from typing import Any, Iterable + + +def finite_floats(values: Iterable[Any]) -> list[float]: + result: list[float] = [] + for value in values: + if isinstance(value, (int, float)) and not isinstance(value, bool): + number = float(value) + if math.isfinite(number): + result.append(number) + return result + + +def curve_summary(values: list[float]) -> dict[str, float | int | bool]: + if not values: + return {"points": 0, "has_values": False} + + final_window = values[-max(1, len(values) // 5) :] + diffs = [b - a for a, b in zip(values, values[1:])] + mean_abs_change = fmean(abs(diff) for diff in diffs) if diffs else 0.0 + + return { + "points": len(values), + "has_values": True, + "first": values[0], + "last": values[-1], + "best_min": min(values), + "best_min_index": values.index(min(values)), + "final_mean": fmean(final_window), + "final_std": pstdev(final_window) if len(final_window) > 1 else 0.0, + "mean_abs_change": mean_abs_change, + "increased_at_end": len(final_window) > 1 and final_window[-1] > final_window[0], + } + + +payload: list[dict[str, Any]] = 
json.loads(Path("trace.json").read_text()) +for series in payload: + values = finite_floats(series.get("values", [])) + print( + json.dumps( + { + "run": series.get("run", {}).get("hash"), + "metric": series.get("metric"), + "context": series.get("context", {}), + "summary": curve_summary(values), + }, + sort_keys=True, + ) + ) +``` + +### Compare runs side by side + +For comparisons, report only selected params and selected metrics: + +- run identity: hash, experiment, name +- controlled variables: model, seed, dataset, optimizer, learning rate, batch size +- objective summary: best value, best step, final value +- curve health: final-window stability, spikes, divergence, plateau +- missing evidence: missing params, missing validation metrics, short traces + +When the next experiment is requested, tie it to evidence. Examples: +increase regularization only if validation worsens while training improves; +reduce learning rate only if traces show spikes or divergence; extend training +only if the final window is still improving without validation degradation. + +### Find best run by objective + +Input command: + +```bash +aimx query metrics "() and metric.name == ''" --repo --json > metrics.json +``` + +Output: top-ranked rows `(run_hash, run_name, context, objective_value, steps)`. 
+ +```python +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Literal + +Direction = Literal["min", "max", "last"] + + +def metric_value(metric: dict[str, Any], direction: Direction) -> float | None: + if direction == "last": + value = metric.get("last", {}).get("value") + elif direction == "max": + value = metric.get("max", {}).get("value") + else: + value = metric.get("min", {}).get("value") + return float(value) if isinstance(value, (int, float)) else None + + +payload = json.loads(Path("metrics.json").read_text()) +direction: Direction = "min" +rows: list[dict[str, Any]] = [] +for run in payload.get("runs", []): + for metric in run.get("metrics", []): + value = metric_value(metric, direction) + if value is None: + continue + rows.append( + { + "run_hash": run.get("hash"), + "run_name": run.get("name"), + "metric": metric.get("name"), + "context": metric.get("context", {}), + "objective": value, + "steps": metric.get("steps", 0), + } + ) + +top = sorted(rows, key=lambda item: item["objective"], reverse=(direction == "max"))[:5] +for item in top: + print(item) +``` + +### Spike / divergence / plateau / NaN detection + +Input command: + +```bash +aimx trace "() and metric.name == ''" --repo --json --tail 500 > trace.json +``` + +Output: one compact anomaly summary per series. 
+ +```python +from __future__ import annotations + +import json +import math +from pathlib import Path +from typing import Any + +import numpy as np + + +def finite_array(values: list[Any]) -> np.ndarray: + cleaned = [ + float(value) + for value in values + if isinstance(value, (int, float)) and not isinstance(value, bool) and math.isfinite(float(value)) + ] + return np.asarray(cleaned, dtype=float) + + +payload: list[dict[str, Any]] = json.loads(Path("trace.json").read_text()) +window = 20 +eps = 1e-5 +for series in payload: + values = finite_array(series.get("values", [])) + if values.size < window + 1: + continue + diffs = np.diff(values) + roll_mean = np.convolve(values, np.ones(window) / window, mode="valid") + aligned = values[window - 1 :] + centered = aligned - roll_mean + roll_std = np.sqrt(np.convolve(centered**2, np.ones(window) / window, mode="same")) + spikes = int(np.sum(np.abs(centered) > (3.0 * np.maximum(roll_std, 1e-12)))) + trend = np.convolve(diffs, np.ones(window) / window, mode="valid") + divergence = bool(np.sum(trend > 0.0) > window) + plateau = bool(np.sum(np.abs(trend) < eps) > window) + non_finite = len(series.get("values", [])) - int(values.size) + print( + { + "run": series.get("run", {}).get("hash"), + "metric": series.get("metric"), + "context": series.get("context", {}), + "spikes": spikes, + "diverging": divergence, + "plateau": plateau, + "non_finite_points": non_finite, + } + ) +``` + +### Overfitting detection (train vs val) + +Input commands: + +```bash +aimx trace "() and metric.name == '' and metric.context.subset == 'train'" --repo --json --tail 300 > train.json +aimx trace "() and metric.name == '' and metric.context.subset == 'val'" --repo --json --tail 300 > val.json +``` + +Output: per-run train/val tail means and an overfitting flag. 
+ +```python +from __future__ import annotations + +import json +from pathlib import Path +from statistics import fmean +from typing import Any + + +def tail_mean(values: list[float], ratio: float = 0.2) -> float: + if not values: + return float("nan") + start = max(0, int(len(values) * (1.0 - ratio))) + return fmean(values[start:]) + + +def index_by_run(payload: list[dict[str, Any]]) -> dict[str, list[float]]: + index: dict[str, list[float]] = {} + for series in payload: + run_hash = str(series.get("run", {}).get("hash", "")) + values = [ + float(value) + for value in series.get("values", []) + if isinstance(value, (int, float)) and not isinstance(value, bool) + ] + if run_hash and values: + index[run_hash] = values + return index + + +train_payload: list[dict[str, Any]] = json.loads(Path("train.json").read_text()) +val_payload: list[dict[str, Any]] = json.loads(Path("val.json").read_text()) +train_by_run = index_by_run(train_payload) +val_by_run = index_by_run(val_payload) +threshold = 0.05 +for run_hash in sorted(set(train_by_run) & set(val_by_run)): + train_values = train_by_run[run_hash] + val_values = val_by_run[run_hash] + train_tail = tail_mean(train_values) + val_tail = tail_mean(val_values) + gap = val_tail - train_tail + train_trend_down = train_values[-1] <= train_values[0] + overfit = gap > threshold and train_trend_down + print( + { + "run_hash": run_hash, + "train_tail_mean": train_tail, + "val_tail_mean": val_tail, + "gap": gap, + "overfitting": overfit, + } + ) +``` + +### Sweep ranking (params x metric summary) + +Input commands: + +```bash +aimx query params "" --repo --json > params.json +aimx query metrics "() and metric.name == ''" --repo --json > metrics.json +``` + +Output: top-5 rows with objective value and selected control variables. 
+ +```python +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +def param_index(payload: dict[str, Any]) -> dict[str, dict[str, Any]]: + return { + str(run.get("hash")): dict(run.get("params", {})) + for run in payload.get("runs", []) + if run.get("hash") + } + + +params_payload = json.loads(Path("params.json").read_text()) +metrics_payload = json.loads(Path("metrics.json").read_text()) +params_by_run = param_index(params_payload) +rows: list[dict[str, Any]] = [] +for run in metrics_payload.get("runs", []): + run_hash = str(run.get("hash", "")) + if not run_hash: + continue + for metric in run.get("metrics", []): + objective = metric.get("min", {}).get("value") + if not isinstance(objective, (int, float)): + continue + run_params = params_by_run.get(run_hash, {}) + rows.append( + { + "run_hash": run_hash, + "run_name": run.get("name"), + "objective": float(objective), + "model": run_params.get("model"), + "lr": run_params.get("hparam.lr"), + "batch_size": run_params.get("hparam.batch_size"), + } + ) + +for item in sorted(rows, key=lambda row: row["objective"])[:5]: + print(item) +``` + +## Distribution Traces + +Use distribution traces when histogram shape matters, such as weights, +activations, gradients, or other Aim distribution sequences. 
+ +For automation, keep using explicit structured modes: + +```bash +aimx trace distribution "" --repo --json +aimx trace distribution "" --repo --csv --tail 5 +aimx trace distribution "" --repo --table --head 2 +``` + +JSON shape: + +```json +[ + { + "run": { + "hash": "full-run-hash", + "experiment": "experiment-name", + "name": "run-name" + }, + "distribution": "head/gradients/head.0.bias", + "context": {"kind": "gradients"}, + "count": 2, + "points": [ + { + "step": 300, + "epoch": 0.0, + "bin_edges": [-1.0, 0.0, 1.0], + "weights": [0.0, 2.0] + } + ] + } +] +``` + +For human terminal inspection, omit the output-mode flag: + +```bash +aimx trace distribution "distribution.name != ''" --repo +aimx trace distribution "distribution.name != ''" --repo --step 12300 +``` + +Default distribution output is deterministic, non-interactive text. It lists +matched distribution names, selects the first non-empty series, renders a +current-step histogram, and renders a step-by-bin heatmap. `--step N` selects +the visual histogram step; if the step is absent, `aimx` labels the nearest +tracked step used. `--step` does not filter `--table`, `--csv`, or `--json` +outputs. + ## Images Use images for qualitative checks such as sample predictions, masks, generated @@ -223,6 +630,8 @@ Recommended fields for autoresearch output: - `params`: selected hyperparameters and model identifiers per run. - `metric_summary`: objective metric summaries per run and context. - `trace_evidence`: sampled value arrays for decisive metrics. +- `distribution_evidence`: selected histogram payloads, visual inspection notes, + or structured distribution rows for weight/gradient analysis. - `image_evidence`: image row counts and representative contexts. - `ranking`: best run per objective, objective direction, and tie-breakers. 
- `regressions`: runs worse than baseline, incomplete runs, missing metrics, or diff --git a/specs/005-distribution-trace-visual/checklists/requirements.md b/specs/005-distribution-trace-visual/checklists/requirements.md new file mode 100644 index 0000000..2e3b797 --- /dev/null +++ b/specs/005-distribution-trace-visual/checklists/requirements.md @@ -0,0 +1,36 @@ +# Specification Quality Checklist: Distribution Trace Visual + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-04-30 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Validation passed after initial review. The spec deliberately treats command names and flags as user-facing product surface, while avoiding implementation library, module, and code-structure details. +- Implementation verification completed on 2026-04-30. 
Quickstart sections 3-9 were exercised against the contributor-local `data/` Aim repository where distributions exist; default visual, exact `--step`, nearest-step fallback, `--table`, `--json`, `--csv`, and no-match behavior all exited successfully when histogram data was present. +- Regression verification passed with `uv run pytest tests/unit/test_trace_helpers.py tests/unit/test_trace_distribution_views.py tests/integration/test_trace_command.py tests/contract/test_trace_contract.py -q`, passthrough/missing-dependency checks, and a fresh full-suite run with `uv run pytest -vv --durations=25` (`297 passed, 15 warnings in 34.12s`). diff --git a/specs/005-distribution-trace-visual/contracts/cli-output.md b/specs/005-distribution-trace-visual/contracts/cli-output.md new file mode 100644 index 0000000..5b86998 --- /dev/null +++ b/specs/005-distribution-trace-visual/contracts/cli-output.md @@ -0,0 +1,141 @@ +# CLI Output Contract: `aimx trace distribution` + +**Feature**: `005-distribution-trace-visual` + +This contract defines the observable CLI behavior for distribution trace +visualization and exports. + +## Command Shape + +```text +aimx trace distribution <expression> [--repo <path>] + [--steps start:end] + [--head N] [--tail N] [--every K] + [--step N] + [--width W] [--height H] [--no-color] + [--table | --csv | --json] +``` + +## Owned Options + +| Option | Applies To | Behavior | +|--------|------------|----------| +| `--repo <path>` | all modes | Uses a local Aim repository root or `.aim` directory. Defaults to the current directory. | +| `--steps start:end` | all modes | Filters distribution points by inclusive tracked step range before sampling and rendering. | +| `--head N` | all modes | Keeps the first `N` points per matched series before rendering or export. | +| `--tail N` | all modes | Keeps the last `N` points per matched series before rendering or export. | +| `--every K` | all modes | Keeps every `K`th point per matched series before rendering or export.
 | +| `--step N` | default visual mode | Chooses the current-step histogram step; nearest tracked step is used when no exact match exists. | +| `--width W` | default visual mode | Sets preferred chart width when supported by the renderer. | +| `--height H` | default visual mode | Sets preferred chart height when supported by the renderer. | +| `--no-color` | human-readable modes | Disables ANSI styling where applicable. | +| `--table` | explicit table mode | Emits the existing tensor table and no default visual charts. | +| `--csv` | explicit CSV mode | Emits existing CSV histogram rows and no default visual charts. | +| `--json` | explicit JSON mode | Emits existing JSON series payload and no default visual charts. | + +## Query Expression + +The expression is passed to Aim's distribution-query evaluator after existing +short `run.hash` expansion and the documented singular `distribution` alias +normalization. + +Examples: + +```bash +aimx trace distribution "distribution.name != ''" --repo data +aimx trace distribution "distribution.name == '<name>'" --repo data --step <step> +aimx trace distribution "distribution.context.kind == 'weights'" --repo data --json +``` + +Replace `<name>` and `<step>` with values present in your Aim repository; the commands illustrate syntax only. + +## Default Visual Output + +Default output is non-interactive terminal text. It contains these sections in +order: + +1. Distribution name list +2. Selected distribution context and current-step label +3. Current-step histogram +4. Step-by-bin heatmap + +The distribution name list must show every matched distribution name in +collection order. The selected item must be visibly marked. When more than one +series matches, only the selected series is visualized. + +The selected series is the first non-empty matched series after filtering and +sampling. The default selected step is the first available point in that +series.
When `--step N` is provided: + +- if `N` is tracked, display `N` +- if `N` is not tracked, display the nearest tracked step +- if two steps are equally close, display the lower step +- always label the actual displayed step + +## Table Output + +`--table` keeps the existing distribution tensor table workflow: + +```text + · · · points + STEP EPOCH TENSOR + 300 0 [1, 1, 0, 1, ...] (64 bins) +``` + +This mode must not include the default distribution name list, histogram, or +heatmap. + +## CSV Output + +`--csv` remains parseable CSV with the existing field names: + +```text +run_hash,experiment,distribution,context,step,epoch,bin_edges,weights +``` + +Each data row represents one distribution point. `bin_edges` and `weights` +remain JSON-encoded arrays inside CSV cells. + +## JSON Output + +`--json` remains parseable JSON with the existing top-level shape: a list of +distribution series. + +Each series contains: + +- `run` +- `distribution` +- `context` +- `count` +- `points` + +Each point contains: + +- `step` +- `epoch` +- `bin_edges` +- `weights` + +## Exit Status + +| Condition | Exit Status | Output | +|-----------|-------------|--------| +| Valid default visual query with one or more non-empty matches | `0` | Distribution list, selected histogram, heatmap | +| Valid table, CSV, or JSON query with matches | `0` | Explicit mode output | +| Valid query with zero matches | `0` | Explicit no-matches message or empty structured output if already defined by that mode | +| Filtering removes all data | `0` | Explicit no-data-in-range message | +| Requested visual step is absent but another point exists | `0` | Nearest tracked step rendered and labeled | +| Missing repository path | `2` | Actionable error on stderr | +| Invalid query expression | `2` | Actionable error on stderr | +| Missing or non-integer `--step` value | `2` | Actionable error on stderr | +| Invalid sampling or step-range option | `2` | Actionable error on stderr | + +## Non-Regression Requirements + 
+- Metric trace default plot, table, CSV, and JSON outputs remain unchanged. +- Distribution `--table`, `--csv`, and `--json` outputs keep existing field + names and parseability. +- Existing `--steps`, `--head`, `--tail`, `--every`, `--width`, `--height`, and + `--no-color` parsing behavior remains compatible except for the documented + addition of `--step`. +- Commands outside owned `aimx` surfaces continue to delegate to native `aim`. diff --git a/specs/005-distribution-trace-visual/data-model.md b/specs/005-distribution-trace-visual/data-model.md new file mode 100644 index 0000000..dc46d15 --- /dev/null +++ b/specs/005-distribution-trace-visual/data-model.md @@ -0,0 +1,184 @@ +# Phase 1 Data Model: Distribution Trace Visual + +**Feature**: `005-distribution-trace-visual` + +## Trace Distribution Invocation + +Represents one CLI request to inspect distribution traces. + +**Fields**: + +- `target`: literal `distribution` +- `expression`: Aim distribution query expression supplied by the user +- `repo_path`: local repository root or `.aim` path +- `mode`: one of default visual, table, CSV, or JSON +- `step_slice`: optional inclusive step range filter +- `selected_step`: optional visual step requested with `--step N` +- `head`, `tail`, `every`: optional sampling controls +- `width`, `height`: optional display controls for visual output +- `no_color`: boolean terminal styling control + +**Validation Rules**: + +- `expression` must be present for distribution traces. +- `repo_path` must exist and is normalized consistently with existing trace + commands. +- `--step` requires an integer value. +- Sampling and filtering controls are applied before visual selection. +- `selected_step` affects only default visual output; structured export modes + preserve their existing full-series behavior. + +## Distribution Match + +Represents one distribution series returned by the user's expression. 
+ +**Fields**: + +- `run`: run identity with hash, experiment, optional name, and optional + creation time +- `name`: distribution name +- `context`: distribution context key-value mapping +- `points`: ordered list of distribution points + +**Relationships**: + +- One Trace Distribution Invocation produces zero or more Distribution Matches. +- One Distribution Match contains zero or more Distribution Points. +- Distribution Matches form the Distribution Name List in the default visual + output. + +**Validation Rules**: + +- Match order is preserved from the collection pipeline so default selection is + deterministic. +- Empty `points` are allowed but are skipped for selected visual rendering when + a later non-empty match exists. +- Context values are displayed compactly in human-readable output and remain + preserved in structured output. + +## Distribution Point + +Represents one tracked histogram at one step. + +**Fields**: + +- `step`: tracked training step +- `epoch`: optional epoch value +- `bin_edges`: ordered numeric bin edges +- `weights`: ordered numeric histogram weights + +**Relationships**: + +- Belongs to one Distribution Match. +- One selected Distribution Point powers the current-step histogram. +- All displayed points in the selected series power the step-by-bin heatmap. + +**Validation Rules**: + +- `weights` may be all zeros. +- `bin_edges` and `weights` must be paired so the histogram can label bins + coherently. +- Single-point series are valid and produce a single-step heatmap. + +## Distribution Name List + +The ordered human-readable list of matched distribution names shown before the +default visual charts. + +**Fields**: + +- `items`: ordered names and compact context labels +- `selected_index`: index of the distribution selected for visual rendering +- `selected_label`: selected distribution display label + +**Relationships**: + +- Built from Distribution Matches. +- References the Selected Distribution by index and label. 
+ +**Validation Rules**: + +- All matched distribution names are listed. +- The selected item is visibly marked. +- Duplicate names remain distinguishable by context or run label when present. + +## Selected Distribution + +The distribution series rendered by default visual mode. + +**Fields**: + +- `series`: selected Distribution Match +- `selected_point`: resolved Distribution Point for the current-step histogram +- `requested_step`: optional step requested by the user +- `resolved_step`: actual tracked step displayed +- `step_resolution`: exact, nearest-lower, nearest-higher, or default-first + +**Relationships**: + +- Derived from the Distribution Name List and available Distribution Points. +- Supplies data to Histogram View and Heatmap View. + +**Validation Rules**: + +- If `requested_step` exactly matches a point, select that point. +- If `requested_step` does not match, select the point with minimum absolute + distance to the requested value. +- If two points are equally close, select the lower step. +- If no non-empty distribution exists after filtering and sampling, visual + rendering is skipped and the command reports no data. + +## Histogram View + +The current-step chart for the selected distribution. + +**Fields**: + +- `title`: selected distribution name and resolved step +- `x_values`: bin centers or labels derived from bin edges +- `weights`: histogram weights for the selected point +- `step_label`: actual tracked step displayed +- `request_note`: optional note when nearest-step fallback was used + +**Validation Rules**: + +- Must render even when all weights are zero. +- Must label the actual displayed step. +- Must remain deterministic when terminal color is disabled. + +## Heatmap View + +The cross-step chart for the selected distribution. 
+ +**Fields**: + +- `step_values`: ordered tracked steps after filtering and sampling +- `bin_values`: bin labels or centers +- `weight_matrix`: rows or columns of histogram weights aligned with steps and + bins +- `color_scale_label`: textual scale or legend for weight intensity + +**Validation Rules**: + +- Must include every displayed point in the selected distribution. +- Must degrade to a single-step heatmap for single-point series. +- Must not mutate or normalize stored histogram data in the repository. + +## Structured Export Output + +The explicit table, CSV, or JSON output selected by mode flags. + +**Fields**: + +- Existing distribution table fields: step, epoch, tensor summary +- Existing CSV fields: run hash, experiment, distribution, context, step, + epoch, bin edges, weights +- Existing JSON fields: run, distribution, context, count, points with bin + edges and weights + +**Validation Rules**: + +- Explicit structured modes do not include the new default visual name list, + histogram, or heatmap. +- CSV and JSON remain parseable with the existing field names. +- Table mode remains a tensor inspection view. 
diff --git a/specs/005-distribution-trace-visual/plan.md b/specs/005-distribution-trace-visual/plan.md new file mode 100644 index 0000000..4f86a67 --- /dev/null +++ b/specs/005-distribution-trace-visual/plan.md @@ -0,0 +1,161 @@ +# Implementation Plan: Distribution Trace Visual + +**Branch**: `005-distribution-trace-visual` | **Date**: 2026-04-30 | **Spec**: [spec.md](/Users/blizhan/data/code/github/aimx/specs/005-distribution-trace-visual/spec.md) +**Input**: Feature specification from `/Users/blizhan/data/code/github/aimx/specs/005-distribution-trace-visual/spec.md` + +## Summary + +Change the owned `aimx trace distribution` default mode from a tensor table to +a non-interactive terminal visual summary that mirrors the Aim web +Distributions tab: list all matched distribution names, mark the first +non-empty match as selected, render a current-step histogram, and render a +step-by-bin heatmap for that selected series. Add `--step N` for visual step +selection using nearest tracked-step fallback. Preserve existing `--table`, +`--csv`, and `--json` distribution outputs without intentional schema changes. +The implementation stays read-only, uses existing trace command/data/rendering +boundaries, and introduces no new runtime dependency. 
+ +## Technical Context + +**Language/Version**: Python 3.12 for development, runtime support `>=3.10,<3.13` +**Primary Dependencies**: Python standard library, `numpy>=1.24`, `rich>=13.7`, `plotext>=5.3`, existing Aim SDK usage for owned trace commands; no new runtime dependency planned +**Storage**: Existing local Aim repositories on disk, read-only; distribution histogram points are read from Aim sequence data under `.aim` +**Testing**: pytest unit, integration, and contract suites; integration and contract coverage exercises real Aim distribution queries against `tests/conftest.py`'s `sample_repo_root` (`Path("data")`) when matching sequences exist and skips cleanly otherwise so CI stays green without extra fixtures +**Target Platform**: Terminal-first CLI for local shells, SSH sessions, scripts, and CI on Python-supported platforms +**Project Type**: Single-project Python CLI application +**Performance Goals**: Default visual rendering stays bounded to one command invocation for realistic repositories with multiple matched distributions and many tracked histogram steps (such as training runs logged from TensorBoard imports), remaining readable on a standard terminal width +**Constraints**: Read-only; preserve native Aim passthrough behavior; preserve existing distribution `--table`, `--csv`, and `--json` output contracts; keep default output non-interactive; avoid adding GUI or TUI dependencies +**Scale/Scope**: One owned command path (`trace distribution`), one visual-only option (`--step N`), default distribution visual renderer, help/README updates, and focused unit/integration/contract tests + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +- [x] Safe coexistence: default distribution visual rendering only reads local + Aim distribution sequence data; no normal-path change modifies the + installed `aim` package, replaces the native `aim` executable, or mutates + `.aim` repo data. 
+- [x] Ownership boundary: `aimx` already owns `aimx trace distribution`; the + plan adds owned default rendering behavior and the owned `--step` option + for that command path only. Native `aim` passthrough remains unchanged. +- [x] Read-only default: all behavior is inspection-only and uses no Aim + mutation APIs. +- [x] CLI-first contract: the default output is deterministic non-interactive + terminal text, while existing `--table`, `--csv`, and `--json` modes + remain available for diagnostics and automation. +- [x] Compatibility plan: design reuses current distribution collection, + filtering, sampling, repo normalization, and trace error handling; tests + cover default visual output plus non-regression for existing modes. + +## Project Structure + +### Documentation (this feature) + +```text +/Users/blizhan/data/code/github/aimx/specs/005-distribution-trace-visual/ +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── contracts/ +│ └── cli-output.md +├── checklists/ +│ └── requirements.md +└── tasks.md # created later by /speckit.tasks +``` + +### Source Code (repository root) + +```text +/Users/blizhan/data/code/github/aimx/ +├── README.md +├── src/aimx/ +│ ├── commands/ +│ │ ├── help.py # update trace distribution usage text +│ │ └── trace.py # parse --step and route default distribution mode +│ ├── aim_bridge/ +│ │ └── metric_stats.py # keep DistributionSeries/Point filtering and sampling helpers +│ └── rendering/ +│ └── trace_views.py # add default distribution visual renderer +└── tests/ + ├── contract/ + │ └── test_trace_contract.py # add distribution output contract coverage + ├── integration/ + │ └── test_trace_command.py # add distribution CLI coverage guarded by repo fixtures + └── unit/ + ├── test_trace_distribution_views.py + └── test_trace_helpers.py +``` + +**Structure Decision**: Keep the existing single-project CLI layout. 
The trace +command remains the orchestration boundary for parsing, repo normalization, +filtering, sampling, and mode selection. The Aim bridge keeps returning +`DistributionSeries` records. The visual work belongs in +`rendering/trace_views.py` next to existing metric plots and distribution +table/CSV/JSON renderers. + +## Phase 0: Research Summary + +Phase 0 decisions are captured in [research.md](/Users/blizhan/data/code/github/aimx/specs/005-distribution-trace-visual/research.md). Key outcomes: + +- Keep `aimx trace distribution` as the user-facing command and change only its + default mode; no TUI, no new top-level command, and no new selector command. +- Reuse existing `DistributionSeries` / `DistributionPoint` data and filtering + behavior, selecting the first non-empty series after expression matching, + step filtering, and sampling. +- Add `--step N` as a visual-only selector. Exact matches are used directly; + otherwise the nearest tracked step is selected, with lower step winning ties. +- Reuse existing plotting/rendering dependencies. `plotext` is available for + histogram and heatmap-style terminal output, while `rich` can frame labels + and the matched-name list consistently with existing views. +- Preserve `--table`, `--csv`, and `--json` as non-visual modes with unchanged + data shapes. + +## Phase 1: Design Summary + +- Extend `TraceInvocation` with `selected_step: int | None` and parse `--step N` + in `parse_trace_invocation`. Reject missing or non-integer values with exit + code `2` through the existing error path. +- Treat `--step` as relevant to default distribution visual mode. `--table`, + `--csv`, and `--json` keep their current full-series behavior even when + `--step` is supplied. +- Add a distribution visual render path in `_render_distribution_trace`: JSON + and CSV keep current renderers, table keeps the current tensor-table renderer, + and default plot mode calls a new visual renderer. 
+- Implement a visual selection helper that receives filtered/sampled + `DistributionSeries` records, builds the ordered name list, selects the first + non-empty series, resolves the selected step, and returns a render model for + histogram and heatmap output. +- Implement the default visual renderer with three sections: + - `Distributions`: ordered matched names with a selected marker and compact + context for the selected item. + - `Histogram`: selected distribution name, selected step label, and + Rich-rendered blue-gradient bin-weight histogram for the resolved step. + - `Heatmap`: step-by-bin view for the selected distribution across available + displayed points, using the same web-style blue intensity scale. +- Keep no-match and no-data messages on the existing successful, non-throwing + path. Add specific tests for empty points, single-step series, all-zero + weights, and nearest-step tie behavior. +- Update README/help text so default distribution output is documented as + visual, while tensor-table usage moves behind explicit `--table`. + +## Post-Design Constitution Check + +- [x] Safe coexistence: design reads distribution query results and histogram + arrays only; no installed Aim package, executable, or repository data is + modified. +- [x] Ownership boundary: all new behavior is contained inside the existing + `aimx trace distribution` command path and its documented options. +- [x] Read-only default: visual selection, plotting, and export rendering are + derived from in-memory query results. +- [x] CLI-first contract: [contracts/cli-output.md](/Users/blizhan/data/code/github/aimx/specs/005-distribution-trace-visual/contracts/cli-output.md) + defines default visual output, structured modes, exit statuses, and + non-regression expectations. +- [x] Compatibility: existing trace command tests remain part of validation; + new integration tests query real Aim repositories when distributions exist + and assert no schema change for table, CSV, or JSON modes. 
+ +## Complexity Tracking + +No constitution violations; no exceptional complexity requires justification. +The feature uses existing project boundaries and dependencies. diff --git a/specs/005-distribution-trace-visual/quickstart.md b/specs/005-distribution-trace-visual/quickstart.md new file mode 100644 index 0000000..73e4145 --- /dev/null +++ b/specs/005-distribution-trace-visual/quickstart.md @@ -0,0 +1,155 @@ +# Quickstart: Distribution Trace Visual + +**Feature**: `005-distribution-trace-visual` + +## 1. Prepare The Environment + +```bash +uv sync +``` + +Point `--repo` at an Aim repository root that contains `.aim` metadata. This +guide uses the contributor-local checkout rooted at `data/` (see `AGENTS.md`): + +```text +/Users/blizhan/data/code/github/aimx/data/.aim +``` + +If your checkout does not contain distribution histogram sequences yet, most +CLI sections below still validate `--table`, `--csv`, `--json`, and no-match +behavior; default visualization sections require at least one matched +distribution series. + +## 2. Inspect Available Steps (Optional) + +Pick a tracked step value from your repository before trying `--step`: + +```bash +uv run aimx trace distribution "distribution.name != ''" \ + --repo data --json --head 1 | python -c ' +import json, sys +payload = json.load(sys.stdin) +if not payload: + raise SystemExit("No distribution series matched this repository.") +steps = sorted({point["step"] for series in payload for point in series["points"]}) +print("sample steps:", steps[:5], "... total", len(steps)) +' +``` + +Use any printed step as `<STEP>` below. + +## 3.
Render The Default Distribution Visual + +```bash +uv run aimx trace distribution "distribution.name != ''" --repo data +``` + +Expected result: + +- exit code `0` (when nothing matches, the command still exits `0` and reports no matching distributions) +- output includes a `Distributions` name list when matches exist +- the first matched distribution is marked as selected +- output labels the current displayed step +- output includes a current-step histogram +- output includes a step-by-bin heatmap +- command does not prompt for keyboard or mouse input + +## 4. Select A Specific Step + +```bash +uv run aimx trace distribution "distribution.name != ''" \ + --repo data --step <STEP> +``` + +Expected result: + +- exit code `0` +- current-step histogram labels step `<STEP>` +- heatmap still covers the displayed points for the selected series + +## 5. Select A Nearest Step + +Pick two consecutive tracked steps `LOWER` and `HIGHER` from section 2, choose a +non-tracked integer `REQUESTED` strictly between them, then run: + +```bash +uv run aimx trace distribution "distribution.name != ''" \ + --repo data --step <REQUESTED> +``` + +Expected result: + +- exit code `0` +- output labels the actual tracked step chosen (nearest tracked step, lower + step wins on ties) +- no traceback is printed + +## 6. Keep Tensor Table Output + +```bash +uv run aimx trace distribution "distribution.name != ''" --repo data --table --head 2 +``` + +Expected result: + +- exit code `0` +- output contains `STEP`, `EPOCH`, and `TENSOR` +- output does not include the default visual histogram or heatmap sections + +## 7. Keep JSON Output + +```bash +uv run aimx trace distribution "distribution.name != ''" --repo data --json --head 1 +``` + +Expected result: + +- valid JSON +- top-level value is a list +- each series contains `run`, `distribution`, `context`, `count`, and `points` +- each point contains `step`, `epoch`, `bin_edges`, and `weights` + +## 8.
Keep CSV Output + +```bash +uv run aimx trace distribution "distribution.name != ''" --repo data --csv --head 1 +``` + +Expected result: + +- valid CSV +- header includes `run_hash`, `experiment`, `distribution`, `context`, + `step`, `epoch`, `bin_edges`, and `weights` + +## 9. No-Match Behavior + +```bash +uv run aimx trace distribution "distribution.name == 'missing-distribution'" --repo data +``` + +Expected result: + +- exit code `0` +- output reports no matching distributions +- no traceback is printed + +## 10. Test Commands + +```bash +uv run pytest tests/unit/test_trace_helpers.py tests/unit/test_trace_distribution_views.py -q +uv run pytest tests/integration/test_trace_command.py -q +uv run pytest tests/contract/test_trace_contract.py -q +uv run pytest -q +``` + +## Verification Notes + +- Automated integration and contract suites skip distribution scenarios when the + configured `sample_repo_root` repository has no histogram data, which keeps CI + green without checking in custom fixtures. +- When your local `data/` repository contains distributions, the commands above + provide the same observability checks that previously relied on ad hoc test + directories. +- `uv run` may emit an environment warning about a missing + `aimx-*.dist-info/RECORD` during editable reinstall; commands and tests should + still complete successfully. diff --git a/specs/005-distribution-trace-visual/research.md b/specs/005-distribution-trace-visual/research.md new file mode 100644 index 0000000..97d36a2 --- /dev/null +++ b/specs/005-distribution-trace-visual/research.md @@ -0,0 +1,121 @@ +# Phase 0 Research: Distribution Trace Visual + +**Feature**: `005-distribution-trace-visual` +**Date**: 2026-04-30 + +## Decision: Keep The Existing `trace distribution` Command + +Use `aimx trace distribution <expression>` as the command surface and change +only the default mode for distribution traces.
+ +**Rationale**: The project already owns the trace distribution command path, +and users already query distributions through Aim-style expressions there. +Changing the default mode keeps the feature discoverable and aligns with the +existing `aimx trace <metric-expression>` behavior, where the unflagged command +is visual and explicit flags select table or structured output. + +**Alternatives considered**: + +- Add a new top-level distribution command: rejected because it duplicates the + existing owned trace surface and fragments query/filtering behavior. +- Add an interactive terminal UI: rejected because the requested behavior is + non-interactive and the constitution prioritizes predictable CLI workflows. +- Add a separate name selector command: rejected for v1 because the matched + name list already gives users the expression targets they need. + +## Decision: Select The First Non-Empty Matched Series By Default + +Default visual mode lists all matched distribution names but renders only the +first non-empty series after expression matching, step filtering, and sampling. + +**Rationale**: This mirrors the Aim web page's default expanded item while +preventing large terminal output when an expression matches many names. The +list of matched names preserves discoverability and tells users how to narrow +their expression when they want another series. + +**Alternatives considered**: + +- Render every matched distribution: rejected because terminal output becomes + noisy when an expression matches many series (for example repositories that + import wide TensorBoard histogram tiles). +- Refuse to render until the user narrows to one name: rejected because it + makes the default command less useful than the web page. +- Select the last matched distribution: rejected because it is less consistent + with the web UI's first expanded item. + +## Decision: Add `--step N` For Visual Step Selection + +Add `--step N` so users can choose the current-step histogram in default visual +mode.
Exact tracked steps are used directly. If the requested step is not +tracked, the nearest tracked step is used and the actual displayed step is +labeled. If two tracked steps are equally near, choose the earlier step. + +**Rationale**: The default step should match the web page's first step, but +terminal users often need to inspect a later training step without opening the +web UI. Nearest-step fallback keeps the command forgiving while still being +deterministic. + +**Alternatives considered**: + +- Always use the first tracked step: rejected because it prevents targeted + debugging from the CLI. +- Always use the last tracked step: rejected because it diverges from the web + page's default behavior that motivated the request. +- Require exact step matches only: rejected because users may not know the + repository's tracked step interval. + +## Decision: Keep `--step` Visual-Only + +`--step` affects only default visual rendering. `--table`, `--csv`, and +`--json` continue to emit the filtered/sampled distribution series according +to their existing contracts. + +**Rationale**: Existing table/CSV/JSON behavior is useful precisely because it +exports all available points after filtering and sampling. Applying `--step` to +those modes would be an intentional schema or row-count behavior change and +could surprise scripts. + +**Alternatives considered**: + +- Filter all output modes to the selected step: rejected because it changes + existing export semantics. +- Reject `--step` with non-default modes: rejected because the option can be + harmlessly ignored for compatibility and keeps parser behavior simple. + +## Decision: Reuse Existing Terminal Rendering Dependencies + +Use the current rendering dependencies for default visual output: `plotext` for +histogram/heatmap-style charts and `rich` for labels, list formatting, and +no-color behavior. 
+ +**Rationale**: `plotext` is already used by metric trace plots and provides +histogram, bar, and heatmap/matrix plotting helpers in the local environment. +`rich` is already used for trace tables and query views. Reusing both avoids a +new dependency and keeps output behavior consistent with the rest of `aimx`. + +**Alternatives considered**: + +- Add a dedicated TUI or plotting dependency: rejected because it increases + dependency surface for a static terminal snapshot. +- Hand-roll all plotting with ad hoc string output: rejected because existing + terminal plotting support is already available and tested in trace metrics. +- Generate image files: rejected because the feature should work in shell, SSH, + and CI-style output without file management. + +## Decision: Prefer Local Repositories Like `data/` For Integration Coverage + +Exercise real Aim distribution queries against whatever repository contributors +already mount at `tests/conftest.py`'s `sample_repo_root` (`Path("data")`), +typically `/Users/blizhan/data/code/github/aimx/data/.aim`. + +**Rationale**: Keeping validation anchored on `data/` matches contributor docs, +avoids committing bespoke `.aim` fixtures, and still walks the Aim SDK query path. +pytest skips integration and contract assertions when no distributions exist so +remote CI stays deterministic without shipping histogram data. + +**Alternatives considered**: + +- Require every checkout to ship a curated `.aim` fixture: rejected because it + bloats the repository and duplicates what operators already store locally. +- Rely only on unit fixtures: partially adopted for determinism, but paired with + optional integration coverage whenever local distributions exist. 
diff --git a/specs/005-distribution-trace-visual/spec.md b/specs/005-distribution-trace-visual/spec.md new file mode 100644 index 0000000..5783972 --- /dev/null +++ b/specs/005-distribution-trace-visual/spec.md @@ -0,0 +1,119 @@ +# Feature Specification: Distribution Trace Visual + +**Feature Branch**: `005-distribution-trace-visual` +**Created**: 2026-04-30 +**Status**: Draft +**Input**: User description: "Modify the current distribution trace command so the default experience more closely matches the Aim web Distributions tab: show which distribution names are available, default to a visual non-interactive output similar to trace metrics, include a current-step histogram and a step-by-bin heatmap, support a step selection option, and preserve existing `--table`, `--csv`, and `--json` outputs." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Inspect Distribution Visually By Default (Priority: P1) + +An Aim user runs the distribution trace command against a local repository and immediately sees a readable visual summary instead of a tensor table. The output shows the matched distribution names and visualizes the first matched distribution in a way that resembles the Aim web Distributions tab. + +**Why this priority**: This is the core requested workflow and closes the gap between the current terminal output and the web UI. + +**Independent Test**: Run the distribution trace command against a repository with multiple distribution series and no explicit output mode; verify that the command exits successfully, lists matched distribution names, marks the first match as selected, and renders both a current-step histogram and a cross-step heatmap for that selected series. + +**Acceptance Scenarios**: + +1. 
**Given** a local Aim repository containing multiple distributions, **When** the user runs `aimx trace distribution <expression>` without an output-mode flag, **Then** the output lists the matched distribution names and visually renders only the first matched series. +2. **Given** the selected distribution has data from step 300 through later steps, **When** the user runs the default distribution trace command, **Then** the current-step histogram uses the first available step and labels the actual step displayed. +3. **Given** the selected distribution has values across multiple steps, **When** the user runs the default distribution trace command, **Then** the output includes a step-by-bin heatmap that communicates how the distribution changes over the available steps. + +--- + +### User Story 2 - Choose A Display Step Non-Interactively (Priority: P2) + +An Aim user wants to inspect a specific training step from the terminal without opening the web UI or entering an interactive selector. + +**Why this priority**: Step selection makes the visual output useful for targeted debugging while preserving a single-command, script-friendly workflow. + +**Independent Test**: Run the distribution trace command with a requested step that exists and with one that does not exist; verify that the visual output displays the exact step when available and the nearest tracked step otherwise. + +**Acceptance Scenarios**: + +1. **Given** a selected distribution with a tracked step of 12300, **When** the user runs `aimx trace distribution <expression> --step 12300`, **Then** the current-step histogram displays step 12300. +2. **Given** a selected distribution without a tracked step of 1000, **When** the user runs `aimx trace distribution <expression> --step 1000`, **Then** the current-step histogram displays the nearest tracked step and labels the actual step used. +3.
**Given** two tracked steps are equally close to the requested step, **When** the user runs the command with that requested step, **Then** the earlier tracked step is selected and labeled. + +--- + +### User Story 3 - Keep Existing Scriptable Outputs (Priority: P3) + +An Aim user or automation already depends on distribution tensor output in table, CSV, or JSON form and should not lose those workflows when the default output changes. + +**Why this priority**: `aimx` is CLI-first and scriptable; visual defaults must not break existing export and diagnostic usage. + +**Independent Test**: Run distribution trace commands with `--table`, `--csv`, and `--json`; verify that each mode remains available, keeps the same data shape as before, and does not emit the new default visual summary. + +**Acceptance Scenarios**: + +1. **Given** a distribution query with matching data, **When** the user runs the command with `--table`, **Then** the output remains a tensor table with step, epoch, and tensor content. +2. **Given** a distribution query with matching data, **When** the user runs the command with `--csv`, **Then** the output remains parseable CSV with distribution histogram fields. +3. **Given** a distribution query with matching data, **When** the user runs the command with `--json`, **Then** the output remains parseable JSON with each distribution's points, bin edges, and weights. + +### Edge Cases + +- If the expression matches no distributions, the command exits successfully and reports that no matching distributions were found. +- If step filtering or sampling removes all points from every matched series, the command exits successfully and reports that no data remains in the requested step range. +- If a matched distribution has a name and context but no points, it is listed only when relevant to the mode and is not selected for visual rendering unless no non-empty series exist. 
+- If the selected distribution contains a single tracked step, the histogram is still shown and the heatmap degrades to a single-step view. +- If the selected distribution has empty or all-zero histogram weights at the selected step, the histogram and heatmap still render with clear labels rather than failing. +- If terminal output is captured in a non-interactive environment, the default visual output remains deterministic text output and does not require keyboard or mouse interaction. +- If `--step` is provided with a non-default output mode, the export modes keep their existing full-data behavior; the step selection only affects the default visual view. + +## Constitution Alignment *(mandatory)* + +- **CA-001 Safety & Mutability**: This feature is read-only. It inspects distribution data from existing Aim repositories and must not modify the installed Aim package or mutate `.aim` repository data. +- **CA-002 Ownership Boundary**: `aimx` owns the existing `aimx trace distribution` command path and the new default visual behavior for that path. Native Aim passthrough remains unchanged for all unowned command paths. +- **CA-003 CLI & Output Contract**: The command remains usable in local shells, SSH sessions, captured logs, and CI. It provides a human-readable default visual output and preserves existing machine-readable `--csv` and `--json` outputs. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The default `aimx trace distribution <expression>` output MUST list all matched distribution names before rendering the selected distribution. +- **FR-002**: The default output MUST identify which matched distribution is selected for visual rendering. +- **FR-003**: When multiple distributions match, the default output MUST select the first matched distribution for visual rendering. +- **FR-004**: The default visual output MUST include a histogram for one selected step of the selected distribution. 
+- **FR-005**: The default selected step MUST be the first available step after expression matching, step filtering, and sampling are applied. +- **FR-006**: Users MUST be able to request a selected visual step with `--step N` in default visual mode. +- **FR-007**: If the requested visual step is not tracked, the command MUST use the nearest tracked step and label the actual step displayed. +- **FR-008**: The default visual output MUST include a step-by-bin heatmap for the selected distribution across the available displayed steps. +- **FR-008a**: When color is enabled, the default visual output MUST use a Rich-rendered blue gradient inspired by the Aim web Distributions tab rather than the metric plot's high-contrast terminal palette. +- **FR-009**: The default visual output MUST remain non-interactive and MUST complete from a single command invocation. +- **FR-010**: The existing `--table` output mode MUST remain available and keep the existing tensor-table workflow. +- **FR-011**: The existing `--csv` output mode MUST remain available and keep parseable distribution histogram export data. +- **FR-012**: The existing `--json` output mode MUST remain available and keep parseable distribution histogram export data. +- **FR-013**: Existing filtering and sampling options, including step ranges and head, tail, or interval sampling, MUST continue to apply before default visual rendering. +- **FR-014**: Existing display controls such as width, height, and no-color behavior MUST remain accepted where they are already part of the trace command surface. +- **FR-015**: Error and empty-result messages MUST remain concise, non-destructive, and free of traceback-style output for expected user-facing conditions. + +### Key Entities + +- **Distribution Match**: A distribution series returned by the user's expression, including run identity, name, context, and tracked points. 
+- **Distribution Name List**: The ordered set of matched distribution names shown to the user so they can see what the expression found. +- **Distribution Point**: A tracked step and optional epoch with histogram bin edges and weights. +- **Selected Distribution**: The first non-empty distribution series chosen for default visual rendering. +- **Selected Step**: The tracked step used for the current-step histogram, either the default first step or the nearest tracked step to the user's requested value. +- **Visual Distribution Output**: The non-interactive default output containing the name list, current-step histogram, and step-by-bin heatmap. +- **Structured Export Output**: The table, CSV, or JSON output selected by explicit output-mode flags. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: A user can run a default distribution trace command against a repository with at least 16 matched distributions and identify the available names and selected visualized name in one command result. +- **SC-002**: For a selected distribution with at least 40 tracked steps and 64 histogram bins, the default command result shows both a current-step histogram and a cross-step heatmap without requiring follow-up interaction. +- **SC-003**: A requested visual step that does not exactly exist resolves to a labeled tracked step in 100% of valid default visual runs. +- **SC-004**: Existing `--table`, `--csv`, and `--json` distribution commands continue to produce parseable output with zero intentional schema changes. +- **SC-005**: Expected empty-result and invalid-step-range conditions complete without repository mutation and without traceback-style output. + +## Assumptions + +- Users already know how to narrow distribution results with the existing expression syntax when they want a distribution other than the first match. 
+- The first matched distribution is an acceptable default selection because it mirrors the web UI's default expanded item behavior. +- The default visual output is intended as a static terminal snapshot, not as an interactive terminal UI. +- Step selection affects only the default visual view; structured exports remain full-data outputs controlled by expression, filtering, and sampling options. +- Developers may validate multi-name distribution visuals against any local Aim repository that contains matching distribution sequences (for example the repository rooted at `data/` described in contributor docs); CI keeps reliability via deterministic unit fixtures while integration coverage skips when no distributions exist. diff --git a/specs/005-distribution-trace-visual/tasks.md b/specs/005-distribution-trace-visual/tasks.md new file mode 100644 index 0000000..1a82dad --- /dev/null +++ b/specs/005-distribution-trace-visual/tasks.md @@ -0,0 +1,241 @@ +# Tasks: Distribution Trace Visual + +**Input**: Design documents from `specs/005-distribution-trace-visual/` +**Prerequisites**: plan.md, spec.md, research.md, data-model.md, contracts/cli-output.md, quickstart.md + +**Tests**: Test tasks are included because the feature changes an owned CLI +default, adds a new CLI option, and must preserve existing output contracts, +read-only behavior, safe failure modes, and native Aim passthrough boundaries. + +**Organization**: Tasks are grouped by user story so each story can be +implemented and tested as an independently useful increment. + +## Format: `[ID] [P?] 
[Story] Description` + +- **[P]**: Can run in parallel because it touches different files or depends + only on completed foundation work +- **[Story]**: Maps task to a user story (`US1`, `US2`, `US3`) +- Every task includes exact repository-relative file paths + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Prepare distribution-specific test helpers and validation entry +points without changing runtime behavior yet. + +- [X] T001 Create reusable multi-step `DistributionSeries` and `DistributionPoint` fixture helpers in `tests/unit/test_trace_distribution_views.py` +- [X] T002 [P] Add distribution integration helpers guarded by repository fixtures in `tests/integration/test_trace_command.py` + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Add shared parser and visual-selection primitives required by the +default visual output and `--step` behavior. + +**Critical**: No user story work should begin until this phase is complete. + +- [X] T003 [P] Add parser unit tests for accepted `--step`, missing `--step` value, and non-integer `--step` value in `tests/unit/test_trace_helpers.py` +- [X] T004 Extend `TraceInvocation` and `parse_trace_invocation()` with `selected_step: int | None` and `--step N` validation in `src/aimx/commands/trace.py` +- [X] T005 [P] Add unit tests for first non-empty distribution selection, default first point selection, no non-empty series handling, single-step series, and all-zero weights in `tests/unit/test_trace_distribution_views.py` +- [X] T006 Implement distribution visual selection helpers for selected series, selected point, and histogram/heatmap render inputs in `src/aimx/rendering/trace_views.py` +- [X] T007 Run `uv run pytest tests/unit/test_trace_helpers.py tests/unit/test_trace_distribution_views.py -q` and fix foundational failures in `src/aimx/commands/trace.py` and `src/aimx/rendering/trace_views.py` + +**Checkpoint**: Parser and visual-selection primitives are ready; user story 
implementation can start. + +--- + +## Phase 3: User Story 1 - Inspect Distribution Visually By Default (Priority: P1) MVP + +**Goal**: Users can run `aimx trace distribution <expression>` without an +output-mode flag and see matched distribution names plus a default histogram +and heatmap for the first selected series. + +**Independent Test**: Run `uv run aimx trace distribution "distribution.name != ''" --repo data` +and confirm the output lists all matched names, marks the selected name, labels +the displayed step, shows a current-step histogram, and shows a step-by-bin +heatmap without prompting for interaction (skip manual verification when your +checkout has no histogram data). + +### Tests for User Story 1 + +- [X] T008 [P] [US1] Add renderer unit tests for default visual output sections, selected-name marker, selected context, default first-step label, histogram label, and heatmap label in `tests/unit/test_trace_distribution_views.py` +- [X] T009 [P] [US1] Add integration tests for default `trace distribution "distribution.name != ''" --repo data` visual output in `tests/integration/test_trace_command.py` +- [X] T010 [P] [US1] Add contract tests for default distribution visual output and no-interaction deterministic text output in `tests/contract/test_trace_contract.py` + +### Implementation for User Story 1 + +- [X] T011 [US1] Implement `render_distribution_visual()` with `Distributions`, selected step, histogram, and heatmap sections in `src/aimx/rendering/trace_views.py` +- [X] T012 [US1] Route distribution plot/default mode to `render_distribution_visual()` while preserving JSON, CSV, and table branches in `src/aimx/commands/trace.py` +- [X] T013 [US1] Preserve existing no-match and no-data-in-step-range messages for distribution default mode in `src/aimx/commands/trace.py` +- [X] T014 [US1] Run `uv run pytest tests/unit/test_trace_distribution_views.py tests/integration/test_trace_command.py tests/contract/test_trace_contract.py -q` and fix US1 failures in 
`src/aimx/rendering/trace_views.py` and `src/aimx/commands/trace.py` + +**Checkpoint**: User Story 1 is fully functional and independently testable. + +--- + +## Phase 4: User Story 2 - Choose A Display Step Non-Interactively (Priority: P2) + +**Goal**: Users can pass `--step N` to choose the current-step histogram in the +default visual output, with deterministic nearest-step fallback. + +**Independent Test**: Run `uv run aimx trace distribution "distribution.name != ''" --repo data --step <tracked-step>` +and confirm `<tracked-step>` is displayed, then run with a non-tracked step +between two neighbors and confirm the output labels the actual nearest tracked +step. + +### Tests for User Story 2 + +- [X] T015 [P] [US2] Add renderer unit tests for exact step selection, nearest lower step selection, nearest higher step selection, and lower-step tie resolution in `tests/unit/test_trace_distribution_views.py` +- [X] T016 [P] [US2] Add integration tests for dynamic `--step` selection, nearest-step fallback, and labeled actual step output with `--repo data` in `tests/integration/test_trace_command.py` +- [X] T017 [P] [US2] Add contract tests for missing `--step` value and non-integer `--step` exit-code `2` errors in `tests/contract/test_trace_contract.py` + +### Implementation for User Story 2 + +- [X] T018 [US2] Pass `invocation.selected_step` into the default distribution visual renderer from `_render_distribution_trace()` in `src/aimx/commands/trace.py` +- [X] T019 [US2] Implement exact, nearest, and lower-tie selected-step resolution with actual-step labeling in `src/aimx/rendering/trace_views.py` +- [X] T020 [US2] Ensure selected-step request notes are readable with and without ANSI color in `src/aimx/rendering/trace_views.py` +- [X] T021 [US2] Run `uv run pytest tests/unit/test_trace_helpers.py tests/unit/test_trace_distribution_views.py tests/integration/test_trace_command.py tests/contract/test_trace_contract.py -q` and fix US2 failures in `src/aimx/commands/trace.py` and 
`src/aimx/rendering/trace_views.py` + +**Checkpoint**: User Stories 1 and 2 both work independently. + +--- + +## Phase 5: User Story 3 - Keep Existing Scriptable Outputs (Priority: P3) + +**Goal**: Existing `--table`, `--csv`, and `--json` distribution workflows keep +their current tensor/export behavior and do not emit the new default visual +sections. + +**Independent Test**: Run distribution trace commands with `--table`, `--csv`, +and `--json` against `data`; confirm each output remains parseable or +readable in its existing shape and excludes the default visual histogram and +heatmap sections. + +### Tests for User Story 3 + +- [X] T022 [P] [US3] Add unit tests proving `render_distribution_table()`, `render_distribution_csv()`, and `render_distribution_json()` keep their existing tensor, CSV, and JSON fields in `tests/unit/test_trace_distribution_views.py` +- [X] T023 [P] [US3] Add integration tests for distribution `--table`, `--csv`, and `--json` against `data`, including `--step` supplied with each explicit mode, in `tests/integration/test_trace_command.py` +- [X] T024 [P] [US3] Add contract tests proving explicit distribution modes exclude default visual sections and remain parseable in `tests/contract/test_trace_contract.py` + +### Implementation for User Story 3 + +- [X] T025 [US3] Ensure `_render_distribution_trace()` branches `--json`, `--csv`, and `--table` before default visual rendering in `src/aimx/commands/trace.py` +- [X] T026 [US3] Preserve `render_distribution_table()`, `render_distribution_csv()`, and `render_distribution_json()` output schemas while adding visual rendering in `src/aimx/rendering/trace_views.py` +- [X] T027 [US3] Ensure `--step` does not filter table, CSV, or JSON rows in explicit distribution modes in `src/aimx/commands/trace.py` +- [X] T028 [US3] Run `uv run pytest tests/unit/test_trace_distribution_views.py tests/integration/test_trace_command.py tests/contract/test_trace_contract.py -q` and fix US3 failures in 
`src/aimx/commands/trace.py` and `src/aimx/rendering/trace_views.py` + +**Checkpoint**: All user stories are independently functional. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Complete discoverability, safety validation, and full regression +coverage across the feature. + +- [X] T029 [P] Update trace distribution help text with default visual behavior, `--step`, and explicit `--table` tensor usage in `src/aimx/commands/help.py` +- [X] T030 [P] Update README distribution examples so default output is visual and tensor output uses `--table` in `README.md` +- [X] T031 [P] Update quickstart verification notes if implementation behavior differs from planned examples in `specs/005-distribution-trace-visual/quickstart.md` +- [X] T032 Run quickstart sections 3-9 manually and record any deviations in `specs/005-distribution-trace-visual/quickstart.md` +- [X] T033 Run passthrough and owned-command regression tests with `uv run pytest tests/contract/test_cli_contract.py tests/integration/test_missing_native_aim.py tests/integration/test_missing_python_aim_package.py -q` and fix regressions in `src/aimx/router.py`, `src/aimx/cli.py`, or `src/aimx/commands/trace.py` +- [X] T034 Run the full suite with `uv run pytest -q` and fix any regressions in touched files under `src/aimx/` and `tests/` +- [X] T035 Update final implementation verification notes in `specs/005-distribution-trace-visual/checklists/requirements.md` + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 Setup**: No dependencies; can start immediately. +- **Phase 2 Foundational**: Depends on Phase 1; blocks every user story. +- **Phase 3 US1**: Depends on Phase 2; MVP scope. +- **Phase 4 US2**: Depends on Phase 2 and the default visual renderer from US1. +- **Phase 5 US3**: Depends on Phase 2 and can be validated after default visual routing exists. +- **Phase 6 Polish**: Depends on whichever user stories are included in the delivery. 
+ +### User Story Dependencies + +- **US1 (P1)**: First independently valuable slice; no dependency on US2 or US3. +- **US2 (P2)**: Extends the default visual output with requested-step selection; depends on US1's visual renderer but remains independently testable after US1. +- **US3 (P3)**: Protects structured export behavior; can be worked alongside US1/US2 with coordination around `src/aimx/commands/trace.py`. + +### Within Each User Story + +- Write tests first and confirm they fail for the missing behavior. +- Parser and selection helpers before renderer routing. +- Renderer behavior before CLI integration assertions. +- Story-specific pytest command before moving to the next priority. + +--- + +## Parallel Opportunities + +- T002 can run in parallel with T001 because it touches a different test file. +- T003 and T005 can run in parallel because they touch different behavior areas and no runtime implementation files. +- T008, T009, and T010 can run in parallel for US1 test coverage. +- T015, T016, and T017 can run in parallel for US2 test coverage. +- T022, T023, and T024 can run in parallel for US3 test coverage. +- T029, T030, and T031 can run in parallel during polish because they touch different documentation files. 
+ +--- + +## Parallel Example: User Story 1 + +```text +Task: "T008 [P] [US1] Add renderer unit tests for default visual output sections, selected-name marker, selected context, default first-step label, histogram label, and heatmap label in tests/unit/test_trace_distribution_views.py" +Task: "T009 [P] [US1] Add integration tests for default `trace distribution \"distribution.name != ''\" --repo data` visual output in tests/integration/test_trace_command.py" +Task: "T010 [P] [US1] Add contract tests for default distribution visual output and no-interaction deterministic text output in tests/contract/test_trace_contract.py" +``` + +## Parallel Example: User Story 2 + +```text +Task: "T015 [P] [US2] Add renderer unit tests for exact step selection, nearest lower step selection, nearest higher step selection, and lower-step tie resolution in tests/unit/test_trace_distribution_views.py" +Task: "T016 [P] [US2] Add integration tests for `--step` selection, nearest-step fallback, and labeled actual step output with `--repo data` in tests/integration/test_trace_command.py" +Task: "T017 [P] [US2] Add contract tests for missing `--step` value and non-integer `--step` exit-code `2` errors in tests/contract/test_trace_contract.py" +``` + +## Parallel Example: User Story 3 + +```text +Task: "T022 [P] [US3] Add unit tests proving `render_distribution_table()`, `render_distribution_csv()`, and `render_distribution_json()` keep their existing tensor, CSV, and JSON fields in tests/unit/test_trace_distribution_views.py" +Task: "T023 [P] [US3] Add integration tests for distribution `--table`, `--csv`, and `--json` against `data`, including `--step` supplied with each explicit mode, in tests/integration/test_trace_command.py" +Task: "T024 [P] [US3] Add contract tests proving explicit distribution modes exclude default visual sections and remain parseable in tests/contract/test_trace_contract.py" +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1 Only) + +1. 
Complete Phase 1 setup. +2. Complete Phase 2 foundational parser and visual-selection helpers. +3. Complete Phase 3 US1. +4. Stop and validate `uv run aimx trace distribution "distribution.name != ''" --repo data`. +5. Run US1 unit, integration, and contract tests before adding `--step` behavior. + +### Incremental Delivery + +1. US1: deliver default distribution name list, selected histogram, and heatmap. +2. US2: add `--step` exact and nearest-step visual selection. +3. US3: protect explicit table, CSV, and JSON export behavior. +4. Polish: update docs, run quickstart, and run full regression suite. + +### Multi-Developer Coordination + +- One developer owns `src/aimx/commands/trace.py` during parser and routing tasks to avoid conflicts. +- One developer can own `src/aimx/rendering/trace_views.py` and `tests/unit/test_trace_distribution_views.py`. +- One developer can own integration and contract tests in `tests/integration/test_trace_command.py` and `tests/contract/test_trace_contract.py`. +- Documentation tasks can proceed after the CLI shape is stable. + +--- + +## Notes + +- `[P]` means the task can be parallelized only after its stated phase dependencies are satisfied. +- Story labels map directly to the spec user stories. +- Keep the command read-only; do not call Aim mutation APIs such as `run.set`, `track`, artifact logging, migration, or repair operations. +- Preserve existing metric trace, distribution table, distribution CSV, distribution JSON, and native Aim passthrough contracts throughout implementation. +- Commit after each phase or a small coherent task group when using the git hook workflow. 
diff --git a/src/aimx/__init__.py b/src/aimx/__init__.py index 4c7823b..11b937b 100644 --- a/src/aimx/__init__.py +++ b/src/aimx/__init__.py @@ -1,3 +1,3 @@ __all__ = ["__version__"] -__version__ = "0.3.2" +__version__ = "0.3.3" diff --git a/src/aimx/aim_bridge/metric_stats.py b/src/aimx/aim_bridge/metric_stats.py index 6dc6864..44cc4f3 100644 --- a/src/aimx/aim_bridge/metric_stats.py +++ b/src/aimx/aim_bridge/metric_stats.py @@ -3,6 +3,7 @@ import contextlib import datetime as dt import io +import tokenize from dataclasses import dataclass from pathlib import Path from typing import Any @@ -53,6 +54,26 @@ def max(self) -> tuple[float, int]: return (float(self.values[idx]), int(self.steps[idx])) +@dataclass(frozen=True) +class DistributionPoint: + step: int + epoch: float | None + weights: np.ndarray + bin_edges: np.ndarray + + +@dataclass +class DistributionSeries: + run: RunMeta + name: str + context: dict[str, Any] + points: list[DistributionPoint] + + @property + def count(self) -> int: + return len(self.points) + + def _extract_run_meta(run: Any) -> RunMeta: creation_time = getattr(run, "creation_time", None) if creation_time is None: @@ -233,6 +254,97 @@ def _accessor() -> Any: return rows +def _normalize_distribution_query_expression(expression: str) -> str: + """Alias documented ``distribution`` queries to Aim's ``distributions`` variable.""" + tokens: list[tokenize.TokenInfo] = [] + previous_significant_token = "" + try: + for token in tokenize.generate_tokens(io.StringIO(expression).readline): + if ( + token.type == tokenize.NAME + and token.string == "distribution" + and previous_significant_token != "." 
+ ): + token = tokenize.TokenInfo( + token.type, + "distributions", + token.start, + token.end, + token.line, + ) + tokens.append(token) + if token.type not in { + tokenize.COMMENT, + tokenize.DEDENT, + tokenize.ENDMARKER, + tokenize.INDENT, + tokenize.NEWLINE, + tokenize.NL, + }: + previous_significant_token = token.string + except tokenize.TokenError: + return expression + + return tokenize.untokenize(tokens) + + +def collect_distribution_series(expression: str, repo_path: Path) -> list[DistributionSeries]: + """Run an Aim distribution query and return flat ``DistributionSeries`` records.""" + from aimx.aim_bridge.hash_resolver import resolve_hash_prefixes + + expression = resolve_hash_prefixes(expression, repo_path) + expression = _normalize_distribution_query_expression(expression) + + try: + from aim import Repo + from aim.sdk.types import QueryReportMode + except ModuleNotFoundError as error: + raise RuntimeError( + "`aimx` requires the Python `aim` package in the current environment." 
+ ) from error + + repo = Repo(str(repo_path)) + results: list[DistributionSeries] = [] + + stderr_buf = io.StringIO() + with contextlib.redirect_stderr(stderr_buf): + query_result = repo.query_distributions( + expression, report_mode=QueryReportMode.DISABLED + ) + for run_collection in query_result.iter_runs(): + for distribution in run_collection: + run_meta = _extract_run_meta(distribution.run) + try: + steps, (values, epochs, _timestamps) = distribution.data.items_list() + except ValueError: + steps, values, epochs = [], [], [] + + points: list[DistributionPoint] = [] + for idx, value in enumerate(values): + step_value = int(steps[idx]) + epoch_value = float(epochs[idx]) if idx < len(epochs) else None + weights, bin_edges = value.to_np_histogram() + points.append( + DistributionPoint( + step=step_value, + epoch=epoch_value, + weights=np.array(weights, dtype=float), + bin_edges=np.array(bin_edges, dtype=float), + ) + ) + + results.append( + DistributionSeries( + run=run_meta, + name=distribution.name, + context=distribution.context.to_dict(), + points=points, + ) + ) + + return results + + def subsample(series: MetricSeries, *, head: int | None, tail: int | None, every: int | None) -> MetricSeries: """Return a new MetricSeries with points filtered by head/tail/every.""" n = len(series.values) @@ -258,6 +370,48 @@ def subsample(series: MetricSeries, *, head: int | None, tail: int | None, every ) +def filter_distribution_by_step_range( + series: DistributionSeries, + start: int | None, + end: int | None, +) -> DistributionSeries: + """Return a new ``DistributionSeries`` filtered by inclusive step bounds.""" + points = series.points + if start is not None: + points = [point for point in points if point.step >= start] + if end is not None: + points = [point for point in points if point.step <= end] + return DistributionSeries( + run=series.run, + name=series.name, + context=series.context, + points=points, + ) + + +def subsample_distribution( + series: 
DistributionSeries, + *, + head: int | None, + tail: int | None, + every: int | None, +) -> DistributionSeries: + """Return a new ``DistributionSeries`` filtered by head/tail/every.""" + points = series.points + if head is not None: + points = points[:head] + if tail is not None: + points = points[-tail:] + if every is not None and every > 1: + points = points[::every] + return DistributionSeries( + run=series.run, + name=series.name, + context=series.context, + points=points, + ) + + def parse_epoch_slice(s: str) -> tuple[float | None, float | None]: """Parse a ``start:end`` slice string into inclusive float bounds for epoch filtering. diff --git a/src/aimx/commands/help.py b/src/aimx/commands/help.py index 309368f..6eedb9e 100644 --- a/src/aimx/commands/help.py +++ b/src/aimx/commands/help.py @@ -24,15 +24,20 @@ def render_help() -> str: " Example: aimx query images \"images\" --repo data --epochs 10:50", " Example: aimx query images \"images\" --repo data --head 10", " Example: aimx query params \"run.experiment=='cloud-segmentation'\" --repo data --param hparam.lr", - " trace Plot a metric's time-series from a local Aim repository", + " trace Plot metric time-series or distribution histograms from a local Aim repository", " Usage: aimx trace [--repo ]", + " Usage: aimx trace distribution [--repo ]", " Options: --table --csv --json", " --steps start:end (e.g. 
--steps 100:500, :50, 100:)", + " --step N (distribution visual mode only; nearest tracked step)", " --head N --tail N --every K", " --width W --height H --no-color", " Repo defaults to the current directory.", " Short run hashes in the expression are transparently expanded.", " Example: aimx trace \"metric.name=='loss'\" --repo data --steps 100:500", + " Example: aimx trace distribution \"distribution.name != ''\" --repo data --step 12300", + " Example: aimx trace distribution \"distribution.name=='weights'\" --repo data --table", + " Example: aimx trace distribution \"distribution.name=='weights'\" --repo data --json", "", "All other commands are delegated to native `aim`.", ] diff --git a/src/aimx/commands/trace.py b/src/aimx/commands/trace.py index 6501484..bc014dd 100644 --- a/src/aimx/commands/trace.py +++ b/src/aimx/commands/trace.py @@ -1,17 +1,28 @@ from __future__ import annotations import sys +from collections.abc import Callable from dataclasses import dataclass from pathlib import Path -from typing import Literal +from typing import Any, Literal from aimx.commands.query import QueryCommandResult, normalize_repo_path _MODES = {"plot", "table", "csv", "json"} +@dataclass(frozen=True) +class _TracePipeline: + collect: Callable[[str, Path], list[Any]] + filter_by_step_range: Callable[[Any, int | None, int | None], Any] + subsample: Callable[..., Any] + render: Callable[[list[Any], "TraceInvocation", bool], str] + no_matches_message: str + + @dataclass(frozen=True) class TraceInvocation: + target: Literal["metrics", "distribution"] expression: str repo_path: Path mode: Literal["plot", "table", "csv", "json"] = "plot" @@ -22,18 +33,32 @@ class TraceInvocation: height: int | None = None no_color: bool = False step_slice: str | None = None + selected_step: int | None = None def parse_trace_invocation(args: list[str]) -> TraceInvocation: if len(args) < 1: raise ValueError( - "Usage: aimx trace [--repo ] [--table|--csv|--json]" + "Usage: aimx trace 
[distribution] [--repo ] [--table|--csv|--json]" " [--steps start:end] [--head N] [--tail N] [--every K]" " [--width W] [--height H] [--no-color]" ) - expression = args[0] - rest = args[1:] + target: Literal["metrics", "distribution"] = "metrics" + expression: str | None = None + rest = args + if args[0] == "distribution": + target = "distribution" + if len(args) < 2: + raise ValueError( + "Usage: aimx trace distribution [--repo ] [--table|--csv|--json]" + " [--steps start:end] [--head N] [--tail N] [--every K] [--no-color]" + ) + expression = args[1] + rest = args[2:] + else: + expression = args[0] + rest = args[1:] mode: Literal["plot", "table", "csv", "json"] = "plot" repo_value = "." @@ -44,6 +69,7 @@ def parse_trace_invocation(args: list[str]) -> TraceInvocation: height: int | None = None no_color = False step_slice: str | None = None + selected_step: int | None = None index = 0 while index < len(rest): @@ -70,6 +96,14 @@ def parse_trace_invocation(args: list[str]) -> TraceInvocation: raise ValueError("Missing value for --steps.") step_slice = rest[index + 1] index += 2 + elif token == "--step": + if index + 1 >= len(rest): + raise ValueError("Missing value for --step.") + try: + selected_step = int(rest[index + 1]) + except ValueError: + raise ValueError(f"--step requires an integer, got: {rest[index + 1]}") + index += 2 elif token == "--head": if index + 1 >= len(rest): raise ValueError("Missing value for --head.") @@ -118,6 +152,7 @@ def parse_trace_invocation(args: list[str]) -> TraceInvocation: raise ValueError(f"Unsupported trace option: {token}") return TraceInvocation( + target=target, expression=expression, repo_path=Path(repo_value), mode=mode, @@ -128,72 +163,154 @@ def parse_trace_invocation(args: list[str]) -> TraceInvocation: height=height, no_color=no_color, step_slice=step_slice, + selected_step=selected_step, ) -def run_trace_command(args: list[str]) -> QueryCommandResult: - try: - invocation = parse_trace_invocation(args) - 
normalized_repo_path = normalize_repo_path(invocation.repo_path) - except ValueError as error: - return QueryCommandResult(exit_status=2, error_message=str(error)) +def _render_distribution_trace( + series_list: list[Any], + invocation: TraceInvocation, + no_color: bool, +) -> str: + from aimx.rendering.trace_views import ( + render_distribution_csv, + render_distribution_json, + render_distribution_table, + render_distribution_visual, + ) - is_tty = sys.stdout.isatty() - effective_no_color = invocation.no_color or not is_tty + if invocation.mode == "json": + return render_distribution_json(series_list) + if invocation.mode == "csv": + return render_distribution_csv(series_list) + if invocation.mode == "table": + return render_distribution_table(series_list, no_color=no_color) + return render_distribution_visual( + series_list, + selected_step=invocation.selected_step, + width=invocation.width, + height=invocation.height, + no_color=no_color, + ) - try: + +def _render_metric_trace( + series_list: list[Any], + invocation: TraceInvocation, + no_color: bool, +) -> str: + from aimx.rendering.trace_views import ( + render_csv, + render_plot, + render_trace_json, + render_trace_table, + ) + + if invocation.mode == "json": + return render_trace_json(series_list) + if invocation.mode == "csv": + return render_csv(series_list) + if invocation.mode == "table": + return render_trace_table(series_list, no_color=no_color) + return render_plot( + series_list, + width=invocation.width, + height=invocation.height, + ) + + +def _build_trace_pipeline( + target: Literal["metrics", "distribution"], +) -> _TracePipeline: + if target == "distribution": from aimx.aim_bridge.metric_stats import ( - collect_metric_series, - filter_by_step_range, - parse_step_slice, - subsample, + collect_distribution_series, + filter_distribution_by_step_range, + subsample_distribution, ) - from aimx.rendering.trace_views import ( - render_csv, - render_plot, - render_trace_json, - render_trace_table, + + 
return _TracePipeline( + collect=collect_distribution_series, + filter_by_step_range=filter_distribution_by_step_range, + subsample=subsample_distribution, + render=_render_distribution_trace, + no_matches_message="No matching distributions found.", ) - series_list = collect_metric_series(invocation.expression, normalized_repo_path) + from aimx.aim_bridge.metric_stats import ( + collect_metric_series, + filter_by_step_range, + subsample, + ) - if not series_list: - return QueryCommandResult(exit_status=0, output="No matching metrics found.") + return _TracePipeline( + collect=collect_metric_series, + filter_by_step_range=filter_by_step_range, + subsample=subsample, + render=_render_metric_trace, + no_matches_message="No matching metrics found.", + ) - # Step range filter is a hard constraint applied before density subsampling - if invocation.step_slice is not None: - step_start, step_end = parse_step_slice(invocation.step_slice) - series_list = [filter_by_step_range(s, step_start, step_end) for s in series_list] - # Drop empty series so they don't clutter plots - series_list = [s for s in series_list if s.count > 0] - if not series_list: - return QueryCommandResult(exit_status=0, output="No data in the requested step range.") +def _execute_trace_pipeline( + invocation: TraceInvocation, + normalized_repo_path: Path, + pipeline: _TracePipeline, + *, + no_color: bool, +) -> QueryCommandResult: + from aimx.aim_bridge.metric_stats import parse_step_slice - # Density subsampling for visualisation - needs_sample = any( - x is not None for x in (invocation.head, invocation.tail, invocation.every) - ) - if needs_sample: - series_list = [ - subsample(s, head=invocation.head, tail=invocation.tail, every=invocation.every) - for s in series_list - ] - - if invocation.mode == "json": - output = render_trace_json(series_list) - elif invocation.mode == "csv": - output = render_csv(series_list) - elif invocation.mode == "table": - output = render_trace_table(series_list, 
no_color=effective_no_color) - else: - output = render_plot( - series_list, - width=invocation.width, - height=invocation.height, + series_list = pipeline.collect(invocation.expression, normalized_repo_path) + if not series_list: + return QueryCommandResult(exit_status=0, output=pipeline.no_matches_message) + + if invocation.step_slice is not None: + step_start, step_end = parse_step_slice(invocation.step_slice) + series_list = [ + pipeline.filter_by_step_range(series, step_start, step_end) + for series in series_list + ] + series_list = [series for series in series_list if series.count > 0] + + if not series_list: + return QueryCommandResult(exit_status=0, output="No data in the requested step range.") + + needs_sample = any(x is not None for x in (invocation.head, invocation.tail, invocation.every)) + if needs_sample: + series_list = [ + pipeline.subsample( + series, + head=invocation.head, + tail=invocation.tail, + every=invocation.every, ) + for series in series_list + ] + + return QueryCommandResult( + exit_status=0, + output=pipeline.render(series_list, invocation, no_color), + ) + - return QueryCommandResult(exit_status=0, output=output) +def run_trace_command(args: list[str]) -> QueryCommandResult: + try: + invocation = parse_trace_invocation(args) + normalized_repo_path = normalize_repo_path(invocation.repo_path) + except ValueError as error: + return QueryCommandResult(exit_status=2, error_message=str(error)) + + is_tty = sys.stdout.isatty() + effective_no_color = invocation.no_color or not is_tty + + try: + return _execute_trace_pipeline( + invocation, + normalized_repo_path, + _build_trace_pipeline(invocation.target), + no_color=effective_no_color, + ) except RuntimeError as error: return QueryCommandResult(exit_status=2, error_message=str(error)) diff --git a/src/aimx/rendering/trace_views.py b/src/aimx/rendering/trace_views.py index fd9eed8..e29f057 100644 --- a/src/aimx/rendering/trace_views.py +++ b/src/aimx/rendering/trace_views.py @@ -6,14 +6,35 
@@ import json import math import shutil +from dataclasses import dataclass from typing import Any from rich.console import Console +from rich.markup import escape from rich.table import Table +from rich.text import Text -from aimx.aim_bridge.metric_stats import MetricSeries, RunMeta +from aimx.aim_bridge.metric_stats import DistributionSeries, MetricSeries from aimx.rendering import colors +_DISTRIBUTION_BLOCKS = "▁▂▃▄▅▆▇█" +_DISTRIBUTION_BLUE_STYLES = ( + "#dbeafe", + "#bfdbfe", + "#93c5fd", + "#60a5fa", + "#3b82f6", + "#2563eb", + "#1d4ed8", + "#1e40af", +) +_DISTRIBUTION_ZERO_STYLE = "#334155" +_DISTRIBUTION_HEADER_STYLE = "#93c5fd" +_DISTRIBUTION_DIM_STYLE = "#64748b" +_DISTRIBUTION_RULE_STYLE = "#1e3a8a" +_DISTRIBUTION_MARKER_STYLE = "#2563eb bold" +_DISTRIBUTION_SELECTED_STYLE = "bold white" + def _short_hash(h: str) -> str: return h[:8] @@ -25,6 +46,12 @@ def _fmt_context(ctx: dict[str, Any]) -> str: return " ".join(f"{k}={v}" for k, v in sorted(ctx.items())) +def _fmt_context_for_visual(ctx: dict[str, Any]) -> str: + if not ctx: + return "" + return ", ".join(f"{k}={json.dumps(v)}" for k, v in sorted(ctx.items())) + + def _series_label(series: MetricSeries) -> str: parts = [_short_hash(series.run.hash)] if series.run.experiment: @@ -38,6 +65,221 @@ def _series_label(series: MetricSeries) -> str: return " · ".join(parts) +def _distribution_series_label(series: DistributionSeries) -> str: + parts = [_short_hash(series.run.hash)] + if series.run.experiment: + parts.append(series.run.experiment) + elif series.run.name: + parts.append(series.run.name) + parts.append(series.name) + ctx = _fmt_context(series.context) + if ctx: + parts.append(f"[{ctx}]") + return " · ".join(parts) + + +@dataclass(frozen=True) +class DistributionVisualSelection: + selected_index: int + series: DistributionSeries + point_index: int + requested_step: int | None + resolved_step: int + + @property + def point(self): + return self.series.points[self.point_index] + + @property + def 
used_nearest_step(self) -> bool: + return self.requested_step is not None and self.requested_step != self.resolved_step + + +def select_distribution_visual( + series_list: list[DistributionSeries], + *, + selected_step: int | None = None, +) -> DistributionVisualSelection | None: + """Select the first non-empty distribution and resolve the display step.""" + for selected_index, series in enumerate(series_list): + if series.count == 0: + continue + if selected_step is None: + return DistributionVisualSelection( + selected_index=selected_index, + series=series, + point_index=0, + requested_step=None, + resolved_step=series.points[0].step, + ) + + point_index, point = min( + enumerate(series.points), + key=lambda item: (abs(item[1].step - selected_step), item[1].step), + ) + return DistributionVisualSelection( + selected_index=selected_index, + series=series, + point_index=point_index, + requested_step=selected_step, + resolved_step=point.step, + ) + return None + + +def _bin_range(point: Any) -> str: + edges = point.bin_edges.tolist() + if not edges: + return "" + return f"{edges[0]:.6g} .. 
{edges[-1]:.6g}" + + +def _compress_values(values: list[float], width: int) -> list[float]: + if width <= 0 or len(values) <= width: + return values + compressed: list[float] = [] + for index in range(width): + start = index * len(values) // width + end = (index + 1) * len(values) // width + bucket = values[start:end] or [values[start]] + finite = [v for v in bucket if math.isfinite(v)] + compressed.append(max(finite) if finite else 0.0) + return compressed + + +def _intensity_text(values: list[float], *, width: int) -> Text: + values = _compress_values(values, width) + text = Text() + if not values: + return text + finite_samples = [float(v) for v in values if math.isfinite(v)] + max_value = max(finite_samples) if finite_samples else 0.0 + if max_value <= 0: + text.append(_DISTRIBUTION_BLOCKS[0] * len(values), style=_DISTRIBUTION_ZERO_STYLE) + return text + for value in values: + if not math.isfinite(value) or value <= 0: + text.append(_DISTRIBUTION_BLOCKS[0], style=_DISTRIBUTION_ZERO_STYLE) + continue + scale = float(value) / max_value + index = round(scale * (len(_DISTRIBUTION_BLOCKS) - 1)) + text.append(_DISTRIBUTION_BLOCKS[index], style=_DISTRIBUTION_BLUE_STYLES[index]) + return text + + +def _sample_points_for_height(points: list[Any], max_rows: int) -> list[Any]: + if max_rows <= 0 or len(points) <= max_rows: + return points + if max_rows == 1: + # Evenly-spaced indices use (max_rows - 1) in the denominator; skip when + # max_rows is 1 and show the latest step as a single representative row. 
+ return [points[-1]] + indexes = sorted({round(i * (len(points) - 1) / (max_rows - 1)) for i in range(max_rows)}) + return [points[index] for index in indexes] + + +def _render_distribution_name_list( + console: Console, + series_list: list[DistributionSeries], + selected_index: int, +) -> None: + console.print(f"[{_DISTRIBUTION_HEADER_STYLE}]Distributions[/]") + for index, series in enumerate(series_list): + label = escape(series.name) + count = f"{series.count} steps" if series.count != 1 else "1 step" + if index == selected_index: + console.print( + f"[{_DISTRIBUTION_MARKER_STYLE}]▌[/] " + f"[{_DISTRIBUTION_SELECTED_STYLE}]{label}[/] " + f"[{_DISTRIBUTION_DIM_STYLE}]({count})[/]" + ) + ctx = _fmt_context_for_visual(series.context) + if ctx: + console.print(f" [{_DISTRIBUTION_DIM_STYLE}]{escape(ctx)}[/]") + else: + console.print(f" [{_DISTRIBUTION_DIM_STYLE}]{label} ({count})[/]") + + +def render_distribution_visual( + series_list: list[DistributionSeries], + *, + selected_step: int | None = None, + width: int | None = None, + height: int | None = None, + no_color: bool = False, +) -> str: + """Render distribution names plus a selected histogram and heatmap.""" + term_width = shutil.get_terminal_size(fallback=(120, 30)).columns + console_width = width or (120 if no_color else term_width) + chart_width = max(24, min(96, console_width - 18)) + max_heatmap_rows = max(1, (height - 12) if height is not None else 18) + + buf = io.StringIO() + console = Console( + file=buf, + no_color=no_color, + color_system=None if no_color else "truecolor", + force_terminal=not no_color, + width=console_width, + highlight=False, + ) + + selection = select_distribution_visual(series_list, selected_step=selected_step) + if selection is None: + console.print("No data in the requested step range.") + return buf.getvalue() + + _render_distribution_name_list(console, series_list, selection.selected_index) + selected = selection.series + point = selection.point + weights = [float(v) for v 
in point.weights.tolist()] + + console.print() + console.print( + f"[{_DISTRIBUTION_HEADER_STYLE}]╭─ Histogram[/] " + f"[{_DISTRIBUTION_SELECTED_STYLE}]{escape(selected.name)}[/] " + f"[{_DISTRIBUTION_HEADER_STYLE}]Step {selection.resolved_step}[/]" + ) + if selection.used_nearest_step: + console.print( + f"[{_DISTRIBUTION_DIM_STYLE}]Requested step {selection.requested_step}; " + f"showing nearest tracked step {selection.resolved_step}.[/]" + ) + bin_range = _bin_range(point) + if bin_range: + console.print( + f"[{_DISTRIBUTION_DIM_STYLE}]Bins {bin_range}; " + f"max weight {max(weights) if weights else 0:.6g}[/]" + ) + console.print(_intensity_text(weights, width=chart_width)) + console.print(f"[{_DISTRIBUTION_RULE_STYLE}]╰{'─' * min(chart_width, console_width - 2)}[/]") + + console.print() + console.print(f"[{_DISTRIBUTION_HEADER_STYLE}]╭─ Heatmap (steps x bins)[/]") + selected_points = _sample_points_for_height(selected.points, max_heatmap_rows) + for heatmap_point in selected_points: + row = Text(f"{heatmap_point.step:>8} | ", style=_DISTRIBUTION_DIM_STYLE) + row.append_text( + _intensity_text( + [float(v) for v in heatmap_point.weights.tolist()], + width=chart_width, + ) + ) + console.print(row) + if len(selected_points) < len(selected.points): + console.print( + f"[{_DISTRIBUTION_DIM_STYLE}]Showing {len(selected_points)} of {len(selected.points)} steps; " + "use --height to adjust.[/]" + ) + scale = Text("Scale: ", style=_DISTRIBUTION_DIM_STYLE) + scale.append("low", style=_DISTRIBUTION_BLUE_STYLES[0]) + scale.append(" -> ", style=_DISTRIBUTION_DIM_STYLE) + scale.append("high", style=_DISTRIBUTION_BLUE_STYLES[-1]) + console.print(scale) + + return buf.getvalue() + + def render_plot( series_list: list[MetricSeries], *, @@ -166,3 +408,105 @@ def render_trace_json(series_list: list[MetricSeries]) -> str: } ) return json.dumps(result) + + +def _format_tensor(values: list[float], *, limit: int = 12) -> str: + if len(values) <= limit: + return "[" + ", 
".join(f"{v:.6g}" for v in values) + "]" + head = ", ".join(f"{v:.6g}" for v in values[:limit]) + return f"[{head}, …] ({len(values)} bins)" + + +def render_distribution_table( + series_list: list[DistributionSeries], + *, + no_color: bool = False, +) -> str: + """Render distribution series as a step-indexed tensor table.""" + width = 120 if no_color else shutil.get_terminal_size(fallback=(120, 24)).columns + buf = io.StringIO() + console = Console( + file=buf, + no_color=no_color, + force_terminal=not no_color, + width=width, + highlight=False, + ) + + for series in series_list: + label = _distribution_series_label(series) + console.print(f"\n[{colors.HEADER}]{label}[/] [{colors.HEADER}]{series.count} points[/]") + + table = Table( + show_header=True, + header_style=colors.HEADER, + box=None, + pad_edge=True, + show_edge=False, + padding=(0, 1), + ) + table.add_column("STEP", justify="right") + table.add_column("EPOCH", justify="right") + table.add_column("TENSOR", justify="left", style=colors.NUMBER_EMPH) + + for point in series.points: + epoch = f"{point.epoch:.6g}" if point.epoch is not None else "—" + weights = point.weights.tolist() + table.add_row(str(point.step), epoch, _format_tensor(weights)) + + console.print(table) + + return buf.getvalue() + + +def render_distribution_csv(series_list: list[DistributionSeries]) -> str: + """Render distribution rows as CSV.""" + buf = io.StringIO() + writer = csv.writer(buf) + writer.writerow( + ["run_hash", "experiment", "distribution", "context", "step", "epoch", "bin_edges", "weights"] + ) + for series in series_list: + ctx_str = json.dumps(series.context, sort_keys=True) + for point in series.points: + writer.writerow( + [ + series.run.hash, + series.run.experiment or series.run.name or "", + series.name, + ctx_str, + point.step, + point.epoch if point.epoch is not None else "", + json.dumps(point.bin_edges.tolist()), + json.dumps(point.weights.tolist()), + ] + ) + return buf.getvalue() + + +def 
render_distribution_json(series_list: list[DistributionSeries]) -> str: + """Render distribution rows as JSON.""" + result: list[dict[str, Any]] = [] + for series in series_list: + result.append( + { + "run": { + "hash": series.run.hash, + "experiment": series.run.experiment, + "name": series.run.name, + }, + "distribution": series.name, + "context": series.context, + "count": series.count, + "points": [ + { + "step": point.step, + "epoch": point.epoch, + "bin_edges": point.bin_edges.tolist(), + "weights": point.weights.tolist(), + } + for point in series.points + ], + } + ) + return json.dumps(result) diff --git a/static/distributions.png b/static/distributions.png new file mode 100644 index 0000000..31137a8 Binary files /dev/null and b/static/distributions.png differ diff --git a/tests/contract/test_trace_contract.py b/tests/contract/test_trace_contract.py index bead2cc..93abf2f 100644 --- a/tests/contract/test_trace_contract.py +++ b/tests/contract/test_trace_contract.py @@ -3,8 +3,28 @@ import csv import io import json +from pathlib import Path + +import pytest from aimx.__main__ import main +from aimx.aim_bridge.metric_stats import DistributionSeries, collect_distribution_series +from aimx.commands.query import normalize_repo_path + + +def _require_distribution_series(sample_repo_root: Path) -> list[DistributionSeries]: + normalized = normalize_repo_path(sample_repo_root) + series_list = collect_distribution_series("distribution.name != ''", normalized) + if not series_list: + pytest.skip("sample Aim repository has no distribution sequences for contract tests") + return series_list + + +def _first_non_empty_series(series_list: list[DistributionSeries]) -> DistributionSeries: + for series in series_list: + if series.count > 0: + return series + pytest.skip("sample Aim repository has no non-empty distribution series for contract tests") def test_trace_plot_contract_produces_non_empty_output(capfd, sample_repo_root) -> None: @@ -75,3 +95,134 @@ def 
test_trace_invalid_expression_reports_error(capfd, sample_repo_root) -> None captured = capfd.readouterr() assert exit_code == 2 assert "Failed to evaluate trace" in captured.err + + +def test_trace_distribution_default_visual_contract(capfd, sample_repo_root: Path) -> None: + repo_root = sample_repo_root + _require_distribution_series(repo_root) + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--head", + "2", + "--no-color", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert "Distributions" in captured.out + assert "Histogram" in captured.out + assert "Heatmap (steps x bins)" in captured.out + assert not captured.err + + +def test_trace_distribution_step_missing_value_reports_error( + capfd, sample_repo_root: Path +) -> None: + repo_root = sample_repo_root + _require_distribution_series(repo_root) + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--step", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 2 + assert "Missing value for --step" in captured.err + + +def test_trace_distribution_step_non_integer_reports_error(capfd, sample_repo_root: Path) -> None: + repo_root = sample_repo_root + _require_distribution_series(repo_root) + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--step", + "abc", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 2 + assert "--step requires an integer" in captured.err + + +def test_trace_distribution_explicit_modes_exclude_visual_sections( + capfd, sample_repo_root: Path +) -> None: + repo_root = sample_repo_root + _require_distribution_series(repo_root) + + table_exit = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--table", + "--head", + "1", + "--no-color", + ] + ) + table_output = capfd.readouterr().out + assert table_exit 
== 0 + assert "TENSOR" in table_output + assert "Heatmap (steps x bins)" not in table_output + + csv_exit = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--csv", + "--head", + "1", + ] + ) + csv_output = capfd.readouterr().out + assert csv_exit == 0 + reader = csv.DictReader(io.StringIO(csv_output)) + assert reader.fieldnames is not None + assert "weights" in reader.fieldnames + assert "Heatmap (steps x bins)" not in csv_output + + json_exit = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--json", + "--head", + "1", + ] + ) + json_output = capfd.readouterr().out + payload = json.loads(json_output) + assert json_exit == 0 + assert payload + assert "points" in payload[0] + assert "Heatmap (steps x bins)" not in json_output diff --git a/tests/integration/test_trace_command.py b/tests/integration/test_trace_command.py index d4703c4..0d88a52 100644 --- a/tests/integration/test_trace_command.py +++ b/tests/integration/test_trace_command.py @@ -3,8 +3,54 @@ import csv import io import json +from pathlib import Path + +import pytest from aimx.__main__ import main +from aimx.aim_bridge.metric_stats import DistributionSeries, collect_distribution_series +from aimx.commands.query import normalize_repo_path + + +def _require_distribution_series(sample_repo_root: Path) -> list[DistributionSeries]: + """Return distribution series for ``data/``-style fixtures, or skip when absent.""" + normalized = normalize_repo_path(sample_repo_root) + series_list = collect_distribution_series("distribution.name != ''", normalized) + if not series_list: + pytest.skip("sample Aim repository has no distribution sequences for integration tests") + return series_list + + +def _first_non_empty_series(series_list: list[DistributionSeries]) -> DistributionSeries: + for series in series_list: + if series.count > 0: + return series + pytest.skip("sample Aim repository has no non-empty 
distribution series for integration tests") + + +def _sorted_unique_steps(series: DistributionSeries) -> list[int]: + return sorted({point.step for point in series.points}) + + +def _pick_exact_and_nearest_request(steps: list[int]) -> tuple[int, int]: + """Return ``(requested_step, expected_resolved_step)`` for nearest-step coverage.""" + if len(steps) < 2: + pytest.skip("sample distribution series needs at least two steps for step selection tests") + + lower, higher = steps[0], steps[1] + if higher - lower <= 1: + pytest.skip("sample distribution steps are too dense to construct a nearest-step gap test") + + requested = (lower + higher) // 2 + if abs(requested - lower) == abs(requested - higher): + pytest.skip("sample distribution steps do not produce a unique nearest-step candidate") + + if abs(requested - lower) < abs(requested - higher): + expected = lower + else: + expected = higher + + return requested, expected def test_trace_plot_produces_output_containing_metric_name(capfd, sample_repo_root) -> None: @@ -104,3 +150,185 @@ def test_trace_no_matching_expression_exits_cleanly(capfd, sample_repo_root) -> captured = capfd.readouterr() assert exit_code == 0 assert "No matching" in captured.out + + +def test_trace_distribution_default_visual_contains_list_histogram_and_heatmap( + capfd, sample_repo_root: Path +) -> None: + series_list = _require_distribution_series(sample_repo_root) + first_point = _first_non_empty_series(series_list).points[0] + repo_root = sample_repo_root + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--head", + "3", + "--no-color", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert "Distributions" in captured.out + assert "▌ " in captured.out + if _first_non_empty_series(series_list).context: + assert "kind=" in captured.out + assert "Histogram" in captured.out + assert f"Step {first_point.step}" in captured.out + assert "Heatmap (steps x bins)" in 
captured.out + assert "Scale: low -> high" in captured.out + + +def test_trace_distribution_step_selects_requested_step(capfd, sample_repo_root: Path) -> None: + series = _first_non_empty_series(_require_distribution_series(sample_repo_root)) + steps = _sorted_unique_steps(series) + if len(steps) < 2: + pytest.skip("sample distribution series needs at least two steps for exact step selection") + exact_step = steps[1] + repo_root = sample_repo_root + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--step", + str(exact_step), + "--no-color", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert f"Step {exact_step}" in captured.out + + +def test_trace_distribution_step_uses_nearest_tracked_step(capfd, sample_repo_root: Path) -> None: + series = _first_non_empty_series(_require_distribution_series(sample_repo_root)) + steps = _sorted_unique_steps(series) + requested, expected = _pick_exact_and_nearest_request(steps) + repo_root = sample_repo_root + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--step", + str(requested), + "--no-color", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert ( + f"Requested step {requested}; showing nearest tracked step {expected}." 
+ in captured.out + ) + + +def test_trace_distribution_table_mode_preserves_tensor_output_with_step( + capfd, sample_repo_root: Path +) -> None: + series = _first_non_empty_series(_require_distribution_series(sample_repo_root)) + steps = _sorted_unique_steps(series) + if len(steps) < 2: + pytest.skip("sample distribution series needs at least two steps for table step carryover checks") + table_step = steps[1] + repo_root = sample_repo_root + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--table", + "--step", + str(table_step), + "--head", + "2", + "--no-color", + ] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert "TENSOR" in captured.out + assert "Histogram" not in captured.out + assert "Heatmap" not in captured.out + + +def test_trace_distribution_json_mode_preserves_series_payload_with_step( + capfd, sample_repo_root: Path +) -> None: + series = _first_non_empty_series(_require_distribution_series(sample_repo_root)) + steps = _sorted_unique_steps(series) + if len(steps) < 2: + pytest.skip("sample distribution series needs at least two points for JSON head checks") + json_step = steps[1] + repo_root = sample_repo_root + + exit_code = main( + [ + "trace", + "distribution", + "distribution.name != ''", + "--repo", + str(repo_root), + "--json", + "--step", + str(json_step), + "--head", + "2", + ] + ) + + captured = capfd.readouterr() + payload = json.loads(captured.out) + assert exit_code == 0 + assert payload + assert payload[0]["count"] == 2 + assert "points" in payload[0] + + +def test_trace_distribution_csv_mode_preserves_rows_with_step( + capfd, sample_repo_root: Path +) -> None: + series = _first_non_empty_series(_require_distribution_series(sample_repo_root)) + if series.count < 1: + pytest.skip("sample distribution series needs points for CSV checks") + csv_step = series.points[0].step + repo_root = sample_repo_root + + exit_code = main( + [ + "trace", + "distribution", 
+ "distribution.name != ''", + "--repo", + str(repo_root), + "--csv", + "--step", + str(csv_step), + "--head", + "1", + ] + ) + + captured = capfd.readouterr() + rows = list(csv.DictReader(io.StringIO(captured.out))) + assert exit_code == 0 + assert rows + assert {"run_hash", "distribution", "step", "bin_edges", "weights"}.issubset(rows[0]) diff --git a/tests/unit/test_metric_stats.py b/tests/unit/test_metric_stats.py index 8bfae1f..919c292 100644 --- a/tests/unit/test_metric_stats.py +++ b/tests/unit/test_metric_stats.py @@ -2,6 +2,8 @@ import datetime as dt import math +import sys +import types import numpy as np import pytest @@ -218,3 +220,53 @@ def test_steps_are_sliced_consistently_with_values(self) -> None: s = _make_series(values=[10.0, 20.0, 30.0, 40.0], steps=[100, 200, 300, 400]) result = subsample(s, head=2, tail=None, every=None) assert result.steps.tolist() == [100, 200] + + +class TestCollectDistributionSeries: + def test_rewrites_singular_distribution_variable_before_query( + self, + monkeypatch: pytest.MonkeyPatch, + tmp_path, + ) -> None: + from aimx.aim_bridge.metric_stats import collect_distribution_series + + captured: dict[str, object] = {} + + class _FakeQueryReportMode: + DISABLED = object() + + class _FakeDistributionQueryResult: + def iter_runs(self): + return [] + + class _FakeRepo: + def __init__(self, repo_path: str) -> None: + captured["repo_path"] = repo_path + + def query_distributions(self, expression: str, *, report_mode: object): + captured["expression"] = expression + captured["report_mode"] = report_mode + return _FakeDistributionQueryResult() + + aim_module = types.ModuleType("aim") + aim_module.Repo = _FakeRepo + sdk_module = types.ModuleType("aim.sdk") + types_module = types.ModuleType("aim.sdk.types") + types_module.QueryReportMode = _FakeQueryReportMode + + monkeypatch.setitem(sys.modules, "aim", aim_module) + monkeypatch.setitem(sys.modules, "aim.sdk", sdk_module) + monkeypatch.setitem(sys.modules, "aim.sdk.types", 
types_module) + + result = collect_distribution_series( + "run.hparams.distribution == 'enabled' and distribution.name == 'weights'", + tmp_path, + ) + + assert result == [] + assert captured["repo_path"] == str(tmp_path) + assert ( + captured["expression"] + == "run.hparams.distribution == 'enabled' and distributions.name == 'weights'" + ) + assert captured["report_mode"] is _FakeQueryReportMode.DISABLED diff --git a/tests/unit/test_trace_distribution_views.py b/tests/unit/test_trace_distribution_views.py new file mode 100644 index 0000000..889de33 --- /dev/null +++ b/tests/unit/test_trace_distribution_views.py @@ -0,0 +1,244 @@ +from __future__ import annotations + +import csv +import io +import json + +import numpy as np + +from aimx.aim_bridge.metric_stats import DistributionPoint, DistributionSeries, RunMeta +from aimx.rendering.trace_views import ( + _compress_values, + _intensity_text, + render_distribution_csv, + render_distribution_json, + render_distribution_table, + render_distribution_visual, + select_distribution_visual, +) + + +def _sample_distribution_series() -> list[DistributionSeries]: + run = RunMeta(hash="1234567890abcdef", experiment="exp-a", name=None, creation_time=None) + return [ + DistributionSeries( + run=run, + name="weights", + context={"subset": "train"}, + points=[ + DistributionPoint( + step=10, + epoch=1.0, + bin_edges=np.array([0.0, 1.0, 2.0]), + weights=np.array([3.0, 5.0]), + ), + DistributionPoint( + step=20, + epoch=2.0, + bin_edges=np.array([0.0, 1.0, 2.0]), + weights=np.array([2.0, 4.0]), + ), + ], + ) + ] + + +def _multi_step_distribution_series() -> list[DistributionSeries]: + run = RunMeta(hash="abcdef1234567890", experiment="exp-a", name=None, creation_time=None) + return [ + DistributionSeries( + run=run, + name="empty", + context={"kind": "empty"}, + points=[], + ), + DistributionSeries( + run=run, + name="head/gradients/head.0.bias", + context={"kind": "gradients", "module": "head"}, + points=[ + DistributionPoint( + 
step=300, + epoch=0.0, + bin_edges=np.array([-1.0, 0.0, 1.0]), + weights=np.array([0.0, 2.0]), + ), + DistributionPoint( + step=600, + epoch=0.0, + bin_edges=np.array([-1.0, 0.0, 1.0]), + weights=np.array([3.0, 0.0]), + ), + DistributionPoint( + step=900, + epoch=0.0, + bin_edges=np.array([-1.0, 0.0, 1.0]), + weights=np.array([1.0, 4.0]), + ), + ], + ), + ] + + +def test_render_distribution_table_includes_tensor_column() -> None: + output = render_distribution_table(_sample_distribution_series(), no_color=True) + + assert "TENSOR" in output + assert "weights" in output + assert "[3, 5]" in output + + +def test_render_distribution_csv_contains_bin_edges_and_weights() -> None: + output = render_distribution_csv(_sample_distribution_series()) + + reader = csv.DictReader(io.StringIO(output)) + rows = list(reader) + assert rows + assert rows[0]["distribution"] == "weights" + assert rows[0]["bin_edges"] == "[0.0, 1.0, 2.0]" + assert rows[0]["weights"] == "[3.0, 5.0]" + + +def test_render_distribution_json_contains_points() -> None: + output = render_distribution_json(_sample_distribution_series()) + payload = json.loads(output) + + assert payload + first = payload[0] + assert first["distribution"] == "weights" + assert first["count"] == 2 + assert first["points"][0]["step"] == 10 + assert first["points"][0]["weights"] == [3.0, 5.0] + + +def test_select_distribution_visual_uses_first_non_empty_series_and_first_point() -> None: + selection = select_distribution_visual(_multi_step_distribution_series()) + + assert selection is not None + assert selection.selected_index == 1 + assert selection.series.name == "head/gradients/head.0.bias" + assert selection.resolved_step == 300 + + +def test_select_distribution_visual_returns_none_without_points() -> None: + series = _multi_step_distribution_series()[:1] + assert select_distribution_visual(series) is None + + +def test_select_distribution_visual_resolves_exact_step() -> None: + selection = 
select_distribution_visual(_multi_step_distribution_series(), selected_step=600) + + assert selection is not None + assert selection.resolved_step == 600 + assert not selection.used_nearest_step + + +def test_select_distribution_visual_resolves_nearest_higher_step() -> None: + selection = select_distribution_visual(_multi_step_distribution_series(), selected_step=500) + + assert selection is not None + assert selection.resolved_step == 600 + + +def test_select_distribution_visual_uses_lower_step_for_tie() -> None: + selection = select_distribution_visual(_multi_step_distribution_series(), selected_step=750) + + assert selection is not None + assert selection.resolved_step == 600 + + +def test_render_distribution_visual_includes_name_context_histogram_and_heatmap() -> None: + output = render_distribution_visual(_multi_step_distribution_series(), no_color=True) + + assert "Distributions" in output + assert "▌ head/gradients/head.0.bias" in output + assert 'kind="gradients", module="head"' in output + assert "Histogram" in output + assert "Step 300" in output + assert "Heatmap (steps x bins)" in output + assert "300 |" in output + assert "Scale: low -> high" in output + assert "█" in output + assert "\x1b[" not in output + + +def test_render_distribution_visual_uses_blue_gradient_color_by_default() -> None: + output = render_distribution_visual( + _multi_step_distribution_series(), + width=80, + height=14, + ) + + assert "\x1b[" in output + assert "\x1b[38;2;" in output + assert "Distributions" in output + assert "Histogram" in output + assert "Heatmap (steps x bins)" in output + + +def test_render_distribution_visual_labels_nearest_step() -> None: + output = render_distribution_visual( + _multi_step_distribution_series(), + selected_step=750, + no_color=True, + ) + + assert "Step 600" in output + assert "Requested step 750; showing nearest tracked step 600." 
in output + + +def test_render_distribution_visual_small_height_does_not_zero_divide_heatmap() -> None: + """height 12 or 13 yields max_heatmap_rows == 1; sampling must not divide by zero.""" + for height in (12, 13): + output = render_distribution_visual( + _multi_step_distribution_series(), + height=height, + no_color=True, + ) + assert "Heatmap (steps x bins)" in output + assert "900 |" in output + assert "Showing 1 of 3 steps" in output + + +def test_compress_values_ignores_non_finite_in_bucket_max() -> None: + nan = float("nan") + inf = float("inf") + # width=2 splits [1, nan, 10, 5] into [1, nan] and [10, 5] + assert _compress_values([1.0, nan, 10.0, 5.0], width=2) == [1.0, 10.0] + # bucket is only non-finite -> default 0.0 + assert _compress_values([nan, inf, -inf, nan], width=2) == [0.0, 0.0] + + +def test_intensity_text_handles_non_finite_without_crash() -> None: + nan = float("nan") + text = _intensity_text([1.0, nan, 4.0, 0.0], width=8) + assert len(text.plain) == 4 + + all_bad = _intensity_text([nan, float("inf")], width=8) + assert len(all_bad.plain) == 2 + assert all(ch == "▁" for ch in all_bad.plain) + + +def test_render_distribution_visual_handles_single_step_and_all_zero_weights() -> None: + run = RunMeta(hash="abcdef1234567890", experiment="exp-a", name=None, creation_time=None) + series = [ + DistributionSeries( + run=run, + name="zeros", + context={}, + points=[ + DistributionPoint( + step=1, + epoch=None, + bin_edges=np.array([0.0, 1.0, 2.0]), + weights=np.array([0.0, 0.0]), + ) + ], + ) + ] + + output = render_distribution_visual(series, no_color=True) + + assert "zeros" in output + assert "Step 1" in output + assert "▁▁" in output diff --git a/tests/unit/test_trace_helpers.py b/tests/unit/test_trace_helpers.py index 19aad6a..bfabdc3 100644 --- a/tests/unit/test_trace_helpers.py +++ b/tests/unit/test_trace_helpers.py @@ -1,14 +1,30 @@ from __future__ import annotations +from dataclasses import dataclass from pathlib import Path import pytest 
-from aimx.commands.trace import TraceInvocation, parse_trace_invocation +from aimx.commands.trace import ( + TraceInvocation, + _TracePipeline, + _execute_trace_pipeline, + parse_trace_invocation, +) + + +@dataclass +class _FakeSeries: + points: list[int] + + @property + def count(self) -> int: + return len(self.points) def test_parse_trace_defaults() -> None: inv = parse_trace_invocation(["metric.name=='loss'"]) + assert inv.target == "metrics" assert inv.expression == "metric.name=='loss'" assert inv.repo_path == Path(".") assert inv.mode == "plot" @@ -66,6 +82,18 @@ def test_parse_trace_explicit_repo_overrides_default() -> None: assert inv.repo_path == Path("data") +def test_parse_trace_distribution_target() -> None: + inv = parse_trace_invocation(["distribution", "distribution.name=='weights'", "--repo", "data"]) + assert inv.target == "distribution" + assert inv.expression == "distribution.name=='weights'" + assert inv.repo_path == Path("data") + + +def test_parse_trace_distribution_requires_expression() -> None: + with pytest.raises(ValueError, match="trace distribution"): + parse_trace_invocation(["distribution"]) + + def test_parse_trace_rejects_unknown_flag() -> None: with pytest.raises(ValueError, match="Unsupported trace option"): parse_trace_invocation(["expr", "--repo", "data", "--bogus"]) @@ -109,3 +137,155 @@ def test_parse_trace_steps_missing_value_raises() -> None: def test_parse_trace_steps_defaults_to_none() -> None: inv = parse_trace_invocation(["expr", "--repo", "data"]) assert inv.step_slice is None + + +def test_parse_trace_step_flag() -> None: + inv = parse_trace_invocation(["distribution", "expr", "--repo", "data", "--step", "12300"]) + assert inv.selected_step == 12300 + + +def test_parse_trace_step_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --step"): + parse_trace_invocation(["distribution", "expr", "--repo", "data", "--step"]) + + +def test_parse_trace_step_rejects_non_integer() -> None: + 
with pytest.raises(ValueError, match="--step requires an integer"): + parse_trace_invocation(["distribution", "expr", "--repo", "data", "--step", "abc"]) + + +def test_execute_trace_pipeline_filters_before_sampling_and_renders_remaining() -> None: + calls: list[tuple[object, ...]] = [] + invocation = TraceInvocation( + target="metrics", + expression="expr", + repo_path=Path("."), + mode="json", + head=2, + every=2, + step_slice="2:4", + ) + + def collect(expression: str, repo_path: Path) -> list[_FakeSeries]: + calls.append(("collect", expression, repo_path)) + return [_FakeSeries([1, 2, 3, 4, 5]), _FakeSeries([10])] + + def filter_by_step_range( + series: _FakeSeries, + start: int | None, + end: int | None, + ) -> _FakeSeries: + calls.append(("filter", tuple(series.points), start, end)) + return _FakeSeries( + [ + point + for point in series.points + if (start is None or point >= start) and (end is None or point <= end) + ] + ) + + def subsample( + series: _FakeSeries, + *, + head: int | None, + tail: int | None, + every: int | None, + ) -> _FakeSeries: + calls.append(("subsample", tuple(series.points), head, tail, every)) + points = series.points + if head is not None: + points = points[:head] + if tail is not None: + points = points[-tail:] + if every is not None and every > 1: + points = points[::every] + return _FakeSeries(points) + + def render( + series_list: list[_FakeSeries], + render_invocation: TraceInvocation, + no_color: bool, + ) -> str: + calls.append( + ( + "render", + tuple(tuple(series.points) for series in series_list), + render_invocation.mode, + no_color, + ) + ) + return "rendered" + + result = _execute_trace_pipeline( + invocation, + Path("repo"), + _TracePipeline( + collect=collect, + filter_by_step_range=filter_by_step_range, + subsample=subsample, + render=render, + no_matches_message="No matching fake series found.", + ), + no_color=True, + ) + + assert result.exit_status == 0 + assert result.output == "rendered" + assert calls == [ + 
("collect", "expr", Path("repo")), + ("filter", (1, 2, 3, 4, 5), 2, 4), + ("filter", (10,), 2, 4), + ("subsample", (2, 3, 4), 2, None, 2), + ("render", ((2,),), "json", True), + ] + + +def test_execute_trace_pipeline_returns_step_range_message_when_filter_empties() -> None: + invocation = TraceInvocation( + target="metrics", + expression="expr", + repo_path=Path("."), + step_slice="2:4", + ) + + def collect(expression: str, repo_path: Path) -> list[_FakeSeries]: + return [_FakeSeries([1])] + + def filter_by_step_range( + series: _FakeSeries, + start: int | None, + end: int | None, + ) -> _FakeSeries: + return _FakeSeries([]) + + def subsample( + series: _FakeSeries, + *, + head: int | None, + tail: int | None, + every: int | None, + ) -> _FakeSeries: + raise AssertionError("subsample should not run after the filter empties all series") + + def render( + series_list: list[_FakeSeries], + render_invocation: TraceInvocation, + no_color: bool, + ) -> str: + raise AssertionError("render should not run after the filter empties all series") + + result = _execute_trace_pipeline( + invocation, + Path("repo"), + _TracePipeline( + collect=collect, + filter_by_step_range=filter_by_step_range, + subsample=subsample, + render=render, + no_matches_message="No matching fake series found.", + ), + no_color=False, + ) + + assert result.exit_status == 0 + assert result.output == "No data in the requested step range." diff --git a/uv.lock b/uv.lock index ed1d79a..bb7ba45 100644 --- a/uv.lock +++ b/uv.lock @@ -96,7 +96,7 @@ wheels = [ [[package]] name = "aimx" -version = "0.3.2" +version = "0.3.3" source = { editable = "." } dependencies = [ { name = "numpy" },