diff --git a/.agents/skills/aimx b/.agents/skills/aimx new file mode 120000 index 0000000..3afe07d --- /dev/null +++ b/.agents/skills/aimx @@ -0,0 +1 @@ +../../skills/aimx \ No newline at end of file diff --git a/.gitignore b/.gitignore index ff447ff..073950c 100644 --- a/.gitignore +++ b/.gitignore @@ -137,6 +137,8 @@ celerybeat.pid # Environments .env .envrc +.env* +.venv/ .venv env/ venv/ @@ -188,6 +190,12 @@ cython_debug/ # you could uncomment the following to ignore the entire vscode folder # .vscode/ +# OS / editor +.DS_Store +Thumbs.db +*.tmp +*.swp + # Ruff stuff: .ruff_cache/ @@ -210,4 +218,4 @@ __marimo__/ .specify/ # aim -data/ \ No newline at end of file +data/ diff --git a/AGENTS.md b/AGENTS.md index 0132f9a..c1db14b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -79,6 +79,8 @@ Why `3.12`: - Existing local Aim repositories on disk, including repo roots that contain a `.aim` metadata directory (001-aim-command-passthrough) - Python 3.12 for development, runtime support `>=3.10,<3.13` + `rich>=13.7`, `textual-image>=0.12.0` (already declared in (003-query-images-terminal-render) - Existing local Aim repositories (read-only). 
Image bytes are read (003-query-images-terminal-render) +- Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, `numpy>=1.24`, `rich>=13.7`, `textual-image>=0.12.0`, existing Aim SDK usage for owned query commands; no new dependency planned (004-run-params-query) +- Existing local Aim repositories on disk (read-only); run params are read from Aim run metadata attributes under `.aim` (004-run-params-query) ## Recent Changes - 001-aim-command-passthrough: Added Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, native Aim CLI (external runtime prerequisite for delegated commands), pytest for test automation diff --git a/README.md b/README.md index c4477d2..2fbb0d3 100644 --- a/README.md +++ b/README.md @@ -1,187 +1,249 @@ # aimx +![aimx trace output preview](static/trace.png) + [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE) [![Python](https://img.shields.io/badge/python-%3E%3D3.10%2C%3C3.13-blue.svg)](./pyproject.toml) [![PyPI](https://img.shields.io/pypi/v/aimx.svg?color=blue)](https://pypi.org/project/aimx/) [![CI](https://github.com/blizhan/aimx/actions/workflows/CI.yaml/badge.svg)](https://github.com/blizhan/aimx/actions/workflows/CI.yaml) [![Publish](https://github.com/blizhan/aimx/actions/workflows/publish.yaml/badge.svg)](https://github.com/blizhan/aimx/actions/workflows/publish.yaml) -![aimx trace output preview](static/trace.png) - `aimx` is a safe, additive, CLI-first companion for native [Aim](https://github.com/aimhubio/aim). -It keeps a small owned command surface for diagnostics and guidance, and -delegates everything else to the native [`aim`](https://github.com/aimhubio/aim) executable already available in +It adds focused terminal workflows for querying, comparing, previewing, and +exporting Aim run data. 
Commands that `aimx` does not own are delegated to the +native [`aim`](https://github.com/aimhubio/aim) executable already available in the user's environment. -## Installation + +## Quick Start + +### Install ```bash -# Using uv (recommended) +# install into the current project uv add aimx -# Or using pip +# Or use pip pip install aimx ``` -## What aimx owns +### Install the agent skill -- `aimx` -- `aimx --help` -- `aimx help` -- `aimx version` -- `aimx doctor` -- `aimx query` -- `aimx trace` +This repository also includes an `aimx` skill for agent workflows such as +`autoresearch` `log_experiment`, where an LLM needs to collect run parameters, +metric summaries, traces, and image evidence from a local Aim repository. -These commands explain how `aimx` works, show the `aimx` version, and report -whether native Aim is available for passthrough. +```bash +npx skills install blizhan/aimx +``` + +After installation, invoke the skill as `$aimx`. The skill assumes the `aimx` +CLI is available in the environment that performs the experiment inspection. -`--repo` is optional for owned `query` and `trace` commands and defaults to the -current directory `.`. When provided, it accepts either the repository root, -such as `data`, or the metadata directory itself, such as `data/.aim`. +### Check your environment -Both `aimx query` and `aimx trace` accept **AimQL** expressions (Aim's native -Python-like query language) as their filter argument — e.g. -`"metric.name == 'loss' and run.hparams.learning_rate > 0.001"`. For the full -syntax, supported properties (`run.*`, `metric.*`, `images.*`), and security -restrictions, see the upstream docs: -[Aim — Query language basics](https://aimstack.readthedocs.io/en/latest/using/search.html). 
+```bash +aimx --help +aimx version +aimx doctor +``` -### `aimx query` — discover and summarise metrics +### Query an Aim repository -Queries an Aim repository and shows a grouped table with per-metric statistics -(step count, last value, min/max with step). +If your current working directory is an Aim repo root, `--repo` can be omitted. +When provided, `--repo` accepts either the repository root, such as `data`, or +the metadata directory itself, such as `data/.aim`. -![aimx query output preview](static/metrics.png) +```bash +# Summarize matching metrics +aimx query metrics "metric.name == 'loss'" --repo data + +# Preview matching images in supported terminals +aimx query images "images" --repo data + +# Compare run parameters across matching runs +aimx query params "run.hash != ''" --repo data + +# Plot a metric time series +aimx trace "metric.name == 'loss'" --repo data +``` + +## Features + +- **Safe native Aim coexistence**: `aimx` does not replace the `aim` executable + or modify installed Aim packages. +- **Explicit command ownership**: owned `aimx` commands stay small and focused; + everything else is passed through to native Aim. +- **Scriptable query output**: query metrics, images, and run params as rich + terminal tables, plain text, or JSON. +- **Terminal image previews**: render Aim image artifacts inline when the + terminal supports graphics, with a safe text fallback. +- **Metric tracing**: plot, tabulate, or export matching time series with step + filters and sampling controls. +- **Read-only defaults**: inspection, query, diagnostic, and passthrough flows + do not mutate `.aim` repository data. + +## Commands + +### Owned commands + +| Command | Purpose | +| --- | --- | +| `aimx` / `aimx --help` / `aimx help` | Show CLI help and owned command guidance. | +| `aimx version` | Print the installed `aimx` version. | +| `aimx doctor` | Check whether native Aim is available for passthrough. | +| `aimx query metrics` | Summarize matching metric series. 
| +| `aimx query images` | List and optionally preview matching image records. | +| `aimx query params` | Compare run-level parameters across matching runs. | +| `aimx trace` | Plot, tabulate, or export metric time series. | + +Both `aimx query` and `aimx trace` accept **AimQL** expressions as their filter +argument. AimQL is Aim's native Python-like query language. ```bash -# If your current working directory is the Aim repo root, --repo can be omitted -aimx query metrics "metric.name == 'loss'" +aimx query metrics "metric.name == 'loss' and run.hparams.learning_rate > 0.001" +``` -# Rich table (default, colored in terminal) +For syntax, supported properties (`run.*`, `metric.*`, `images.*`), and +security restrictions, see +[Aim - Query language basics](https://aimstack.readthedocs.io/en/latest/using/search.html). + +### Query metrics + +`aimx query metrics` groups matching metric series by run and reports useful +statistics such as step count, last value, min value, and max value. + +![aimx query output preview](static/metrics.png) + +```bash +# Rich table, colored in terminals by default aimx query metrics "metric.name == 'loss'" --repo data # Short run hashes are transparently expanded to full hashes -aimx query metrics "run.hash=='eca37394' and metric.name=='loss'" --repo data +aimx query metrics "run.hash == 'eca37394' and metric.name == 'loss'" --repo data -# Tab-separated plain text, suitable for awk/grep +# Tab-separated output for shell tools aimx query metrics "metric.name == 'loss'" --repo data --oneline -# Structured JSON (nested by run) +# Structured JSON, nested by run aimx query metrics "metric.name == 'loss'" --repo data --json -# Step range filter — statistics recomputed within the window +# Step or epoch windows aimx query metrics "metric.name == 'loss'" --repo data --steps 100:500 -aimx query metrics "metric.name == 'loss'" --repo data --steps :50 # first 50 steps -aimx query metrics "metric.name == 'loss'" --repo data --steps 100: # from step 
100 onwards - -# Epoch range filter (mutually exclusive with --steps) aimx query metrics "metric.name == 'loss'" --repo data --epochs 1:10 -aimx query metrics "metric.name == 'loss'" --repo data --epochs :5 -# Density subsampling: first N / last N / every K-th point per series +# Density sampling aimx query metrics "metric.name == 'loss'" --repo data --head 20 aimx query metrics "metric.name == 'loss'" --repo data --tail 20 aimx query metrics "metric.name == 'loss'" --repo data --every 5 +``` -# Combine short hash + step range + head -aimx query metrics "run.hash=='eca37394' and metric.name=='loss'" --repo data --steps 100:300 --head 10 +### Query images -# Images — metadata table only (--json / --plain / redirected stdout) -aimx query images "images" --repo data --json -aimx query images "images" --repo data --plain +`aimx query images` reads image metadata and, when possible, renders matched +images directly in the terminal. -# Images — filter by epoch range (affects all output modes) -aimx query images "images" --repo data --epochs 10:50 --plain -aimx query images "images" --repo data --epochs :30 --json +![aimx query images output preview](static/images.png) -# Images — global row subsampling (applied to the sorted result list) -aimx query images "images" --repo data --head 5 -aimx query images "images" --repo data --tail 5 -aimx query images "images" --repo data --every 3 +```bash +# Inline preview in modern terminals +aimx query images "images" --repo data -# Images — inline preview in a modern terminal (iTerm2 / Kitty / WezTerm / Ghostty) -aimx query images "images" --repo data # default: renders up to 6 images inline -aimx query images "images" --repo data --max-images 20 # render more -aimx query images "images" --repo data --max-images 0 # no cap (render all) +# Metadata output only +aimx query images "images" --repo data --plain +aimx query images "images" --repo data --json -# Combine epoch filter + head + TTY cap -aimx query images "images" --repo data 
--epochs 10:50 --head 10 --max-images 4 -``` +# Filter and sample matching image rows +aimx query images "images" --repo data --epochs 10:50 --head 10 -Output modes: `--json` (nested runs→metrics), `--oneline` / `--plain` (tab-separated), -default (rich table with inline image preview). -Filter/sampling flags (affect all output modes): `--steps start:end | --epochs start:end` -(mutually exclusive), `--head N`, `--tail N`, `--every K`. -Additional flags: `--no-color`, `--verbose`, `--max-images N` (images TTY cap only). +# Control the TTY preview cap +aimx query images "images" --repo data --max-images 20 +aimx query images "images" --repo data --max-images 0 +``` -#### Inline image preview +When stdout is a TTY and `aimx` detects a graphics-capable terminal, matched +images render inline. On plain ANSI terminals, `aimx` falls back to half-block +character art and still exits with code `0`. -![aimx query images output preview](static/images.png) +Terminal rendering is provided by +[`textual-image`](https://github.com/lnqs/textual-image/tree/main#support-matrix-1). +Confirmed working terminals include iTerm2, Kitty, Konsole, WezTerm, foot, tmux +(Sixel), xterm (Sixel), Windows Terminal, and VS Code integrated terminal. Warp +and GNOME Terminal are not supported. -When stdout is a TTY and `aimx` detects a graphics-capable terminal, `aimx query images` -renders matched images directly in the terminal. On plain ANSI terminals it falls back -to half-block character art — exit code is always `0`. +To disable inline rendering, redirect stdout or use `--plain` / `--json`. -Terminal support is provided by [`textual-image`](https://github.com/lnqs/textual-image/tree/main#support-matrix-1). -Confirmed working terminals include: iTerm2, Kitty, Konsole, WezTerm, foot, tmux (Sixel), -xterm (Sixel), Windows Terminal, and VS Code integrated terminal. Warp and GNOME Terminal -are not supported. 
+### Query run params -To disable inline rendering without changing flags, redirect stdout `aimx query images > out.txt` or use `--plain` / `--json`. +`aimx query params` reads run-level Aim metadata without modifying the +repository. By default, it shows a readable set of discovered parameter columns. +Use `--param KEY` one or more times to align specific flattened params across +matching runs. -### `aimx trace` — plot or export a metric time series +![aimx query params output preview](static/params.png) -Fetches the full value sequence for one or more metrics and renders a curve, -table, or structured export. Multiple matching runs are overlaid on the same plot. +```bash +# Compare discovered params across all matching runs +aimx query params "run.hash != ''" --repo data -![aimx trace output preview](static/trace.png) +# Select specific params +aimx query params "run.experiment == 'cloud-segmentation'" --repo data \ + --param hparam.lr \ + --param hparam.optimizer -```bash -# If your current working directory is the Aim repo root, --repo can be omitted -aimx trace "metric.name=='loss'" +# Script-friendly output +aimx query params "run.experiment == 'cloud-segmentation'" --repo data --plain +aimx query params "run.experiment == 'cloud-segmentation'" --repo data --json -# Plot loss curve for a specific run — short hash transparently expanded -aimx trace "run.hash=='eca37394' and metric.name=='loss'" --repo data +# Filter with AimQL run fields +aimx query params "run.hparam.lr == 0.0001" --repo data --param hparam.lr +``` -# Compare train vs val loss across all runs -aimx trace "metric.name=='loss'" --repo data +Missing selected params are displayed as `-` in terminal/plain output and listed +under `missing_params` in JSON. 
-# Step-by-step table -aimx trace "metric.name=='loss'" --repo data --table +### Trace metrics -# CSV export -aimx trace "metric.name=='loss'" --repo data --csv > loss.csv +`aimx trace` fetches the full value sequence for one or more matching metrics +and renders a curve, table, CSV, or JSON export. Multiple matching runs are +overlaid on the same plot. -# JSON with full value arrays -aimx trace "metric.name=='loss'" --repo data --json +```bash +# Plot all matching loss curves +aimx trace "metric.name == 'loss'" --repo data -# Step range filter (hard constraint, applied before sampling) -aimx trace "metric.name=='loss'" --repo data --steps 100:500 -aimx trace "metric.name=='loss'" --repo data --steps :50 # first 50 steps -aimx trace "metric.name=='loss'" --repo data --steps 100: # step 100 onwards +# Plot one run by short hash +aimx trace "run.hash == 'eca37394' and metric.name == 'loss'" --repo data -# Combine step filter + JSON -aimx trace "run.hash=='eca37394' and metric.name=='loss'" --repo data --steps 1:200 --json +# Step-by-step table +aimx trace "metric.name == 'loss'" --repo data --table -# Limit to first 50 points per series (density subsampling, applied after --steps) -aimx trace "metric.name=='loss'" --repo data --head 50 +# CSV or JSON export +aimx trace "metric.name == 'loss'" --repo data --csv > loss.csv +aimx trace "metric.name == 'loss'" --repo data --json -# Sample every 10th point -aimx trace "metric.name=='loss'" --repo data --every 10 +# Step filtering and sampling +aimx trace "metric.name == 'loss'" --repo data --steps 100:500 +aimx trace "metric.name == 'loss'" --repo data --head 50 +aimx trace "metric.name == 'loss'" --repo data --every 10 ``` -Output modes: default (plotext chart), `--table`, `--csv`, `--json`. -Step filtering: `--steps start:end` (inclusive, open-ended sides allowed). -Sampling: `--head N`, `--tail N`, `--every K`. -Display: `--width W`, `--height H`, `--no-color`. +Output modes: default plot, `--table`, `--csv`, `--json`. 
+Display controls: `--width W`, `--height H`, `--no-color`. -## What aimx delegates +### Common query options -Any unowned command path is passed through to native `aim`. +- Output: `--json`, `--oneline` / `--plain`, or the default rich terminal view. +- Filtering: `--steps start:end` or `--epochs start:end` where supported. +- Sampling: `--head N`, `--tail N`, `--every K`. +- Images: `--max-images N` controls the TTY preview cap. +- Params: `--param KEY` can be repeated to select parameter columns. +- Diagnostics: `--verbose` prints additional details where supported. -Examples: +### Native Aim passthrough + +Any unowned command path is passed through to native `aim`. ```bash aimx up @@ -190,24 +252,41 @@ aimx runs --help aimx runs ls ``` -## Runtime contract +## Documentation + +- [AimQL query language](https://aimstack.readthedocs.io/en/latest/using/search.html) + explains the filter syntax used by `aimx query` and `aimx trace`. +- [textual-image terminal support](https://github.com/lnqs/textual-image/tree/main#support-matrix-1) + lists terminals that can render inline images. +- [CONSTITUTION.md](./CONSTITUTION.md) documents the project safety and scope + rules. +- [specs/](./specs) contains feature specs, plans, contracts, and quickstarts + for implemented `aimx` capabilities. +- [TODO.md](./TODO.md) tracks early roadmap notes. + +## Runtime Contract - `aimx` does not replace the `aim` executable. - `aimx` does not modify the installed `aim` package. -- `aimx` does not mutate `.aim` data during help, version, doctor, or -passthrough flows. +- `aimx` does not mutate `.aim` data during help, version, doctor, query, + trace, or passthrough flows. - Native Aim remains an external runtime prerequisite for delegated commands. -- The repo's development dependency on Aim is only for local development and -testing convenience. +- The repository's development dependency on Aim is only for local development + and testing convenience. 
-## Local development +## Development + +The project uses Python 3.12 for local development and supports +`>=3.10,<3.13` at runtime. ```bash +uv python install 3.12 +uv venv --python 3.12 uv sync --group dev uv run pytest ``` -## Quick checks +Useful local checks: ```bash uv run aimx --help @@ -216,8 +295,3 @@ uv run aimx doctor uv run aimx query metrics "metric.name == 'loss'" --repo data uv run aimx query images "images" --repo data/.aim --json ``` - -## TODO - -- [ ] Introduce `skills` — composable, reusable workflow modules that layer higher-level experiment - analysis and auto-research capabilities on top of `aimx`. diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..1399631 --- /dev/null +++ b/TODO.md @@ -0,0 +1,3 @@ +[ ] diff commands +[ ] skills + [ ] log_experiment \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index cc6f5e8..882d3d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "aimx" -version = "0.3.1" +version = "0.3.2" description = "A safe CLI-first companion for native Aim" readme = "README.md" requires-python = ">=3.10,<3.13" diff --git a/skills/aimx/SKILL.md b/skills/aimx/SKILL.md new file mode 100644 index 0000000..7a2bb9a --- /dev/null +++ b/skills/aimx/SKILL.md @@ -0,0 +1,123 @@ +--- +name: aimx +description: Use when autoresearch, log_experiment, experiment analysis, or automatic iteration workflows need to inspect local Aim repositories with aimx, collect run params, metric summaries, traces, image evidence, compare training runs, or summarize model results without mutating Aim data. +--- + +# Aimx + +## Overview + +Use `aimx` as a read-only evidence collector for `autoresearch` `log_experiment` +steps. Prefer JSON output so downstream agents can compare runs, explain model +effects, and propose the next experiment from concrete Aim data. + +## Requirements + +- Require `aimx` in the Python environment that runs `log_experiment`. 
+ Verify with `aimx --help` or `python -m aimx --help` before collecting data. +- When `aimx` is missing, first identify and follow the user's current + dependency-management workflow from project files and instructions + (`pyproject.toml`, lockfiles, README, AGENTS/CLAUDE/GEMINI notes, CI config). + Use that manager to add or install `aimx`. In this repository, prefer + `uv sync`, `uv run ...`, and `uv add aimx` because repo policy requires `uv`. +- If there is no project environment to modify and the user only needs a + standalone CLI, install `aimx` as a tool using the user's preferred tool + manager. Use `uv tool install aimx` only when `uv` is already the chosen or + accepted tool workflow. +- Require read access to a local Aim repository path. Pass `--repo` explicitly + and keep collection commands read-only. +- If native Aim availability matters for passthrough checks, run `aimx doctor`; + do not initialize, repair, migrate, or rewrite Aim repositories from this + skill. + +## Workflow + +1. Locate the Aim repository. Pass `--repo <repo>` explicitly; in + this repository, use `--repo data` or `--repo data/.aim` for local checks. +2. Define the run scope as an AimQL expression. Start broad with + `run.hash != ''`, then narrow by `run.hash`, `run.experiment`, or `run.name`. +3. Collect run parameters: + + ```bash + aimx query params "<run_scope>" --repo <repo> --json + aimx query params "<run_scope>" --repo <repo> --json --param hparam.lr --param model + ``` + +4. Collect metric summaries for candidate objective metrics: + + ```bash + aimx query metrics "(<run_scope>) and metric.name == 'loss'" --repo <repo> --json + aimx query metrics "(<run_scope>) and metric.name != ''" --repo <repo> --json + ``` + +5. Collect traces only for decisive metrics or suspected anomalies: + + ```bash + aimx trace "(<run_scope>) and metric.name == 'loss'" --repo <repo> --json --tail 50 + ``` + +6. Collect image metadata when qualitative outputs matter: + + ```bash + aimx query images "images" --repo <repo> --json --head 20 + ``` + +7. 
Emit a compact `log_experiment` record containing: + + ```json + { + "repo": "", + "run_scope": "", + "params": {}, + "metric_summary": {}, + "trace_evidence": {}, + "image_evidence": {}, + "interpretation": { + "best_runs": [], + "regressions": [], + "confidence": "low|medium|high", + "next_experiments": [] + } + } + ``` + +## Interpretation Rules + +- Prefer validation, test, or held-out contexts over training contexts when + ranking runs. +- Treat `aimx query metrics` as summary data: `last`, `min`, `max`, and step + counts. Use `aimx trace --json` when shape, stability, divergence, or late + improvement matters. +- For minimization metrics such as loss or error, compare `min.value` and the + corresponding step. For maximization metrics such as accuracy, F1, AUC, or + IoU, compare `max.value`. +- Separate incomplete or failed runs from strong results before drawing + conclusions. Very low step counts, missing params, or missing validation + metrics should reduce confidence. +- Preserve read-only behavior. Do not run commands that initialize, repair, + migrate, delete, or rewrite Aim repositories during `log_experiment`. + +## Helper Script + +Use `scripts/collect_experiment_snapshot.py` when an agent needs one structured +JSON bundle instead of several manual commands. + +```bash +uv run python skills/aimx/scripts/collect_experiment_snapshot.py \ + --repo data \ + --base-expr "run.experiment == 'cloud-segmentation'" \ + --metric loss \ + --trace-metric loss \ + --param hparam.lr \ + --param model \ + --pretty +``` + +The script uses the current Python interpreter as `python -m aimx` by default. +Pass `--aimx "aimx"` or `--aimx "uv run aimx"` when a different launcher is +needed. It writes only to stdout. + +## Reference + +Read `references/aimx-cli.md` for command details, JSON envelope shapes, and +suggested `log_experiment` evidence fields. 
diff --git a/skills/aimx/agents/openai.yaml b/skills/aimx/agents/openai.yaml new file mode 100644 index 0000000..c57e0e1 --- /dev/null +++ b/skills/aimx/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Aimx" + short_description: "Collect Aim experiment evidence" + default_prompt: "Use $aimx to collect params, metrics, traces, and image evidence from an Aim repo before proposing the next experiment." diff --git a/skills/aimx/references/aimx-cli.md b/skills/aimx/references/aimx-cli.md new file mode 100644 index 0000000..48dd125 --- /dev/null +++ b/skills/aimx/references/aimx-cli.md @@ -0,0 +1,233 @@ +# aimx CLI Reference for log_experiment + +Use this reference when building an `autoresearch` `log_experiment` step from a +local Aim repository. All commands below are read-only inspection commands. + +## Installation Requirement + +`aimx` must be installed in the environment that runs the collection commands. +Verify one of these before collecting: + +```bash +aimx --help +python -m aimx --help +``` + +If it is missing, inspect the current project first and use the dependency +manager it already uses. Check project instructions, `pyproject.toml`, lockfiles, +README setup docs, and CI config before choosing a command. + +Examples: + +```bash +# uv-managed project +uv add aimx +uv sync + +# Poetry-managed project +poetry add aimx + +# Pipenv-managed project +pipenv install aimx + +# requirements.txt project +python -m pip install aimx +python -m pip freeze > requirements.txt +``` + +Use the project's normal lockfile/update flow. For this `aimx` repository, +prefer `uv sync`, `uv run ...`, and `uv add aimx` because repo policy requires +`uv`. For a standalone CLI with no project dependency to modify, use the user's +preferred tool manager; use `uv tool install aimx` only when `uv` is already the +chosen or accepted workflow. + +## Repository Paths + +`--repo` accepts either the Aim repository root or the `.aim` metadata +directory. 
Prefer explicit repo paths so automation does not accidentally read a +user-level or unrelated Aim repository. + +Examples: + +```bash +aimx query params "run.hash != ''" --repo data --json +aimx query params "run.hash != ''" --repo data/.aim --json +``` + +## AimQL Scoping + +Use AimQL in the expression argument. Common run scopes: + +```text +run.hash != '' +run.hash == 'eca37394' +run.experiment == 'cloud-segmentation' +run.name == 'resnet-ft-0420' +``` + +Metric expressions combine run fields with metric fields: + +```text +(run.experiment == 'cloud-segmentation') and metric.name == 'loss' +(run.hash == 'eca37394') and metric.name != '' +``` + +Short run hashes are expanded by `aimx` where supported. + +## Params + +Use params first to understand the design of each run. + +```bash +aimx query params "<run_scope>" --repo <repo> --json +aimx query params "<run_scope>" --repo <repo> --json --param hparam.lr --param model +``` + +JSON shape: + +```json +{ + "target": "params", + "repo": "data", + "expression": "run.hash != ''", + "runs_count": 2, + "param_keys": ["hparam.lr", "model"], + "runs": [ + { + "hash": "full-run-hash", + "experiment": "experiment-name", + "name": "run-name", + "params": {"hparam.lr": 0.0001, "model": "ResNet"}, + "missing_params": [] + } + ] +} +``` + +Use `missing_params` as a confidence signal when comparing runs. + +## Metric Summaries + +Use metric summaries to rank runs cheaply. 
+ +```bash +aimx query metrics "<metric_expr>" --repo <repo> --json +aimx query metrics "<metric_expr>" --repo <repo> --json --steps 100:500 +aimx query metrics "<metric_expr>" --repo <repo> --json --epochs 1:10 +``` + +JSON shape: + +```json +{ + "target": "metrics", + "repo": "data", + "expression": "metric.name == 'loss'", + "runs_count": 1, + "metrics_count": 2, + "runs": [ + { + "hash": "full-run-hash", + "experiment": "experiment-name", + "name": "run-name", + "metrics": [ + { + "name": "loss", + "context": {"subset": "val"}, + "steps": 110, + "last": {"value": 0.43, "step": 110}, + "min": {"value": 0.32, "step": 60}, + "max": {"value": 0.48, "step": 107} + } + ] + } + ] +} +``` + +Use `context` to distinguish train, val, test, dataset split, seed, or other +metric dimensions. + +## Traces + +Use traces when a summary hides important behavior such as late overfitting, +instability, divergence, or plateauing. + +```bash +aimx trace "<metric_expr>" --repo <repo> --json +aimx trace "<metric_expr>" --repo <repo> --json --tail 50 +aimx trace "<metric_expr>" --repo <repo> --json --steps 100:500 --every 5 +``` + +JSON shape: + +```json +[ + { + "run": { + "hash": "full-run-hash", + "experiment": "experiment-name", + "name": "run-name" + }, + "metric": "loss", + "context": {"subset": "val"}, + "count": 50, + "steps": [1, 2, 3], + "epochs": [1.0, 2.0, 3.0], + "values": [0.9, 0.7, 0.5] + } +] +``` + +If no metrics match, current `aimx trace --json` may print a text message +instead of JSON. Treat that as no trace evidence rather than a parsing failure. + +## Images + +Use images for qualitative checks such as sample predictions, masks, generated +outputs, confusion examples, or visual regressions. 
+ +```bash +aimx query images "images" --repo <repo> --json +aimx query images "images" --repo <repo> --json --head 20 +aimx query images "images" --repo <repo> --json --epochs 10:50 +``` + +JSON shape: + +```json +{ + "target": "images", + "repo": "data", + "expression": "images", + "count": 1, + "rows": [ + { + "run_hash": "full-run-hash", + "experiment": "experiment-name", + "name": "example", + "context": {"epoch": 10, "subset": "val"} + } + ] +} +``` + +Use image metadata in automated logs; render images manually only when the user +asks for visual inspection. + +## log_experiment Evidence Fields + +Recommended fields for autoresearch output: + +- `run_scope`: AimQL expression and repo path used for evidence. +- `params`: selected hyperparameters and model identifiers per run. +- `metric_summary`: objective metric summaries per run and context. +- `trace_evidence`: sampled value arrays for decisive metrics. +- `image_evidence`: image row counts and representative contexts. +- `ranking`: best run per objective, objective direction, and tie-breakers. +- `regressions`: runs worse than baseline, incomplete runs, missing metrics, or + suspicious curves. +- `next_experiments`: concrete parameter changes grounded in the evidence. + +Keep conclusions tied to the collected data. When metric direction is unknown, +state the assumption before ranking. 
#!/usr/bin/env python3
"""Collect a read-only aimx experiment snapshot for autoresearch logs.

The script shells out to the ``aimx`` CLI (``query params``, ``query metrics``,
``trace`` and, on request, ``query images``), always passing ``--json``, and
prints one combined JSON document to stdout.  The process exits with status 0
when every aimx command succeeded and with status 2 otherwise.
"""

from __future__ import annotations

import argparse
import json
import shlex
import subprocess
import sys
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any


@dataclass
class CommandPayload:
    """Outcome of one aimx invocation.

    ``ok`` is True when the process exited with status 0.  ``data`` holds the
    parsed JSON stdout, ``message`` holds stdout that could not be parsed as
    JSON (or stdout of a failed command), and ``error`` holds stderr or a
    description of a launch/timeout failure.
    """

    argv: list[str]
    ok: bool
    data: Any | None = None
    message: str | None = None
    error: str | None = None

    def as_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict, omitting optional fields that are unset."""
        payload: dict[str, Any] = {"argv": self.argv, "ok": self.ok}
        if self.data is not None:
            payload["data"] = self.data
        if self.message:
            payload["message"] = self.message
        if self.error:
            payload["error"] = self.error
        return payload


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` (the default) reads ``sys.argv``.

    Returns:
        The populated namespace.  ``--aimx`` is guaranteed to tokenize (via
        ``shlex.split``) into at least one word; otherwise the parser exits
        with a usage error.
    """
    parser = argparse.ArgumentParser(
        description="Collect params, metric summaries, optional traces, and image metadata with aimx."
    )
    parser.add_argument("--repo", default=".", help="Aim repo root or .aim metadata path.")
    parser.add_argument(
        "--base-expr",
        default="run.hash != ''",
        help="AimQL run scope used for params and generated metric expressions.",
    )
    parser.add_argument(
        "--metric",
        action="append",
        default=[],
        help="Metric name to summarize; repeatable. Defaults to all metrics when no metric expression is supplied.",
    )
    parser.add_argument(
        "--metric-expr",
        action="append",
        default=[],
        help="Full AimQL metric expression to summarize; repeatable.",
    )
    parser.add_argument(
        "--trace-metric",
        action="append",
        default=[],
        help="Metric name to collect trace samples for; repeatable.",
    )
    parser.add_argument(
        "--trace-expr",
        action="append",
        default=[],
        help="Full AimQL trace expression; repeatable.",
    )
    parser.add_argument(
        "--trace-tail",
        type=int,
        default=50,
        help="Tail sample count for trace commands. Use 0 to disable tail sampling.",
    )
    parser.add_argument(
        "--param",
        action="append",
        default=[],
        help="Parameter key to select; repeatable. Omit to let aimx discover params.",
    )
    parser.add_argument(
        "--include-images",
        action="store_true",
        help="Also collect image metadata with aimx query images.",
    )
    parser.add_argument(
        "--image-expr",
        default="images",
        help="AimQL image expression used when --include-images is set.",
    )
    parser.add_argument(
        "--image-head",
        type=int,
        default=20,
        help="Limit image metadata rows when --include-images is set.",
    )
    parser.add_argument(
        "--aimx",
        # The launcher string is tokenized with shlex.split() later, so the
        # default must be shell-quoted: an interpreter path containing spaces
        # or backslashes would otherwise be split or mangled.
        default=shlex.join([sys.executable, "-m", "aimx"]),
        help='Launcher for aimx, for example "aimx", "uv run aimx", or "python -m aimx".',
    )
    parser.add_argument("--timeout", type=int, default=60, help="Per-command timeout in seconds.")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output.")
    args = parser.parse_args(argv)

    # Reject launchers that cannot be tokenized or are empty up front; an empty
    # launcher would make the first aimx argument ("query") the executed program.
    try:
        launcher = shlex.split(args.aimx)
    except ValueError as exc:
        parser.error(f"--aimx is not a valid command line: {exc}")
    if not launcher:
        parser.error("--aimx must not be empty")
    return args


def aimql_string(value: str) -> str:
    """Return *value* as a single-quoted AimQL string literal.

    Backslashes are escaped first so the backslashes added for quotes are not
    doubled afterwards.
    """
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def metric_expression(base_expr: str, metric_name: str) -> str:
    """Combine a run scope with an exact ``metric.name`` match."""
    return f"({base_expr}) and metric.name == {aimql_string(metric_name)}"


def run_json(base_cmd: list[str], args: list[str], timeout: int) -> CommandPayload:
    """Run ``base_cmd + args`` and capture the result as a ``CommandPayload``.

    Args:
        base_cmd: Launcher tokens, e.g. ``["python", "-m", "aimx"]``.
        args: aimx sub-command and options appended to the launcher.
        timeout: Per-command timeout in seconds.

    Returns:
        A payload that is never raised as an exception for launch failures,
        timeouts, or non-zero exits; those are reported with ``ok=False``.
        A zero exit with non-JSON stdout is reported with ``ok=True`` and the
        raw text in ``message``.
    """
    argv = [*base_cmd, *args]
    try:
        completed = subprocess.run(
            argv,
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except OSError as exc:
        # Covers a missing launcher (FileNotFoundError) as well as other launch
        # failures such as PermissionError for a non-executable file.
        return CommandPayload(argv=argv, ok=False, error=str(exc))
    except subprocess.TimeoutExpired as exc:
        return CommandPayload(argv=argv, ok=False, error=f"Timed out after {timeout}s: {exc}")

    stdout = completed.stdout.strip()
    stderr = completed.stderr.strip()
    if completed.returncode != 0:
        return CommandPayload(
            argv=argv,
            ok=False,
            message=stdout or None,
            error=stderr or f"Command exited with status {completed.returncode}",
        )

    if not stdout:
        return CommandPayload(argv=argv, ok=True, data=None)

    try:
        return CommandPayload(argv=argv, ok=True, data=json.loads(stdout))
    except json.JSONDecodeError:
        # A successful command that printed plain text is kept as a message
        # rather than treated as a failure.
        return CommandPayload(argv=argv, ok=True, message=stdout)


def collect_snapshot(args: argparse.Namespace) -> tuple[dict[str, Any], bool]:
    """Run every requested aimx command and bundle the results.

    Metric expressions are the explicit ``--metric-expr`` values followed by
    expressions generated from ``--metric``; when neither is given, a single
    expression matching every metric in the run scope is used.  Traces are
    collected only for ``--trace-expr`` / ``--trace-metric`` values.

    Returns:
        ``(snapshot, ok)`` where ``snapshot`` is the JSON-serializable bundle
        and ``ok`` is True only if every executed command reported ``ok``.
    """
    base_cmd = shlex.split(args.aimx)

    metric_exprs = list(args.metric_expr)
    metric_exprs.extend(metric_expression(args.base_expr, name) for name in args.metric)
    if not metric_exprs:
        metric_exprs.append(f"({args.base_expr}) and metric.name != ''")

    trace_exprs = list(args.trace_expr)
    trace_exprs.extend(metric_expression(args.base_expr, name) for name in args.trace_metric)

    params_args = ["query", "params", args.base_expr, "--repo", args.repo, "--json"]
    for key in args.param:
        params_args.extend(["--param", key])
    params = run_json(base_cmd, params_args, args.timeout)

    metrics = [
        run_json(base_cmd, ["query", "metrics", expr, "--repo", args.repo, "--json"], args.timeout)
        for expr in metric_exprs
    ]

    trace_common = ["--repo", args.repo, "--json"]
    if args.trace_tail > 0:
        trace_common.extend(["--tail", str(args.trace_tail)])
    traces = [
        run_json(base_cmd, ["trace", expr, *trace_common], args.timeout)
        for expr in trace_exprs
    ]

    images = None
    if args.include_images:
        image_args = [
            "query",
            "images",
            args.image_expr,
            "--repo",
            args.repo,
            "--json",
            "--head",
            str(args.image_head),
        ]
        images = run_json(base_cmd, image_args, args.timeout)

    snapshot: dict[str, Any] = {
        "repo": args.repo,
        "base_expr": args.base_expr,
        "read_only": True,
        "params": params.as_dict(),
        "metrics": [item.as_dict() for item in metrics],
        "traces": [item.as_dict() for item in traces],
        "images": images.as_dict() if images is not None else None,
    }

    results = [params, *metrics, *traces]
    if images is not None:
        results.append(images)
    ok = all(item.ok for item in results)
    return snapshot, ok


def main(argv: Sequence[str] | None = None) -> int:
    """Print the snapshot as JSON and return the process exit status (0 or 2)."""
    args = parse_args(argv)
    snapshot, ok = collect_snapshot(args)
    indent = 2 if args.pretty else None
    print(json.dumps(snapshot, indent=indent, sort_keys=args.pretty))
    return 0 if ok else 2


if __name__ == "__main__":
    raise SystemExit(main())
clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Validation pass 1 completed on 2026-04-24. +- No clarification markers remain. The spec assumes experiment-name comparison + means filtering, grouping, sorting, and labeling params results by experiment + name rather than building a broader experiment analytics dashboard. +- Implementation verification completed on 2026-04-24. +- Quickstart sections 2-7 were smoke-checked against `data/.aim`; invalid repo + and invalid `--param` usage returned the expected exit code 2 errors. +- Full regression suite passed with `uv run pytest -q`: 259 passed, 1 skipped, + 15 warnings. diff --git a/specs/004-run-params-query/contracts/cli-output.md b/specs/004-run-params-query/contracts/cli-output.md new file mode 100644 index 0000000..9d212e7 --- /dev/null +++ b/specs/004-run-params-query/contracts/cli-output.md @@ -0,0 +1,107 @@ +# CLI Output Contract: `aimx query params` + +**Feature**: `004-run-params-query` + +This contract defines the observable CLI behavior for run parameter querying. + +## Command Shape + +```text +aimx query params <expression> [--repo <path>] [--json] [--oneline | --plain] + [--no-color] [--verbose] [--param <key>]... +``` + +## Owned Options + +| Option | Applies To | Behavior | +|--------|------------|----------| +| `--repo <path>` | params | Uses a local Aim repository root or `.aim` directory. Defaults to the current directory. | +| `--json` | params | Emits a JSON document and no rich formatting. | +| `--oneline` / `--plain` | params | Emits tab-separated rows suitable for shell pipelines. | +| `--no-color` | params | Disables ANSI styling in human-readable output. | +| `--verbose` | params | Includes expanded header details when supported by the renderer. | +| `--param <key>` | params only | Adds one flattened parameter key to the comparison view. May be repeated. 
| + +Using `--param` with `metrics` or `images` is an error with exit code `2`. + +## Query Expression + +The expression is passed to Aim's run-query evaluator after existing short +`run.hash` literals are expanded. + +Examples: + +```bash +aimx query params "run.experiment == 'cloud-segmentation'" --repo data +aimx query params "run.hparam.lr == 0.0001" --repo data --param hparam.lr +aimx query params "run.hash == 'eca37394'" --repo data --json +``` + +## Human-Readable Output + +Default output is a comparison table with: + +- repository and query summary +- run hash +- experiment label +- run name when available +- selected parameter columns, or a readable default set of discovered params + +Missing requested params are displayed as `-`. Runs with no params remain in +the output and are marked clearly. + +## Plain Output + +Plain output emits one tab-separated row per matched run: + +```text +<repo> <short_hash> <experiment> <run_name> <key>=<value> ... +``` + +Missing requested params are emitted as `key=-`. + +## JSON Output + +JSON output uses this stable top-level envelope: + +```json +{ + "target": "params", + "repo": "data", + "expression": "run.experiment == 'cloud-segmentation'", + "runs_count": 1, + "param_keys": ["hparam.lr", "hparam.optimizer"], + "runs": [ + { + "hash": "eca37394eeb84f48a5d2d736", + "experiment": "cloud-segmentation", + "name": "ucloudnet-pre-0503", + "params": { + "hparam.lr": 0.0001, + "hparam.optimizer": "AdamW" + }, + "missing_params": [] + } + ] +} +``` + +## Exit Status + +| Condition | Exit Status | Output | +|-----------|-------------|--------| +| Valid query with one or more matches | `0` | Rendered params result | +| Valid query with zero matches | `0` | Explicit no-results message or empty JSON envelope | +| Run has no params | `0` | Run remains visible with no-params marker | +| Missing requested param on a run | `0` | Missing value marker for that run | +| Missing repository path | `2` | Actionable error on stderr | +| Invalid query expression | `2` | Actionable error on stderr 
| +| Invalid `--param` usage | `2` | Actionable error on stderr | + +## Non-Regression Requirements + +- `aimx query metrics ...` JSON, plain, and rich output shapes remain unchanged. +- `aimx query images ...` JSON, plain, rich, and inline image behavior remain + unchanged. +- Commands outside the owned `aimx` surfaces continue to delegate to native + `aim`. diff --git a/specs/004-run-params-query/data-model.md b/specs/004-run-params-query/data-model.md new file mode 100644 index 0000000..5d5ced0 --- /dev/null +++ b/specs/004-run-params-query/data-model.md @@ -0,0 +1,121 @@ +# Phase 1 Data Model: Run Params Query And Experiment Comparison + +**Feature**: `004-run-params-query` + +## Params Query Invocation + +Represents one CLI request to inspect run parameters. + +**Fields**: + +- `target`: literal `params` +- `expression`: Aim run query expression supplied by the user +- `repo_path`: local repository root or `.aim` path +- `output_mode`: one of rich, plain/oneline, or JSON +- `param_keys`: ordered tuple of user-requested flattened parameter keys + +**Validation Rules**: + +- `expression` must not be empty. +- `repo_path` must exist and is normalized so a `.aim` path resolves to its + parent repository root. +- `param_keys` may be empty; when present, each key must be non-empty after + trimming whitespace. +- Duplicate `param_keys` are rejected after trimming to avoid duplicate output + columns. +- `--param` is valid only for the `params` target. + +## Run Identity + +Identifies one matched Aim run in all output modes. + +**Fields**: + +- `hash`: full Aim run hash +- `short_hash`: display helper derived from the full hash +- `experiment`: experiment name, nullable +- `name`: run display name, nullable +- `creation_time`: run creation timestamp, nullable + +**Relationships**: + +- One Run Identity belongs to one Run Parameter Set. +- Run Identity is used for sorting and grouping display output. 
+ +**Validation Rules**: + +- `hash` must be present for every returned run. +- Missing `experiment`, `name`, or `creation_time` values are allowed and must + not hide the run. + +## Run Parameter Set + +The parameter data associated with one matched run. + +**Fields**: + +- `run`: Run Identity +- `params`: flattened dictionary of parameter key to value +- `selected_keys`: ordered tuple of keys requested for comparison +- `missing_keys`: ordered tuple of requested keys absent from this run + +**Relationships**: + +- Produced by collecting metadata attributes from one Aim run. +- Included as one row in a Params Query Result. + +**Validation Rules**: + +- Nested metadata dictionaries are flattened with dot-separated paths. +- Scalar values are preserved where possible. +- Lists or nested non-scalar values are preserved in JSON and shortened for + human-readable output. +- Missing requested keys are represented explicitly rather than dropping the + run. + +## Experiment Label + +The experiment grouping value used in params comparison output. + +**Fields**: + +- `value`: experiment name from the run, nullable +- `display_value`: non-empty display fallback for missing names + +**Relationships**: + +- Derived from Run Identity. +- Used to sort or group Params Query Result rows. + +**Validation Rules**: + +- Empty or missing experiment names must remain distinguishable from real + experiment names. +- Sorting must remain deterministic when experiment labels collide. + +## Params Query Result + +The complete response returned by the params query workflow. 
+ +**Fields**: + +- `target`: literal `params` +- `repo`: normalized repository path +- `expression`: original query expression +- `runs_count`: number of matched runs +- `param_keys`: keys selected for comparison or displayed by default +- `runs`: ordered list of Run Parameter Sets +- `omitted_param_keys`: keys omitted from the human-readable default view, if + any + +**Relationships**: + +- Contains zero or more Run Parameter Sets. +- Rendered into rich, plain/oneline, or JSON output. + +**Validation Rules**: + +- Zero-result queries produce an empty `runs` list and a successful command + result. +- JSON output must include full selected parameter data for every returned run. +- Human-readable output may shorten long values but must not hide run identity. diff --git a/specs/004-run-params-query/plan.md b/specs/004-run-params-query/plan.md new file mode 100644 index 0000000..053c6a6 --- /dev/null +++ b/specs/004-run-params-query/plan.md @@ -0,0 +1,175 @@ +# Implementation Plan: Run Params Query And Experiment Comparison + +**Branch**: `004-run-params-query` | **Date**: 2026-04-24 | **Spec**: [spec.md](/Users/blizhan/data/code/github/aimx/specs/004-run-params-query/spec.md) +**Input**: Feature specification from `/Users/blizhan/data/code/github/aimx/specs/004-run-params-query/spec.md` + +## Summary + +Extend the existing `aimx`-owned `query` command with a read-only `params` +target for run-level parameter comparison. The implementation will route +`aimx query params <expression>` through Aim's run-query surface, collect each +matching run's metadata attributes from the local repository, flatten nested +parameter dictionaries into stable dotted keys, and render the result as a +terminal comparison table, tab-separated plain output, or a stable JSON +envelope. 
The feature stays within the current companion CLI model: no +repository mutation, no native `aim` replacement, no new runtime dependency, +and no behavior change for existing `metrics`, `images`, `trace`, or passthrough +commands. + +## Technical Context + +**Language/Version**: Python 3.12 for development, runtime support `>=3.10,<3.13` +**Primary Dependencies**: Python standard library, `numpy>=1.24`, `rich>=13.7`, `textual-image>=0.12.0`, existing Aim SDK usage for owned query commands via the local/dev environment; no new dependency planned +**Storage**: Existing local Aim repositories on disk, read-only; run params are read from Aim run metadata attributes under `.aim` +**Testing**: pytest unit, integration, and contract suites; sample Aim +repository rooted at `/Users/blizhan/data/code/github/aimx/data` for end-to-end +validation +**Target Platform**: Terminal-first CLI for local shells, SSH sessions, scripts, +and CI on Python-supported platforms +**Project Type**: Single-project Python CLI application +**Performance Goals**: Params queries over the sample repository complete in a +single command invocation; comparison of at least 3 selected params across at +least 3 runs remains readable in terminal output; machine-readable output +includes all returned run rows without truncating selected values +**Constraints**: Read-only; preserve native Aim passthrough behavior; keep +existing `query metrics` and `query images` output contracts stable; avoid +loading metric/image blobs for params queries; support repo root and `.aim` +paths consistently +**Scale/Scope**: One new `query` target (`params`), repeatable `--param KEY` +selection, run-param collection helper, params renderers, help/README updates, +and focused tests + +## Constitution Check + +*GATE: Must pass before Phase 0 research. 
Re-check after Phase 1 design.* + +- [x] Safe coexistence: params queries only read local Aim run metadata; no + normal-path change modifies the installed `aim` package, replaces the + native `aim` executable, or mutates `.aim` repo data. +- [x] Ownership boundary: `aimx` newly owns only the `query params` target and + the `--param KEY` option for that target; existing `metrics`, `images`, + `trace`, help, doctor, version, and native passthrough boundaries remain + unchanged. +- [x] Read-only default: all params query behavior is inspection-only and uses + no Aim mutation APIs. +- [x] CLI-first contract: the plan defines rich terminal output, tab-separated + plain output, and JSON output so shell, SSH, automation, and CI users can + consume the feature. +- [x] Compatibility plan: design reuses the same repo normalization, short-hash + expansion, Aim query error handling, and pytest suites used by existing + owned query commands. + +## Project Structure + +### Documentation (this feature) + +```text +/Users/blizhan/data/code/github/aimx/specs/004-run-params-query/ +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── contracts/ +│ └── cli-output.md +├── checklists/ +│ └── requirements.md +└── tasks.md # created later by /speckit.tasks +``` + +### Source Code (repository root) + +```text +/Users/blizhan/data/code/github/aimx/ +├── README.md # document query params examples +├── src/aimx/ +│ ├── commands/ +│ │ ├── help.py # add params usage to owned-command help +│ │ └── query.py # add params target, --param parsing, dispatch +│ ├── aim_bridge/ +│ │ └── run_params.py # NEW: collect and normalize run params +│ └── rendering/ +│ └── params_views.py # NEW: rich/plain/JSON params renderers +└── tests/ + ├── contract/ + │ └── test_query_contract.py # add params output contract coverage + ├── integration/ + │ └── test_query_command.py # add sample-repo params query coverage + └── unit/ + ├── test_query_helpers.py # add params target and --param parsing 
tests + └── test_run_params.py # NEW: flattening, selection, missing values +``` + +**Structure Decision**: Keep the existing single-project CLI layout. Add +`aim_bridge/run_params.py` so params extraction is not buried in the existing +metric/image bridge, and add `rendering/params_views.py` so query renderers stay +focused by target. `commands/query.py` remains the thin orchestration point that +parses CLI arguments, normalizes repository paths, invokes the bridge, and +chooses the output renderer. + +## Phase 0: Research Summary + +Phase 0 decisions are captured in [research.md](/Users/blizhan/data/code/github/aimx/specs/004-run-params-query/research.md). Key outcomes: + +- Use Aim `Repo.query_runs(expression, report_mode=QueryReportMode.DISABLED)` + for params queries because it returns run-level matches without iterating + metric or image sequences. +- Extract user-visible params from each matched run's + `run.meta_run_tree.collect()["attrs"]`, then flatten nested dictionaries into + dotted keys such as `hparam.lr` and `hparam.optimizer`. +- Support a repeatable `--param KEY` option for focused comparison. Keys are + matched after flattening, so users can request nested params by dotted path. +- Preserve existing query expression behavior by resolving short `run.hash` + literals before forwarding the expression to Aim. Experiment-name filtering + uses Aim's existing run expression fields, for example + `run.experiment == 'cloud-segmentation'`. +- Sort/group params output by experiment label, then run name, then run hash to + make experiment comparison stable and easy to scan. + +## Phase 1: Design Summary + +- Extend `SUPPORTED_TARGETS` with `params` and add `param_keys: tuple[str, ...]` + to `QueryInvocation`. Parse `--param KEY` as a repeatable params-only option; + reject missing values, empty keys, duplicate keys after trimming, and usage on + `metrics` or `images`. 
+- Introduce `RunParams` in `aim_bridge/run_params.py` with `run: RunMeta`, + `params: dict[str, Any]`, `selected_keys: tuple[str, ...]`, and + `missing_keys: tuple[str, ...]`. +- Implement `collect_run_params(expression, repo_path, selected_keys)`: + normalize short hashes, run `Repo.query_runs`, iterate `collection.run`, + extract run metadata with the same semantics as existing query collectors, + flatten metadata attrs, apply selected-key filtering, and never call Aim write + APIs. +- Implement `params_views.py` renderers: + - Rich table: one row per run, columns for run hash, experiment, run name, and + selected/default params; missing values render as `-`; long values are + shortened for terminal fit. + - Plain output: tab-separated rows with repo, short hash, experiment, run + name, and `key=value` cells. + - JSON output: stable envelope with `target`, `repo`, `expression`, + `runs_count`, `param_keys`, and `runs`. +- Default key behavior: when no `--param` is provided, use the sorted union of + flattened param keys for the human/plain view, capped to a readable default + column budget with an omitted-count note; JSON includes the complete params + object for every run. +- Update help and README with examples for all three output modes and for + experiment-name filtering. + +## Post-Design Constitution Check + +- [x] Safe coexistence: design reads run metadata through Aim's public run query + surface and does not modify the installed package, executable, or repo. +- [x] Ownership boundary: all new behavior is inside `aimx query params`; no + unowned native Aim commands are intercepted. +- [x] Read-only default: bridge code extracts metadata attributes only and + avoids `run.set`, tracking, artifact logging, or migration APIs. +- [x] CLI-first contract: rich, plain, and JSON outputs are defined in + `/Users/blizhan/data/code/github/aimx/specs/004-run-params-query/contracts/cli-output.md`. 
+- [x] Compatibility: existing metrics/images query contracts and passthrough + tests remain part of the validation set; params query uses the same Aim + package path already required by owned query commands. + +## Complexity Tracking + +No constitution violations; no exceptional complexity requires justification. +The main design choice is adding two small focused modules instead of extending +the already-mixed metric/image files further. diff --git a/specs/004-run-params-query/quickstart.md b/specs/004-run-params-query/quickstart.md new file mode 100644 index 0000000..c2fa71b --- /dev/null +++ b/specs/004-run-params-query/quickstart.md @@ -0,0 +1,112 @@ +# Quickstart: Run Params Query And Experiment Comparison + +**Feature**: `004-run-params-query` + +## 1. Prepare The Environment + +```bash +uv sync +``` + +The sample Aim repository should exist at: + +```text +/Users/blizhan/data/code/github/aimx/data/.aim +``` + +## 2. Query Params For All Matching Runs + +```bash +uv run aimx query params "run.experiment == 'cloud-segmentation'" --repo data +``` + +Expected result: + +- exit code `0` +- output includes `Repo: data` +- output includes run hashes and experiment labels +- output includes visible parameter values or a no-params marker per run + +## 3. Compare Selected Params + +```bash +uv run aimx query params "run.experiment == 'cloud-segmentation'" --repo data \ + --param hparam.lr \ + --param hparam.optimizer \ + --param hparam.weight_decay +``` + +Expected result: + +- exit code `0` +- the selected keys appear as comparable columns or cells +- missing values are shown as `-` + +## 4. Use Params In The Query Expression + +```bash +uv run aimx query params "run.hparam.lr == 0.0001" --repo data --param hparam.lr +``` + +Expected result: + +- exit code `0` +- every returned row matches the Aim run-query expression + +## 5. 
Machine-Readable Output + +```bash +uv run aimx query params "run.experiment == 'cloud-segmentation'" --repo data \ + --param hparam.lr \ + --param hparam.optimizer \ + --json +``` + +Expected result: + +- valid JSON +- top-level `target` is `params` +- `runs_count` equals the length of `runs` +- each run contains `hash`, `experiment`, `name`, `params`, and + `missing_params` + +## 6. Plain Output For Pipelines + +```bash +uv run aimx query params "run.experiment == 'cloud-segmentation'" --repo data \ + --param hparam.lr \ + --plain +``` + +Expected result: + +- one tab-separated row per matched run +- no ANSI styling +- cells include `hparam.lr=<value>` or `hparam.lr=-` + +## 7. Error Checks + +Invalid repository: + +```bash +uv run aimx query params "run.hash != ''" --repo missing-repo +``` + +Expected: exit code `2`, actionable repository error on stderr. + +Invalid params option usage: + +```bash +uv run aimx query metrics "metric.name == 'loss'" --repo data --param hparam.lr +``` + +Expected: exit code `2`, actionable option error on stderr. + +## 8. Test Commands + +```bash +uv run pytest tests/unit/test_query_helpers.py tests/unit/test_run_params.py -q +uv run pytest tests/integration/test_query_command.py -q +uv run pytest tests/contract/test_query_contract.py -q +uv run pytest -q +``` diff --git a/specs/004-run-params-query/research.md b/specs/004-run-params-query/research.md new file mode 100644 index 0000000..a824abd --- /dev/null +++ b/specs/004-run-params-query/research.md @@ -0,0 +1,127 @@ +# Phase 0 Research: Run Params Query And Experiment Comparison + +**Feature**: `004-run-params-query` +**Date**: 2026-04-24 + +## Decision: Add `aimx query params` As A New Owned Query Target + +Use `aimx query params <expression>` for run parameter inspection and +comparison. + +**Rationale**: The existing command already uses target names (`metrics`, +`images`) to select a data shape. 
Params are run-level data rather than metric +or image sequences, so a third target keeps behavior discoverable and avoids +overloading the existing outputs. + +**Alternatives considered**: + +- Add params columns to `query metrics`: rejected because metrics queries return + metric series, not runs, and would mix two result granularities. +- Add a separate top-level `aimx params` command: rejected because repository + selection, query expressions, output modes, and error handling already exist + under `aimx query`. + +## Decision: Use Aim `Repo.query_runs()` For Matching Runs + +Collect params by calling `Repo.query_runs(expression, +report_mode=QueryReportMode.DISABLED)` and iterating the returned run +collections. + +**Rationale**: Local probing against the sample repository showed Aim exposes +`query_runs` alongside `query_metrics` and `query_images`. It returns run-level +matches without requiring traversal of metric/image sequences, which keeps this +feature read-only and avoids unnecessary blob loading. + +**Alternatives considered**: + +- Derive runs from metric query results: rejected because runs without matching + metrics would be excluded and query expressions would require metric fields. +- Walk repository internals directly: rejected because it would increase + coupling to Aim storage internals beyond the existing public query surface. + +## Decision: Extract Params From Run Metadata Attributes + +Read params from `run.meta_run_tree.collect()["attrs"]` and flatten nested +dictionaries into dotted keys such as `hparam.lr`, `hparam.optimizer`, and +`model`. + +**Rationale**: The sample repository stores user-facing run attributes under +`attrs`; for example, one run has `attrs.hparam.lr`, `attrs.hparam.optimizer`, +and `attrs.model`. Flattening produces stable CLI column names while preserving +nested values for JSON. 
+ +**Alternatives considered**: + +- Use `run.hparams` or `run.params` attributes: rejected because local probing + showed those attributes are absent for the current Aim SDK/sample repository. +- Output raw nested dictionaries only: rejected because side-by-side terminal + comparison needs stable scalar-ish columns. + +## Decision: Select Params With Repeatable `--param KEY` + +Support focused comparison via one or more `--param KEY` options, where `KEY` +is a flattened dotted parameter path. + +**Rationale**: A repeatable option avoids comma parsing ambiguities for param +names, matches common CLI convention, and stays simple in the current manual +argument parser. It also lets users compare nested Aim params directly: +`--param hparam.lr --param hparam.optimizer`. + +**Alternatives considered**: + +- `--params a,b,c`: rejected for v1 because it introduces escaping questions + for commas and nested values. +- Positional param names after the expression: rejected because the existing + parser treats tokens after the expression as flags, and unflagged values would + make errors less clear. + +## Decision: Preserve Aim Query Expression Semantics + +Forward the user expression to Aim after the existing short-hash expansion. For +experiment comparison, users can write expressions such as +`run.experiment == 'cloud-segmentation'`; for param filtering, Aim supports +expressions such as `run.hparam.lr == 0.0001` in the sample repository. + +**Rationale**: Reusing AimQL-style expressions keeps `aimx` a companion CLI and +avoids inventing a second filtering language. Existing short-hash expansion is +already part of the owned query contract and should apply consistently. + +**Alternatives considered**: + +- Add custom `--experiment` filtering: rejected for v1 because Aim expressions + already support experiment fields and custom filters would duplicate query + language behavior. 
+- Post-filter runs in `aimx`: rejected because it would split filtering logic + between Aim and `aimx`, increasing edge cases. + +## Decision: Use Three Output Modes Matching Existing Query Commands + +Render params as rich table output by default, tab-separated rows for +`--plain`/`--oneline`, and a stable JSON envelope for `--json`. + +**Rationale**: This mirrors existing `metrics` and `images` query behavior, +keeps terminal comparison ergonomic, and provides scriptable output without +introducing a separate export feature. + +**Alternatives considered**: + +- JSON only: rejected because the feature is explicitly about convenient + comparison in terminal workflows. +- Rich table only: rejected because the constitution requires scriptable + interfaces where structured automation is realistic. + +## Decision: Sort Params Results For Experiment Comparison + +Order human-readable and plain params results by experiment label, then run +name, then full run hash. + +**Rationale**: The requested experiment-name comparison is easier to scan when +related runs stay together. The run hash remains present so duplicate run names +or empty experiment names are still distinguishable. + +**Alternatives considered**: + +- Preserve Aim iterator order only: rejected because it is less useful for + cross-experiment comparison and may vary with repository internals. +- Add a sorting flag in v1: rejected as unnecessary scope until real users need + alternate ordering. 
diff --git a/specs/004-run-params-query/spec.md b/specs/004-run-params-query/spec.md new file mode 100644 index 0000000..e716bf5 --- /dev/null +++ b/specs/004-run-params-query/spec.md @@ -0,0 +1,206 @@ +# Feature Specification: Run Params Query And Experiment Comparison + +**Feature Branch**: `004-run-params-query` +**Created**: 2026-04-24 +**Status**: Draft +**Input**: User description: "支持Run Params的query,可以查到Params方便对比多个Runs的参数,以及实验名相关对比" + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Query Run Parameters From CLI (Priority: P1) + +As an Aim user working in the terminal, I want `aimx query` to return run +parameter values for matching runs so I can inspect training configuration +differences without opening the Aim UI or writing Python. + +**Why this priority**: The central value of this feature is making run params +queryable from the CLI, especially when comparing multiple runs. + +**Independent Test**: Run a params query against the local test repository rooted +at `data` and confirm that matching runs include their run identity, +experiment name, and parameter values in the output. + +**Acceptance Scenarios**: + +1. **Given** a local Aim repository contains runs with recorded parameters, + **When** the user runs a params query for matching runs, **Then** the output + lists each matching run with its available run parameters. +2. **Given** the query matches multiple runs, **When** the command completes, + **Then** the user can compare parameter values across those runs in a single + terminal result. +3. **Given** a matching run has no recorded parameters, **When** it appears in + the result set, **Then** the output clearly marks that run as having no + parameter values instead of failing the whole query. 
+ +--- + +### User Story 2 - Compare Selected Parameters Across Runs (Priority: P1) + +As a user comparing experiments, I want to focus the params output on selected +parameter names so I can quickly see which hyperparameters or configuration +values differ across runs. + +**Why this priority**: Large runs can contain many parameter fields. Comparison +is only useful if the user can narrow the view to the parameter keys that matter +for the decision at hand. + +**Independent Test**: Run a params query that requests a small set of parameter +names and verify that each matching run is displayed with comparable values for +those names, including clear placeholders for missing values. + +**Acceptance Scenarios**: + +1. **Given** multiple matching runs have overlapping parameter names, **When** + the user requests specific parameter names, **Then** the output aligns those + names across all matching runs for side-by-side comparison. +2. **Given** one run is missing a requested parameter, **When** results are + rendered, **Then** that missing value is shown explicitly without hiding the + run. +3. **Given** the user requests machine-readable output, **When** the params + query succeeds, **Then** the output includes a stable structured + representation of each run and its parameter values for downstream scripts. + +--- + +### User Story 3 - Compare Runs By Experiment Name (Priority: P2) + +As a user tracking multiple experiments, I want params query results to include +and support filtering by experiment name so I can compare runs within one +experiment or across related experiments. + +**Why this priority**: Experiment names are often the natural grouping for +analysis. This story extends the core params query into the experiment-level +comparison requested by the user. 
+ +**Independent Test**: Run params queries that target one experiment name and +multiple experiment names, then verify that results expose experiment labels and +make cross-experiment comparison possible. + +**Acceptance Scenarios**: + +1. **Given** runs belong to different experiments, **When** the user queries + params, **Then** each result includes the experiment name associated with the + run. +2. **Given** the user filters for a specific experiment name, **When** the query + runs, **Then** only runs from the requested experiment are returned. +3. **Given** the user compares related experiment names, **When** the command + completes, **Then** the output groups or sorts results so experiment-level + differences are easy to scan. + +### Edge Cases + +- The query expression is valid but matches zero runs. +- Some matching runs do not have any params recorded. +- Some requested parameter names exist on only a subset of matching runs. +- Parameter values include nested structures, long strings, numbers, booleans, + or null-like values. +- Two or more runs share the same display name but have different run hashes or + experiment names. +- Experiment names are missing, empty, duplicated, or differ only by case. +- The result set is large enough that terminal output must remain concise while + still supporting complete machine-readable output. +- A repository path may point to either a repository root such as `data` or an + Aim metadata directory such as `data/.aim`. + +## Constitution Alignment *(mandatory)* + +- **CA-001 Safety & Mutability**: This feature is read-only. It inspects run + parameters and experiment metadata from existing Aim repositories and MUST NOT + modify `.aim` data, run records, artifacts, or the installed native `aim` + package. +- **CA-002 Ownership Boundary**: `aimx` owns only the new params query behavior + under the existing `query` surface. 
Existing metrics and images query behavior + remains unchanged, and all commands outside the explicitly owned `aimx` + command surfaces continue to pass through to native Aim. +- **CA-003 CLI & Output Contract**: The feature must work in local shells, SSH + sessions, scripts, and CI. Human-readable output must support quick terminal + comparison, and machine-readable output must remain stable enough for scripts + and tests. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The system MUST provide a read-only params query workflow under + `aimx query` for inspecting run parameter values in local Aim repositories. +- **FR-002**: The params query workflow MUST accept a user-provided run query + expression and evaluate it against the selected local repository. +- **FR-003**: Each returned run MUST include a stable run identifier, + experiment name when available, run name when available, and the matching run + parameter values. +- **FR-004**: Users MUST be able to request specific parameter names for focused + comparison across matching runs. +- **FR-005**: When no specific parameter names are requested, the system MUST + provide a useful default params view that identifies available parameter keys + and exposes enough values for users to understand the matched runs. +- **FR-006**: The system MUST represent missing parameter values explicitly when + a requested parameter does not exist on a matching run. +- **FR-007**: The system MUST include experiment names in params query output + and MUST support queries that filter matching runs by experiment name. +- **FR-008**: The system MUST make comparison across experiments easy to scan by + grouping, sorting, or otherwise clearly labeling results by experiment name. +- **FR-009**: The system MUST provide a default human-readable output mode for + terminal comparison of run params. 
+- **FR-010**: The system MUST provide a machine-readable output mode with a + stable top-level shape containing the query target, match count, run metadata, + and parameter values. +- **FR-011**: Zero-match params queries MUST complete as successful, + non-destructive outcomes with an explicit no-results message. +- **FR-012**: Invalid query expressions, invalid repository paths, and + unsupported params query options MUST fail clearly with actionable messages. +- **FR-013**: The params query workflow MUST NOT alter existing metrics query, + images query, or native Aim passthrough behavior. +- **FR-014**: The system MUST document params query usage in the user-facing CLI + help or adjacent project documentation so users can discover the workflow. + +### Key Entities *(include if feature involves data)* + +- **Params Query Invocation**: A single params query request, including the + repository path, run query expression, optional requested parameter names, and + output mode. +- **Run Parameter Set**: The parameter values associated with one Aim run, + including scalar and structured values that users want to compare. +- **Run Identity**: The run identifier and optional run name that distinguish a + run from other results. +- **Experiment Label**: The experiment name associated with a run, used for + filtering, grouping, sorting, and comparison. +- **Params Query Result**: The collection of matched runs and parameter values + returned to the user in human-readable or machine-readable form. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: A user can retrieve run parameter values from the local test + repository rooted at `data` in one params query invocation without writing + Python or opening a GUI. +- **SC-002**: In acceptance testing, 100% of matching runs in a params query + result show run identity and experiment information when those fields exist in + the repository. 
+- **SC-003**: In acceptance testing, users can compare at least three selected + parameter names across at least three matching runs in a single terminal + result. +- **SC-004**: In acceptance testing, zero-match queries, runs with no params, + and missing requested parameter values all produce clear non-destructive + output without traceback-style failures. +- **SC-005**: Machine-readable params query output is parseable by automation + and includes match count, run identity, experiment label, and parameter values + for 100% of returned runs. +- **SC-006**: Existing metrics query, images query, and passthrough contract + tests continue to pass after the params query workflow is added. + +## Assumptions + +- "Run Params" refers to run-level parameter data recorded in an Aim + repository, including hyperparameter-style fields users commonly compare + across training runs. +- "Experiment name related comparison" means params results must expose + experiment names and support filtering or scanning by experiment name; it does + not require a full statistical experiment dashboard in this feature. +- The params query should follow the existing `aimx query` command conventions + for repository selection, query expressions, and output modes. +- Human-readable output may summarize very large or deeply nested values for + terminal readability, while machine-readable output should preserve values in + a script-friendly representation. +- This feature remains limited to read-only local repository inspection and does + not add write, sync, migration, or server-side behavior. 
diff --git a/specs/004-run-params-query/tasks.md b/specs/004-run-params-query/tasks.md new file mode 100644 index 0000000..24936fe --- /dev/null +++ b/specs/004-run-params-query/tasks.md @@ -0,0 +1,241 @@ +# Tasks: Run Params Query And Experiment Comparison + +**Input**: Design documents from `/Users/blizhan/data/code/github/aimx/specs/004-run-params-query/` +**Prerequisites**: plan.md, spec.md, research.md, data-model.md, contracts/cli-output.md, quickstart.md + +**Tests**: Test tasks are included because the feature changes an owned CLI +surface and the constitution requires validation for output contracts, +read-only behavior, safe failure modes, and passthrough non-regression. + +**Organization**: Tasks are grouped by user story so each story can be +implemented and tested as an independently useful increment. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel because it touches different files or depends + only on completed foundation work +- **[Story]**: Maps task to a user story (`US1`, `US2`, `US3`) +- Every task includes exact repository-relative file paths + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Prepare the new params-specific modules and documentation touch +points without changing behavior yet. + +- [X] T001 Create empty implementation modules with module docstrings in `src/aimx/aim_bridge/run_params.py` and `src/aimx/rendering/params_views.py` +- [X] T002 [P] Create the params unit-test placeholder file `tests/unit/test_run_params.py` +- [X] T003 [P] Review and preserve current owned-query behavior references in `src/aimx/commands/query.py`, `src/aimx/rendering/query_views.py`, and `tests/contract/test_query_contract.py` + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Add shared parser and params-data primitives required by every +user story. + +**Critical**: No user story work should begin until this phase is complete. 
+ +- [X] T004 [P] Add parser unit tests for `params` target defaults, repeatable `--param`, missing `--param` value, empty `--param` value, duplicate `--param`, and `--param` rejection on `metrics`/`images` in `tests/unit/test_query_helpers.py` +- [X] T005 Extend `QueryInvocation` and `parse_query_invocation` with `params` target and `param_keys` validation in `src/aimx/commands/query.py` +- [X] T006 [P] Add unit tests for nested params flattening, scalar preservation, non-scalar preservation, and deterministic key ordering in `tests/unit/test_run_params.py` +- [X] T007 Implement `RunParams`, `flatten_params`, and deterministic params key helpers in `src/aimx/aim_bridge/run_params.py` +- [X] T008 Run `uv run pytest tests/unit/test_query_helpers.py tests/unit/test_run_params.py -q` and fix failures in `src/aimx/commands/query.py` and `src/aimx/aim_bridge/run_params.py` + +**Checkpoint**: Parser and params primitives are ready; user story implementation can start. + +--- + +## Phase 3: User Story 1 - Query Run Parameters From CLI (Priority: P1) MVP + +**Goal**: Users can run a read-only params query and see matching run identity, +experiment label, run name, and parameter values. + +**Independent Test**: Run `uv run aimx query params "run.hash != ''" --repo data` +and confirm the output lists matching runs with params or an explicit no-params +marker. 
+ +### Tests for User Story 1 + +- [X] T009 [P] [US1] Add contract tests for default `query params` rich output and JSON envelope shape in `tests/contract/test_query_contract.py` +- [X] T010 [P] [US1] Add integration tests for sample-repo `query params "run.hash != ''" --repo data` and `--repo data/.aim` equivalence in `tests/integration/test_query_command.py` +- [X] T011 [P] [US1] Add integration tests for zero-match params queries and runs with no params in `tests/integration/test_query_command.py` + +### Implementation for User Story 1 + +- [X] T012 [US1] Implement read-only `collect_run_params()` using `Repo.query_runs(..., QueryReportMode.DISABLED)` and existing short-hash expansion in `src/aimx/aim_bridge/run_params.py` +- [X] T013 [US1] Implement params rich, plain, and JSON renderers for default all-param output in `src/aimx/rendering/params_views.py` +- [X] T014 [US1] Wire `params` dispatch through `run_query_command()` and a new `_run_params_query()` branch in `src/aimx/commands/query.py` +- [X] T015 [US1] Ensure missing repositories and invalid params query expressions return exit code `2` with actionable stderr messages via `src/aimx/commands/query.py` +- [X] T016 [US1] Run `uv run pytest tests/contract/test_query_contract.py tests/integration/test_query_command.py -q` and fix US1 failures in `src/aimx/aim_bridge/run_params.py`, `src/aimx/rendering/params_views.py`, and `src/aimx/commands/query.py` + +**Checkpoint**: User Story 1 is fully functional and independently testable. + +--- + +## Phase 4: User Story 2 - Compare Selected Parameters Across Runs (Priority: P1) + +**Goal**: Users can focus params output on selected parameter names and see +explicit missing-value markers across runs. + +**Independent Test**: Run `uv run aimx query params "run.experiment == 'cloud-segmentation'" --repo data --param hparam.lr --param hparam.optimizer --param hparam.weight_decay` +and confirm the selected keys align across matching runs with `-` for missing +values. 
+ +### Tests for User Story 2 + +- [X] T017 [P] [US2] Add unit tests for selected-key filtering and missing-key tracking in `tests/unit/test_run_params.py` +- [X] T018 [P] [US2] Add contract tests for `--param` JSON `param_keys`, `params`, and `missing_params` fields in `tests/contract/test_query_contract.py` +- [X] T019 [P] [US2] Add integration tests for three selected params across at least three sample-repo runs in `tests/integration/test_query_command.py` + +### Implementation for User Story 2 + +- [X] T020 [US2] Implement selected-key filtering and `missing_keys` calculation in `src/aimx/aim_bridge/run_params.py` +- [X] T021 [US2] Update params renderers to align selected keys and render missing selected values as `-` in `src/aimx/rendering/params_views.py` +- [X] T022 [US2] Pass `invocation.param_keys` into `collect_run_params()` from `_run_params_query()` in `src/aimx/commands/query.py` +- [X] T023 [US2] Run `uv run pytest tests/unit/test_run_params.py tests/contract/test_query_contract.py tests/integration/test_query_command.py -q` and fix US2 failures in `src/aimx/aim_bridge/run_params.py`, `src/aimx/rendering/params_views.py`, and `src/aimx/commands/query.py` + +**Checkpoint**: User Stories 1 and 2 both work independently. + +--- + +## Phase 5: User Story 3 - Compare Runs By Experiment Name (Priority: P2) + +**Goal**: Params results include experiment labels, support Aim expression +filtering by experiment name, and sort/group results so experiment comparisons +are easy to scan. + +**Independent Test**: Run `uv run aimx query params "run.experiment == 'cloud-segmentation'" --repo data --param hparam.lr` +and confirm only matching experiment rows are returned, with stable experiment +labels in human-readable and JSON output. 
+ +### Tests for User Story 3 + +- [X] T024 [P] [US3] Add unit tests for experiment-aware sorting with missing, empty, duplicate, and case-varied experiment labels in `tests/unit/test_run_params.py` +- [X] T025 [P] [US3] Add integration tests for experiment-name filtering and experiment-label presence in params JSON/plain/rich outputs in `tests/integration/test_query_command.py` +- [X] T026 [P] [US3] Add contract tests proving `query metrics` and `query images` reject `--param` while existing output envelopes remain unchanged in `tests/contract/test_query_contract.py` + +### Implementation for User Story 3 + +- [X] T027 [US3] Implement experiment-aware params result sorting by experiment label, run name, then run hash in `src/aimx/aim_bridge/run_params.py` +- [X] T028 [US3] Update params renderers to keep experiment labels visible in rich, plain, and JSON output in `src/aimx/rendering/params_views.py` +- [X] T029 [US3] Update owned-command help text with `query params` and experiment-filter examples in `src/aimx/commands/help.py` +- [X] T030 [US3] Run `uv run pytest tests/unit/test_run_params.py tests/integration/test_query_command.py tests/contract/test_query_contract.py -q` and fix US3 failures in `src/aimx/aim_bridge/run_params.py`, `src/aimx/rendering/params_views.py`, `src/aimx/commands/query.py`, and `src/aimx/commands/help.py` + +**Checkpoint**: All user stories are independently functional. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Complete discoverability, safety validation, and full regression +coverage across the feature. 
+ +- [X] T031 [P] Update README params-query examples for rich, `--plain`, `--json`, `--param`, and experiment filtering in `README.md` +- [X] T032 [P] Update quickstart verification notes if implementation behavior differs from planned examples in `specs/004-run-params-query/quickstart.md` +- [X] T033 Run quickstart sections 2-7 manually and record any deviations in `specs/004-run-params-query/quickstart.md` +- [X] T034 Run passthrough and owned-command regression tests with `uv run pytest tests/contract/test_cli_contract.py tests/integration/test_missing_native_aim.py tests/integration/test_missing_python_aim_package.py -q` and fix regressions in `src/aimx/router.py`, `src/aimx/cli.py`, or `src/aimx/commands/query.py` +- [X] T035 Run the full suite with `uv run pytest -q` and fix any regressions in touched files under `src/aimx/` and `tests/` +- [X] T036 Update `specs/004-run-params-query/checklists/requirements.md` with final implementation verification notes for params query readiness + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 Setup**: No dependencies; can start immediately. +- **Phase 2 Foundational**: Depends on Phase 1; blocks every user story. +- **Phase 3 US1**: Depends on Phase 2; MVP scope. +- **Phase 4 US2**: Depends on Phase 2 and can start after US1 render/collection shapes exist. +- **Phase 5 US3**: Depends on Phase 2 and benefits from US1/US2 renderer coverage. +- **Phase 6 Polish**: Depends on whichever user stories are included in the delivery. + +### User Story Dependencies + +- **US1 (P1)**: First independently valuable slice; no dependency on US2 or US3. +- **US2 (P1)**: Builds on the same params target and parser foundation; requires `param_keys` plumbing from Phase 2 and can be implemented after or alongside late US1 renderer work with coordination. 
+- **US3 (P2)**: Uses the params rows from US1 and selected-key behavior from US2 for richer comparison, but experiment filtering itself is independently testable through Aim expressions. + +### Within Each User Story + +- Write tests first and confirm they fail for the missing behavior. +- Implement bridge/data extraction before renderers. +- Implement renderers before command dispatch assertions. +- Run the story-specific pytest command before moving to the next story. + +--- + +## Parallel Opportunities + +- T002 and T003 can run in parallel with T001 once file ownership is clear. +- T004 and T006 can run in parallel because they touch different test files. +- T009, T010, and T011 can run in parallel for US1 test coverage. +- T017, T018, and T019 can run in parallel for US2 test coverage. +- T024, T025, and T026 can run in parallel for US3 test coverage. +- T031 and T032 can run in parallel during polish because they touch different docs. + +--- + +## Parallel Example: User Story 1 + +```text +Task: "T009 [P] [US1] Add contract tests for default `query params` rich output and JSON envelope shape in tests/contract/test_query_contract.py" +Task: "T010 [P] [US1] Add integration tests for sample-repo `query params \"run.hash != ''\" --repo data` and `--repo data/.aim` equivalence in tests/integration/test_query_command.py" +Task: "T011 [P] [US1] Add integration tests for zero-match params queries and runs with no params in tests/integration/test_query_command.py" +``` + +## Parallel Example: User Story 2 + +```text +Task: "T017 [P] [US2] Add unit tests for selected-key filtering and missing-key tracking in tests/unit/test_run_params.py" +Task: "T018 [P] [US2] Add contract tests for `--param` JSON `param_keys`, `params`, and `missing_params` fields in tests/contract/test_query_contract.py" +Task: "T019 [P] [US2] Add integration tests for three selected params across at least three sample-repo runs in tests/integration/test_query_command.py" +``` + +## Parallel 
Example: User Story 3 + +```text +Task: "T024 [P] [US3] Add unit tests for experiment-aware sorting with missing, empty, duplicate, and case-varied experiment labels in tests/unit/test_run_params.py" +Task: "T025 [P] [US3] Add integration tests for experiment-name filtering and experiment-label presence in params JSON/plain/rich outputs in tests/integration/test_query_command.py" +Task: "T026 [P] [US3] Add contract tests proving `query metrics` and `query images` reject `--param` while existing output envelopes remain unchanged in tests/contract/test_query_contract.py" +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1 Only) + +1. Complete Phase 1 setup. +2. Complete Phase 2 foundational parser and params primitives. +3. Complete Phase 3 US1. +4. Stop and validate `uv run aimx query params "run.hash != ''" --repo data`. +5. Run US1 contract and integration tests before adding selection or experiment-specific polish. + +### Incremental Delivery + +1. US1: deliver default params discovery and all-output-mode support. +2. US2: add focused comparison via repeatable `--param`. +3. US3: improve experiment-name comparison, sorting, and discoverability. +4. Polish: update docs, run quickstart, and run full regression suite. + +### Multi-Developer Coordination + +- One developer owns `src/aimx/commands/query.py` during parser/dispatch tasks to avoid conflicts. +- One developer can own `src/aimx/aim_bridge/run_params.py` and `tests/unit/test_run_params.py`. +- One developer can own `src/aimx/rendering/params_views.py` and output contract tests. +- Documentation tasks can proceed after the CLI shape is stable. + +--- + +## Notes + +- `[P]` means the task can be parallelized only after its stated phase + dependencies are satisfied. +- Story labels map directly to the spec user stories. +- Keep the command read-only; do not call Aim mutation APIs such as `run.set`, + `track`, artifact logging, migration, or repair operations. 
+- Preserve existing `query metrics`, `query images`, `trace`, and native Aim + passthrough contracts throughout implementation. +- Commit after each phase or a small coherent task group when using the git + hook workflow. diff --git a/src/aimx/__init__.py b/src/aimx/__init__.py index 7d18268..4c7823b 100644 --- a/src/aimx/__init__.py +++ b/src/aimx/__init__.py @@ -1,3 +1,3 @@ __all__ = ["__version__"] -__version__ = "0.3.1" +__version__ = "0.3.2" diff --git a/src/aimx/aim_bridge/run_params.py b/src/aimx/aim_bridge/run_params.py new file mode 100644 index 0000000..c4203de --- /dev/null +++ b/src/aimx/aim_bridge/run_params.py @@ -0,0 +1,106 @@ +"""Read-only helpers for collecting Aim run parameters.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from aimx.aim_bridge.metric_stats import RunMeta, _extract_run_meta + + +@dataclass(frozen=True) +class RunParams: + run: RunMeta + params: dict[str, Any] + selected_keys: tuple[str, ...] = () + missing_keys: tuple[str, ...] 
def flatten_params(params: dict[str, Any], *, prefix: str = "") -> dict[str, Any]:
    """Flatten nested dictionaries into stable dotted parameter keys.

    Nested dictionaries are expanded recursively and their key path is joined
    with ``.``. Every other value (lists included) is kept as a leaf. An empty
    nested dictionary contributes no keys.

    Args:
        params: Possibly nested mapping of parameter names to values.
        prefix: Dotted path of the enclosing dictionary; empty at top level.

    Returns:
        A new flat dictionary keyed by dotted path, with keys inserted in
        sorted order at each nesting level.
    """
    try:
        ordered_keys = sorted(params)
    except TypeError:
        # Keys of different types (for example ``int`` and ``str``) cannot be
        # compared with each other. Order them by their string form instead of
        # failing the whole query; inputs that sorted fine before are untouched.
        ordered_keys = sorted(params, key=str)

    flattened: dict[str, Any] = {}
    for key in ordered_keys:
        value = params[key]
        dotted_key = f"{prefix}.{key}" if prefix else str(key)
        if isinstance(value, dict):
            flattened.update(flatten_params(value, prefix=dotted_key))
        else:
            flattened[dotted_key] = value
    return flattened


def default_param_keys(rows: list[RunParams]) -> tuple[str, ...]:
    """Return the sorted union of the parameter keys present on ``rows``."""
    keys: set[str] = set()
    for row in rows:
        keys.update(row.params)
    return tuple(sorted(keys))


def select_params(params: dict[str, Any], selected_keys: tuple[str, ...]) -> tuple[dict[str, Any], tuple[str, ...]]:
    """Restrict ``params`` to ``selected_keys`` and report the absent ones.

    Args:
        params: Flat parameter mapping for one run.
        selected_keys: Keys to keep, in the order they should appear.

    Returns:
        ``(selected, missing)``. ``selected`` holds the requested keys that
        exist in ``params`` and ``missing`` lists the requested keys that do
        not, both in request order. With no requested keys the original
        ``params`` object is returned as-is together with an empty tuple.
    """
    if not selected_keys:
        return params, ()
    selected: dict[str, Any] = {}
    missing: list[str] = []
    for key in selected_keys:
        if key in params:
            selected[key] = params[key]
        else:
            missing.append(key)
    return selected, tuple(missing)


def sort_run_params(rows: list[RunParams]) -> list[RunParams]:
    """Return ``rows`` ordered by experiment, then run name, then run hash.

    Experiment names are compared case-insensitively; a missing experiment or
    run name sorts as the empty string. The input list is not modified.
    """
    return sorted(
        rows,
        key=lambda row: (
            (row.run.experiment or "").casefold(),
            row.run.name or "",
            row.run.hash,
        ),
    )


def _metadata_attrs(run: Any) -> dict[str, Any]:
    """Return the ``attrs`` mapping from a run's collected metadata tree.

    This is best effort: if the metadata cannot be collected, the collected
    object has no usable ``get``, or ``attrs`` is not a dict, an empty dict is
    returned instead of raising.
    """
    try:
        # The lookup stays inside the guard so that a non-mapping result from
        # ``collect()`` is treated like any other unreadable metadata.
        attrs = run.meta_run_tree.collect().get("attrs", {})
    except Exception:  # noqa: BLE001
        return {}
    return attrs if isinstance(attrs, dict) else {}


def collect_run_params(
    expression: str,
    repo_path: Path,
    selected_keys: tuple[str, ...] = (),
) -> list[RunParams]:
    """Run an Aim run query and return flattened params for each matched run.

    Args:
        expression: Aim run query expression. It is first passed through
            ``resolve_hash_prefixes`` together with ``repo_path``.
        repo_path: Path handed to ``aim.Repo``.
        selected_keys: Optional dotted parameter keys to keep for every run.
            When empty, all flattened params are returned.

    Returns:
        One ``RunParams`` per matched run, ordered by ``sort_run_params``.

    Raises:
        RuntimeError: If the Python ``aim`` package cannot be imported.
    """
    from aimx.aim_bridge.hash_resolver import resolve_hash_prefixes

    expression = resolve_hash_prefixes(expression, repo_path)

    try:
        from aim import Repo
        from aim.sdk.types import QueryReportMode
    except ModuleNotFoundError as error:
        raise RuntimeError(
            "`aimx` requires the Python `aim` package in the current environment."
        ) from error

    repo = Repo(str(repo_path))
    rows: list[RunParams] = []
    query_result = repo.query_runs(expression, report_mode=QueryReportMode.DISABLED)
    for run_collection in query_result.iter_runs():
        run = run_collection.run
        flattened = flatten_params(_metadata_attrs(run))
        params, missing = select_params(flattened, selected_keys)
        rows.append(
            RunParams(
                run=_extract_run_meta(run),
                params=params,
                selected_keys=selected_keys,
                missing_keys=missing,
            )
        )
    return sort_run_params(rows)
a/src/aimx/commands/query.py b/src/aimx/commands/query.py index f9bbc8a..b942ed3 100644 --- a/src/aimx/commands/query.py +++ b/src/aimx/commands/query.py @@ -5,7 +5,15 @@ from pathlib import Path from typing import Any -SUPPORTED_TARGETS = {"metrics", "images"} +SUPPORTED_TARGETS = {"metrics", "images", "params"} +PARAMS_UNSUPPORTED_QUERY_FLAGS = { + "--steps", + "--epochs", + "--head", + "--tail", + "--every", + "--max-images", +} @dataclass(frozen=True) @@ -23,6 +31,7 @@ class QueryInvocation: tail: int | None = None every: int | None = None max_images: int = 6 + param_keys: tuple[str, ...] = () def __post_init__(self) -> None: if self.target not in SUPPORTED_TARGETS: @@ -39,6 +48,8 @@ def __post_init__(self) -> None: raise ValueError("--steps and --epochs are mutually exclusive.") if self.every is not None and self.every < 1: raise ValueError(f"--every must be >= 1, got: {self.every!r}.") + if self.target != "params" and self.param_keys: + raise ValueError("--param is only supported for query params.") @dataclass(frozen=True) @@ -81,11 +92,11 @@ def _parse_non_negative_int(flag: str, raw: str) -> int: def parse_query_invocation(args: list[str]) -> QueryInvocation: if len(args) < 2: raise ValueError( - "Usage: aimx query [--repo ] " + "Usage: aimx query [--repo ] " "[--json] [--oneline] [--no-color] [--verbose] " "[--steps start:end | --epochs start:end] " "[--head N] [--tail N] [--every K] " - "[--max-images N]" + "[--max-images N] [--param KEY]" ) target = args[0] @@ -103,10 +114,13 @@ def parse_query_invocation(args: list[str]) -> QueryInvocation: every: int | None = None repo_value = "." 
max_images: int = 6 + param_keys: list[str] = [] index = 0 while index < len(rest): token = rest[index] + if target == "params" and token in PARAMS_UNSUPPORTED_QUERY_FLAGS: + raise ValueError(f"{token} is not supported for query params.") if token == "--json": output_json = True index += 1 @@ -149,6 +163,18 @@ def parse_query_invocation(args: list[str]) -> QueryInvocation: raise ValueError("Missing value for --repo.") repo_value = rest[index + 1] index += 2 + elif token == "--param": + if index + 1 >= len(rest): + raise ValueError("Missing value for --param.") + if target != "params": + raise ValueError("--param is only supported for query params.") + value = rest[index + 1].strip() + if not value: + raise ValueError("--param must not be empty.") + if value in param_keys: + raise ValueError(f"Duplicate --param value: {value}") + param_keys.append(value) + index += 2 elif token == "--max-images": if index + 1 >= len(rest): raise ValueError("Missing value for --max-images.") @@ -177,6 +203,7 @@ def parse_query_invocation(args: list[str]) -> QueryInvocation: tail=tail, every=every, max_images=max_images, + param_keys=tuple(param_keys), ) @@ -245,7 +272,9 @@ def run_query_command(args: list[str]) -> QueryCommandResult: try: if invocation.target == "metrics": return _run_metrics_query(invocation, normalized_repo_path, header_info, effective_no_color) - return _run_images_query(invocation, normalized_repo_path, header_info, effective_no_color) + if invocation.target == "images": + return _run_images_query(invocation, normalized_repo_path, header_info, effective_no_color) + return _run_params_query(invocation, normalized_repo_path, header_info, effective_no_color) except RuntimeError as error: return QueryCommandResult(exit_status=2, error_message=str(error)) except Exception as error: @@ -367,3 +396,28 @@ def _run_images_query( return QueryCommandResult(exit_status=0, output=combined_output) return QueryCommandResult(exit_status=0, output=summary) + + +def 
_run_params_query( + invocation: QueryInvocation, + repo_path: Path, + header_info: dict[str, Any], + no_color: bool, +) -> QueryCommandResult: + from aimx.aim_bridge.run_params import collect_run_params + from aimx.rendering.params_views import ( + render_params_json, + render_params_oneline, + render_params_rich_table, + ) + + header_info = {**header_info, "param_keys": invocation.param_keys} + rows = collect_run_params(invocation.expression, repo_path, invocation.param_keys) + if invocation.output_json: + return QueryCommandResult(exit_status=0, output=render_params_json(rows, header_info)) + if invocation.plain: + return QueryCommandResult(exit_status=0, output=render_params_oneline(rows, header_info)) + return QueryCommandResult( + exit_status=0, + output=render_params_rich_table(rows, header_info, no_color=no_color), + ) diff --git a/src/aimx/rendering/params_views.py b/src/aimx/rendering/params_views.py new file mode 100644 index 0000000..d061747 --- /dev/null +++ b/src/aimx/rendering/params_views.py @@ -0,0 +1,160 @@ +"""Renderers for `aimx query params` output.""" + +from __future__ import annotations + +import io +import json +import shutil +from typing import Any + +from rich.console import Console +from rich.table import Table + +from aimx.aim_bridge.run_params import RunParams, default_param_keys +from aimx.rendering import colors + +DEFAULT_PARAM_COLUMN_LIMIT = 6 + + +def _short_hash(value: str) -> str: + return value[:8] + + +def _display(value: Any, *, max_len: int = 48) -> str: + if value is None: + text = "null" + elif isinstance(value, bool): + text = "true" if value else "false" + else: + text = str(value) + if len(text) > max_len: + return f"{text[: max_len - 1]}…" + return text + + +def _jsonable(value: Any) -> Any: + if isinstance(value, dict): + return {str(key): _jsonable(item) for key, item in value.items()} + if isinstance(value, (list, tuple)): + return [_jsonable(item) for item in value] + if value is None or isinstance(value, (str, 
int, float, bool)): + return value + return str(value) + + +def _keys_for_display(rows: list[RunParams], limit: int | None = DEFAULT_PARAM_COLUMN_LIMIT) -> tuple[tuple[str, ...], int]: + selected = next((row.selected_keys for row in rows if row.selected_keys), ()) + keys = selected or default_param_keys(rows) + if limit is not None and not selected and len(keys) > limit: + return keys[:limit], len(keys) - limit + return keys, 0 + + +def render_params_rich_table( + rows: list[RunParams], + header_info: dict[str, Any], + *, + no_color: bool = False, +) -> str: + width = 120 if no_color else shutil.get_terminal_size(fallback=(120, 24)).columns + buf = io.StringIO() + console = Console( + file=buf, + no_color=no_color, + force_terminal=not no_color, + width=width, + highlight=False, + ) + + expr = header_info.get("expression", "") + repo = header_info.get("repo", "") + target = header_info.get("target", "params") + total = len(rows) + console.print( + f"[{colors.HEADER}]Repo:[/] {repo} [{colors.HEADER}]·[/] " + f"[{colors.NUMBER_EMPH}]{total}[/] [{colors.HEADER}]match{'es' if total != 1 else ''}[/] " + f"[{colors.HEADER}]·[/] [{colors.HEADER}]{target} where[/] {expr}" + ) + + if not rows: + return buf.getvalue() + + keys, omitted = _keys_for_display(rows) + table = Table( + show_header=True, + header_style=colors.HEADER, + box=None, + pad_edge=True, + show_edge=False, + padding=(0, 1), + ) + table.add_column("RUN", style=colors.RUN_HASH, no_wrap=True) + table.add_column("EXPERIMENT", style=colors.EXPERIMENT, no_wrap=True) + table.add_column("NAME", style=colors.METRIC_NAME, no_wrap=True) + for key in keys: + table.add_column(key, style=colors.CONTEXT_VAL) + if not keys: + table.add_column("PARAMS", style=colors.CONTEXT_VAL) + + for row in rows: + cells = [ + _short_hash(row.run.hash), + row.run.experiment or "", + row.run.name or "", + ] + if keys: + for key in keys: + cells.append(_display(row.params[key]) if key in row.params else "-") + else: + cells.append("no 
params") + table.add_row(*cells) + + console.print(table) + if omitted: + console.print(f"[{colors.HEADER}]... omitted {omitted} parameter columns; use --json for all[/]") + return buf.getvalue() + + +def render_params_oneline(rows: list[RunParams], header_info: dict[str, Any]) -> str: + repo = header_info.get("repo", "") + keys, _ = _keys_for_display(rows, limit=None) + lines: list[str] = [] + for row in rows: + cells = [ + repo, + _short_hash(row.run.hash), + row.run.experiment or "", + row.run.name or "", + ] + for key in keys: + value = _display(row.params[key]) if key in row.params else "-" + cells.append(f"{key}={value}") + if not keys: + cells.append("params=-") + lines.append("\t".join(cells)) + return "\n".join(lines) + + +def render_params_json(rows: list[RunParams], header_info: dict[str, Any]) -> str: + selected = tuple(header_info.get("param_keys") or ()) or next( + (row.selected_keys for row in rows if row.selected_keys), () + ) + param_keys = selected or default_param_keys(rows) + payload: dict[str, Any] = { + "target": header_info.get("target", "params"), + "repo": header_info.get("repo", ""), + "expression": header_info.get("expression", ""), + "runs_count": len(rows), + "param_keys": list(param_keys), + "runs": [ + { + "hash": row.run.hash, + "experiment": row.run.experiment, + "name": row.run.name, + "params": _jsonable(row.params), + "missing_params": list(row.missing_keys), + } + for row in rows + ], + } + return json.dumps(payload) diff --git a/static/params.png b/static/params.png new file mode 100644 index 0000000..8dfeb76 Binary files /dev/null and b/static/params.png differ diff --git a/tests/contract/test_query_contract.py b/tests/contract/test_query_contract.py index 2bb80f0..e6a1594 100644 --- a/tests/contract/test_query_contract.py +++ b/tests/contract/test_query_contract.py @@ -90,6 +90,129 @@ def test_query_images_json_contract_uses_stable_envelope(capfd, sample_repo_root assert payload["rows"][0]["name"] == "example" +def 
test_query_params_json_contract_uses_stable_envelope(capfd, sample_repo_root) -> None: + exit_code = main( + ["query", "params", "run.hash != ''", "--repo", str(sample_repo_root), "--json"] + ) + + captured = capfd.readouterr() + payload = json.loads(captured.out) + assert exit_code == 0 + assert payload["target"] == "params" + assert payload["expression"] == "run.hash != ''" + assert payload["repo"] == str(sample_repo_root) + assert payload["runs_count"] > 0 + assert payload["param_keys"] + assert payload["runs"] + first_run = payload["runs"][0] + assert "hash" in first_run + assert "experiment" in first_run + assert "name" in first_run + assert "params" in first_run + assert "missing_params" in first_run + + +def test_query_params_text_contract_reports_repo_count_and_params( + capfd, sample_repo_root +) -> None: + exit_code = main(["query", "params", "run.hash != ''", "--repo", str(sample_repo_root)]) + + captured = capfd.readouterr() + assert exit_code == 0 + assert "Repo:" in captured.out + assert "match" in captured.out + assert "hparam.lr" in captured.out + + +def test_query_params_json_contract_honors_selected_params( + capfd, sample_repo_root +) -> None: + exit_code = main( + [ + "query", + "params", + "run.hash != ''", + "--repo", + str(sample_repo_root), + "--param", + "hparam.lr", + "--param", + "hparam.weight_decay", + "--param", + "missing.key", + "--json", + ] + ) + + captured = capfd.readouterr() + payload = json.loads(captured.out) + assert exit_code == 0 + assert payload["param_keys"] == ["hparam.lr", "hparam.weight_decay", "missing.key"] + assert payload["runs"] + first_run = payload["runs"][0] + assert set(first_run["params"]) <= {"hparam.lr", "hparam.weight_decay"} + assert "missing.key" in first_run["missing_params"] + + +def test_query_params_zero_match_json_preserves_selected_param_keys( + capfd, sample_repo_root +) -> None: + exit_code = main( + [ + "query", + "params", + "run.name == 'definitely-missing-run'", + "--repo", + 
str(sample_repo_root), + "--param", + "hparam.lr", + "--param", + "missing.key", + "--json", + ] + ) + + captured = capfd.readouterr() + payload = json.loads(captured.out) + assert exit_code == 0 + assert payload["runs_count"] == 0 + assert payload["param_keys"] == ["hparam.lr", "missing.key"] + assert payload["runs"] == [] + + +def test_query_param_option_rejected_for_metrics_and_images(capfd, sample_repo_root) -> None: + metrics_exit = main( + [ + "query", + "metrics", + "metric.name == 'loss'", + "--repo", + str(sample_repo_root), + "--param", + "hparam.lr", + ] + ) + metrics_captured = capfd.readouterr() + + images_exit = main( + [ + "query", + "images", + "images", + "--repo", + str(sample_repo_root), + "--param", + "hparam.lr", + ] + ) + images_captured = capfd.readouterr() + + assert metrics_exit == 2 + assert images_exit == 2 + assert "--param is only supported for query params" in metrics_captured.err + assert "--param is only supported for query params" in images_captured.err + + def test_query_invalid_expression_reports_actionable_error(capfd, sample_repo_root) -> None: exit_code = main( ["query", "metrics", "metric.name ==", "--repo", str(sample_repo_root)] diff --git a/tests/integration/test_query_command.py b/tests/integration/test_query_command.py index fe9d104..0cb70e0 100644 --- a/tests/integration/test_query_command.py +++ b/tests/integration/test_query_command.py @@ -136,6 +136,120 @@ def test_image_query_returns_matches_from_sample_repository(capfd, sample_repo_r assert payload["rows"][0]["name"] == "example" +def test_params_query_accepts_repo_root_and_dot_aim_paths( + capfd, sample_repo_root, sample_repo_dot_aim +) -> None: + root_exit_code = main( + ["query", "params", "run.hash != ''", "--repo", str(sample_repo_root), "--json"] + ) + root_captured = capfd.readouterr() + root_payload = json.loads(root_captured.out) + + dot_aim_exit_code = main( + ["query", "params", "run.hash != ''", "--repo", str(sample_repo_dot_aim), "--json"] + ) + 
dot_aim_captured = capfd.readouterr() + dot_aim_payload = json.loads(dot_aim_captured.out) + + assert root_exit_code == 0 + assert dot_aim_exit_code == 0 + assert root_payload["runs_count"] == dot_aim_payload["runs_count"] + assert root_payload["param_keys"] == dot_aim_payload["param_keys"] + + +def test_params_query_returns_matches_from_sample_repository( + capfd, sample_repo_root +) -> None: + exit_code = main(["query", "params", "run.hash != ''", "--repo", str(sample_repo_root)]) + + captured = capfd.readouterr() + assert exit_code == 0 + assert "match" in captured.out + assert "cloud-segmentation" in captured.out + assert "hparam.lr" in captured.out + + +def test_params_query_zero_matches_succeeds(capfd, sample_repo_root) -> None: + exit_code = main( + [ + "query", + "params", + "run.name == 'definitely-missing-run'", + "--repo", + str(sample_repo_root), + ] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert "0 matches" in captured.out + + +def test_params_query_selected_params_compare_across_runs( + capfd, sample_repo_root +) -> None: + exit_code = main( + [ + "query", + "params", + "run.experiment == 'cloud-segmentation'", + "--repo", + str(sample_repo_root), + "--param", + "hparam.lr", + "--param", + "hparam.optimizer", + "--param", + "missing.key", + "--json", + ] + ) + + captured = capfd.readouterr() + payload = json.loads(captured.out) + assert exit_code == 0 + assert payload["runs_count"] >= 3 + assert payload["param_keys"] == ["hparam.lr", "hparam.optimizer", "missing.key"] + for run in payload["runs"][:3]: + assert "hparam.lr" in run["params"] + assert "hparam.optimizer" in run["params"] + assert "missing.key" in run["missing_params"] + + +def test_params_query_experiment_filter_visible_in_json_plain_and_rich( + capfd, sample_repo_root +) -> None: + args = [ + "query", + "params", + "run.experiment == 'cloud-segmentation'", + "--repo", + str(sample_repo_root), + "--param", + "hparam.lr", + ] + + rich_exit = main(args) + 
rich_captured = capfd.readouterr() + assert rich_exit == 0 + assert "cloud-segmentation" in rich_captured.out + assert "hparam.lr" in rich_captured.out + + plain_exit = main([*args, "--plain"]) + plain_captured = capfd.readouterr() + assert plain_exit == 0 + plain_lines = [line for line in plain_captured.out.splitlines() if line.strip()] + assert plain_lines + assert all("cloud-segmentation" in line for line in plain_lines) + + json_exit = main([*args, "--json"]) + json_captured = capfd.readouterr() + payload = json.loads(json_captured.out) + assert json_exit == 0 + assert payload["runs_count"] > 0 + assert {run["experiment"] for run in payload["runs"]} == {"cloud-segmentation"} + + def test_invalid_query_expression_fails_cleanly(capfd, sample_repo_root) -> None: exit_code = main( ["query", "metrics", "metric.name ==", "--repo", str(sample_repo_root)] diff --git a/tests/integration/test_short_hash_and_steps.py b/tests/integration/test_short_hash_and_steps.py index 8b66da5..95b4cf8 100644 --- a/tests/integration/test_short_hash_and_steps.py +++ b/tests/integration/test_short_hash_and_steps.py @@ -19,10 +19,9 @@ def _first_run_hash(sample_repo_root) -> str: """Return the full hash of one run known to have a 'loss' metric.""" - from aim import Repo + from aimx.aim_bridge.metric_stats import collect_metric_series - repo = Repo(str(sample_repo_root)) - return repo.list_all_runs()[0] + return collect_metric_series("metric.name == 'loss'", sample_repo_root)[0].run.hash # --------------------------------------------------------------------------- diff --git a/tests/unit/test_query_helpers.py b/tests/unit/test_query_helpers.py index 001ef37..4189007 100644 --- a/tests/unit/test_query_helpers.py +++ b/tests/unit/test_query_helpers.py @@ -52,6 +52,75 @@ def test_query_invocation_rejects_unsupported_target() -> None: ) +def test_parse_query_invocation_params_defaults() -> None: + inv = parse_query_invocation(["params", "run.hash != ''"]) + + assert inv.target == "params" + 
assert inv.expression == "run.hash != ''" + assert inv.repo_path == Path(".") + assert inv.param_keys == () + + +def test_parse_query_invocation_params_repeated_param_keys() -> None: + inv = parse_query_invocation( + [ + "params", + "run.hash != ''", + "--repo", + "data", + "--param", + "hparam.lr", + "--param", + " hparam.optimizer ", + ] + ) + + assert inv.param_keys == ("hparam.lr", "hparam.optimizer") + + +def test_parse_query_invocation_param_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --param"): + parse_query_invocation(["params", "run.hash != ''", "--param"]) + + +def test_parse_query_invocation_param_empty_value_raises() -> None: + with pytest.raises(ValueError, match="--param must not be empty"): + parse_query_invocation(["params", "run.hash != ''", "--param", " "]) + + +def test_parse_query_invocation_param_duplicate_value_raises() -> None: + with pytest.raises(ValueError, match="Duplicate --param value"): + parse_query_invocation( + ["params", "run.hash != ''", "--param", "hparam.lr", "--param", " hparam.lr "] + ) + + +def test_parse_query_invocation_param_rejected_for_metrics_and_images() -> None: + with pytest.raises(ValueError, match="--param is only supported for query params"): + parse_query_invocation(["metrics", "metric.name == 'loss'", "--param", "hparam.lr"]) + + with pytest.raises(ValueError, match="--param is only supported for query params"): + parse_query_invocation(["images", "images", "--param", "hparam.lr"]) + + +@pytest.mark.parametrize( + "extra_args", + [ + ["--steps", "1:10"], + ["--epochs", "1:10"], + ["--head", "1"], + ["--tail", "1"], + ["--every", "2"], + ["--max-images", "1"], + ], +) +def test_parse_query_invocation_params_rejects_unimplemented_query_flags( + extra_args: list[str], +) -> None: + with pytest.raises(ValueError, match="not supported for query params"): + parse_query_invocation(["params", "run.hash != ''", *extra_args]) + + def test_parse_query_invocation_defaults() -> 
None: inv = parse_query_invocation(["metrics", "metric.name == 'loss'"]) assert inv.target == "metrics" diff --git a/tests/unit/test_run_params.py b/tests/unit/test_run_params.py new file mode 100644 index 0000000..dff648c --- /dev/null +++ b/tests/unit/test_run_params.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from aimx.aim_bridge.metric_stats import RunMeta +from aimx.aim_bridge.run_params import ( + RunParams, + default_param_keys, + flatten_params, + select_params, + sort_run_params, +) +from aimx.rendering.params_views import render_params_oneline, render_params_rich_table + + +def test_flatten_params_preserves_scalar_values_with_dotted_keys() -> None: + params = { + "hparam": {"lr": 0.0001, "optimizer": "AdamW"}, + "model": "UCloudNet", + "enabled": True, + "nothing": None, + } + + assert flatten_params(params) == { + "enabled": True, + "hparam.lr": 0.0001, + "hparam.optimizer": "AdamW", + "model": "UCloudNet", + "nothing": None, + } + + +def test_flatten_params_preserves_non_scalar_values() -> None: + params = {"layers": [32, 64], "nested": {"schedule": {"milestones": [1, 2]}}} + + assert flatten_params(params) == { + "layers": [32, 64], + "nested.schedule.milestones": [1, 2], + } + + +def test_default_param_keys_are_deterministic() -> None: + rows = [ + RunParams( + run=RunMeta("b", "exp", None, None), + params={"z": 1, "a": 2}, + selected_keys=(), + missing_keys=(), + ), + RunParams( + run=RunMeta("a", "exp", None, None), + params={"m": 3, "a": 4}, + selected_keys=(), + missing_keys=(), + ), + ] + + assert default_param_keys(rows) == ("a", "m", "z") + + +def test_select_params_tracks_missing_requested_keys() -> None: + selected, missing = select_params( + {"hparam.lr": 0.0001, "hparam.optimizer": "AdamW"}, + ("hparam.lr", "hparam.weight_decay"), + ) + + assert selected == {"hparam.lr": 0.0001} + assert missing == ("hparam.weight_decay",) + + +def test_sort_run_params_orders_by_experiment_name_and_hash() -> None: + rows = [ + 
RunParams(RunMeta("ccc", "Zeta", "run", None), {"p": 1}), + RunParams(RunMeta("bbb", "", "run", None), {"p": 1}), + RunParams(RunMeta("eee", None, "run", None), {"p": 1}), + RunParams(RunMeta("aaa", "alpha", "run-b", None), {"p": 1}), + RunParams(RunMeta("ddd", "Alpha", "run-a", None), {"p": 1}), + ] + + result = sort_run_params(rows) + + assert [row.run.hash for row in result] == ["bbb", "eee", "ddd", "aaa", "ccc"] + + +def test_render_params_marks_runs_with_no_params() -> None: + rows = [RunParams(RunMeta("abc123", "exp", "run", None), {})] + header = {"target": "params", "repo": "repo", "expression": "run.hash != ''"} + + rich = render_params_rich_table(rows, header, no_color=True) + plain = render_params_oneline(rows, header) + + assert "no params" in rich + assert "params=-" in plain diff --git a/uv.lock b/uv.lock index ab28e6a..ed1d79a 100644 --- a/uv.lock +++ b/uv.lock @@ -96,7 +96,7 @@ wheels = [ [[package]] name = "aimx" -version = "0.3.1" +version = "0.3.2" source = { editable = "." } dependencies = [ { name = "numpy" },