diff --git a/AGENTS.md b/AGENTS.md index 3821d36..0132f9a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -77,6 +77,8 @@ Why `3.12`: - Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, native Aim CLI (external runtime prerequisite for delegated commands), pytest for test automation (001-aim-command-passthrough) - Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, Aim SDK from the dev dependency group for local development and tests (001-aim-command-passthrough) - Existing local Aim repositories on disk, including repo roots that contain a `.aim` metadata directory (001-aim-command-passthrough) +- Python 3.12 for development, runtime support `>=3.10,<3.13` + `rich>=13.7`, `textual-image>=0.12.0` (already declared in `pyproject.toml`) (003-query-images-terminal-render) +- Existing local Aim repositories (read-only). Image bytes are read via `aim.Image.to_pil_image()` (003-query-images-terminal-render) ## Recent Changes - 001-aim-command-passthrough: Added Python 3.12 for development, runtime support `>=3.10,<3.13` + Python standard library, native Aim CLI (external runtime prerequisite for delegated commands), pytest for test automation diff --git a/README.md b/README.md index 187675b..c4477d2 100644 --- a/README.md +++ b/README.md @@ -76,15 +76,60 @@ aimx query metrics "metric.name == 'loss'" --repo data --steps 100:500 aimx query metrics "metric.name == 'loss'" --repo data --steps :50 # first 50 steps aimx query metrics "metric.name == 'loss'" --repo data --steps 100: # from step 100 onwards -# Combine short hash + step range -aimx query metrics "run.hash=='eca37394' and metric.name=='loss'" --repo data --steps 100:300 - -# Images -aimx query images "images" --repo data +# Epoch range filter (mutually exclusive with --steps) +aimx query metrics "metric.name == 'loss'" --repo data --epochs 1:10 +aimx query metrics "metric.name == 'loss'" --repo data --epochs :5 + +# Density subsampling: first N / last N / every K-th point per series +aimx query
metrics "metric.name == 'loss'" --repo data --head 20 +aimx query metrics "metric.name == 'loss'" --repo data --tail 20 +aimx query metrics "metric.name == 'loss'" --repo data --every 5 + +# Combine short hash + step range + head +aimx query metrics "run.hash=='eca37394' and metric.name=='loss'" --repo data --steps 100:300 --head 10 + +# Images — metadata table only (--json / --plain / redirected stdout) +aimx query images "images" --repo data --json +aimx query images "images" --repo data --plain + +# Images — filter by epoch range (affects all output modes) +aimx query images "images" --repo data --epochs 10:50 --plain +aimx query images "images" --repo data --epochs :30 --json + +# Images — global row subsampling (applied to the sorted result list) +aimx query images "images" --repo data --head 5 +aimx query images "images" --repo data --tail 5 +aimx query images "images" --repo data --every 3 + +# Images — inline preview in a modern terminal (iTerm2 / Kitty / WezTerm / Ghostty) +aimx query images "images" --repo data # default: renders up to 6 images inline +aimx query images "images" --repo data --max-images 20 # render more +aimx query images "images" --repo data --max-images 0 # no cap (render all) + +# Combine epoch filter + head + TTY cap +aimx query images "images" --repo data --epochs 10:50 --head 10 --max-images 4 ``` Output modes: `--json` (nested runs→metrics), `--oneline` / `--plain` (tab-separated), -default (rich table). Additional flags: `--steps start:end`, `--no-color`, `--verbose`. +default (rich table with inline image preview). +Filter/sampling flags (affect all output modes): `--steps start:end | --epochs start:end` +(mutually exclusive), `--head N`, `--tail N`, `--every K`. +Additional flags: `--no-color`, `--verbose`, `--max-images N` (images TTY cap only). 
+ +#### Inline image preview + +![aimx query images output preview](static/images.png) + +When stdout is a TTY and `aimx` detects a graphics-capable terminal, `aimx query images` +renders matched images directly in the terminal. On plain ANSI terminals it falls back +to half-block character art — the exit code is still `0`. + +Terminal support is provided by [`textual-image`](https://github.com/lnqs/textual-image/tree/main#support-matrix-1). +Confirmed working terminals include: iTerm2, Kitty, Konsole, WezTerm, foot, tmux (Sixel), +xterm (Sixel), Windows Terminal, and VS Code integrated terminal. Warp and GNOME Terminal +are not supported. + +To disable inline rendering, either redirect stdout (for example `aimx query images "images" --repo data > out.txt`) or pass `--plain` / `--json`. ### `aimx trace` — plot or export a metric time series diff --git a/pyproject.toml b/pyproject.toml index e3819bd..cc6f5e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "aimx" -version = "0.3.0" +version = "0.3.1" description = "A safe CLI-first companion for native Aim" readme = "README.md" requires-python = ">=3.10,<3.13" @@ -8,6 +8,7 @@ dependencies = [ "numpy>=1.24", "plotext>=5.3", "rich>=13.7", + "textual-image>=0.12.0", ] [project.scripts] diff --git a/specs/003-query-images-terminal-render/checklists/requirements.md b/specs/003-query-images-terminal-render/checklists/requirements.md new file mode 100644 index 0000000..4ed6d81 --- /dev/null +++ b/specs/003-query-images-terminal-render/checklists/requirements.md @@ -0,0 +1,35 @@ +# Specification Quality Checklist: Inline Terminal Image Rendering for `aimx query images` + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-04-22 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) — `textual_image` / `rich.Console` 只在 Assumptions 中出现,作为用户在 query 中明确指定的依赖说明;FR 层保持协议/能力描述 +- [x] Focused on
user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain — 已在 2026-04-22 clarify 会话中全部解决,实现完成后再次确认 +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded(仅单帧静态图片;不含视频/GIF) +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- 2026-04-22 clarify 会话解决了 5 个问题:无新开关(FR-008)、默认上限 6 张(FR-007)、保留汇总表格 + 分段打印(FR-005)、依赖警告写 stderr(FR-009)、高度上限为终端行数 1/3(FR-006)。 +- `textual_image` 依赖的加入将在 `/speckit.plan` 阶段纳入实现计划。 diff --git a/specs/003-query-images-terminal-render/contracts/cli-output.md b/specs/003-query-images-terminal-render/contracts/cli-output.md new file mode 100644 index 0000000..c34243f --- /dev/null +++ b/specs/003-query-images-terminal-render/contracts/cli-output.md @@ -0,0 +1,122 @@ +# CLI Output Contract: `aimx query images` (003) + +**Feature**: `003-query-images-terminal-render` +**Date**: 2026-04-22 + +This contract defines the observable CLI output of `aimx query images` after +feature 003 is implemented. It supersedes the relevant portions of the 002 +contract only for the `images` target; the `metrics` target is unchanged. + +--- + +## 1. Command surface + +``` +aimx query images [--repo <path>] [--json] [--oneline | --plain] + [--no-color] [--verbose] [--steps start:end] + [--max-images <N>] +``` + +Changes vs. feature 002: + +- New flag `--max-images <N>` (default: `6`; `0` = unlimited). + - Only affects the default rich TTY path.
+ - Has **no effect** on `--json` or `--plain` output bytes. + +No other new flags are added (clarify Q1). + +## 2. Exit codes (unchanged from 002) + +| Exit | Meaning | +|------|---------| +| `0` | Query executed successfully (even if 0 matches, even if some images failed to render). | +| `2` | Usage error (bad args, bad `--repo`, bad `--max-images`), or underlying Aim error. | + +Image-rendering failures MUST NOT change the exit code. Missing +`textual_image` or PIL MUST NOT change the exit code. + +## 3. Output mode matrix + +Let `TTY := sys.stdout.isatty()`. + +| Mode | stdout bytes | stderr bytes | +|----------------------------------------|---------------------|--------------------------------------| +| `--json` | **identical to 002**| empty (unless query errors) | +| `--plain` / `--oneline` | **identical to 002**| empty (unless query errors) | +| rich + `TTY == False` | **identical to 002**| empty (unless query errors) | +| rich + `TTY == True` | 002 summary table **followed by** §4 block(s) | one-shot dep/decoding warning if any | + +"**identical to 002**" is a hard contract (SC-003): byte-for-byte equal to +the pre-003 output for the same inputs. This is enforced by +`tests/contract/test_query_contract.py`. + +## 4. Rich TTY output layout + +When rendered on a TTY, the stdout stream is the concatenation of: + +1. **Summary table** — produced by the existing + `render_image_rich_table(...)`. Unchanged columns: `RUN`, `EXPERIMENT`, + `NAME`, `CONTEXT`. Unchanged header. +2. **Zero or more section blocks**, one per row in + `ImageRenderPlan.rendered_rows`: + ``` + + ▌ + + + ``` + The header line reuses the existing rich styles (`colors.RUN_HASH`, + `colors.EXPERIMENT`, `colors.METRIC_NAME`, `colors.CONTEXT_VAL`). The + image renderable is produced by `textual_image.renderable.Image`. +3. **Truncation footer** (only when `len(skipped_rows) > 0`): + ``` + ... 
rendered <K> of <M> images, use --max-images=0 for all + ``` + Exactly one line, printed after the last section block. + +Edge cases: + +- `capability.columns < 20` or `capability.rows < 9` + (so `rows//3 < 3`): skip §4 entirely; stdout matches the non-TTY mode + bytes exactly. +- Per-row decoding failure: replace the image renderable with a single + line `[image unavailable: <reason>]`; the section block otherwise + looks the same. + +## 5. `--max-images` semantics + +- `--max-images 6` (default): render the first 6 rows; remaining rows + contribute **only** to §4's truncation footer (no section block, no + image bytes). +- `--max-images 0`: unlimited; render every row. +- `--max-images N` with `N < 0` or non-integer: usage error, exit `2`. + +Only the default-rich-TTY path consults `--max-images`. It is a contract +violation for `--max-images` to change `--json`, `--plain`, or +non-TTY output in any way. + +## 6. Warning contract (stderr) + +At most one line may be written to stderr during an otherwise-successful +invocation, in the following shape: + +``` +aimx: inline image rendering unavailable: <reason> +``` + +Triggered when `textual_image` / PIL cannot be imported, or when +terminal-capability detection disables image drawing on a TTY (e.g. +unrecognised `$TERM`). De-duplicated within a single process via a +module-level flag so batched invocations in Python never produce more +than one warning. + +No stderr output is produced on the non-TTY / `--json` / `--plain` paths. + +## 7. Compatibility expectations + +- Aim SDK: `>= 3.29` (covered by the existing `dev` dependency group). +- Python: `>= 3.10, < 3.13` (matches `pyproject.toml`). +- `rich >= 13.7`, `textual-image >= 0.12.0` (already declared). +- Terminal targets explicitly validated: iTerm2, Kitty, WezTerm, Ghostty + (all "auto" protocol), plus tmux-over-xterm and plain `xterm` + (fallback half-block path), plus non-TTY stdout (no image bytes at all).
diff --git a/specs/003-query-images-terminal-render/data-model.md b/specs/003-query-images-terminal-render/data-model.md new file mode 100644 index 0000000..f779fc4 --- /dev/null +++ b/specs/003-query-images-terminal-render/data-model.md @@ -0,0 +1,112 @@ +# Phase 1 Data Model: Inline Terminal Image Rendering for `aimx query images` + +**Feature**: `003-query-images-terminal-render` +**Date**: 2026-04-22 + +This feature is a rendering-layer extension; it does not introduce new +persistent state, storage schemas, or Aim SDK mutations. The "data model" +below captures the **in-memory value objects** added on the rendering path +and clarifies how they interact with the existing `collect_image_series` +output. + +--- + +## 1. `TerminalCapability` + +Captures everything the renderer needs to know about the current terminal. +Resolved once per `aimx query images` invocation, before any image is +decoded. + +| Field | Type | Notes | +|----------------|----------------------------------------|-------| +| `is_tty` | `bool` | Equals `sys.stdout.isatty()`. | +| `columns` | `int` | From `shutil.get_terminal_size(fallback=(120, 24))`. | +| `rows` | `int` | Same source as `columns`. | +| `protocol` | `Literal["auto", "fallback_text", "disabled"]` | `"auto"` when `is_tty` and `textual_image` is available; `"fallback_text"` for the half-block path; `"disabled"` when non-TTY or deps missing. | +| `reason` | `str \| None` | When `protocol == "disabled"`, human-readable reason used in the one-shot stderr warning. | + +Validation rules: + +- `columns >= 20` — below this width the renderer MUST skip image drawing + and revert to metadata-only output (edge case in spec). +- `protocol == "disabled"` iff rendering is suppressed entirely (non-TTY, + `--plain`, `--json`, or missing dependency); in that case + `image_render.render_inline(...)` MUST return an empty string. + +State transitions: none — the value is immutable for the lifetime of the +invocation. + +## 2. 
`ImageRow` (extension, not a new type) + +The existing `collect_image_series` dict is kept as-is for JSON/plain +renderers. The rendering path reads the same dict, plus an **optional** +lazy accessor: + +| Key | Type | Notes | +|------------------|-----------------------------------|-------| +| `run` | `RunMeta` | Unchanged. | +| `name` | `str` | Unchanged. | +| `context` | `dict[str, Any]` | Unchanged. | +| `_image_accessor`| `Callable[[], PIL.Image.Image] \| None` | **New**, private; present only when rendering is possible; invoked lazily inside `image_render.render_inline`. `--json` and `--plain` MUST NOT touch this key. | + +Validation rules: + +- `_image_accessor` is `None` in non-TTY / `--json` / `--plain` runs so the + existing renderers remain byte-identical. +- Calling the accessor MUST NOT raise across the whole renderer; all + exceptions are caught and converted to `[image unavailable: …]`. + +## 3. `ImageRenderPlan` + +Pure data passed between "decide what to render" and "actually render". +Makes the cap logic (Q2/Q5) unit-testable. + +| Field | Type | Notes | +|-----------------|------------------------------------|-------| +| `capability` | `TerminalCapability` | Borrowed from §1. | +| `rendered_rows` | `list[ImageRow]` | The first `max_images` rows (or all if `max_images == 0`). | +| `skipped_rows` | `list[ImageRow]` | Rows whose metadata will still be printed but no image drawn. | +| `target_width` | `int` | `capability.columns` (or slightly less for margins). | +| `max_height` | `int` | `capability.rows // 3`; lower bound 3 cells. | +| `max_images` | `int` | Effective cap; `0` means unlimited. | + +Derived rules: + +- If `capability.protocol == "disabled"`, both `rendered_rows` and + `skipped_rows` MUST be empty and the renderer MUST short-circuit. +- `len(rendered_rows) + len(skipped_rows) == len(all_rows)`. +- `max_height >= 3` — any smaller and we skip image drawing entirely and + move the row from `rendered_rows` to `skipped_rows`. + +## 4. 
Invocation flag addition + +| Field on `QueryInvocation` | Type | Default | Notes | +|----------------------------|------|---------|-------| +| `max_images` | `int` | `6` | `0` → unlimited; negative values rejected at parse time. | + +Parsing rules (enforced in `parse_query_invocation`): + +- Token: `--max-images <N>`. +- `<N>` MUST parse as a non-negative integer; otherwise raise + `ValueError("Invalid --max-images value: <N>")`. +- No other new flags are introduced (per clarify Q1). + +## 5. Relationships + +``` +QueryInvocation ──────────────► ImageRenderPlan ◄────── TerminalCapability + │ │ + │ max_images, target=images │ rendered_rows/skipped_rows + ▼ ▼ + collect_image_series ────► list[ImageRow] (with _image_accessor on TTY) + │ + ▼ + render_image_rich_table ──► summary (stdout) + │ + ▼ + image_render.render_inline ──► inline images (stdout) +``` + +No persisted state. No cross-invocation cache. No shared mutable singleton +aside from the one-shot stderr warning flag guarded by a module-level +`bool`. diff --git a/specs/003-query-images-terminal-render/plan.md b/specs/003-query-images-terminal-render/plan.md new file mode 100644 index 0000000..03f26e9 --- /dev/null +++ b/specs/003-query-images-terminal-render/plan.md @@ -0,0 +1,166 @@ +# Implementation Plan: Inline Terminal Image Rendering for `aimx query images` + +**Branch**: `003-query-images-terminal-render` | **Date**: 2026-04-22 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/003-query-images-terminal-render/spec.md` + +## Summary + +Extend the existing `aimx`-owned `query images` command so that, when stdout is a +TTY and the rich rendering path is in effect, `aimx` not only prints the +summary metadata table but also decodes each matched Aim image and renders it +**inline** in the terminal via `rich.Console` + `textual_image.renderable.Image`.
+For `--json`, `--plain`/`--oneline`, and non-TTY stdout, behavior MUST remain +byte-for-byte identical to today — no graphics escapes, no Sixel/Kitty/iTerm2 +bytes, no extra warnings on the main output stream. The feature is purely +additive on the rendering layer: no write-capable paths, no new passthrough +coupling, no change to the Aim SDK surface we use. + +## Technical Context + +**Language/Version**: Python 3.12 for development, runtime support `>=3.10,<3.13` +**Primary Dependencies**: `rich>=13.7`, `textual-image>=0.12.0` (already declared in +`pyproject.toml`), Aim SDK from the `dev` group for tests; no new runtime deps. +**Storage**: Existing local Aim repositories (read-only). Image bytes are read +via `aim.Image.to_pil_image()` which already returns a decoded `PIL.Image`. +**Testing**: pytest with unit + integration suites; rely on the existing sample +repository rooted at `data` for end-to-end verification. +**Target Platform**: Local shell, SSH, and CI. Inline rendering only activates +on TTY stdout; everything else falls back to the current metadata-only path. +**Project Type**: Single-project CLI application (same layout as feature 002). +**Performance Goals**: Per SC-004, a query matching ≥ 50 images MUST return +within 5 seconds on a normal dev machine under default settings (6-image +render cap). +**Constraints**: Read-only; no mutation of `.aim`; MUST preserve existing +`--json` / `--plain` byte output; image rendering failures MUST NOT fail the +command (exit 0 still required when metadata output succeeds). +**Scale/Scope**: One new rendering helper module + minimal wiring in +`_run_images_query`, one new CLI flag `--max-images`, plus terminal-capability +detection. No changes to the passthrough layer. + +## Constitution Check + +*GATE: Must pass before Phase 0 research. 
Re-check after Phase 1 design.* + +- [x] Safe coexistence: feature only adds terminal output inside the `aimx` + rendering layer; does not touch the installed `aim` package, the native + `aim` executable, or `.aim` repository data. +- [x] Ownership boundary: scope is strictly within the already-owned + `aimx query images` surface; no new passthrough claims, no removal of + existing passthrough behavior. +- [x] Read-only default: rendering decodes image blobs in-memory only; no + writes, no caching to disk, no mutation of user data. +- [x] CLI-first contract: `--json` / `--plain` / non-TTY paths remain + unchanged and scriptable; the new TTY rendering is opt-out via those + existing mechanisms (no new toggle, per clarify Q1). +- [x] Compatibility plan: validated against the sample repo under `data` and + against Aim ≥ 3.29 (matches existing `dev` dependency); `textual-image` + is already pinned in `pyproject.toml`. + +## Project Structure + +### Documentation (this feature) + +```text +specs/003-query-images-terminal-render/ +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── contracts/ +│ └── cli-output.md +├── checklists/ +│ └── requirements.md +└── tasks.md # created later by /speckit.tasks +``` + +### Source Code (repository root) + +```text +src/ +└── aimx/ + ├── commands/ + │ └── query.py # extend: parse --max-images, pass capability into renderer + ├── aim_bridge/ + │ └── metric_stats.py # extend: return image sequence object (lazy access to PIL bytes) + └── rendering/ + ├── query_views.py # unchanged public shape; render_image_rich_table keeps today's columns + └── image_render.py # NEW: terminal capability probe + inline image rendering pipeline + +tests/ +├── unit/ +│ ├── test_query_helpers.py # extend: --max-images parsing, default = 6, 0 = unlimited +│ └── test_image_render.py # NEW: capability probe, size clamp (1/3 rows), graceful fallback +├── integration/ +│ └── test_query_command.py # extend: --json/--plain byte-stability, TTY 
vs non-TTY branches +└── contract/ + └── test_query_contract.py # extend: no graphics bytes in --json / --plain output +``` + +**Structure Decision**: Keep the existing single-project CLI layout. Add a new +`src/aimx/rendering/image_render.py` module so that `query.py` stays thin and +`query_views.py` stays pure (string-returning, no side effects). All inline +rendering flows through the new module and is invoked only on the rich TTY +branch inside `_run_images_query`. + +## Phase 0: Research Summary + +Phase 0 decisions are captured in [research.md](/Users/blizhan/data/code/github/aimx/specs/003-query-images-terminal-render/research.md). Key outcomes: + +- Use `textual_image.renderable.Image` over a `rich.Console` with + `force_terminal=True, width=` for inline draw; let `textual_image` + pick Kitty / iTerm2 / Sixel / half-block automatically based on env. +- Detect "can we draw images at all" from `sys.stdout.isatty()` + + env hints (`TERM_PROGRAM`, `KITTY_WINDOW_ID`, `WEZ_*`, etc.) once per + invocation; no active DA queries (SSH/CI safe). +- Decode image bytes via `aim.Image.to_pil_image()` (already pillow), then + resize in PIL to respect both column cap and `rows/3` cap before handing off. +- Render ordering: keep today's rich summary table, then emit one "section + block" per rendered image (header line with run · step · name · context, + blank line, image renderable). Non-rendered overflow entries print only + the header line (no image), followed by a single summary footer. +- Dependency-absence / decoding failure: print a **one-shot** warning to + stderr and continue with metadata output; exit status stays 0. + +## Phase 1: Design Summary + +- Introduce `TerminalCapability` dataclass capturing `is_tty`, `columns`, + `rows`, and a coarse protocol tag (`auto | fallback_text | disabled`). +- Introduce `ImageRenderPlan` dataclass: `(rows_with_image, skipped_count, + max_images, capability)` — pure data, easy to unit test. 
+- Extend `collect_image_series` to include the raw Aim image sequence handle + (or a thin accessor returning bytes / PIL image lazily) without changing + the existing dict keys consumed by JSON/plain renderers. +- Add `--max-images N` to `parse_query_invocation` (default 6, `0` = unlimited); + no other new flags per clarify Q1. +- `_run_images_query` branches: + - `--json` / `--plain`: unchanged (existing renderers only read metadata + keys, not image handles). + - rich + non-TTY stdout: unchanged (metadata table only). + - rich + TTY: call `query_views.render_image_rich_table(...)` first, then + call `image_render.render_inline(rows, capability, max_images)` and + concatenate the two strings; the latter writes image bytes via its own + `Console(file=sys.stdout, force_terminal=True)`, keeping stderr clean. +- Dependency / decode failures are converted into + `[image unavailable: ]` placeholders at the per-row level, with a + one-shot stderr warning that is de-duplicated via a module-level flag. + +## Post-Design Constitution Check + +- [x] Safe coexistence: rendering path is additive; no edits to the `aim` + package, the native executable, or `.aim` data. +- [x] Ownership boundary: all changes remain within `aimx query images`; + passthrough code paths are untouched. +- [x] Read-only default: only `to_pil_image()` on in-memory blobs; no file + writes, no SDK mutation calls. +- [x] CLI-first contract: `--json` / `--plain` / non-TTY paths produce the + same bytes as before (SC-003); the new TTY behavior degrades to + text-only on plain ANSI terminals (SC-002). +- [x] Compatibility: relies only on already-declared deps (`rich`, + `textual-image`, Aim ≥ 3.29 via dev group) and on `PIL` which is a + transitive dep of Aim. + +## Complexity Tracking + +No constitution violations; no exceptional complexity requires justification. +The only nuance is the graceful degradation matrix, which is addressed via +a single new module (`image_render.py`) with focused unit tests. 
diff --git a/specs/003-query-images-terminal-render/quickstart.md b/specs/003-query-images-terminal-render/quickstart.md new file mode 100644 index 0000000..814a8b7 --- /dev/null +++ b/specs/003-query-images-terminal-render/quickstart.md @@ -0,0 +1,98 @@ +# Quickstart: Inline Terminal Image Rendering for `aimx query images` + +**Feature**: `003-query-images-terminal-render` +**Date**: 2026-04-22 + +This quickstart shows how to try the new inline image rendering end-to-end +against the sample Aim repository at `data/` in this workspace. + +--- + +## 1. Prerequisites + +No new installs are required. The runtime deps (`rich`, `textual-image`) +are already in `pyproject.toml`, and the sample repo under `data/` already +contains image-tracked runs (used by feature 002 tests). + +Sync the environment once: + +```bash +uv sync +``` + +## 2. Run the query in a modern terminal (iTerm2 / Kitty / WezTerm / Ghostty) + +```bash +uv run aimx query images "images" --repo data +``` + +Expected: + +- The existing rich summary table is printed at the top (same columns as + feature 002: `RUN`, `EXPERIMENT`, `NAME`, `CONTEXT`). +- Up to 6 images are rendered inline as **section blocks** under the table, + each preceded by a single-line metadata header. +- If more than 6 matches exist, a footer line appears: + `... rendered 6 of M images, use --max-images=0 for all`. + +## 3. Raise or remove the render cap + +```bash +# Render up to 20 images +uv run aimx query images "images" --repo data --max-images 20 + +# Render all matching images (use with care on large repos) +uv run aimx query images "images" --repo data --max-images 0 +``` + +## 4. Verify scripting paths are still byte-clean + +```bash +# JSON output MUST contain zero image bytes +uv run aimx query images "images" --repo data --json | jq . 
+ +# Plain / oneline output MUST stay one line per match +uv run aimx query images "images" --repo data --plain + +# Redirecting stdout disables inline rendering automatically +uv run aimx query images "images" --repo data > /tmp/out.txt +grep -c $'\x1b' /tmp/out.txt # should print 0 (non-TTY stdout: no escape sequences are emitted) +``` + +## 5. Verify graceful fallback on a plain ANSI terminal + +In a non-graphics terminal (e.g. stock `xterm`, `tmux` without +passthrough, or any SSH session without Kitty/iTerm2 protocol support): + +```bash +uv run aimx query images "images" --repo data +``` + +Expected: + +- The summary table prints exactly as in modern terminals. +- The section blocks render via half-block characters (coarser, but legible). +- Exit code is `0`. + +## 6. Verify failure isolation + +- If `textual_image` is uninstalled (for example inside an unusual + environment), the first invocation prints one line to stderr + (`aimx: inline image rendering unavailable: <reason>`) and continues to + print the summary table with exit `0`. Subsequent invocations in the + same Python process do not re-warn. +- If a single image's blob is corrupted, the matching section block shows + `[image unavailable: <reason>]` while every other block renders + normally. Exit code remains `0`. + +## 7. Automated validation + +```bash +uv run pytest tests/unit/test_image_render.py -q +uv run pytest tests/unit/test_query_helpers.py -q +uv run pytest tests/integration/test_query_command.py -q +uv run pytest tests/contract/test_query_contract.py -q +``` + +The contract suite is the guardrail for SC-003: any accidental image-byte +leak into `--json` / `--plain` / non-TTY output will be caught there.
diff --git a/specs/003-query-images-terminal-render/research.md b/specs/003-query-images-terminal-render/research.md new file mode 100644 index 0000000..cf2dc47 --- /dev/null +++ b/specs/003-query-images-terminal-render/research.md @@ -0,0 +1,138 @@ +# Phase 0 Research: Inline Terminal Image Rendering for `aimx query images` + +**Feature**: `003-query-images-terminal-render` +**Date**: 2026-04-22 + +This document captures the Phase 0 decisions that resolve every implicit +unknown in the Technical Context of `plan.md`. All `[NEEDS CLARIFICATION]` +items from the spec were already resolved in the 2026-04-22 clarify session. + +--- + +## R-001 — Rendering library + +- **Decision**: Use `textual_image.renderable.Image` driven by a + `rich.Console` instance (`force_terminal=True, width=`). +- **Rationale**: `textual_image` already probes and selects the best + available image protocol (Kitty graphics, iTerm2 inline, Sixel) and + automatically falls back to half-block ANSI on plain terminals. Using it + as a `rich` Renderable lets us reuse the existing `Console` plumbing in + `src/aimx/rendering/`, keep styling consistent, and avoid a second output + abstraction. Both `rich>=13.7` and `textual-image>=0.12.0` are already + declared in `pyproject.toml`, so no new runtime dependency is introduced. +- **Alternatives considered**: + - `term-image` / `climage`: older APIs, weaker Kitty/iTerm2 coverage, + extra dependency. + - Hand-rolled half-block renderer on top of PIL + ANSI: works only on the + fallback path, doesn't give us iTerm2/Kitty upgrades "for free". + - Writing Sixel directly: narrow terminal support, brittle on tmux. + +## R-002 — Terminal capability detection + +- **Decision**: Detect capability **passively** using `sys.stdout.isatty()` + plus a small env-var classification (`TERM_PROGRAM`, `KITTY_WINDOW_ID`, + `WEZTERM_EXECUTABLE`, `GHOSTTY_RESOURCES_DIR`, `VTE_VERSION`, `TERM`). + No active Device-Attribute (DA) queries. 
+- **Rationale**: DA queries block on stdin reads and can hang over SSH or + in CI runners with no controlling TTY. Passive detection is deterministic, + testable, and matches what `textual_image` itself does internally. +- **Alternatives considered**: + - DA query: rejected for SSH/CI hang risk (violates CA-003). + - "Always try graphics, let terminal gracefully ignore": Sixel garbage + characters leak onto plain terminals and tmux. + +## R-003 — Image decoding source + +- **Decision**: Consume Aim images via `aim.Image.to_pil_image()` (returns a + decoded `PIL.Image.Image`); do **not** touch the raw `BLOB` storage + directly. Resize with PIL before handing the bitmap to `textual_image`. +- **Rationale**: `to_pil_image()` is the stable public path; it handles + multi-frame / palette / RGBA / grayscale transparently. PIL is already a + transitive dependency of Aim, so no additional package is required. +- **Alternatives considered**: + - Read `storage['data']` BLOB directly and decode manually: duplicates + Aim internals, risks breakage across Aim minor versions. + - Skip PIL resize and rely on `textual_image` downsampling only: loses + fine-grained control over the row-count cap from clarify Q5. + +## R-004 — Sizing strategy (confirmed by clarify Q5) + +- **Decision**: Equal-ratio downscale so that both of the following hold: + `target_width ≤ terminal_columns` and + `target_height ≤ terminal_rows // 3` (height measured in character cells, + with one cell ≈ two pixels of vertical resolution when the half-block + fallback is active). +- **Rationale**: A 1/3-screen-height cap guarantees that, even with the + default `--max-images=6`, at most 2 images fully share a screen with + their metadata headers, so users can visually compare adjacent steps + without scrolling. It also bounds the worst-case Sixel payload. 
+- **Alternatives considered**: no height cap (risk of a single tall image + pushing metadata off-screen); 1/2 cap (too generous for 24-row SSH + sessions); fixed pixel cap (ignores terminal cell size, unstable across + resolutions). + +## R-005 — Output layout (confirmed by clarify Q3) + +- **Decision**: Emit the existing `render_image_rich_table` summary **first**; + follow it with one section block per rendered image consisting of a + single-line metadata header (`▌ `) + and the image renderable beneath it; close with a one-line footer when + the `--max-images` cap truncates the list. +- **Rationale**: Keeping today's summary table intact preserves the + existing machine-friendly eye-scan and the measured columns layout. The + per-item section blocks are simpler to implement than embedding + Renderables inside a rich `Table` cell (the `textual_image` Renderable + does not currently negotiate cell sizing reliably inside tables). +- **Alternatives considered**: inline as a "Preview" column (blocked on + `textual_image` + rich `Table` cell-size interaction); replace the + summary table entirely (regresses on SC-003 because existing users may + have grown accustomed to the table output). + +## R-006 — Default `--max-images` cap (confirmed by clarify Q2) + +- **Decision**: Default to **6**; `--max-images 0` means unlimited. +- **Rationale**: Fits 80–200-column terminals without scrolling; leaves + room for the metadata table above and the 1/3-row image cap below. + Keeps SC-004 (≤ 5 s on ≥ 50-match queries) comfortably in reach because + worst-case decoding + rendering cost scales with the cap, not with the + total match count. + +## R-007 — Opt-out policy (confirmed by clarify Q1) + +- **Decision**: No new toggle. Users who want "no inline images" use any + of the existing mechanisms: `--plain`, `--json`, or redirect stdout to a + file/pipe (breaks the `isatty()` check). `--max-images 0` is for + unlimited rendering, not for disabling it. 
+- **Rationale**: Keeps the CLI surface minimal; avoids the tri-state + `--images={auto,always,never}` pattern that invites drift from the + actual TTY capability. + +## R-008 — Warning channel (confirmed by clarify Q4) + +- **Decision**: Dependency-missing or decode warnings go to **stderr** + exactly once per process; de-duplicated via a module-level flag. +- **Rationale**: Keeps stdout byte-clean for SC-003, matches + `aimx`'s existing error channel (`_run_*_query` writes errors to stderr + at the CLI entrypoint), and follows Unix convention. +- **Alternatives considered**: stdout with `# warning:` prefix (pollutes + pipes); silent-only behind `--verbose` (fails discoverability: users + would not know why the feature did nothing on a bad install). + +## R-009 — Failure isolation + +- **Decision**: Per-row decoding failures render as + `[image unavailable: ...]` in the section block; the + surrounding command continues and still exits 0 provided the metadata + path succeeded. Only true command-level failures (unparseable args, repo + not found) keep the existing non-zero exit. +- **Rationale**: FR-004 requires one bad image not to break the run. + Localizing the placeholder inline makes it obvious which row failed + without forcing users to cross-reference logs. + +## R-010 — Scope boundaries + +- **Decision**: First implementation covers **single-frame still images + only**. If an Aim image sequence contains multi-frame / animated data, + render only the first frame; document this in quickstart. +- **Rationale**: Per spec Assumptions. Animated rendering has different + capability requirements (timing, clearing) and would inflate scope.
diff --git a/specs/003-query-images-terminal-render/spec.md b/specs/003-query-images-terminal-render/spec.md new file mode 100644 index 0000000..4903e72 --- /dev/null +++ b/specs/003-query-images-terminal-render/spec.md @@ -0,0 +1,119 @@ +# Feature Specification: Inline Terminal Image Rendering for `aimx query images` + +**Feature Branch**: `003-query-images-terminal-render` +**Created**: 2026-04-22 +**Status**: Draft +**Input**: User description: "`aimx query images` 命令通过 textual_image 支持直接显示图片 — rich.Console + textual_image.renderable.Image" + +## Clarifications + +### Session 2026-04-22 + +- Q: 如何**完全禁用**内联图片渲染?是否新增专门开关? → A: 不新增开关;完全禁用图片复用既有机制(`--plain` / `--json` / 重定向非 TTY),`--max-images` 仅用于调上限。 +- Q: 默认 `--max-images` 上限应设为多少? → A: 6 张(默认值)。 +- Q: 图片渲染与元数据行的版面关系? → A: 保留顶部 rich 汇总表格,在表格之后按每条匹配分段打印"元数据头 + 下方图片"块。 +- Q: 依赖缺失时 warning 的输出通道? → A: 仅写到 stderr,保持 stdout 干净。 +- Q: 单张图片的高度上限策略? → A: 宽 ≤ 终端列数且高 ≤ 终端行数的 1/3,等比缩放取较严者。 + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - View Matching Images Inline In Terminal (Priority: P1) + +作为一名在本地或通过 SSH 连接到训练机的用户,我在运行 `aimx query images "" --repo data` 时,希望除了当前已有的元数据表格/摘要外,`aimx` 能**直接在终端里把匹配到的图片内容渲染出来**,而不是只打印图片的 run/step/name 引用。这样我不必先把图片复制到本地再用看图工具打开,就能肉眼判断训练产物是否符合预期。 + +**Why this priority**: 这是本次 feature 的核心价值。`aimx query images` 的查询结果(元数据)早已可用;让结果"可视化"是区别于 `aim` 原生 CLI 的关键能力,直接决定本 feature 是否值得存在。 + +**Independent Test**: 使用仓库根目录下 `data` 中的测试 Aim 仓库,运行 `aimx query images "images" --repo data`;在支持图片协议(如 iTerm2 / Kitty / WezTerm / Ghostty)或回退到 ANSI 半块字符的终端里,可以观察到每一条匹配项下方或旁边出现对应图片的可视渲染。 + +**Acceptance Scenarios**: + +1. **Given** 仓库 `data` 包含至少一条带图片的 run,**When** 用户在支持图形协议的终端中运行 `aimx query images "images" --repo data`,**Then** 终端里每条匹配结果旁边显示出该图片的可视化渲染,且查询元数据(run、step、name、size)仍然可见。 +2. **Given** 用户使用不支持任何图形协议的普通终端(仅 ANSI),**When** 运行同一条命令,**Then** `aimx` 使用字符级回退(例如半块字符 / 灰度 ASCII)渲染图片,命令退出码仍为 `0`,不输出 `traceback`。 +3. 
**Given** 匹配结果为空,**When** 运行查询,**Then** 输出与原有 `aimx query images` 的空结果行为一致,不尝试渲染任何图片。 + +--- + +### User Story 2 - Suppress Image Rendering When Non-Interactive Or Machine-Readable (Priority: P1) + +作为把 `aimx query images` 嵌入脚本或 CI 的用户,我希望在 `--json`、`--plain`/`--oneline` 或 stdout 非 TTY 的情况下,**绝对不要**插入任何用于绘图的转义序列或半块字符,以免污染下游管道/日志。 + +**Why this priority**: 违反这一点会直接破坏现有使用该命令做自动化的调用方,属于不可回归的契约。 + +**Independent Test**: 运行 `aimx query images "images" --repo data --json | jq .`、`aimx query images ... --plain > out.txt`、以及把输出重定向到文件,检查产物中不包含任何 ANSI 图形/半块/Kitty/iTerm2 图片协议字节序列。 + +**Acceptance Scenarios**: + +1. **Given** 用户传入 `--json`,**When** 运行查询,**Then** 输出为一个合法 JSON 文档,且**不含**任何图片可视化内容。 +2. **Given** 用户传入 `--plain`(或 `--oneline`),**When** 运行查询,**Then** 每行输出保持单行文本、不含图片字节序列。 +3. **Given** stdout 被重定向到文件或管道(`isatty()` 为 False),**When** 未显式开启渲染,**Then** 不渲染图片,行为与当前版本保持一致。 +4. **Given** 用户传入 `--no-color`,**When** 运行查询,**Then** 继续按 User Story 1 的渲染路径工作,但不使用彩色元数据(是否渲染图片取决于 TTY,不受 `--no-color` 影响)。 + +--- + +### User Story 3 - Bounded Output For Large Result Sets (Priority: P2) + +作为一次可能匹配到数十/数百条图片的用户,我不希望终端被一次性灌满上千张图片,而是默认只渲染一个可控数量的代表性样本,其余仅保留元数据行并提示"已省略 N 张"。 + +**Why this priority**: 避免默认行为在大结果集上导致终端卡死或 SSH 断连;是可用性底线,但只要 P1 就绪就可独立交付。 + +**Independent Test**: 人为构造一个匹配 ≥ 50 条图片的查询,确认默认只渲染前 N 条(N 见 Assumptions),其余只打印元数据并在尾部出现省略提示;通过显式上限标志(见 FR-008)可调整或关闭该上限。 + +**Acceptance Scenarios**: + +1. **Given** 查询匹配到超过默认上限的图片,**When** 按默认参数运行,**Then** 仅前若干条渲染图像,末尾打印形如 `... rendered N of M images, use --max-images=0 for all` 的摘要。 +2. 
**Given** 用户显式传入 `--max-images 0`,**When** 运行查询,**Then** 所有匹配图片全部渲染(用户自担风险)。 + +--- + +### Edge Cases + +- 图片 blob 缺失 / 损坏 / 解码失败:跳过该条渲染,行内追加简短错误标记(如 `[image unavailable]`),不影响其他条目与退出码。 +- 终端宽度极窄(< 20 列):降级为纯元数据输出,不尝试绘图。 +- `textual_image` 或其依赖(PIL 等)运行时不可用:命令仍能完成元数据输出,首行打印一次性警告,后续不再刷屏。 +- 非 RGB 图片(灰度、RGBA、P 调色板):统一转换后渲染,不抛异常。 +- 图片尺寸极大(如 >4K):在渲染前按终端 cell 尺寸缩放到合理宽度(见 FR-006)。 +- `--steps` 过滤后剩余 0 条:不渲染,不报错。 + +## Constitution Alignment *(mandatory)* + +- **CA-001 Safety & Mutability**: 本 feature 只新增"展示层"能力,**纯只读**。不写入 `.aim` 数据,不改动已安装的 `aim` 包;仅在 `aimx` 自己的渲染路径中新增输出。因此无需额外的 opt-in 风险提示。 +- **CA-002 Ownership Boundary**: `aimx query images` 已归属 `aimx` 实现(见 `src/aimx/commands/query.py`)。本 feature 仍限定在 `aimx` 侧的 rendering 层扩展,不转译或代理任何 `aim` 原生子命令,不改变 passthrough 的边界。 +- **CA-003 CLI & Output Contract**: 人类可读模式(默认,TTY)新增内联图片;`--json` 与 `--plain`/`--oneline` 输出契约保持不变(不含图片字节);非 TTY 默认退回到无图模式。继续满足 shell/SSH/CI 三种使用形态。 + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: 当且仅当 `aimx query images` 在默认(rich)渲染路径下运行、且 stdout 为 TTY 时,系统 MUST 以内联方式在匹配行附近渲染图片内容。 +- **FR-002**: 当用户传入 `--json` 或 `--plain`/`--oneline`,或 stdout 非 TTY 时,系统 MUST NOT 输出任何图片字节/图形转义序列,行为与当前 `aimx query images` 保持完全一致。 +- **FR-003**: 系统 MUST 优先探测并使用终端原生图片协议(典型如 iTerm2、Kitty、Sixel),在不支持时 MUST 自动回退到纯文本/半块字符渲染,保证在任意 ANSI 终端可用。 +- **FR-004**: 图片渲染失败(解码失败、blob 缺失、依赖不可用等)MUST NOT 使整条查询命令失败;受影响的条目以简短占位信息呈现,其余条目照常输出。 +- **FR-005**: 系统 MUST 先输出现有的 rich 汇总表格(run、step、context、name、shape 等),随后在表格下方按每条匹配项分段打印"元数据头 + 对应图片"块。图片渲染不得改变或覆盖汇总表格的可见性。 +- **FR-006**: 系统 MUST 在渲染前按图片原始宽高比**等比缩放**,同时满足以下两个上限,取较严者:(a) 目标宽度 ≤ 当前终端列数;(b) 目标高度 ≤ 当前终端行数的 1/3(按字符 cell 估算)。避免任何单张图片独占一屏或触发横向换行撕裂。 +- **FR-007**: 默认行为 MUST 对大结果集做数量上限保护:匹配图片数超过默认上限(**6 张**)时,只渲染前 6 张,其余以元数据行呈现,并在结尾追加一次性摘要提示(形如 `... 
rendered 6 of M images, use --max-images=0 for all`)。 +- **FR-008**: 系统 MUST 提供一个命令行开关 `--max-images N` 以调整 FR-007 的上限(`N=0` 表示不限)。系统 MUST NOT 引入专门用于关闭内联渲染的新开关;用户若要在 TTY 下完全不渲染图片,应使用既有的 `--plain` / `--json`,或将 stdout 重定向到管道/文件。 +- **FR-009**: 系统 MUST 保证在依赖 `textual_image` 或其运行时依赖缺失的环境下,命令主流程(元数据输出与退出码)不受影响;缺失信息以**一次性 warning 写到 stderr**(不进入 stdout),同一进程内不重复打印。 + +### Key Entities *(include if feature involves data)* + +- **Image Query Row**: 既有实体,新增一个可选的"已解码图片位图引用"字段,供渲染层消费;该字段不进入 `--json` 输出。 +- **Terminal Capability**: 描述当前 stdout 对应终端的绘图能力(协议支持、列/行数),在命令启动时一次性探测,供后续分支决策使用。 + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: 在支持图形协议的现代终端(iTerm2 / Kitty / WezTerm / Ghostty 任一)中,用户首次运行 `aimx query images "images" --repo data` 时,**无需任何额外参数**即可看到图片内容;达标率 100%(该类终端上)。 +- **SC-002**: 在只支持 ANSI 的通用终端(tmux 默认配置、常规 ssh-only 场景)中,同一命令 MUST 在不报错的前提下输出字符级图像回退,命令退出码为 `0`。 +- **SC-003**: `aimx query images ... --json` 的输出字节与本 feature 引入前的输出字节**逐字节一致**;同样,`--plain` 与重定向到文件的输出不含任何新增的控制序列。 +- **SC-004**: 对匹配 ≥ 50 张图片的仓库,默认命令在普通开发机上 5 秒内返回完毕并停止继续渲染(默认上限生效)。 +- **SC-005**: 在缺失 `textual_image` / PIL 依赖的环境,命令仍能输出元数据表格、退出码为 `0`,并在顶部打印一条关于"内联图片渲染不可用"的警告。 + +## Assumptions + +- 默认匹配上限由 FR-007 固定为 6 张;可通过 `--max-images` 覆盖(`0` 表示不限)。 +- 渲染实现基于已在 User Query 中提出的 `rich.Console + textual_image.renderable.Image` 组合;`textual_image` 将作为 `aimx` 的新运行时依赖加入 `pyproject.toml`(实现阶段处理,不在本 spec 范围)。 +- 终端能力探测仅基于环境变量(`TERM`、`TERM_PROGRAM`、`KITTY_WINDOW_ID` 等)和 `stdout.isatty()`,不做主动的 DA/Device Attribute 查询(避免阻塞 SSH/CI 场景)。 +- 图片 blob 的加载仍沿用既有 `aimx.aim_bridge` 的只读访问路径,不新增对 Aim 内部 API 的写入或变更。 +- 首版只覆盖**单帧静态图片**;多帧序列 / GIF / 视频渲染不在本 feature 范围。 +- 颜色深度以终端支持的最大真彩色为准;`--no-color` 仅影响元数据样式,不强制禁用图片渲染。 diff --git a/specs/003-query-images-terminal-render/tasks.md b/specs/003-query-images-terminal-render/tasks.md new file mode 100644 index 0000000..681f066 --- /dev/null +++ b/specs/003-query-images-terminal-render/tasks.md @@ -0,0 +1,250 @@ +# Tasks: Inline Terminal Image Rendering 
for `aimx query images` + +**Input**: Design documents from `/specs/003-query-images-terminal-render/` +**Prerequisites**: plan.md (required), spec.md (required for user stories), research.md, data-model.md, contracts/cli-output.md, quickstart.md + +**Tests**: Test tasks are included because feature 003 carries hard, testable +CLI contracts that the constitution requires us to guard (read-only +behavior, CLI-first output stability under `--json`/`--plain`/non-TTY, +graceful failure isolation). Contract tests are the primary safety net for +SC-003 byte-stability. + +**Organization**: Tasks are grouped by user story. US1 and US2 are both +priority P1 (US2 is the "do no harm" guardrail that keeps scripting +workflows working); US3 is P2 (bounded output for large result sets). + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks) +- **[Story]**: Which user story this task belongs to (US1, US2, US3) +- Include exact file paths in descriptions + +## Path Conventions + +Single-project CLI layout (matches feature 002): +- Source: `src/aimx/` +- Tests: `tests/unit/`, `tests/integration/`, `tests/contract/` + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Confirm toolchain and dependencies are ready. No new runtime +packages are introduced; both `rich>=13.7` and `textual-image>=0.12.0` are +already declared in `pyproject.toml`. + +- [x] T001 Run `uv sync` at repo root to confirm `rich`, `textual-image`, `aim`, and `pytest` are resolvable; verify `uv run python -c "import textual_image, PIL, aim"` exits 0 + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Ground the new rendering module and the shared invocation +surface. Every user story depends on this phase. + +**⚠️ CRITICAL**: No user story work can begin until this phase is complete. 
+ +- [x] T002 [P] Create `src/aimx/rendering/image_render.py` with module skeleton: frozen `TerminalCapability` dataclass (fields per data-model.md §1), frozen `ImageRenderPlan` dataclass (fields per data-model.md §3), public function stubs `detect_capability() -> TerminalCapability`, `plan_render(rows, capability, max_images) -> ImageRenderPlan`, `render_inline(plan) -> str`, and a module-level `_WARNED = False` flag for one-shot stderr warnings. All stubs raise `NotImplementedError` for now. +- [x] T003 [P] Extend `collect_image_series` in `src/aimx/aim_bridge/metric_stats.py` so each returned dict also carries a private key `_image_accessor: Callable[[], PIL.Image.Image] | None` that lazily calls `image.to_pil_image()` on the underlying Aim image object; keep the existing keys (`run`, `name`, `context`) byte-identical so `render_image_json` / `render_image_oneline` / `render_image_rich_table` remain untouched. +- [x] T004 Extend `parse_query_invocation` in `src/aimx/commands/query.py` to accept `--max-images `: store on `QueryInvocation` as `max_images: int` with default `6`; reject `N < 0` or non-integer values with a `ValueError` ("Invalid --max-images value: …") so `run_query_command` maps it to exit status `2`. Update the `Usage:` string printed on bad args. + +**Checkpoint**: Foundation ready — user story implementation can now proceed. + +--- + +## Phase 3: User Story 1 — View Matching Images Inline In Terminal (Priority: P1) 🎯 MVP + +**Goal**: When stdout is a TTY and the default rich path is in effect, +render each matched image inline under the existing summary table, using +`rich.Console` + `textual_image.renderable.Image`, with graceful half-block +fallback on plain ANSI terminals. + +**Independent Test**: `uv run aimx query images "images" --repo data` in +iTerm2/Kitty/WezTerm/Ghostty prints the existing summary table followed by +per-row section blocks whose inline image renderables are observable (visible +pixels + non-empty bytes). 
The same command in a dumb terminal still exits +`0` with half-block fallback output. + +### Tests for User Story 1 + +- [x] T005 [P] [US1] Unit tests in `tests/unit/test_image_render.py`: cover `detect_capability()` (envvar matrix: iTerm2/Kitty/WezTerm/Ghostty → `protocol=="auto"`; plain xterm → `"fallback_text"`; non-TTY / tiny width → `"disabled"`), `plan_render()` clamp rules (`max_height = max(rows // 3, 3)`, `columns < 20` short-circuits to `disabled`), and lazy `_image_accessor` failure → `[image unavailable: ...]` placeholder. +- [x] T006 [P] [US1] Integration test in `tests/integration/test_query_command.py`: run `run_query_command(["images", "images", "--repo", "data"])` with `sys.stdout` monkey-patched to a pseudo-TTY; assert output contains the existing rich table header AND at least one `▌ ` section header AND at least one byte that is ESC-prefixed (or half-block `▀`/`▄`); exit status is `0`. + +### Implementation for User Story 1 + +- [x] T007 [US1] Implement `detect_capability()` in `src/aimx/rendering/image_render.py`: `is_tty = sys.stdout.isatty()`, read `shutil.get_terminal_size(fallback=(120, 24))`, classify protocol by env vars (`TERM_PROGRAM` in {iTerm.app}, `KITTY_WINDOW_ID`, `WEZTERM_EXECUTABLE`, `GHOSTTY_RESOURCES_DIR`, `VTE_VERSION`, fallback to `TERM`); return `"disabled"` when `not is_tty`, when `columns < 20`, or when `import textual_image.renderable` fails. Populate `reason` on disabled paths. +- [x] T008 [US1] Implement pure `plan_render(rows, capability, max_images)` in `src/aimx/rendering/image_render.py`: short-circuit to empty plan when `capability.protocol == "disabled"`; compute `max_height = max(capability.rows // 3, 3)`; split `rows` into `rendered_rows[:N]` / `skipped_rows[N:]` using `max_images` (0 = unlimited). Do NOT invoke `_image_accessor` here — this is a pure data step. 
+- [x] T009 [US1] Implement `render_inline(plan) -> str` in `src/aimx/rendering/image_render.py`: construct a `Console(file=StringIO(), force_terminal=True, width=capability.columns, highlight=False)`; for each `rendered_rows` item, print a blank line, then a single-line header `▌ ` using the same `colors.*` palette as `query_views.py`, then the `textual_image.renderable.Image` built from `_image_accessor()` resized to `(target_width, max_height * 2)` pixels (half-block convention). Per-row exceptions → `[image unavailable: ]` single line; do NOT raise. +- [x] T010 [US1] Wire the rich+TTY branch in `_run_images_query` in `src/aimx/commands/query.py`: call the existing `render_image_rich_table(...)` for the summary string; if `capability.protocol != "disabled"`, call `render_inline(plan_render(...))` and concatenate; return both as the `output` of `QueryCommandResult`. Non-TTY / `--json` / `--plain` branches remain literally unchanged (no new code path). + +**Checkpoint**: US1 complete — inline rendering works end-to-end on modern terminals and half-block fallback, summary table still visible above. + +--- + +## Phase 4: User Story 2 — Keep Scripting & Machine-Readable Paths Byte-Clean (Priority: P1) + +**Goal**: Guarantee that `--json`, `--plain`/`--oneline`, `--no-color`, and +any non-TTY stdout path produce **byte-identical** output to the pre-003 +baseline, and that dependency/decoding issues surface as **one** single +line on stderr without ever leaking into stdout. + +**Independent Test**: `aimx query images "images" --repo data --json | jq .` +produces valid JSON with no ANSI/image bytes. `aimx query images ... --plain` +produces only tab-separated lines. `aimx query images ... > out.txt` (i.e. +non-TTY) contains zero ESC / Kitty / iTerm2 / Sixel markers. If +`textual_image` fails to import, the command still exits `0` and emits +exactly one line on stderr. 
+ +### Tests for User Story 2 + +- [x] T011 [P] [US2] Contract test in `tests/contract/test_query_contract.py`: run `run_query_command(["images", "images", "--repo", "data", "--json"])` and assert the returned `output` is valid JSON, byte-identical to a captured pre-003 baseline (store baseline as a constant at top of the test), and contains no `\x1b`, `\x90`, `\x9d`, Kitty `_G` sequences, or iTerm2 `]1337;File=` markers. +- [x] T012 [P] [US2] Contract test in `tests/contract/test_query_contract.py`: run the same command with `--plain`; assert each non-empty line is a single row, no `\x1b`, no binary bytes outside printable ASCII + tab. +- [x] T013 [P] [US2] Contract test in `tests/contract/test_query_contract.py`: run `run_query_command(["images", "images", "--repo", "data"])` with `sys.stdout` swapped to a plain `io.StringIO` (simulating redirection / non-TTY); assert the produced string contains zero image-protocol byte patterns AND matches the pre-003 baseline for the rich summary exactly. +- [x] T014 [P] [US2] Contract test in `tests/contract/test_query_contract.py`: run the command with `--max-images 3`, `--max-images 0`, and `--max-images 100` under `--json` and `--plain`; assert all three produce the **same** bytes as the default (proves `--max-images` has zero effect off the TTY path). +- [x] T015 [P] [US2] Unit test in `tests/unit/test_image_render.py`: simulate `textual_image` import failure (monkey-patch `sys.modules`); call `detect_capability()` → `protocol == "disabled"`, `reason` is populated; call a public `warn_once(message)` helper twice in the same process and assert `sys.stderr` receives exactly one line. + +### Implementation for User Story 2 + +- [x] T016 [US2] Implement `warn_once(message: str) -> None` in `src/aimx/rendering/image_render.py`: module-level `_WARNED` flag gates a single `print(f"aimx: inline image rendering unavailable: {message}", file=sys.stderr)`; idempotent across repeated calls within one process. 
Wire it into both the import-failure branch in `detect_capability()` and the catch-all exception handler in `render_inline()`. +- [x] T017 [US2] Harden the TTY gate in `_run_images_query` (`src/aimx/commands/query.py`): explicitly short-circuit BEFORE calling into `image_render` when `invocation.output_json` or `invocation.plain` is True, or when `detect_capability().protocol == "disabled"`, ensuring no `textual_image` import happens on those paths. Add an assertion in the `--json` and `--plain` branches that the returned `output` string contains no `\x1b` byte (wrapped in `__debug__` so it's stripped from `-O` builds). + +**Checkpoint**: US2 complete — SC-003 enforced by contract tests; stderr +warning contract (R-008) enforced by unit tests. + +--- + +## Phase 5: User Story 3 — Bounded Output For Large Result Sets (Priority: P2) + +**Goal**: When a query matches many images, render only up to `--max-images` +by default (6), print metadata for the rest, and emit a single-line +truncation footer so users aren't surprised by a flood of output. + +**Independent Test**: Point `aimx query images` at a fabricated result set +with ≥ 8 matches (helper fixture fabricates `ImageRow` dicts with synthetic +PIL images); default run draws 6 and prints +`... rendered 6 of 8 images, use --max-images=0 for all` at the end; +`--max-images 0` draws all 8 and omits the footer; `--max-images 3` draws 3 +and footer reads `... rendered 3 of 8 images, ...`. + +### Tests for User Story 3 + +- [x] T018 [P] [US3] Unit tests in `tests/unit/test_query_helpers.py`: `--max-images` parsing — default `6`, `0` → unlimited marker, `--max-images 12` → `12`, `--max-images -1` → `ValueError`, `--max-images abc` → `ValueError`, missing value → `ValueError`. 
+- [x] T019 [P] [US3] Integration test in `tests/integration/test_query_command.py`: build an in-memory list of 8 synthetic `ImageRow` dicts with tiny 4×4 PIL images; call `plan_render(rows, fake_capability, max_images=6)` → `len(rendered_rows) == 6`, `len(skipped_rows) == 2`; call with `max_images=0` → all 8 rendered, 0 skipped; call with `max_images=3` → 3/5. +- [x] T020 [P] [US3] Integration test in `tests/integration/test_query_command.py`: assert `render_inline(plan_with_skipped)` ends with exactly one line matching `... rendered of images, use --max-images=0 for all` and that `render_inline(plan_with_zero_skipped)` does NOT contain that pattern. + +### Implementation for User Story 3 + +- [x] T021 [US3] Finalize `plan_render` split in `src/aimx/rendering/image_render.py` per data-model.md §3: when `max_images == 0` or `len(rows) <= max_images`, `skipped_rows = []`; otherwise `rendered_rows = rows[:max_images]`, `skipped_rows = rows[max_images:]`. Treat `max_images == 0` as the "unlimited" sentinel explicitly (not as "zero rendered"). +- [x] T022 [US3] In `render_inline` in `src/aimx/rendering/image_render.py`, when `plan.skipped_rows` is non-empty, append after the last section block exactly one line: `... rendered {len(plan.rendered_rows)} of {len(plan.rendered_rows) + len(plan.skipped_rows)} images, use --max-images=0 for all`. The line MUST NOT be emitted when `skipped_rows` is empty. No duplicate emission on repeat calls. +- [x] T023 [US3] Pass `invocation.max_images` through from `_run_images_query` in `src/aimx/commands/query.py` to `plan_render(...)`. Keep the default (`6`) consistent with `parse_query_invocation`; do NOT hardcode `6` in `image_render`. + +**Checkpoint**: All three user stories deliverable independently. MVP = US1 + US2; US3 adds the safety cap for large repositories. 
+ +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +- [x] T024 [P] Update `README.md` with a short "Inline image preview" section (2–4 sentences) referencing `specs/003-query-images-terminal-render/quickstart.md`; mention default `--max-images=6` and that `--json` / `--plain` paths are unchanged. +- [x] T025 [P] Update `specs/003-query-images-terminal-render/checklists/requirements.md` to mark the `No [NEEDS CLARIFICATION] markers remain` item fully closed (already closed in clarify, but re-verify), and confirm the Feature Readiness section all check. +- [x] T026 Run `uv run pytest -q` and ensure the full suite is green (new and existing). Any regression in the 002 query contract counts as a blocker for this feature. +- [x] T027 Execute `specs/003-query-images-terminal-render/quickstart.md` sections 2–6 manually against the repository under `data/`, in a modern terminal; record any deviations as follow-up issues. Smoke-check SSH / `TERM=dumb` fallback if accessible. + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)**: No dependencies — start immediately. +- **Foundational (Phase 2)**: Depends on Setup. **Blocks all user stories.** +- **US1 (Phase 3, P1)**: Depends on Foundational (T002, T003, T004). +- **US2 (Phase 4, P1)**: Depends on Foundational; may run in parallel with + US1 by a second developer (US2 implementation touches the same query.py + section, so merge-coordination is needed — see parallel notes). +- **US3 (Phase 5, P2)**: Depends on Foundational **and** US1 (needs + `plan_render`/`render_inline` scaffolding from US1). +- **Polish (Phase 6)**: Depends on US1+US2 at minimum; T024–T025 can start + once US1+US2 are green even before US3 lands. + +### User Story Dependencies + +- US1 (view inline): no inter-story dep beyond Foundational. +- US2 (byte-clean scripting): no inter-story dep; contract tests can land + before or alongside US1 implementation. 
+- US3 (bounded output): reuses US1's `plan_render` + `render_inline`; must + land after US1 to avoid churn. + +### Within Each User Story + +- Unit tests (for US1 and US2) SHOULD be written first and observed to fail + before the corresponding implementation task lands. +- Contract tests (US2) MUST be red against a pre-003 reference first, then + green against the final implementation without changes to the test file. +- Within US3, `plan_render` split (T021) before `render_inline` footer (T022). + +### Parallel Opportunities + +- Foundational: T002, T003 are different files and can run in parallel; T004 + depends on nothing new and can run in parallel with T002/T003. +- US1 tests (T005, T006) can be authored in parallel. +- US2 contract tests T011–T015 all live in `tests/contract/test_query_contract.py` + (with T015 in a different file) — T011–T014 share a file so write them in + one edit session; T015 in parallel. +- US3 tests (T018, T019, T020) across two files — parallel. +- Polish T024 and T025 are independent docs changes — parallel. + +--- + +## Parallel Example: User Story 1 + +```bash +# Independent tests for US1 can be authored in parallel: +Task: "Unit tests in tests/unit/test_image_render.py (T005)" +Task: "Integration test in tests/integration/test_query_command.py (T006)" + +# Implementation tasks T007/T008 are same file (image_render.py) — sequential. +# Integration wiring T010 must come after T007+T008+T009. +``` + +--- + +## Implementation Strategy + +### MVP First (US1 + US2) + +1. Phase 1 Setup → Phase 2 Foundational. +2. Phase 3 (US1): deliver inline image rendering on rich+TTY path. +3. Phase 4 (US2): contract tests guard SC-003 byte-stability; stderr + warning contract landed. +4. **STOP and VALIDATE**: `uv run pytest -q` green; quickstart sections 2–4 + pass manually. This is the releasable MVP. + +### Incremental Delivery + +1. MVP (US1 + US2) → release. +2. US3 (bounded output + truncation footer) → release. +3. 
Polish (docs, README, final sweep). + +### Parallel Team Strategy + +With two developers after Foundational: + +1. Dev A: US1 (T005 → T006 → T007 → T008 → T009 → T010). +2. Dev B: US2 (T011 → T012 → T013 → T014 → T015 → T016 → T017). + Dev B's T017 edits `query.py` — coordinate with Dev A's T010 via a + shared feature branch and resolve at merge (same function body). +3. Once US1 merges, either dev takes US3. + +--- + +## Notes + +- [P] tasks = different files, no dependencies. +- Every task carries an exact file path. +- Constitution-driven guardrails (SC-003 byte-stability, stderr warning + contract, per-row failure isolation) are expressed as US2 tests and + enforced on every run of `pytest`. +- Commit after each checkpoint or logical group (e.g. end of Phase 2, end + of US1, end of US2, end of US3). +- Any regression in existing 002 tests is a blocker — do not proceed past + the affected task. diff --git a/src/aimx/__init__.py b/src/aimx/__init__.py index 747d46f..7d18268 100644 --- a/src/aimx/__init__.py +++ b/src/aimx/__init__.py @@ -1,3 +1,3 @@ __all__ = ["__version__"] -__version__ = "0.3.0" +__version__ = "0.3.1" diff --git a/src/aimx/aim_bridge/metric_stats.py b/src/aimx/aim_bridge/metric_stats.py index e052797..6dc6864 100644 --- a/src/aimx/aim_bridge/metric_stats.py +++ b/src/aimx/aim_bridge/metric_stats.py @@ -90,6 +90,26 @@ def _extract_values(metric: Any) -> tuple[np.ndarray, np.ndarray, np.ndarray | N ) +def _first_iter_value(view: Any) -> Any | None: + """Return the first item from an iterable-like view, or None.""" + if view is None: + return None + try: + return next(iter(view)) + except (StopIteration, TypeError): + return None + + +def _call_or_value(value: Any) -> Any: + """Call *value* if it's callable, otherwise return it as-is.""" + if callable(value): + try: + return value() + except Exception: # noqa: BLE001 + return None + return value + + def collect_metric_series(expression: str, repo_path: Path) -> list[MetricSeries]: """Run the Aim 
def _parse_epoch_bound(text: str, side: str) -> float | None:
    """Convert one side of an ``--epochs`` slice to a float; blank means unbounded.

    Raises:
        ValueError: if *text* is non-blank and is not a usable number
            (this includes ``nan``).
    """
    stripped = text.strip()
    if not stripped:
        return None
    try:
        bound = float(stripped)
    except ValueError:
        # The message already names the offending text, so drop the
        # low-level float() error from the traceback.
        raise ValueError(f"--epochs: {side} bound is not a number: {text!r}") from None
    # ``float`` happily parses "nan", but every comparison against NaN is
    # False, which would silently turn the filter into "match nothing" for
    # metric series. NaN is the only float that is unequal to itself.
    if bound != bound:
        raise ValueError(f"--epochs: {side} bound is not a number: {text!r}")
    return bound


def parse_epoch_slice(s: str) -> tuple[float | None, float | None]:
    """Parse a ``start:end`` slice string into inclusive float bounds for epoch filtering.

    Accepts integer or float values, e.g. ``"5:50"``, ``"5.0:"``, ``":30"``.
    A blank side is returned as ``None`` (no constraint on that side).

    Raises:
        ValueError: if *s* contains no ``:``, if a non-blank side is not a
            number (``nan`` included), or if both sides are blank.
    """
    if ":" not in s:
        raise ValueError(
            f"--epochs requires 'start:end' slice syntax (e.g. '5:50', ':30', '5:'), got: {s!r}"
        )
    # Split on the first colon only; any further colon ends up in the right
    # bound and is rejected there as "not a number".
    left, right = s.split(":", 1)
    start = _parse_epoch_bound(left, "left")
    end = _parse_epoch_bound(right, "right")
    if start is None and end is None:
        raise ValueError("--epochs cannot be an open slice ':'; provide at least one bound.")
    return start, end


def filter_by_epoch_range(
    series: "MetricSeries",
    start: float | None,
    end: float | None,
) -> "MetricSeries":
    """Return a ``MetricSeries`` keeping only points where ``start <= epoch <= end``.

    A series whose ``epochs`` attribute is ``None`` carries no epoch data and
    is returned unchanged (the same object), so the filter is a no-op for it.
    A bound of ``None`` places no constraint on that side. In every other
    case a new ``MetricSeries`` is built from the masked arrays.
    """
    if series.epochs is None:
        return series
    # Start from "keep everything" and narrow once per bound that is present.
    mask = np.ones(len(series.epochs), dtype=bool)
    if start is not None:
        mask &= series.epochs >= start
    if end is not None:
        mask &= series.epochs <= end
    return MetricSeries(
        run=series.run,
        name=series.name,
        context=series.context,
        values=series.values[mask],
        steps=series.steps[mask],
        epochs=series.epochs[mask],
    )
@@ -268,3 +382,89 @@ def group_by_run( groups[h] = (series.run, []) groups[h][1].append(series) return [groups[h] for h in order] + + +# --------------------------------------------------------------------------- +# Image-row filter / subsample helpers +# --------------------------------------------------------------------------- + +def filter_image_rows_by_step_range( + rows: list[dict[str, Any]], + start: int | None, + end: int | None, +) -> list[dict[str, Any]]: + """Keep only image rows whose ``_sort_step`` falls within [start, end]. + + Rows with ``_sort_step == None`` are always kept (no step information to + exclude them by). + """ + result: list[dict[str, Any]] = [] + for row in rows: + step = row.get("_sort_step") + if step is None: + result.append(row) + continue + try: + step_val = int(step) + except (TypeError, ValueError): + result.append(row) + continue + if start is not None and step_val < start: + continue + if end is not None and step_val > end: + continue + result.append(row) + return result + + +def filter_image_rows_by_epoch_range( + rows: list[dict[str, Any]], + start: float | None, + end: float | None, +) -> list[dict[str, Any]]: + """Keep only image rows whose ``_sort_epoch`` falls within [start, end]. + + Rows with ``_sort_epoch == None`` are always kept (no epoch information to + exclude them by). + """ + result: list[dict[str, Any]] = [] + for row in rows: + epoch = row.get("_sort_epoch") + if epoch is None: + result.append(row) + continue + try: + epoch_val = float(epoch) + except (TypeError, ValueError): + result.append(row) + continue + if start is not None and epoch_val < start: + continue + if end is not None and epoch_val > end: + continue + result.append(row) + return result + + +def subsample_image_rows( + rows: list[dict[str, Any]], + *, + head: int | None, + tail: int | None, + every: int | None, +) -> list[dict[str, Any]]: + """Return a globally subsampled view of *rows* (already sorted). 
+ + Applies head → tail → every in order, mirroring ``subsample`` for metric + series. All three parameters are optional; passing all ``None`` is a no-op. + """ + if not rows: + return rows + result = rows + if head is not None: + result = result[:head] + if tail is not None: + result = result[-tail:] + if every is not None and every > 1: + result = result[::every] + return result diff --git a/src/aimx/commands/help.py b/src/aimx/commands/help.py index 6bac2ff..e861da6 100644 --- a/src/aimx/commands/help.py +++ b/src/aimx/commands/help.py @@ -13,11 +13,15 @@ def render_help() -> str: " query Query metrics or images from a local Aim repository", " Usage: aimx query [--repo ]", " Options: --json --oneline --no-color --verbose", - " --steps start:end (e.g. --steps 100:500, :50, 100:)", + " --steps start:end | --epochs start:end (mutually exclusive)", + " --head N --tail N --every K", + " --max-images N (images only, TTY rendering cap; default 6)", " Repo defaults to the current directory; paths may point at either", " the repo root or its .aim directory", " Short run hashes in the expression are transparently expanded.", " Example: aimx query metrics \"run.hash=='eca37394'\" --repo data", + " Example: aimx query images \"images\" --repo data --epochs 10:50", + " Example: aimx query images \"images\" --repo data --head 10", " trace Plot a metric's time-series from a local Aim repository", " Usage: aimx trace [--repo ]", " Options: --table --csv --json", diff --git a/src/aimx/commands/query.py b/src/aimx/commands/query.py index 226f53a..f9bbc8a 100644 --- a/src/aimx/commands/query.py +++ b/src/aimx/commands/query.py @@ -18,6 +18,11 @@ class QueryInvocation: no_color: bool = False verbose: bool = False step_slice: str | None = None + epoch_slice: str | None = None + head: int | None = None + tail: int | None = None + every: int | None = None + max_images: int = 6 def __post_init__(self) -> None: if self.target not in SUPPORTED_TARGETS: @@ -26,6 +31,14 @@ def 
__post_init__(self) -> None: ) if not self.expression.strip(): raise ValueError("Query expression must not be empty.") + if self.max_images < 0: + raise ValueError( + f"Invalid --max-images value: {self.max_images!r}. Must be a non-negative integer." + ) + if self.step_slice is not None and self.epoch_slice is not None: + raise ValueError("--steps and --epochs are mutually exclusive.") + if self.every is not None and self.every < 1: + raise ValueError(f"--every must be >= 1, got: {self.every!r}.") @dataclass(frozen=True) @@ -43,11 +56,36 @@ def normalize_repo_path(path: Path) -> Path: return path +def _parse_positive_int(flag: str, raw: str) -> int: + """Parse *raw* as a positive integer for *flag*; raise ``ValueError`` with a clear message.""" + try: + value = int(raw) + except ValueError: + raise ValueError(f"{flag} requires a positive integer, got: {raw!r}") + if value < 1: + raise ValueError(f"{flag} must be >= 1, got: {raw!r}") + return value + + +def _parse_non_negative_int(flag: str, raw: str) -> int: + """Parse *raw* as a non-negative integer for *flag*; raise ``ValueError`` on bad input.""" + try: + value = int(raw) + except ValueError: + raise ValueError(f"{flag} requires an integer, got: {raw!r}") + if value < 0: + raise ValueError(f"{flag} requires a non-negative integer, got: {raw!r}") + return value + + def parse_query_invocation(args: list[str]) -> QueryInvocation: if len(args) < 2: raise ValueError( "Usage: aimx query [--repo ] " - "[--json] [--oneline] [--no-color] [--verbose] [--steps start:end]" + "[--json] [--oneline] [--no-color] [--verbose] " + "[--steps start:end | --epochs start:end] " + "[--head N] [--tail N] [--every K] " + "[--max-images N]" ) target = args[0] @@ -59,7 +97,12 @@ def parse_query_invocation(args: list[str]) -> QueryInvocation: no_color = False verbose = False step_slice: str | None = None + epoch_slice: str | None = None + head: int | None = None + tail: int | None = None + every: int | None = None repo_value = "." 
+ max_images: int = 6 index = 0 while index < len(rest): @@ -81,11 +124,42 @@ def parse_query_invocation(args: list[str]) -> QueryInvocation: raise ValueError("Missing value for --steps.") step_slice = rest[index + 1] index += 2 + elif token == "--epochs": + if index + 1 >= len(rest): + raise ValueError("Missing value for --epochs.") + epoch_slice = rest[index + 1] + index += 2 + elif token == "--head": + if index + 1 >= len(rest): + raise ValueError("Missing value for --head.") + head = _parse_non_negative_int("--head", rest[index + 1]) + index += 2 + elif token == "--tail": + if index + 1 >= len(rest): + raise ValueError("Missing value for --tail.") + tail = _parse_non_negative_int("--tail", rest[index + 1]) + index += 2 + elif token == "--every": + if index + 1 >= len(rest): + raise ValueError("Missing value for --every.") + every = _parse_positive_int("--every", rest[index + 1]) + index += 2 elif token == "--repo": if index + 1 >= len(rest): raise ValueError("Missing value for --repo.") repo_value = rest[index + 1] index += 2 + elif token == "--max-images": + if index + 1 >= len(rest): + raise ValueError("Missing value for --max-images.") + raw = rest[index + 1] + try: + max_images = int(raw) + except ValueError: + raise ValueError(f"Invalid --max-images value: {raw!r}. Must be a non-negative integer.") + if max_images < 0: + raise ValueError(f"Invalid --max-images value: {raw!r}. 
Must be a non-negative integer.") + index += 2 else: raise ValueError(f"Unsupported query option: {token}") @@ -98,6 +172,56 @@ def parse_query_invocation(args: list[str]) -> QueryInvocation: no_color=no_color, verbose=verbose, step_slice=step_slice, + epoch_slice=epoch_slice, + head=head, + tail=tail, + every=every, + max_images=max_images, + ) + + +def _sort_image_value(value: Any) -> tuple[int, Any]: + """Normalize values so image rows sort numerically when possible.""" + if value is None: + return (3, "") + if isinstance(value, bool): + return (2, str(value)) + if isinstance(value, (int, float)): + return (0, float(value)) + if isinstance(value, str): + try: + return (0, float(value)) + except ValueError: + return (1, value) + return (2, str(value)) + + +def _image_context_sort_key(context: dict[str, Any]) -> tuple[tuple[str, tuple[int, Any]], ...]: + """Build a deterministic context sort key, excluding explicit epoch/step fields.""" + return tuple( + (key, _sort_image_value(value)) + for key, value in sorted(context.items()) + if key not in {"epoch", "step"} + ) + + +def _sort_image_rows(image_rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Keep run-group order stable, while sorting rows within a run by epoch/step.""" + run_order: dict[str, int] = {} + for row in image_rows: + run_hash = row["run"].hash + if run_hash not in run_order: + run_order[run_hash] = len(run_order) + + return sorted( + image_rows, + key=lambda row: ( + run_order[row["run"].hash], + _sort_image_value(row.get("_sort_epoch", row["context"].get("epoch"))), + _sort_image_value(row.get("_sort_step")), + row["name"], + _image_context_sort_key(row["context"]), + ), ) @@ -138,9 +262,12 @@ def _run_metrics_query( ) -> QueryCommandResult: from aimx.aim_bridge.metric_stats import ( collect_metric_series, + filter_by_epoch_range, filter_by_step_range, group_by_run, + parse_epoch_slice, parse_step_slice, + subsample, ) from aimx.rendering.query_views import ( render_json, @@ -150,9 +277,23 
@@ def _run_metrics_query( series_list = collect_metric_series(invocation.expression, repo_path) + # Range filter (--steps or --epochs, mutually exclusive) if invocation.step_slice is not None: step_start, step_end = parse_step_slice(invocation.step_slice) series_list = [filter_by_step_range(s, step_start, step_end) for s in series_list] + series_list = [s for s in series_list if s.count > 0] + elif invocation.epoch_slice is not None: + epoch_start, epoch_end = parse_epoch_slice(invocation.epoch_slice) + series_list = [filter_by_epoch_range(s, epoch_start, epoch_end) for s in series_list] + series_list = [s for s in series_list if s.count > 0] + + # Density subsampling (--head / --tail / --every) + needs_sample = any(x is not None for x in (invocation.head, invocation.tail, invocation.every)) + if needs_sample: + series_list = [ + subsample(s, head=invocation.head, tail=invocation.tail, every=invocation.every) + for s in series_list + ] groups = group_by_run(series_list) @@ -172,7 +313,14 @@ def _run_images_query( header_info: dict[str, Any], no_color: bool, ) -> QueryCommandResult: - from aimx.aim_bridge.metric_stats import collect_image_series + from aimx.aim_bridge.metric_stats import ( + collect_image_series, + filter_image_rows_by_epoch_range, + filter_image_rows_by_step_range, + parse_epoch_slice, + parse_step_slice, + subsample_image_rows, + ) from aimx.rendering.query_views import ( render_image_json, render_image_oneline, @@ -180,12 +328,42 @@ def _run_images_query( ) image_rows = collect_image_series(invocation.expression, repo_path) + image_rows = _sort_image_rows(image_rows) + # Range filter (--steps or --epochs, mutually exclusive) + if invocation.step_slice is not None: + step_start, step_end = parse_step_slice(invocation.step_slice) + image_rows = filter_image_rows_by_step_range(image_rows, step_start, step_end) + elif invocation.epoch_slice is not None: + epoch_start, epoch_end = parse_epoch_slice(invocation.epoch_slice) + image_rows = 
filter_image_rows_by_epoch_range(image_rows, epoch_start, epoch_end) + + # Global subsampling across the sorted result list (--head / --tail / --every) + needs_sample = any(x is not None for x in (invocation.head, invocation.tail, invocation.every)) + if needs_sample: + image_rows = subsample_image_rows( + image_rows, head=invocation.head, tail=invocation.tail, every=invocation.every + ) + + # Machine-readable paths — byte-identical to pre-003 output; MUST NOT + # touch _image_accessor or produce any graphics escape sequences. if invocation.output_json: return QueryCommandResult(exit_status=0, output=render_image_json(image_rows, header_info)) if invocation.plain: return QueryCommandResult(exit_status=0, output=render_image_oneline(image_rows, header_info)) - return QueryCommandResult( - exit_status=0, - output=render_image_rich_table(image_rows, header_info, no_color=no_color), - ) + + summary = render_image_rich_table(image_rows, header_info, no_color=no_color) + + # TTY inline-image extension (US1/US3): preserve the full metadata table + # and append previews below it so rows beyond the preview cap stay visible. + from aimx.rendering.image_render import detect_capability, plan_render, render_inline # noqa: PLC0415 + + capability = detect_capability() + if capability.protocol != "disabled": + plan = plan_render(image_rows, capability, max_images=invocation.max_images) + inline = render_inline(plan) + if inline: + combined_output = summary.rstrip("\n") + "\n\n" + inline + return QueryCommandResult(exit_status=0, output=combined_output) + + return QueryCommandResult(exit_status=0, output=summary) diff --git a/src/aimx/rendering/image_render.py b/src/aimx/rendering/image_render.py new file mode 100644 index 0000000..26e0e0f --- /dev/null +++ b/src/aimx/rendering/image_render.py @@ -0,0 +1,269 @@ +"""Inline terminal image rendering for ``aimx query images``. + +This module is the sole owner of: +- Terminal capability detection (passive, env-var-based, no DA queries). 
+- ``ImageRenderPlan`` construction (pure data, no I/O). +- Inline rendering via ``rich.Console`` + ``textual_image.renderable.Image``. +- A one-shot stderr warning for dependency / capability problems. + +It MUST NOT be imported or called from the ``--json`` or ``--plain`` code +paths; those must remain byte-identical to the pre-003 behaviour. +""" + +from __future__ import annotations + +import os +import shutil +import sys +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Callable, Literal + +from aimx.rendering import colors + +if TYPE_CHECKING: + from PIL import Image as PILImage + +# --------------------------------------------------------------------------- +# One-shot stderr warning +# --------------------------------------------------------------------------- + +_WARNED: bool = False + + +def warn_once(message: str) -> None: + """Print a single warning to stderr; idempotent for the process lifetime.""" + global _WARNED # noqa: PLW0603 + if not _WARNED: + print(f"aimx: inline image rendering unavailable: {message}", file=sys.stderr) + _WARNED = True + + +# --------------------------------------------------------------------------- +# TerminalCapability +# --------------------------------------------------------------------------- + +ProtocolTag = Literal["auto", "fallback_text", "disabled"] + + +@dataclass(frozen=True) +class TerminalCapability: + """Snapshot of the current stdout terminal properties.""" + + is_tty: bool + columns: int + rows: int + protocol: ProtocolTag + reason: str | None = None + + +def _classify_protocol(is_tty: bool, columns: int) -> tuple[ProtocolTag, str | None]: + """Return (protocol, reason) based on passive env-var inspection.""" + if not is_tty: + return "disabled", "stdout is not a TTY" + if columns < 20: + return "disabled", f"terminal width {columns} < 20 columns" + + # Try importing textual_image; if unavailable, fall back to text. 
+ try: + import textual_image.renderable # noqa: F401 + except Exception as exc: # noqa: BLE001 + warn_once(str(exc)) + return "disabled", str(exc) + + # Passive protocol-support classification. + term_program = os.environ.get("TERM_PROGRAM", "") + if term_program in ("iTerm.app", "WezTerm"): + return "auto", None + if "KITTY_WINDOW_ID" in os.environ: + return "auto", None + if "GHOSTTY_RESOURCES_DIR" in os.environ: + return "auto", None + if "WEZTERM_EXECUTABLE" in os.environ: + return "auto", None + # VTE-based terminals (GNOME Terminal, etc.) support some image protocols. + if "VTE_VERSION" in os.environ: + return "auto", None + + # Plain ANSI / tmux / xterm — textual_image will use half-block fallback. + return "fallback_text", None + + +def detect_capability() -> TerminalCapability: + """Probe the current terminal once and return a frozen capability object.""" + is_tty = sys.stdout.isatty() + size = shutil.get_terminal_size(fallback=(120, 24)) + columns, rows = size.columns, size.lines + protocol, reason = _classify_protocol(is_tty, columns) + return TerminalCapability( + is_tty=is_tty, + columns=columns, + rows=rows, + protocol=protocol, + reason=reason, + ) + + +# --------------------------------------------------------------------------- +# ImageRenderPlan +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class ImageRenderPlan: + """Pure data describing what the renderer should draw.""" + + capability: TerminalCapability + rendered_rows: list[dict[str, Any]] = field(default_factory=list) + skipped_rows: list[dict[str, Any]] = field(default_factory=list) + target_width: int = 80 + max_height: int = 8 + max_images: int = 6 + + +def plan_render( + rows: list[dict[str, Any]], + capability: TerminalCapability, + max_images: int, +) -> ImageRenderPlan: + """Build an ``ImageRenderPlan`` without performing any I/O. + + ``max_images == 0`` means unlimited. 
+ All row splitting happens here so ``render_inline`` stays pure output. + """ + target_width = capability.columns + max_height = max(capability.rows // 3, 3) + + if capability.protocol == "disabled": + return ImageRenderPlan( + capability=capability, + rendered_rows=[], + skipped_rows=[], + target_width=target_width, + max_height=max_height, + max_images=max_images, + ) + + if max_images == 0 or len(rows) <= max_images: + rendered = list(rows) + skipped: list[dict[str, Any]] = [] + else: + rendered = list(rows[:max_images]) + skipped = list(rows[max_images:]) + + return ImageRenderPlan( + capability=capability, + rendered_rows=rendered, + skipped_rows=skipped, + target_width=target_width, + max_height=max_height, + max_images=max_images, + ) + + +# --------------------------------------------------------------------------- +# render_inline +# --------------------------------------------------------------------------- + +def _short_hash(h: str) -> str: + return h[:8] + + +def _fmt_context(ctx: dict[str, Any]) -> str: + if not ctx: + return "" + return " ".join(f"{k}={v}" for k, v in sorted(ctx.items())) + + +def _render_pil_image( + pil_img: "PILImage.Image", + _target_width: int, + _max_height: int, +) -> "PILImage.Image": + """Prepare *pil_img* for terminal rendering without pre-shrinking it. + + The actual render size is delegated to ``textual_image`` via cell-based + width/height arguments. Pre-resizing here made images tiny in real TTYs. + """ + + orig_w, orig_h = pil_img.size + if orig_w == 0 or orig_h == 0: + return pil_img + + # Convert palette / RGBA to RGB for broadest terminal compatibility. + if pil_img.mode not in ("RGB", "L"): + try: + pil_img = pil_img.convert("RGB") + except Exception: # noqa: BLE001 + pass + + return pil_img + + +def render_inline(plan: ImageRenderPlan) -> str: + """Render the section blocks for all ``plan.rendered_rows``. + + Returns an empty string when ``capability.protocol == "disabled"``. 
+ The truncation footer is included when ``plan.skipped_rows`` is non-empty. + """ + if plan.capability.protocol == "disabled" or not plan.rendered_rows: + return "" + + import io # noqa: PLC0415 + + from rich.console import Console # noqa: PLC0415 + from rich.text import Text # noqa: PLC0415 + + buf = io.StringIO() + console = Console( + file=buf, + force_terminal=True, + width=plan.capability.columns, + highlight=False, + ) + + for index, row in enumerate(plan.rendered_rows): + run = row["run"] + name: str = row["name"] + ctx_str = _fmt_context(row.get("context", {})) + + # Section header. + if index > 0: + console.print() + label = Text() + label.append("▌ ", style=colors.RULE_LINE) + label.append(_short_hash(run.hash), style=colors.RUN_HASH) + exp = run.experiment or run.name or "" + if exp: + label.append(f" {exp}", style=colors.EXPERIMENT) + label.append(f" {name}", style=colors.METRIC_NAME) + if ctx_str: + label.append(f" {ctx_str}", style=colors.CONTEXT_VAL) + console.print(label) + + # Image renderable. + accessor: Callable[[], "PILImage.Image"] | None = row.get("_image_accessor") + if accessor is None: + console.print("[image unavailable: no accessor]", markup=False) + continue + + try: + pil_img = accessor() + pil_img = _render_pil_image(pil_img, plan.target_width, plan.max_height) + + from textual_image.renderable import Image as TxImage # noqa: PLC0415 + + renderable = TxImage(pil_img, width="auto", height=plan.max_height) + + console.print(renderable) + except Exception as exc: # noqa: BLE001 + short_reason = str(exc)[:120] + console.print(f"[image unavailable: {short_reason}]", markup=False) + + # Truncation footer. + if plan.skipped_rows: + total = len(plan.rendered_rows) + len(plan.skipped_rows) + console.print( + f"... 
rendered {len(plan.rendered_rows)} of {total} images, " + "use --max-images=0 for all" + ) + + return buf.getvalue() diff --git a/static/images.png b/static/images.png new file mode 100644 index 0000000..4fc4776 Binary files /dev/null and b/static/images.png differ diff --git a/tests/contract/test_query_contract.py b/tests/contract/test_query_contract.py index 4091ae9..2bb80f0 100644 --- a/tests/contract/test_query_contract.py +++ b/tests/contract/test_query_contract.py @@ -1,6 +1,8 @@ from __future__ import annotations import json +import sys +from unittest.mock import patch from aimx.__main__ import main @@ -106,3 +108,216 @@ def test_query_invalid_repo_reports_actionable_error(capfd) -> None: captured = capfd.readouterr() assert exit_code == 2 assert "Repository path does not exist" in captured.err + + +# --------------------------------------------------------------------------- +# T011–T014: SC-003 byte-stability — no image graphics bytes on non-TTY paths +# --------------------------------------------------------------------------- + +_GRAPHICS_MARKERS = [ + "\x1b_G", # Kitty graphics + "\x1b]1337;File=", # iTerm2 inline image + "\x90", # DCS / Sixel start + "\x1bP", # Sixel (DCS) +] + + +def _has_graphics_bytes(text: str) -> bool: + return any(marker in text for marker in _GRAPHICS_MARKERS) + + +def test_images_json_output_contains_no_graphics_bytes(capfd, sample_repo_root) -> None: + """T011: --json output MUST contain zero image-protocol escape sequences.""" + exit_code = main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--json"] + ) + + captured = capfd.readouterr() + payload = json.loads(captured.out) # also validates it is legal JSON + assert exit_code == 0 + assert not _has_graphics_bytes(captured.out), ( + "--json stdout contained graphics escape sequences" + ) + assert payload["count"] > 0 + + +def test_images_plain_output_contains_no_graphics_bytes(capfd, sample_repo_root) -> None: + """T012: --plain output MUST be 
tab-separated text with no binary image bytes.""" + exit_code = main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--plain"] + ) + + captured = capfd.readouterr() + assert exit_code == 0 + assert not _has_graphics_bytes(captured.out), ( + "--plain stdout contained graphics escape sequences" + ) + lines = [line for line in captured.out.splitlines() if line.strip()] + assert lines, "Expected at least one output line" + for line in lines: + assert "\t" in line, f"Plain output line is not tab-separated: {line!r}" + + +def test_images_non_tty_stdout_contains_no_graphics_bytes( + capfd, sample_repo_root +) -> None: + """T013: when stdout is not a TTY, no image bytes MUST appear in output.""" + from aimx.commands.query import run_query_command + + # run_query_command returns the rendered string without actually writing to + # stdout, so we can inspect it directly; the is_tty check inside + # detect_capability() will see a non-TTY context in the test runner. + result = run_query_command( + ["images", "images", "--repo", str(sample_repo_root)] + ) + + assert result.exit_status == 0 + output = result.output or "" + assert not _has_graphics_bytes(output), ( + "Non-TTY rich output contained graphics escape sequences" + ) + + +def test_max_images_has_no_effect_on_json_or_plain(capfd, sample_repo_root) -> None: + """T014: --max-images must NOT change --json or --plain output bytes.""" + def _get_json(extra_args: list[str]) -> str: + main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--json"] + + extra_args + ) + return capfd.readouterr().out + + baseline = _get_json([]) + with_three = _get_json(["--max-images", "3"]) + with_zero = _get_json(["--max-images", "0"]) + with_hundred = _get_json(["--max-images", "100"]) + + assert with_three == baseline, "--max-images 3 changed --json output" + assert with_zero == baseline, "--max-images 0 changed --json output" + assert with_hundred == baseline, "--max-images 100 changed --json output" + + 
def _get_plain(extra_args: list[str]) -> str: + main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--plain"] + + extra_args + ) + return capfd.readouterr().out + + plain_baseline = _get_plain([]) + plain_three = _get_plain(["--max-images", "3"]) + assert plain_three == plain_baseline, "--max-images 3 changed --plain output" + + +def test_missing_max_images_value_exits_with_code_2(capfd) -> None: + """T018 (contract): --max-images with no value → exit 2.""" + exit_code = main(["query", "images", "images", "--max-images"]) + captured = capfd.readouterr() + assert exit_code == 2 + + +def test_negative_max_images_exits_with_code_2(capfd) -> None: + """T018 (contract): negative --max-images → exit 2.""" + exit_code = main(["query", "images", "images", "--max-images", "-1"]) + captured = capfd.readouterr() + assert exit_code == 2 + + +def test_non_integer_max_images_exits_with_code_2(capfd) -> None: + """T018 (contract): non-integer --max-images → exit 2.""" + exit_code = main(["query", "images", "images", "--max-images", "abc"]) + captured = capfd.readouterr() + assert exit_code == 2 + + +# --------------------------------------------------------------------------- +# SC-filter: new filter flags keep the JSON/plain envelope shape stable +# --------------------------------------------------------------------------- + + +def test_head_flag_preserves_json_envelope_shape_for_images(capfd, sample_repo_root) -> None: + """--head on query images must preserve the JSON envelope structure.""" + exit_code = main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--head", "3", "--json"] + ) + captured = capfd.readouterr() + payload = json.loads(captured.out) + + assert exit_code == 0 + assert payload["target"] == "images" + assert "count" in payload + assert "rows" in payload + assert payload["count"] <= 3 + + +def test_epochs_flag_preserves_json_envelope_shape_for_images(capfd, sample_repo_root) -> None: + """--epochs on query images must 
preserve the JSON envelope structure.""" + exit_code = main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--epochs", "10:30", "--json"] + ) + captured = capfd.readouterr() + payload = json.loads(captured.out) + + assert exit_code == 0 + assert payload["target"] == "images" + assert "count" in payload + assert "rows" in payload + + +def test_steps_and_epochs_mutually_exclusive_exits_2_for_images(capfd) -> None: + """--steps and --epochs together → exit 2 with a clear error message.""" + exit_code = main( + ["query", "images", "images", "--steps", "1:50", "--epochs", "1:10"] + ) + captured = capfd.readouterr() + assert exit_code == 2 + assert "mutually exclusive" in captured.err + + +def test_steps_and_epochs_mutually_exclusive_exits_2_for_metrics(capfd) -> None: + """--steps and --epochs together → exit 2 for query metrics too.""" + exit_code = main( + ["query", "metrics", "metric.name == 'loss'", "--steps", "1:50", "--epochs", "1:5"] + ) + captured = capfd.readouterr() + assert exit_code == 2 + assert "mutually exclusive" in captured.err + + +def test_head_flag_preserves_json_envelope_shape_for_metrics(capfd, sample_repo_root) -> None: + """--head on query metrics must preserve the JSON envelope structure.""" + exit_code = main( + ["query", "metrics", "metric.name == 'loss'", "--repo", str(sample_repo_root), "--head", "5", "--json"] + ) + captured = capfd.readouterr() + payload = json.loads(captured.out) + + assert exit_code == 0 + assert payload["target"] == "metrics" + assert "runs_count" in payload + assert "metrics_count" in payload + assert "runs" in payload + for run in payload["runs"]: + for metric in run["metrics"]: + assert metric["steps"] <= 5 + + +def test_every_flag_preserves_plain_format_for_images(capfd, sample_repo_root) -> None: + """--every on query images --plain must remain tab-separated.""" + exit_code = main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--every", "2", "--plain"] + ) + captured = 
capfd.readouterr() + assert exit_code == 0 + lines = [line for line in captured.out.splitlines() if line.strip()] + if lines: + assert "\t" in lines[0], "Plain output with --every must remain tab-separated" + + +def test_filter_flags_do_not_produce_graphics_bytes(capfd, sample_repo_root) -> None: + """--head / --epochs must never emit image-protocol escape bytes on --json.""" + exit_code = main( + ["query", "images", "images", "--repo", str(sample_repo_root), "--head", "2", "--json"] + ) + captured = capfd.readouterr() + assert exit_code == 0 + assert not _has_graphics_bytes(captured.out) diff --git a/tests/integration/test_query_command.py b/tests/integration/test_query_command.py index 1115b53..fe9d104 100644 --- a/tests/integration/test_query_command.py +++ b/tests/integration/test_query_command.py @@ -1,8 +1,15 @@ from __future__ import annotations +import io import json +from typing import Any +from unittest.mock import patch + +import pytest from aimx.__main__ import main +from aimx.aim_bridge.metric_stats import RunMeta +from aimx.rendering.image_render import ImageRenderPlan, TerminalCapability, plan_render, render_inline def test_metric_query_accepts_repo_root_and_dot_aim_paths( @@ -137,3 +144,264 @@ def test_invalid_query_expression_fails_cleanly(capfd, sample_repo_root) -> None captured = capfd.readouterr() assert exit_code == 2 assert "Failed to evaluate query" in captured.err + + +# --------------------------------------------------------------------------- +# T006 / T019 / T020: image rendering integration tests +# --------------------------------------------------------------------------- + +def _make_tiny_pil_image() -> Any: + """Return a tiny 4×4 RGB PIL image for test fixtures.""" + from PIL import Image as PILImage + return PILImage.new("RGB", (4, 4), color=(100, 150, 200)) + + +def _fake_image_rows(count: int) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for i in range(count): + run = RunMeta( + hash=f"deadbeef{i:08d}"[:16], + 
experiment=f"exp{i}", + name=None, + creation_time=None, + ) + rows.append({ + "run": run, + "name": f"img{i}", + "context": {}, + "_image_accessor": _make_tiny_pil_image, + }) + return rows + + +def _fake_auto_capability(columns: int = 80, rows: int = 24) -> TerminalCapability: + return TerminalCapability( + is_tty=True, + columns=columns, + rows=rows, + protocol="fallback_text", + reason=None, + ) + + +def test_image_query_non_tty_output_has_no_image_bytes(capfd, sample_repo_root) -> None: + """T006: with stdout not a TTY, run_query_command must NOT add image bytes.""" + from aimx.commands.query import run_query_command + + result = run_query_command(["images", "images", "--repo", str(sample_repo_root)]) + + assert result.exit_status == 0 + output = result.output or "" + # No Kitty/iTerm2/Sixel markers + assert "\x1b_G" not in output + assert "\x1b]1337;File=" not in output + assert "\x90" not in output + + +def test_image_query_accessor_returns_real_pil_image(sample_repo_root) -> None: + """Regression: Aim query_images() yields `Images`, but accessor must return PIL.""" + from aimx.aim_bridge.metric_stats import collect_image_series + + rows = collect_image_series("images", sample_repo_root) + + assert rows, "Expected at least one image row from sample repository" + accessor = rows[0]["_image_accessor"] + pil_img = accessor() + + assert pil_img.__class__.__module__.startswith("PIL.") + assert hasattr(pil_img, "size") + assert pil_img.size[0] > 0 + assert pil_img.size[1] > 0 + + +def test_plan_render_default_cap_splits_correctly() -> None: + """T019: plan_render with max_images=6 on 8 rows → 6 rendered, 2 skipped.""" + cap = _fake_auto_capability() + rows = _fake_image_rows(8) + plan = plan_render(rows, cap, max_images=6) + assert len(plan.rendered_rows) == 6 + assert len(plan.skipped_rows) == 2 + + +def test_plan_render_unlimited_renders_all() -> None: + """T019: plan_render with max_images=0 renders all rows.""" + cap = _fake_auto_capability() + rows = 
_fake_image_rows(8) + plan = plan_render(rows, cap, max_images=0) + assert len(plan.rendered_rows) == 8 + assert plan.skipped_rows == [] + + +def test_plan_render_partial_cap() -> None: + """T019: plan_render with max_images=3 on 8 rows → 3 rendered, 5 skipped.""" + cap = _fake_auto_capability() + rows = _fake_image_rows(8) + plan = plan_render(rows, cap, max_images=3) + assert len(plan.rendered_rows) == 3 + assert len(plan.skipped_rows) == 5 + + +def test_render_inline_truncation_footer_present_when_skipped() -> None: + """T020: render_inline emits footer when skipped_rows is non-empty.""" + cap = _fake_auto_capability() + rows = _fake_image_rows(8) + plan = plan_render(rows, cap, max_images=6) + output = render_inline(plan) + assert "rendered 6 of 8 images" in output + assert "--max-images=0" in output + + +def test_render_inline_no_footer_when_all_rendered() -> None: + """T020: render_inline omits footer when nothing is skipped.""" + cap = _fake_auto_capability() + rows = _fake_image_rows(4) + plan = plan_render(rows, cap, max_images=6) + output = render_inline(plan) + assert "--max-images=0" not in output + + +def test_image_query_inline_preview_preserves_full_rich_table(sample_repo_root) -> None: + """Interactive inline previews must not hide rows beyond the preview cap.""" + from aimx.commands.query import run_query_command + + rows = _fake_image_rows(8) + + with ( + patch("aimx.aim_bridge.metric_stats.collect_image_series", return_value=rows), + patch("aimx.rendering.image_render.detect_capability", return_value=_fake_auto_capability()), + patch("aimx.rendering.image_render.render_inline", return_value="<>\n"), + ): + result = run_query_command(["images", "images", "--repo", str(sample_repo_root)]) + + assert result.exit_status == 0 + assert result.output is not None + assert f"Repo: {sample_repo_root}" in result.output + assert "img0" in result.output + assert "img7" in result.output + assert "<>" in result.output + + +def 
test_max_images_flag_default_is_six(sample_repo_root) -> None: + """T006: --max-images default is 6 (reflected in QueryInvocation).""" + from aimx.commands.query import parse_query_invocation + + inv = parse_query_invocation(["images", "images", "--repo", str(sample_repo_root)]) + assert inv.max_images == 6 + + +def test_max_images_flag_zero_means_unlimited(sample_repo_root) -> None: + """T006: --max-images 0 parses and triggers unlimited render.""" + from aimx.commands.query import parse_query_invocation + + inv = parse_query_invocation( + ["images", "images", "--repo", str(sample_repo_root), "--max-images", "0"] + ) + assert inv.max_images == 0 + + +# --------------------------------------------------------------------------- +# query images: --head / --tail / --every +# --------------------------------------------------------------------------- + +def test_query_images_head_reduces_row_count(sample_repo_root) -> None: + """--head 3 should yield at most 3 rows in --json output.""" + from aimx.commands.query import run_query_command + import json + + result_all = run_query_command(["images", "images", "--repo", str(sample_repo_root), "--json"]) + result_head = run_query_command(["images", "images", "--repo", str(sample_repo_root), "--head", "3", "--json"]) + + assert result_all.exit_status == 0 + assert result_head.exit_status == 0 + + all_rows = json.loads(result_all.output)["rows"] + head_rows = json.loads(result_head.output)["rows"] + + assert len(head_rows) <= 3 + assert len(head_rows) <= len(all_rows) + # First rows must match baseline (head preserves order) + for i, row in enumerate(head_rows): + assert row["name"] == all_rows[i]["name"] + + +def test_query_images_tail_reduces_row_count(sample_repo_root) -> None: + """--tail 3 should yield the last 3 rows in --json output.""" + from aimx.commands.query import run_query_command + import json + + result_all = run_query_command(["images", "images", "--repo", str(sample_repo_root), "--json"]) + result_tail = 
run_query_command(["images", "images", "--repo", str(sample_repo_root), "--tail", "3", "--json"]) + + assert result_all.exit_status == 0 + assert result_tail.exit_status == 0 + + all_rows = json.loads(result_all.output)["rows"] + tail_rows = json.loads(result_tail.output)["rows"] + + assert len(tail_rows) <= 3 + # Last rows must match baseline + for i, row in enumerate(tail_rows): + assert row["name"] == all_rows[len(all_rows) - len(tail_rows) + i]["name"] + + +def test_query_images_every_reduces_row_count(sample_repo_root) -> None: + """--every 2 should roughly halve the image row count.""" + from aimx.commands.query import run_query_command + import json + + result_all = run_query_command(["images", "images", "--repo", str(sample_repo_root), "--json"]) + result_every = run_query_command(["images", "images", "--repo", str(sample_repo_root), "--every", "2", "--json"]) + + assert result_all.exit_status == 0 + assert result_every.exit_status == 0 + + total = json.loads(result_all.output)["count"] + every_count = json.loads(result_every.output)["count"] + + assert every_count < total + + +def test_query_images_epochs_filter_reduces_row_count(sample_repo_root) -> None: + """--epochs 10:30 should return fewer rows than the full query.""" + from aimx.commands.query import run_query_command + import json + + result_all = run_query_command(["images", "images", "--repo", str(sample_repo_root), "--json"]) + result_filtered = run_query_command( + ["images", "images", "--repo", str(sample_repo_root), "--epochs", "10:30", "--json"] + ) + + assert result_all.exit_status == 0 + assert result_filtered.exit_status == 0 + + total = json.loads(result_all.output)["count"] + filtered = json.loads(result_filtered.output)["count"] + + assert filtered < total + + +def test_query_images_head_and_epochs_compose(sample_repo_root) -> None: + """--epochs filter followed by --head should further reduce result.""" + from aimx.commands.query import run_query_command + import json + + result = 
run_query_command( + ["images", "images", "--repo", str(sample_repo_root), "--epochs", "5:50", "--head", "2", "--json"] + ) + + assert result.exit_status == 0 + rows = json.loads(result.output)["rows"] + assert len(rows) <= 2 + + +def test_query_images_steps_and_epochs_exclusive_exits_2(sample_repo_root) -> None: + """--steps and --epochs together must exit with code 2.""" + from aimx.commands.query import run_query_command + + result = run_query_command( + ["images", "images", "--repo", str(sample_repo_root), "--steps", "1:10", "--epochs", "1:5"] + ) + + assert result.exit_status == 2 + assert result.error_message is not None + assert "mutually exclusive" in result.error_message diff --git a/tests/integration/test_short_hash_and_steps.py b/tests/integration/test_short_hash_and_steps.py index 0be65d1..8b66da5 100644 --- a/tests/integration/test_short_hash_and_steps.py +++ b/tests/integration/test_short_hash_and_steps.py @@ -271,3 +271,106 @@ def test_trace_steps_filter_and_short_hash_work_together( assert series["run"]["hash"] == full_hash for step in series["steps"]: assert 1 <= step <= 200 + + +# --------------------------------------------------------------------------- +# --head / --tail / --every for query metrics +# --------------------------------------------------------------------------- + + +def test_query_metrics_head_limits_step_count(capfd, sample_repo_root) -> None: + """--head 5 should yield at most 5 steps per metric.""" + exit_code = main( + [ + "query", "metrics", + "metric.name == 'loss'", + "--repo", str(sample_repo_root), + "--head", "5", + "--json", + ] + ) + captured = capfd.readouterr() + payload = json.loads(captured.out) + + assert exit_code == 0 + for run in payload["runs"]: + for metric in run["metrics"]: + assert metric["steps"] <= 5 + + +def test_query_metrics_tail_limits_step_count(capfd, sample_repo_root) -> None: + """--tail 5 should yield at most 5 steps per metric.""" + exit_code = main( + [ + "query", "metrics", + "metric.name == 
'loss'", + "--repo", str(sample_repo_root), + "--tail", "5", + "--json", + ] + ) + captured = capfd.readouterr() + payload = json.loads(captured.out) + + assert exit_code == 0 + for run in payload["runs"]: + for metric in run["metrics"]: + assert metric["steps"] <= 5 + + +def test_query_metrics_every_reduces_step_count(capfd, sample_repo_root) -> None: + """--every 2 should roughly halve the step count.""" + # baseline (no sampling) + exit_code_base = main( + ["query", "metrics", "metric.name == 'loss'", "--repo", str(sample_repo_root), "--json"] + ) + payload_base = json.loads(capfd.readouterr().out) + + exit_code = main( + [ + "query", "metrics", + "metric.name == 'loss'", + "--repo", str(sample_repo_root), + "--every", "2", + "--json", + ] + ) + payload = json.loads(capfd.readouterr().out) + + assert exit_code_base == 0 + assert exit_code == 0 + # step count should be reduced + base_total = sum(m["steps"] for r in payload_base["runs"] for m in r["metrics"]) + every_total = sum(m["steps"] for r in payload["runs"] for m in r["metrics"]) + assert every_total < base_total + + +def test_query_metrics_epochs_filter_works(capfd, sample_repo_root) -> None: + """--epochs filter should restrict rows (exits 0 even with empty results).""" + exit_code = main( + [ + "query", "metrics", + "metric.name == 'loss'", + "--repo", str(sample_repo_root), + "--epochs", "1:5", + "--json", + ] + ) + captured = capfd.readouterr() + assert exit_code == 0 + payload = json.loads(captured.out) + assert "runs" in payload + + +def test_query_metrics_steps_and_epochs_mutually_exclusive(capfd, sample_repo_root) -> None: + exit_code = main( + [ + "query", "metrics", + "metric.name == 'loss'", + "--repo", str(sample_repo_root), + "--steps", "1:50", + "--epochs", "1:5", + ] + ) + assert exit_code == 2 + assert "mutually exclusive" in capfd.readouterr().err diff --git a/tests/unit/test_image_render.py b/tests/unit/test_image_render.py new file mode 100644 index 0000000..44e91c9 --- /dev/null +++ 
b/tests/unit/test_image_render.py @@ -0,0 +1,309 @@ +"""Unit tests for aimx.rendering.image_render. + +These tests cover: +- T005: detect_capability envvar matrix, plan_render clamp rules, + lazy _image_accessor failure → placeholder behaviour. +- T015: warn_once single-emission guarantee. +- T018: --max-images parsing (delegated to test_query_helpers.py addendum). +""" + +from __future__ import annotations + +import io +import sys +from typing import Any +from unittest.mock import patch + +import pytest + +from aimx.aim_bridge.metric_stats import RunMeta +from aimx.rendering.image_render import ( + ImageRenderPlan, + TerminalCapability, + detect_capability, + plan_render, + render_inline, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _fake_capability( + *, + is_tty: bool = True, + columns: int = 80, + rows: int = 24, + protocol: str = "auto", + reason: str | None = None, +) -> TerminalCapability: + return TerminalCapability( + is_tty=is_tty, + columns=columns, + rows=rows, + protocol=protocol, # type: ignore[arg-type] + reason=reason, + ) + + +def _fake_row(name: str = "img", accessor: Any = None) -> dict[str, Any]: + run = RunMeta(hash="abcdef1234567890", experiment="exp", name="run1", creation_time=None) + return { + "run": run, + "name": name, + "context": {"subset": "train"}, + "_image_accessor": accessor, + } + + +def _make_tiny_pil() -> Any: + """Return a 4×4 red RGB PIL image.""" + from PIL import Image as PILImage + img = PILImage.new("RGB", (4, 4), color=(200, 50, 50)) + return img + + +# --------------------------------------------------------------------------- +# detect_capability: envvar matrix +# --------------------------------------------------------------------------- + +class TestDetectCapability: + def test_non_tty_returns_disabled(self) -> None: + with patch("sys.stdout") as mock_stdout: + 
mock_stdout.isatty.return_value = False + cap = detect_capability() + assert cap.protocol == "disabled" + assert cap.is_tty is False + + def test_narrow_terminal_returns_disabled(self) -> None: + with ( + patch("sys.stdout") as mock_stdout, + patch("shutil.get_terminal_size", return_value=type("S", (), {"columns": 10, "lines": 24})()), + ): + mock_stdout.isatty.return_value = True + cap = detect_capability() + assert cap.protocol == "disabled" + assert "width" in (cap.reason or "").lower() or "20" in (cap.reason or "") + + def test_iterm2_classified_as_auto(self) -> None: + with ( + patch("sys.stdout") as mock_stdout, + patch.dict("os.environ", {"TERM_PROGRAM": "iTerm.app"}, clear=False), + patch("shutil.get_terminal_size", return_value=type("S", (), {"columns": 120, "lines": 30})()), + ): + mock_stdout.isatty.return_value = True + cap = detect_capability() + assert cap.protocol == "auto" + + def test_kitty_classified_as_auto(self) -> None: + with ( + patch("sys.stdout") as mock_stdout, + patch.dict("os.environ", {"KITTY_WINDOW_ID": "1"}, clear=False), + patch("shutil.get_terminal_size", return_value=type("S", (), {"columns": 120, "lines": 30})()), + ): + mock_stdout.isatty.return_value = True + cap = detect_capability() + assert cap.protocol == "auto" + + def test_wezterm_classified_as_auto(self) -> None: + with ( + patch("sys.stdout") as mock_stdout, + patch.dict("os.environ", {"TERM_PROGRAM": "WezTerm"}, clear=False), + patch("shutil.get_terminal_size", return_value=type("S", (), {"columns": 120, "lines": 30})()), + ): + mock_stdout.isatty.return_value = True + cap = detect_capability() + assert cap.protocol == "auto" + + def test_ghostty_classified_as_auto(self) -> None: + with ( + patch("sys.stdout") as mock_stdout, + patch.dict("os.environ", {"GHOSTTY_RESOURCES_DIR": "/usr/share/ghostty"}, clear=False), + patch("shutil.get_terminal_size", return_value=type("S", (), {"columns": 120, "lines": 30})()), + ): + mock_stdout.isatty.return_value = True + cap = 
detect_capability() + assert cap.protocol == "auto" + + def test_plain_xterm_classified_as_fallback_text(self) -> None: + env_overrides: dict[str, str] = {} + remove_keys = { + "TERM_PROGRAM", "KITTY_WINDOW_ID", "WEZTERM_EXECUTABLE", + "GHOSTTY_RESOURCES_DIR", "VTE_VERSION", + } + with ( + patch("sys.stdout") as mock_stdout, + patch("shutil.get_terminal_size", return_value=type("S", (), {"columns": 80, "lines": 24})()), + patch.dict("os.environ", env_overrides, clear=False), + ): + # Suppress any inherited graphics-capable env vars + import os + original = {k: os.environ.pop(k) for k in remove_keys if k in os.environ} + try: + mock_stdout.isatty.return_value = True + cap = detect_capability() + finally: + os.environ.update(original) + assert cap.protocol in ("fallback_text", "auto") + + +# --------------------------------------------------------------------------- +# plan_render: split and clamping logic +# --------------------------------------------------------------------------- + +class TestPlanRender: + def test_disabled_capability_returns_empty_plan(self) -> None: + cap = _fake_capability(protocol="disabled") + rows = [_fake_row() for _ in range(5)] + plan = plan_render(rows, cap, max_images=6) + assert plan.rendered_rows == [] + assert plan.skipped_rows == [] + + def test_unlimited_renders_all(self) -> None: + cap = _fake_capability(protocol="auto", rows=24) + rows = [_fake_row(f"img{i}") for i in range(10)] + plan = plan_render(rows, cap, max_images=0) + assert len(plan.rendered_rows) == 10 + assert plan.skipped_rows == [] + + def test_cap_splits_correctly(self) -> None: + cap = _fake_capability(protocol="auto", rows=24) + rows = [_fake_row(f"img{i}") for i in range(8)] + plan = plan_render(rows, cap, max_images=6) + assert len(plan.rendered_rows) == 6 + assert len(plan.skipped_rows) == 2 + assert len(plan.rendered_rows) + len(plan.skipped_rows) == 8 + + def test_fewer_rows_than_cap_no_skipped(self) -> None: + cap = _fake_capability(protocol="auto", 
rows=24) + rows = [_fake_row() for _ in range(3)] + plan = plan_render(rows, cap, max_images=6) + assert len(plan.rendered_rows) == 3 + assert plan.skipped_rows == [] + + def test_max_height_clamped_to_at_least_3(self) -> None: + # rows=6 → rows//3 = 2 → clamped to 3 + cap = _fake_capability(protocol="auto", rows=6) + plan = plan_render([], cap, max_images=6) + assert plan.max_height == 3 + + def test_max_height_normal(self) -> None: + cap = _fake_capability(protocol="auto", rows=24) + plan = plan_render([], cap, max_images=6) + assert plan.max_height == 8 # 24 // 3 + + def test_target_width_equals_columns(self) -> None: + cap = _fake_capability(protocol="auto", columns=100) + plan = plan_render([], cap, max_images=6) + assert plan.target_width == 100 + + +# --------------------------------------------------------------------------- +# render_inline: disabled → empty string +# --------------------------------------------------------------------------- + +class TestRenderInline: + def test_disabled_returns_empty_string(self) -> None: + cap = _fake_capability(protocol="disabled") + plan = plan_render([], cap, max_images=6) + result = render_inline(plan) + assert result == "" + + def test_empty_rendered_rows_returns_empty_string(self) -> None: + cap = _fake_capability(protocol="auto") + plan = ImageRenderPlan( + capability=cap, + rendered_rows=[], + skipped_rows=[], + target_width=80, + max_height=8, + max_images=6, + ) + result = render_inline(plan) + assert result == "" + + def test_none_accessor_shows_placeholder(self) -> None: + cap = _fake_capability(protocol="fallback_text", columns=80, rows=24) + row = _fake_row(accessor=None) + plan = ImageRenderPlan( + capability=cap, + rendered_rows=[row], + skipped_rows=[], + target_width=80, + max_height=8, + max_images=1, + ) + result = render_inline(plan) + assert "image unavailable" in result + + def test_raising_accessor_shows_placeholder(self) -> None: + cap = _fake_capability(protocol="fallback_text", columns=80, 
rows=24) + + def bad_accessor() -> Any: + raise RuntimeError("blob corrupted") + + row = _fake_row(accessor=bad_accessor) + plan = ImageRenderPlan( + capability=cap, + rendered_rows=[row], + skipped_rows=[], + target_width=80, + max_height=8, + max_images=1, + ) + result = render_inline(plan) + assert "image unavailable" in result + + def test_truncation_footer_present_when_skipped(self) -> None: + cap = _fake_capability(protocol="fallback_text", columns=80, rows=24) + rows_rendered = [_fake_row(f"img{i}", accessor=_make_tiny_pil) for i in range(3)] + rows_skipped = [_fake_row(f"skip{i}") for i in range(2)] + plan = ImageRenderPlan( + capability=cap, + rendered_rows=rows_rendered, + skipped_rows=rows_skipped, + target_width=80, + max_height=8, + max_images=3, + ) + result = render_inline(plan) + assert "rendered 3 of 5 images" in result + assert "--max-images=0" in result + + def test_no_truncation_footer_when_no_skipped(self) -> None: + cap = _fake_capability(protocol="fallback_text", columns=80, rows=24) + rows_rendered = [_fake_row("img0", accessor=_make_tiny_pil)] + plan = ImageRenderPlan( + capability=cap, + rendered_rows=rows_rendered, + skipped_rows=[], + target_width=80, + max_height=8, + max_images=6, + ) + result = render_inline(plan) + assert "--max-images=0" not in result + + +# --------------------------------------------------------------------------- +# warn_once: single emission +# --------------------------------------------------------------------------- + +class TestWarnOnce: + def test_warns_exactly_once(self) -> None: + import aimx.rendering.image_render as ir + + original_warned = ir._WARNED + ir._WARNED = False + try: + captured = io.StringIO() + with patch("sys.stderr", captured): + from aimx.rendering.image_render import warn_once + warn_once("test message A") + warn_once("test message B") + output = captured.getvalue() + assert output.count("aimx: inline image rendering unavailable:") == 1 + assert "test message A" in output + finally: + 
ir._WARNED = original_warned diff --git a/tests/unit/test_query_helpers.py b/tests/unit/test_query_helpers.py index 13f016f..001ef37 100644 --- a/tests/unit/test_query_helpers.py +++ b/tests/unit/test_query_helpers.py @@ -6,9 +6,17 @@ from aimx.commands.query import ( QueryInvocation, + _sort_image_rows, normalize_repo_path, parse_query_invocation, ) +from aimx.aim_bridge.metric_stats import ( + RunMeta, + filter_image_rows_by_epoch_range, + filter_image_rows_by_step_range, + parse_epoch_slice, + subsample_image_rows, +) def test_normalize_repo_path_keeps_repo_root(tmp_path: Path) -> None: @@ -113,3 +121,279 @@ def test_parse_query_invocation_steps_missing_value_raises() -> None: def test_parse_query_invocation_steps_defaults_to_none() -> None: inv = parse_query_invocation(["metrics", "metric.name=='loss'", "--repo", "data"]) assert inv.step_slice is None + + +# --------------------------------------------------------------------------- +# T018: --max-images parsing +# --------------------------------------------------------------------------- + +def test_parse_query_invocation_max_images_default_is_six() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data"]) + assert inv.max_images == 6 + + +def test_parse_query_invocation_max_images_zero_means_unlimited() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data", "--max-images", "0"]) + assert inv.max_images == 0 + + +def test_parse_query_invocation_max_images_positive_value() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data", "--max-images", "12"]) + assert inv.max_images == 12 + + +def test_parse_query_invocation_max_images_negative_raises() -> None: + with pytest.raises(ValueError, match="Invalid --max-images"): + parse_query_invocation(["images", "images", "--repo", "data", "--max-images", "-1"]) + + +def test_parse_query_invocation_max_images_non_integer_raises() -> None: + with pytest.raises(ValueError, match="Invalid --max-images"): + 
parse_query_invocation(["images", "images", "--repo", "data", "--max-images", "abc"]) + + +def test_parse_query_invocation_max_images_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --max-images"): + parse_query_invocation(["images", "images", "--repo", "data", "--max-images"]) + + +def test_sort_image_rows_orders_epochs_numerically_within_each_run() -> None: + run_b = RunMeta(hash="bbb", experiment="exp-b", name=None, creation_time=None) + run_a = RunMeta(hash="aaa", experiment="exp-a", name=None, creation_time=None) + rows = [ + {"run": run_b, "name": "example", "context": {"epoch": 30, "subset": "val"}, "_sort_epoch": 30, "_sort_step": None}, + {"run": run_b, "name": "example", "context": {"epoch": 10, "subset": "val"}, "_sort_epoch": 10, "_sort_step": None}, + {"run": run_a, "name": "example", "context": {"epoch": 20, "subset": "val"}, "_sort_epoch": 20, "_sort_step": None}, + {"run": run_a, "name": "example", "context": {"epoch": 5, "subset": "val"}, "_sort_epoch": 5, "_sort_step": None}, + ] + + result = _sort_image_rows(rows) + + assert [row["run"].hash for row in result] == ["bbb", "bbb", "aaa", "aaa"] + assert [row["context"]["epoch"] for row in result] == [10, 30, 5, 20] + + +def test_sort_image_rows_falls_back_to_step_when_epoch_missing() -> None: + run = RunMeta(hash="aaa", experiment="exp-a", name=None, creation_time=None) + rows = [ + {"run": run, "name": "example", "context": {"subset": "val"}, "_sort_epoch": None, "_sort_step": 20}, + {"run": run, "name": "example", "context": {"subset": "val"}, "_sort_epoch": None, "_sort_step": 5}, + {"run": run, "name": "example", "context": {"subset": "val"}, "_sort_epoch": None, "_sort_step": 10}, + ] + + result = _sort_image_rows(rows) + + assert [row["_sort_step"] for row in result] == [5, 10, 20] + + +# --------------------------------------------------------------------------- +# --head / --tail / --every parsing +# 
--------------------------------------------------------------------------- + +def test_parse_head_flag() -> None: + inv = parse_query_invocation(["metrics", "metric.name == 'loss'", "--repo", "data", "--head", "5"]) + assert inv.head == 5 + + +def test_parse_tail_flag() -> None: + inv = parse_query_invocation(["metrics", "metric.name == 'loss'", "--repo", "data", "--tail", "3"]) + assert inv.tail == 3 + + +def test_parse_every_flag() -> None: + inv = parse_query_invocation(["metrics", "metric.name == 'loss'", "--repo", "data", "--every", "2"]) + assert inv.every == 2 + + +def test_parse_head_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --head"): + parse_query_invocation(["metrics", "loss", "--repo", "data", "--head"]) + + +def test_parse_tail_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --tail"): + parse_query_invocation(["metrics", "loss", "--repo", "data", "--tail"]) + + +def test_parse_every_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --every"): + parse_query_invocation(["metrics", "loss", "--repo", "data", "--every"]) + + +def test_parse_every_zero_raises() -> None: + with pytest.raises(ValueError, match="--every"): + parse_query_invocation(["metrics", "loss", "--repo", "data", "--every", "0"]) + + +def test_parse_every_negative_raises() -> None: + with pytest.raises(ValueError, match="--every"): + parse_query_invocation(["metrics", "loss", "--repo", "data", "--every", "-2"]) + + +def test_head_tail_every_default_to_none() -> None: + inv = parse_query_invocation(["metrics", "metric.name == 'loss'"]) + assert inv.head is None + assert inv.tail is None + assert inv.every is None + + +# --------------------------------------------------------------------------- +# --epochs parsing +# --------------------------------------------------------------------------- + +def test_parse_epochs_flag() -> None: + inv = 
parse_query_invocation(["images", "images", "--repo", "data", "--epochs", "5:50"]) + assert inv.epoch_slice == "5:50" + + +def test_parse_epochs_open_start() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data", "--epochs", ":30"]) + assert inv.epoch_slice == ":30" + + +def test_parse_epochs_open_end() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data", "--epochs", "10:"]) + assert inv.epoch_slice == "10:" + + +def test_parse_epochs_missing_value_raises() -> None: + with pytest.raises(ValueError, match="Missing value for --epochs"): + parse_query_invocation(["images", "images", "--repo", "data", "--epochs"]) + + +# --------------------------------------------------------------------------- +# --steps and --epochs mutual exclusion +# --------------------------------------------------------------------------- + +def test_steps_and_epochs_mutually_exclusive_raises() -> None: + with pytest.raises(ValueError, match="mutually exclusive"): + parse_query_invocation([ + "images", "images", "--repo", "data", + "--steps", "1:50", "--epochs", "1:10", + ]) + + +def test_steps_alone_is_accepted() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data", "--steps", "10:100"]) + assert inv.step_slice == "10:100" + assert inv.epoch_slice is None + + +def test_epochs_alone_is_accepted() -> None: + inv = parse_query_invocation(["images", "images", "--repo", "data", "--epochs", "5:50"]) + assert inv.epoch_slice == "5:50" + assert inv.step_slice is None + + +# --------------------------------------------------------------------------- +# parse_epoch_slice helper +# --------------------------------------------------------------------------- + +def test_parse_epoch_slice_closed_range() -> None: + start, end = parse_epoch_slice("5:50") + assert start == 5.0 + assert end == 50.0 + + +def test_parse_epoch_slice_open_start() -> None: + start, end = parse_epoch_slice(":30") + assert start is None + assert end == 30.0 + + 
+def test_parse_epoch_slice_open_end() -> None: + start, end = parse_epoch_slice("10:") + assert start == 10.0 + assert end is None + + +def test_parse_epoch_slice_open_both_raises() -> None: + with pytest.raises(ValueError, match="open slice"): + parse_epoch_slice(":") + + +def test_parse_epoch_slice_no_colon_raises() -> None: + with pytest.raises(ValueError, match="start:end"): + parse_epoch_slice("50") + + +# --------------------------------------------------------------------------- +# filter_image_rows_by_step_range +# --------------------------------------------------------------------------- + +def _make_image_rows(step_or_epochs: list[tuple[int | None, float | None]]) -> list[dict]: + run = RunMeta(hash="aaa", experiment="exp", name=None, creation_time=None) + return [ + {"run": run, "name": "img", "context": {}, "_sort_step": step, "_sort_epoch": epoch} + for step, epoch in step_or_epochs + ] + + +def test_filter_image_rows_by_step_range_keeps_within_bounds() -> None: + rows = _make_image_rows([(5, None), (10, None), (20, None), (30, None)]) + result = filter_image_rows_by_step_range(rows, 10, 20) + assert [r["_sort_step"] for r in result] == [10, 20] + + +def test_filter_image_rows_by_step_range_open_start() -> None: + rows = _make_image_rows([(5, None), (10, None), (30, None)]) + result = filter_image_rows_by_step_range(rows, None, 10) + assert [r["_sort_step"] for r in result] == [5, 10] + + +def test_filter_image_rows_by_step_range_keeps_rows_without_step() -> None: + rows = _make_image_rows([(None, 5.0), (10, None)]) + result = filter_image_rows_by_step_range(rows, 5, 5) + assert len(result) == 1 + assert result[0]["_sort_step"] is None + + +# --------------------------------------------------------------------------- +# filter_image_rows_by_epoch_range +# --------------------------------------------------------------------------- + +def test_filter_image_rows_by_epoch_range_keeps_within_bounds() -> None: + rows = _make_image_rows([(None, 5.0), (None, 
10.0), (None, 30.0)]) + result = filter_image_rows_by_epoch_range(rows, 5.0, 10.0) + assert [r["_sort_epoch"] for r in result] == [5.0, 10.0] + + +def test_filter_image_rows_by_epoch_range_keeps_rows_without_epoch() -> None: + rows = _make_image_rows([(10, None), (None, 30.0)]) + result = filter_image_rows_by_epoch_range(rows, 5.0, 20.0) + assert len(result) == 1 + assert result[0]["_sort_step"] == 10 + + +# --------------------------------------------------------------------------- +# subsample_image_rows +# --------------------------------------------------------------------------- + +def test_subsample_image_rows_head() -> None: + rows = _make_image_rows([(i, None) for i in range(10)]) + result = subsample_image_rows(rows, head=3, tail=None, every=None) + assert [r["_sort_step"] for r in result] == [0, 1, 2] + + +def test_subsample_image_rows_tail() -> None: + rows = _make_image_rows([(i, None) for i in range(10)]) + result = subsample_image_rows(rows, head=None, tail=3, every=None) + assert [r["_sort_step"] for r in result] == [7, 8, 9] + + +def test_subsample_image_rows_every() -> None: + rows = _make_image_rows([(i, None) for i in range(6)]) + result = subsample_image_rows(rows, head=None, tail=None, every=2) + assert [r["_sort_step"] for r in result] == [0, 2, 4] + + +def test_subsample_image_rows_combined_head_then_every() -> None: + rows = _make_image_rows([(i, None) for i in range(10)]) + result = subsample_image_rows(rows, head=6, tail=None, every=2) + assert [r["_sort_step"] for r in result] == [0, 2, 4] + + +def test_subsample_image_rows_noop_when_all_none() -> None: + rows = _make_image_rows([(i, None) for i in range(4)]) + result = subsample_image_rows(rows, head=None, tail=None, every=None) + assert result is rows diff --git a/uv.lock b/uv.lock index 646b9d7..ab28e6a 100644 --- a/uv.lock +++ b/uv.lock @@ -96,12 +96,13 @@ wheels = [ [[package]] name = "aimx" -version = "0.3.0" +version = "0.3.1" source = { editable = "." 
} dependencies = [ { name = "numpy" }, { name = "plotext" }, { name = "rich" }, + { name = "textual-image" }, ] [package.dev-dependencies] @@ -115,6 +116,7 @@ requires-dist = [ { name = "numpy", specifier = ">=1.24" }, { name = "plotext", specifier = ">=5.3" }, { name = "rich", specifier = ">=13.7" }, + { name = "textual-image", specifier = ">=0.12.0" }, ] [package.metadata.requires-dev] @@ -988,6 +990,19 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651 }, ] +[[package]] +name = "textual-image" +version = "0.12.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +dependencies = [ + { name = "pillow" }, + { name = "rich" }, +] +sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c2/e7/c82ea0604874b6d51d5717a0911061ae5810e36dad2e4d2b11fa7d54cdaa/textual_image-0.12.0.tar.gz", hash = "sha256:fdd0b5ff9c8a99740bc360a99ce014d563fa97d07a5b49b472470809f57c0a74", size = 116403 } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/67/2c/38ac586a3834a3bd8cbcf0c8ea1ae23bf68b9cc13b96a67f47a825479dd3/textual_image-0.12.0-py3-none-any.whl", hash = "sha256:0b13f62fe6a29f4ed6dfd641f2779ffe92dde41f16163fa1de69158477aa50aa", size = 115626 }, +] + [[package]] name = "tomli" version = "2.4.1"