From e141273e82a644dd65ed8399e2e40d0c50681aed Mon Sep 17 00:00:00 2001 From: Vo Date: Wed, 13 May 2026 14:28:31 -0700 Subject: [PATCH 01/13] Add live plan for zppy links feature --- backend/docs/174-zppy-links/plan.md | 126 ++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 backend/docs/174-zppy-links/plan.md diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md new file mode 100644 index 00000000..4070884c --- /dev/null +++ b/backend/docs/174-zppy-links/plan.md @@ -0,0 +1,126 @@ +# Plan: Connect zppy Diagnostics to SimBoard Simulations + +## Task + +Replace manual diagnostic URL-pasting with automated, metadata-based linking of zppy diagnostics outputs to SimBoard simulation records. + +## Scope + +**In:** matching strategy (`case_name` + `machine` + `hpc_username`), persistence model, discovery mechanism, zppy manifest spec. +**Out:** frontend UI redesign, PACE changes, existing manual link workflow, Case uniqueness refactor (issue #136), diagnostics data ingestion (Phase 2+). + +## Key Decisions + +### Do NOT parse public HTML directories + +- **Fragility** — HTML layouts vary by web server; breaks on config changes. +- **Security** — SSRF and content-injection attack surface. +- **Coupling** — SimBoard depends on external web server availability/structure. +- **Latency** — Network crawling is slow and unreliable for production. + +### zppy writes a manifest file (no API call) + +zppy runs as a user-level Python package on HPC machines. SimBoard's API requires `SERVICE_ACCOUNT` or `ADMIN` bearer tokens — it is not feasible for each user to obtain and configure an API token in zppy. + +**Instead, zppy writes a small manifest file to a well-known location within its output directory.** This requires zero authentication, zero network access from zppy, and trivial zppy-side changes. + +```jsonc +// /.simboard-diagnostics.json +{ + "case_name": "v3.LR.historical_0201", + "machine": "chrysalis", + "hpc_username": "user123", + "diagnostics": [ + { + "kind": "e3sm_diags", + "url": "https://web.lcrc.anl.gov/...", + "label": "E3SM Diags", + }, + { + "kind": "mpas_analysis", + "url": "https://web.lcrc.anl.gov/...", + "label": "MPAS-Analysis", + }, + ], +} +``` + +zppy already knows `case_name`, `machine`, and the running user from its cfg. `mache` resolves machine-specific public web URL prefixes to construct the URLs. + +### SimBoard discovers manifests via filesystem scanning (not API push) + +SimBoard already has a CronJob-based filesystem scanner (`nersc_archive_ingestor.py`) that: + +- Walks mounted HPC directories every 15 min +- Uses state-based incremental dedup +- Authenticates with a single service account token +- Calls `POST /ingestions/from-path` internally + +The diagnostics linking should follow the same pattern: a SimBoard-side scanner discovers `.simboard-diagnostics.json` manifests, reads them, matches to existing Cases, and creates `ExternalLink` rows. **No per-user tokens needed.** + +### Persist links in database (not query-time resolution) + +Store `ExternalLink` rows on match. No remote calls during frontend queries. Same model as manually-added links — frontend works with zero changes. `ExternalLink.created_at` provides audit trail. + +## Approach + +1. **Join key:** `(case_name, machine, hpc_username)` — all three required. `case_name` alone is not globally unique (`Case.name` has a unique index but different users can reuse names). Adding `machine` + `hpc_username` disambiguates. `CASE_HASH` is unreliable across executions (see issue #136). zppy has all three values. + +2. **Matching query:** Machine is on `Simulation` not `Case`, so the resolver joins: `Case.name == X AND Simulation.machine_id == Y AND Simulation.hpc_username == Z`. All simulations in a case share the same machine in practice. + +3. **zppy-side (minimal change):** After diagnostics complete, zppy writes `.simboard-diagnostics.json` to its output directory. `mache` resolves public URL prefixes. No API call, no token. + +4. **SimBoard diagnostics scanner** — two options: + + **Option A — Extend NERSC archive ingestor (recommended for MVP):** Add a post-scan phase to the existing `nersc_archive_ingestor.py` that also walks known diagnostics output directories (or the same archive tree) looking for `.simboard-diagnostics.json` files. On discovery, it calls a new internal endpoint or directly creates `ExternalLink` rows via the existing service account token. + + **Option B — Separate diagnostics scanner script:** A new lightweight CronJob script (`diagnostics_link_scanner.py`) that walks diagnostics output directories. Same pattern as `nersc_archive_ingestor.py` — env-configured, state-file dedup, service account auth. Better separation of concerns, but more operational overhead. + +5. **API endpoint** (extend `backend/app/features/simulation/api.py`): + + ``` + POST /api/v1/diagnostics/link + Body: { "case_name": "...", "machine": "...", "hpc_username": "...", "diagnostics": [...] } + ``` + + Restricted to `ADMIN` / `SERVICE_ACCOUNT` roles (same as ingestion). Resolves the triple → `Case` → creates `ExternalLink` rows with `kind = diagnostic`. The scanner calls this endpoint; users don't call it directly. + +6. **Schema:** Add `DiagnosticsLinkRequest` to `backend/app/features/simulation/schemas.py`. + +7. **Migration:** None if linking to existing FK targets. Required if adding `case_id` FK to `ExternalLink` (see open question #1). + +8. **Frontend:** No changes. Existing `grouped_links` rendering picks up new diagnostic links automatically. + +### Alternative: Convention-based URL derivation (no zppy changes) + +For production runs with enforced path conventions, SimBoard could derive diagnostic URLs from simulation metadata + `mache` without any zppy changes or manifest files. Per issue #174, zppy outputs follow a fixed directory structure and `mache` resolves per-machine URL prefixes. + +This works only when path conventions are strict. The manifest approach is more robust for custom user paths. Could combine both: derive URLs for production campaigns, manifest for custom runs. + +## Tests + +- `backend/tests/features/simulation/test_api.py` — endpoint tests: + - Happy path: matching `(case_name, machine, hpc_username)` → links created + - Different user, same case_name + machine → no cross-linking (isolation test) + - No matching case → 404 + - Duplicate link idempotency + - Invalid payload → 422 +- Scanner tests: manifest discovery, state dedup, malformed manifest handling +- Run: `make backend-test && make pre-commit-run` + +## Risk + +**Score: 3 (normal)** + +1. **zppy adoption lag** — No data until zppy emits manifests. Mitigate with convention-based derivation for production runs. +2. **Case name collision** — `Case.name` is unique in DB but not globally meaningful. The `(case_name, machine, hpc_username)` triple mitigates. Broader fix tracked in issue #136. +3. **Diagnostics output path visibility** — Scanner must have filesystem access to zppy output directories. On NERSC this requires mounting the relevant CFS paths into the SimBoard container (same pattern as performance archive). +4. **Timing gap** — Scanner-based approach has up to 15-min latency. Acceptable for diagnostics linking. + +## Open Questions (ask colleagues) + +1. **Case-level vs execution-level linking?** zppy diagnostics run across simulation output in time increments — they're inherently case-scoped, not tied to a specific execution/LID. Current `ExternalLink` only has `simulation_id` FK. Options: (a) add optional `case_id` FK to `ExternalLink`, (b) create a separate `CaseLink` model, (c) link to reference simulation only as a pragmatic shortcut. This is the key schema decision. +2. **Case uniqueness long-term?** The `(case_name, machine, hpc_username)` triple is a pragmatic join key but `Case.name` as the sole DB unique constraint is fragile. Issue #136 is evaluating `CASE_HASH` but it's unstable across executions. Should Case uniqueness be strengthened in the model itself? +3. **Diagnostics output directory location?** Where are zppy outputs stored on each machine? Need the path pattern to configure the scanner. Per issue #174, the coupled group stores results on machine web servers — need the exact filesystem mount paths for NERSC (and other machines if applicable). +4. **Retroactive linking needed?** If yes, plan a one-time bulk-linking script (or convention-based derivation) for existing diagnostics that predate this feature. +5. **Convention-based derivation viable for MVP?** If zppy output paths are predictable enough from `(case_name, machine, hpc_username)` + `mache`, SimBoard could derive diagnostic URLs without any zppy changes. Worth evaluating as a faster MVP path. From 5b3580e725b1328b9cd0d6333cf26630f577ab71 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 14 May 2026 11:09:05 -0700 Subject: [PATCH 02/13] Update plan.md --- backend/docs/174-zppy-links/plan.md | 285 +++++++++++++++++++--------- 1 file changed, 196 insertions(+), 89 deletions(-) diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md index 4070884c..a9084863 100644 --- a/backend/docs/174-zppy-links/plan.md +++ b/backend/docs/174-zppy-links/plan.md @@ -1,126 +1,233 @@ # Plan: Connect zppy Diagnostics to SimBoard Simulations -## Task +## Goal -Replace manual diagnostic URL-pasting with automated, metadata-based linking of zppy diagnostics outputs to SimBoard simulation records. +Replace manual diagnostics URL entry with automated linking from zppy diagnostics outputs to existing SimBoard simulation records. ## Scope -**In:** matching strategy (`case_name` + `machine` + `hpc_username`), persistence model, discovery mechanism, zppy manifest spec. -**Out:** frontend UI redesign, PACE changes, existing manual link workflow, Case uniqueness refactor (issue #136), diagnostics data ingestion (Phase 2+). - -## Key Decisions - -### Do NOT parse public HTML directories - -- **Fragility** — HTML layouts vary by web server; breaks on config changes. -- **Security** — SSRF and content-injection attack surface. -- **Coupling** — SimBoard depends on external web server availability/structure. -- **Latency** — Network crawling is slow and unreliable for production. - -### zppy writes a manifest file (no API call) - -zppy runs as a user-level Python package on HPC machines. SimBoard's API requires `SERVICE_ACCOUNT` or `ADMIN` bearer tokens — it is not feasible for each user to obtain and configure an API token in zppy. - -**Instead, zppy writes a small manifest file to a well-known location within its output directory.** This requires zero authentication, zero network access from zppy, and trivial zppy-side changes. - -```jsonc -// /.simboard-diagnostics.json -{ - "case_name": "v3.LR.historical_0201", - "machine": "chrysalis", - "hpc_username": "user123", - "diagnostics": [ - { - "kind": "e3sm_diags", - "url": "https://web.lcrc.anl.gov/...", - "label": "E3SM Diags", - }, - { - "kind": "mpas_analysis", - "url": "https://web.lcrc.anl.gov/...", - "label": "MPAS-Analysis", - }, - ], -} +### In + +- Add required zppy provenance fields: `case_name`, `machine`, `hpc_username` +- Discover zppy diagnostics provenance files from configured filesystem roots +- Confirm diagnostics completion before linking +- Match diagnostics to SimBoard records using `(case_name, machine, hpc_username)` +- Create idempotent diagnostic `ExternalLink` rows +- Maintain scanner state to avoid repeated processing + +### Out + +- Frontend redesign +- Changes to manual external-link workflows +- PACE integration changes +- Case identity or uniqueness refactor +- Diagnostics content ingestion or indexing +- Public HTML directory scraping +- Historical backfill beyond configured provenance roots +- Optional build/campaign metadata ingestion + +## Core Decisions + +### Match diagnostics at case scope + +zppy runs against a full case output tree, not a single execution/LID. Use case identity as the primary join key: + +```text +(case_name, machine, hpc_username) +``` + +All three fields are required. `case_name` alone is not globally safe, and `CASE_HASH` is not reliable across executions. + +### Do not parse public HTML directories + +Avoid public directory scraping. It is fragile, web-server-coupled, slow, and expands the SSRF/content-injection attack surface. + +### Use zppy provenance cfg as the primary input + +Do not require zppy to call the SimBoard API. zppy runs as a user-level HPC package, while SimBoard API writes require `SERVICE_ACCOUNT` or `ADMIN` tokens. + +Instead, SimBoard discovers zppy provenance files from configured filesystem roots. Newer zppy runs already emit provenance cfg files under diagnostics output paths, for example: + +```text +post/scripts/provenance.20260303_230804_991619.cfg +``` + +Current cfg examples expose useful fields: + +- `case`: case name +- `input`: case run directory +- `output`: diagnostics filesystem root +- `www`: public diagnostics root +- `campaign`: optional campaign metadata + +But current cfg is not yet an authoritative join source because it may lack: + +- `machine` +- execution `LID` +- canonical simulation owner +- unambiguous `hpc_username` + +Path-derived usernames are unsafe. Example ambiguity: + +```text +input path owner: ac.wlin +output path owner: ac.zhang40 ``` -zppy already knows `case_name`, `machine`, and the running user from its cfg. `mache` resolves machine-specific public web URL prefixes to construct the URLs. +Therefore, zppy must enrich provenance cfg with required case identity copied from `case_scripts/env_case.xml`: + +| XML field | Provenance field | +| ---------- | ---------------- | +| `CASE` | `case_name` | +| `MACH` | `machine` | +| `REALUSER` | `hpc_username` | + +If any required field is missing, SimBoard skips the provenance file and logs it as invalid for linking. + +### Persist links, do not resolve at query time + +Create database rows when diagnostics are discovered. Frontend queries should not crawl filesystems or remote URLs. + +Use the existing manual-link rendering path where possible: diagnostics links become `ExternalLink` rows with `kind = diagnostic`. + +## Implementation + +Implement in order: provenance contract -> scanner -> storage target -> resolver/API -> frontend verification. + +### zppy -### SimBoard discovers manifests via filesystem scanning (not API push) +#### 1. Emit required provenance fields -SimBoard already has a CronJob-based filesystem scanner (`nersc_archive_ingestor.py`) that: +| Field | Source | +| -------------- | ------------------------- | +| `case_name` | `env_case.xml` `CASE` | +| `machine` | `env_case.xml` `MACH` | +| `hpc_username` | `env_case.xml` `REALUSER` | -- Walks mounted HPC directories every 15 min -- Uses state-based incremental dedup -- Authenticates with a single service account token -- Calls `POST /ingestions/from-path` internally +Tests: -The diagnostics linking should follow the same pattern: a SimBoard-side scanner discovers `.simboard-diagnostics.json` manifests, reads them, matches to existing Cases, and creates `ExternalLink` rows. **No per-user tokens needed.** +- emits `case_name`, `machine`, `hpc_username` +- parses values from `env_case.xml` +- handles missing `env_case.xml` +- preserves existing provenance behavior -### Persist links in database (not query-time resolution) +### SimBoard -Store `ExternalLink` rows on match. No remote calls during frontend queries. Same model as manually-added links — frontend works with zero changes. `ExternalLink.created_at` provides audit trail. +#### 1. Add diagnostics scanner -## Approach +Add `diagnostics_link_scanner.py`. -1. **Join key:** `(case_name, machine, hpc_username)` — all three required. `case_name` alone is not globally unique (`Case.name` has a unique index but different users can reuse names). Adding `machine` + `hpc_username` disambiguates. `CASE_HASH` is unreliable across executions (see issue #136). zppy has all three values. +Responsibilities: -2. **Matching query:** Machine is on `Simulation` not `Case`, so the resolver joins: `Case.name == X AND Simulation.machine_id == Y AND Simulation.hpc_username == Z`. All simulations in a case share the same machine in practice. +- scan configured diagnostics roots for `provenance*.cfg` +- dedup with state file +- verify diagnostics completion +- parse `case_name`, `machine`, `hpc_username` +- call internal API with service-account auth +- skip and log if full join key is unavailable -3. **zppy-side (minimal change):** After diagnostics complete, zppy writes `.simboard-diagnostics.json` to its output directory. `mache` resolves public URL prefixes. No API call, no token. +Tests: -4. **SimBoard diagnostics scanner** — two options: +- discovers cfgs +- parses required cfg identity +- handles malformed cfgs +- skips missing identity +- checks completion marker +- dedups state +- handles duplicate links idempotently - **Option A — Extend NERSC archive ingestor (recommended for MVP):** Add a post-scan phase to the existing `nersc_archive_ingestor.py` that also walks known diagnostics output directories (or the same archive tree) looking for `.simboard-diagnostics.json` files. On discovery, it calls a new internal endpoint or directly creates `ExternalLink` rows via the existing service account token. +#### 2. Resolve link storage - **Option B — Separate diagnostics scanner script:** A new lightweight CronJob script (`diagnostics_link_scanner.py`) that walks diagnostics output directories. Same pattern as `nersc_archive_ingestor.py` — env-configured, state-file dedup, service account auth. Better separation of concerns, but more operational overhead. +Add `DiagnosticsLinkRequest` in `backend/app/features/simulation/schemas.py`. -5. **API endpoint** (extend `backend/app/features/simulation/api.py`): +Storage options: - ``` - POST /api/v1/diagnostics/link - Body: { "case_name": "...", "machine": "...", "hpc_username": "...", "diagnostics": [...] } - ``` +1. Preferred: add `case_id` to `ExternalLink`. +2. Alternative: add `CaseLink`. +3. Shortcut: attach to reference simulation. - Restricted to `ADMIN` / `SERVICE_ACCOUNT` roles (same as ingestion). Resolves the triple → `Case` → creates `ExternalLink` rows with `kind = diagnostic`. The scanner calls this endpoint; users don't call it directly. +#### 3. Add matching resolver -6. **Schema:** Add `DiagnosticsLinkRequest` to `backend/app/features/simulation/schemas.py`. +| Input | Match | +| -------------- | ------------------------- | +| `case_name` | `Case.name` | +| `machine` | `Simulation.machine_id` | +| `hpc_username` | `Simulation.hpc_username` | -7. **Migration:** None if linking to existing FK targets. Required if adding `case_id` FK to `ExternalLink` (see open question #1). +Outcomes: -8. **Frontend:** No changes. Existing `grouped_links` rendering picks up new diagnostic links automatically. +- 1 match: create/update links +- 0 matches: `404` +- multiple matches: `409` -### Alternative: Convention-based URL derivation (no zppy changes) +Tests: -For production runs with enforced path conventions, SimBoard could derive diagnostic URLs from simulation metadata + `mache` without any zppy changes or manifest files. Per issue #174, zppy outputs follow a fixed directory structure and `mache` resolves per-machine URL prefixes. +- matching triple creates links +- same case/machine under different user does not cross-link +- no match returns `404` +- ambiguous match returns `409` -This works only when path conventions are strict. The manifest approach is more robust for custom user paths. Could combine both: derive URLs for production campaigns, manifest for custom runs. +#### 4. Add internal API endpoint -## Tests +Endpoint: `POST /api/v1/diagnostics/link` -- `backend/tests/features/simulation/test_api.py` — endpoint tests: - - Happy path: matching `(case_name, machine, hpc_username)` → links created - - Different user, same case_name + machine → no cross-linking (isolation test) - - No matching case → 404 - - Duplicate link idempotency - - Invalid payload → 422 -- Scanner tests: manifest discovery, state dedup, malformed manifest handling -- Run: `make backend-test && make pre-commit-run` +Roles: `ADMIN`, `SERVICE_ACCOUNT` -## Risk +Request: -**Score: 3 (normal)** +| Field | Required | +| -------------- | -------- | +| `case_name` | yes | +| `machine` | yes | +| `hpc_username` | yes | +| `diagnostics` | yes | -1. **zppy adoption lag** — No data until zppy emits manifests. Mitigate with convention-based derivation for production runs. -2. **Case name collision** — `Case.name` is unique in DB but not globally meaningful. The `(case_name, machine, hpc_username)` triple mitigates. Broader fix tracked in issue #136. -3. **Diagnostics output path visibility** — Scanner must have filesystem access to zppy output directories. On NERSC this requires mounting the relevant CFS paths into the SimBoard container (same pattern as performance archive). -4. **Timing gap** — Scanner-based approach has up to 15-min latency. Acceptable for diagnostics linking. +Diagnostics item: -## Open Questions (ask colleagues) +| Field | Required | +| ------------------- | -------- | +| `name` | yes | +| `url` | yes | +| `kind = diagnostic` | yes | + +Tests: + +- duplicate request is idempotent +- invalid payload returns `422` +- auth required + +#### 5. Keep frontend unchanged + +Existing external-link rendering should display diagnostic links once rows exist. + +## Fallbacks + +### Curated backfill + +Allow convention-based URL derivation only for controlled campaigns. Do not use as the primary MVP path. + +### Validation command + +```bash +make backend-test && make pre-commit-run +``` -1. **Case-level vs execution-level linking?** zppy diagnostics run across simulation output in time increments — they're inherently case-scoped, not tied to a specific execution/LID. Current `ExternalLink` only has `simulation_id` FK. Options: (a) add optional `case_id` FK to `ExternalLink`, (b) create a separate `CaseLink` model, (c) link to reference simulation only as a pragmatic shortcut. This is the key schema decision. -2. **Case uniqueness long-term?** The `(case_name, machine, hpc_username)` triple is a pragmatic join key but `Case.name` as the sole DB unique constraint is fragile. Issue #136 is evaluating `CASE_HASH` but it's unstable across executions. Should Case uniqueness be strengthened in the model itself? -3. **Diagnostics output directory location?** Where are zppy outputs stored on each machine? Need the path pattern to configure the scanner. Per issue #174, the coupled group stores results on machine web servers — need the exact filesystem mount paths for NERSC (and other machines if applicable). -4. **Retroactive linking needed?** If yes, plan a one-time bulk-linking script (or convention-based derivation) for existing diagnostics that predate this feature. -5. **Convention-based derivation viable for MVP?** If zppy output paths are predictable enough from `(case_name, machine, hpc_username)` + `mache`, SimBoard could derive diagnostic URLs without any zppy changes. Worth evaluating as a faster MVP path. +## Risks + +- **Storage gap**: diagnostics are case-scoped, but `ExternalLink` currently points at `simulation_id`. + Mitigation: decide storage target before implementing resolver/API behavior. +- **Missing identity**: SimBoard cannot link a provenance file without `case_name`, `machine`, and `hpc_username`. + Mitigation: require zppy provenance enrichment; skip and log invalid files. +- **Deployment variability**: zppy roots and public URL prefixes vary by machine/campaign. + Mitigation: use env-configured scanner roots and machine/public-prefix mappings. +- **Provenance drift**: cfg layout and required-field coverage may vary across zppy versions. + Mitigation: add parser tests, schema/version detection, and a documented support window. + +## Remaining Open Questions + +1. **Storage target:** Should diagnostics links attach to `Case`, `Simulation`, or a new link table? +2. **Provenance schema:** Should zppy emit a versioned normalized block or reuse existing top-level cfg fields? +3. **Completion signal:** Which artifact should SimBoard treat as authoritative completion: status file, generated index, or explicit provenance field? +4. **Deployment scope:** Which scanner roots, machines, and public URL prefixes are supported in MVP? +5. **Retroactive linking:** Does MVP include historical backfill, or only provenance files with the required join key? +6. **Case identity hardening:** Is `(case_name, machine, hpc_username)` sufficient until issue #136 is resolved? From f7f5c2576af0a3b3bb72ccbdf010fbd521b569f2 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 14 May 2026 11:33:18 -0700 Subject: [PATCH 03/13] Update pland --- backend/docs/174-zppy-links/plan.md | 88 +++++++++++++++++------------ 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md index a9084863..beb3acd8 100644 --- a/backend/docs/174-zppy-links/plan.md +++ b/backend/docs/174-zppy-links/plan.md @@ -4,15 +4,18 @@ Replace manual diagnostics URL entry with automated linking from zppy diagnostics outputs to existing SimBoard simulation records. +MVP is NERSC-only. + ## Scope ### In - Add required zppy provenance fields: `case_name`, `machine`, `hpc_username` -- Discover zppy diagnostics provenance files from configured filesystem roots -- Confirm diagnostics completion before linking +- Add required diagnostics URLs in zppy provenance +- Discover zppy diagnostics provenance files from configured NERSC filesystem roots +- Confirm diagnostics completion from index page plus status files - Match diagnostics to SimBoard records using `(case_name, machine, hpc_username)` -- Create idempotent diagnostic `ExternalLink` rows +- Create idempotent case-scoped diagnostic links - Maintain scanner state to avoid repeated processing ### Out @@ -24,7 +27,7 @@ Replace manual diagnostics URL entry with automated linking from zppy diagnostic - Diagnostics content ingestion or indexing - Public HTML directory scraping - Historical backfill beyond configured provenance roots -- Optional build/campaign metadata ingestion +- Non-NERSC deployments ## Core Decisions @@ -44,14 +47,17 @@ Avoid public directory scraping. It is fragile, web-server-coupled, slow, and ex ### Use zppy provenance cfg as the primary input -Do not require zppy to call the SimBoard API. zppy runs as a user-level HPC package, while SimBoard API writes require `SERVICE_ACCOUNT` or `ADMIN` tokens. - -Instead, SimBoard discovers zppy provenance files from configured filesystem roots. Newer zppy runs already emit provenance cfg files under diagnostics output paths, for example: +SimBoard discovers zppy provenance files from configured NERSC filesystem roots. Newer zppy runs already emit provenance cfg files under diagnostics output paths, for example: ```text post/scripts/provenance.20260303_230804_991619.cfg ``` +Reference example: + +- https://github.com/E3SM-Project/zppy/blob/main/examples/post.v3.LR.historical.zppy_v3.cfg +- https://web.lcrc.anl.gov/public/e3sm/diagnostic_output/zppy_example/v3.2.0/v3.LR.historical_0051/provenance.20260303_230804_991619.cfg + Current cfg examples expose useful fields: - `case`: case name @@ -63,7 +69,6 @@ Current cfg examples expose useful fields: But current cfg is not yet an authoritative join source because it may lack: - `machine` -- execution `LID` - canonical simulation owner - unambiguous `hpc_username` @@ -74,7 +79,7 @@ input path owner: ac.wlin output path owner: ac.zhang40 ``` -Therefore, zppy must enrich provenance cfg with required case identity copied from `case_scripts/env_case.xml`: +Therefore, zppy must enrich provenance cfg with required case identity copied from `/case_scripts/env_case.xml`: | XML field | Provenance field | | ---------- | ---------------- | @@ -84,11 +89,21 @@ Therefore, zppy must enrich provenance cfg with required case identity copied fr If any required field is missing, SimBoard skips the provenance file and logs it as invalid for linking. +For MVP, zppy should reuse existing top-level cfg fields rather than emit a new versioned normalized block. + +### Require explicit diagnostics URLs in provenance + +For MVP, SimBoard should not derive diagnostics URLs from path conventions. zppy should emit explicit diagnostics URLs in provenance cfg. + +### Use index page plus status files as completion signal + +Treat diagnostics as complete only when the expected index page and zppy status files are present. + ### Persist links, do not resolve at query time Create database rows when diagnostics are discovered. Frontend queries should not crawl filesystems or remote URLs. -Use the existing manual-link rendering path where possible: diagnostics links become `ExternalLink` rows with `kind = diagnostic`. +Diagnostic links are case-scoped. For MVP, store them on `Case` by adding `case_id` to `ExternalLink`. Keep the existing manual-link rendering path where possible by surfacing case-scoped diagnostic links alongside current links. ## Implementation @@ -104,11 +119,20 @@ Implement in order: provenance contract -> scanner -> storage target -> resolver | `machine` | `env_case.xml` `MACH` | | `hpc_username` | `env_case.xml` `REALUSER` | +Implementation note: + +- For NERSC MVP, zppy can construct explicit diagnostics URLs from cfg `www` plus `mache` machine metadata. +- `mache.MachineInfo` exposes helpers such as `web_portal_base`, `web_portal_url`, and `username`. +- Reference: https://docs.e3sm.org/mache/main/developers_guide/generated/mache.MachineInfo.html + Tests: - emits `case_name`, `machine`, `hpc_username` +- emits explicit diagnostics URLs +- can construct explicit diagnostics URLs from cfg `www` plus `mache` machine metadata - parses values from `env_case.xml` -- handles missing `env_case.xml` +- parses values from `env_build.xml` +- handles missing `env_case.xml` or `env_build.xml` - preserves existing provenance behavior ### SimBoard @@ -119,10 +143,11 @@ Add `diagnostics_link_scanner.py`. Responsibilities: -- scan configured diagnostics roots for `provenance*.cfg` +- scan configured NERSC diagnostics roots for `provenance*.cfg` - dedup with state file -- verify diagnostics completion +- verify diagnostics completion from index page plus status files - parse `case_name`, `machine`, `hpc_username` +- parse explicit diagnostics URLs - call internal API with service-account auth - skip and log if full join key is unavailable @@ -132,7 +157,7 @@ Tests: - parses required cfg identity - handles malformed cfgs - skips missing identity -- checks completion marker +- checks index-plus-status completion marker - dedups state - handles duplicate links idempotently @@ -140,23 +165,19 @@ Tests: Add `DiagnosticsLinkRequest` in `backend/app/features/simulation/schemas.py`. -Storage options: - -1. Preferred: add `case_id` to `ExternalLink`. -2. Alternative: add `CaseLink`. -3. Shortcut: attach to reference simulation. +For MVP, add `case_id` to `ExternalLink` and store diagnostic links at case scope. #### 3. Add matching resolver -| Input | Match | -| -------------- | ------------------------- | -| `case_name` | `Case.name` | -| `machine` | `Simulation.machine_id` | -| `hpc_username` | `Simulation.hpc_username` | +| Input | Match | +| -------------- | ----------------------- | +| `case_name` | `Case.name` | +| `machine` | joined case simulations | +| `hpc_username` | joined case simulations | Outcomes: -- 1 match: create/update links +- 1 case match: create/update case-scoped links - 0 matches: `404` - multiple matches: `409` @@ -214,20 +235,17 @@ make backend-test && make pre-commit-run ## Risks -- **Storage gap**: diagnostics are case-scoped, but `ExternalLink` currently points at `simulation_id`. - Mitigation: decide storage target before implementing resolver/API behavior. +- **Case-scoped link migration**: diagnostics are case-scoped, but `ExternalLink` currently points at `simulation_id`. + Mitigation: add `case_id` for MVP and keep migration/API behavior narrow. - **Missing identity**: SimBoard cannot link a provenance file without `case_name`, `machine`, and `hpc_username`. Mitigation: require zppy provenance enrichment; skip and log invalid files. -- **Deployment variability**: zppy roots and public URL prefixes vary by machine/campaign. - Mitigation: use env-configured scanner roots and machine/public-prefix mappings. +- **NERSC deployment variability**: zppy roots and public URL prefixes may still vary by campaign or user layout within NERSC. + Mitigation: use env-configured NERSC scanner roots and NERSC public-prefix mappings. - **Provenance drift**: cfg layout and required-field coverage may vary across zppy versions. Mitigation: add parser tests, schema/version detection, and a documented support window. ## Remaining Open Questions -1. **Storage target:** Should diagnostics links attach to `Case`, `Simulation`, or a new link table? -2. **Provenance schema:** Should zppy emit a versioned normalized block or reuse existing top-level cfg fields? -3. **Completion signal:** Which artifact should SimBoard treat as authoritative completion: status file, generated index, or explicit provenance field? -4. **Deployment scope:** Which scanner roots, machines, and public URL prefixes are supported in MVP? -5. **Retroactive linking:** Does MVP include historical backfill, or only provenance files with the required join key? -6. **Case identity hardening:** Is `(case_name, machine, hpc_username)` sufficient until issue #136 is resolved? +1. **NERSC deployment scope:** Which NERSC scanner roots and public URL prefixes are supported in MVP? +2. **Retroactive linking:** Does MVP include historical backfill, or only provenance files with the required join key? +3. **Case identity hardening:** Is `(case_name, machine, hpc_username)` sufficient until issue #136 is resolved? From ca22bd842cb5a87156ed65cff9d79ef2756e452d Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 14 May 2026 11:44:38 -0700 Subject: [PATCH 04/13] Update plan --- backend/docs/174-zppy-links/plan.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md index beb3acd8..2072e289 100644 --- a/backend/docs/174-zppy-links/plan.md +++ b/backend/docs/174-zppy-links/plan.md @@ -12,7 +12,8 @@ MVP is NERSC-only. - Add required zppy provenance fields: `case_name`, `machine`, `hpc_username` - Add required diagnostics URLs in zppy provenance -- Discover zppy diagnostics provenance files from configured NERSC filesystem roots +- Require standardized zppy diagnostics output locations for NERSC production runs +- Discover zppy diagnostics provenance files from configured NERSC production filesystem roots - Confirm diagnostics completion from index page plus status files - Match diagnostics to SimBoard records using `(case_name, machine, hpc_username)` - Create idempotent case-scoped diagnostic links @@ -91,6 +92,12 @@ If any required field is missing, SimBoard skips the provenance file and logs it For MVP, zppy should reuse existing top-level cfg fields rather than emit a new versioned normalized block. +### Require standardized output locations for production runs + +For MVP, NERSC production runs must use standardized zppy diagnostics output locations. SimBoard relies on those known production roots for provenance discovery. + +Custom or ad hoc layouts do not block the overall design, but they are not the required path for MVP. + ### Require explicit diagnostics URLs in provenance For MVP, SimBoard should not derive diagnostics URLs from path conventions. zppy should emit explicit diagnostics URLs in provenance cfg. @@ -113,6 +120,8 @@ Implement in order: provenance contract -> scanner -> storage target -> resolver #### 1. Emit required provenance fields +For MVP, production runs must write diagnostics outputs and provenance cfg files to the standardized NERSC zppy output locations. + | Field | Source | | -------------- | ------------------------- | | `case_name` | `env_case.xml` `CASE` | @@ -127,6 +136,7 @@ Implementation note: Tests: +- uses standardized NERSC production output locations - emits `case_name`, `machine`, `hpc_username` - emits explicit diagnostics URLs - can construct explicit diagnostics URLs from cfg `www` plus `mache` machine metadata @@ -143,7 +153,7 @@ Add `diagnostics_link_scanner.py`. Responsibilities: -- scan configured NERSC diagnostics roots for `provenance*.cfg` +- scan configured NERSC production diagnostics roots for `provenance*.cfg` - dedup with state file - verify diagnostics completion from index page plus status files - parse `case_name`, `machine`, `hpc_username` From 2f34b2f02e2e731c5f7da4eb205824eddd87d368 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Thu, 14 May 2026 11:46:15 -0700 Subject: [PATCH 05/13] Update plan.md to remove PACE integration changes Removed PACE integration changes from the plan. --- backend/docs/174-zppy-links/plan.md | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md index 2072e289..d17d10d5 100644 --- a/backend/docs/174-zppy-links/plan.md +++ b/backend/docs/174-zppy-links/plan.md @@ -23,7 +23,6 @@ MVP is NERSC-only. - Frontend redesign - Changes to manual external-link workflows -- PACE integration changes - Case identity or uniqueness refactor - Diagnostics content ingestion or indexing - Public HTML directory scraping From ede5f2386d3bd057073900e7597c1ca6b8789015 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Fri, 22 May 2026 11:17:10 -0700 Subject: [PATCH 06/13] Update diagnostics URL handling in plan.md --- backend/docs/174-zppy-links/plan.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md index d17d10d5..ae63f139 100644 --- a/backend/docs/174-zppy-links/plan.md +++ b/backend/docs/174-zppy-links/plan.md @@ -137,7 +137,7 @@ Tests: - uses standardized NERSC production output locations - emits `case_name`, `machine`, `hpc_username` -- emits explicit diagnostics URLs +- emits explicit diagnostics URLs (`diagnostic_url`) - can construct explicit diagnostics URLs from cfg `www` plus `mache` machine metadata - parses values from `env_case.xml` - parses values from `env_build.xml` @@ -156,7 +156,7 @@ Responsibilities: - dedup with state file - verify diagnostics completion from index page plus status files - parse `case_name`, `machine`, `hpc_username` -- parse explicit diagnostics URLs +- parse explicit diagnostics URLs (`diagnostic_url`) - call internal API with service-account auth - skip and log if full join key is unavailable From b346b60f72157f55303742c9776deaaf92cbb632 Mon Sep 17 00:00:00 2001 From: Vo Date: Fri, 22 May 2026 15:24:23 -0700 Subject: [PATCH 07/13] Add four phase implementation plan --- .../phase-1-case-link-storage.md | 59 +++++++++++++ .../phase-2-diagnostics-link-api.md | 74 ++++++++++++++++ .../phase-3-link-aggregation.md | 66 +++++++++++++++ .../phase-4-provenance-scanner.md | 84 +++++++++++++++++++ 4 files changed, 283 insertions(+) create mode 100644 backend/docs/174-zppy-links/phase-1-case-link-storage.md create mode 100644 backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md create mode 100644 backend/docs/174-zppy-links/phase-3-link-aggregation.md create mode 100644 backend/docs/174-zppy-links/phase-4-provenance-scanner.md diff --git a/backend/docs/174-zppy-links/phase-1-case-link-storage.md b/backend/docs/174-zppy-links/phase-1-case-link-storage.md new file mode 100644 index 00000000..bb77e66e --- /dev/null +++ b/backend/docs/174-zppy-links/phase-1-case-link-storage.md @@ -0,0 +1,59 @@ +# Phase 1 Plan: Case-Scoped Link Storage + +## Task + +Extend backend data model so an external link can belong to either a simulation or a case, without changing existing simulation-scoped link behavior. + +## Scope + +### In scope + +- `ExternalLink` ownership model changes in `backend/app/features/simulation/models.py` +- Alembic migration in `backend/migrations/versions` +- ORM relationships for `Case.links` +- Model and schema test updates needed for new ownership rules + +### Out of scope + +- New diagnostics-link API endpoint +- Provenance scanner script +- Simulation response aggregation logic +- Frontend changes + +## Approach + +1. Update `ExternalLink` ownership in `backend/app/features/simulation/models.py`. + - Add nullable `case_id` foreign key to `cases.id`. + - Make `simulation_id` nullable. + - Add `Case.links` relationship and matching `ExternalLink.case` relationship. + - Keep existing `Simulation.links` relationship intact. + +2. Add database invariants in Alembic migration. + - Backfill nothing; existing rows remain simulation-owned. + - Add check constraint enforcing exactly one owner per row: `case_id` xor `simulation_id`. + - Preserve cascade semantics for both ownership paths. + +3. Preserve current write behavior. + - Manual simulation create flow and archive-ingestion flow continue writing only simulation-owned links. + - No API contract changes in this phase. + +## Tests + +- Update model and schema coverage to validate: + - simulation-owned external link is valid + - case-owned external link is valid + - ownerless external link is invalid + - dual-owned external link is invalid +- Run: + - `make backend-test` + +## Risk + +- Risk score: 3 +- Main failure modes: + - Migration breaks existing `external_links` rows or ORM loads. + - Owner constraint behaves differently across SQLite test DB and PostgreSQL. + +## Open Questions + +None. diff --git a/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md b/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md new file mode 100644 index 00000000..45aeccdd --- /dev/null +++ b/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md @@ -0,0 +1,74 @@ +# Phase 2 Plan: Internal Diagnostics Link API + +## Task + +Add backend-only endpoint that resolves one case by `(case_name, machine, hpc_username)` and creates idempotent case-scoped diagnostic links. + +## Scope + +### In scope + +- Diagnostics-link request schemas in `backend/app/features/simulation/schemas.py` +- Internal endpoint in `backend/app/features/simulation/api.py` +- Match resolver and authorization logic +- API test coverage for endpoint behavior + +### Out of scope + +- Provenance scanner script +- Frontend changes +- Aggregating case-scoped links into simulation responses + +## Approach + +1. Add diagnostics-link request schemas. + - Add `DiagnosticsLinkRequest` with required `case_name`, `machine`, `hpc_username`, and `diagnostics`. + - Add `DiagnosticsLinkItem` with required `name`, `url`, and `kind="diagnostic"`. + +2. Add new internal endpoint. + - Route: `POST /api/v1/diagnostics/link` + - Success response: `204 No Content` + - Endpoint lives in existing simulation feature router module. + +3. Enforce auth and access policy. + - Require authenticated user from existing `current_active_user` dependency. + - Allow only `ADMIN` and `SERVICE_ACCOUNT`. + - Return `403` for authenticated non-admin, non-service-account users. + +4. Resolve case match from existing data. + - Match `Case.name == case_name`. + - Join through `Simulation` and `Machine`. + - Require one joined simulation with matching `Machine.name == machine`. + - Require same joined simulation to have matching `Simulation.hpc_username == hpc_username`. + - Return `404` when no case matches. + - Return `409` when multiple cases match. + +5. Persist idempotent links. + - Create case-owned `ExternalLink` rows with `kind="diagnostic"`. + - Upsert by `(case_id, kind, url)` in application logic. + - If same URL already exists for case, update label from diagnostics item `name` instead of inserting duplicate. + +## Tests + +- Add API coverage for: + - successful create + - duplicate request remains idempotent + - `401` unauthenticated + - `403` authenticated but wrong role + - `404` no matching case + - `409` ambiguous match + - `422` invalid payload +- Reuse existing service-account token auth patterns already used by ingestion tests. +- Run: + - `make backend-test` + +## Risk + +- Risk score: 4 +- Main failure modes: + - Resolver query matches duplicate cases unexpectedly. + - Idempotency logic inserts duplicate rows under repeated requests. + +## Open Questions + +None. diff --git a/backend/docs/174-zppy-links/phase-3-link-aggregation.md b/backend/docs/174-zppy-links/phase-3-link-aggregation.md new file mode 100644 index 00000000..701d2c7e --- /dev/null +++ b/backend/docs/174-zppy-links/phase-3-link-aggregation.md @@ -0,0 +1,66 @@ +# Phase 3 Plan: Aggregate Case Diagnostics Into Existing Responses + +## Task + +Make existing simulation detail, simulation list, and assistant summary paths expose case-scoped diagnostic links without changing frontend contracts. + +## Scope + +### In scope + +- Response aggregation in simulation API +- Relationship loading updates needed to read `Case.links` +- Assistant snapshot and summary loading updates +- Backend tests proving payload shape stays compatible + +### Out of scope + +- Provenance scanner script +- New frontend UI or TypeScript contract changes +- Changing `CaseOut` + +## Approach + +1. Add merged-link helper in simulation feature. + - Merge `Simulation.links` with `Simulation.case.links`. + - Deduplicate by `(kind, url)`. + - Prefer simulation-owned row when same `(kind, url)` exists on both simulation and case. + +2. Use merged links in simulation responses. + - Keep `SimulationOut.links` shape unchanged. + - Keep computed `grouped_links` behavior unchanged. + - Ensure merged list is passed into `SimulationOut` generation for both simulation list and simulation detail endpoints. + +3. Load case links everywhere needed. + - Update list and detail query options in `backend/app/features/simulation/api.py` to eager-load `Case.links` alongside `Simulation.links`. + - Avoid introducing extra lazy-load queries in hot paths. + +4. Update assistant paths. + - Update `backend/app/features/assistant/api.py` query options so assistant summary loads case links. + - Update `backend/app/features/assistant/snapshot.py` so snapshot links include merged case-owned diagnostics, not only direct simulation links. + +5. Preserve current frontend behavior. + - Do not change frontend API shapes or field names. + - Do not add case-scoped links to `CaseOut` in this phase. + +## Tests + +- Update simulation API tests to verify: + - simulation detail includes case-owned diagnostic links + - simulation list includes case-owned diagnostic links + - duplicate `(kind, url)` across case and simulation appears once + - simulation-owned link wins on duplicate URL +- Update assistant tests to verify case-scoped diagnostic links are visible to summary generation and citations. +- Run: + - `make backend-test` + +## Risk + +- Risk score: 5 +- Main failure modes: + - Duplicate links leak into API payload. + - Assistant path and simulation path expose different link sets. + +## Open Questions + +None. diff --git a/backend/docs/174-zppy-links/phase-4-provenance-scanner.md b/backend/docs/174-zppy-links/phase-4-provenance-scanner.md new file mode 100644 index 00000000..4defa230 --- /dev/null +++ b/backend/docs/174-zppy-links/phase-4-provenance-scanner.md @@ -0,0 +1,84 @@ +# Phase 4 Plan: Provenance Scanner and Ops Docs + +## Task + +Add standalone scanner that discovers zppy provenance cfg files from configured NERSC roots, verifies completion markers, and calls internal diagnostics-link API with service-account auth. + +## Scope + +### In scope + +- New script `backend/app/scripts/ingestion/diagnostics_link_scanner.py` +- State-file persistence and retry behavior +- Provenance cfg parsing and completion checks +- Script test coverage +- Script documentation and env example updates + +### Out of scope + +- zppy repo changes +- Historical backfill tooling beyond normal scanner behavior +- New backend endpoint behavior outside Phase 2 contract + +## Approach + +1. Mirror existing operational script structure. + - Base new script on patterns from `backend/app/scripts/ingestion/nersc_archive_ingestor.py`. + - Reuse same style for config parsing, structured logs, dry-run handling, retry/backoff, and state persistence. + +2. Discover provenance cfg files. + - Recursively search configured roots from required env var `ZPPY_PROVENANCE_ROOTS`. + - Accept files matching `provenance*.cfg`. + +3. Parse required fields from each cfg. + - Require `case_name`, `machine`, `hpc_username`, `diagnostic_url`, and `output`. + - Also extract `www` from the provenance cfg for preserved diagnostics provenance context. + - Do not derive `diagnostic_url` from `www` in MVP; continue treating explicit `diagnostic_url` as authoritative. + - Treat missing required fields as terminal skip with structured log. + +4. Verify diagnostics completion before linking. + - Require `/index.html` to exist. + - Require every filename listed in env var `DIAGNOSTICS_REQUIRED_STATUS_FILES` to exist under ``. + - Skip incomplete diagnostics without calling API. + +5. Call internal diagnostics-link API. + - Send bearer token from `SIMBOARD_API_TOKEN`. + - POST one diagnostics-link request per eligible cfg to `POST /api/v1/diagnostics/link`. + - Use one diagnostics item for MVP: `name="zppy diagnostics"`, `url=diagnostic_url`, `kind="diagnostic"`. + +6. Persist scanner state. + - Store state in `DIAGNOSTICS_STATE_PATH`. + - Key by provenance file path. + - Persist cfg fingerprint, last outcome, and timestamp. + - Reprocess only when cfg fingerprint changes. + +7. Document operational config. + - Update `backend/app/scripts/README.md` with purpose, env vars, and example invocation. + - Add placeholders to `.envs/example/backend.env.example` only for operator-provided values required by this script. + +## Tests + +- Add `backend/tests/features/ingestion/test_diagnostics_link_scanner.py` covering: + - provenance discovery + - cfg parsing success and failure + - `www` extraction when present + - missing required identity or URL + - completion-marker checks + - dry-run behavior + - retry behavior for transient API failures + - state dedup and retry-on-fingerprint-change + - API payload formatting +- Run: + - `make backend-test` + - `make pre-commit-run` + +## Risk + +- Risk score: 5 +- Main failure modes: + - Completion-marker policy is too strict or too loose. + - State logic suppresses needed retries or replays unchanged cfgs. + +## Open Questions + +None. From 632368408109729c72bbc6f7ec7c52e685b8c034 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 4 Jun 2026 14:32:33 -0700 Subject: [PATCH 08/13] Update plans --- backend/docs/174-zppy-links/phase-1-case-link-storage.md | 3 +++ .../docs/174-zppy-links/phase-2-diagnostics-link-api.md | 7 +++++-- backend/docs/174-zppy-links/plan.md | 7 +++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/docs/174-zppy-links/phase-1-case-link-storage.md b/backend/docs/174-zppy-links/phase-1-case-link-storage.md index bb77e66e..d04d5220 100644 --- a/backend/docs/174-zppy-links/phase-1-case-link-storage.md +++ b/backend/docs/174-zppy-links/phase-1-case-link-storage.md @@ -31,6 +31,8 @@ Extend backend data model so an external link can belong to either a simulation 2. Add database invariants in Alembic migration. - Backfill nothing; existing rows remain simulation-owned. - Add check constraint enforcing exactly one owner per row: `case_id` xor `simulation_id`. + - Add partial unique index for case-owned diagnostics on `(case_id, kind, url)` where `case_id IS NOT NULL`. + - Leave simulation-owned link uniqueness unchanged in this phase. - Preserve cascade semantics for both ownership paths. 3. Preserve current write behavior. @@ -44,6 +46,7 @@ Extend backend data model so an external link can belong to either a simulation - case-owned external link is valid - ownerless external link is invalid - dual-owned external link is invalid + - duplicate case-owned diagnostic link violates the new DB uniqueness invariant - Run: - `make backend-test` diff --git a/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md b/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md index 45aeccdd..7f7f34ad 100644 --- a/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md +++ b/backend/docs/174-zppy-links/phase-2-diagnostics-link-api.md @@ -28,7 +28,8 @@ Add backend-only endpoint that resolves one case by `(case_name, machine, hpc_us 2. Add new internal endpoint. - Route: `POST /api/v1/diagnostics/link` - Success response: `204 No Content` - - Endpoint lives in existing simulation feature router module. + - Endpoint lives in `backend/app/features/simulation/api.py`, but uses a dedicated `diagnostics_router = APIRouter(prefix="/diagnostics", tags=["Diagnostics"])` rather than the existing `/simulations` router. + - Register `diagnostics_router` in `backend/app/main.py` with the same `API_BASE` prefix as other feature routers so the final path is exactly `/api/v1/diagnostics/link`. 3. Enforce auth and access policy. - Require authenticated user from existing `current_active_user` dependency. @@ -45,7 +46,8 @@ Add backend-only endpoint that resolves one case by `(case_name, machine, hpc_us 5. Persist idempotent links. - Create case-owned `ExternalLink` rows with `kind="diagnostic"`. - - Upsert by `(case_id, kind, url)` in application logic. + - Treat the Phase 1 partial unique index on `(case_id, kind, url)` as the source of truth for idempotency. + - Use conflict-safe write logic so repeated or concurrent requests for the same `(case_id, kind, url)` do not create duplicate rows. - If same URL already exists for case, update label from diagnostics item `name` instead of inserting duplicate. ## Tests @@ -53,6 +55,7 @@ Add backend-only endpoint that resolves one case by `(case_name, machine, hpc_us - Add API coverage for: - successful create - duplicate request remains idempotent + - concurrent duplicate request remains idempotent - `401` unauthenticated - `403` authenticated but wrong role - `404` no matching case diff --git a/backend/docs/174-zppy-links/plan.md b/backend/docs/174-zppy-links/plan.md index ae63f139..0c034abf 100644 --- a/backend/docs/174-zppy-links/plan.md +++ b/backend/docs/174-zppy-links/plan.md @@ -175,6 +175,7 @@ Tests: Add `DiagnosticsLinkRequest` in `backend/app/features/simulation/schemas.py`. For MVP, add `case_id` to `ExternalLink` and store diagnostic links at case scope. +Add a partial unique index on `(case_id, kind, url)` where `case_id IS NOT NULL` so case-owned diagnostic links remain idempotent under repeated or concurrent writes. #### 3. Add matching resolver @@ -201,6 +202,11 @@ Tests: Endpoint: `POST /api/v1/diagnostics/link` +Implementation note: + +- Define the endpoint in `backend/app/features/simulation/api.py` using a dedicated `diagnostics_router` with prefix `/diagnostics`. +- Register that router in `backend/app/main.py` with `API_BASE` so the public path remains exactly `/api/v1/diagnostics/link` instead of inheriting the `/simulations` prefix. + Roles: `ADMIN`, `SERVICE_ACCOUNT` Request: @@ -223,6 +229,7 @@ Diagnostics item: Tests: - duplicate request is idempotent +- concurrent duplicate request is idempotent - invalid payload returns `422` - auth required From b5a406acdae0edb13ca91f1b88432d40d9feb2ae Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 4 Jun 2026 14:40:42 -0700 Subject: [PATCH 09/13] Add ExternalLink support for case --- backend/app/features/simulation/models.py | 52 ++++- ...04_130000_add_case_owned_external_links.py | 86 +++++++ .../tests/features/simulation/test_models.py | 217 ++++++++++++++++++ .../tests/features/simulation/test_schemas.py | 37 +++ 4 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 backend/migrations/versions/20260604_130000_add_case_owned_external_links.py create mode 100644 backend/tests/features/simulation/test_models.py diff --git a/backend/app/features/simulation/models.py b/backend/app/features/simulation/models.py index ae170bcc..8f9403f7 100644 --- a/backend/app/features/simulation/models.py +++ b/backend/app/features/simulation/models.py @@ -6,7 +6,16 @@ from typing import TYPE_CHECKING, Optional from uuid import UUID -from sqlalchemy import DateTime, ForeignKey, Integer, String, Text +from sqlalchemy import ( + CheckConstraint, + DateTime, + ForeignKey, + Index, + Integer, + String, + Text, + text, +) from sqlalchemy import Enum as SAEnum from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import UUID as PG_UUID @@ -45,6 +54,13 @@ class Case(Base, IDMixin, TimestampMixin): cascade="all, delete-orphan", passive_deletes=True, ) + links: Mapped[list[ExternalLink]] = relationship( + "ExternalLink", + back_populates="case", + foreign_keys="ExternalLink.case_id", + cascade="all, delete-orphan", + passive_deletes=True, + ) class Simulation(Base, IDMixin, TimestampMixin): @@ -194,9 +210,30 @@ class Artifact(Base, IDMixin, TimestampMixin): class ExternalLink(Base, IDMixin, TimestampMixin): __tablename__ = "external_links" + __table_args__ = ( + CheckConstraint( + "(simulation_id IS NOT NULL) <> (case_id IS NOT NULL)", + name="exactly_one_owner", + ), + Index( + "uq_external_links_case_id_kind_url", + "case_id", + "kind", + "url", + unique=True, + postgresql_where=text("case_id IS NOT NULL"), + ), + ) - simulation_id: Mapped[UUID] = mapped_column( - PG_UUID(as_uuid=True), ForeignKey("simulations.id", ondelete="CASCADE") + simulation_id: Mapped[UUID | None] = mapped_column( + PG_UUID(as_uuid=True), + ForeignKey("simulations.id", ondelete="CASCADE"), + nullable=True, + ) + case_id: Mapped[UUID | None] = mapped_column( + PG_UUID(as_uuid=True), + ForeignKey("cases.id", ondelete="CASCADE"), + nullable=True, ) kind: Mapped[ExternalLinkKind] = mapped_column( @@ -212,8 +249,15 @@ class ExternalLink(Base, IDMixin, TimestampMixin): url: Mapped[str] = mapped_column(String(1000)) label: Mapped[Optional[str]] = mapped_column(String(200)) - simulation: Mapped[Simulation] = relationship( + simulation: Mapped[Simulation | None] = relationship( back_populates="links", primaryjoin="ExternalLink.simulation_id==Simulation.id", + foreign_keys=[simulation_id], + passive_deletes=True, + ) + case: Mapped[Case | None] = relationship( + back_populates="links", + primaryjoin="ExternalLink.case_id==Case.id", + foreign_keys=[case_id], passive_deletes=True, ) diff --git a/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py b/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py new file mode 100644 index 00000000..6a23bd1d --- /dev/null +++ b/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py @@ -0,0 +1,86 @@ +"""Allow external links to belong to either simulations or cases. + +Revision ID: 20260604_130000 +Revises: 20260604_120000 +Create Date: 2026-06-04 13:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "20260604_130000" +down_revision: Union[str, Sequence[str], None] = "20260604_120000" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add case ownership support for external links.""" + op.add_column( + "external_links", + sa.Column("case_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_foreign_key( + op.f("fk_external_links_case_id_cases"), + "external_links", + "cases", + ["case_id"], + ["id"], + ondelete="CASCADE", + ) + op.alter_column( + "external_links", + "simulation_id", + existing_type=postgresql.UUID(as_uuid=True), + nullable=True, + ) + op.create_check_constraint( + op.f("ck_external_links_exactly_one_owner"), + "external_links", + "(simulation_id IS NOT NULL) <> (case_id IS NOT NULL)", + ) + op.create_index( + "uq_external_links_case_id_kind_url", + "external_links", + ["case_id", "kind", "url"], + unique=True, + postgresql_where=sa.text("case_id IS NOT NULL"), + ) + + +def downgrade() -> None: + """Remove case ownership support for external links.""" + connection = op.get_bind() + case_owned_count = connection.execute( + sa.text( + "SELECT COUNT(*) FROM external_links WHERE case_id IS NOT NULL" + ) + ).scalar_one() + if case_owned_count: + raise RuntimeError( + "Downgrade blocked: external_links contains case-owned rows that " + "cannot be represented after removing case ownership support." + ) + + op.drop_index("uq_external_links_case_id_kind_url", table_name="external_links") + op.drop_constraint( + op.f("ck_external_links_exactly_one_owner"), + "external_links", + type_="check", + ) + op.alter_column( + "external_links", + "simulation_id", + existing_type=postgresql.UUID(as_uuid=True), + nullable=False, + ) + op.drop_constraint( + op.f("fk_external_links_case_id_cases"), + "external_links", + type_="foreignkey", + ) + op.drop_column("external_links", "case_id") diff --git a/backend/tests/features/simulation/test_models.py b/backend/tests/features/simulation/test_models.py new file mode 100644 index 00000000..f737a420 --- /dev/null +++ b/backend/tests/features/simulation/test_models.py @@ -0,0 +1,217 @@ +from datetime import datetime, timezone +from uuid import UUID + +import pytest +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session + +from app.features.ingestion.enums import IngestionSourceType, IngestionStatus +from app.features.ingestion.models import Ingestion +from app.features.machine.models import Machine +from app.features.simulation.enums import ( + ExternalLinkKind, + SimulationStatus, + SimulationType, +) +from app.features.simulation.models import Case, ExternalLink, Simulation + + +def _create_case(db: Session, name: str = "external-link-case") -> Case: + case = Case(name=name) + db.add(case) + db.flush() + return case + + +def _create_machine(db: Session) -> Machine: + machine = Machine( + name=f"machine-{datetime.now(timezone.utc).timestamp()}", + site="NERSC", + architecture="x86_64", + scheduler="SLURM", + gpu=False, + ) + db.add(machine) + db.flush() + return machine + + +def _create_ingestion( + db: Session, *, machine_id: UUID, user_id: UUID, source_reference: str +) -> Ingestion: + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference=source_reference, + machine_id=machine_id, + triggered_by=user_id, + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + return ingestion + + +def _create_simulation( + db: Session, + *, + case_id: UUID, + machine_id: UUID, + ingestion_id: UUID, + user_id: UUID, + execution_id: str, +) -> Simulation: + simulation = Simulation( + case_id=case_id, + execution_id=execution_id, + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + simulation_type=SimulationType.EXPERIMENTAL, + status=SimulationStatus.CREATED, + initialization_type="startup", + machine_id=machine_id, + simulation_start_date=datetime(2023, 1, 1, tzinfo=timezone.utc), + created_by=user_id, + last_updated_by=user_id, + ingestion_id=ingestion_id, + extra={}, + ) + db.add(simulation) + db.flush() + return simulation + + +class TestExternalLinkOwnership: + def test_simulation_owned_external_link_is_valid( + self, db: Session, normal_user_sync + ) -> None: + case = _create_case(db, "simulation-owned-case") + machine = _create_machine(db) + ingestion = _create_ingestion( + db, + machine_id=machine.id, + user_id=normal_user_sync["id"], + source_reference="simulation-owned-external-link", + ) + simulation = _create_simulation( + db, + case_id=case.id, + machine_id=machine.id, + ingestion_id=ingestion.id, + user_id=normal_user_sync["id"], + execution_id="simulation-owned-exec", + ) + + link = ExternalLink( + simulation_id=simulation.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/sim-owned", + label="Simulation-owned", + ) + db.add(link) + db.commit() + db.refresh(simulation) + + assert simulation.links[0].id == link.id + + def test_case_owned_external_link_is_valid( + self, db: Session, normal_user_sync + ) -> None: + case = _create_case(db, "case-owned-case") + machine = _create_machine(db) + _create_ingestion( + db, + machine_id=machine.id, + user_id=normal_user_sync["id"], + source_reference="case-owned-external-link", + ) + + link = ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-owned", + label="Case-owned", + ) + db.add(link) + db.commit() + db.refresh(case) + + assert case.links[0].id == link.id + + def test_ownerless_external_link_is_invalid(self, db: Session) -> None: + db.add( + ExternalLink( + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/ownerless", + label="Ownerless", + ) + ) + + with pytest.raises(IntegrityError): + db.commit() + + db.rollback() + + def test_dual_owned_external_link_is_invalid( + self, db: Session, normal_user_sync + ) -> None: + case = _create_case(db, "dual-owned-case") + machine = _create_machine(db) + ingestion = _create_ingestion( + db, + machine_id=machine.id, + user_id=normal_user_sync["id"], + source_reference="dual-owned-external-link", + ) + simulation = _create_simulation( + db, + case_id=case.id, + machine_id=machine.id, + ingestion_id=ingestion.id, + user_id=normal_user_sync["id"], + execution_id="dual-owned-exec", + ) + + db.add( + ExternalLink( + simulation_id=simulation.id, + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/dual-owned", + label="Dual-owned", + ) + ) + + with pytest.raises(IntegrityError): + db.commit() + + db.rollback() + + def test_duplicate_case_owned_diagnostic_link_is_invalid(self, db: Session) -> None: + case = _create_case(db, "duplicate-case-link-case") + + db.add_all( + [ + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/duplicate-case-link", + label="First", + ), + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/duplicate-case-link", + label="Second", + ), + ] + ) + + with pytest.raises(IntegrityError): + db.commit() + + db.rollback() diff --git a/backend/tests/features/simulation/test_schemas.py b/backend/tests/features/simulation/test_schemas.py index d22ed7ab..f458e102 100644 --- a/backend/tests/features/simulation/test_schemas.py +++ b/backend/tests/features/simulation/test_schemas.py @@ -1,4 +1,5 @@ from datetime import datetime +from types import SimpleNamespace from uuid import uuid4 from pydantic import HttpUrl @@ -103,6 +104,42 @@ def test_valid_simulation_create_optional_fields(self): assert getattr(simulation_create, snake_case_key) == value +class TestExternalLinkOutSchema: + def test_validates_simulation_owned_external_link_from_attributes(self): + link = SimpleNamespace( + id=uuid4(), + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/simulation-owned", + label="Simulation-owned", + simulation_id=uuid4(), + case_id=None, + created_at=datetime(2023, 1, 1, 0, 0, 0), + updated_at=datetime(2023, 1, 2, 0, 0, 0), + ) + + link_out = ExternalLinkOut.model_validate(link) + + assert link_out.url == HttpUrl("https://example.com/simulation-owned") + assert link_out.label == "Simulation-owned" + + def test_validates_case_owned_external_link_from_attributes(self): + link = SimpleNamespace( + id=uuid4(), + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-owned", + label="Case-owned", + simulation_id=None, + case_id=uuid4(), + created_at=datetime(2023, 1, 1, 0, 0, 0), + updated_at=datetime(2023, 1, 2, 0, 0, 0), + ) + + link_out = ExternalLinkOut.model_validate(link) + + assert link_out.url == HttpUrl("https://example.com/case-owned") + assert link_out.label == "Case-owned" + + class TestSimulationOutSchema: def test_valid_simulation_out_required_fields(self): # Arrange: Define the required fields From 38a65aa88f1e6b6751da3bca60609f8fc548c110 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 4 Jun 2026 14:59:10 -0700 Subject: [PATCH 10/13] Add diagnostics link API --- backend/app/features/simulation/api.py | 119 ++++- backend/app/features/simulation/schemas.py | 42 +- backend/app/main.py | 7 +- backend/tests/features/simulation/test_api.py | 453 +++++++++++++++++- 4 files changed, 615 insertions(+), 6 deletions(-) diff --git a/backend/app/features/simulation/api.py b/backend/app/features/simulation/api.py index a9ce6c39..a6dda283 100644 --- a/backend/app/features/simulation/api.py +++ b/backend/app/features/simulation/api.py @@ -2,6 +2,7 @@ from uuid import UUID from fastapi import APIRouter, Depends, HTTPException, Query, status +from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.orm import Session, joinedload, selectinload from app.common.dependencies import get_database_session @@ -9,19 +10,23 @@ from app.features.assistant.orchestrator import is_summary_llm_available from app.features.ingestion.enums import IngestionSourceType, IngestionStatus from app.features.ingestion.models import Ingestion +from app.features.machine.models import Machine +from app.features.simulation.enums import ExternalLinkKind from app.features.simulation.models import Artifact, Case, ExternalLink, Simulation from app.features.simulation.schemas import ( CaseOut, + DiagnosticsLinkRequest, SimulationCreate, SimulationOut, SimulationSummaryCapabilitiesOut, SimulationSummaryOut, ) from app.features.user.manager import current_active_user -from app.features.user.models import User +from app.features.user.models import User, UserRole simulation_router = APIRouter(prefix="/simulations", tags=["Simulations"]) case_router = APIRouter(prefix="/cases", tags=["Cases"]) +diagnostics_router = APIRouter(prefix="/diagnostics", tags=["Diagnostics"]) @case_router.get( @@ -278,6 +283,43 @@ def create_simulation( return result +@diagnostics_router.post( + "/link", + status_code=status.HTTP_204_NO_CONTENT, + responses={ + 204: {"description": "Diagnostics linked successfully."}, + 401: {"description": "Unauthorized."}, + 403: {"description": "Forbidden."}, + 404: {"description": "Matching case not found."}, + 409: {"description": "Ambiguous case match."}, + 422: {"description": "Validation error."}, + }, +) +def link_case_diagnostics( + payload: DiagnosticsLinkRequest, + db: Session = Depends(get_database_session), + user: User = Depends(current_active_user), +) -> None: + """Resolve one case and upsert case-scoped diagnostic links.""" + if user.role not in (UserRole.ADMIN, UserRole.SERVICE_ACCOUNT): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Only administrators and service accounts may link diagnostics.", + ) + + case_id = _resolve_case_id_for_diagnostics_link( + db=db, + case_name=payload.case_name, + machine_name=payload.machine, + hpc_username=payload.hpc_username, + ) + _upsert_case_diagnostic_links( + db=db, + case_id=case_id, + diagnostics=payload.diagnostics, + ) + + @simulation_router.get( "", response_model=list[SimulationOut], @@ -336,6 +378,81 @@ def list_simulations( return [_simulation_to_out(s) for s in sims] +def _resolve_case_id_for_diagnostics_link( + *, + db: Session, + case_name: str, + machine_name: str, + hpc_username: str, +) -> UUID: + """Resolve a unique case ID from case, machine, and HPC username.""" + matches = ( + db.query(Case.id) + .join(Simulation, Simulation.case_id == Case.id) + .join(Machine, Simulation.machine_id == Machine.id) + .filter(Case.name == case_name) + .filter(Machine.name == machine_name) + .filter(Simulation.hpc_username == hpc_username) + .distinct() + .all() + ) + + if not matches: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No case matched the provided case_name, machine, and hpc_username.", + ) + + # TODO(#193): This ambiguity branch is not reachable while Case.name remains + # globally unique. If case identity moves to (case_name, machine, hpc_username), + # keep this 409 path and replace patched coverage with a DB-backed test. + # https://github.com/E3SM-Project/simboard/issues/193 + if len(matches) > 1: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail="Multiple cases matched the provided case_name, machine, and hpc_username.", + ) + + return matches[0][0] + + +def _upsert_case_diagnostic_links( + *, + db: Session, + case_id: UUID, + diagnostics: list, +) -> None: + """Create or update case-owned diagnostic links idempotently.""" + now = datetime.now(timezone.utc) + + with transaction(db): + for diagnostic in diagnostics: + stmt = ( + pg_insert(ExternalLink) + .values( + case_id=case_id, + kind=ExternalLinkKind.DIAGNOSTIC, + url=str(diagnostic.url), + label=diagnostic.name, + created_at=now, + updated_at=now, + ) + .on_conflict_do_update( + index_elements=[ + ExternalLink.case_id, + ExternalLink.kind, + ExternalLink.url, + ], + index_where=ExternalLink.case_id.is_not(None), + set_={ + "label": diagnostic.name, + "updated_at": now, + }, + ) + ) + db.execute(stmt) + + @simulation_router.get( "/{sim_id}", response_model=SimulationOut, diff --git a/backend/app/features/simulation/schemas.py b/backend/app/features/simulation/schemas.py index 3d1b8fac..5a08bbf4 100644 --- a/backend/app/features/simulation/schemas.py +++ b/backend/app/features/simulation/schemas.py @@ -1,6 +1,6 @@ from collections import defaultdict from datetime import datetime -from typing import Annotated, Any +from typing import Annotated, Any, Literal from uuid import UUID from pydantic import Field, HttpUrl, computed_field @@ -56,6 +56,46 @@ class ExternalLinkOut(CamelOutBaseModel): ] +class DiagnosticsLinkItem(CamelInBaseModel): + """Schema for one diagnostic link to attach to a case.""" + + name: Annotated[str, Field(..., description="Human-readable diagnostic label.")] + url: Annotated[HttpUrl, Field(..., description="Diagnostic URL to attach.")] + kind: Literal[ExternalLinkKind.DIAGNOSTIC] = Field( + default=ExternalLinkKind.DIAGNOSTIC, + description="Link type for diagnostics payloads. Must be 'diagnostic'.", + ) + + +class DiagnosticsLinkRequest(CamelInBaseModel): + """Schema for linking diagnostics to a resolved case.""" + + case_name: Annotated[ + str, + Field(..., description="Exact case name used to resolve the target case."), + ] + machine: Annotated[ + str, + Field( + ..., + description="Exact machine name used alongside case name to resolve the case.", + ), + ] + hpc_username: Annotated[ + str, + Field( + ..., + description="Exact HPC username used alongside case name and machine to resolve the case.", + ), + ] + diagnostics: Annotated[ + list[DiagnosticsLinkItem], + Field( + ..., min_length=1, description="Diagnostic links to upsert for the case." + ), + ] + + class ArtifactCreate(CamelInBaseModel): """Schema for creating a new Artifact.""" diff --git a/backend/app/main.py b/backend/app/main.py index afcc1057..fb07c9f0 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -11,7 +11,11 @@ from app.features.ingestion.api import router as ingestion_router from app.features.machine.api import router as machine_router from app.features.pace.api import router as pace_router -from app.features.simulation.api import case_router, simulation_router +from app.features.simulation.api import ( + case_router, + diagnostics_router, + simulation_router, +) from app.features.user.api.oauth import auth_router, user_router from app.features.user.api.token import router as token_router @@ -36,6 +40,7 @@ def create_app() -> FastAPI: # Register routers. app.include_router(simulation_router, prefix=API_BASE) + app.include_router(diagnostics_router, prefix=API_BASE) app.include_router(assistant_router, prefix=API_BASE) app.include_router(case_router, prefix=API_BASE) app.include_router(machine_router, prefix=API_BASE) diff --git a/backend/tests/features/simulation/test_api.py b/backend/tests/features/simulation/test_api.py index 9f730eb9..28c22eeb 100644 --- a/backend/tests/features/simulation/test_api.py +++ b/backend/tests/features/simulation/test_api.py @@ -1,27 +1,49 @@ +from concurrent.futures import ThreadPoolExecutor from contextlib import nullcontext +from datetime import datetime, timezone from unittest.mock import MagicMock, patch from uuid import uuid4 import pytest from fastapi import HTTPException +from fastapi.testclient import TestClient +from sqlalchemy import delete from sqlalchemy.orm import Session from app.api.version import API_BASE +from app.common.dependencies import get_database_session from app.core.config import settings from app.features.ingestion.enums import IngestionSourceType, IngestionStatus from app.features.ingestion.models import Ingestion from app.features.machine.models import Machine from app.features.simulation.api import create_simulation -from app.features.simulation.models import Case, Simulation +from app.features.simulation.enums import ( + ExternalLinkKind, + SimulationStatus, + SimulationType, +) +from app.features.simulation.models import Case, ExternalLink, Simulation from app.features.simulation.schemas import SimulationCreate +from app.features.user.auth.token import generate_token from app.features.user.manager import current_active_user -from app.features.user.models import User, UserRole +from app.features.user.models import ApiToken, User, UserRole from app.main import app +from tests.conftest import TestingSessionLocal, engine + + +def use_real_auth(test_func): + """Flag tests that should bypass the default auth override.""" + test_func._use_real_auth = True + return test_func @pytest.fixture(autouse=True) -def override_auth_dependency(normal_user_sync): +def override_auth_dependency(request, normal_user_sync): """Auto-login a test user for endpoints requiring authentication.""" + if getattr(request.node.function, "_use_real_auth", False): + yield + app.dependency_overrides.clear() + return def fake_current_user(): return User( @@ -48,6 +70,85 @@ def _create_case(db: Session, name: str = "test_case") -> Case: return case +def _create_service_account_token( + db: Session, + *, + email: str | None = None, +) -> tuple[User, str]: + user = User( + email=email or f"svc-{uuid4()}@example.com", + is_active=True, + is_verified=True, + role=UserRole.SERVICE_ACCOUNT, + ) + db.add(user) + db.flush() + + raw_token, token_hash = generate_token() + db.add( + ApiToken( + name="Diagnostics Link Token", + token_hash=token_hash, + user_id=user.id, + created_at=datetime.now(timezone.utc), + revoked=False, + ) + ) + db.commit() + db.refresh(user) + + return user, raw_token + + +def _create_matching_simulation( + db: Session, + *, + case_name: str, + machine_id, + machine_name: str, + user_id, + execution_id: str, + hpc_username: str, + source_reference: str, +) -> tuple[Case, Simulation]: + case = _create_case(db, case_name) + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference=source_reference, + machine_id=machine_id, + triggered_by=user_id, + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + + simulation = Simulation( + case_id=case.id, + execution_id=execution_id, + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + initialization_type="startup", + simulation_type=SimulationType.EXPERIMENTAL, + status=SimulationStatus.CREATED, + machine_id=machine_id, + simulation_start_date=datetime(2023, 1, 1, tzinfo=timezone.utc), + created_by=user_id, + last_updated_by=user_id, + ingestion_id=ingestion.id, + hpc_username=hpc_username, + extra={"machineName": machine_name}, + ) + db.add(simulation) + db.commit() + + return case, simulation + + class TestListCases: def test_endpoint_returns_empty_list(self, client): res = client.get(f"{API_BASE}/cases") @@ -844,3 +945,349 @@ def test_simulation_list_includes_case_name_and_id( assert data[0]["caseId"] == str(case.id) assert data[0]["caseName"] == "test_case_browser" assert data[0]["executionId"] == "browser-exec-1" + + +class TestLinkCaseDiagnostics: + @use_real_auth + def test_endpoint_creates_case_scoped_diagnostic_links( + self, client, db: Session + ) -> None: + machine = db.query(Machine).first() + assert machine is not None + + _, raw_token = _create_service_account_token(db) + case_name = f"diagnostics-case-{uuid4()}" + case, _ = _create_matching_simulation( + db, + case_name=case_name, + machine_id=machine.id, + machine_name=machine.name, + user_id=db.query(User) + .filter(User.role == UserRole.SERVICE_ACCOUNT) + .one() + .id, + execution_id=f"diag-exec-{uuid4()}", + hpc_username="diag-user", + source_reference=f"diag-source-{uuid4()}", + ) + + response = client.post( + f"{API_BASE}/diagnostics/link", + json={ + "caseName": case_name, + "machine": machine.name, + "hpcUsername": "diag-user", + "diagnostics": [ + { + "name": "Atmosphere diagnostics", + "url": "https://example.com/diag/atmosphere", + }, + { + "name": "Ocean diagnostics", + "url": "https://example.com/diag/ocean", + }, + ], + }, + headers={"Authorization": f"Bearer {raw_token}"}, + ) + + assert response.status_code == 204 + links = ( + db.query(ExternalLink) + .filter(ExternalLink.case_id == case.id) + .order_by(ExternalLink.url.asc()) + .all() + ) + assert [(link.kind, link.label, link.url) for link in links] == [ + ( + ExternalLinkKind.DIAGNOSTIC, + "Atmosphere diagnostics", + "https://example.com/diag/atmosphere", + ), + ( + ExternalLinkKind.DIAGNOSTIC, + "Ocean diagnostics", + "https://example.com/diag/ocean", + ), + ] + + @use_real_auth + def test_duplicate_request_remains_idempotent(self, client, db: Session) -> None: + machine = db.query(Machine).first() + assert machine is not None + + service_user, raw_token = _create_service_account_token(db) + case, _ = _create_matching_simulation( + db, + case_name=f"diagnostics-idempotent-{uuid4()}", + machine_id=machine.id, + machine_name=machine.name, + user_id=service_user.id, + execution_id=f"diag-idempotent-exec-{uuid4()}", + hpc_username="idempotent-user", + source_reference=f"diag-idempotent-source-{uuid4()}", + ) + payload = { + "caseName": case.name, + "machine": machine.name, + "hpcUsername": "idempotent-user", + "diagnostics": [ + { + "name": "Shared diagnostics", + "url": "https://example.com/diag/shared", + } + ], + } + + first = client.post( + f"{API_BASE}/diagnostics/link", + json=payload, + headers={"Authorization": f"Bearer {raw_token}"}, + ) + second = client.post( + f"{API_BASE}/diagnostics/link", + json=payload, + headers={"Authorization": f"Bearer {raw_token}"}, + ) + + assert first.status_code == 204 + assert second.status_code == 204 + links = db.query(ExternalLink).filter(ExternalLink.case_id == case.id).all() + assert len(links) == 1 + assert links[0].label == "Shared diagnostics" + + @use_real_auth + def test_concurrent_duplicate_request_remains_idempotent(self) -> None: + SessionFactory = TestingSessionLocal + seed_session = SessionFactory(bind=engine.connect()) + cleanup_session = None + service_user: User | None = None + app.dependency_overrides.pop(current_active_user, None) + + def override_get_database_session(): + session = SessionFactory(bind=engine.connect()) + try: + yield session + finally: + session.close() + + app.dependency_overrides[get_database_session] = override_get_database_session + + try: + machine = seed_session.query(Machine).first() + assert machine is not None + + service_user, raw_token = _create_service_account_token(seed_session) + case_name = f"diagnostics-concurrent-{uuid4()}" + execution_id = f"diag-concurrent-exec-{uuid4()}" + source_reference = f"diag-concurrent-source-{uuid4()}" + case, _ = _create_matching_simulation( + seed_session, + case_name=case_name, + machine_id=machine.id, + machine_name=machine.name, + user_id=service_user.id, + execution_id=execution_id, + hpc_username="concurrent-user", + source_reference=source_reference, + ) + + payload = { + "caseName": case_name, + "machine": machine.name, + "hpcUsername": "concurrent-user", + "diagnostics": [ + { + "name": "Concurrent diagnostics", + "url": "https://example.com/diag/concurrent", + } + ], + } + + with TestClient(app) as local_client: + + def send_request() -> int: + response = local_client.post( + f"{API_BASE}/diagnostics/link", + json=payload, + headers={"Authorization": f"Bearer {raw_token}"}, + ) + return response.status_code + + with ThreadPoolExecutor(max_workers=2) as executor: + statuses = list(executor.map(lambda _: send_request(), range(2))) + + assert statuses == [204, 204] + + cleanup_session = SessionFactory(bind=engine.connect()) + links = ( + cleanup_session.query(ExternalLink) + .filter(ExternalLink.case_id == case.id) + .all() + ) + assert len(links) == 1 + finally: + app.dependency_overrides.pop(get_database_session, None) + if cleanup_session is None: + cleanup_session = SessionFactory(bind=engine.connect()) + cleanup_session.execute( + delete(ExternalLink).where( + ExternalLink.url == "https://example.com/diag/concurrent" + ) + ) + cleanup_session.execute( + delete(Simulation).where( + Simulation.execution_id == locals().get("execution_id") + ) + ) + cleanup_session.execute( + delete(Ingestion).where( + Ingestion.source_reference == locals().get("source_reference") + ) + ) + cleanup_session.execute( + delete(Case).where(Case.name == locals().get("case_name")) + ) + if service_user is not None: + cleanup_session.execute( + delete(ApiToken).where(ApiToken.user_id == service_user.id) + ) + cleanup_session.execute( + delete(User).where(User.email == service_user.email) + ) + cleanup_session.commit() + cleanup_session.close() + seed_session.close() + + @use_real_auth + def test_endpoint_requires_authentication(self, client) -> None: + response = client.post( + f"{API_BASE}/diagnostics/link", + json={ + "caseName": "missing-auth-case", + "machine": "perlmutter", + "hpcUsername": "diag-user", + "diagnostics": [ + { + "name": "Missing auth", + "url": "https://example.com/diag/auth", + } + ], + }, + ) + + assert response.status_code == 401 + assert response.json()["detail"] == "Not authenticated" + + def test_endpoint_rejects_non_admin_non_service_account(self, client) -> None: + def fake_non_admin_user(): + return User( + id=uuid4(), + email="forbidden@example.com", + is_active=True, + is_verified=True, + role=UserRole.USER, + ) + + app.dependency_overrides[current_active_user] = fake_non_admin_user + + response = client.post( + f"{API_BASE}/diagnostics/link", + json={ + "caseName": "forbidden-case", + "machine": "perlmutter", + "hpcUsername": "diag-user", + "diagnostics": [ + { + "name": "Forbidden diagnostics", + "url": "https://example.com/diag/forbidden", + } + ], + }, + ) + + assert response.status_code == 403 + assert response.json()["detail"] == ( + "Only administrators and service accounts may link diagnostics." + ) + + @use_real_auth + def test_endpoint_returns_404_when_case_match_is_missing( + self, client, db: Session + ) -> None: + _, raw_token = _create_service_account_token(db) + + response = client.post( + f"{API_BASE}/diagnostics/link", + json={ + "caseName": "missing-case", + "machine": "perlmutter", + "hpcUsername": "diag-user", + "diagnostics": [ + { + "name": "Missing case diagnostics", + "url": "https://example.com/diag/missing-case", + } + ], + }, + headers={"Authorization": f"Bearer {raw_token}"}, + ) + + assert response.status_code == 404 + + def test_endpoint_returns_409_for_ambiguous_match(self, client) -> None: + def fake_admin_user(): + return User( + id=uuid4(), + email="admin-diagnostics@example.com", + is_active=True, + is_verified=True, + role=UserRole.ADMIN, + ) + + app.dependency_overrides[current_active_user] = fake_admin_user + + with patch( + "app.features.simulation.api._resolve_case_id_for_diagnostics_link", + side_effect=HTTPException( + status_code=409, + detail=( + "Multiple cases matched the provided case_name, machine, and hpc_username." + ), + ), + ): + response = client.post( + f"{API_BASE}/diagnostics/link", + json={ + "caseName": "ambiguous-case", + "machine": "perlmutter", + "hpcUsername": "diag-user", + "diagnostics": [ + { + "name": "Ambiguous diagnostics", + "url": "https://example.com/diag/ambiguous", + } + ], + }, + ) + + assert response.status_code == 409 + + @use_real_auth + def test_endpoint_returns_422_for_invalid_payload( + self, client, db: Session + ) -> None: + _, raw_token = _create_service_account_token(db) + + response = client.post( + f"{API_BASE}/diagnostics/link", + json={ + "caseName": "invalid-payload-case", + "machine": "perlmutter", + "hpcUsername": "diag-user", + "diagnostics": [{"name": "Broken diagnostics", "url": "not-a-url"}], + }, + headers={"Authorization": f"Bearer {raw_token}"}, + ) + + assert response.status_code == 422 From 827ef7fc10c458686cdfb5fc933936ebe1ad7800 Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 4 Jun 2026 15:03:14 -0700 Subject: [PATCH 11/13] Update plans --- .../phase-3-link-aggregation.md | 19 +++++++++++++------ .../phase-4-provenance-scanner.md | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/backend/docs/174-zppy-links/phase-3-link-aggregation.md b/backend/docs/174-zppy-links/phase-3-link-aggregation.md index 701d2c7e..02fd9f84 100644 --- a/backend/docs/174-zppy-links/phase-3-link-aggregation.md +++ b/backend/docs/174-zppy-links/phase-3-link-aggregation.md @@ -2,22 +2,22 @@ ## Task -Make existing simulation detail, simulation list, and assistant summary paths expose case-scoped diagnostic links without changing frontend contracts. +Make simulation detail, simulation list, assistant summary, and case details paths expose case-scoped diagnostic links, including case-level diagnostics on case details page. ## Scope ### In scope - Response aggregation in simulation API +- Case detail API and UI updates needed to show case-owned diagnostic links - Relationship loading updates needed to read `Case.links` - Assistant snapshot and summary loading updates -- Backend tests proving payload shape stays compatible +- Backend and frontend tests proving payload shape stays compatible where required ### Out of scope - Provenance scanner script -- New frontend UI or TypeScript contract changes -- Changing `CaseOut` +- Unrelated frontend UI or TypeScript contract changes ## Approach @@ -39,9 +39,14 @@ Make existing simulation detail, simulation list, and assistant summary paths ex - Update `backend/app/features/assistant/api.py` query options so assistant summary loads case links. - Update `backend/app/features/assistant/snapshot.py` so snapshot links include merged case-owned diagnostics, not only direct simulation links. -5. Preserve current frontend behavior. +5. Update case details path. + - Ensure case detail response exposes case-owned diagnostic links needed by case details page. + - Update case details page to render case-level diagnostic links in diagnostics section. + - Preserve existing simulation-page contracts while adding case-page support. + +6. Preserve current frontend behavior where unchanged. - Do not change frontend API shapes or field names. - - Do not add case-scoped links to `CaseOut` in this phase. + - Keep simulation response contracts stable while making minimal changes required for case details. ## Tests @@ -51,8 +56,10 @@ Make existing simulation detail, simulation list, and assistant summary paths ex - duplicate `(kind, url)` across case and simulation appears once - simulation-owned link wins on duplicate URL - Update assistant tests to verify case-scoped diagnostic links are visible to summary generation and citations. +- Update case detail API and frontend tests to verify case-level diagnostic links render on case details page. - Run: - `make backend-test` + - `make frontend-lint` ## Risk diff --git a/backend/docs/174-zppy-links/phase-4-provenance-scanner.md b/backend/docs/174-zppy-links/phase-4-provenance-scanner.md index 4defa230..e62d50cb 100644 --- a/backend/docs/174-zppy-links/phase-4-provenance-scanner.md +++ b/backend/docs/174-zppy-links/phase-4-provenance-scanner.md @@ -23,7 +23,7 @@ Add standalone scanner that discovers zppy provenance cfg files from configured ## Approach 1. Mirror existing operational script structure. - - Base new script on patterns from `backend/app/scripts/ingestion/nersc_archive_ingestor.py`. + - Base new script on patterns from `backend/app/scripts/ingestion/nersc_upload_archive_ingestor.py`. - Reuse same style for config parsing, structured logs, dry-run handling, retry/backoff, and state persistence. 2. Discover provenance cfg files. From e1f03e808efec4207c8d6a94932c009dea20699f Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 4 Jun 2026 15:14:12 -0700 Subject: [PATCH 12/13] Add case-level diagnostic link aggregation --- backend/app/features/assistant/api.py | 4 +- backend/app/features/assistant/snapshot.py | 7 +- backend/app/features/simulation/api.py | 19 +- backend/app/features/simulation/link_utils.py | 30 +++ backend/app/features/simulation/schemas.py | 7 + ...04_130000_add_case_owned_external_links.py | 4 +- .../tests/features/assistant/test_service.py | 28 +++ .../tests/features/assistant/test_snapshot.py | 63 ++++- backend/tests/features/simulation/test_api.py | 225 ++++++++++++++++++ .../tests/features/simulation/test_schemas.py | 3 + .../features/simulations/CaseDetailsPage.tsx | 36 ++- frontend/src/types/simulation.ts | 1 + 12 files changed, 413 insertions(+), 14 deletions(-) create mode 100644 backend/app/features/simulation/link_utils.py diff --git a/backend/app/features/assistant/api.py b/backend/app/features/assistant/api.py index 7958661b..3b24b37a 100644 --- a/backend/app/features/assistant/api.py +++ b/backend/app/features/assistant/api.py @@ -12,7 +12,7 @@ from app.core.logger import _setup_custom_logger from app.features.assistant.orchestrator import generate_simulation_summary from app.features.assistant.schemas import SimulationSummaryResponse -from app.features.simulation.models import Simulation +from app.features.simulation.models import Case, Simulation from app.features.user.manager import optional_current_user from app.features.user.models import User @@ -42,7 +42,7 @@ async def summarize_simulation( stmt = ( select(Simulation) .options( - joinedload(Simulation.case), + joinedload(Simulation.case).selectinload(Case.links), joinedload(Simulation.machine), selectinload(Simulation.artifacts), selectinload(Simulation.links), diff --git a/backend/app/features/assistant/snapshot.py b/backend/app/features/assistant/snapshot.py index f99dd51a..d0435478 100644 --- a/backend/app/features/assistant/snapshot.py +++ b/backend/app/features/assistant/snapshot.py @@ -9,6 +9,7 @@ from pydantic import BaseModel, Field from app.core.config import settings +from app.features.simulation.link_utils import merge_simulation_and_case_links from app.features.simulation.models import Artifact, ExternalLink, Simulation SNAPSHOT_TRUNCATED_CAVEAT = ( @@ -201,6 +202,10 @@ def build_simulation_snapshot( *, max_chars: int | None = None, ) -> SimulationSnapshot: + merged_links = merge_simulation_and_case_links( + simulation.links, + simulation.case.links, + ) snapshot = SimulationSnapshot( simulation=SnapshotSimulationFields( id=str(simulation.id), @@ -240,7 +245,7 @@ def build_simulation_snapshot( else None ), artifacts=_sorted_artifacts(simulation.artifacts), - links=_sorted_links(simulation.links), + links=_sorted_links(merged_links), snapshot_caveats=[], ) diff --git a/backend/app/features/simulation/api.py b/backend/app/features/simulation/api.py index a6dda283..19a26c82 100644 --- a/backend/app/features/simulation/api.py +++ b/backend/app/features/simulation/api.py @@ -12,6 +12,7 @@ from app.features.ingestion.models import Ingestion from app.features.machine.models import Machine from app.features.simulation.enums import ExternalLinkKind +from app.features.simulation.link_utils import merge_simulation_and_case_links from app.features.simulation.models import Artifact, Case, ExternalLink, Simulation from app.features.simulation.schemas import ( CaseOut, @@ -121,7 +122,10 @@ def get_case(case_id: UUID, db: Session = Depends(get_database_session)) -> Case """ case = ( db.query(Case) - .options(selectinload(Case.simulations).selectinload(Simulation.machine)) + .options( + selectinload(Case.simulations).selectinload(Simulation.machine), + selectinload(Case.links), + ) .filter(Case.id == case_id) .first() ) @@ -129,12 +133,12 @@ def get_case(case_id: UUID, db: Session = Depends(get_database_session)) -> Case if not case: raise HTTPException(status_code=404, detail="Case not found") - resp = _case_to_out(case) + resp = _case_to_out(case, include_links=True) return resp -def _case_to_out(case: Case) -> CaseOut: +def _case_to_out(case: Case, *, include_links: bool = False) -> CaseOut: """Convert a Case ORM instance to CaseOut with nested SimulationSummaryOut. Parameters @@ -181,6 +185,7 @@ def _case_to_out(case: Case) -> CaseOut: simulations=summaries, machine_names=machine_names, hpc_usernames=hpc_usernames, + links=case.links if include_links else [], created_at=case.created_at, updated_at=case.updated_at, ) @@ -263,7 +268,7 @@ def create_simulation( sim_loaded = ( db.query(Simulation) .options( - joinedload(Simulation.case), + joinedload(Simulation.case).selectinload(Case.links), joinedload(Simulation.machine), selectinload(Simulation.artifacts), selectinload(Simulation.links), @@ -363,7 +368,7 @@ def list_simulations( in descending order. """ query = db.query(Simulation).options( - joinedload(Simulation.case), + joinedload(Simulation.case).selectinload(Case.links), joinedload(Simulation.machine), selectinload(Simulation.artifacts), selectinload(Simulation.links), @@ -487,7 +492,7 @@ def get_simulation(sim_id: UUID, db: Session = Depends(get_database_session)): sim = ( db.query(Simulation) .options( - joinedload(Simulation.case), + joinedload(Simulation.case).selectinload(Case.links), joinedload(Simulation.machine), selectinload(Simulation.artifacts), selectinload(Simulation.links), @@ -520,12 +525,14 @@ def _simulation_to_out(sim: Simulation) -> SimulationOut: """ case = sim.case llm_available = is_summary_llm_available() + merged_links = merge_simulation_and_case_links(sim.links, case.links) result = SimulationOut.model_validate( { **{k: v for k, v in sim.__dict__.items() if not k.startswith("_")}, "case_name": case.name, "case_group": case.case_group, + "links": merged_links, "summary_capabilities": SimulationSummaryCapabilitiesOut( llm_available=llm_available, auto_generate_deterministic_on_load=not llm_available, diff --git a/backend/app/features/simulation/link_utils.py b/backend/app/features/simulation/link_utils.py new file mode 100644 index 00000000..a8343393 --- /dev/null +++ b/backend/app/features/simulation/link_utils.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from collections.abc import Iterable + +from app.features.simulation.models import ExternalLink + + +def merge_simulation_and_case_links( + simulation_links: Iterable[ExternalLink], + case_links: Iterable[ExternalLink], +) -> list[ExternalLink]: + """Merge simulation-owned and case-owned links with simulation precedence.""" + merged: list[ExternalLink] = [] + seen: set[tuple[str, str]] = set() + + for link in simulation_links: + key = (str(link.kind), link.url) + if key in seen: + continue + seen.add(key) + merged.append(link) + + for link in case_links: + key = (str(link.kind), link.url) + if key in seen: + continue + seen.add(key) + merged.append(link) + + return merged diff --git a/backend/app/features/simulation/schemas.py b/backend/app/features/simulation/schemas.py index 5a08bbf4..65ee6b28 100644 --- a/backend/app/features/simulation/schemas.py +++ b/backend/app/features/simulation/schemas.py @@ -409,6 +409,13 @@ class CaseOut(CamelOutBaseModel): description="Unique HPC usernames represented across this case's simulations.", ), ] + links: Annotated[ + list[ExternalLinkOut], + Field( + default_factory=list, + description="Optional list of external links associated with the case.", + ), + ] created_at: Annotated[ datetime, Field(..., description="Timestamp when the case was created") ] diff --git a/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py b/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py index 6a23bd1d..2346d33e 100644 --- a/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py +++ b/backend/migrations/versions/20260604_130000_add_case_owned_external_links.py @@ -56,9 +56,7 @@ def downgrade() -> None: """Remove case ownership support for external links.""" connection = op.get_bind() case_owned_count = connection.execute( - sa.text( - "SELECT COUNT(*) FROM external_links WHERE case_id IS NOT NULL" - ) + sa.text("SELECT COUNT(*) FROM external_links WHERE case_id IS NOT NULL") ).scalar_one() if case_owned_count: raise RuntimeError( diff --git a/backend/tests/features/assistant/test_service.py b/backend/tests/features/assistant/test_service.py index 475ef65b..7deb4b34 100644 --- a/backend/tests/features/assistant/test_service.py +++ b/backend/tests/features/assistant/test_service.py @@ -210,6 +210,34 @@ def test_absent_diagnostics_adds_limitation_not_interpretation( "This summary uses only metadata already stored in SimBoard. It does not use retrieval, diagnostics interpretation, or LLM reasoning." ] + def test_case_owned_diagnostics_are_visible_to_summary_and_citations( + self, db: Session, normal_user_sync, admin_user_sync + ) -> None: + simulation = _create_simulation( + db, + normal_user_sync, + admin_user_sync, + execution_id="assistant-case-diagnostic", + with_diagnostics=False, + ) + db.add( + ExternalLink( + case_id=simulation.case_id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-diagnostic", + label="Case diagnostic", + ) + ) + db.commit() + db.refresh(simulation) + + summary = build_simulation_summary(simulation) + + assert "SimBoard records 1 diagnostic link(s) for this run" in summary.answer + assert "links[kind=diagnostic]" in { + citation.path for citation in summary.citations + } + def test_snapshot_without_case_hash_omits_grouping_sentence(self) -> None: summary = build_simulation_summary( SimulationSnapshot( diff --git a/backend/tests/features/assistant/test_snapshot.py b/backend/tests/features/assistant/test_snapshot.py index 500d7e19..8f265873 100644 --- a/backend/tests/features/assistant/test_snapshot.py +++ b/backend/tests/features/assistant/test_snapshot.py @@ -1,4 +1,5 @@ from datetime import UTC, datetime +from uuid import uuid4 import pytest @@ -14,7 +15,12 @@ SnapshotSimulationFields, _SnapshotSizeBudget, ) -from app.features.simulation.enums import SimulationStatus +from app.features.simulation.enums import ( + ExternalLinkKind, + SimulationStatus, + SimulationType, +) +from app.features.simulation.models import Case, ExternalLink, Simulation def _make_snapshot() -> SimulationSnapshot: @@ -194,3 +200,58 @@ def test_apply_size_budget_raises_when_required_fields_too_large( assert exc_info.value.snapshot.simulation.known_issues is None assert exc_info.value.snapshot.simulation.extra == {} assert SNAPSHOT_TRUNCATED_CAVEAT in exc_info.value.snapshot.snapshot_caveats + + def test_build_snapshot_merges_case_links_with_simulation_precedence(self) -> None: + case = Case(id=uuid4(), name="snapshot-case", case_group="snapshot-group") + simulation = Simulation( + id=uuid4(), + case=case, + case_id=case.id, + execution_id="snapshot-exec", + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + simulation_type=SimulationType.EXPERIMENTAL, + status=SimulationStatus.COMPLETED, + initialization_type="startup", + machine_id=uuid4(), + simulation_start_date=datetime(2024, 1, 1, tzinfo=UTC), + created_by=uuid4(), + last_updated_by=uuid4(), + ingestion_id=uuid4(), + extra={}, + ) + case.links = [ + ExternalLink( + case=case, + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-only", + label="Case only", + ), + ExternalLink( + case=case, + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/shared", + label="Case shared", + ), + ] + simulation.links = [ + ExternalLink( + simulation=simulation, + simulation_id=simulation.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/shared", + label="Simulation shared", + ) + ] + simulation.artifacts = [] + + snapshot = snapshot_module.build_simulation_snapshot(simulation) + + assert [(link.url, link.label) for link in snapshot.links] == [ + ("https://example.com/case-only", "Case only"), + ("https://example.com/shared", "Simulation shared"), + ] diff --git a/backend/tests/features/simulation/test_api.py b/backend/tests/features/simulation/test_api.py index 28c22eeb..2679c3db 100644 --- a/backend/tests/features/simulation/test_api.py +++ b/backend/tests/features/simulation/test_api.py @@ -323,6 +323,72 @@ def test_endpoint_returns_case_with_simulations( assert data["hpcUsernames"] == [] assert data["simulations"][0]["executionId"] == "case-detail-exec-1" assert data["simulations"][0]["caseHash"] == "detail-hash-1" + assert data["links"] == [] + + def test_endpoint_includes_case_level_diagnostic_links( + self, client, db: Session, normal_user_sync, admin_user_sync + ): + machine = db.query(Machine).first() + assert machine is not None + + case = _create_case(db, "test_case_detail_links") + + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference="test_case_detail_links", + machine_id=machine.id, + triggered_by=normal_user_sync["id"], + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + + db.add( + Simulation( + case_id=case.id, + execution_id="case-detail-links-exec-1", + case_hash="detail-links-hash-1", + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + initialization_type="startup", + simulation_type="experimental", + status="created", + machine_id=machine.id, + simulation_start_date="2023-01-01T00:00:00Z", + created_by=normal_user_sync["id"], + last_updated_by=admin_user_sync["id"], + ingestion_id=ingestion.id, + ) + ) + db.flush() + db.add( + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-diagnostic", + label="Case diagnostic", + ) + ) + db.commit() + + res = client.get(f"{API_BASE}/cases/{case.id}") + assert res.status_code == 200 + data = res.json() + assert data["links"] == [ + { + "id": data["links"][0]["id"], + "kind": "diagnostic", + "url": "https://example.com/case-diagnostic", + "label": "Case diagnostic", + "createdAt": data["links"][0]["createdAt"], + "updatedAt": data["links"][0]["updatedAt"], + } + ] def test_endpoint_raises_404_if_case_not_found(self, client): res = client.get(f"{API_BASE}/cases/{uuid4()}") @@ -771,6 +837,86 @@ def test_filter_by_case_name_and_case_group( assert data[0]["caseName"] == "combo_case" assert data[0]["caseGroup"] == "combo_group" + def test_list_merges_case_owned_diagnostic_links_without_duplicates( + self, client, db: Session, normal_user_sync, admin_user_sync, monkeypatch + ): + monkeypatch.setattr(settings, "assistant_llm_enabled", False) + machine = db.query(Machine).first() + assert machine is not None + + case = _create_case(db, "test_case_list_links") + + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference="test_case_list_links", + machine_id=machine.id, + triggered_by=normal_user_sync["id"], + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + + sim = Simulation( + case_id=case.id, + execution_id="list-links-exec-1", + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + initialization_type="startup", + simulation_type="experimental", + status="created", + machine_id=machine.id, + simulation_start_date="2023-01-01T00:00:00Z", + created_by=normal_user_sync["id"], + last_updated_by=admin_user_sync["id"], + ingestion_id=ingestion.id, + ) + db.add(sim) + db.flush() + db.add_all( + [ + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-only-diagnostic", + label="Case-only diagnostic", + ), + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/shared-diagnostic", + label="Case shared diagnostic", + ), + ExternalLink( + simulation_id=sim.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/shared-diagnostic", + label="Simulation shared diagnostic", + ), + ] + ) + db.commit() + + res = client.get(f"{API_BASE}/simulations") + assert res.status_code == 200 + data = res.json() + assert len(data) == 1 + + links_by_url = {link["url"]: link for link in data[0]["links"]} + assert set(links_by_url) == { + "https://example.com/case-only-diagnostic", + "https://example.com/shared-diagnostic", + } + assert ( + links_by_url["https://example.com/shared-diagnostic"]["label"] + == "Simulation shared diagnostic" + ) + assert data[0]["groupedLinks"]["diagnostic"][0]["kind"] == "diagnostic" + class TestGetSimulation: def test_endpoint_succeeds_with_valid_id( @@ -894,6 +1040,85 @@ def test_endpoint_raises_404_if_simulation_not_found(self, client): assert res.status_code == 404 assert res.json() == {"detail": "Simulation not found"} + def test_endpoint_merges_case_owned_diagnostic_links_with_simulation_precedence( + self, client, db: Session, normal_user_sync, admin_user_sync, monkeypatch + ): + monkeypatch.setattr(settings, "assistant_llm_enabled", False) + machine = db.query(Machine).first() + assert machine is not None + + case = _create_case(db, "test_case_get_links") + + ingestion = Ingestion( + source_type=IngestionSourceType.BROWSER_UPLOAD, + source_reference="test_simulation_get_links", + machine_id=machine.id, + triggered_by=normal_user_sync["id"], + status=IngestionStatus.SUCCESS, + created_count=1, + duplicate_count=0, + error_count=0, + ) + db.add(ingestion) + db.flush() + + sim = Simulation( + case_id=case.id, + execution_id="get-links-exec-1", + compset="AQUAPLANET", + compset_alias="QPC4", + grid_name="f19_f19", + grid_resolution="1.9x2.5", + initialization_type="startup", + simulation_type="experimental", + status="created", + machine_id=machine.id, + simulation_start_date="2023-01-01T00:00:00Z", + created_by=normal_user_sync["id"], + last_updated_by=admin_user_sync["id"], + ingestion_id=ingestion.id, + ) + db.add(sim) + db.flush() + db.add_all( + [ + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/case-diagnostic-only", + label="Case diagnostic only", + ), + ExternalLink( + case_id=case.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/shared-diagnostic-detail", + label="Case duplicate", + ), + ExternalLink( + simulation_id=sim.id, + kind=ExternalLinkKind.DIAGNOSTIC, + url="https://example.com/shared-diagnostic-detail", + label="Simulation duplicate", + ), + ] + ) + db.commit() + db.refresh(sim) + + res = client.get(f"{API_BASE}/simulations/{sim.id}") + assert res.status_code == 200 + data = res.json() + + links_by_url = {link["url"]: link for link in data["links"]} + assert set(links_by_url) == { + "https://example.com/case-diagnostic-only", + "https://example.com/shared-diagnostic-detail", + } + assert ( + links_by_url["https://example.com/shared-diagnostic-detail"]["label"] + == "Simulation duplicate" + ) + class TestSimulationBrowserIncludesCaseMetadata: def test_simulation_list_includes_case_name_and_id( diff --git a/backend/tests/features/simulation/test_schemas.py b/backend/tests/features/simulation/test_schemas.py index f458e102..bfaaee9e 100644 --- a/backend/tests/features/simulation/test_schemas.py +++ b/backend/tests/features/simulation/test_schemas.py @@ -531,6 +531,7 @@ def test_case_out_with_nested_simulations(self): ], machine_names=["chrysalis"], hpc_usernames=["ac.tvo"], + links=[], created_at=datetime(2023, 1, 1, 0, 0, 0), updated_at=datetime(2023, 1, 2, 0, 0, 0), ) @@ -550,9 +551,11 @@ def test_case_out_empty_simulations(self): simulations=[], machine_names=[], hpc_usernames=[], + links=[], created_at=datetime(2023, 1, 1, 0, 0, 0), updated_at=datetime(2023, 1, 2, 0, 0, 0), ) assert case_out.simulations == [] assert case_out.machine_names == [] assert case_out.hpc_usernames == [] + assert case_out.links == [] diff --git a/frontend/src/features/simulations/CaseDetailsPage.tsx b/frontend/src/features/simulations/CaseDetailsPage.tsx index 4f8bc04e..52e14448 100644 --- a/frontend/src/features/simulations/CaseDetailsPage.tsx +++ b/frontend/src/features/simulations/CaseDetailsPage.tsx @@ -30,7 +30,7 @@ import { } from '@/features/simulations/caseUtils'; import { useCase } from '@/features/simulations/hooks/useCase'; import { toast } from '@/hooks/use-toast'; -import type { SimulationOut, SimulationSummaryOut } from '@/types'; +import type { ExternalLinkOut, SimulationOut, SimulationSummaryOut } from '@/types'; const DetailField = ({ label, @@ -140,6 +140,19 @@ const getGroupRunDateWindow = (simulations: GroupSimulation[]) => { const countDistinctValues = (values: string[]) => new Set(values).size; +const renderExternalLink = (link: ExternalLinkOut) => ( +
  • + + {link.label || link.url} + +
  • +); + export const CaseDetailsPage = ({ simulations: allSimulations, selectedSimulationIds, @@ -337,6 +350,7 @@ export const CaseDetailsPage = ({ } const machineSummary = summarizeValues(caseRecord.machineNames); const hpcUsernameSummary = summarizeValues(caseRecord.hpcUsernames); + const diagnosticLinks = caseRecord.links.filter((link) => link.kind === 'diagnostic'); const isCompareButtonDisabled = selectedSimulationIds.length < 2; const filteredExecutionCount = filteredFlatSimulations.length; const activeSimulationCount = @@ -479,6 +493,26 @@ export const CaseDetailsPage = ({ +
    +
    +

    Diagnostics

    +

    + Case-level diagnostic links attached directly to this case. +

    +
    + + + {diagnosticLinks.length > 0 ? ( +
      {diagnosticLinks.map(renderExternalLink)}
    + ) : ( +

    + No case-level diagnostic links are recorded yet. +

    + )} +
    +
    +
    +
    diff --git a/frontend/src/types/simulation.ts b/frontend/src/types/simulation.ts index 6f69fd71..85706048 100644 --- a/frontend/src/types/simulation.ts +++ b/frontend/src/types/simulation.ts @@ -51,6 +51,7 @@ export interface CaseOut { simulations: SimulationSummaryOut[]; machineNames: string[]; hpcUsernames: string[]; + links: ExternalLinkOut[]; createdAt: string; updatedAt: string; } From 77cd47c429786e63f5d5be36e9b18a6445fd1bcc Mon Sep 17 00:00:00 2001 From: Vo Date: Thu, 4 Jun 2026 15:42:20 -0700 Subject: [PATCH 13/13] Refine case resources section UI --- .../features/simulations/CaseDetailsPage.tsx | 95 ++++++++++++------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/frontend/src/features/simulations/CaseDetailsPage.tsx b/frontend/src/features/simulations/CaseDetailsPage.tsx index 52e14448..de9e3b7c 100644 --- a/frontend/src/features/simulations/CaseDetailsPage.tsx +++ b/frontend/src/features/simulations/CaseDetailsPage.tsx @@ -1,4 +1,4 @@ -import { ArrowLeft, ChevronDown, Info, Search, Share2 } from 'lucide-react'; +import { ArrowLeft, ChevronDown, ExternalLink, Info, Search, Share2 } from 'lucide-react'; import { Fragment, useEffect, useMemo, useState } from 'react'; import { Link, useLocation, useNavigate, useParams } from 'react-router-dom'; @@ -86,6 +86,20 @@ const formatGroupSimulationWindow = (simulations: SimulationSummaryOut[]) => { const pluralize = (count: number, singular: string, plural = `${singular}s`) => `${count} ${count === 1 ? singular : plural}`; +const RESOURCE_GROUP_LABELS: Record = { + diagnostic: 'Diagnostics', + performance: 'Performance', + docs: 'Documentation', + other: 'Other resources', +}; + +const RESOURCE_KIND_DESCRIPTIONS: Record = { + diagnostic: 'zppy diagnostic output', + performance: 'performance output', + docs: 'linked documentation', + other: 'linked resource', +}; + interface CaseDetailsPageProps { simulations: SimulationOut[]; selectedSimulationIds: string[]; @@ -140,17 +154,34 @@ const getGroupRunDateWindow = (simulations: GroupSimulation[]) => { const countDistinctValues = (values: string[]) => new Set(values).size; -const renderExternalLink = (link: ExternalLinkOut) => ( -
  • - - {link.label || link.url} - -
  • +const renderResourceLink = (link: ExternalLinkOut) => ( + +
    +
    +

    + {link.label || link.url} +

    + + {RESOURCE_GROUP_LABELS[link.kind]} + +
    +

    {RESOURCE_KIND_DESCRIPTIONS[link.kind]}

    +
    +
    + + Opens in a new tab +
    +
    ); export const CaseDetailsPage = ({ @@ -350,7 +381,8 @@ export const CaseDetailsPage = ({ } const machineSummary = summarizeValues(caseRecord.machineNames); const hpcUsernameSummary = summarizeValues(caseRecord.hpcUsernames); - const diagnosticLinks = caseRecord.links.filter((link) => link.kind === 'diagnostic'); + const resourceLinks = caseRecord.links; + const resourceCount = caseRecord.links.length; const isCompareButtonDisabled = selectedSimulationIds.length < 2; const filteredExecutionCount = filteredFlatSimulations.length; const activeSimulationCount = @@ -489,29 +521,26 @@ export const CaseDetailsPage = ({
    - - - -
    -
    -

    Diagnostics

    -

    - Case-level diagnostic links attached directly to this case. -

    -
    - - - {diagnosticLinks.length > 0 ? ( -
      {diagnosticLinks.map(renderExternalLink)}
    - ) : ( -

    - No case-level diagnostic links are recorded yet. -

    - )} +
    +
    +
    +

    Resources

    + {resourceCount > 0 ? ( +

    ({resourceCount})

    + ) : null} +
    + + {resourceLinks.length > 0 ? ( +
    {resourceLinks.map(renderResourceLink)}
    + ) : ( +

    No linked resources yet.

    + )} +
    +
    -
    +