SemiAnalysisAI · rafaykhan-source · May 5, 2026 · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/packages/app/cypress/e2e/reproduce-drawer.cy.ts b/packages/app/cypress/e2e/reproduce-drawer.cy.ts
@@ -0,0 +1,125 @@
+/**
+ * Tests for the Reproduce drawer. Every test opens it from an inference table
+ * row; the scatter pinned-tooltip and GPU graph tooltip entry points are not
+ * exercised here. Also checks that closing with Esc leaves the URL unchanged.
+ */
+describe('Reproduce drawer', () => {
+  beforeEach(() => {
+    cy.window().then((win) => { // runs before cy.visit; NOTE(review): confirm this window shares the app's localStorage
+      win.localStorage.setItem('inferencex-star-modal-dismissed', String(Date.now())); // presumably suppresses the star modal
+    });
+    cy.visit('/inference');
+    cy.get('[data-testid="scatter-graph"]') // wait until the first scatter graph contains at least one .dot-group
+      .first()
+      .find('svg .dot-group')
+      .should('have.length.greaterThan', 0);
+  });
+
+  it('opens from clicking an inference table row and shows the three tabs', () => {
+    cy.get('[data-testid="inference-table-view-btn"]').first().click(); // switch to the table view
+    cy.get('[data-testid="inference-results-table"]').should('be.visible');
+    cy.get('[data-testid="inference-results-table"] tbody tr').first().click(); // clicking the first row opens the drawer
+
+    cy.get('[data-testid="reproduce-drawer"]').should('be.visible');
+    cy.contains('Reproduce this benchmark').should('be.visible'); // page-wide text lookup, not scoped to the drawer
+    cy.contains('button', 'Command').should('be.visible'); // the three tab buttons, matched by their text
+    cy.contains('button', 'Config JSON').should('be.visible');
+    cy.contains('button', 'Environment').should('be.visible');
+  });
+
+  it('exposes a copy button on every tab', () => {
+    cy.get('[data-testid="inference-table-view-btn"]').first().click();
+    cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
+    cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible'); // no tab clicked yet: checks the initially selected tab (presumably Command)
+    cy.contains('button', 'Config JSON').click();
+    cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible'); // same test id is re-queried after each tab switch
+    cy.contains('button', 'Environment').click();
+    cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible');
+  });
+
+  it('Config JSON tab shows config fields and excludes result metrics', () => {
+    cy.get('[data-testid="inference-table-view-btn"]').first().click();
+    cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
+    cy.contains('button', 'Config JSON').click();
+    cy.get('[data-testid="reproduce-drawer"]')
+      .find('pre')
+      .first() // first <pre> in the drawer; assumed to hold the rendered JSON (TODO confirm)
+      .invoke('text') // assertions below run against the raw text, matching quoted key names
+      .then((text) => {
+        // Config / identity fields belong here.
+        expect(text).to.match(/"framework":/u);
+        expect(text).to.match(/"precision":/u);
+        expect(text).to.match(/"tp":/u);
+        // Raw result metrics from `benchmark_results.metrics` must NOT leak in.
+        expect(text).not.to.match(/"mean_ttft":/u);
+        expect(text).not.to.match(/"p99_e2el":/u);
+        expect(text).not.to.match(/"tput_per_gpu":/u);
+      });
+  });
+
+  it('Environment tab renders structured rows including env-only fields with graceful fallback', () => {
+    cy.get('[data-testid="inference-table-view-btn"]').first().click();
+    cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
+    cy.contains('button', 'Environment').click();
+    // Values may be real or "(not recorded)", so only labels are asserted. Match each <dt>
+    // exactly: as a substring, "GPU SKU" would satisfy "GPU" and hide a dropped row.
+    const labels = [
+      'GPU',
+      'GPU SKU',
+      'Framework',
+      'Framework version',
+      'Framework SHA',
+      'Container image',
+      'Driver',
+      'CUDA',
+      'PyTorch',
+      'Python',
+    ];
+    for (const label of labels) {
+      const exactLabel = new RegExp(`^\\s*${label}\\s*$`, 'u');
+      cy.get('[data-testid="reproduce-drawer"]').contains('dt', exactLabel).should('be.visible');
+    }
+  });
+
+  it('Esc closes the drawer without changing the URL hash', () => {
+    cy.get('[data-testid="inference-table-view-btn"]').first().click();
+    cy.url().then((before) => { // baseline taken after the table-view switch, before the drawer opens
+      cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
+      cy.get('[data-testid="reproduce-drawer"]').should('be.visible');
+      cy.get('body').type('{esc}');
+      cy.get('[data-testid="reproduce-drawer"]').should('not.exist'); // closed drawer must be gone from the DOM, not just hidden
+      cy.url().should('eq', before); // compares the whole URL, which is stricter than the hash named in the title
+    });
+  });
+
+  it('renders correctly for an unofficial-run overlay row when one is loaded', () => {
+    // Re-visit with the overlay query param. We do NOT assert which row is
+    // rendered or that the overlay run actually loaded — only that the drawer
+    // opens from the first table row with the param present. The wiring is the
+    // same code path: clicking an inference table row feeds the InferenceData
+    // through to the drawer regardless of where the row originated.
+    const candidateRunId = '15000000000'; // NOTE(review): hard-coded id; nothing below checks that it resolves to a real run
+    cy.visit(`/inference?unofficialrun=${candidateRunId}`);
+    cy.get('[data-testid="scatter-graph"]')
+      .first()
+      .find('svg .dot-group')
+      .should('have.length.greaterThan', 0);
+    cy.get('[data-testid="inference-table-view-btn"]').first().click();
+    cy.get('[data-testid="inference-results-table"]').should('be.visible');
+    cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
+    cy.get('[data-testid="reproduce-drawer"]').should('be.visible');
+    // Same Config JSON guarantee for the overlay path — the drawer renders
+    // overlay points through the same `InferenceData` shape, so result-metric
+    // leakage would silently regress there too if we didn't assert it.
+    cy.contains('button', 'Config JSON').click();
+    cy.get('[data-testid="reproduce-drawer"]')
+      .find('pre')
+      .first()
+      .invoke('text')
+      .then((text) => {
+        expect(text).to.match(/"framework":/u);
+        expect(text).not.to.match(/"mean_ttft":/u);
+        expect(text).not.to.match(/"tput_per_gpu":/u);
+      });
+  });
+});
diff --git a/packages/app/cypress/fixtures/api/run-environment.json b/packages/app/cypress/fixtures/api/run-environment.json
@@ -0,0 +1,17 @@
+{
+  "workflow_run_id": 1,
+  "config_id": 1,
+  "environment": {
+    "source": "log_parse",
+    "image": null,
+    "framework_version": null,
+    "framework_sha": null,
+    "torch_version": null,
+    "python_version": null,
+    "cuda_version": null,
+    "rocm_version": null,
+    "driver_version": null,
+    "gpu_sku": null,
+    "extra": {}
+  }
+}
diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts
@@ -250,6 +250,9 @@ export function createMockInferenceContext(
     activePresetId: null,
     setActivePresetId: namedStub('setActivePresetId'),
     presetGuardRef: { current: false } as React.RefObject<boolean>,
+    reproducePoint: null,
+    openReproduceDrawer: namedStub('openReproduceDrawer'),
+    closeReproduceDrawer: namedStub('closeReproduceDrawer'),
     ...overrides,
   };
 }

diff --git a/packages/app/scripts/capture-cypress-fixtures.ts b/packages/app/scripts/capture-cypress-fixtures.ts
@@ -3,7 +3,8 @@
  *
  * Hits the public production API by default and writes one JSON file per
  * endpoint into cypress/fixtures/api/. The cypress e2e suite uses these
- * fixtures via cy.intercept so tests run with no database.
+ * fixtures via server-side `FIXTURES_MODE` (E2E_FIXTURES=1) so tests run
+ * with no database.
  *
  * Usage:
  *   pnpm --filter app capture:fixtures                              (prod)
@@ -154,6 +155,11 @@ async function main() {
     precision: string;
     isl: number;
     osl: number;
+    // Optional: only present after the env-key PR ships. The capture script
+    // uses these to fetch a representative `/api/v1/run-environment` response;
+    // the route uses them as its sole identifier.
+    workflow_run_id?: number;
+    config_id?: number;
   }
   const benchmarks = await fetchJson<BenchmarkRow[]>(
     `/api/v1/benchmarks?model=${encodeURIComponent(BENCHMARK_MODEL)}`,
@@ -188,6 +194,51 @@ async function main() {
     `/api/v1/workflow-info?date=${encodeURIComponent(latestDate)}`,
   );
 
+  // run-environment: fired by `useRunEnvironment` every time the Reproduce
+  // drawer opens. We need a fixture so cypress' fixture mode doesn't 500.
+  // Try to pull a real one from prod, falling back to an all-nulls /
+  // log_parse placeholder. The placeholder is the worst-case end-state the
+  // drawer is designed to render (every env-only field shows "(not
+  // recorded)" with the "Some fields are approximated…" hint), so it's
+  // production-realistic even before the upstream env.json artifact lands.
+  const RUN_ENV_PLACEHOLDER = {
+    workflow_run_id: 1,
+    config_id: 1,
+    environment: {
+      source: 'log_parse',
+      image: null,
+      framework_version: null,
+      framework_sha: null,
+      torch_version: null,
+      python_version: null,
+      cuda_version: null,
+      rocm_version: null,
+      driver_version: null,
+      gpu_sku: null,
+      extra: {},
+    },
+  };
+  let runEnvironment: unknown = RUN_ENV_PLACEHOLDER;
+  const sampleRow = benchmarks.find((b) => b.workflow_run_id && b.config_id);
+  if (sampleRow) {
+    const envUrl =
+      `${baseUrl}/api/v1/run-environment` +
+      `?workflow_run_id=${sampleRow.workflow_run_id}` +
+      `&config_id=${sampleRow.config_id}`;
+    try {
+      const res = await fetch(envUrl);
+      if (res.ok) runEnvironment = await res.json();
+    } catch {
+      // Network or parse failure — keep the placeholder; logged below.
+    }
+  }
+  if (runEnvironment === RUN_ENV_PLACEHOLDER) {
+    console.log(
+      '  (note) run-environment: using placeholder — either prod predates the env PR, ' +
+        'the benchmark_environments table is empty, or the route is unavailable.',
+    );
+  }
+
   const N = TOP_DATES_PER_PARTITION;
   const sizes: [string, number][] = [
     [
@@ -250,6 +301,7 @@ async function main() {
       }),
     ],
     ['workflow-info', await writeFixture('workflow-info', workflowInfo)],
+    ['run-environment', await writeFixture('run-environment', runEnvironment)],
   ];
 
   for (const [name, bytes] of sizes) {

diff --git a/packages/app/src/app/api/v1/run-environment/route.test.ts b/packages/app/src/app/api/v1/run-environment/route.test.ts
@@ -0,0 +1,162 @@
+import { describe, expect, it, vi, beforeEach } from 'vitest';
+
+const { mockGetEnvironment, mockGetDb } = vi.hoisted(() => ({ // hoisted so the vi.mock factories below can reference these stubs
+  mockGetEnvironment: vi.fn(),
+  mockGetDb: vi.fn(() => 'mock-sql'), // sentinel handle; the happy-path test asserts it is passed to the query
+}));
+
+vi.mock('@semianalysisai/inferencex-db/connection', () => ({
+  getDb: mockGetDb,
+  JSON_MODE: false,
+  FIXTURES_MODE: false, // the FIXTURES_MODE suite further down re-mocks this module with the flag on
+}));
+
+vi.mock('@semianalysisai/inferencex-db/queries/environments', () => ({
+  getEnvironmentForRunConfig: mockGetEnvironment,
+}));
+
+vi.mock('@/lib/api-cache', () => ({
+  cachedQuery: (fn: (...args: any[]) => any) => fn, // pass-through: hands back the query function unwrapped
+  cachedJson: (data: unknown) => Response.json(data), // plain JSON response built from the payload
+}));
+
+import { GET } from './route';
+import { NextRequest } from 'next/server';
+
+function req(url: string): NextRequest { // builds the NextRequest handed to GET from a path + query string
+  return new NextRequest(new URL(url, 'http://localhost')); // 'http://localhost' is only the base for resolving relative paths
+}
+
+beforeEach(() => {
+  vi.clearAllMocks(); // clear recorded calls so call assertions do not leak between tests
+});
+
+const env = { // fully populated environment row that the stubbed query resolves with in the success tests
+  source: 'env_json' as const,
+  image: 'lmsysorg/sglang:latest',
+  framework_version: '0.4.3.post2',
+  framework_sha: 'e136d70cdc6101007017c05d57fb4cec5d6ed98f',
+  torch_version: '2.5.1+cu124',
+  python_version: '3.12.7',
+  cuda_version: '12.4',
+  rocm_version: null,
+  driver_version: '560.35.03',
+  gpu_sku: 'NVIDIA H100 80GB HBM3',
+  extra: {},
+};
+
+const VALID_QS = 'workflow_run_id=101&config_id=42'; // both ids present and numeric; used by every non-400 test
+
+describe('GET /api/v1/run-environment', () => {
+  it('returns 400 when workflow_run_id is missing', async () => {
+    const res = await GET(req('/api/v1/run-environment?config_id=42'));
+    expect(res.status).toBe(400);
+  });
+
+  it('returns 400 when config_id is missing', async () => {
+    const res = await GET(req('/api/v1/run-environment?workflow_run_id=101'));
+    expect(res.status).toBe(400);
+  });
+
+  it('returns 400 when params are non-numeric', async () => {
+    const res = await GET(req('/api/v1/run-environment?workflow_run_id=abc&config_id=xyz')); // both ids are non-numeric here
+    expect(res.status).toBe(400);
+  });
+
+  it('returns 404 when no environment row exists', async () => {
+    mockGetEnvironment.mockResolvedValueOnce(null); // the query resolving to null is the "no row" case
+    const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
+    expect(res.status).toBe(404);
+  });
+
+  it('returns env_json environment for valid (workflow_run_id, config_id)', async () => {
+    mockGetEnvironment.mockResolvedValueOnce(env);
+    const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
+    expect(res.status).toBe(200);
+    const body = await res.json();
+    expect(body).toEqual({ workflow_run_id: 101, config_id: 42, environment: env }); // ids are echoed back as numbers
+    expect(mockGetEnvironment).toHaveBeenCalledWith('mock-sql', 101, 42); // stubbed getDb() value plus both ids parsed to numbers
+  });
+
+  it('returns log_parse environment with nulls preserved', async () => {
+    mockGetEnvironment.mockResolvedValueOnce({
+      ...env,
+      source: 'log_parse',
+      framework_sha: null,
+      driver_version: null,
+      cuda_version: null,
+      gpu_sku: null,
+    });
+    const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
+    expect(res.status).toBe(200);
+    const body = await res.json();
+    expect(body.environment.source).toBe('log_parse');
+    expect(body.environment.framework_sha).toBeNull(); // null must stay null in the response body
+    expect(body.environment.driver_version).toBeNull();
+  });
+
+  it('returns 500 when query throws', async () => {
+    mockGetEnvironment.mockRejectedValueOnce(new Error('Connection reset')); // a rejected query must map to a 500 response
+    const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
+    expect(res.status).toBe(500);
+  });
+});
+
+// Separate suite because FIXTURES_MODE is read at module-eval time — the only
+// way to flip it for a single test is to reset module cache + dynamic-import.
+describe('GET /api/v1/run-environment (FIXTURES_MODE)', () => {
+  /**
+   * Re-imports the route with `FIXTURES_MODE` forced on.
+   *
+   * Registers the same connection / query / cache mocks as the default suite
+   * (only the flag differs) plus a `@/lib/test-fixtures` mock backed by the
+   * caller's stub, then evaluates `./route` afresh.
+   * @param loadFixture - stub exposed as the mocked module's `loadFixture`.
+   * @returns the `GET` handler exported by the re-evaluated route module.
+   */
+  async function importGetWithFixtures(loadFixture: (name: string) => unknown) {
+    // Drop the cached route module so the import below re-evaluates it.
+    vi.resetModules();
+    vi.doMock('@semianalysisai/inferencex-db/connection', () => ({
+      getDb: mockGetDb,
+      JSON_MODE: false,
+      FIXTURES_MODE: true,
+    }));
+    vi.doMock('@semianalysisai/inferencex-db/queries/environments', () => ({
+      getEnvironmentForRunConfig: mockGetEnvironment,
+    }));
+    vi.doMock('@/lib/api-cache', () => ({
+      cachedQuery: (fn: (...args: any[]) => any) => fn,
+      cachedJson: (data: unknown) => Response.json(data),
+    }));
+    // vi.doMock is not hoisted, so this factory can close over `loadFixture`.
+    vi.doMock('@/lib/test-fixtures', () => ({ loadFixture }));
+    const { GET: GETwithFixtures } = await import('./route');
+    return GETwithFixtures;
+  }
+
+  it('short-circuits to the loaded fixture and never hits the env query', async () => {
+    const mockLoadFixture = vi.fn(() => ({
+      workflow_run_id: 1,
+      config_id: 1,
+      environment: { ...env, source: 'log_parse' },
+    }));
+    const GETwithFixtures = await importGetWithFixtures(mockLoadFixture);
+    const res = await GETwithFixtures(req(`/api/v1/run-environment?${VALID_QS}`));
+
+    expect(res.status).toBe(200);
+    const body = await res.json();
+    expect(body.environment.source).toBe('log_parse');
+    expect(mockLoadFixture).toHaveBeenCalledWith('run-environment');
+    expect(mockGetEnvironment).not.toHaveBeenCalled();
+  });
+
+  it('still 400s on missing params before consulting the fixture', async () => {
+    const mockLoadFixture = vi.fn();
+    const GETwithFixtures = await importGetWithFixtures(mockLoadFixture);
+    const res = await GETwithFixtures(req('/api/v1/run-environment?config_id=42'));
+
+    expect(res.status).toBe(400);
+    expect(mockLoadFixture).not.toHaveBeenCalled();
+  });
+});