Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions packages/app/cypress/e2e/reproduce-drawer.cy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
 * End-to-end tests for the Reproduce drawer.
 *
 * Every test in this spec opens the drawer by clicking a row of the inference
 * results table; the scatter pinned-tooltip and GPU graph-tooltip entry points
 * are not exercised here. The spec also verifies that closing the drawer
 * leaves the page URL untouched.
 */
describe('Reproduce drawer', () => {
  const DRAWER = '[data-testid="reproduce-drawer"]';
  const COPY_BUTTON = '[data-testid="reproduce-drawer-copy"]';
  const RESULTS_TABLE = '[data-testid="inference-results-table"]';

  /** Waits until the first scatter graph has rendered at least one point. */
  const waitForScatterPoints = (): void => {
    cy.get('[data-testid="scatter-graph"]')
      .first()
      .find('svg .dot-group')
      .should('have.length.greaterThan', 0);
  };

  /** Switches the first chart to its table view and waits for the table to show. */
  const showResultsTable = (): void => {
    cy.get('[data-testid="inference-table-view-btn"]').first().click();
    cy.get(RESULTS_TABLE).should('be.visible');
  };

  /** Clicks the first results row and waits for the drawer to appear. */
  const openDrawerFromFirstRow = (): void => {
    cy.get(`${RESULTS_TABLE} tbody tr`).first().click();
    cy.get(DRAWER).should('be.visible');
  };

  /**
   * Opens the Config JSON tab and checks that it contains config/identity
   * fields and none of the raw result metrics.
   *
   * The assertions run inside a `.should` callback so Cypress retries them
   * until the tab content has actually switched, instead of inspecting
   * whatever `<pre>` happened to be rendered at the instant of the click.
   */
  const assertConfigJsonExcludesMetrics = (): void => {
    cy.contains('button', 'Config JSON').click();
    cy.get(DRAWER)
      .find('pre')
      .first()
      .invoke('text')
      .should((text) => {
        // Config / identity fields belong here.
        expect(text).to.match(/"framework":/u);
        expect(text).to.match(/"precision":/u);
        expect(text).to.match(/"tp":/u);
        // Raw result metrics from `benchmark_results.metrics` must NOT leak in.
        expect(text).not.to.match(/"mean_ttft":/u);
        expect(text).not.to.match(/"p99_e2el":/u);
        expect(text).not.to.match(/"tput_per_gpu":/u);
      });
  };

  beforeEach(() => {
    // Presumably marks the star modal as already dismissed so it cannot cover
    // the page under test — the key name suggests this; confirm against the app.
    cy.window().then((win) => {
      win.localStorage.setItem('inferencex-star-modal-dismissed', String(Date.now()));
    });
    cy.visit('/inference');
    waitForScatterPoints();
  });

  it('opens from clicking an inference table row and shows the three tabs', () => {
    showResultsTable();
    openDrawerFromFirstRow();

    cy.contains('Reproduce this benchmark').should('be.visible');
    cy.contains('button', 'Command').should('be.visible');
    cy.contains('button', 'Config JSON').should('be.visible');
    cy.contains('button', 'Environment').should('be.visible');
  });

  it('exposes a copy button on every tab', () => {
    showResultsTable();
    openDrawerFromFirstRow();

    // The drawer opens on its default tab; then visit the other two.
    cy.get(COPY_BUTTON).should('be.visible');
    for (const tab of ['Config JSON', 'Environment']) {
      cy.contains('button', tab).click();
      cy.get(COPY_BUTTON).should('be.visible');
    }
  });

  it('Config JSON tab shows config fields and excludes result metrics', () => {
    showResultsTable();
    openDrawerFromFirstRow();
    assertConfigJsonExcludesMetrics();
  });

  it('Environment tab renders structured rows including env-only fields with graceful fallback', () => {
    showResultsTable();
    openDrawerFromFirstRow();
    cy.contains('button', 'Environment').click();
    // Core rows are always rendered. The values come from /api/v1/run-environment
    // when available; otherwise they show "(not recorded)" — we assert the
    // labels exist either way so a regression that drops a row is caught.
    const labels = [
      'GPU',
      'GPU SKU',
      'Framework',
      'Framework version',
      'Framework SHA',
      'Container image',
      'Driver',
      'CUDA',
      'PyTorch',
      'Python',
    ];
    for (const label of labels) {
      cy.get(DRAWER).contains('dt', label).should('be.visible');
    }
  });

  it('Esc closes the drawer without changing the URL hash', () => {
    showResultsTable();
    // Capture the URL only after the table view is showing, so the comparison
    // isolates the effect of opening and closing the drawer.
    cy.url().then((before) => {
      openDrawerFromFirstRow();
      cy.get('body').type('{esc}');
      cy.get(DRAWER).should('not.exist');
      cy.url().should('eq', before);
    });
  });

  it('renders correctly for an unofficial-run overlay row when one is loaded', () => {
    // Re-visit with the overlay query param. We do NOT assert which row is
    // rendered — we only assert the drawer can be opened from whatever points
    // appear for the official path on top of the overlay. The wiring is the
    // same code path: clicking an inference table row feeds the InferenceData
    // through to the drawer regardless of where the row originated.
    const candidateRunId = '15000000000';
    cy.visit(`/inference?unofficialrun=${candidateRunId}`);
    waitForScatterPoints();
    showResultsTable();
    openDrawerFromFirstRow();
    // Same Config JSON guarantee for the overlay path — the drawer renders
    // overlay points through the same `InferenceData` shape, so result-metric
    // leakage would silently regress there too if we didn't assert it.
    assertConfigJsonExcludesMetrics();
  });
});
17 changes: 17 additions & 0 deletions packages/app/cypress/fixtures/api/run-environment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"workflow_run_id": 1,
"config_id": 1,
"environment": {
"source": "log_parse",
"image": null,
"framework_version": null,
"framework_sha": null,
"torch_version": null,
"python_version": null,
"cuda_version": null,
"rocm_version": null,
"driver_version": null,
"gpu_sku": null,
"extra": {}
}
}
3 changes: 3 additions & 0 deletions packages/app/cypress/support/mock-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ export function createMockInferenceContext(
activePresetId: null,
setActivePresetId: namedStub('setActivePresetId'),
presetGuardRef: { current: false } as React.RefObject<boolean>,
reproducePoint: null,
openReproduceDrawer: namedStub('openReproduceDrawer'),
closeReproduceDrawer: namedStub('closeReproduceDrawer'),
...overrides,
};
}
Expand Down
54 changes: 53 additions & 1 deletion packages/app/scripts/capture-cypress-fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
*
* Hits the public production API by default and writes one JSON file per
* endpoint into cypress/fixtures/api/. The cypress e2e suite uses these
* fixtures via cy.intercept so tests run with no database.
* fixtures via server-side `FIXTURES_MODE` (E2E_FIXTURES=1) so tests run
* with no database.
*
* Usage:
* pnpm --filter app capture:fixtures (prod)
Expand Down Expand Up @@ -154,6 +155,11 @@ async function main() {
precision: string;
isl: number;
osl: number;
// Optional: only present after the env-key PR ships. The capture script
// uses these to fetch a representative `/api/v1/run-environment` response;
// the route uses them as its sole identifier.
workflow_run_id?: number;
config_id?: number;
}
const benchmarks = await fetchJson<BenchmarkRow[]>(
`/api/v1/benchmarks?model=${encodeURIComponent(BENCHMARK_MODEL)}`,
Expand Down Expand Up @@ -188,6 +194,51 @@ async function main() {
`/api/v1/workflow-info?date=${encodeURIComponent(latestDate)}`,
);

// run-environment: fired by `useRunEnvironment` every time the Reproduce
// drawer opens. We need a fixture so cypress' fixture mode doesn't 500.
// Try to pull a real one from prod, falling back to an all-nulls /
// log_parse placeholder. The placeholder is the worst-case end-state the
// drawer is designed to render (every env-only field shows "(not
// recorded)" with the "Some fields are approximated…" hint), so it's
// production-realistic even before the upstream env.json artifact lands.
const RUN_ENV_PLACEHOLDER = {
workflow_run_id: 1,
config_id: 1,
environment: {
source: 'log_parse',
image: null,
framework_version: null,
framework_sha: null,
torch_version: null,
python_version: null,
cuda_version: null,
rocm_version: null,
driver_version: null,
gpu_sku: null,
extra: {},
},
};
let runEnvironment: unknown = RUN_ENV_PLACEHOLDER;
const sampleRow = benchmarks.find((b) => b.workflow_run_id && b.config_id);
if (sampleRow) {
const envUrl =
`${baseUrl}/api/v1/run-environment` +
`?workflow_run_id=${sampleRow.workflow_run_id}` +
`&config_id=${sampleRow.config_id}`;
try {
const res = await fetch(envUrl);
if (res.ok) runEnvironment = await res.json();
} catch {
// Network or parse failure — keep the placeholder; logged below.
}
}
if (runEnvironment === RUN_ENV_PLACEHOLDER) {
console.log(
' (note) run-environment: using placeholder — either prod predates the env PR, ' +
'the benchmark_environments table is empty, or the route is unavailable.',
);
}

const N = TOP_DATES_PER_PARTITION;
const sizes: [string, number][] = [
[
Expand Down Expand Up @@ -250,6 +301,7 @@ async function main() {
}),
],
['workflow-info', await writeFixture('workflow-info', workflowInfo)],
['run-environment', await writeFixture('run-environment', runEnvironment)],
];

for (const [name, bytes] of sizes) {
Expand Down
162 changes: 162 additions & 0 deletions packages/app/src/app/api/v1/run-environment/route.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import { describe, expect, it, vi, beforeEach } from 'vitest';

// Vitest hoists `vi.mock` calls above the imports, so the mock functions the
// factories below refer to are created with `vi.hoisted` to be available at
// that point.
const { mockGetEnvironment, mockGetDb } = vi.hoisted(() => ({
mockGetEnvironment: vi.fn(),
// Returns a sentinel string so tests can assert it is forwarded to the query.
mockGetDb: vi.fn(() => 'mock-sql'),
}));

// DB connection module with both JSON_MODE and FIXTURES_MODE switched off.
vi.mock('@semianalysisai/inferencex-db/connection', () => ({
getDb: mockGetDb,
JSON_MODE: false,
FIXTURES_MODE: false,
}));

// The environment query is replaced by a mock each test configures itself.
vi.mock('@semianalysisai/inferencex-db/queries/environments', () => ({
getEnvironmentForRunConfig: mockGetEnvironment,
}));

// Cache helpers become pass-throughs: `cachedQuery` returns the function it is
// given and `cachedJson` produces a plain JSON `Response`.
vi.mock('@/lib/api-cache', () => ({
cachedQuery: (fn: (...args: any[]) => any) => fn,
cachedJson: (data: unknown) => Response.json(data),
}));

import { GET } from './route';
import { NextRequest } from 'next/server';

/**
 * Builds a `NextRequest` for the given URL.
 *
 * @param url - Absolute URL, or a path that is resolved against `http://localhost`.
 * @returns The request object to hand to the route handler.
 */
function req(url: string): NextRequest {
  const resolved = new URL(url, 'http://localhost');
  return new NextRequest(resolved);
}

// Clear all mocks before every test so `mockResolvedValueOnce` setups and
// call-count assertions start from a clean slate.
beforeEach(() => {
vi.clearAllMocks();
});

// Environment row with `source: 'env_json'` and every field populated except
// `rocm_version`; tests spread over it to build variants.
const env = {
source: 'env_json' as const,
image: 'lmsysorg/sglang:latest',
framework_version: '0.4.3.post2',
framework_sha: 'e136d70cdc6101007017c05d57fb4cec5d6ed98f',
torch_version: '2.5.1+cu124',
python_version: '3.12.7',
cuda_version: '12.4',
rocm_version: null,
driver_version: '560.35.03',
gpu_sku: 'NVIDIA H100 80GB HBM3',
extra: {},
};

// Query string carrying both identifiers the tests below pass to the route.
const VALID_QS = 'workflow_run_id=101&config_id=42';

describe('GET /api/v1/run-environment', () => {
  const ENDPOINT = '/api/v1/run-environment';

  /** Invokes the route handler with the given raw query string. */
  const callRoute = (queryString: string) => GET(req(`${ENDPOINT}?${queryString}`));

  // Each entry: [test title, query string the route must reject with 400].
  const rejectedQueries: [string, string][] = [
    ['returns 400 when workflow_run_id is missing', 'config_id=42'],
    ['returns 400 when config_id is missing', 'workflow_run_id=101'],
    ['returns 400 when params are non-numeric', 'workflow_run_id=abc&config_id=xyz'],
  ];
  for (const [title, queryString] of rejectedQueries) {
    it(title, async () => {
      const response = await callRoute(queryString);
      expect(response.status).toBe(400);
    });
  }

  it('returns 404 when no environment row exists', async () => {
    mockGetEnvironment.mockResolvedValueOnce(null);

    const response = await callRoute(VALID_QS);

    expect(response.status).toBe(404);
  });

  it('returns env_json environment for valid (workflow_run_id, config_id)', async () => {
    mockGetEnvironment.mockResolvedValueOnce(env);

    const response = await callRoute(VALID_QS);
    expect(response.status).toBe(200);

    const payload = await response.json();
    expect(payload).toEqual({ workflow_run_id: 101, config_id: 42, environment: env });
    // The handler must pass the db handle and both parsed numeric ids through.
    expect(mockGetEnvironment).toHaveBeenCalledWith('mock-sql', 101, 42);
  });

  it('returns log_parse environment with nulls preserved', async () => {
    const logParsedEnv = {
      ...env,
      source: 'log_parse',
      framework_sha: null,
      driver_version: null,
      cuda_version: null,
      gpu_sku: null,
    };
    mockGetEnvironment.mockResolvedValueOnce(logParsedEnv);

    const response = await callRoute(VALID_QS);
    expect(response.status).toBe(200);

    const { environment } = await response.json();
    expect(environment.source).toBe('log_parse');
    expect(environment.framework_sha).toBeNull();
    expect(environment.driver_version).toBeNull();
  });

  it('returns 500 when query throws', async () => {
    mockGetEnvironment.mockRejectedValueOnce(new Error('Connection reset'));

    const response = await callRoute(VALID_QS);

    expect(response.status).toBe(500);
  });
});

// Separate suite because FIXTURES_MODE is read at module-eval time — the only
// way to flip it for a single test is to reset module cache + dynamic-import.
describe('GET /api/v1/run-environment (FIXTURES_MODE)', () => {
  /**
   * Resets the module cache, re-registers every mock with `FIXTURES_MODE: true`,
   * and dynamically imports a fresh copy of the route.
   *
   * @param loadFixture - Stand-in for `loadFixture` from `@/lib/test-fixtures`.
   * @returns The freshly imported `GET` handler.
   */
  async function importRouteInFixturesMode(loadFixture: (...args: never[]) => unknown) {
    vi.resetModules();
    vi.doMock('@semianalysisai/inferencex-db/connection', () => ({
      getDb: mockGetDb,
      JSON_MODE: false,
      FIXTURES_MODE: true,
    }));
    vi.doMock('@semianalysisai/inferencex-db/queries/environments', () => ({
      getEnvironmentForRunConfig: mockGetEnvironment,
    }));
    vi.doMock('@/lib/api-cache', () => ({
      cachedQuery: (fn: (...args: any[]) => any) => fn,
      cachedJson: (data: unknown) => Response.json(data),
    }));
    vi.doMock('@/lib/test-fixtures', () => ({ loadFixture }));

    const routeModule = await import('./route');
    return routeModule.GET;
  }

  it('short-circuits to the loaded fixture and never hits the env query', async () => {
    const mockLoadFixture = vi.fn(() => ({
      workflow_run_id: 1,
      config_id: 1,
      environment: { ...env, source: 'log_parse' },
    }));
    const GETwithFixtures = await importRouteInFixturesMode(mockLoadFixture);

    const response = await GETwithFixtures(req(`/api/v1/run-environment?${VALID_QS}`));

    expect(response.status).toBe(200);
    const payload = await response.json();
    expect(payload.environment.source).toBe('log_parse');
    expect(mockLoadFixture).toHaveBeenCalledWith('run-environment');
    expect(mockGetEnvironment).not.toHaveBeenCalled();
  });

  it('still 400s on missing params before consulting the fixture', async () => {
    const mockLoadFixture = vi.fn();
    const GETwithFixtures = await importRouteInFixturesMode(mockLoadFixture);

    const response = await GETwithFixtures(req('/api/v1/run-environment?config_id=42'));

    expect(response.status).toBe(400);
    expect(mockLoadFixture).not.toHaveBeenCalled();
  });
});
Loading