Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
0e35e5f
feat: agentic benchmark ingest + UI with offload-mode halo
cquil11 Apr 23, 2026
9c43a76
fix: agentic offload variants — render both halos + map renamed fields
cquil11 May 1, 2026
07ba106
fix: render offload halo on every offload-on point, not just frontier
cquil11 May 1, 2026
95e9dc7
fix: strip runner-pool suffix (-p1, -p2, ...) from hw identifier
cquil11 May 1, 2026
982106d
feat: bold scatter labels with concurrency tag + collision avoidance
cquil11 May 1, 2026
9572b95
fix: stack multi-line point labels upward so they don't overlap the p…
cquil11 May 1, 2026
37eecc6
fix: anchor multi-line labels via first tspan + tspan-aware collision…
cquil11 May 1, 2026
f317377
fix: dedupe artifacts by logical name + skip 0-successful agg rows
cquil11 May 1, 2026
52d35ba
Merge remote-tracking branch 'origin/master' into feat/agentx
cquil11 May 1, 2026
c2f66f6
feat: add AIPerf to FRAMEWORK_LABELS
cquil11 May 7, 2026
024797a
fix(changelog): coerce ids to string when filtering changelog by run
cquil11 May 12, 2026
aa15419
feat: default sequence to Agentic Traces when available
cquil11 May 12, 2026
cb4e87c
Merge remote-tracking branch 'origin/master' into feat/agentx
cquil11 May 14, 2026
099a33e
fix(agentic): respect percentile selector for input-throughput x axis
cquil11 May 15, 2026
50a06d1
fix(agentic): default percentile to p99 and drop median option
cquil11 May 15, 2026
25305dc
Merge remote-tracking branch 'origin/master' into feat/agentx
cquil11 May 15, 2026
3c96e91
fix(agentic): keep only p90 as the percentile option
cquil11 May 15, 2026
642081a
fix(agentic): default percentile to p90, surface only p90/p99
functionstackx May 15, 2026
3f45f4d
fix(agentic): drop p99 + median TTFT, p90 only across selectors
functionstackx May 15, 2026
03c775a
fix(agentic): honor e2e TTFT override in agentic mode too
functionstackx May 15, 2026
49f2b27
fix(agentic): default e2e chart x-axis to p90 TTFT
functionstackx May 15, 2026
9e2c532
fix(tooltip): cap data-point numeric values at 3 decimal places
cquil11 May 15, 2026
50ed25f
fix(agentic): relabel x-axis title for natural-x case too
cquil11 May 15, 2026
e9d8e3f
fix(agentic): include percentile word in chart heading
cquil11 May 15, 2026
2046282
fix(agentic): include percentile in e2e chart heading dropdown
cquil11 May 15, 2026
9957f19
feat(agentic): per-point trace_replay storage + detail page POC
cquil11 May 20, 2026
0067bfc
feat(agentic): hover crosshair + expand-to-dialog on detail charts
cquil11 May 21, 2026
1d502ac
feat(inference): one chart with TTFT / E2E / Interactivity x-axis picker
cquil11 May 21, 2026
965c862
fix(inference): TTFT/E2E pick metric by sequence kind + add P75 option
cquil11 May 21, 2026
e4d97f2
feat(metrics): wire P75/P95 through frontend + register new aiperf keys
cquil11 May 21, 2026
a7a1354
fix(inference): don't drop agentic TTFT points over 60s as outliers
cquil11 May 21, 2026
07194de
fix(trace-histograms): chunk DB query + blob-cache to escape size caps
cquil11 May 21, 2026
a1e594b
feat(inference): run selector actually filters chart data
cquil11 May 21, 2026
b0d228a
feat(inference): Session Time + Prefill TPS x-axis (live from trace b…
cquil11 May 21, 2026
8af1f5c
fix(inference): show Mean Normalized Session Time in minutes
functionstackx May 21, 2026
be34e97
fix(inference): use global P90 of per-turn prefill TPS/user
functionstackx May 21, 2026
c774c00
fix(inference): no-data flash on session-time / prefill-tps modes
functionstackx May 21, 2026
d5dbda7
feat(agentic-detail): aggregates-across-configs view
cquil11 May 21, 2026
41ef33b
fix(agentic-aggregates): metric name + stream-parse oversized blobs
cquil11 May 21, 2026
1cedd24
feat(agentic-aggregates): pre-compute stats at ingest time
cquil11 May 21, 2026
9d9c7c1
fix(agentic-aggregates): drop .js extension on app-route-traced import
cquil11 May 21, 2026
6063d01
feat(agentic-detail): pre-compute chart_series at ingest time
cquil11 May 21, 2026
24fe8fe
feat(agentic-detail): per-request Gantt timeline view
cquil11 May 22, 2026
f2618f4
fix(agentic-detail): aggregate vllm metrics across all engine series
cquil11 May 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Stale agent worktrees produced by parallel Claude Code sessions — they
# hold their own branches and are linted as part of their own runs.
.claude/worktrees/
1 change: 1 addition & 0 deletions .oxlintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"no-undef": "off",
"no-underscore-dangle": "off",
"no-useless-undefined": "off",
"require-unicode-regexp": "off",
"no-warning-comments": "off",
"prefer-destructuring": "off",
"sort-imports": "off",
Expand Down
4 changes: 4 additions & 0 deletions packages/app/cypress/support/mock-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,14 @@ export function createMockInferenceContext(
workflowInfo: null,
selectedYAxisMetric: 'y_tpPerGpu',
setSelectedYAxisMetric: namedStub('setSelectedYAxisMetric'),
selectedPercentile: 'p90',
setSelectedPercentile: namedStub('setSelectedPercentile'),
selectedXAxisMetric: null,
setSelectedXAxisMetric: namedStub('setSelectedXAxisMetric'),
selectedE2eXAxisMetric: null,
setSelectedE2eXAxisMetric: namedStub('setSelectedE2eXAxisMetric'),
selectedXAxisMode: 'interactivity' as const,
setSelectedXAxisMode: namedStub('setSelectedXAxisMode'),
scaleType: 'auto',
setScaleType: namedStub('setScaleType'),
isLegendExpanded: true,
Expand Down
17 changes: 17 additions & 0 deletions packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import type { Metadata } from 'next';

import { AgenticPointDetail } from '@/components/inference/agentic-point/agentic-point-detail';

// Static page metadata for the agentic trace detail route: sets the document
// title and marks the page as not indexable (`robots.index: false`).
export const metadata: Metadata = {
title: 'Agentic trace detail | InferenceX',
robots: { index: false },
};

/**
 * Route component for `/inference/agentic/[id]`.
 *
 * Awaits the promise-wrapped route params, converts the `id` path segment to
 * a number with `Number(...)`, and hands it to `AgenticPointDetail`.
 * NOTE(review): a non-numeric segment becomes `NaN` here — confirm that
 * `AgenticPointDetail` handles that case.
 */
export default async function AgenticPointDetailPage(props: {
  params: Promise<{ id: string }>;
}) {
  const routeParams = await props.params;
  const benchmarkResultId = Number(routeParams.id);
  return <AgenticPointDetail id={benchmarkResultId} />;
}
6 changes: 6 additions & 0 deletions packages/app/src/app/api/unofficial-run/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ export function normalizeArtifactRows(
if (!params) continue;
const { config } = params;
results.push({
// Synthetic id — overlay rows aren't persisted, so trace_replay lookups
// (keyed on benchmark_results.id) will always miss, which is the
// intended behaviour: overlays never have stored trace_replay blobs.
id: 0,
hardware: config.hardware,
framework: config.framework,
model: config.model,
Expand All @@ -50,6 +54,8 @@ export function normalizeArtifactRows(
decode_num_workers: config.decodeNumWorkers,
num_prefill_gpu: config.numPrefillGpu,
num_decode_gpu: config.numDecodeGpu,
benchmark_type: params.benchmarkType,
offload_mode: params.offloadMode,
isl: params.isl,
osl: params.osl,
conc: params.conc,
Expand Down
64 changes: 64 additions & 0 deletions packages/app/src/app/api/v1/agentic-aggregates/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { type NextRequest, NextResponse } from 'next/server';

import { getDb } from '@semianalysisai/inferencex-db/connection';
import {
getAgenticAggregates,
type AgenticAggregateMap,
} from '@semianalysisai/inferencex-db/queries/agentic-aggregates';

import { cachedJson, cachedQuery } from '@/lib/api-cache';

export const dynamic = 'force-dynamic';

// Cached wrapper around `getAgenticAggregates`, bound to the handle returned
// by `getDb()`. Takes the list of ids and resolves to an `AgenticAggregateMap`.
// 'agentic-aggregates' is the name handed to `cachedQuery` (presumably the
// cache namespace — confirm in @/lib/api-cache).
//
// blobOnly: response stays small (a few numbers per id), but generating it
// parses ~5-10 MB of decompressed JSONL + JSON per id. Cache so the
// "Aggregates" toggle stays snappy.
const getCachedAgenticAggregates = cachedQuery(
(ids: number[]): Promise<AgenticAggregateMap> => getAgenticAggregates(getDb(), ids),
'agentic-aggregates',
{ blobOnly: true },
);

/** Upper bound on the number of distinct ids accepted in one request. */
const MAX_IDS_PER_REQUEST = 200;

/**
 * Parses a comma-separated `ids` query value into unique benchmark result ids.
 *
 * Each token is trimmed and converted with `Number(...)`. A token is kept only
 * when it is a positive safe integer, so empty tokens, non-numeric text,
 * fractional values such as `1.5`, zero, negatives, and magnitudes beyond
 * `Number.MAX_SAFE_INTEGER` are all dropped. First-seen order is preserved.
 */
function parseIdList(raw: string): number[] {
  const unique = new Set<number>();
  for (const token of raw.split(',')) {
    const candidate = Number(token.trim());
    if (Number.isSafeInteger(candidate) && candidate > 0) unique.add(candidate);
  }
  return [...unique];
}

/**
 * GET /api/v1/agentic-aggregates?ids=1,2,3
 *
 * Returns per-id mean/p50/p75/p90/p99 for ISL, OSL, KV cache utilization,
 * and prefix cache hit rate — computed live from the stored aiperf
 * profile_export.jsonl + server_metrics_json blobs. Ids without a
 * trace_replay blob (or with no usable samples) get nulls.
 *
 * Responses:
 * - 400 when `ids` is missing/empty, contains no valid positive integer id,
 *   or contains more than `MAX_IDS_PER_REQUEST` distinct ids.
 * - 500 when the underlying query throws (the error is logged).
 * - otherwise the aggregate map, wrapped by `cachedJson`.
 */
export async function GET(request: NextRequest) {
  const raw = request.nextUrl.searchParams.get('ids');
  if (!raw) {
    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
  }

  const ids = parseIdList(raw);
  if (ids.length === 0) {
    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
  }
  if (ids.length > MAX_IDS_PER_REQUEST) {
    return NextResponse.json(
      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
      { status: 400 },
    );
  }

  try {
    // Sort ascending so the same id set always reaches the cached query in the
    // same order (presumably keeps the cache key order-independent — confirm
    // how cachedQuery derives its key). `toSorted` already returns a copy.
    const sorted = ids.toSorted((a, b) => a - b);
    const result = await getCachedAgenticAggregates(sorted);
    return cachedJson(result);
  } catch (error) {
    console.error('Error fetching agentic aggregates:', error);
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
  }
}
38 changes: 38 additions & 0 deletions packages/app/src/app/api/v1/benchmark-siblings/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { type NextRequest, NextResponse } from 'next/server';

import { getDb } from '@semianalysisai/inferencex-db/connection';
import {
getBenchmarkSiblings,
type BenchmarkSiblings,
} from '@semianalysisai/inferencex-db/queries/benchmark-siblings';

import { cachedJson, cachedQuery } from '@/lib/api-cache';

export const dynamic = 'force-dynamic';

// Cached wrapper around `getBenchmarkSiblings`, bound to the handle returned
// by `getDb()`. Takes one benchmark result id and resolves to its
// `BenchmarkSiblings`, or `null` (the route maps `null` to a 404).
// 'benchmark-siblings' is the name handed to `cachedQuery` (presumably the
// cache namespace — confirm in @/lib/api-cache). No options object is passed.
const getCachedSiblings = cachedQuery(
(id: number): Promise<BenchmarkSiblings | null> => getBenchmarkSiblings(getDb(), id),
'benchmark-siblings',
);

/**
 * GET /api/v1/benchmark-siblings?id=N
 *
 * Returns the SKU (hw/framework/model/precision/spec/benchmark_type) of the
 * benchmark_result + all sibling rows that share that SKU within the same
 * workflow_run. Used by the agentic detail page to render a navigator.
 *
 * Responses:
 * - 400 when `id` is missing or is not a positive safe integer (non-numeric,
 *   fractional, zero, or negative values are rejected before any query runs).
 * - 404 when the lookup resolves to a falsy value (no such benchmark result).
 * - 500 when the lookup throws (the error is logged).
 * - otherwise the siblings payload, wrapped by `cachedJson`.
 */
export async function GET(request: NextRequest) {
  // `Number(null)` is 0, so a missing param fails the `id <= 0` check below.
  const id = Number(request.nextUrl.searchParams.get('id'));
  if (!Number.isSafeInteger(id) || id <= 0) {
    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
  }
  try {
    const data = await getCachedSiblings(id);
    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
    return cachedJson(data);
  } catch (error) {
    console.error('Error fetching benchmark siblings:', error);
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
  }
}
24 changes: 23 additions & 1 deletion packages/app/src/app/api/v1/benchmarks/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ describe('GET /api/v1/benchmarks', () => {
['dsr1'],
undefined,
undefined,
undefined,
);
});

Expand All @@ -72,6 +73,7 @@ describe('GET /api/v1/benchmarks', () => {
['dsr1'],
'2026-03-01',
undefined,
undefined,
);
});

Expand All @@ -82,7 +84,27 @@ describe('GET /api/v1/benchmarks', () => {
req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&exact=true'),
);
expect(res.status).toBe(200);
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith('mock-sql', ['dsr1'], '2026-03-01', true);
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
'mock-sql',
['dsr1'],
'2026-03-01',
true,
undefined,
);
});

it('passes runId param to query when provided', async () => {
mockGetLatestBenchmarks.mockResolvedValueOnce([]);

const res = await GET(req('/api/v1/benchmarks?model=DeepSeek-R1-0528&runId=26194160120'));
expect(res.status).toBe(200);
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
'mock-sql',
['dsr1'],
undefined,
undefined,
'26194160120',
);
});

it('returns 500 when query throws', async () => {
Expand Down
7 changes: 4 additions & 3 deletions packages/app/src/app/api/v1/benchmarks/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ import { loadFixture } from '@/lib/test-fixtures';
export const dynamic = 'force-dynamic';

const getCachedBenchmarks = cachedQuery(
(dbModelKeys: string[], date?: string, exact?: boolean) => {
(dbModelKeys: string[], date?: string, exact?: boolean, runId?: string) => {
if (JSON_MODE)
return Promise.resolve(jsonProvider.getLatestBenchmarks(dbModelKeys, date, exact));
return getLatestBenchmarks(getDb(), dbModelKeys, date, exact);
return getLatestBenchmarks(getDb(), dbModelKeys, date, exact, runId);
},
'benchmarks',
{ blobOnly: true },
Expand All @@ -25,14 +25,15 @@ export async function GET(request: NextRequest) {
const model = params.get('model') ?? '';
const date = params.get('date') ?? undefined;
const exact = params.get('exact') === 'true';
const runId = params.get('runId') ?? undefined;
const dbModelKeys = DISPLAY_MODEL_TO_DB[model];
if (!dbModelKeys || dbModelKeys.length === 0) {
return NextResponse.json({ error: 'Unknown model' }, { status: 400 });
}
if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks'));

try {
const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined);
const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
return cachedJson(rows);
} catch (error) {
console.error('Error fetching benchmarks:', error);
Expand Down
71 changes: 71 additions & 0 deletions packages/app/src/app/api/v1/derived-agentic-metrics/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { type NextRequest, NextResponse } from 'next/server';

import { getDb } from '@semianalysisai/inferencex-db/connection';
import {
getDerivedAgenticMetrics,
type DerivedAgenticMetricMap,
} from '@semianalysisai/inferencex-db/queries/derived-agentic-metrics';

import { cachedJson, cachedQuery } from '@/lib/api-cache';

export const dynamic = 'force-dynamic';

// Cached wrapper around `getDerivedAgenticMetrics`, bound to the handle
// returned by `getDb()`. Takes the list of ids and resolves to a
// `DerivedAgenticMetricMap`.
//
// blobOnly: the response is one entry per id with two numbers, but the
// derivation work parses thousands of JSONL records per blob — cache the
// computed result so a chart-refresh hits the warm path.
// Bumped to v2 when mean_p90_prefill_tps_per_user → p90_prefill_tps_per_user.
// Stale v1 cache entries return undefined for the new field and silently
// blank the chart with "No data available".
// NOTE(review): any further change to the response shape presumably needs
// another suffix bump on the name below for the same reason — confirm.
const getCachedDerivedAgenticMetrics = cachedQuery(
(ids: number[]): Promise<DerivedAgenticMetricMap> => getDerivedAgenticMetrics(getDb(), ids),
'derived-agentic-metrics-v2',
{ blobOnly: true },
);

/** Upper bound on the number of distinct ids accepted in one request. */
const MAX_IDS_PER_REQUEST = 200;

/**
 * Parses a comma-separated `ids` query value into unique benchmark result ids.
 *
 * Each token is trimmed and converted with `Number(...)`. A token is kept only
 * when it is a positive safe integer, so empty tokens, non-numeric text,
 * fractional values such as `1.5`, zero, negatives, and magnitudes beyond
 * `Number.MAX_SAFE_INTEGER` are all dropped. First-seen order is preserved.
 */
function parseIdList(raw: string): number[] {
  const unique = new Set<number>();
  for (const token of raw.split(',')) {
    const candidate = Number(token.trim());
    if (Number.isSafeInteger(candidate) && candidate > 0) unique.add(candidate);
  }
  return [...unique];
}

/**
 * GET /api/v1/derived-agentic-metrics?ids=1,2,3
 *
 * Returns per-id derived metrics computed live from the stored aiperf
 * profile_export.jsonl blobs:
 * - normalized_session_time_s: mean across sessions of session e2e time
 *   (Σ per-turn request_latency) rescaled by mean_load / session_load.
 * - p90_prefill_tps_per_user: P90 of per-turn prefill TPS/user (ISL / TTFT)
 *   across every turn in every session.
 *
 * Ids without a trace_replay blob or with unparseable records are omitted.
 *
 * Responses:
 * - 400 when `ids` is missing/empty, contains no valid positive integer id,
 *   or contains more than `MAX_IDS_PER_REQUEST` distinct ids.
 * - 500 when the underlying query throws (the error is logged).
 * - otherwise the metric map, wrapped by `cachedJson`.
 */
export async function GET(request: NextRequest) {
  const raw = request.nextUrl.searchParams.get('ids');
  if (!raw) {
    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
  }

  const ids = parseIdList(raw);
  if (ids.length === 0) {
    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
  }
  if (ids.length > MAX_IDS_PER_REQUEST) {
    return NextResponse.json(
      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
      { status: 400 },
    );
  }

  try {
    // Sort ascending so the same id set always reaches the cached query in the
    // same order (presumably keeps the cache key order-independent — confirm
    // how cachedQuery derives its key). `toSorted` already returns a copy.
    const sorted = ids.toSorted((a, b) => a - b);
    const result = await getCachedDerivedAgenticMetrics(sorted);
    return cachedJson(result);
  } catch (error) {
    console.error('Error fetching derived agentic metrics:', error);
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
  }
}
40 changes: 40 additions & 0 deletions packages/app/src/app/api/v1/request-timeline/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { type NextRequest, NextResponse } from 'next/server';

import { getDb } from '@semianalysisai/inferencex-db/connection';
import {
getRequestTimeline,
type RequestTimeline,
} from '@semianalysisai/inferencex-db/queries/request-timeline';

import { cachedJson, cachedQuery } from '@/lib/api-cache';

export const dynamic = 'force-dynamic';

// Cached wrapper around `getRequestTimeline`, bound to the handle returned by
// `getDb()`. Takes one benchmark result id and resolves to its
// `RequestTimeline`, or `null` (the route maps `null` to a 404).
// 'request-timeline' is the name handed to `cachedQuery` (presumably the cache
// namespace — confirm in @/lib/api-cache); `blobOnly: true` is passed as in the
// other blob-derived routes.
const getCachedRequestTimeline = cachedQuery(
(id: number): Promise<RequestTimeline | null> => getRequestTimeline(getDb(), id),
'request-timeline',
{ blobOnly: true },
);

/**
 * GET /api/v1/request-timeline?id=N
 *
 * Returns the per-request Gantt timeline for one agentic benchmark point.
 * Each request entry has ns-from-start offsets for credit/start/ack/end,
 * plus TTFT, ISL, OSL, conversation id, turn index, worker id. 404 if the
 * point has no stored profile_export.jsonl blob.
 *
 * Responses:
 * - 400 when `id` is missing or is not a positive safe integer (non-numeric,
 *   fractional, zero, or negative values are rejected before any query runs).
 * - 404 when the lookup resolves to a falsy value.
 * - 500 when the lookup throws (the error is logged).
 * - otherwise the timeline payload, wrapped by `cachedJson`.
 */
export async function GET(request: NextRequest) {
  // `Number(null)` is 0, so a missing param fails the `id <= 0` check below.
  const id = Number(request.nextUrl.searchParams.get('id'));
  if (!Number.isSafeInteger(id) || id <= 0) {
    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
  }
  try {
    const data = await getCachedRequestTimeline(id);
    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
    return cachedJson(data);
  } catch (error) {
    console.error('Error fetching request timeline:', error);
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
  }
}
Loading