SemiAnalysisAI · cquil11 · Apr 23, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/.eslintignore b/.eslintignore
@@ -0,0 +1,3 @@
+# Stale agent worktrees produced by parallel Claude Code sessions — they
+# hold their own branches and are linted as part of their own runs.
+.claude/worktrees/
diff --git a/.oxlintrc.json b/.oxlintrc.json
@@ -28,6 +28,7 @@
     "no-undef": "off",
     "no-underscore-dangle": "off",
     "no-useless-undefined": "off",
+    "require-unicode-regexp": "off",
     "no-warning-comments": "off",
     "prefer-destructuring": "off",
     "sort-imports": "off",

diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts
@@ -189,10 +189,14 @@ export function createMockInferenceContext(
     workflowInfo: null,
     selectedYAxisMetric: 'y_tpPerGpu',
     setSelectedYAxisMetric: namedStub('setSelectedYAxisMetric'),
+    selectedPercentile: 'p90',
+    setSelectedPercentile: namedStub('setSelectedPercentile'),
     selectedXAxisMetric: null,
     setSelectedXAxisMetric: namedStub('setSelectedXAxisMetric'),
     selectedE2eXAxisMetric: null,
     setSelectedE2eXAxisMetric: namedStub('setSelectedE2eXAxisMetric'),
+    selectedXAxisMode: 'interactivity' as const,
+    setSelectedXAxisMode: namedStub('setSelectedXAxisMode'),
     scaleType: 'auto',
     setScaleType: namedStub('setScaleType'),
     isLegendExpanded: true,

diff --git a/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx b/packages/app/src/app/(dashboard)/inference/agentic/[id]/page.tsx
@@ -0,0 +1,17 @@
+import type { Metadata } from 'next';
+
+import { AgenticPointDetail } from '@/components/inference/agentic-point/agentic-point-detail';
+
+export const metadata: Metadata = {
+  title: 'Agentic trace detail | InferenceX',
+  robots: { index: false },
+};
+
+export default async function AgenticPointDetailPage({
+  params,
+}: {
+  params: Promise<{ id: string }>;
+}) {
+  const { id } = await params;
+  return <AgenticPointDetail id={Number(id)} />;
+}
diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts
@@ -33,6 +33,10 @@ export function normalizeArtifactRows(
     if (!params) continue;
     const { config } = params;
     results.push({
+      // Synthetic id — overlay rows aren't persisted, so trace_replay lookups
+      // (keyed on benchmark_results.id) will always miss, which is the
+      // intended behaviour: overlays never have stored trace_replay blobs.
+      id: 0,
       hardware: config.hardware,
       framework: config.framework,
       model: config.model,
@@ -50,6 +54,8 @@ export function normalizeArtifactRows(
       decode_num_workers: config.decodeNumWorkers,
       num_prefill_gpu: config.numPrefillGpu,
       num_decode_gpu: config.numDecodeGpu,
+      benchmark_type: params.benchmarkType,
+      offload_mode: params.offloadMode,
       isl: params.isl,
       osl: params.osl,
       conc: params.conc,

diff --git a/packages/app/src/app/api/v1/agentic-aggregates/route.ts b/packages/app/src/app/api/v1/agentic-aggregates/route.ts
@@ -0,0 +1,64 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getAgenticAggregates,
+  type AgenticAggregateMap,
+} from '@semianalysisai/inferencex-db/queries/agentic-aggregates';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+// blobOnly: response stays small (a few numbers per id), but generating it
+// parses ~5-10 MB of decompressed JSONL + JSON per id. Cache so the
+// "Aggregates" toggle stays snappy.
+const getCachedAgenticAggregates = cachedQuery(
+  (ids: number[]): Promise<AgenticAggregateMap> => getAgenticAggregates(getDb(), ids),
+  'agentic-aggregates',
+  { blobOnly: true },
+);
+
+const MAX_IDS_PER_REQUEST = 200;
+
+/**
+ * GET /api/v1/agentic-aggregates?ids=1,2,3
+ *
+ * Returns per-id mean/p50/p75/p90/p99 for ISL, OSL, KV cache utilization,
+ * and prefix cache hit rate — computed live from the stored aiperf
+ * profile_export.jsonl + server_metrics_json blobs. Ids without a
+ * trace_replay blob (or with no usable samples) get nulls.
+ */
+export async function GET(request: NextRequest) {
+  const raw = request.nextUrl.searchParams.get('ids');
+  if (!raw) {
+    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
+  }
+
+  const ids = [
+    ...new Set(
+      raw
+        .split(',')
+        .map((s) => Number(s.trim()))
+        .filter((n) => Number.isFinite(n) && n > 0),
+    ),
+  ];
+  if (ids.length === 0) {
+    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
+  }
+  if (ids.length > MAX_IDS_PER_REQUEST) {
+    return NextResponse.json(
+      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
+      { status: 400 },
+    );
+  }
+
+  try {
+    const sorted = [...ids].toSorted((a, b) => a - b);
+    const result = await getCachedAgenticAggregates(sorted);
+    return cachedJson(result);
+  } catch (error) {
+    console.error('Error fetching agentic aggregates:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/benchmark-siblings/route.ts b/packages/app/src/app/api/v1/benchmark-siblings/route.ts
@@ -0,0 +1,38 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getBenchmarkSiblings,
+  type BenchmarkSiblings,
+} from '@semianalysisai/inferencex-db/queries/benchmark-siblings';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedSiblings = cachedQuery(
+  (id: number): Promise<BenchmarkSiblings | null> => getBenchmarkSiblings(getDb(), id),
+  'benchmark-siblings',
+);
+
+/**
+ * GET /api/v1/benchmark-siblings?id=N
+ *
+ * Returns the SKU (hw/framework/model/precision/spec/benchmark_type) of the
+ * benchmark_result + all sibling rows that share that SKU within the same
+ * workflow_run. Used by the agentic detail page to render a navigator.
+ */
+export async function GET(request: NextRequest) {
+  const id = Number(request.nextUrl.searchParams.get('id'));
+  if (!id || !Number.isFinite(id)) {
+    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
+  }
+  try {
+    const data = await getCachedSiblings(id);
+    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
+    return cachedJson(data);
+  } catch (error) {
+    console.error('Error fetching benchmark siblings:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/benchmarks/route.test.ts b/packages/app/src/app/api/v1/benchmarks/route.test.ts
@@ -59,6 +59,7 @@ describe('GET /api/v1/benchmarks', () => {
       ['dsr1'],
       undefined,
       undefined,
+      undefined,
     );
   });
 
@@ -72,6 +73,7 @@ describe('GET /api/v1/benchmarks', () => {
       ['dsr1'],
       '2026-03-01',
       undefined,
+      undefined,
     );
   });
 
@@ -82,7 +84,27 @@ describe('GET /api/v1/benchmarks', () => {
       req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&exact=true'),
     );
     expect(res.status).toBe(200);
-    expect(mockGetLatestBenchmarks).toHaveBeenCalledWith('mock-sql', ['dsr1'], '2026-03-01', true);
+    expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
+      'mock-sql',
+      ['dsr1'],
+      '2026-03-01',
+      true,
+      undefined,
+    );
+  });
+
+  it('passes runId param to query when provided', async () => {
+    mockGetLatestBenchmarks.mockResolvedValueOnce([]);
+
+    const res = await GET(req('/api/v1/benchmarks?model=DeepSeek-R1-0528&runId=26194160120'));
+    expect(res.status).toBe(200);
+    expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
+      'mock-sql',
+      ['dsr1'],
+      undefined,
+      undefined,
+      '26194160120',
+    );
   });
 
   it('returns 500 when query throws', async () => {

diff --git a/packages/app/src/app/api/v1/benchmarks/route.ts b/packages/app/src/app/api/v1/benchmarks/route.ts
@@ -11,10 +11,10 @@ import { loadFixture } from '@/lib/test-fixtures';
 export const dynamic = 'force-dynamic';
 
 const getCachedBenchmarks = cachedQuery(
-  (dbModelKeys: string[], date?: string, exact?: boolean) => {
+  (dbModelKeys: string[], date?: string, exact?: boolean, runId?: string) => {
     if (JSON_MODE)
       return Promise.resolve(jsonProvider.getLatestBenchmarks(dbModelKeys, date, exact));
-    return getLatestBenchmarks(getDb(), dbModelKeys, date, exact);
+    return getLatestBenchmarks(getDb(), dbModelKeys, date, exact, runId);
   },
   'benchmarks',
   { blobOnly: true },
@@ -25,14 +25,15 @@ export async function GET(request: NextRequest) {
   const model = params.get('model') ?? '';
   const date = params.get('date') ?? undefined;
   const exact = params.get('exact') === 'true';
+  const runId = params.get('runId') ?? undefined;
   const dbModelKeys = DISPLAY_MODEL_TO_DB[model];
   if (!dbModelKeys || dbModelKeys.length === 0) {
     return NextResponse.json({ error: 'Unknown model' }, { status: 400 });
   }
   if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks'));
 
   try {
-    const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined);
+    const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
     return cachedJson(rows);
   } catch (error) {
     console.error('Error fetching benchmarks:', error);

diff --git a/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts b/packages/app/src/app/api/v1/derived-agentic-metrics/route.ts
@@ -0,0 +1,71 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getDerivedAgenticMetrics,
+  type DerivedAgenticMetricMap,
+} from '@semianalysisai/inferencex-db/queries/derived-agentic-metrics';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+// blobOnly: the response is one entry per id with two numbers, but the
+// derivation work parses thousands of JSONL records per blob — cache the
+// computed result so a chart-refresh hits the warm path.
+// Bumped to v2 when mean_p90_prefill_tps_per_user → p90_prefill_tps_per_user.
+// Stale v1 cache entries return undefined for the new field and silently
+// blank the chart with "No data available".
+const getCachedDerivedAgenticMetrics = cachedQuery(
+  (ids: number[]): Promise<DerivedAgenticMetricMap> => getDerivedAgenticMetrics(getDb(), ids),
+  'derived-agentic-metrics-v2',
+  { blobOnly: true },
+);
+
+const MAX_IDS_PER_REQUEST = 200;
+
+/**
+ * GET /api/v1/derived-agentic-metrics?ids=1,2,3
+ *
+ * Returns per-id derived metrics computed live from the stored aiperf
+ * profile_export.jsonl blobs:
+ *  - normalized_session_time_s: mean across sessions of session e2e time
+ *    (Σ per-turn request_latency) rescaled by mean_load / session_load.
+ *  - p90_prefill_tps_per_user: P90 of per-turn prefill TPS/user (ISL / TTFT)
+ *    across every turn in every session.
+ *
+ * Ids without a trace_replay blob or with unparseable records are omitted.
+ */
+export async function GET(request: NextRequest) {
+  const raw = request.nextUrl.searchParams.get('ids');
+  if (!raw) {
+    return NextResponse.json({ error: 'ids query param is required' }, { status: 400 });
+  }
+
+  const ids = [
+    ...new Set(
+      raw
+        .split(',')
+        .map((s) => Number(s.trim()))
+        .filter((n) => Number.isFinite(n) && n > 0),
+    ),
+  ];
+  if (ids.length === 0) {
+    return NextResponse.json({ error: 'no valid ids provided' }, { status: 400 });
+  }
+  if (ids.length > MAX_IDS_PER_REQUEST) {
+    return NextResponse.json(
+      { error: `too many ids (max ${MAX_IDS_PER_REQUEST})` },
+      { status: 400 },
+    );
+  }
+
+  try {
+    const sorted = [...ids].toSorted((a, b) => a - b);
+    const result = await getCachedDerivedAgenticMetrics(sorted);
+    return cachedJson(result);
+  } catch (error) {
+    console.error('Error fetching derived agentic metrics:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/request-timeline/route.ts b/packages/app/src/app/api/v1/request-timeline/route.ts
@@ -0,0 +1,40 @@
+import { type NextRequest, NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import {
+  getRequestTimeline,
+  type RequestTimeline,
+} from '@semianalysisai/inferencex-db/queries/request-timeline';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedRequestTimeline = cachedQuery(
+  (id: number): Promise<RequestTimeline | null> => getRequestTimeline(getDb(), id),
+  'request-timeline',
+  { blobOnly: true },
+);
+
+/**
+ * GET /api/v1/request-timeline?id=N
+ *
+ * Returns the per-request Gantt timeline for one agentic benchmark point.
+ * Each request entry has ns-from-start offsets for credit/start/ack/end,
+ * plus TTFT, ISL, OSL, conversation id, turn index, worker id. 404 if the
+ * point has no stored profile_export.jsonl blob.
+ */
+export async function GET(request: NextRequest) {
+  const id = Number(request.nextUrl.searchParams.get('id'));
+  if (!id || !Number.isFinite(id)) {
+    return NextResponse.json({ error: 'id is required (benchmark_result_id)' }, { status: 400 });
+  }
+  try {
+    const data = await getCachedRequestTimeline(id);
+    if (!data) return NextResponse.json({ error: 'Not found' }, { status: 404 });
+    return cachedJson(data);
+  } catch (error) {
+    console.error('Error fetching request timeline:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}