From 0e35e5f0b10c2c9db10094031a2ac92e59fff9f3 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 23 Apr 2026 13:40:27 -0500 Subject: [PATCH 01/41] feat: agentic benchmark ingest + UI with offload-mode halo Adds agentic_traces scenario end-to-end: - Schema migrations for agentic scenario, availability, and KV offload mode - DB ingest/ETL + query updates to carry scenario, offload_mode, and server/theoretical cache-hit rates through to the API layer - Frontend types, filters (GlobalFilterContext / InferenceContext / ChartControls), URL state, and tooltip rows for agentic-only fields - ScatterGraph: subtle dashed halo on Pareto-frontier points that used KV offload so the tradeoff is visible at a glance --- packages/app/cypress/support/mock-data.ts | 2 + .../app/src/app/api/unofficial-run/route.ts | 2 + .../src/components/GlobalFilterContext.tsx | 12 +- .../components/inference/InferenceContext.tsx | 15 ++- .../inference/hooks/useChartData.ts | 34 +++-- .../app/src/components/inference/types.ts | 26 ++++ .../components/inference/ui/ChartControls.tsx | 27 +++- .../components/inference/ui/ScatterGraph.tsx | 21 +++ .../inference/utils/tooltipUtils.ts | 54 +++++++- .../app/src/components/ui/chart-selectors.tsx | 124 ++++++++++++++++++ .../unofficial-run-provider.test.ts | 2 + .../components/unofficial-run-provider.tsx | 4 +- packages/app/src/lib/api.ts | 14 +- .../app/src/lib/benchmark-transform.test.ts | 2 + packages/app/src/lib/benchmark-transform.ts | 65 ++++++++- packages/app/src/lib/data-mappings.ts | 72 +++++++++- packages/app/src/lib/url-state.ts | 2 + packages/constants/src/models.ts | 17 +++ .../db/migrations/002_agentic_scenario.sql | 30 +++++ .../migrations/003_agentic_availability.sql | 21 +++ packages/db/migrations/004_offload_mode.sql | 42 ++++++ packages/db/src/etl/benchmark-ingest.ts | 28 ++-- packages/db/src/etl/benchmark-mapper.ts | 45 ++++++- packages/db/src/ingest-ci-run.ts | 6 +- packages/db/src/ingest-gcs-backup.ts | 6 +- 
packages/db/src/ingest-supplemental.ts | 14 +- packages/db/src/json-provider.ts | 8 +- packages/db/src/queries/benchmarks.ts | 13 +- packages/db/src/queries/workflow-info.ts | 15 ++- 29 files changed, 645 insertions(+), 78 deletions(-) create mode 100644 packages/db/migrations/002_agentic_scenario.sql create mode 100644 packages/db/migrations/003_agentic_availability.sql create mode 100644 packages/db/migrations/004_offload_mode.sql diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index e6720c0b..7a4f59a9 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -189,6 +189,8 @@ export function createMockInferenceContext( workflowInfo: null, selectedYAxisMetric: 'y_tpPerGpu', setSelectedYAxisMetric: namedStub('setSelectedYAxisMetric'), + selectedPercentile: 'median', + setSelectedPercentile: namedStub('setSelectedPercentile'), selectedXAxisMetric: null, setSelectedXAxisMetric: namedStub('setSelectedXAxisMetric'), selectedE2eXAxisMetric: null, diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts index 79ac0665..dbfb9c33 100644 --- a/packages/app/src/app/api/unofficial-run/route.ts +++ b/packages/app/src/app/api/unofficial-run/route.ts @@ -49,6 +49,8 @@ export function normalizeArtifactRows( decode_num_workers: config.decodeNumWorkers, num_prefill_gpu: config.numPrefillGpu, num_decode_gpu: config.numDecodeGpu, + benchmark_type: params.benchmarkType, + offload_mode: params.offloadMode, isl: params.isl, osl: params.osl, conc: params.conc, diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index 65f510cd..f603081a 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -11,7 +11,7 @@ import { useState, } from 'react'; -import { DISPLAY_MODEL_TO_DB, islOslToSequence } from 
'@semianalysisai/inferencex-constants'; +import { DISPLAY_MODEL_TO_DB, rowToSequence } from '@semianalysisai/inferencex-constants'; import { useAvailability } from '@/hooks/api/use-availability'; import { useWorkflowInfo } from '@/hooks/api/use-workflow-info'; @@ -172,11 +172,7 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { const availableSequences = useMemo(() => { if (!availabilityRows) return SEQUENCE_OPTIONS; const seqs = [ - ...new Set( - modelRows - .map((r) => islOslToSequence(r.isl, r.osl)) - .filter((s): s is Sequence => s !== null), - ), + ...new Set(modelRows.map((r) => rowToSequence(r)).filter((s): s is Sequence => s !== null)), ]; return seqs.length > 0 ? seqs : SEQUENCE_OPTIONS; }, [availabilityRows, modelRows]); @@ -190,7 +186,7 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { // Precisions available for the selected model + sequence const availablePrecisions = useMemo(() => { if (!availabilityRows) return ['fp4']; - const rows = modelRows.filter((r) => islOslToSequence(r.isl, r.osl) === effectiveSequence); + const rows = modelRows.filter((r) => rowToSequence(r) === effectiveSequence); const precs = [...new Set(rows.map((r) => r.precision))].toSorted(); return precs.length > 0 ? 
precs : ['fp4']; }, [availabilityRows, modelRows, effectiveSequence]); @@ -205,7 +201,7 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { // Dates available for selected model + sequence + precisions const availableDates = useMemo(() => { if (!availabilityRows) return []; - const seqRows = modelRows.filter((r) => islOslToSequence(r.isl, r.osl) === effectiveSequence); + const seqRows = modelRows.filter((r) => rowToSequence(r) === effectiveSequence); const rows = seqRows.filter((r) => effectivePrecisions.includes(r.precision)); if (rows.length === 0) { return [...new Set(seqRows.map((r) => r.date))].toSorted(); diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index 7fa416fd..6f45d8d7 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -11,7 +11,7 @@ import { useState, } from 'react'; -import { DISPLAY_MODEL_TO_DB, islOslToSequence } from '@semianalysisai/inferencex-constants'; +import { DISPLAY_MODEL_TO_DB, rowToSequence } from '@semianalysisai/inferencex-constants'; import { track } from '@/lib/analytics'; import { FAVORITE_PRESETS, type FavoritePreset } from '@/components/favorites/favorite-presets'; @@ -110,6 +110,11 @@ export function InferenceProvider({ const [selectedE2eXAxisMetric, setSelectedE2eXAxisMetric] = useState( () => getUrlParam('i_e2e_xmetric') || null, ); + // Latency percentile applied to the chart x-axis for agentic scenarios. + // Values: 'median' | 'p90' | 'p99' | 'p99.9'. Non-agentic charts ignore. 
+ const [selectedPercentile, setSelectedPercentile] = useState( + () => getUrlParam('i_pctl') || 'median', + ); const [scaleType, setScaleType] = useState<'auto' | 'linear' | 'log'>( () => (getUrlParam('i_scale') as 'auto' | 'linear' | 'log') || 'auto', ); @@ -163,6 +168,7 @@ export function InferenceProvider({ effectiveRunDate, isActive, latestDate, + selectedPercentile, ); // For GPU comparison date picker — use shared availability data from global filters @@ -176,7 +182,7 @@ export function InferenceProvider({ if (!availabilityRows) return availableDates; const rows = availabilityRows.filter((r) => { if (!dbModelKeys.includes(r.model)) return false; - if (islOslToSequence(r.isl, r.osl) !== effectiveSequence) return false; + if (rowToSequence(r) !== effectiveSequence) return false; if (!effectivePrecisions.includes(r.precision)) return false; if (!r.hardware) return false; const hwKey = buildAvailabilityHwKey(r.hardware, r.framework, r.spec_method, r.disagg); @@ -201,7 +207,7 @@ export function InferenceProvider({ const hwKeys = new Set(); for (const r of availabilityRows) { if (!dbModelKeys.includes(r.model)) continue; - if (islOslToSequence(r.isl, r.osl) !== effectiveSequence) continue; + if (rowToSequence(r) !== effectiveSequence) continue; if (!effectivePrecisions.includes(r.precision)) continue; if (!r.hardware) continue; const hwKey = buildAvailabilityHwKey(r.hardware, r.framework, r.spec_method, r.disagg); @@ -589,6 +595,7 @@ export function InferenceProvider({ useUrlStateSync( { i_metric: selectedYAxisMetric, + i_pctl: selectedPercentile, i_gpus: selectedGPUs.join(','), i_dates: selectedDates.join(','), i_dstart: selectedDateRange.startDate, @@ -783,6 +790,8 @@ export function InferenceProvider({ workflowInfo, selectedYAxisMetric, setSelectedYAxisMetric: setSelectedYAxisMetricAndClear, + selectedPercentile, + setSelectedPercentile, selectedGPUs, setSelectedGPUs: setSelectedGPUsAndClear, availableGPUs, diff --git 
a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts index 625e63ab..81ab0780 100644 --- a/packages/app/src/components/inference/hooks/useChartData.ts +++ b/packages/app/src/components/inference/hooks/useChartData.ts @@ -1,7 +1,7 @@ import { useMemo, useRef } from 'react'; import { useQueries } from '@tanstack/react-query'; -import { sequenceToIslOsl } from '@semianalysisai/inferencex-constants'; +import { rowToSequence } from '@semianalysisai/inferencex-constants'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import type { @@ -15,7 +15,7 @@ import type { import { filterDataByCostLimit } from '@/components/inference/utils'; import { useBenchmarks, benchmarkQueryOptions } from '@/hooks/api/use-benchmarks'; import { GPU_ALIAS_TO_CANONICAL, getModelSortIndex } from '@/lib/constants'; -import { transformBenchmarkRows } from '@/lib/benchmark-transform'; +import { transformBenchmarkRows, withPercentile } from '@/lib/benchmark-transform'; import type { Model, Sequence } from '@/lib/data-mappings'; import { calculateCostsForGpus, calculatePowerForGpus } from '@/lib/utils'; @@ -79,6 +79,7 @@ export function useChartData( selectedRunDate?: string, enabled = true, latestAvailableDate?: string, + selectedPercentile = 'median', ) { // When the selected date is the latest available, use '' (empty string) to match // the initial no-date query key, reusing the eagerly-fetched benchmarks from the @@ -119,11 +120,13 @@ export function useChartData( // Merge main rows with comparison date rows. // Stamp each row with the *requested* date (not the actual DB date) so that // GPUGraph's activeDates filter (keyed by user-selected date) matches the points. 
- const sequenceIslOsl = useMemo(() => sequenceToIslOsl(selectedSequence), [selectedSequence]); + // + // rowToSequence handles both fixed-seq (via isl/osl) and agentic (via + // benchmark_type), so one filter covers every scenario. const rows = useMemo(() => { - if (!allRows || !sequenceIslOsl) return []; - const seqFilter = (r: { isl: number; osl: number }) => - r.isl === sequenceIslOsl.isl && r.osl === sequenceIslOsl.osl; + if (!allRows) return []; + const seqFilter = (r: { isl: number | null; osl: number | null; benchmark_type: string }) => + rowToSequence(r) === selectedSequence; const seqFiltered = allRows.filter(seqFilter); // For each (hw, framework, spec_method, disagg, precision) group, keep only @@ -150,14 +153,14 @@ export function useChartData( .map((r) => ({ ...r, date: comparisonDates[i], actualDate: r.date })), ); return [...mainRows, ...extraRows]; - }, [allRows, sequenceIslOsl, comparisonDates, comparisonDataKey, selectedRunDate]); + }, [allRows, selectedSequence, comparisonDates, comparisonDataKey, selectedRunDate]); // Transform filtered rows into chart data const { chartData, hardwareConfig: rawHardwareConfig } = useMemo(() => { if (rows.length === 0) return { chartData: [] as InferenceData[][], hardwareConfig: {} as HardwareConfig }; - return transformBenchmarkRows(rows); - }, [rows]); + return transformBenchmarkRows(rows, selectedPercentile); + }, [rows, selectedPercentile]); // Sort hardware config — stabilize reference when keys haven't changed. // Different sequences for the same model often have the same GPU configs, @@ -192,8 +195,11 @@ export function useChartData( (chartDefinitions as ChartDefinition[]).map((chartDef) => { const metricKey = selectedYAxisMetric.replace('y_', '') as YAxisMetricKey; - // Determine dynamic x-axis - let xAxisField: keyof AggDataEntry = chartDef.x; + // Default x-axis = chart's natural latency metric, percentile-adjusted + // for the agentic case (median_e2el → p99_e2el etc.). 
For non-agentic + // scenarios `withPercentile` is a no-op when percentile === 'median'. + const naturalX = withPercentile(chartDef.x, selectedPercentile) as keyof AggDataEntry; + let xAxisField: keyof AggDataEntry = naturalX; let xAxisLabel = chartDef.x_label; const metricTitle = @@ -232,8 +238,10 @@ export function useChartData( // (e.g. interactivity → TTFT: "higher is better" → "lower is better"). // E2EL → TTFT keeps the same direction ("lower is better" for both), // so no roofline flip is needed for the e2e chart. + // Compare against `naturalX` (percentile-adjusted) — switching the + // percentile of the same logical metric is NOT a flip. const xAxisFlipped = - xAxisField !== chartDef.x && !(chartDef.chartType === 'e2e' && isTtftOverride); + xAxisField !== naturalX && !(chartDef.chartType === 'e2e' && isTtftOverride); const yLabelKey = `${selectedYAxisMetric}_label` as keyof ChartDefinition; const dynamicYLabel = chartDef[yLabelKey]; @@ -261,7 +269,7 @@ export function useChartData( xAxisField, }; }), - [selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric], + [selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, selectedPercentile], ); // Build renderable graphs (data processing + stable chart definitions) diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index a23707ba..53c8d84c 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -88,6 +88,29 @@ export interface AggDataEntry { actualDate?: string; /** URL to the GitHub Actions workflow run that produced this data point. */ run_url?: string; + /** Benchmark scenario: `single_turn` (fixed-seq isl/osl) or `agentic_traces`. */ + benchmark_type?: string; + /** ISL in tokens — null for agentic_traces. */ + isl?: number | null; + /** OSL in tokens — null for agentic_traces. 
*/ + osl?: number | null; + // ── Agentic-only fields (populated from metrics JSONB for `agentic_traces` rows) ── + /** "on" | "off" — whether KV cache offload to CPU was enabled. */ + offload_mode?: string; + /** Actual server-observed GPU prefix-cache hit rate (0..1). */ + server_gpu_cache_hit_rate?: number; + /** Actual server-observed CPU prefix-cache hit rate (0..1). */ + server_cpu_cache_hit_rate?: number; + /** Infinite-cache theoretical hit rate (0..1) computed from trace. */ + theoretical_cache_hit_rate?: number; + /** Total requests attempted during the window. */ + num_requests_total?: number; + /** Requests that completed successfully. */ + num_requests_successful?: number; + /** Total prompt tokens served. */ + total_prompt_tokens?: number; + /** Total generated (output) tokens. */ + total_generation_tokens?: number; } /** @@ -468,6 +491,9 @@ export interface InferenceChartContextType { workflowInfo: any; selectedYAxisMetric: string; setSelectedYAxisMetric: (metric: string) => void; + /** Latency percentile for the x-axis under agentic scenarios (median/p90/p99/p99.9). 
*/ + selectedPercentile: string; + setSelectedPercentile: (p: string) => void; selectedXAxisMetric: string | null; setSelectedXAxisMetric: (metric: string | null) => void; selectedE2eXAxisMetric: string | null; diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx index 5f8e7787..e4f55ad7 100644 --- a/packages/app/src/components/inference/ui/ChartControls.tsx +++ b/packages/app/src/components/inference/ui/ChartControls.tsx @@ -1,11 +1,14 @@ 'use client'; +import { useEffect, useState } from 'react'; + import { track } from '@/lib/analytics'; import { useInference } from '@/components/inference/InferenceContext'; import { ModelSelector, - SequenceSelector, + ScenarioSelector, + PercentileSelector, PrecisionSelector, } from '@/components/ui/chart-selectors'; import { DateRangePicker } from '@/components/ui/date-range-picker'; @@ -23,7 +26,7 @@ import { import { TooltipProvider } from '@/components/ui/tooltip'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import type { ChartDefinition } from '@/components/inference/types'; -import type { Model, Sequence } from '@/lib/data-mappings'; +import { Sequence, type Model, type Percentile } from '@/lib/data-mappings'; // Build Y-axis metric options from static chart config JSON — available immediately, no API wait const METRIC_GROUPS = [ @@ -78,6 +81,13 @@ interface ChartControlsProps { } export default function ChartControls({ hideGpuComparison = false }: ChartControlsProps) { + // The percentile selector is rendered conditionally on `selectedSequence`, + // which on the client is hydrated from URL params. SSR doesn't see the URL, + // so deferring the conditional until after mount keeps the initial DOM + // identical between server and client (avoids hydration warnings). 
+ const [mounted, setMounted] = useState(false); + useEffect(() => setMounted(true), []); + const { selectedModel, setSelectedModel, @@ -87,6 +97,8 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro setSelectedPrecisions, selectedYAxisMetric, setSelectedYAxisMetric, + selectedPercentile, + setSelectedPercentile, graphs, selectedGPUs, setSelectedGPUs, @@ -203,12 +215,19 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro availableModels={availableModels} data-testid="model-selector" /> - + {mounted && selectedSequence === Sequence.AgenticTraces && ( + setSelectedPercentile(p)} + data-testid="percentile-selector" + /> + )} ('.dot-group').each(function (d) { + const onFrontier = optimalPointKeys.has(`${d.hwKey}_${d.precision}-${d.x}-${d.y}`); + const showHalo = onFrontier && d.offload_mode === 'on'; + d3.select(this) + .selectAll('.offload-halo') + .data(showHalo ? [true] : []) + .join('circle') + .attr('class', 'offload-halo') + .attr('r', POINT_SIZE + 4) + .attr('fill', 'none') + .attr('stroke', 'var(--foreground)') + .attr('stroke-width', 1.5) + .attr('stroke-dasharray', '3 2') + .attr('opacity', 0.9) + .attr('pointer-events', 'none'); + }); + // Double-click to track/untrack zoomGroup .selectAll('.dot-group') @@ -1567,6 +1585,9 @@ const ScatterGraph = React.memo( chartDefinition.chartType, xScaleConfig._isLog, yScaleConfig.type, + optimalPointKeys, + getCssColor, + resolveColor, ], ); diff --git a/packages/app/src/components/inference/utils/tooltipUtils.ts b/packages/app/src/components/inference/utils/tooltipUtils.ts index e88e9930..7391225e 100644 --- a/packages/app/src/components/inference/utils/tooltipUtils.ts +++ b/packages/app/src/components/inference/utils/tooltipUtils.ts @@ -88,6 +88,51 @@ const runLinkHTML = (runUrl?: string) => const tooltipLine = (label: string, value: string | number) => `
${label}: ${value}
`; +const formatPct = (v: number | undefined): string | null => + v === undefined || v === null || Number.isNaN(v) ? null : `${(v * 100).toFixed(1)}%`; + +/** + * Agentic-only tooltip rows: offload mode, KV cache hit rates, request + * success, token totals. Returns an empty string for non-agentic rows. + */ +const generateAgenticHTML = (d: InferenceData): string => { + if (d.benchmark_type !== 'agentic_traces') return ''; + + const parts: string[] = []; + if (d.offload_mode) { + parts.push(tooltipLine('Offload Mode', d.offload_mode.toUpperCase())); + } + + const gpuHit = formatPct(d.server_gpu_cache_hit_rate); + const cpuHit = formatPct(d.server_cpu_cache_hit_rate); + const theoHit = formatPct(d.theoretical_cache_hit_rate); + if (gpuHit) parts.push(tooltipLine('GPU Cache Hit Rate', gpuHit)); + if (cpuHit) parts.push(tooltipLine('CPU Cache Hit Rate', cpuHit)); + if (theoHit) parts.push(tooltipLine('Theoretical Cache Hit Rate', theoHit)); + + if (d.num_requests_total !== undefined && d.num_requests_successful !== undefined) { + const successPct = + d.num_requests_total > 0 + ? ` (${((d.num_requests_successful / d.num_requests_total) * 100).toFixed(0)}%)` + : ''; + parts.push( + tooltipLine( + 'Requests', + `${d.num_requests_successful} / ${d.num_requests_total}${successPct}`, + ), + ); + } + + if (d.total_prompt_tokens !== undefined) { + parts.push(tooltipLine('Prompt Tokens', formatNumber(d.total_prompt_tokens))); + } + if (d.total_generation_tokens !== undefined) { + parts.push(tooltipLine('Generated Tokens', formatNumber(d.total_generation_tokens))); + } + + return parts.join(''); +}; + /** * Generates HTML for the parallelism configuration section of a tooltip. * Falls back to GPU count for old data without parallelism fields. @@ -177,9 +222,10 @@ export const generateTooltipContent = (config: TooltipConfig): string => {
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)} ${runLinkHTML(runUrl)} ${ isPinned @@ -231,9 +277,10 @@ export const generateOverlayTooltipContent = (config: OverlayTooltipConfig): str
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)}
`; }; @@ -292,9 +339,10 @@ export const generateGPUGraphTooltipContent = (config: TooltipConfig): string =>
Concurrency: ${d.conc}
-
+
Precision: ${d.precision.toUpperCase()}
+ ${generateAgenticHTML(d)} ${runLinkHTML(runUrl)}
`; diff --git a/packages/app/src/components/ui/chart-selectors.tsx b/packages/app/src/components/ui/chart-selectors.tsx index 75e2f257..1c843e12 100644 --- a/packages/app/src/components/ui/chart-selectors.tsx +++ b/packages/app/src/components/ui/chart-selectors.tsx @@ -19,12 +19,16 @@ import { type Model, type Precision, type Sequence, + type Percentile, + PERCENTILE_OPTIONS, getModelCategory, getModelLabel, + getPercentileLabel, getPrecisionLabel, getSequenceCategory, getSequenceLabel, groupByCategory, + sequenceKind, } from '@/lib/data-mappings'; function DeprecatedLabel({ reason }: { reason: string }) { @@ -167,6 +171,126 @@ export function SequenceSelector({ ); } +interface ScenarioSelectorProps { + id?: string; + value: string; + onChange: (value: Sequence) => void; + availableSequences: string[]; + 'data-testid'?: string; +} + +/** + * Scenario selector — fixed-seq-len rows grouped under "Fixed Sequence Length", + * agentic-trace rows rendered flat below. Label is "Scenario" (the ISL/OSL + * framing only applies to the fixed-seq subset). + */ +export function ScenarioSelector({ + id = 'scenario-select', + value, + onChange, + availableSequences, + 'data-testid': testId, +}: ScenarioSelectorProps) { + const fixedSeq = availableSequences.filter((s) => sequenceKind(s as Sequence) === 'fixed-seq'); + const agentic = availableSequences.filter((s) => sequenceKind(s as Sequence) === 'agentic'); + const fixedGroups = groupByCategory(fixedSeq, (s) => getSequenceCategory(s as Sequence)); + + return ( +
+ + +
+ ); +} + +interface PercentileSelectorProps { + id?: string; + value: string; + onChange: (value: Percentile) => void; + 'data-testid'?: string; +} + +/** + * Latency percentile selector for agentic-trace charts. The selected value + * rewrites the chart x-axis metric from `median_*` to `{percentile}_*`, so + * picking p99 plots p99 e2e latency / interactivity instead of the median. + */ +export function PercentileSelector({ + id = 'percentile-select', + value, + onChange, + 'data-testid': testId, +}: PercentileSelectorProps) { + return ( +
+ + +
+ ); +} + interface PrecisionSelectorProps { id?: string; value: string[]; diff --git a/packages/app/src/components/unofficial-run-provider.test.ts b/packages/app/src/components/unofficial-run-provider.test.ts index f4263d2c..05b522c5 100644 --- a/packages/app/src/components/unofficial-run-provider.test.ts +++ b/packages/app/src/components/unofficial-run-provider.test.ts @@ -29,6 +29,8 @@ function stubRow(overrides: Partial = {}): BenchmarkRow { decode_num_workers: 0, num_prefill_gpu: 8, num_decode_gpu: 8, + benchmark_type: 'single_turn', + offload_mode: 'off', isl: 1024, osl: 1024, conc: 128, diff --git a/packages/app/src/components/unofficial-run-provider.tsx b/packages/app/src/components/unofficial-run-provider.tsx index 2dccdf7f..42530a51 100644 --- a/packages/app/src/components/unofficial-run-provider.tsx +++ b/packages/app/src/components/unofficial-run-provider.tsx @@ -12,7 +12,7 @@ import { import type { ChartDefinition, HardwareConfig, InferenceData } from '@/components/inference/types'; import { UnofficialBanner } from '@/components/ui/unofficial-banner'; -import { DB_MODEL_TO_DISPLAY, islOslToSequence } from '@semianalysisai/inferencex-constants'; +import { DB_MODEL_TO_DISPLAY, rowToSequence } from '@semianalysisai/inferencex-constants'; import { computeToggle } from '@/hooks/useTogglableSet'; import type { BenchmarkRow, EvalRow } from '@/lib/api'; import { normalizeEvalHardwareKey } from '@/lib/chart-utils'; @@ -93,7 +93,7 @@ export function buildChartData(benchmarks: BenchmarkRow[]): UnofficialChartData const groups = new Map(); for (const row of benchmarks) { const displayModel = DB_MODEL_TO_DISPLAY[row.model] ?? 
row.model; - const sequence = islOslToSequence(row.isl, row.osl); + const sequence = rowToSequence(row); if (!sequence) continue; const key = `${displayModel}_${sequence}`; if (!groups.has(key)) groups.set(key, []); diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts index 11ba4521..240251c3 100644 --- a/packages/app/src/lib/api.ts +++ b/packages/app/src/lib/api.ts @@ -23,9 +23,13 @@ export interface BenchmarkRow { decode_num_workers: number; num_prefill_gpu: number; num_decode_gpu: number; - isl: number; - osl: number; + benchmark_type: string; + // Null for agentic_traces rows; numeric for single_turn fixed-seq rows. + isl: number | null; + osl: number | null; conc: number; + /** KV-cache offload mode: 'on' | 'off'. Defaults to 'off' for fixed-seq. */ + offload_mode: string; image: string | null; metrics: Record; date: string; @@ -140,13 +144,15 @@ export function fetchWorkflowInfo(date: string, signal?: AbortSignal) { export interface AvailabilityRow { model: string; - isl: number; - osl: number; + // Null for agentic_traces rows; numeric for single_turn fixed-seq rows. 
+ isl: number | null; + osl: number | null; precision: string; hardware: string; framework: string; spec_method: string; disagg: boolean; + benchmark_type: string; date: string; } diff --git a/packages/app/src/lib/benchmark-transform.test.ts b/packages/app/src/lib/benchmark-transform.test.ts index be76438e..6a6c97c8 100644 --- a/packages/app/src/lib/benchmark-transform.test.ts +++ b/packages/app/src/lib/benchmark-transform.test.ts @@ -23,6 +23,8 @@ function makeRow(overrides: Partial = {}): BenchmarkRow { decode_num_workers: 0, num_prefill_gpu: 8, num_decode_gpu: 8, + benchmark_type: 'single_turn', + offload_mode: 'off', isl: 1024, osl: 1024, conc: 64, diff --git a/packages/app/src/lib/benchmark-transform.ts b/packages/app/src/lib/benchmark-transform.ts index 107f0b12..69745da2 100644 --- a/packages/app/src/lib/benchmark-transform.ts +++ b/packages/app/src/lib/benchmark-transform.ts @@ -15,9 +15,39 @@ import { createChartDataPoint, getHardwareKey } from '@/lib/chart-utils'; import { getHardwareConfig } from '@/lib/constants'; import type { BenchmarkRow } from '@/lib/api'; +/** + * Agentic trace-replay runs (`benchmark_type === 'agentic_traces'`) emit ttft/ttlt/itl + * but not the intvty/e2el/tpot keys the chart pipeline expects. Bridge them here: + * e2el ≡ ttlt (time-to-last-token == end-to-end latency) + * tpot ≡ itl (time-per-output-token == inter-token-latency for single-output) + * intvty ≡ 1/itl (tok/s from the user's perspective) + * Existing fields win if present; we only fill in the gaps. 
+ */ +function agenticAliases(m: Record): Record { + const out: Record = {}; + for (const suffix of ['mean', 'median', 'p90', 'p99', 'p99.9']) { + const itl = m[`${suffix}_itl`]; + const ttlt = m[`${suffix}_ttlt`]; + if (m[`${suffix}_e2el`] === undefined && ttlt !== undefined) out[`${suffix}_e2el`] = ttlt; + if (m[`${suffix}_tpot`] === undefined && itl !== undefined) out[`${suffix}_tpot`] = itl; + if (m[`${suffix}_intvty`] === undefined && itl !== undefined && itl > 0) { + out[`${suffix}_intvty`] = 1 / itl; + } + } + return out; +} + /** Convert a DB benchmark row to an AggDataEntry. */ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry { - const m = row.metrics; + const isAgentic = row.benchmark_type === 'agentic_traces'; + const m = isAgentic ? { ...row.metrics, ...agenticAliases(row.metrics) } : row.metrics; + // Prefer the dedicated column (added in migration 004); fall back to the + // legacy stash inside `metrics` for any rows ingested before that column + // existed. + const rawMetrics = row.metrics as Record; + const offloadMode = + row.offload_mode ?? + (typeof rawMetrics.offload_mode === 'string' ? rawMetrics.offload_mode : undefined); return { hw: row.hardware, framework: row.framework, @@ -68,6 +98,17 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry { date: row.date, actualDate: (row as any).actualDate ?? row.date, run_url: row.run_url ?? 
undefined, + benchmark_type: row.benchmark_type, + isl: row.isl, + osl: row.osl, + offload_mode: offloadMode, + server_gpu_cache_hit_rate: m.server_gpu_cache_hit_rate, + server_cpu_cache_hit_rate: m.server_cpu_cache_hit_rate, + theoretical_cache_hit_rate: m.theoretical_cache_hit_rate, + num_requests_total: m.num_requests_total, + num_requests_successful: m.num_requests_successful, + total_prompt_tokens: m.total_prompt_tokens, + total_generation_tokens: m.total_generation_tokens, }; } @@ -77,13 +118,30 @@ interface PreparedEntry { date: string; } +/** + * Rewrite a chart x-axis key to use a different latency percentile prefix + * (`median_` → `p99_` etc). Only touches keys that start with a known + * percentile prefix; leaves everything else alone. + */ +export function withPercentile(key: string, percentile: string): string { + return key.replace(/^(mean|median|p90|p99|p99\.9)_/, `${percentile}_`); +} + /** * Transform raw BenchmarkRow[] into chart-ready InferenceData[][] and HardwareConfig. * Returns one InferenceData[] per chart definition (e2e, interactivity). * * Converts rows to AggDataEntry once, then reuses for each chart definition. + * + * @param percentile Optional latency percentile for the chart x-axis + * (default 'median'). Swaps `median_intvty`/`median_e2el` in the chart + * definition for the chosen percentile — only agentic rows carry the + * full set (median/p90/p99/p99.9) so this mainly affects that scenario. 
*/ -export function transformBenchmarkRows(rows: BenchmarkRow[]): { +export function transformBenchmarkRows( + rows: BenchmarkRow[], + percentile = 'median', +): { chartData: InferenceData[][]; hardwareConfig: HardwareConfig; } { @@ -109,13 +167,14 @@ export function transformBenchmarkRows(rows: BenchmarkRow[]): { // Phase 2: Build chart data per chart definition (reusing prepared entries) const chartData = (chartDefinitions as ChartDefinition[]).map((chartDef) => { + const xKey = withPercentile(chartDef.x, percentile); const groupedByHw: Record = {}; for (const { entry, hwKey, date } of prepared) { const dataPoint = createChartDataPoint( date, entry, - chartDef.x as keyof AggDataEntry, + xKey as keyof AggDataEntry, chartDef.y as keyof AggDataEntry, hwKey, ); diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index 823b6823..8900f50e 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -102,17 +102,77 @@ export enum Sequence { OneK_OneK = '1k/1k', OneK_EightK = '1k/8k', EightK_OneK = '8k/1k', + AgenticTraces = 'agentic-traces', } -const SEQUENCE_CONFIG: Record = - { - [Sequence.OneK_OneK]: { label: '1K / 1K', compact: '1k1k', category: 'default' }, - [Sequence.OneK_EightK]: { label: '1K / 8K', compact: '1k8k', category: 'deprecated' }, - [Sequence.EightK_OneK]: { label: '8K / 1K', compact: '8k1k', category: 'default' }, - }; +/** + * Top-level scenario kind. Fixed-seq sequences cluster under a single group + * in the selector; agentic traces sit alongside as their own kind. + */ +export type ScenarioKind = 'fixed-seq' | 'agentic'; + +export function sequenceKind(seq: Sequence): ScenarioKind { + return seq === Sequence.AgenticTraces ? 
'agentic' : 'fixed-seq'; +} + +const SEQUENCE_CONFIG: Record< + Sequence, + { label: string; compact: string; category: CategoryTag; kind: ScenarioKind } +> = { + [Sequence.OneK_OneK]: { + label: '1K / 1K', + compact: '1k1k', + category: 'default', + kind: 'fixed-seq', + }, + [Sequence.OneK_EightK]: { + label: '1K / 8K', + compact: '1k8k', + category: 'deprecated', + kind: 'fixed-seq', + }, + [Sequence.EightK_OneK]: { + label: '8K / 1K', + compact: '8k1k', + category: 'default', + kind: 'fixed-seq', + }, + [Sequence.AgenticTraces]: { + label: 'Agentic Traces', + compact: 'agentic', + category: 'default', + kind: 'agentic', + }, +}; export const SEQUENCE_OPTIONS = Object.keys(SEQUENCE_CONFIG) as Sequence[]; +/** + * Percentile of the latency distribution used for the chart x-axis when + * viewing agentic traces. Agentic rows carry median/p90/p99/p99.9 variants + * for ttft, ttlt (=e2el), and itl (and intvty derived from itl) — pick which + * slice to plot. + */ +export enum Percentile { + Median = 'median', + P90 = 'p90', + P99 = 'p99', + P99_9 = 'p99.9', +} + +const PERCENTILE_CONFIG: Record = { + [Percentile.Median]: { label: 'p50 (median)' }, + [Percentile.P90]: { label: 'p90' }, + [Percentile.P99]: { label: 'p99' }, + [Percentile.P99_9]: { label: 'p99.9' }, +}; + +export const PERCENTILE_OPTIONS = Object.keys(PERCENTILE_CONFIG) as Percentile[]; + +export function getPercentileLabel(p: Percentile): string { + return PERCENTILE_CONFIG[p]?.label ?? 
p; +} + export const DEPRECATED_SEQUENCES: ReadonlySet = new Set( (Object.entries(SEQUENCE_CONFIG) as [Sequence, (typeof SEQUENCE_CONFIG)[Sequence]][]) .filter(([, c]) => c.category === 'deprecated') diff --git a/packages/app/src/lib/url-state.ts b/packages/app/src/lib/url-state.ts index 3947488f..fb2e9d70 100644 --- a/packages/app/src/lib/url-state.ts +++ b/packages/app/src/lib/url-state.ts @@ -22,6 +22,7 @@ const URL_STATE_KEYS = [ 'i_seq', 'i_prec', 'i_metric', + 'i_pctl', 'i_xmetric', 'i_e2e_xmetric', 'i_scale', @@ -61,6 +62,7 @@ export const PARAM_DEFAULTS: Record = { i_seq: '8k/1k', i_prec: 'fp4', i_metric: 'y_tpPerGpu', + i_pctl: 'median', i_xmetric: 'p99_ttft', i_e2e_xmetric: '', i_scale: 'auto', diff --git a/packages/constants/src/models.ts b/packages/constants/src/models.ts index 6d646f08..d9a3d2d1 100644 --- a/packages/constants/src/models.ts +++ b/packages/constants/src/models.ts @@ -53,3 +53,20 @@ export function islOslToSequence(isl: number, osl: number): string | null { }; return map[`${isl}_${osl}`] ?? null; } + +/** + * Map a benchmark/availability row to its sequence (scenario) string. + * - `agentic_traces` rows map to `'agentic-traces'` regardless of isl/osl. + * - Other rows (today: `single_turn`) fall back to `islOslToSequence`. + * Returns `null` for rows that can't be classified (e.g. `single_turn` with + * unmapped isl/osl values). + */ +export function rowToSequence(row: { + isl: number | null; + osl: number | null; + benchmark_type: string; +}): string | null { + if (row.benchmark_type === 'agentic_traces') return 'agentic-traces'; + if (row.isl === null || row.osl === null) return null; + return islOslToSequence(row.isl, row.osl); +} diff --git a/packages/db/migrations/002_agentic_scenario.sql b/packages/db/migrations/002_agentic_scenario.sql new file mode 100644 index 00000000..c143914e --- /dev/null +++ b/packages/db/migrations/002_agentic_scenario.sql @@ -0,0 +1,30 @@ +-- Support agentic scenarios in benchmark_results. 
+-- +-- Scenarios are discriminated by benchmark_type: +-- 'single_turn' — fixed-seq-len runs (1k1k, 1k8k, 8k1k, …). isl/osl set. +-- 'agentic_traces' — trace-replay agentic runs. isl/osl NULL. +-- +-- conc retains its meaning (concurrent users/requests) for both. + +-- 1) isl/osl become nullable for agentic rows +alter table benchmark_results + alter column isl drop not null, + alter column osl drop not null; + +-- 2) CHECK constraints: positive-or-null +alter table benchmark_results + drop constraint benchmark_results_isl_positive, + drop constraint benchmark_results_osl_positive; + +alter table benchmark_results + add constraint benchmark_results_isl_positive check (isl is null or isl > 0), + add constraint benchmark_results_osl_positive check (osl is null or osl > 0); + +-- 3) Uniqueness must treat (NULL, NULL) pairs as equal so agentic rows +-- can't duplicate on (workflow_run_id, config_id, benchmark_type, conc). +alter table benchmark_results + drop constraint benchmark_results_unique; + +alter table benchmark_results + add constraint benchmark_results_unique unique nulls not distinct + (workflow_run_id, config_id, benchmark_type, isl, osl, conc); diff --git a/packages/db/migrations/003_agentic_availability.sql b/packages/db/migrations/003_agentic_availability.sql new file mode 100644 index 00000000..e96cbd50 --- /dev/null +++ b/packages/db/migrations/003_agentic_availability.sql @@ -0,0 +1,21 @@ +-- Extend the availability table to cover agentic scenarios. +-- +-- The 002 migration relaxed benchmark_results.isl/osl to nullable; do the same +-- for availability and add benchmark_type so the frontend can enumerate +-- agentic vs single_turn scenarios per model/date. +-- +-- Postgres primary keys require every column to be NOT NULL, so we drop the PK +-- and replace it with a UNIQUE NULLS NOT DISTINCT constraint — functionally +-- equivalent except it allows isl/osl to be NULL for agentic rows. 
+ +alter table availability + drop constraint availability_pkey; + +alter table availability + alter column isl drop not null, + alter column osl drop not null, + add column benchmark_type text not null default 'single_turn'; + +alter table availability + add constraint availability_natural_key unique nulls not distinct + (model, isl, osl, precision, hardware, framework, spec_method, disagg, benchmark_type, date); diff --git a/packages/db/migrations/004_offload_mode.sql b/packages/db/migrations/004_offload_mode.sql new file mode 100644 index 00000000..24b617f1 --- /dev/null +++ b/packages/db/migrations/004_offload_mode.sql @@ -0,0 +1,42 @@ +-- Add offload_mode as a first-class dimension on benchmark_results. +-- +-- KV-cache offload (on/off) is a meaningful sweep dimension for agentic-trace +-- runs: a single run may emit two rows for the same (config, isl, osl, conc) +-- — one with offload disabled, one enabled. The pre-existing unique key +-- collapsed those into one row, forcing the ingest to skip variants. +-- +-- For fixed-seq runs `offload_mode` defaults to 'off', which matches the +-- assumption baked into the existing 5,500+ rows. + +alter table benchmark_results + add column offload_mode text not null default 'off'; + +-- Backfill agentic rows from the offload_mode value already living in metrics +-- JSONB (set during the earlier agentic ingest backfill). +update benchmark_results + set offload_mode = metrics->>'offload_mode' + where benchmark_type = 'agentic_traces' + and metrics ? 'offload_mode'; + +-- Replace the unique constraint so on/off variants can coexist. +alter table benchmark_results + drop constraint benchmark_results_unique; + +alter table benchmark_results + add constraint benchmark_results_unique unique nulls not distinct + (workflow_run_id, config_id, benchmark_type, isl, osl, conc, offload_mode); + +-- Rebuild the latest-per-config materialized view to dedupe by offload_mode too. 
+drop materialized view if exists latest_benchmarks cascade; + +create materialized view latest_benchmarks as +select distinct on (br.config_id, br.conc, br.isl, br.osl, br.offload_mode) + br.* +from benchmark_results br +join latest_workflow_runs wr on wr.id = br.workflow_run_id +where br.error is null +order by br.config_id, br.conc, br.isl, br.osl, br.offload_mode, br.date desc; + +create unique index latest_benchmarks_pk + on latest_benchmarks (config_id, conc, isl, osl, offload_mode) nulls not distinct; +create index latest_benchmarks_model_idx on latest_benchmarks (config_id); diff --git a/packages/db/src/etl/benchmark-ingest.ts b/packages/db/src/etl/benchmark-ingest.ts index 67173c64..ea802d3f 100644 --- a/packages/db/src/etl/benchmark-ingest.ts +++ b/packages/db/src/etl/benchmark-ingest.ts @@ -29,12 +29,19 @@ export async function bulkIngestBenchmarkRows( // Postgres rejects ON CONFLICT DO UPDATE if the same conflict key appears // more than once in a single batch. Deduplicate within the batch, keeping - // the last occurrence (last metrics for each unique config/isl/osl/conc). + // the last occurrence (last metrics for each unique config/benchmark_type/isl/osl/conc/offload_mode). const seen = new Map(); - for (const r of rows) seen.set(`${r.configId}-${r.isl}-${r.osl}-${r.conc}`, r); + for (const r of rows) { + seen.set( + `${r.configId}-${r.benchmarkType}-${r.isl ?? ''}-${r.osl ?? 
''}-${r.conc}-${r.offloadMode}`, + r, + ); + } const deduped = [...seen.values()]; const configIds = deduped.map((r) => r.configId); + const benchmarkTypes = deduped.map((r) => r.benchmarkType); + const offloadModes = deduped.map((r) => r.offloadMode); const isls = deduped.map((r) => r.isl); const osls = deduped.map((r) => r.osl); const concs = deduped.map((r) => r.conc); @@ -43,20 +50,21 @@ export async function bulkIngestBenchmarkRows( const result = await sql<{ inserted: boolean; id: number }[]>` insert into benchmark_results ( - workflow_run_id, config_id, benchmark_type, date, + workflow_run_id, config_id, benchmark_type, offload_mode, date, isl, osl, conc, image, metrics ) select ${workflowRunId}, unnest(${sql.array(configIds)}::int[]), - 'single_turn', + unnest(${sql.array(benchmarkTypes)}::text[]), + unnest(${sql.array(offloadModes)}::text[]), ${date}::date, unnest(${sql.array(isls)}::int[]), unnest(${sql.array(osls)}::int[]), unnest(${sql.array(concs)}::int[]), unnest(${sql.array(images)}), unnest(${sql.array(metricsJsons)}::jsonb[]) - on conflict (workflow_run_id, config_id, benchmark_type, isl, osl, conc) + on conflict (workflow_run_id, config_id, benchmark_type, isl, osl, conc, offload_mode) do update set metrics = excluded.metrics, image = excluded.image @@ -147,13 +155,14 @@ export async function bulkUpsertAvailability( sql: Sql, rows: { model: string; - isl: number; - osl: number; + isl: number | null; + osl: number | null; precision: string; hardware: string; framework: string; specMethod: string; disagg: boolean; + benchmarkType: string; }[], date: string, ): Promise { @@ -162,7 +171,7 @@ export async function bulkUpsertAvailability( const seen = new Set(); const unique: typeof rows = []; for (const r of rows) { - const key = `${r.model}|${r.isl}|${r.osl}|${r.precision}|${r.hardware}|${r.framework}|${r.specMethod}|${r.disagg}|${date}`; + const key = `${r.model}|${r.isl ?? ''}|${r.osl ?? 
''}|${r.precision}|${r.hardware}|${r.framework}|${r.specMethod}|${r.disagg}|${r.benchmarkType}|${date}`; if (!seen.has(key)) { seen.add(key); unique.push(r); @@ -170,7 +179,7 @@ export async function bulkUpsertAvailability( } await sql` - insert into availability (model, isl, osl, precision, hardware, framework, spec_method, disagg, date) + insert into availability (model, isl, osl, precision, hardware, framework, spec_method, disagg, benchmark_type, date) select unnest(${sql.array(unique.map((r) => r.model))}::text[]), unnest(${sql.array(unique.map((r) => r.isl))}::int[]), @@ -180,6 +189,7 @@ export async function bulkUpsertAvailability( unnest(${sql.array(unique.map((r) => r.framework))}::text[]), unnest(${sql.array(unique.map((r) => r.specMethod))}::text[]), unnest(${sql.array(unique.map((r) => r.disagg))}::bool[]), + unnest(${sql.array(unique.map((r) => r.benchmarkType))}::text[]), ${date}::date on conflict do nothing `; diff --git a/packages/db/src/etl/benchmark-mapper.ts b/packages/db/src/etl/benchmark-mapper.ts index 7d78e175..5b120843 100644 --- a/packages/db/src/etl/benchmark-mapper.ts +++ b/packages/db/src/etl/benchmark-mapper.ts @@ -57,8 +57,21 @@ const NON_METRIC_KEYS = new Set([ 'decode_num_workers', 'num_prefill_gpu', 'num_decode_gpu', + // agentic scenario + 'scenario_type', + 'users', + 'offload_mode', + 'num_requests_total', + 'num_requests_successful', ]); +/** + * `benchmark_type` values understood by the ingest. + * - `single_turn` — fixed sequence-length runs (isl/osl set). + * - `agentic_traces` — trace-replay agentic runs (isl/osl null, `users` → conc). + */ +export type BenchmarkType = 'single_turn' | 'agentic_traces'; + /** * METRIC_KEYS from constants is the canonical set of known metric keys. 
* Any numeric field outside this set and `NON_METRIC_KEYS` is auto-captured @@ -70,9 +83,13 @@ const _warnedMetricKeys = new Set(); export interface BenchmarkParams { config: ConfigParams; - isl: number; - osl: number; + benchmarkType: BenchmarkType; + // Null for agentic_traces; present for single_turn. + isl: number | null; + osl: number | null; conc: number; + /** 'on' | 'off' — KV cache offload to CPU. Defaults to 'off'. */ + offloadMode: string; image: string | null; metrics: Record; } @@ -114,10 +131,15 @@ export function mapBenchmarkRow( return null; } - const isl = parseInt2(row.isl) ?? islOslFallback?.isl; - const osl = parseInt2(row.osl) ?? islOslFallback?.osl; - const conc = parseInt2(row.conc); - if (!isl || !osl || !conc) { + // Agentic-trace runs emit `scenario_type: 'agentic-coding'` (and variants), + // no isl/osl, and `users` instead of `conc`. Everything else stays as-is. + const isAgentic = String(row.scenario_type ?? '').startsWith('agentic'); + const benchmarkType: BenchmarkType = isAgentic ? 'agentic_traces' : 'single_turn'; + + const isl = isAgentic ? null : (parseInt2(row.isl) ?? islOslFallback?.isl ?? null); + const osl = isAgentic ? null : (parseInt2(row.osl) ?? islOslFallback?.osl ?? null); + const conc = isAgentic ? parseInt2(row.users) : parseInt2(row.conc); + if (!conc || (!isAgentic && (!isl || !osl))) { tracker.skips.noIslOsl++; return null; } @@ -182,6 +204,12 @@ export function mapBenchmarkRow( } } + // Agentic rows emit `offload_mode: "on" | "off"` as a string — preserve it + // as a stringified metric so the frontend can expose it in tooltips. + if (isAgentic && typeof row.offload_mode === 'string') { + (metrics as Record).offload_mode = row.offload_mode; + } + // Artifact names encode '/' as '#' to avoid path separators; restore the URI. const image = row.image ? 
String(row.image).replaceAll('#', '/') : null; @@ -205,9 +233,14 @@ export function mapBenchmarkRow( numPrefillGpu, numDecodeGpu, }, + benchmarkType, isl, osl, conc, + offloadMode: + typeof row.offload_mode === 'string' && row.offload_mode.length > 0 + ? row.offload_mode + : 'off', image, metrics, }; diff --git a/packages/db/src/ingest-ci-run.ts b/packages/db/src/ingest-ci-run.ts index 14c7b4d0..8cce43ca 100644 --- a/packages/db/src/ingest-ci-run.ts +++ b/packages/db/src/ingest-ci-run.ts @@ -248,13 +248,14 @@ async function main(): Promise { const availRows: { model: string; - isl: number; - osl: number; + isl: number | null; + osl: number | null; precision: string; hardware: string; framework: string; specMethod: string; disagg: boolean; + benchmarkType: string; }[] = []; let totalNewBmk = 0, @@ -367,6 +368,7 @@ async function main(): Promise { framework: r.config.framework, specMethod: r.config.specMethod, disagg: r.config.disagg, + benchmarkType: r.benchmarkType, }); } diff --git a/packages/db/src/ingest-gcs-backup.ts b/packages/db/src/ingest-gcs-backup.ts index e20278d6..6dc604e9 100644 --- a/packages/db/src/ingest-gcs-backup.ts +++ b/packages/db/src/ingest-gcs-backup.ts @@ -596,13 +596,14 @@ async function main(): Promise { // Upsert availability rows only for successfully resolved configs const availRows: { model: string; - isl: number; - osl: number; + isl: number | null; + osl: number | null; precision: string; hardware: string; framework: string; specMethod: string; disagg: boolean; + benchmarkType: string; }[] = []; for (const r of allInserted) { availRows.push({ @@ -614,6 +615,7 @@ async function main(): Promise { framework: r.config.framework, specMethod: r.config.specMethod, disagg: r.config.disagg, + benchmarkType: r.benchmarkType, }); } if (availRows.length > 0) { diff --git a/packages/db/src/ingest-supplemental.ts b/packages/db/src/ingest-supplemental.ts index 1e494e9f..43aae047 100644 --- a/packages/db/src/ingest-supplemental.ts +++ 
b/packages/db/src/ingest-supplemental.ts @@ -219,8 +219,10 @@ async function ingestSupplementalBmk( const rows: { configId: number; - isl: number; - osl: number; + benchmarkType: 'single_turn' | 'agentic_traces'; + offloadMode: string; + isl: number | null; + osl: number | null; conc: number; image: string | null; metrics: Record; @@ -271,6 +273,8 @@ async function ingestSupplementalBmk( rows.push({ configId, + benchmarkType: 'single_turn', + offloadMode: 'off', isl: entry.isl, osl: entry.osl, conc: entry.conc, @@ -294,13 +298,14 @@ async function ingestSupplementalBmk( // to `rows` are exactly the valid ones. const availRows: { model: string; - isl: number; - osl: number; + isl: number | null; + osl: number | null; precision: string; hardware: string; framework: string; specMethod: string; disagg: boolean; + benchmarkType: string; }[] = []; for (const entry of entries) { const modelKey = resolveModelKey({ model: entry.model, infmax_model_prefix: undefined }); @@ -317,6 +322,7 @@ async function ingestSupplementalBmk( framework, specMethod, disagg, + benchmarkType: 'single_turn', }); } if (availRows.length > 0) { diff --git a/packages/db/src/json-provider.ts b/packages/db/src/json-provider.ts index 0d9373d3..f09a2686 100644 --- a/packages/db/src/json-provider.ts +++ b/packages/db/src/json-provider.ts @@ -290,6 +290,8 @@ function toBenchmarkRow( decode_num_workers: c.decode_num_workers, num_prefill_gpu: c.num_prefill_gpu, num_decode_gpu: c.num_decode_gpu, + benchmark_type: br.benchmark_type ?? 'single_turn', + offload_mode: (br as { offload_mode?: string }).offload_mode ?? 
'off', isl: br.isl, osl: br.osl, conc: br.conc, @@ -410,7 +412,11 @@ export function getAvailabilityData(): AvailabilityRow[] { for (const a of s.availability) { const key = `${a.model}|${a.hardware}|${a.framework}|${a.precision}|${a.isl}|${a.osl}|${toDateString(a.date)}`; if (validKeys.has(key)) { - rows.push({ ...a, date: toDateString(a.date) }); + rows.push({ + ...a, + benchmark_type: (a as { benchmark_type?: string }).benchmark_type ?? 'single_turn', + date: toDateString(a.date), + }); } } diff --git a/packages/db/src/queries/benchmarks.ts b/packages/db/src/queries/benchmarks.ts index 1c30b1fd..74e20380 100644 --- a/packages/db/src/queries/benchmarks.ts +++ b/packages/db/src/queries/benchmarks.ts @@ -18,9 +18,13 @@ export interface BenchmarkRow { decode_num_workers: number; num_prefill_gpu: number; num_decode_gpu: number; - isl: number; - osl: number; + benchmark_type: string; + // Null for agentic_traces; numeric for single_turn fixed-seq runs. + isl: number | null; + osl: number | null; conc: number; + /** KV-cache offload mode: 'on' | 'off'. Defaults to 'off' for fixed-seq. 
*/ + offload_mode: string; image: string | null; metrics: Record; date: string; @@ -68,6 +72,8 @@ export async function getLatestBenchmarks( c.decode_num_workers, c.num_prefill_gpu, c.num_decode_gpu, + br.benchmark_type, + br.offload_mode, br.isl, br.osl, br.conc, @@ -106,6 +112,8 @@ export async function getLatestBenchmarks( c.decode_num_workers, c.num_prefill_gpu, c.num_decode_gpu, + lb.benchmark_type, + lb.offload_mode, lb.isl, lb.osl, lb.conc, @@ -153,6 +161,7 @@ export async function getAllBenchmarksForHistory( c.decode_num_workers, c.num_prefill_gpu, c.num_decode_gpu, + br.benchmark_type, br.isl, br.osl, br.conc, diff --git a/packages/db/src/queries/workflow-info.ts b/packages/db/src/queries/workflow-info.ts index b4e4f255..d5e2d933 100644 --- a/packages/db/src/queries/workflow-info.ts +++ b/packages/db/src/queries/workflow-info.ts @@ -88,20 +88,22 @@ export async function getDateConfigs(sql: DbClient, date: string): Promise { const rows = await sql` - SELECT a.model, a.isl, a.osl, a.precision, a.hardware, a.framework, a.spec_method, a.disagg, a.date::text + SELECT a.model, a.isl, a.osl, a.precision, a.hardware, a.framework, a.spec_method, a.disagg, a.benchmark_type, a.date::text FROM availability a WHERE EXISTS ( SELECT 1 @@ -112,8 +114,9 @@ export async function getAvailabilityData(sql: DbClient): Promise Date: Thu, 30 Apr 2026 19:01:56 -0500 Subject: [PATCH 02/41] =?UTF-8?q?fix:=20agentic=20offload=20variants=20?= =?UTF-8?q?=E2=80=94=20render=20both=20halos=20+=20map=20renamed=20fields?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ScatterGraph: include `offload_mode` in `buildPointConfigId` so d3's data join keeps both `on` and `off` variants for the same (config, conc). Without it, the second variant collapsed onto the first key, so FP8 offload-on points (and their halos) silently disappeared. 
- benchmark-mapper: handle older artifacts that emit `users`/`offload_mode` AND newer ones that emit `conc`/`offloading` (with 'none' → 'off' mapping). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../components/inference/ui/ScatterGraph.tsx | 4 +++ packages/db/src/etl/benchmark-mapper.ts | 27 ++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 15bb60f0..55a206ce 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -295,6 +295,10 @@ const ScatterGraph = React.memo( const buildPointConfigId = useCallback((point: InferenceData): string => { let key = `${point.hwKey}|${point.precision}|${point.tp}|${point.conc}|${point.decode_ep ?? 0}|${point.prefill_tp ?? 0}|${point.prefill_ep ?? 0}`; if (point.disagg) key += `|disagg|${point.num_prefill_gpu ?? 0}|${point.num_decode_gpu ?? 0}`; + // Agentic runs emit two rows per (config, conc) — one offload=on, one off. + // Without this suffix, d3's data join treats them as the same point and + // drops one variant (along with its halo). + if (point.offload_mode) key += `|offload-${point.offload_mode}`; return key; }, []); diff --git a/packages/db/src/etl/benchmark-mapper.ts b/packages/db/src/etl/benchmark-mapper.ts index 5b120843..d842276e 100644 --- a/packages/db/src/etl/benchmark-mapper.ts +++ b/packages/db/src/etl/benchmark-mapper.ts @@ -138,12 +138,24 @@ export function mapBenchmarkRow( const isl = isAgentic ? null : (parseInt2(row.isl) ?? islOslFallback?.isl ?? null); const osl = isAgentic ? null : (parseInt2(row.osl) ?? islOslFallback?.osl ?? null); - const conc = isAgentic ? parseInt2(row.users) : parseInt2(row.conc); + // Agentic artifacts encode concurrency as `users` in older schemas and `conc` in newer ones. + const conc = isAgentic ? (parseInt2(row.users) ?? 
parseInt2(row.conc)) : parseInt2(row.conc); if (!conc || (!isAgentic && (!isl || !osl))) { tracker.skips.noIslOsl++; return null; } + // Agentic offload signal: prefer `offload_mode` ('on'|'off'), fall back to `offloading` + // ('none' → 'off'; any other non-empty value → 'on'). + const offloadModeRaw = + typeof row.offload_mode === 'string' && row.offload_mode.length > 0 + ? row.offload_mode + : typeof row.offloading === 'string' && row.offloading.length > 0 + ? row.offloading === 'none' + ? 'off' + : 'on' + : 'off'; + const { framework, disagg } = normalizeFramework(String(row.framework ?? ''), row.disagg); const isMultinode = parseBool(row.is_multinode); const precision = normalizePrecision(String(row.precision ?? '')); @@ -204,10 +216,10 @@ export function mapBenchmarkRow( } } - // Agentic rows emit `offload_mode: "on" | "off"` as a string — preserve it - // as a stringified metric so the frontend can expose it in tooltips. - if (isAgentic && typeof row.offload_mode === 'string') { - (metrics as Record).offload_mode = row.offload_mode; + // Agentic rows emit `offload_mode: "on" | "off"` (or older `offloading: "none"|...`) + // — preserve as a stringified metric so the frontend can expose it in tooltips. + if (isAgentic) { + (metrics as Record).offload_mode = offloadModeRaw; } // Artifact names encode '/' as '#' to avoid path separators; restore the URI. @@ -237,10 +249,7 @@ export function mapBenchmarkRow( isl, osl, conc, - offloadMode: - typeof row.offload_mode === 'string' && row.offload_mode.length > 0 - ? row.offload_mode - : 'off', + offloadMode: offloadModeRaw, image, metrics, }; From 07ba10636dae87b5a819afa524d7c10322fae41b Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 1 May 2026 00:29:55 -0500 Subject: [PATCH 03/41] fix: render offload halo on every offload-on point, not just frontier The halo's purpose is to surface KV-offload usage; restricting it to Pareto-frontier-only points hid the indicator on most runs. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/app/src/components/inference/ui/ScatterGraph.tsx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 55a206ce..61ac0983 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -1516,10 +1516,9 @@ const ScatterGraph = React.memo( .attr('pointer-events', 'none'); }); - // Offload halo: dashed ring on frontier points that used KV offload + // Offload halo: dashed ring on every point that used KV offload (Pareto or not) zoomGroup.selectAll('.dot-group').each(function (d) { - const onFrontier = optimalPointKeys.has(`${d.hwKey}_${d.precision}-${d.x}-${d.y}`); - const showHalo = onFrontier && d.offload_mode === 'on'; + const showHalo = d.offload_mode === 'on'; d3.select(this) .selectAll('.offload-halo') .data(showHalo ? [true] : []) From 95e9dc77431adf5354ef0df36989816199624383 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 1 May 2026 01:13:42 -0500 Subject: [PATCH 04/41] fix: strip runner-pool suffix (-p1, -p2, ...) from hw identifier b300-p1 (and similar) artifacts were skipping ingest because the runner-pool suffix wasn't in the strip list and didn't normalize to the canonical b300 GPU key. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/db/src/etl/normalizers.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/db/src/etl/normalizers.ts b/packages/db/src/etl/normalizers.ts index ad12a454..bd497f7a 100644 --- a/packages/db/src/etl/normalizers.ts +++ b/packages/db/src/etl/normalizers.ts @@ -34,7 +34,8 @@ export function hwToGpuKey(hw: string): string | null { .replace(/-dgxc-slurm$/, '') .replace(/-dgxc$/, '') .replace(/-nb$/, '') - .replace(/-nv$/, ''); + .replace(/-nv$/, '') + .replace(/-p\d+$/, ''); // strip runner-pool suffix (e.g. 
b300-p1 → b300) return GPU_KEYS.has(base) ? base : null; } From 982106da5f4421983841304f0503b6467033852d Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 1 May 2026 09:25:33 -0500 Subject: [PATCH 05/41] feat: bold scatter labels with concurrency tag + collision avoidance - Label text now includes `C={conc}` alongside the GPU/parallelism tag (default `{tp} C={conc}`, advanced `{pointLabel} C={conc}`) - Bumped point-label font-weight to 700 so the labels read clearly against the chart fill - Greedy collision-avoidance pass on render and zoom: tries placing each label above/below the point through 4 candidate dy offsets, hiding the label only when no slot is free Co-Authored-By: Claude Opus 4.7 (1M context) --- .../components/inference/ui/ScatterGraph.tsx | 68 ++++++++++++++++++- .../src/lib/d3-chart/layers/scatter-points.ts | 1 + 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 61ac0983..3fbd8588 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -55,6 +55,63 @@ import { buildGradientColorMap, } from '@/components/inference/utils/paretoLabels'; +// Greedy label-collision avoidance: try positions above/below the point; +// hide labels that can't fit anywhere. Re-runs cheaply on each render/zoom. +function avoidLabelCollisions( + zoomGroup: d3.Selection, +): void { + const labels: { + el: SVGTextElement; + cx: number; + cy: number; + w: number; + h: number; + }[] = []; + zoomGroup.selectAll('.dot-group').each(function () { + const labelEl = this.querySelector('.point-label'); + if (!labelEl) return; + if ((this as SVGGElement).style.opacity === '0') return; + const transform = (this as SVGGElement).getAttribute('transform') ?? 
''; + const m = transform.match(/translate\(([^,]+),([^)]+)\)/); + if (!m) return; + const cx = parseFloat(m[1]); + const cy = parseFloat(m[2]); + labelEl.setAttribute('dy', '-8'); + labelEl.style.opacity = '1'; + const bbox = labelEl.getBBox(); + labels.push({ el: labelEl, cx, cy, w: bbox.width, h: bbox.height }); + }); + labels.sort((a, b) => a.cx - b.cx); + const placed: { left: number; right: number; top: number; bottom: number }[] = []; + const pad = 1; + const candidates = [-8, 14, -22, 28]; + for (const lab of labels) { + let chosenDy: number | null = null; + let chosenBox: { left: number; right: number; top: number; bottom: number } | null = null; + for (const dy of candidates) { + const top = lab.cy + dy - lab.h - pad; + const bottom = lab.cy + dy + pad; + const left = lab.cx - lab.w / 2 - pad; + const right = lab.cx + lab.w / 2 + pad; + const collides = placed.some( + (p) => !(right < p.left || left > p.right || bottom < p.top || top > p.bottom), + ); + if (!collides) { + chosenDy = dy; + chosenBox = { left, right, top, bottom }; + break; + } + } + if (chosenDy !== null && chosenBox) { + lab.el.setAttribute('dy', String(chosenDy)); + lab.el.style.opacity = '1'; + placed.push(chosenBox); + } else { + lab.el.style.opacity = '0'; + } + } +} + // X-shape path for overlay (unofficial) data points const X_SIZE = 5; const X_HOVER_SIZE = 7; @@ -603,6 +660,7 @@ const ScatterGraph = React.memo( d3.axisLeft(newYS).ticks(10).tickFormat(logTickFormat(newYS)) as any, ); } + avoidLabelCollisions(ctx.layout.zoomGroup); }, }), [zoomResetEventName, eventPrefix, xScaleConfig._isLog, yScaleConfig.type], @@ -1251,7 +1309,8 @@ const ScatterGraph = React.memo( getOpacity: (d) => (isPointVisible(d) ? 1 : 0), getPointerEvents: (d) => (isPointVisible(d) ? 'auto' : 'none'), hideLabels: hidePointLabels || showGradientLabels, - getLabelText: (d) => (useAdvancedLabels ? getPointLabel(d) : String(d.tp)), + getLabelText: (d) => + useAdvancedLabels ? 
`${getPointLabel(d)} C=${d.conc}` : `${d.tp} C=${d.conc}`, foreground: 'var(--foreground)', dataAttrs: { 'hw-key': (d) => String(d.hwKey), @@ -1353,8 +1412,11 @@ const ScatterGraph = React.memo( .attr('text-anchor', 'middle') .style('fill', 'var(--foreground)') .attr('font-size', '10px') + .attr('font-weight', '700') .attr('pointer-events', 'none') - .text(useAdvancedLabels ? getPointLabel(d) : String(d.tp)); + .text( + useAdvancedLabels ? `${getPointLabel(d)} C=${d.conc}` : `${d.tp} C=${d.conc}`, + ); }); // Overlay tooltip handlers @@ -1566,6 +1628,8 @@ const ScatterGraph = React.memo( }); }); + avoidLabelCollisions(zoomGroup); + // Log tick formatting on initial render if (xScaleConfig._isLog) { const xScale = ctx.xScale as d3.ScaleLogarithmic; diff --git a/packages/app/src/lib/d3-chart/layers/scatter-points.ts b/packages/app/src/lib/d3-chart/layers/scatter-points.ts index 507654e1..9f2d2f38 100644 --- a/packages/app/src/lib/d3-chart/layers/scatter-points.ts +++ b/packages/app/src/lib/d3-chart/layers/scatter-points.ts @@ -72,6 +72,7 @@ export function renderScatterPoints Date: Fri, 1 May 2026 09:32:44 -0500 Subject: [PATCH 06/41] fix: stack multi-line point labels upward so they don't overlap the point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tspans now ride above the text's `dy` anchor — the LAST line sits at the anchor (just above the point) and earlier lines stack above it. Previously the second tspan landed below the anchor and crashed into the marker. Also widened collision candidates by label height so the flipped-below position fully clears the point on multi-line labels. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../components/inference/ui/ScatterGraph.tsx | 28 +++++++--- .../src/lib/d3-chart/layers/scatter-points.ts | 52 +++++++++++++------ 2 files changed, 58 insertions(+), 22 deletions(-) diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index 3fbd8588..f8ce9b8f 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -84,8 +84,11 @@ function avoidLabelCollisions( labels.sort((a, b) => a.cx - b.cx); const placed: { left: number; right: number; top: number; bottom: number }[] = []; const pad = 1; - const candidates = [-8, 14, -22, 28]; for (const lab of labels) { + // Candidates scale with the label's own height so multi-line labels don't + // overlap the point shape when flipped below. + const below = lab.h + 8; + const candidates = [-8, below, -8 - below - 4, 2 * below]; let chosenDy: number | null = null; let chosenBox: { left: number; right: number; top: number; bottom: number } | null = null; for (const dy of candidates) { @@ -1310,7 +1313,7 @@ const ScatterGraph = React.memo( getPointerEvents: (d) => (isPointVisible(d) ? 'auto' : 'none'), hideLabels: hidePointLabels || showGradientLabels, getLabelText: (d) => - useAdvancedLabels ? `${getPointLabel(d)} C=${d.conc}` : `${d.tp} C=${d.conc}`, + useAdvancedLabels ? `${getPointLabel(d)}\nC=${d.conc}` : `${d.tp}\nC=${d.conc}`, foreground: 'var(--foreground)', dataAttrs: { 'hw-key': (d) => String(d.hwKey), @@ -1403,7 +1406,14 @@ const ScatterGraph = React.memo( // Labels const showLabels = !hidePointLabels && !showGradientLabels; overlayPoints.each(function (d) { - d3.select(this) + const lines = showLabels + ? (useAdvancedLabels + ? `${getPointLabel(d)}\nC=${d.conc}` + : `${d.tp}\nC=${d.conc}` + ).split('\n') + : []; + const text = d3 + .select(this) .selectAll('.overlay-label') .data(showLabels ? 
[true] : []) .join('text') @@ -1413,10 +1423,14 @@ const ScatterGraph = React.memo( .style('fill', 'var(--foreground)') .attr('font-size', '10px') .attr('font-weight', '700') - .attr('pointer-events', 'none') - .text( - useAdvancedLabels ? `${getPointLabel(d)} C=${d.conc}` : `${d.tp} C=${d.conc}`, - ); + .attr('pointer-events', 'none'); + text + .selectAll('tspan') + .data(lines) + .join('tspan') + .attr('x', 0) + .attr('dy', (_l, i) => (i === 0 ? `-${(lines.length - 1) * 1.1}em` : '1.1em')) + .text((l) => l); }); // Overlay tooltip handlers diff --git a/packages/app/src/lib/d3-chart/layers/scatter-points.ts b/packages/app/src/lib/d3-chart/layers/scatter-points.ts index 9f2d2f38..13c588d8 100644 --- a/packages/app/src/lib/d3-chart/layers/scatter-points.ts +++ b/packages/app/src/lib/d3-chart/layers/scatter-points.ts @@ -63,18 +63,30 @@ export function renderScatterPoints` element — the + // intra-stack offsets stay correct whether the label ends up above or below. if (!config.hideLabels && config.getLabelText && config.foreground) { - entered - .append('text') - .attr('class', 'point-label') - .attr('dy', -8) - .attr('text-anchor', 'middle') - .attr('fill', config.foreground) - .attr('font-size', '10px') - .attr('font-weight', '700') - .attr('pointer-events', 'none') - .text(config.getLabelText); + const labelGetter = config.getLabelText; + entered.each(function (d) { + const lines = labelGetter(d).split('\n'); + const text = d3 + .select(this) + .append('text') + .attr('class', 'point-label') + .attr('dy', -8) + .attr('text-anchor', 'middle') + .attr('fill', config.foreground!) + .attr('font-size', '10px') + .attr('font-weight', '700') + .attr('pointer-events', 'none'); + lines.forEach((line, i) => { + const tspanDy = i === 0 ? 
`-${(lines.length - 1) * 1.1}em` : '1.1em'; + text.append('tspan').attr('x', 0).attr('dy', tspanDy).text(line); + }); + }); } // Exit: remove stale points @@ -103,9 +115,12 @@ export function renderScatterPoints('.point-label') + const lines = labelGetter(d).split('\n'); + const text = d3 + .select(this) + .selectAll('.point-label') .data([true]) .join('text') .attr('class', 'point-label') @@ -113,8 +128,15 @@ export function renderScatterPoints('tspan') + .data(lines) + .join('tspan') + .attr('x', 0) + .attr('dy', (_l, i) => (i === 0 ? `-${(lines.length - 1) * 1.1}em` : '1.1em')) + .text((l) => l); }); } else { points.selectAll('.point-label').remove(); From 37eecc6e28c10751ffc52c8a0d0588177e43d4d8 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 1 May 2026 09:38:39 -0500 Subject: [PATCH 07/41] fix: anchor multi-line labels via first tspan + tspan-aware collision pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a `<text>` contains tspans, the parent's `dy` does not shift the bbox cleanly — its (unused) y=0 origin still factors in, so the rendered text ended up centered on the point. Move the absolute offset into the FIRST tspan's `dy`; later tspans cascade by 1.1em. Collision avoidance now drives the first tspan's `dy` and tries four candidate baselines (primary above, primary below, secondary above, secondary below), accounting for full label height when picking a non-overlapping slot. Labels still hidden as a last resort. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../components/inference/ui/ScatterGraph.tsx | 72 +++++++++++++------ .../src/lib/d3-chart/layers/scatter-points.ts | 25 ++++--- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index f8ce9b8f..27d3680c 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -55,58 +55,88 @@ import { buildGradientColorMap, } from '@/components/inference/utils/paretoLabels'; -// Greedy label-collision avoidance: try positions above/below the point; -// hide labels that can't fit anywhere. Re-runs cheaply on each render/zoom. +// Greedy label-collision avoidance. +// Each candidate is the y-position of the FIRST baseline (relative to point +// center) which we apply via the first tspan's `dy` — later tspans cascade +// down by 1.1em. We try above/below at primary and secondary offsets, and +// hide the label if all four positions collide. function avoidLabelCollisions( zoomGroup: d3.Selection, ): void { - const labels: { + interface LabelInfo { el: SVGTextElement; + firstTspan: SVGTSpanElement; cx: number; cy: number; w: number; - h: number; - }[] = []; + nLines: number; + defaultFirstY: number; + } + const labels: LabelInfo[] = []; + const ASCENT = 9; + const DESCENT = 3; + const LINE_H = 11; + zoomGroup.selectAll('.dot-group').each(function () { const labelEl = this.querySelector('.point-label'); if (!labelEl) return; if ((this as SVGGElement).style.opacity === '0') return; + const tspans = labelEl.querySelectorAll('tspan'); + if (tspans.length === 0) return; const transform = (this as SVGGElement).getAttribute('transform') ?? 
''; const m = transform.match(/translate\(([^,]+),([^)]+)\)/); if (!m) return; const cx = parseFloat(m[1]); const cy = parseFloat(m[2]); - labelEl.setAttribute('dy', '-8'); + const nLines = tspans.length; + const defaultFirstY = -(8 + (nLines - 1) * LINE_H); // last baseline 8px above point + // Reset to default before measuring so prior positioning doesn't bias bbox + tspans[0].setAttribute('dy', `${defaultFirstY}px`); labelEl.style.opacity = '1'; const bbox = labelEl.getBBox(); - labels.push({ el: labelEl, cx, cy, w: bbox.width, h: bbox.height }); + labels.push({ + el: labelEl, + firstTspan: tspans[0], + cx, + cy, + w: bbox.width, + nLines, + defaultFirstY, + }); }); + labels.sort((a, b) => a.cx - b.cx); const placed: { left: number; right: number; top: number; bottom: number }[] = []; - const pad = 1; + const pad = 2; + for (const lab of labels) { - // Candidates scale with the label's own height so multi-line labels don't - // overlap the point shape when flipped below. - const below = lab.h + 8; - const candidates = [-8, below, -8 - below - 4, 2 * below]; - let chosenDy: number | null = null; + const blockH = (lab.nLines - 1) * LINE_H + ASCENT + DESCENT; + const aboveFirstY = lab.defaultFirstY; + const belowFirstY = 14; // first baseline 14px below point center + const candidates = [ + aboveFirstY, + belowFirstY, + aboveFirstY - blockH - 2, + belowFirstY + blockH + 2, + ]; + let chosenY: number | null = null; let chosenBox: { left: number; right: number; top: number; bottom: number } | null = null; - for (const dy of candidates) { - const top = lab.cy + dy - lab.h - pad; - const bottom = lab.cy + dy + pad; + for (const firstY of candidates) { + const top = lab.cy + firstY - ASCENT - pad; + const bottom = lab.cy + firstY + (lab.nLines - 1) * LINE_H + DESCENT + pad; const left = lab.cx - lab.w / 2 - pad; const right = lab.cx + lab.w / 2 + pad; const collides = placed.some( (p) => !(right < p.left || left > p.right || bottom < p.top || top > p.bottom), ); if 
(!collides) { - chosenDy = dy; + chosenY = firstY; chosenBox = { left, right, top, bottom }; break; } } - if (chosenDy !== null && chosenBox) { - lab.el.setAttribute('dy', String(chosenDy)); + if (chosenY !== null && chosenBox) { + lab.firstTspan.setAttribute('dy', `${chosenY}px`); lab.el.style.opacity = '1'; placed.push(chosenBox); } else { @@ -1418,18 +1448,18 @@ const ScatterGraph = React.memo( .data(showLabels ? [true] : []) .join('text') .attr('class', 'overlay-label') - .attr('dy', -10) .attr('text-anchor', 'middle') .style('fill', 'var(--foreground)') .attr('font-size', '10px') .attr('font-weight', '700') .attr('pointer-events', 'none'); + const firstDy = -(1 + (lines.length - 1) * 1.1); text .selectAll('tspan') .data(lines) .join('tspan') .attr('x', 0) - .attr('dy', (_l, i) => (i === 0 ? `-${(lines.length - 1) * 1.1}em` : '1.1em')) + .attr('dy', (_l, i) => (i === 0 ? `${firstDy}em` : '1.1em')) .text((l) => l); }); diff --git a/packages/app/src/lib/d3-chart/layers/scatter-points.ts b/packages/app/src/lib/d3-chart/layers/scatter-points.ts index 13c588d8..71d1f050 100644 --- a/packages/app/src/lib/d3-chart/layers/scatter-points.ts +++ b/packages/app/src/lib/d3-chart/layers/scatter-points.ts @@ -64,10 +64,10 @@ export function renderScatterPoints` element — the - // intra-stack offsets stay correct whether the label ends up above or below. + // we anchor the entire stack via the FIRST tspan's `dy` so getBBox() doesn't + // pick up the text element's own (unused) y=0 origin. The first tspan is + // raised so the LAST line baseline lands ~8px above the point; subsequent + // tspans cascade down by 1.1em. if (!config.hideLabels && config.getLabelText && config.foreground) { const labelGetter = config.getLabelText; entered.each(function (d) { @@ -76,15 +76,18 @@ export function renderScatterPoints { - const tspanDy = i === 0 ? 
`-${(lines.length - 1) * 1.1}em` : '1.1em'; - text.append('tspan').attr('x', 0).attr('dy', tspanDy).text(line); + text + .append('tspan') + .attr('x', 0) + .attr('dy', i === 0 ? `${firstDy}em` : '1.1em') + .text(line); }); }); } @@ -113,7 +116,9 @@ export function renderScatterPoints('tspan') .data(lines) .join('tspan') .attr('x', 0) - .attr('dy', (_l, i) => (i === 0 ? `-${(lines.length - 1) * 1.1}em` : '1.1em')) + .attr('dy', (_l, i) => (i === 0 ? `${firstDy}em` : '1.1em')) .text((l) => l); }); } else { From f317377dfaea35f9cb5dc435ea177966aa17fbf8 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 1 May 2026 10:21:00 -0500 Subject: [PATCH 08/41] fix: dedupe artifacts by logical name + skip 0-successful agg rows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two complementary fixes for runs whose `results_bmk` aggregated artifact ends up containing both a successful row and a failed-attempt row for the same (config, conc, offload) — the failed row's null metrics were overwriting the good row via ON CONFLICT DO UPDATE. 1. Artifact-level: strip the trailing `_<runner>_<index>` suffix from each artifact name and group by the logical name, keeping only the most recent per group. 2. Row-level: skip rows with `num_requests_successful === 0` AND `num_requests_total > 0`. The aggregated artifact merges rows from all runners — including failed ones — so artifact-level dedup alone can't reach inside it. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/db/src/etl/benchmark-mapper.ts | 14 +++++++++++ packages/db/src/etl/skip-tracker.ts | 10 +++++++- packages/db/src/ingest-ci-run.ts | 33 ++++++++++++++++++++----- packages/db/src/ingest-gcs-backup.ts | 1 + 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/packages/db/src/etl/benchmark-mapper.ts b/packages/db/src/etl/benchmark-mapper.ts index d842276e..1aff5ea9 100644 --- a/packages/db/src/etl/benchmark-mapper.ts +++ b/packages/db/src/etl/benchmark-mapper.ts @@ -145,6 +145,20 @@ export function mapBenchmarkRow( return null; } + // Failed-run guard: aggregated artifacts (`results_bmk`) merge rows from + // every runner, including ones with 0 successful requests and null metrics. + // Without this skip, the empty row's nulls overwrite a good row via + // ON CONFLICT DO UPDATE when both share the same (config, conc, offload). + if ( + typeof row.num_requests_successful === 'number' && + row.num_requests_successful === 0 && + typeof row.num_requests_total === 'number' && + row.num_requests_total > 0 + ) { + tracker.skips.failedRun++; + return null; + } + // Agentic offload signal: prefer `offload_mode` ('on'|'off'), fall back to `offloading` // ('none' → 'off'; any other non-empty value → 'on'). const offloadModeRaw = diff --git a/packages/db/src/etl/skip-tracker.ts b/packages/db/src/etl/skip-tracker.ts index 6166ea44..588718dd 100644 --- a/packages/db/src/etl/skip-tracker.ts +++ b/packages/db/src/etl/skip-tracker.ts @@ -8,6 +8,7 @@ export interface Skips { unmappedModel: number; unmappedHw: number; noIslOsl: number; + failedRun: number; dbError: number; } @@ -66,7 +67,14 @@ const MAX_DB_ERRORS = 10; * @returns A `SkipTracker` with zeroed counters and empty unmapped-name sets. 
*/ export function createSkipTracker(): SkipTracker { - const skips: Skips = { badZip: 0, unmappedModel: 0, unmappedHw: 0, noIslOsl: 0, dbError: 0 }; + const skips: Skips = { + badZip: 0, + unmappedModel: 0, + unmappedHw: 0, + noIslOsl: 0, + failedRun: 0, + dbError: 0, + }; const unmappedModels = new Set(); const unmappedHws = new Set(); const unmappedPrecisions = new Set(); diff --git a/packages/db/src/ingest-ci-run.ts b/packages/db/src/ingest-ci-run.ts index 8cce43ca..fb1fbbbc 100644 --- a/packages/db/src/ingest-ci-run.ts +++ b/packages/db/src/ingest-ci-run.ts @@ -101,15 +101,30 @@ if (isDownloadMode) { } catch {} } - const byName = new Map(); + // Strip the trailing `__` token from each + // artifact name, then group by the resulting logical name and keep only + // the most recent per group. Without this, two artifacts produced on + // different runners for the same logical config (e.g. `…_h200-cw_00` and + // `…_h200-dgxc-slurm_1`) both land in the DB and the failed one's empty + // metrics can overwrite the good one via ON CONFLICT DO UPDATE. + // + // The runner pool name itself has no underscores (`h200-cw`, + // `h200-dgxc-slurm`, `b200-nb`), so `[a-zA-Z0-9.-]*` keeps the strip + // bounded — using `\w` here would over-match across earlier `_` + // separators and collapse different (conc, offload) variants into the + // same logical name. 
+ const RUNNER_SUFFIX_RE = /_[a-zA-Z][a-zA-Z0-9.-]*_\d+$/; + const byLogical = new Map(); for (const a of allArtifacts) { - const existing = byName.get(a.name); + const key = a.name.replace(RUNNER_SUFFIX_RE, ''); + const existing = byLogical.get(key); if (!existing || a.created_at > existing.created_at) { - byName.set(a.name, a); + byLogical.set(key, a); } } - for (const [name, artifact] of byName) { + for (const [, artifact] of byLogical) { + const name = artifact.name; console.log(` ${name}`); const zipPath = path.join(artifactsDir, 'artifact.zip'); execSync(`gh api "${artifact.archive_download_url}" > "${zipPath}"`, { @@ -121,7 +136,7 @@ if (isDownloadMode) { fs.unlinkSync(zipPath); } - console.log(`\n Downloaded ${byName.size} artifact(s)`); + console.log(`\n Downloaded ${byLogical.size} artifact(s)`); // Fetch run attempt from API const attemptStr = execSync( @@ -510,11 +525,17 @@ async function main(): Promise { const { skips, unmappedModels, unmappedHws, unmappedPrecisions } = tracker; const totalSkips = - skips.badZip + skips.unmappedModel + skips.unmappedHw + skips.noIslOsl + skips.dbError; + skips.badZip + + skips.unmappedModel + + skips.unmappedHw + + skips.noIslOsl + + skips.failedRun + + skips.dbError; if (totalSkips > 0) { console.log(`\n Skipped: ${totalSkips} rows`); const skipLines: [string, number][] = [ ['no isl/osl (old format)', skips.noIslOsl], + ['failed run (0 successful)', skips.failedRun], ['unmapped model', skips.unmappedModel], ['unmapped hw', skips.unmappedHw], ['bad/empty zip', skips.badZip], diff --git a/packages/db/src/ingest-gcs-backup.ts b/packages/db/src/ingest-gcs-backup.ts index 6dc604e9..d67f5164 100644 --- a/packages/db/src/ingest-gcs-backup.ts +++ b/packages/db/src/ingest-gcs-backup.ts @@ -434,6 +434,7 @@ async function mapWorkflowDir( unmappedModel: local.skips.unmappedModel, unmappedHw: local.skips.unmappedHw, noIslOsl: local.skips.noIslOsl, + failedRun: local.skips.failedRun, }, localUnmappedModels: new 
Set(local.unmappedModels), localUnmappedHws: new Set(local.unmappedHws), From c2f66f62f5a1dedb6a87c7c5e58ca990b3cb0956 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Thu, 7 May 2026 08:41:26 -0500 Subject: [PATCH 09/41] feat: add AIPerf to FRAMEWORK_LABELS Tag display name for the `aiperf` spec_method suffix used by the alternate-harness runs ingested for the agentic minimax sweep. Without this entry the legend shows 'AIPERF' from the default toUpperCase fallback. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/constants/src/framework-aliases.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/constants/src/framework-aliases.ts b/packages/constants/src/framework-aliases.ts index cc5eb6b4..e23a93bc 100644 --- a/packages/constants/src/framework-aliases.ts +++ b/packages/constants/src/framework-aliases.ts @@ -44,6 +44,7 @@ export const FRAMEWORK_LABELS: Record = { ]), ), mtp: 'MTP', + aiperf: 'AIPerf', }; /** From 024797a978a2a6e2954f66a963de3205b62a149e Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Tue, 12 May 2026 15:02:07 -0500 Subject: [PATCH 10/41] fix(changelog): coerce ids to string when filtering changelog by run bigint workflow_run_id sometimes deserializes as a number on the frontend depending on the postgres adapter's behavior; strict === between a number and a string silently dropped every match, so the changelog popover always reported "no changelog data available." 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/app/src/components/GlobalFilterContext.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index 08fc7094..11e56de7 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -87,7 +87,9 @@ function buildRunInfo(data: WorkflowInfoResponse): Record { const runs: Record = {}; for (const run of data.runs) { const runId = String(run.github_run_id); - const runChangelogs = data.changelogs.filter((c) => c.workflow_run_id === run.github_run_id); + const runChangelogs = data.changelogs.filter( + (c) => String(c.workflow_run_id) === String(run.github_run_id), + ); runs[runId] = { runId, runDate: run.created_at, From aa154193dfbc12535f25444cdf6fccc16a3e1382 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Tue, 12 May 2026 15:36:57 -0500 Subject: [PATCH 11/41] feat: default sequence to Agentic Traces when available If the selected model has agentic_traces data, prefer that over the default 8K/1K fixed-seq when the user hasn't explicitly chosen via URL. effectiveSequence already falls back to availableSequences[0] for models without agentic, so models with only fixed-seq data still render correctly. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/app/src/components/GlobalFilterContext.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index 11e56de7..7813d079 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -125,7 +125,9 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { const [selectedSequence, setSelectedSequence] = useState(() => { const urlSeq = getUrlParam('i_seq'); if (urlSeq && Object.values(Sequence).includes(urlSeq as Sequence)) return urlSeq as Sequence; - return Sequence.EightK_OneK; + // Prefer Agentic Traces by default when the selected model has it; the + // effectiveSequence fallback below handles models without agentic data. + return Sequence.AgenticTraces; }); const [selectedPrecisions, setSelectedPrecisionsRaw] = useState(() => { From 099a33efcb53f5130dc40d715a0f4b86d6136a93 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 15 May 2026 12:25:25 -0500 Subject: [PATCH 12/41] fix(agentic): respect percentile selector for input-throughput x axis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rowToAggDataEntry was only copying median/p99 metric variants — picking p90/p99.9 in the percentile selector silently fell back to 0 and collapsed every point into a vertical line at x=0. Copy the full median/p90/p99/p99.9 set into AggDataEntry. Hide the X-Axis Metric dropdown for agentic mode (it doubled up with the percentile selector) and route the input-metric chart through withPercentile so picking p99 actually plots p99_ttft instead of the hard-coded p99_ttft config default. Percentile options pared back to median + p99. 
--- .../inference/hooks/useChartData.ts | 46 +++++++++++++++++-- .../app/src/components/inference/types.ts | 10 ++++ .../components/inference/ui/ChartControls.tsx | 3 +- packages/app/src/lib/benchmark-transform.ts | 12 ++++- packages/app/src/lib/data-mappings.ts | 8 +--- packages/app/src/lib/energy-metrics.test.ts | 10 ++++ 6 files changed, 77 insertions(+), 12 deletions(-) diff --git a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts index 81ab0780..57e9a1c2 100644 --- a/packages/app/src/components/inference/hooks/useChartData.ts +++ b/packages/app/src/components/inference/hooks/useChartData.ts @@ -16,7 +16,7 @@ import { filterDataByCostLimit } from '@/components/inference/utils'; import { useBenchmarks, benchmarkQueryOptions } from '@/hooks/api/use-benchmarks'; import { GPU_ALIAS_TO_CANONICAL, getModelSortIndex } from '@/lib/constants'; import { transformBenchmarkRows, withPercentile } from '@/lib/benchmark-transform'; -import type { Model, Sequence } from '@/lib/data-mappings'; +import { Sequence, type Model } from '@/lib/data-mappings'; import { calculateCostsForGpus, calculatePowerForGpus } from '@/lib/utils'; /** Build deduplicated comparison dates, excluding the main run date. */ @@ -216,7 +216,14 @@ export function useChartData( ? 'P99 Time To First Token (s)' : 'Median Time To First Token (s)'; - if (effectiveXMetric && chartDef.chartType === 'interactivity' && isInputMetric) { + const isAgentic = selectedSequence === Sequence.AgenticTraces; + + if ( + effectiveXMetric && + chartDef.chartType === 'interactivity' && + isInputMetric && + !isAgentic + ) { xAxisField = effectiveXMetric as keyof AggDataEntry; const labelKey = `${selectedYAxisMetric}_x_label` as keyof ChartDefinition; if (effectiveXMetric === chartDef[`${selectedYAxisMetric}_x` as keyof ChartDefinition]) { @@ -225,15 +232,40 @@ export function useChartData( xAxisLabel = isTtftOverride ? 
ttftLabel : chartDef.x_label; } } else if (chartDef.chartType === 'interactivity' && isInputMetric) { + // Agentic falls through here too — the manual X-axis dropdown is + // hidden in agentic mode (would double up with the percentile + // selector), so the config default + percentile post-processing + // below drives the x axis. const xOverrideKey = `${selectedYAxisMetric}_x` as keyof ChartDefinition; const xLabelOverrideKey = `${selectedYAxisMetric}_x_label` as keyof ChartDefinition; xAxisField = (chartDef[xOverrideKey] as keyof AggDataEntry) || chartDef.x; xAxisLabel = (chartDef[xLabelOverrideKey] as string) || chartDef.x_label; - } else if (chartDef.chartType === 'e2e' && isTtftOverride) { + } else if (chartDef.chartType === 'e2e' && isTtftOverride && !isAgentic) { xAxisField = effectiveXMetric as keyof AggDataEntry; xAxisLabel = ttftLabel; } + // Agentic: rewrite the resolved x metric to the chosen percentile, + // and relabel accordingly. naturalX is already percentile-adjusted, + // so the per-metric override path is the only one that actually + // changes here. + if (isAgentic) { + const adjusted = withPercentile( + xAxisField as string, + selectedPercentile, + ) as keyof AggDataEntry; + if (adjusted !== xAxisField) { + const pctlWord = + selectedPercentile === 'median' + ? 'Median' + : selectedPercentile === 'p99.9' + ? 'P99.9' + : selectedPercentile.toUpperCase(); + xAxisLabel = xAxisLabel.replace(/^(Median|Mean|P90|P99(?:\.9)?)\b/iu, pctlWord); + xAxisField = adjusted; + } + } + // The x-axis is "flipped" only when the good-direction reverses // (e.g. interactivity → TTFT: "higher is better" → "lower is better"). 
// E2EL → TTFT keeps the same direction ("lower is better" for both), @@ -269,7 +301,13 @@ export function useChartData( xAxisField, }; }), - [selectedYAxisMetric, selectedXAxisMetric, selectedE2eXAxisMetric, selectedPercentile], + [ + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + selectedPercentile, + selectedSequence, + ], ); // Build renderable graphs (data processing + stable chart definitions) diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index a2d9ef2e..cddeba54 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -50,23 +50,33 @@ export interface AggDataEntry { mean_ttft: number; median_ttft: number; std_ttft: number; + p90_ttft: number; p99_ttft: number; + 'p99.9_ttft': number; mean_tpot: number; mean_intvty: number; median_tpot: number; median_intvty: number; std_tpot: number; std_intvty: number; + p90_tpot: number; + p90_intvty: number; p99_tpot: number; p99_intvty: number; + 'p99.9_tpot': number; + 'p99.9_intvty': number; mean_itl: number; median_itl: number; std_itl: number; + p90_itl: number; p99_itl: number; + 'p99.9_itl': number; mean_e2el: number; median_e2el: number; std_e2el: number; + p90_e2el: number; p99_e2el: number; + 'p99.9_e2el': number; disagg: boolean; num_prefill_gpu: number; num_decode_gpu: number; diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx index 6707bd9e..7b4fa08f 100644 --- a/packages/app/src/components/inference/ui/ChartControls.tsx +++ b/packages/app/src/components/inference/ui/ChartControls.tsx @@ -269,7 +269,8 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
{graphs.some((g) => g.chartDefinition?.chartType === 'interactivity') && - isInputMetric && ( + isInputMetric && + selectedSequence !== Sequence.AgenticTraces && (
): Record { const out: Record = {}; - for (const suffix of ['mean', 'median', 'p90', 'p99']) { + for (const suffix of ['mean', 'median', 'p90', 'p99', 'p99.9']) { const itl = m[`${suffix}_itl`]; const ttlt = m[`${suffix}_ttlt`]; if (m[`${suffix}_e2el`] === undefined && ttlt !== undefined) out[`${suffix}_e2el`] = ttlt; @@ -62,23 +62,33 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry { mean_ttft: m.mean_ttft ?? 0, median_ttft: m.median_ttft ?? 0, std_ttft: m.std_ttft ?? 0, + p90_ttft: m.p90_ttft ?? 0, p99_ttft: m.p99_ttft ?? 0, + 'p99.9_ttft': m['p99.9_ttft'] ?? 0, mean_tpot: m.mean_tpot ?? 0, median_tpot: m.median_tpot ?? 0, std_tpot: m.std_tpot ?? 0, + p90_tpot: m.p90_tpot ?? 0, p99_tpot: m.p99_tpot ?? 0, + 'p99.9_tpot': m['p99.9_tpot'] ?? 0, mean_intvty: m.mean_intvty ?? 0, median_intvty: m.median_intvty ?? 0, std_intvty: m.std_intvty ?? 0, + p90_intvty: m.p90_intvty ?? 0, p99_intvty: m.p99_intvty ?? 0, + 'p99.9_intvty': m['p99.9_intvty'] ?? 0, mean_itl: m.mean_itl ?? 0, median_itl: m.median_itl ?? 0, std_itl: m.std_itl ?? 0, + p90_itl: m.p90_itl ?? 0, p99_itl: m.p99_itl ?? 0, + 'p99.9_itl': m['p99.9_itl'] ?? 0, mean_e2el: m.mean_e2el ?? 0, median_e2el: m.median_e2el ?? 0, std_e2el: m.std_e2el ?? 0, + p90_e2el: m.p90_e2el ?? 0, p99_e2el: m.p99_e2el ?? 0, + 'p99.9_e2el': m['p99.9_e2el'] ?? 0, disagg: row.disagg, num_prefill_gpu: row.num_prefill_gpu, num_decode_gpu: row.num_decode_gpu, diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index f137875c..bf48c864 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -186,21 +186,17 @@ export const SEQUENCE_OPTIONS = Object.keys(SEQUENCE_CONFIG) as Sequence[]; /** * Percentile of the latency distribution used for the chart x-axis when * viewing agentic traces. Agentic rows carry median/p90/p99/p99.9 variants - * for ttft, ttlt (=e2el), and itl (and intvty derived from itl) — pick which - * slice to plot. 
+ * for ttft, ttlt (=e2el), and itl (and intvty derived from itl); only the + * two most commonly read slices (p50, p99) are surfaced in the UI. */ export enum Percentile { Median = 'median', - P90 = 'p90', P99 = 'p99', - P99_9 = 'p99.9', } const PERCENTILE_CONFIG: Record = { [Percentile.Median]: { label: 'p50 (median)' }, - [Percentile.P90]: { label: 'p90' }, [Percentile.P99]: { label: 'p99' }, - [Percentile.P99_9]: { label: 'p99.9' }, }; export const PERCENTILE_OPTIONS = Object.keys(PERCENTILE_CONFIG) as Percentile[]; diff --git a/packages/app/src/lib/energy-metrics.test.ts b/packages/app/src/lib/energy-metrics.test.ts index 28cc1e36..54788585 100644 --- a/packages/app/src/lib/energy-metrics.test.ts +++ b/packages/app/src/lib/energy-metrics.test.ts @@ -57,23 +57,33 @@ function makeEntry(overrides: Partial = {}): AggDataEntry { mean_ttft: 0.5, median_ttft: 0.4, std_ttft: 0.1, + p90_ttft: 0.7, p99_ttft: 0.8, + 'p99.9_ttft': 0.9, mean_tpot: 0.02, mean_intvty: 45, median_tpot: 0.02, median_intvty: 44, std_tpot: 0.005, std_intvty: 5, + p90_tpot: 0.025, + p90_intvty: 55, p99_tpot: 0.03, p99_intvty: 60, + 'p99.9_tpot': 0.035, + 'p99.9_intvty': 65, mean_itl: 0.01, median_itl: 0.01, std_itl: 0.002, + p90_itl: 0.013, p99_itl: 0.015, + 'p99.9_itl': 0.018, mean_e2el: 5, median_e2el: 4.8, std_e2el: 0.5, + p90_e2el: 5.5, p99_e2el: 6, + 'p99.9_e2el': 6.5, disagg: false, num_prefill_gpu: 0, num_decode_gpu: 0, From 50a06d1419c70ddd8d24b2c6545da44fe6be3a4d Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 15 May 2026 12:27:19 -0500 Subject: [PATCH 13/41] fix(agentic): default percentile to p99 and drop median option --- packages/app/src/components/inference/InferenceContext.tsx | 2 +- packages/app/src/lib/data-mappings.ts | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index b4ccb9ef..af2d364e 100644 --- 
a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -122,7 +122,7 @@ export function InferenceProvider({ // Latency percentile applied to the chart x-axis for agentic scenarios. // Values: 'median' | 'p90' | 'p99' | 'p99.9'. Non-agentic charts ignore. const [selectedPercentile, setSelectedPercentile] = useState( - () => getUrlParam('i_pctl') || 'median', + () => getUrlParam('i_pctl') || 'p99', ); const [scaleType, setScaleType] = useState<'auto' | 'linear' | 'log'>( () => (getUrlParam('i_scale') as 'auto' | 'linear' | 'log') || 'auto', diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index bf48c864..1b4f47c3 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -186,16 +186,14 @@ export const SEQUENCE_OPTIONS = Object.keys(SEQUENCE_CONFIG) as Sequence[]; /** * Percentile of the latency distribution used for the chart x-axis when * viewing agentic traces. Agentic rows carry median/p90/p99/p99.9 variants - * for ttft, ttlt (=e2el), and itl (and intvty derived from itl); only the - * two most commonly read slices (p50, p99) are surfaced in the UI. + * for ttft, ttlt (=e2el), and itl (and intvty derived from itl); only p99 + * is surfaced in the UI. 
*/ export enum Percentile { - Median = 'median', P99 = 'p99', } const PERCENTILE_CONFIG: Record = { - [Percentile.Median]: { label: 'p50 (median)' }, [Percentile.P99]: { label: 'p99' }, }; From 3c96e9137776d1c368a0acdfeee6e769d5733464 Mon Sep 17 00:00:00 2001 From: Cam Quilici Date: Fri, 15 May 2026 12:31:27 -0500 Subject: [PATCH 14/41] fix(agentic): keep only p90 as the percentile option --- packages/app/src/components/inference/InferenceContext.tsx | 2 +- packages/app/src/lib/data-mappings.ts | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index 0ba14a21..accfdf9e 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -136,7 +136,7 @@ export function InferenceProvider({ // Latency percentile applied to the chart x-axis for agentic scenarios. // Values: 'median' | 'p90' | 'p99' | 'p99.9'. Non-agentic charts ignore. 
const [selectedPercentile, setSelectedPercentile] = useState( - () => getUrlParam('i_pctl') || 'p99', + () => getUrlParam('i_pctl') || 'p90', ); const [scaleType, setScaleType] = useState<'auto' | 'linear' | 'log'>( () => (getUrlParam('i_scale') as 'auto' | 'linear' | 'log') || 'auto', diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index 0afb304a..83e6648a 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -191,12 +191,10 @@ export const SEQUENCE_OPTIONS = Object.keys(SEQUENCE_CONFIG) as Sequence[]; */ export enum Percentile { P90 = 'p90', - P99 = 'p99', } const PERCENTILE_CONFIG: Record = { [Percentile.P90]: { label: 'p90' }, - [Percentile.P99]: { label: 'p99' }, }; export const PERCENTILE_OPTIONS = Object.keys(PERCENTILE_CONFIG) as Percentile[]; From 642081af77c8165ac89a5177abbd6c0244dfb9c0 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Fri, 15 May 2026 13:31:30 -0400 Subject: [PATCH 15/41] fix(agentic): default percentile to p90, surface only p90/p99 Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/app/cypress/support/mock-data.ts | 2 +- .../app/src/components/inference/InferenceContext.tsx | 2 +- .../app/src/components/inference/hooks/useChartData.ts | 9 ++------- packages/app/src/components/ui/chart-selectors.tsx | 2 +- packages/app/src/lib/data-mappings.ts | 6 ++++-- packages/app/src/lib/url-state.ts | 2 +- 6 files changed, 10 insertions(+), 13 deletions(-) diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index f267dcc9..34b89aba 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -189,7 +189,7 @@ export function createMockInferenceContext( workflowInfo: null, selectedYAxisMetric: 'y_tpPerGpu', setSelectedYAxisMetric: namedStub('setSelectedYAxisMetric'), - selectedPercentile: 'median', + 
selectedPercentile: 'p90', setSelectedPercentile: namedStub('setSelectedPercentile'), selectedXAxisMetric: null, setSelectedXAxisMetric: namedStub('setSelectedXAxisMetric'), diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index accfdf9e..36dc672d 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -134,7 +134,7 @@ export function InferenceProvider({ () => getUrlParam('i_e2e_xmetric') || null, ); // Latency percentile applied to the chart x-axis for agentic scenarios. - // Values: 'median' | 'p90' | 'p99' | 'p99.9'. Non-agentic charts ignore. + // Values: 'p90' | 'p99'. Non-agentic charts ignore. const [selectedPercentile, setSelectedPercentile] = useState( () => getUrlParam('i_pctl') || 'p90', ); diff --git a/packages/app/src/components/inference/hooks/useChartData.ts b/packages/app/src/components/inference/hooks/useChartData.ts index f2ef85ec..436fd662 100644 --- a/packages/app/src/components/inference/hooks/useChartData.ts +++ b/packages/app/src/components/inference/hooks/useChartData.ts @@ -83,7 +83,7 @@ export function useChartData( selectedRunDate?: string, enabled = true, latestAvailableDate?: string, - selectedPercentile = 'median', + selectedPercentile = 'p90', /** When set, only series for these two registry GPU keys are shown (compare pages). */ compareGpuPair?: readonly [string, string] | null, ) { @@ -261,12 +261,7 @@ export function useChartData( selectedPercentile, ) as keyof AggDataEntry; if (adjusted !== xAxisField) { - const pctlWord = - selectedPercentile === 'median' - ? 'Median' - : selectedPercentile === 'p99.9' - ? 
'P99.9' - : selectedPercentile.toUpperCase(); + const pctlWord = selectedPercentile.toUpperCase(); xAxisLabel = xAxisLabel.replace(/^(Median|Mean|P90|P99(?:\.9)?)\b/iu, pctlWord); xAxisField = adjusted; } diff --git a/packages/app/src/components/ui/chart-selectors.tsx b/packages/app/src/components/ui/chart-selectors.tsx index d2940de4..e30816fa 100644 --- a/packages/app/src/components/ui/chart-selectors.tsx +++ b/packages/app/src/components/ui/chart-selectors.tsx @@ -315,7 +315,7 @@ export function PercentileSelector({ - P99 TTFT - Median TTFT + P90 TTFT
diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index f0e1692a..78df2c37 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -408,27 +408,20 @@ export default function ChartDisplay() { if ( graph.chartDefinition.chartType === 'interactivity' && isInputMetric && - selectedXAxisMetric + selectedXAxisMetric === 'p90_ttft' ) { - if (selectedXAxisMetric === 'p99_ttft') { - return 'vs. P99 Time To First Token'; - } else if (selectedXAxisMetric === 'median_ttft') { - return 'vs. Median Time To First Token'; - } + return 'vs. P90 Time To First Token'; } // For e2e chart: render clickable inline dropdown for x-axis if (graph.chartDefinition.chartType === 'e2e') { const xAxisLabel = - selectedE2eXAxisMetric === 'p99_ttft' - ? 'P99 TTFT' - : selectedE2eXAxisMetric === 'median_ttft' - ? 'Median TTFT' - : 'End-to-end Latency'; + selectedE2eXAxisMetric === 'p90_ttft' + ? 
'P90 TTFT' + : 'End-to-end Latency'; const xAxisOptions = [ { value: null, label: 'End-to-end Latency' }, - { value: 'p99_ttft', label: 'P99 TTFT' }, - { value: 'median_ttft', label: 'Median TTFT' }, + { value: 'p90_ttft', label: 'P90 TTFT' }, ]; const zoomPrefix = selectedDateRange.startDate && diff --git a/packages/app/src/components/inference/utils.test.ts b/packages/app/src/components/inference/utils.test.ts index 8f8705e1..589ba580 100644 --- a/packages/app/src/components/inference/utils.test.ts +++ b/packages/app/src/components/inference/utils.test.ts @@ -157,12 +157,12 @@ describe('processOverlayChartData', () => { }); it('remaps x to config override for input metrics on interactivity chart', () => { - // inputTputPerGpu has x override to p99_ttft on interactivity chart + // inputTputPerGpu has x override to p90_ttft on interactivity chart const data = [ pt({ x: 100, inputTputPerGpu: { y: 5, roof: false }, - p99_ttft: 0.25, + p90_ttft: 0.25, median_intvty: 50, } as any), ]; @@ -176,16 +176,11 @@ describe('processOverlayChartData', () => { pt({ x: 100, inputTputPerGpu: { y: 5, roof: false }, - median_ttft: 0.1, + p90_ttft: 0.1, median_intvty: 50, } as any), ]; - const result = processOverlayChartData( - data, - 'interactivity', - 'y_inputTputPerGpu', - 'median_ttft', - ); + const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft'); expect(result).toHaveLength(1); expect(result[0].x).toBe(0.1); }); @@ -195,76 +190,62 @@ describe('processOverlayChartData', () => { pt({ x: 100, inputTputPerGpu: { y: 5, roof: false }, - p99_ttft: 0.25, + p90_ttft: 0.25, median_e2el: 2.5, } as any), ]; const result = processOverlayChartData(data, 'e2e', 'y_inputTputPerGpu', null); expect(result).toHaveLength(1); - // e2e uses median_e2el as x (from chart config default), not p99_ttft + // e2e uses median_e2el as x (from chart config default), not p90_ttft expect(result[0].x).toBe(2.5); }); - it('remaps x to TTFT for e2e chart when 
selectedXAxisMetric is p99_ttft', () => { - const data = [ - pt({ - x: 100, - tpPerGpu: { y: 42, roof: false }, - p99_ttft: 0.35, - median_e2el: 2.5, - } as any), - ]; - const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft'); - expect(result).toHaveLength(1); - expect(result[0].x).toBe(0.35); - }); - - it('remaps x to TTFT for e2e chart when selectedXAxisMetric is median_ttft', () => { + it('remaps x to TTFT for e2e chart when selectedXAxisMetric is p90_ttft', () => { const data = [ pt({ x: 100, tpPerGpu: { y: 42, roof: false }, - median_ttft: 0.12, + p90_ttft: 0.12, median_e2el: 2.5, } as any), ]; - const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'median_ttft'); + const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft'); expect(result).toHaveLength(1); expect(result[0].x).toBe(0.12); }); it('filters e2e TTFT outliers exceeding y_latency_limit', () => { const data = [ - pt({ tpPerGpu: { y: 10, roof: false }, p99_ttft: 0.5, median_e2el: 1 } as any), - pt({ tpPerGpu: { y: 5, roof: false }, p99_ttft: 999, median_e2el: 2 } as any), + pt({ tpPerGpu: { y: 10, roof: false }, p90_ttft: 0.5, median_e2el: 1 } as any), + pt({ tpPerGpu: { y: 5, roof: false }, p90_ttft: 999, median_e2el: 2 } as any), ]; - const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p99_ttft'); + const result = processOverlayChartData(data, 'e2e', 'y_tpPerGpu', 'p90_ttft'); // y_latency_limit is 60 in the e2e chart config — the 999 outlier should be filtered expect(result).toHaveLength(1); expect(result[0].x).toBe(0.5); }); it('does not filter interactivity points by latency limit when x-axis is default', () => { - // Regression: selectedXAxisMetric defaults to 'p99_ttft' but the interactivity + // Regression: selectedXAxisMetric defaults to 'p90_ttft' but the interactivity // chart's x-axis stays median_intvty for non-input metrics. The latency limit // (60) must NOT apply to median_intvty values. 
const data = [ pt({ tpPerGpu: { y: 42, roof: false }, median_intvty: 200 } as any), pt({ tpPerGpu: { y: 10, roof: false }, median_intvty: 30 } as any), ]; - const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p99_ttft'); + const result = processOverlayChartData(data, 'interactivity', 'y_tpPerGpu', 'p90_ttft'); expect(result).toHaveLength(2); }); it('applies latency limit on interactivity only when x-axis is actually overridden', () => { - // When an input metric IS selected and x-axis overrides to p99_ttft, + // When an input metric IS selected and x-axis overrides to p90_ttft, // the latency limit should apply. const data = [ - pt({ inputTputPerGpu: { y: 5, roof: false }, p99_ttft: 0.5, median_intvty: 10 } as any), - pt({ inputTputPerGpu: { y: 3, roof: false }, p99_ttft: 999, median_intvty: 20 } as any), + pt({ inputTputPerGpu: { y: 5, roof: false }, p90_ttft: 0.5, median_intvty: 10 } as any), + pt({ inputTputPerGpu: { y: 3, roof: false }, p90_ttft: 999, median_intvty: 20 } as any), ]; - const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p99_ttft'); - // x-axis is overridden to p99_ttft for input metric — latency limit SHOULD filter 999 + const result = processOverlayChartData(data, 'interactivity', 'y_inputTputPerGpu', 'p90_ttft'); + // x-axis is overridden to p90_ttft for input metric — latency limit SHOULD filter 999 expect(result).toHaveLength(1); expect(result[0].x).toBe(0.5); }); diff --git a/packages/app/src/components/inference/utils.ts b/packages/app/src/components/inference/utils.ts index 4b5335b6..735007ab 100644 --- a/packages/app/src/components/inference/utils.ts +++ b/packages/app/src/components/inference/utils.ts @@ -88,8 +88,7 @@ export function processOverlayChartData( let xAxisField: string = chartDef.x; // selectedXAxisMetric is already the effective metric for this chart type // (interactivity uses selectedXAxisMetric, e2e uses selectedE2eXAxisMetric) - const isTtftOverride = - 
selectedXAxisMetric === 'p99_ttft' || selectedXAxisMetric === 'median_ttft'; + const isTtftOverride = selectedXAxisMetric === 'p90_ttft'; if (selectedXAxisMetric && chartDef.chartType === 'interactivity' && isInputMetric) { xAxisField = selectedXAxisMetric; diff --git a/packages/app/src/components/ui/chart-selectors.tsx b/packages/app/src/components/ui/chart-selectors.tsx index e30816fa..19b4bfb0 100644 --- a/packages/app/src/components/ui/chart-selectors.tsx +++ b/packages/app/src/components/ui/chart-selectors.tsx @@ -315,7 +315,7 @@ export function PercentileSelector({