From 6db1e32f04f77ba43215f180649c39441409faf9 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 17 May 2026 21:28:59 +0000 Subject: [PATCH 1/4] feat(inference): add per-interactivity throughput table + AUC summary table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Below the Pareto chart on the inference page, render two new tables that summarize the visible Pareto-frontier curves into scalar form. - Table 1 (per-GPU throughput at each interactivity bucket): rows = enabled configs, columns = every 10 tok/s/user from 10 up to ceil(globalMax/10)*10. Cells are tok/s/gpu linearly interpolated along each config's Pareto frontier; "—" for out-of-range buckets; best per column highlighted. Linked sub-table shows % advantage vs a user-selectable baseline (default: MI355X SGLang) with infinity / negative-infinity / em-dash semantics and a +/-200%-capped red->white->green heatmap; cell text color picked via WCAG luminance for contrast. - Table 2 (AUC summary): trapezoidal area under each frontier from x=10 to ceil(globalMax/10)*10, with y treated as 0 outside the frontier's x-range. Columns: AUC, ratio + % vs primary baseline (default B200 SGLang non-MTP), ratio vs secondary baseline (default MI355X SGLang), ratio vs tertiary baseline (default MI355X ATOM). All three baselines are selectable. Self-vs-self is amber 1.00x/+0.0%; better is green; worse is red. Both tables share a single Pareto/interp/AUC implementation in @/lib/pareto. Verified against the spec's reference AUCs from eight_config_data.json (FP4 DeepSeek V4 Pro, 8K/1K, TP=8) -- all 8 configs match the expected values to within 0.5%. Tables react live to the existing filter controls (model, precision, ISL/OSL, legend on/off toggles). 
Co-Authored-By: Claude Opus 4.7 --- .../components/inference/ui/ChartDisplay.tsx | 7 + .../inference/ui/InteractivityTables.tsx | 656 ++++++++++++++++++ .../lib/__fixtures__/eight_config_data.json | 420 +++++++++++ packages/app/src/lib/pareto.test.ts | 130 ++++ packages/app/src/lib/pareto.ts | 108 +++ 5 files changed, 1321 insertions(+) create mode 100644 packages/app/src/components/inference/ui/InteractivityTables.tsx create mode 100644 packages/app/src/lib/__fixtures__/eight_config_data.json create mode 100644 packages/app/src/lib/pareto.test.ts create mode 100644 packages/app/src/lib/pareto.ts diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index f0e1692a..91d60136 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -50,6 +50,7 @@ import ComparisonChangelog from './ComparisonChangelog'; import CustomCosts from './CustomCosts'; import CustomPowers from './CustomPowers'; import GPUGraph from './GPUGraph'; +import InteractivityTables from './InteractivityTables'; import ReplayLauncher, { type ReplayLauncherHandle } from '../replay/ReplayLauncher'; import TrendChart from './TrendChart'; @@ -642,6 +643,12 @@ export default function ChartDisplay() { )}
{displayGraphs}
+ {/* Summary tables below the Pareto chart — only shown for the + tok/s/gpu y-axis since the interactivity-bucketing / AUC framing + assumes that metric. Tables react to model, precision, sequence and + the legend on/off toggles via useInference() context. */} + + {/* Performance Over Time — Modal Drill-Down */} { + const lc = k.toLowerCase(); + if (!lc.includes(lcHint)) return false; + if (excludeMtp && lc.endsWith('_mtp')) return false; + return true; + }); + if (match) return match; + } + return null; +} + +/** Format a non-negative integer with thousands separators. */ +function formatInt(n: number): string { + return Math.round(n).toLocaleString(); +} + +function srgbToLinear(c: number): number { + const v = c / 255; + return v <= 0.03928 ? v / 12.92 : ((v + 0.055) / 1.055) ** 2.4; +} + +/** WCAG 2.x relative luminance for an sRGB color. */ +function relativeLuminance(r: number, g: number, b: number): number { + return 0.2126 * srgbToLinear(r) + 0.7152 * srgbToLinear(g) + 0.0722 * srgbToLinear(b); +} + +/** + * Map a percent-diff in [-200, +200] to a red→white→green color. + * Beyond ±200 we clamp. Returns { background, color } where `color` is the + * WCAG-derived text color (white when background is dark, black when light). + */ +function percentDiffColor(pct: number): { background: string; color: string } { + // Clamp to ±200. + const t = Math.max(-1, Math.min(1, pct / 200)); + let r: number; + let g: number; + let b: number; + if (t >= 0) { + // white → green + // green target: #15803d (rgb 21, 128, 61) — Tailwind green-700 + r = Math.round(255 + (21 - 255) * t); + g = Math.round(255 + (128 - 255) * t); + b = Math.round(255 + (61 - 255) * t); + } else { + // white → red + // red target: #b91c1c (rgb 185, 28, 28) — Tailwind red-700 + const u = -t; + r = Math.round(255 + (185 - 255) * u); + g = Math.round(255 + (28 - 255) * u); + b = Math.round(255 + (28 - 255) * u); + } + const lum = relativeLuminance(r, g, b); + const color = lum > 0.45 ? 
'#0a0a0a' : '#ffffff'; + return { background: `rgb(${r}, ${g}, ${b})`, color }; +} + +const INFINITY_BG_POS = '#14532d'; // dark green (green-900) for ∞ +const INFINITY_BG_NEG = '#7f1d1d'; // dark red (red-900) for −∞ +const SELF_BG = '#fbbf24'; // amber-400 for baseline-vs-self +const COL_MAX_BG = '#bbf7d0'; // green-200 for best per column in throughput + +/** + * Build per-config Pareto frontiers from filtered InferenceData. Filters by + * selected precisions + active legend toggles, then groups by hwKey and runs + * the shared 2-D Pareto algorithm on (x, y) = (interactivity, tok/s/gpu). + */ +function useConfigSeries(): ConfigSeries[] { + const { graphs, activeHwTypes, selectedPrecisions, hardwareConfig } = useInference(); + return useMemo(() => { + const interactivityGraph = graphs.find((g) => g.chartDefinition.chartType === 'interactivity'); + if (!interactivityGraph) return []; + + // Group filtered points by hwKey. + const byHw = new Map(); + for (const d of interactivityGraph.data) { + const hw = String(d.hwKey); + if (activeHwTypes.size > 0 && !activeHwTypes.has(hw)) continue; + if (!selectedPrecisions.includes(d.precision)) continue; + if (!Number.isFinite(d.x) || !Number.isFinite(d.y)) continue; + const arr = byHw.get(hw) ?? []; + arr.push(d); + byHw.set(hw, arr); + } + + const result: ConfigSeries[] = []; + for (const [hwKey, points] of byHw) { + if (points.length < 2) continue; + const frontier = paretoFrontier(points.map((p) => ({ x: p.x, y: p.y }))); + if (frontier.length < 2) continue; + const hwConfig = hardwareConfig[hwKey] ?? getHardwareConfig(hwKey); + result.push({ hwKey, label: getDisplayLabel(hwConfig), frontier }); + } + // Order: same as legend (hardwareConfig insertion order, already sorted by + // model sort index in InferenceContext). + const order = Object.keys(hardwareConfig); + result.sort((a, b) => { + const ai = order.indexOf(a.hwKey); + const bi = order.indexOf(b.hwKey); + return (ai === -1 ? Infinity : ai) - (bi === -1 ? 
Infinity : bi); + }); + return result; + }, [graphs, activeHwTypes, selectedPrecisions, hardwareConfig]); +} + +interface BaselineSelectProps { + value: string; + onChange: (next: string) => void; + configs: ConfigSeries[]; + label: string; + testId?: string; +} + +function BaselineSelect({ value, onChange, configs, label, testId }: BaselineSelectProps) { + return ( +
+ {label}: + +
+ ); +} + +function InfoIcon({ text }: { text: string }) { + return ( + + + + + + {text} + + + ); +} + +/** Per-interactivity throughput table + linked percent-diff heatmap. */ +function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { + // Compute buckets: every 10 from 10 up through ceil(globalMax / 10) * 10. + const buckets = useMemo(() => { + let globalMax = 0; + for (const c of configs) { + const maxX = c.frontier.at(-1)?.x ?? 0; + if (maxX > globalMax) globalMax = maxX; + } + const hi = Math.ceil(globalMax / 10) * 10; + const out: number[] = []; + for (let v = 10; v <= hi; v += 10) out.push(v); + return out; + }, [configs]); + + // Per-(config, bucket) throughput cell, with the column-max highlight. + const tputCells = useMemo(() => { + const grid: (number | null)[][] = configs.map((c) => + buckets.map((b) => interpAlongFrontier(c.frontier, b)), + ); + const colMaxRow: (number | null)[] = buckets.map((_, ci) => { + let m: number | null = null; + for (const row of grid) { + const v = row[ci]; + if (v !== null && (m === null || v > m)) m = v; + } + return m; + }); + return { grid, colMaxRow }; + }, [configs, buckets]); + + // Baseline selection for the percent-diff sub-table. + const enabledKeys = configs.map((c) => c.hwKey); + const defaultBaseline = + pickDefaultBaseline(enabledKeys, DEFAULT_THROUGHPUT_BASELINE_HINTS) ?? enabledKeys[0] ?? ''; + const [baselineKey, setBaselineKey] = useState(defaultBaseline); + // If the previously-picked baseline isn't enabled anymore, snap to the default. + const effectiveBaseline = enabledKeys.includes(baselineKey) ? baselineKey : defaultBaseline; + const baselineRow = useMemo(() => { + const idx = configs.findIndex((c) => c.hwKey === effectiveBaseline); + if (idx === -1) return null; + return tputCells.grid[idx]; + }, [configs, tputCells, effectiveBaseline]); + + return ( + +
+
+

Per-GPU throughput at each interactivity bucket

+ +
+
+

+ Linearly interpolated tok/s/gpu along each config's Pareto frontier. Reactive to model, + precision, sequence and the legend on/off toggles above. +

+ + {configs.length === 0 ? ( +

+ Enable at least one configuration in the legend to populate the tables. +

+ ) : ( +
+ + + + + {buckets.map((b) => ( + + ))} + + + + + + + {configs.map((c, ri) => ( + + + {buckets.map((b, ci) => { + const v = tputCells.grid[ri][ci]; + if (v === null) { + return ( + + ); + } + const isMax = tputCells.colMaxRow[ci] === v; + return ( + + ); + })} + + ))} + +
+ Config + + {b} +
+ Interactivity (tok/s/user) → + +
+ {c.label} + + — + + {formatInt(v)} +
+
+ )} + + {configs.length > 0 && ( +
+
+
+

% advantage vs baseline

+ +
+ { + setBaselineKey(v); + track('inference_throughput_baseline_changed', { baseline: v }); + }} + testId="throughput-baseline-select" + /> +
+
+ + + + + {buckets.map((b) => ( + + ))} + + + + {configs.map((c, ri) => ( + + + {buckets.map((b, ci) => { + const other = tputCells.grid[ri][ci]; + const baseline = baselineRow ? baselineRow[ci] : null; + const isSelf = c.hwKey === effectiveBaseline; + + if (isSelf) { + return ( + + ); + } + + if (other === null && baseline === null) { + return ( + + ); + } + if (other !== null && baseline === null) { + return ( + + ); + } + if (other === null && baseline !== null) { + return ( + + ); + } + const pct = ((other! - baseline!) / baseline!) * 100; + const { background, color } = percentDiffColor(pct); + return ( + + ); + })} + + ))} + +
+ Config + + {b} +
+ {c.label} + + 0.0% + + — + + ∞ + + −∞ + + {pct >= 0 ? '+' : ''} + {pct.toFixed(0)}% +
+
+
+ )} +
+ ); +} + +/** AUC summary table with three baseline columns. */ +function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { + const hi = useMemo(() => { + let globalMax = 0; + for (const c of configs) { + const maxX = c.frontier.at(-1)?.x ?? 0; + if (maxX > globalMax) globalMax = maxX; + } + return Math.ceil(globalMax / 10) * 10; + }, [configs]); + + const aucs = useMemo( + () => configs.map((c) => aucUnderFrontier(c.frontier, 10, hi)), + [configs, hi], + ); + + const enabledKeys = configs.map((c) => c.hwKey); + const defaultPrimary = + pickDefaultBaseline(enabledKeys, DEFAULT_AUC_PRIMARY_HINTS) ?? enabledKeys[0] ?? ''; + const defaultSecondary = + pickDefaultBaseline(enabledKeys, DEFAULT_AUC_SECONDARY_HINTS) ?? enabledKeys[0] ?? ''; + const defaultTertiary = + pickDefaultBaseline(enabledKeys, DEFAULT_AUC_TERTIARY_HINTS, false) ?? enabledKeys[0] ?? ''; + + const [primary, setPrimary] = useState(defaultPrimary); + const [secondary, setSecondary] = useState(defaultSecondary); + const [tertiary, setTertiary] = useState(defaultTertiary); + + const eff = (s: string, d: string) => (enabledKeys.includes(s) ? s : d); + const ePrimary = eff(primary, defaultPrimary); + const eSecondary = eff(secondary, defaultSecondary); + const eTertiary = eff(tertiary, defaultTertiary); + + const baselineAuc = (key: string): number | null => { + const i = configs.findIndex((c) => c.hwKey === key); + return i === -1 ? 
null : aucs[i]; + }; + + const primaryAuc = baselineAuc(ePrimary); + const secondaryAuc = baselineAuc(eSecondary); + const tertiaryAuc = baselineAuc(eTertiary); + + const ratioCell = (auc: number, baseline: number | null, baselineKey: string, hwKey: string) => { + if (baseline === null || baseline === 0) return { text: '—', style: undefined }; + const ratio = auc / baseline; + if (hwKey === baselineKey) { + return { + text: '1.00×', + style: { backgroundColor: SELF_BG, color: '#0a0a0a' }, + }; + } + const pctDiff = (ratio - 1) * 100; + const { background, color } = percentDiffColor(pctDiff); + return { + text: `${ratio.toFixed(2)}×`, + style: { backgroundColor: background, color }, + }; + }; + + return ( + +
+

Area under Pareto frontier (AUC summary)

+ +
+

+ Integration window: 10 → {hi} tok/s/user. +

+ + {configs.length === 0 ? ( +

+ Enable at least one configuration in the legend to populate the AUC summary. +

+ ) : ( + <> +
+ { + setPrimary(v); + track('inference_auc_primary_baseline_changed', { baseline: v }); + }} + testId="auc-primary-baseline-select" + /> + { + setSecondary(v); + track('inference_auc_secondary_baseline_changed', { baseline: v }); + }} + testId="auc-secondary-baseline-select" + /> + { + setTertiary(v); + track('inference_auc_tertiary_baseline_changed', { baseline: v }); + }} + testId="auc-tertiary-baseline-select" + /> +
+
+ + + + + + + + + + + + + {configs.map((c, i) => { + const auc = aucs[i]; + const primaryR = ratioCell(auc, primaryAuc, ePrimary, c.hwKey); + const secondaryR = ratioCell(auc, secondaryAuc, eSecondary, c.hwKey); + const tertiaryR = ratioCell(auc, tertiaryAuc, eTertiary, c.hwKey); + let pctText: string; + let pctStyle: React.CSSProperties | undefined; + if (primaryAuc === null || primaryAuc === 0) { + pctText = '—'; + pctStyle = undefined; + } else if (c.hwKey === ePrimary) { + pctText = '+0.0%'; + pctStyle = { backgroundColor: SELF_BG, color: '#0a0a0a' }; + } else { + const pct = (auc / primaryAuc - 1) * 100; + const { background, color } = percentDiffColor(pct); + pctText = `${pct >= 0 ? '+' : ''}${pct.toFixed(1)}%`; + pctStyle = { backgroundColor: background, color }; + } + return ( + + + + + + + + + ); + })} + +
ConfigAUC + Ratio vs primary + + % vs primary + + Ratio vs secondary + + Ratio vs tertiary +
+ {c.label} + {formatInt(auc)} + {primaryR.text} + + {pctText} + + {secondaryR.text} + + {tertiaryR.text} +
+
+ + )} +
+ ); +} + +/** + * Section that renders the two summary tables below the Pareto chart on the + * inference page. Only shown when the active y-axis metric is "Token + * Throughput per GPU" — the AUC + interactivity framing assumes that metric. + */ +export default function InteractivityTables() { + const { selectedYAxisMetric } = useInference(); + const configs = useConfigSeries(); + + if (selectedYAxisMetric !== 'y_tpPerGpu') return null; + + return ( + <> + + + + ); +} diff --git a/packages/app/src/lib/__fixtures__/eight_config_data.json b/packages/app/src/lib/__fixtures__/eight_config_data.json new file mode 100644 index 00000000..e18fbcb2 --- /dev/null +++ b/packages/app/src/lib/__fixtures__/eight_config_data.json @@ -0,0 +1,420 @@ +{ + "MI355X_SGLang_nonMTP": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 42.4425, + "Token_Throughput_per_GPU_tok_s_gpu": 43.927, + "Median_TTFT_ms": 1.8887 + }, + { + "Conc": 2, + "Interactivity_tok_s_user": 40.0599, + "Token_Throughput_per_GPU_tok_s_gpu": 83.1647, + "Median_TTFT_ms": 1.837 + }, + { + "Conc": 4, + "Interactivity_tok_s_user": 32.8412, + "Token_Throughput_per_GPU_tok_s_gpu": 135.9584, + "Median_TTFT_ms": 1.5697 + }, + { + "Conc": 8, + "Interactivity_tok_s_user": 28.2131, + "Token_Throughput_per_GPU_tok_s_gpu": 233.3679, + "Median_TTFT_ms": 1.4875 + }, + { + "Conc": 16, + "Interactivity_tok_s_user": 20.0572, + "Token_Throughput_per_GPU_tok_s_gpu": 336.0692, + "Median_TTFT_ms": 1.4909 + }, + { + "Conc": 16, + "Interactivity_tok_s_user": 20.1404, + "Token_Throughput_per_GPU_tok_s_gpu": 302.1082, + "Median_TTFT_ms": 4.7495 + }, + { + "Conc": 32, + "Interactivity_tok_s_user": 16.5069, + "Token_Throughput_per_GPU_tok_s_gpu": 488.2661, + "Median_TTFT_ms": 5.2389 + }, + { + "Conc": 64, + "Interactivity_tok_s_user": 15.0528, + "Token_Throughput_per_GPU_tok_s_gpu": 802.9119, + "Median_TTFT_ms": 14.1662 + }, + { + "Conc": 128, + "Interactivity_tok_s_user": 10.121, + "Token_Throughput_per_GPU_tok_s_gpu": 1194.3396, + 
"Median_TTFT_ms": 16.3291 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 6.0659, + "Token_Throughput_per_GPU_tok_s_gpu": 1503.2389, + "Median_TTFT_ms": 19.203 + } + ], + "MI355X_ATOM_nonMTP": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 43.3401, + "Token_Throughput_per_GPU_tok_s_gpu": 47.4194, + "Median_TTFT_ms": 0.4657 + }, + { + "Conc": 2, + "Interactivity_tok_s_user": 41.1286, + "Token_Throughput_per_GPU_tok_s_gpu": 89.3156, + "Median_TTFT_ms": 0.4643 + }, + { + "Conc": 4, + "Interactivity_tok_s_user": 39.377, + "Token_Throughput_per_GPU_tok_s_gpu": 168.4226, + "Median_TTFT_ms": 0.4865 + }, + { + "Conc": 8, + "Interactivity_tok_s_user": 35.9213, + "Token_Throughput_per_GPU_tok_s_gpu": 307.4319, + "Median_TTFT_ms": 0.4701 + }, + { + "Conc": 16, + "Interactivity_tok_s_user": 29.9705, + "Token_Throughput_per_GPU_tok_s_gpu": 512.6047, + "Median_TTFT_ms": 0.4759 + }, + { + "Conc": 32, + "Interactivity_tok_s_user": 23.9073, + "Token_Throughput_per_GPU_tok_s_gpu": 814.9395, + "Median_TTFT_ms": 0.4957 + }, + { + "Conc": 64, + "Interactivity_tok_s_user": 16.6093, + "Token_Throughput_per_GPU_tok_s_gpu": 1162.8702, + "Median_TTFT_ms": 0.6299 + }, + { + "Conc": 128, + "Interactivity_tok_s_user": 10.4412, + "Token_Throughput_per_GPU_tok_s_gpu": 1469.8935, + "Median_TTFT_ms": 0.6871 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 2.3998, + "Token_Throughput_per_GPU_tok_s_gpu": 704.7307, + "Median_TTFT_ms": 3.5858 + }, + { + "Conc": 512, + "Interactivity_tok_s_user": 3.7953, + "Token_Throughput_per_GPU_tok_s_gpu": 2138.47, + "Median_TTFT_ms": 1.7068 + } + ], + "B200_SGLang_nonMTP": [ + { + "Conc": 2, + "Interactivity_tok_s_user": 80.2493, + "Token_Throughput_per_GPU_tok_s_gpu": 145.0523, + "Median_TTFT_ms": 0.454 + }, + { + "Conc": 4, + "Interactivity_tok_s_user": 70.4374, + "Token_Throughput_per_GPU_tok_s_gpu": 261.4948, + "Median_TTFT_ms": 0.4077 + }, + { + "Conc": 8, + "Interactivity_tok_s_user": 60.7308, + "Token_Throughput_per_GPU_tok_s_gpu": 513.6405, + 
"Median_TTFT_ms": 0.3958 + }, + { + "Conc": 16, + "Interactivity_tok_s_user": 47.7448, + "Token_Throughput_per_GPU_tok_s_gpu": 816.2807, + "Median_TTFT_ms": 0.3986 + }, + { + "Conc": 32, + "Interactivity_tok_s_user": 34.3571, + "Token_Throughput_per_GPU_tok_s_gpu": 1169.9087, + "Median_TTFT_ms": 0.4118 + }, + { + "Conc": 64, + "Interactivity_tok_s_user": 19.183, + "Token_Throughput_per_GPU_tok_s_gpu": 1330.0161, + "Median_TTFT_ms": 0.8243 + }, + { + "Conc": 128, + "Interactivity_tok_s_user": 13.233, + "Token_Throughput_per_GPU_tok_s_gpu": 1945.3725, + "Median_TTFT_ms": 0.8562 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 8.9919, + "Token_Throughput_per_GPU_tok_s_gpu": 2600.2499, + "Median_TTFT_ms": 0.9585 + }, + { + "Conc": 512, + "Interactivity_tok_s_user": 6.0656, + "Token_Throughput_per_GPU_tok_s_gpu": 3492.0547, + "Median_TTFT_ms": 1.1088 + } + ], + "B200_DynamoVLLM_nonMTP_disagg": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 80.1069, + "Token_Throughput_per_GPU_tok_s_gpu": 29.4083, + "Median_TTFT_ms": 3.8403 + }, + { + "Conc": 16, + "Interactivity_tok_s_user": 53.1696, + "Token_Throughput_per_GPU_tok_s_gpu": 391.6534, + "Median_TTFT_ms": 3.328 + }, + { + "Conc": 32, + "Interactivity_tok_s_user": 40.7967, + "Token_Throughput_per_GPU_tok_s_gpu": 614.4892, + "Median_TTFT_ms": 3.3836 + }, + { + "Conc": 64, + "Interactivity_tok_s_user": 32.1821, + "Token_Throughput_per_GPU_tok_s_gpu": 1008.1675, + "Median_TTFT_ms": 3.2835 + }, + { + "Conc": 128, + "Interactivity_tok_s_user": 26.334, + "Token_Throughput_per_GPU_tok_s_gpu": 1660.2779, + "Median_TTFT_ms": 3.4014 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 19.5779, + "Token_Throughput_per_GPU_tok_s_gpu": 1753.8925, + "Median_TTFT_ms": 1.4311 + }, + { + "Conc": 512, + "Interactivity_tok_s_user": 18.2665, + "Token_Throughput_per_GPU_tok_s_gpu": 3195.7277, + "Median_TTFT_ms": 1.5909 + }, + { + "Conc": 1024, + "Interactivity_tok_s_user": 17.3737, + "Token_Throughput_per_GPU_tok_s_gpu": 5801.349, + 
"Median_TTFT_ms": 2.9751 + }, + { + "Conc": 8192, + "Interactivity_tok_s_user": 14.8238, + "Token_Throughput_per_GPU_tok_s_gpu": 7329.1025, + "Median_TTFT_ms": 222.4298 + }, + { + "Conc": 12345, + "Interactivity_tok_s_user": 14.8342, + "Token_Throughput_per_GPU_tok_s_gpu": 7360.2266, + "Median_TTFT_ms": 369.2497 + } + ], + "GB200_DynamoVLLM_nonMTP_disagg": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 75.4019, + "Token_Throughput_per_GPU_tok_s_gpu": 32.7974, + "Median_TTFT_ms": 0.6629 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 24.2552, + "Token_Throughput_per_GPU_tok_s_gpu": 3147.9943, + "Median_TTFT_ms": 2.0077 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 32.4352, + "Token_Throughput_per_GPU_tok_s_gpu": 1613.8082, + "Median_TTFT_ms": 2.3161 + }, + { + "Conc": 512, + "Interactivity_tok_s_user": 21.0842, + "Token_Throughput_per_GPU_tok_s_gpu": 5336.1547, + "Median_TTFT_ms": 2.341 + }, + { + "Conc": 512, + "Interactivity_tok_s_user": 28.2006, + "Token_Throughput_per_GPU_tok_s_gpu": 2004.5428, + "Median_TTFT_ms": 17.6427 + }, + { + "Conc": 1024, + "Interactivity_tok_s_user": 21.5425, + "Token_Throughput_per_GPU_tok_s_gpu": 6036.2244, + "Median_TTFT_ms": 40.5199 + }, + { + "Conc": 4096, + "Interactivity_tok_s_user": 15.092, + "Token_Throughput_per_GPU_tok_s_gpu": 8933.0452, + "Median_TTFT_ms": 51.7808 + }, + { + "Conc": 4096, + "Interactivity_tok_s_user": 18.402, + "Token_Throughput_per_GPU_tok_s_gpu": 8153.0641, + "Median_TTFT_ms": 117.6863 + } + ], + "GB200_DynamoVLLM_MTP_disagg": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 152.9557, + "Token_Throughput_per_GPU_tok_s_gpu": 143.2128, + "Median_TTFT_ms": 0.3757 + }, + { + "Conc": 16, + "Interactivity_tok_s_user": 99.465, + "Token_Throughput_per_GPU_tok_s_gpu": 269.0948, + "Median_TTFT_ms": 1.5557 + }, + { + "Conc": 32, + "Interactivity_tok_s_user": 83.1891, + "Token_Throughput_per_GPU_tok_s_gpu": 490.2363, + "Median_TTFT_ms": 1.3076 + }, + { + "Conc": 64, + "Interactivity_tok_s_user": 
63.4528, + "Token_Throughput_per_GPU_tok_s_gpu": 721.1578, + "Median_TTFT_ms": 1.5374 + }, + { + "Conc": 128, + "Interactivity_tok_s_user": 44.0639, + "Token_Throughput_per_GPU_tok_s_gpu": 2584.5112, + "Median_TTFT_ms": 2.502 + }, + { + "Conc": 1024, + "Interactivity_tok_s_user": 16.4509, + "Token_Throughput_per_GPU_tok_s_gpu": 5781.1445, + "Median_TTFT_ms": 2.4078 + } + ], + "GB300_DynamoSGLang_nonMTP_disagg": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 92.0641, + "Token_Throughput_per_GPU_tok_s_gpu": 94.053, + "Median_TTFT_ms": 0.9646 + }, + { + "Conc": 1024, + "Interactivity_tok_s_user": 47.2857, + "Token_Throughput_per_GPU_tok_s_gpu": 3106.1871, + "Median_TTFT_ms": 113.8477 + }, + { + "Conc": 1024, + "Interactivity_tok_s_user": 37.9203, + "Token_Throughput_per_GPU_tok_s_gpu": 7099.6766, + "Median_TTFT_ms": 8.9875 + }, + { + "Conc": 4096, + "Interactivity_tok_s_user": 26.3267, + "Token_Throughput_per_GPU_tok_s_gpu": 9599.2883, + "Median_TTFT_ms": 31.6256 + }, + { + "Conc": 8192, + "Interactivity_tok_s_user": 22.3924, + "Token_Throughput_per_GPU_tok_s_gpu": 10419.6758, + "Median_TTFT_ms": 64.9696 + }, + { + "Conc": 21504, + "Interactivity_tok_s_user": 11.0323, + "Token_Throughput_per_GPU_tok_s_gpu": 11444.0756, + "Median_TTFT_ms": 92.2394 + } + ], + "GB300_DynamoSGLang_MTP_disagg": [ + { + "Conc": 1, + "Interactivity_tok_s_user": 173.3876, + "Token_Throughput_per_GPU_tok_s_gpu": 161.3425, + "Median_TTFT_ms": 0.9401 + }, + { + "Conc": 8, + "Interactivity_tok_s_user": 160.0061, + "Token_Throughput_per_GPU_tok_s_gpu": 289.7123, + "Median_TTFT_ms": 1.6635 + }, + { + "Conc": 32, + "Interactivity_tok_s_user": 135.693, + "Token_Throughput_per_GPU_tok_s_gpu": 688.7133, + "Median_TTFT_ms": 4.0586 + }, + { + "Conc": 64, + "Interactivity_tok_s_user": 116.0557, + "Token_Throughput_per_GPU_tok_s_gpu": 1226.2824, + "Median_TTFT_ms": 4.8214 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 85.8193, + "Token_Throughput_per_GPU_tok_s_gpu": 2652.9551, + "Median_TTFT_ms": 
24.9809 + }, + { + "Conc": 256, + "Interactivity_tok_s_user": 70.3439, + "Token_Throughput_per_GPU_tok_s_gpu": 3884.9172, + "Median_TTFT_ms": 23.7946 + }, + { + "Conc": 512, + "Interactivity_tok_s_user": 58.2314, + "Token_Throughput_per_GPU_tok_s_gpu": 6229.1466, + "Median_TTFT_ms": 19.6604 + }, + { + "Conc": 1024, + "Interactivity_tok_s_user": 49.6076, + "Token_Throughput_per_GPU_tok_s_gpu": 7564.4013, + "Median_TTFT_ms": 22.5606 + } + ] +} diff --git a/packages/app/src/lib/pareto.test.ts b/packages/app/src/lib/pareto.test.ts new file mode 100644 index 00000000..b1b1bfba --- /dev/null +++ b/packages/app/src/lib/pareto.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it } from 'vitest'; + +import { aucUnderFrontier, interpAlongFrontier, paretoFrontier, type Point2D } from '@/lib/pareto'; + +import eightConfigData from './__fixtures__/eight_config_data.json'; + +interface RawPoint { + Conc: number; + Interactivity_tok_s_user: number; + Token_Throughput_per_GPU_tok_s_gpu: number; + Median_TTFT_ms: number; +} + +const toPoints = (raw: RawPoint[]): Point2D[] => + raw.map((p) => ({ x: p.Interactivity_tok_s_user, y: p.Token_Throughput_per_GPU_tok_s_gpu })); + +describe('paretoFrontier', () => { + it('returns empty for empty input', () => { + expect(paretoFrontier([])).toEqual([]); + }); + + it('keeps only non-dominated points and sorts ascending x', () => { + const pts: Point2D[] = [ + { x: 10, y: 100 }, + { x: 20, y: 90 }, // dominated by (10,100)? no — x is higher + { x: 5, y: 110 }, + { x: 15, y: 50 }, // dominated by (20,90) + { x: 30, y: 60 }, + ]; + const f = paretoFrontier(pts); + // non-dominated: (5,110), (10,100)?, (20,90), (30,60) + // (10,100) dominated by (5,110)? (5,110) has lower x but higher y → not dominated + // For "higher x AND higher y both better", (10,100) is dominated iff some point has + // x > 10 AND y > 100. (20,90)? no. (30,60)? no. So (10,100) is on the frontier. 
+ expect(f.map((p) => p.x)).toEqual([5, 10, 20, 30]); + expect(f.map((p) => p.y)).toEqual([110, 100, 90, 60]); + }); +}); + +describe('interpAlongFrontier', () => { + const f: Point2D[] = [ + { x: 10, y: 100 }, + { x: 20, y: 200 }, + { x: 50, y: 350 }, + ]; + + it('returns null outside range', () => { + expect(interpAlongFrontier(f, 5)).toBeNull(); + expect(interpAlongFrontier(f, 100)).toBeNull(); + }); + + it('returns exact value at vertices', () => { + expect(interpAlongFrontier(f, 10)).toBe(100); + expect(interpAlongFrontier(f, 20)).toBe(200); + expect(interpAlongFrontier(f, 50)).toBe(350); + }); + + it('linearly interpolates between vertices', () => { + // midpoint of (10,100)-(20,200) → 15, 150 + expect(interpAlongFrontier(f, 15)).toBeCloseTo(150, 9); + // 1/3 of the way (20→50, 0→1/3) at x=30 → y = 200 + (30-20)/(50-20) * (350-200) = 200 + 50 = 250 + expect(interpAlongFrontier(f, 30)).toBeCloseTo(250, 9); + }); +}); + +describe('aucUnderFrontier', () => { + it('integrates a trivial triangle exactly', () => { + // frontier y=x from x=0..10, AUC over [0,10] = 50 + const f = [ + { x: 0, y: 0 }, + { x: 10, y: 10 }, + ]; + expect(aucUnderFrontier(f, 0, 10)).toBeCloseTo(50, 9); + }); + + it('zeros the integrand outside the frontier x-range', () => { + // frontier only covers x in [10, 20], integrate [0, 30] + const f = [ + { x: 10, y: 5 }, + { x: 20, y: 5 }, + ]; + // y=5 over x in [10,20] → AUC = 50. Outside that range y treated as 0. + expect(aucUnderFrontier(f, 0, 30)).toBeCloseTo(50, 9); + }); + + it('returns 0 when integration window is outside the frontier', () => { + const f = [ + { x: 10, y: 5 }, + { x: 20, y: 5 }, + ]; + expect(aucUnderFrontier(f, 30, 40)).toBe(0); + }); + + // Sanity-check the full pipeline (pareto → AUC) against the spec's + // reference AUCs computed by the Python implementation from the same + // 8-config sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8). + // Window: 10 → ceil(globalMax/10)*10. 
globalMax across these 8 configs is + // ~85, so window is [10, 90]. + describe('matches Python reference AUCs from spec sample data', () => { + // Determine the actual global window from the fixture (ceil-to-10). + const allXs = (Object.values(eightConfigData) as RawPoint[][]).flatMap((rows) => + rows.map((r) => r.Interactivity_tok_s_user), + ); + const globalMax = Math.max(...allXs); + const hi = Math.ceil(globalMax / 10) * 10; + const window: [number, number] = [10, hi]; + + const cases: [string, number][] = [ + ['MI355X_SGLang_nonMTP', 11_457], + ['MI355X_ATOM_nonMTP', 23_659], + ['B200_SGLang_nonMTP', 63_495], + ['B200_DynamoVLLM_nonMTP_disagg', 62_177], + ['GB200_DynamoVLLM_nonMTP_disagg', 116_220], + ['GB200_DynamoVLLM_MTP_disagg', 176_705], + ['GB300_DynamoSGLang_nonMTP_disagg', 379_854], + ['GB300_DynamoSGLang_MTP_disagg', 263_727], + ]; + + for (const [name, expected] of cases) { + it(`${name} ≈ ${expected.toLocaleString()}`, () => { + const raw = (eightConfigData as Record)[name]; + expect(raw, `fixture missing ${name}`).toBeTruthy(); + const f = paretoFrontier(toPoints(raw)); + const auc = aucUnderFrontier(f, window[0], window[1]); + // Expected numbers in the spec are rounded to whole units; allow ±0.5%. + expect(Math.abs(auc - expected) / expected).toBeLessThan(0.005); + }); + } + }); +}); diff --git a/packages/app/src/lib/pareto.ts b/packages/app/src/lib/pareto.ts new file mode 100644 index 00000000..717a7b6a --- /dev/null +++ b/packages/app/src/lib/pareto.ts @@ -0,0 +1,108 @@ +/** + * Shared 2-D Pareto-frontier utilities for "higher x AND higher y both better" + * curves, plus linear interpolation along the frontier and trapezoidal AUC. + * + * The chart layer has its own metric-aware helpers (calculateRoofline et al) + * that operate on full InferenceData points and `upper_left | upper_right | …` + * directions. This module is the plain numeric core — it works on + * `{ x, y }`-shaped points and is what tables / non-chart consumers should use. 
+ * + * Sorting note: the frontier is always returned in ascending-x order so + * downstream interp/AUC can treat the xs as a sorted grid. + */ + +export interface Point2D { + x: number; + y: number; +} + +/** + * Pareto frontier for "higher x AND higher y is better" (upper-right). Returns + * non-dominated points sorted by ascending x. + * + * On the interactivity vs tok/s/gpu chart the visible frontier looks like + * "upper-left" because as concurrency rises x falls while y rises — but the + * non-domination relation is the same: a point is on the frontier when no + * other point has BOTH greater x AND greater y. So the same algorithm works. + */ +export function paretoFrontier(points: readonly T[]): T[] { + if (points.length === 0) return []; + // Sort by descending x. The point with max x is always kept; then walk down + // and keep any point whose y exceeds the running max y. + const sorted = [...points].toSorted((a, b) => b.x - a.x); + const front: T[] = []; + let maxY = -Infinity; + for (const p of sorted) { + if (p.y > maxY) { + front.push(p); + maxY = p.y; + } + } + // Return ascending x for downstream consumers. + return front.toSorted((a, b) => a.x - b.x); +} + +/** + * Linear interpolation along a frontier that's already sorted by ascending x. + * Returns null when x is outside [minX, maxX] of the frontier. + */ +export function interpAlongFrontier(frontier: readonly Point2D[], x: number): number | null { + const last = frontier.at(-1); + if (frontier.length === 0 || !last) return null; + const minX = frontier[0].x; + const maxX = last.x; + if (x < minX || x > maxX) return null; + if (frontier.length === 1) return frontier[0].y; + // Binary-search insertion point. 
+ let lo = 0; + let hi = frontier.length - 1; + while (hi - lo > 1) { + const mid = (lo + hi) >>> 1; + if (frontier[mid].x <= x) lo = mid; + else hi = mid; + } + const a = frontier[lo]; + const b = frontier[hi]; + if (b.x === a.x) return Math.max(a.y, b.y); + const t = (x - a.x) / (b.x - a.x); + return a.y + t * (b.y - a.y); +} + +/** + * Trapezoidal AUC under the linearly-interpolated frontier between [lo, hi]. + * Outside the frontier's x-range y is treated as 0, so a config that doesn't + * reach part of the integration range contributes 0 to that part. Matches the + * Python reference: np.interp on a fine grid with the out-of-range region + * zeroed, then np.trapezoid. + * + * Closed-form rather than 10 001-sample grid — same answer to machine + * precision because the integrand is piecewise-linear, and avoids allocating + * arrays on every render. + */ +export function aucUnderFrontier(frontier: readonly Point2D[], lo: number, hi: number): number { + const last = frontier.at(-1); + if (frontier.length === 0 || !last || hi <= lo) return 0; + const minX = frontier[0].x; + const maxX = last.x; + const effLo = Math.max(lo, minX); + const effHi = Math.min(hi, maxX); + if (effHi <= effLo) return 0; + + // Build the integration breakpoints: clip the frontier vertices to + // [effLo, effHi] and add the boundaries. + const xs: number[] = [effLo]; + for (const p of frontier) { + if (p.x > effLo && p.x < effHi) xs.push(p.x); + } + xs.push(effHi); + + let area = 0; + for (let i = 0; i < xs.length - 1; i++) { + const x0 = xs[i]; + const x1 = xs[i + 1]; + const y0 = interpAlongFrontier(frontier, x0) ?? 0; + const y1 = interpAlongFrontier(frontier, x1) ?? 
0; + area += ((y0 + y1) / 2) * (x1 - x0); + } + return area; +} From aad700aa19f41df4b5b8c9ad848c7390435e582f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 17 May 2026 21:42:33 +0000 Subject: [PATCH 2/4] feat(inference): use ratio (Nx) for diff tables; floor upper bound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up tweaks to the per-interactivity throughput and AUC summary tables introduced in 6db1e32: 1. Render multiplicative ratios (Nx) instead of percent-differences. - Throughput "% advantage vs baseline" sub-table → "Ratio vs baseline", cells now read "2.50×", "0.60×", etc; self-vs-self is "1.00×"; "∞" kept (other reachable, baseline not); "−∞" replaced with "0×" using the same dark-red treatment for the symmetric case. - AUC table: drop the redundant "% vs primary" column entirely (the other three columns are already ratios), so columns are AUC + Ratio vs primary + Ratio vs secondary + Ratio vs tertiary, all in Nx. - New ratioColor() centered at 1.00× and log-symmetric: 3.00× → fully green, 0.33× → fully red, interpolating linearly in log space (so "2×" and "0.5×" land at matched saturations). WCAG-luminance text color preserved. 2. Column upper bound is now floor(globalMax/10)*10 instead of ceil, for both the throughput buckets and the AUC integration window. The last bucket is therefore always one that at least one config actually reaches. pareto.test.ts: spec sanity check now compares aucUnderFrontier against an independent fine-grid trapezoidal reference computed inline, instead of hard-coding expected AUC magnitudes that bake in a specific upper bound — the new floor(...) rule, or any future window change, no longer requires touching the test. 
Co-Authored-By: Claude Opus 4.7 --- .../inference/ui/InteractivityTables.tsx | 70 +++++++--------- packages/app/src/lib/pareto.test.ts | 82 +++++++++++++------ 2 files changed, 86 insertions(+), 66 deletions(-) diff --git a/packages/app/src/components/inference/ui/InteractivityTables.tsx b/packages/app/src/components/inference/ui/InteractivityTables.tsx index 725c2e6e..f8c876d8 100644 --- a/packages/app/src/components/inference/ui/InteractivityTables.tsx +++ b/packages/app/src/components/inference/ui/InteractivityTables.tsx @@ -78,14 +78,20 @@ function relativeLuminance(r: number, g: number, b: number): number { return 0.2126 * srgbToLinear(r) + 0.7152 * srgbToLinear(g) + 0.0722 * srgbToLinear(b); } +const RATIO_CAP_HI = 3; +const RATIO_CAP_LO = 1 / 3; + /** - * Map a percent-diff in [-200, +200] to a red→white→green color. - * Beyond ±200 we clamp. Returns { background, color } where `color` is the - * WCAG-derived text color (white when background is dark, black when light). + * Map a ratio (other / baseline) to a red→white→green color, centered at 1.0× + * and log-symmetric. ratio = 1 → white; ratio ≥ 3 → fully green; ratio ≤ + * 1/3 → fully red. Anything between interpolates linearly in log space so that + * "2×" and "0.5×" land at symmetric saturations. Returns { background, color } + * with the WCAG-derived text color. */ -function percentDiffColor(pct: number): { background: string; color: string } { - // Clamp to ±200. - const t = Math.max(-1, Math.min(1, pct / 200)); +function ratioColor(ratio: number): { background: string; color: string } { + const clamped = Math.max(RATIO_CAP_LO, Math.min(RATIO_CAP_HI, ratio)); + // log-symmetric t in [-1, 1]: t=0 at 1.0, t=+1 at cap-hi, t=-1 at cap-lo. 
+ const t = Math.log(clamped) / Math.log(RATIO_CAP_HI); let r: number; let g: number; let b: number; @@ -108,8 +114,8 @@ function percentDiffColor(pct: number): { background: string; color: string } { return { background: `rgb(${r}, ${g}, ${b})`, color }; } -const INFINITY_BG_POS = '#14532d'; // dark green (green-900) for ∞ -const INFINITY_BG_NEG = '#7f1d1d'; // dark red (red-900) for −∞ +const INFINITY_BG_POS = '#14532d'; // dark green (green-900) for ∞ (other defined, baseline missing) +const ZERO_BG = '#7f1d1d'; // dark red (red-900) for 0× (other missing, baseline defined) const SELF_BG = '#fbbf24'; // amber-400 for baseline-vs-self const COL_MAX_BG = '#bbf7d0'; // green-200 for best per column in throughput @@ -205,14 +211,16 @@ function InfoIcon({ text }: { text: string }) { /** Per-interactivity throughput table + linked percent-diff heatmap. */ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { - // Compute buckets: every 10 from 10 up through ceil(globalMax / 10) * 10. + // Compute buckets: every 10 from 10 up through floor(globalMax / 10) * 10. + // (Using floor ensures the last bucket is always one a config actually reaches, + // not a bucket beyond every config's reachable interactivity.) const buckets = useMemo(() => { let globalMax = 0; for (const c of configs) { const maxX = c.frontier.at(-1)?.x ?? 0; if (maxX > globalMax) globalMax = maxX; } - const hi = Math.ceil(globalMax / 10) * 10; + const hi = Math.floor(globalMax / 10) * 10; const out: number[] = []; for (let v = 10; v <= hi; v += 10) out.push(v); return out; @@ -339,10 +347,10 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) {
-

% advantage vs baseline

+

Ratio vs baseline

@@ -392,7 +400,7 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { className="text-right px-2 py-1.5 tabular-nums" style={{ backgroundColor: SELF_BG, color: '#0a0a0a' }} > - 0.0% + 1.00× ); } @@ -423,22 +431,21 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { - −∞ + 0× ); } - const pct = ((other! - baseline!) / baseline!) * 100; - const { background, color } = percentDiffColor(pct); + const ratio = other! / baseline!; + const { background, color } = ratioColor(ratio); return ( - {pct >= 0 ? '+' : ''} - {pct.toFixed(0)}% + {ratio.toFixed(2)}× ); })} @@ -461,7 +468,7 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { const maxX = c.frontier.at(-1)?.x ?? 0; if (maxX > globalMax) globalMax = maxX; } - return Math.ceil(globalMax / 10) * 10; + return Math.floor(globalMax / 10) * 10; }, [configs]); const aucs = useMemo( @@ -504,8 +511,7 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { style: { backgroundColor: SELF_BG, color: '#0a0a0a' }, }; } - const pctDiff = (ratio - 1) * 100; - const { background, color } = percentDiffColor(pctDiff); + const { background, color } = ratioColor(ratio); return { text: `${ratio.toFixed(2)}×`, style: { backgroundColor: background, color }, @@ -575,9 +581,6 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { Ratio vs primary - - % vs primary - Ratio vs secondary @@ -592,20 +595,6 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { const primaryR = ratioCell(auc, primaryAuc, ePrimary, c.hwKey); const secondaryR = ratioCell(auc, secondaryAuc, eSecondary, c.hwKey); const tertiaryR = ratioCell(auc, tertiaryAuc, eTertiary, c.hwKey); - let pctText: string; - let pctStyle: React.CSSProperties | undefined; - if (primaryAuc === null || primaryAuc === 0) { - pctText = '—'; - pctStyle = undefined; - } else if (c.hwKey === ePrimary) { - pctText = '+0.0%'; - pctStyle = { backgroundColor: SELF_BG, color: '#0a0a0a' 
}; - } else { - const pct = (auc / primaryAuc - 1) * 100; - const { background, color } = percentDiffColor(pct); - pctText = `${pct >= 0 ? '+' : ''}${pct.toFixed(1)}%`; - pctStyle = { backgroundColor: background, color }; - } return ( @@ -615,9 +604,6 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { {primaryR.text} - - {pctText} - {secondaryR.text} diff --git a/packages/app/src/lib/pareto.test.ts b/packages/app/src/lib/pareto.test.ts index b1b1bfba..b63025b8 100644 --- a/packages/app/src/lib/pareto.test.ts +++ b/packages/app/src/lib/pareto.test.ts @@ -14,6 +14,44 @@ interface RawPoint { const toPoints = (raw: RawPoint[]): Point2D[] => raw.map((p) => ({ x: p.Interactivity_tok_s_user, y: p.Token_Throughput_per_GPU_tok_s_gpu })); +// Independent fine-grid trapezoidal reference. Matches the Python np.interp +// + np.trapezoid approach used in the original spec. Used by the sanity +// check below — kept out of `src/lib/pareto.ts` because the production +// implementation is the closed-form piecewise integral, which agrees with +// this to fp drift on piecewise-linear input. 
+function referenceAuc(frontier: Point2D[], lo: number, hi: number): number { + if (frontier.length === 0 || hi <= lo) return 0; + const minX = frontier[0].x; + const last = frontier.at(-1); + if (!last) return 0; + const maxX = last.x; + const N = 100_001; + const step = (hi - lo) / (N - 1); + const ys: number[] = []; + for (let i = 0; i < N; i++) { + const x = lo + i * step; + if (x < minX || x > maxX) { + ys.push(0); + continue; + } + let j = 0; + while (j < frontier.length - 1 && frontier[j + 1].x < x) j++; + const a = frontier[j]; + const b = frontier[Math.min(j + 1, frontier.length - 1)]; + if (b.x === a.x) { + ys.push(Math.max(a.y, b.y)); + } else { + const t = (x - a.x) / (b.x - a.x); + ys.push(a.y + t * (b.y - a.y)); + } + } + let area = 0; + for (let i = 0; i < ys.length - 1; i++) { + area += ((ys[i] + ys[i + 1]) / 2) * step; + } + return area; +} + describe('paretoFrontier', () => { it('returns empty for empty input', () => { expect(paretoFrontier([])).toEqual([]); @@ -91,39 +129,35 @@ describe('aucUnderFrontier', () => { expect(aucUnderFrontier(f, 30, 40)).toBe(0); }); - // Sanity-check the full pipeline (pareto → AUC) against the spec's - // reference AUCs computed by the Python implementation from the same - // 8-config sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8). - // Window: 10 → ceil(globalMax/10)*10. globalMax across these 8 configs is - // ~85, so window is [10, 90]. - describe('matches Python reference AUCs from spec sample data', () => { - // Determine the actual global window from the fixture (ceil-to-10). + // Sanity-check the full pipeline (pareto → AUC) on the spec's 8-config + // sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8) using the production + // integration window: [10, floor(globalMax / 10) * 10]. + // + // We re-derive the expected AUC for each config from first principles — + // independent trapezoidal integration over the same Pareto frontier — and + // assert that aucUnderFrontier matches. 
Hard-coding numeric expectations + // would bake in whichever upper bound the test was written against; this + // way the test continues to be a meaningful sanity check if the window + // rule changes again. + describe('matches independent trapezoidal AUCs on spec sample data', () => { const allXs = (Object.values(eightConfigData) as RawPoint[][]).flatMap((rows) => rows.map((r) => r.Interactivity_tok_s_user), ); const globalMax = Math.max(...allXs); - const hi = Math.ceil(globalMax / 10) * 10; - const window: [number, number] = [10, hi]; - - const cases: [string, number][] = [ - ['MI355X_SGLang_nonMTP', 11_457], - ['MI355X_ATOM_nonMTP', 23_659], - ['B200_SGLang_nonMTP', 63_495], - ['B200_DynamoVLLM_nonMTP_disagg', 62_177], - ['GB200_DynamoVLLM_nonMTP_disagg', 116_220], - ['GB200_DynamoVLLM_MTP_disagg', 176_705], - ['GB300_DynamoSGLang_nonMTP_disagg', 379_854], - ['GB300_DynamoSGLang_MTP_disagg', 263_727], - ]; + const upperBound = Math.floor(globalMax / 10) * 10; + const window: [number, number] = [10, upperBound]; - for (const [name, expected] of cases) { - it(`${name} ≈ ${expected.toLocaleString()}`, () => { + const names = Object.keys(eightConfigData as Record); + for (const name of names) { + it(`${name} matches independent reference`, () => { const raw = (eightConfigData as Record)[name]; expect(raw, `fixture missing ${name}`).toBeTruthy(); const f = paretoFrontier(toPoints(raw)); const auc = aucUnderFrontier(f, window[0], window[1]); - // Expected numbers in the spec are rounded to whole units; allow ±0.5%. - expect(Math.abs(auc - expected) / expected).toBeLessThan(0.005); + const expected = referenceAuc(f, window[0], window[1]); + // Both methods are trapezoidal on the same piecewise-linear function; + // they should agree to within tiny floating-point drift. 
+ expect(Math.abs(auc - expected) / Math.max(expected, 1)).toBeLessThan(0.001); }); } }); From d5e6abe7a2931dfb03c082d4f7a67310436d722b Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 17 May 2026 21:53:40 +0000 Subject: [PATCH 3/4] feat(inference): InteractivityTables supports all y-axis metrics Parameterize pareto.ts with 'higher' | 'lower' direction so the interactivity tables work for cost / J / power metrics in addition to tok/s/gpu. Direction is taken from the existing chart-config roofline direction (upper_* = higher-better, lower_* = lower-better) via new lib/metric-direction.ts helper. - paretoFrontier / interpAlongFrontier / aucUnderFrontier accept a direction parameter. - For lower-is-better, AUC integrates only over each config's reachable x-range (zero-padding outside would treat "no data" as the BEST value, inflating cost AUC). Higher-better keeps the existing zero-outside behavior. - New aucWindow() reports the effective integration window per row, shown as a new "Window" column when the active metric is lower-is-better. - InteractivityTables renders for every y-axis metric; column-best highlight picks min for lower-better; ratio colormap inverts so ratios < 1 are green and > 1 are red; in-range vs out-of-range cells flip their green/red mapping consistently with the direction. 
Co-Authored-By: Claude Opus 4.7 --- .../components/inference/ui/ChartDisplay.tsx | 9 +- .../inference/ui/InteractivityTables.tsx | 261 ++++++++++++------ packages/app/src/lib/metric-direction.ts | 60 ++++ packages/app/src/lib/pareto.test.ts | 255 ++++++++++++++--- packages/app/src/lib/pareto.ts | 144 ++++++++-- 5 files changed, 585 insertions(+), 144 deletions(-) create mode 100644 packages/app/src/lib/metric-direction.ts diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index 91d60136..06f30f69 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -643,10 +643,11 @@ export default function ChartDisplay() { )}
{displayGraphs}
- {/* Summary tables below the Pareto chart — only shown for the - tok/s/gpu y-axis since the interactivity-bucketing / AUC framing - assumes that metric. Tables react to model, precision, sequence and - the legend on/off toggles via useInference() context. */} + {/* Summary tables below the Pareto chart. Render for every y-axis + metric; the tables auto-pick higher/lower-is-better semantics from + the active metric's roofline direction on the interactivity chart + definition. Reactive to model, precision, sequence and the legend + on/off toggles via useInference() context. */} {/* Performance Over Time — Modal Drill-Down */} diff --git a/packages/app/src/components/inference/ui/InteractivityTables.tsx b/packages/app/src/components/inference/ui/InteractivityTables.tsx index f8c876d8..541514d5 100644 --- a/packages/app/src/components/inference/ui/InteractivityTables.tsx +++ b/packages/app/src/components/inference/ui/InteractivityTables.tsx @@ -21,7 +21,15 @@ import { } from '@/components/ui/tooltip'; import { track } from '@/lib/analytics'; import { getHardwareConfig } from '@/lib/constants'; -import { aucUnderFrontier, interpAlongFrontier, paretoFrontier, type Point2D } from '@/lib/pareto'; +import { getMetricParetoDirection } from '@/lib/metric-direction'; +import { + aucUnderFrontier, + aucWindow, + interpAlongFrontier, + paretoFrontier, + type ParetoDirection, + type Point2D, +} from '@/lib/pareto'; import { cn, getDisplayLabel } from '@/lib/utils'; /** @@ -63,9 +71,17 @@ function pickDefaultBaseline( return null; } -/** Format a non-negative integer with thousands separators. */ -function formatInt(n: number): string { - return Math.round(n).toLocaleString(); +/** Format a number with the right scale for the chosen metric. 
*/ +function formatValue(n: number): string { + if (!Number.isFinite(n)) return '—'; + const abs = Math.abs(n); + if (abs === 0) return '0'; + if (abs >= 1000) return Math.round(n).toLocaleString(); + if (abs >= 100) return n.toFixed(0); + if (abs >= 10) return n.toFixed(1); + if (abs >= 1) return n.toFixed(2); + if (abs >= 0.01) return n.toFixed(3); + return n.toExponential(2); } function srgbToLinear(c: number): number { @@ -83,15 +99,25 @@ const RATIO_CAP_LO = 1 / 3; /** * Map a ratio (other / baseline) to a red→white→green color, centered at 1.0× - * and log-symmetric. ratio = 1 → white; ratio ≥ 3 → fully green; ratio ≤ - * 1/3 → fully red. Anything between interpolates linearly in log space so that - * "2×" and "0.5×" land at symmetric saturations. Returns { background, color } - * with the WCAG-derived text color. + * and log-symmetric. + * + * For 'higher' (default): ratio = 1 → white; ratio ≥ 3 → fully green; ratio ≤ + * 1/3 → fully red. + * + * For 'lower': INVERT — ratio = 1 → white; ratio ≤ 1/3 → fully green (other + * uses 1/3 of baseline = great); ratio ≥ 3 → fully red. + * + * Returns { background, color } with the WCAG-derived text color. */ -function ratioColor(ratio: number): { background: string; color: string } { +function ratioColor( + ratio: number, + direction: ParetoDirection = 'higher', +): { background: string; color: string } { const clamped = Math.max(RATIO_CAP_LO, Math.min(RATIO_CAP_HI, ratio)); // log-symmetric t in [-1, 1]: t=0 at 1.0, t=+1 at cap-hi, t=-1 at cap-lo. - const t = Math.log(clamped) / Math.log(RATIO_CAP_HI); + let t = Math.log(clamped) / Math.log(RATIO_CAP_HI); + // For lower-is-better, flip the sign so ratio > 1 → red and ratio < 1 → green. 
+ if (direction === 'lower') t = -t; let r: number; let g: number; let b: number; @@ -114,21 +140,33 @@ function ratioColor(ratio: number): { background: string; color: string } { return { background: `rgb(${r}, ${g}, ${b})`, color }; } -const INFINITY_BG_POS = '#14532d'; // dark green (green-900) for ∞ (other defined, baseline missing) -const ZERO_BG = '#7f1d1d'; // dark red (red-900) for 0× (other missing, baseline defined) +const INFINITY_GREEN_BG = '#14532d'; // dark green (green-900) +const INFINITY_RED_BG = '#7f1d1d'; // dark red (red-900) const SELF_BG = '#fbbf24'; // amber-400 for baseline-vs-self -const COL_MAX_BG = '#bbf7d0'; // green-200 for best per column in throughput +const COL_BEST_BG = '#bbf7d0'; // green-200 for best per column in main table /** * Build per-config Pareto frontiers from filtered InferenceData. Filters by * selected precisions + active legend toggles, then groups by hwKey and runs - * the shared 2-D Pareto algorithm on (x, y) = (interactivity, tok/s/gpu). + * the shared 2-D Pareto algorithm on (x, y) = (interactivity, selected metric). + * Direction is taken from the active y-metric's roofline direction. 
*/ -function useConfigSeries(): ConfigSeries[] { - const { graphs, activeHwTypes, selectedPrecisions, hardwareConfig } = useInference(); +function useConfigSeries(direction: ParetoDirection): { + configs: ConfigSeries[]; + yLabel: string; + yTitle: string; +} { + const { graphs, activeHwTypes, selectedPrecisions, hardwareConfig, selectedYAxisMetric } = + useInference(); return useMemo(() => { const interactivityGraph = graphs.find((g) => g.chartDefinition.chartType === 'interactivity'); - if (!interactivityGraph) return []; + if (!interactivityGraph) return { configs: [], yLabel: '', yTitle: '' }; + + const chartDef = interactivityGraph.chartDefinition; + const yLabel = + (chartDef[`${selectedYAxisMetric}_label` as keyof typeof chartDef] as string) || ''; + const yTitle = + (chartDef[`${selectedYAxisMetric}_title` as keyof typeof chartDef] as string) || ''; // Group filtered points by hwKey. const byHw = new Map(); @@ -145,7 +183,10 @@ function useConfigSeries(): ConfigSeries[] { const result: ConfigSeries[] = []; for (const [hwKey, points] of byHw) { if (points.length < 2) continue; - const frontier = paretoFrontier(points.map((p) => ({ x: p.x, y: p.y }))); + const frontier = paretoFrontier( + points.map((p) => ({ x: p.x, y: p.y })), + direction, + ); if (frontier.length < 2) continue; const hwConfig = hardwareConfig[hwKey] ?? getHardwareConfig(hwKey); result.push({ hwKey, label: getDisplayLabel(hwConfig), frontier }); @@ -158,8 +199,8 @@ function useConfigSeries(): ConfigSeries[] { const bi = order.indexOf(b.hwKey); return (ai === -1 ? Infinity : ai) - (bi === -1 ? 
Infinity : bi); }); - return result; - }, [graphs, activeHwTypes, selectedPrecisions, hardwareConfig]); + return { configs: result, yLabel, yTitle }; + }, [graphs, activeHwTypes, selectedPrecisions, hardwareConfig, selectedYAxisMetric, direction]); } interface BaselineSelectProps { @@ -209,11 +250,20 @@ function InfoIcon({ text }: { text: string }) { ); } -/** Per-interactivity throughput table + linked percent-diff heatmap. */ -function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { +/** Per-interactivity value table + linked ratio heatmap. */ +function ValueAndDiffTable({ + configs, + direction, + yLabel, + yTitle, +}: { + configs: ConfigSeries[]; + direction: ParetoDirection; + yLabel: string; + yTitle: string; +}) { + const higherBetter = direction === 'higher'; // Compute buckets: every 10 from 10 up through floor(globalMax / 10) * 10. - // (Using floor ensures the last bucket is always one a config actually reaches, - // not a bucket beyond every config's reachable interactivity.) const buckets = useMemo(() => { let globalMax = 0; for (const c of configs) { @@ -226,23 +276,28 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { return out; }, [configs]); - // Per-(config, bucket) throughput cell, with the column-max highlight. - const tputCells = useMemo(() => { + // Per-(config, bucket) value cell, with the column-best highlight. 
+ const valueCells = useMemo(() => { const grid: (number | null)[][] = configs.map((c) => - buckets.map((b) => interpAlongFrontier(c.frontier, b)), + buckets.map((b) => interpAlongFrontier(c.frontier, b, direction)), ); - const colMaxRow: (number | null)[] = buckets.map((_, ci) => { - let m: number | null = null; + const colBestRow: (number | null)[] = buckets.map((_, ci) => { + let best: number | null = null; for (const row of grid) { const v = row[ci]; - if (v !== null && (m === null || v > m)) m = v; + if (v === null) continue; + if (best === null) { + best = v; + continue; + } + if (higherBetter ? v > best : v < best) best = v; } - return m; + return best; }); - return { grid, colMaxRow }; - }, [configs, buckets]); + return { grid, colBestRow }; + }, [configs, buckets, direction, higherBetter]); - // Baseline selection for the percent-diff sub-table. + // Baseline selection for the ratio sub-table. const enabledKeys = configs.map((c) => c.hwKey); const defaultBaseline = pickDefaultBaseline(enabledKeys, DEFAULT_THROUGHPUT_BASELINE_HINTS) ?? enabledKeys[0] ?? ''; @@ -252,26 +307,30 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { const baselineRow = useMemo(() => { const idx = configs.findIndex((c) => c.hwKey === effectiveBaseline); if (idx === -1) return null; - return tputCells.grid[idx]; - }, [configs, tputCells, effectiveBaseline]); + return valueCells.grid[idx]; + }, [configs, valueCells, effectiveBaseline]); + + const directionHint = higherBetter ? 'Higher is better.' : 'Lower is better.'; + const valueTooltip = + `For each enabled config we compute the Pareto frontier of ${yTitle || 'the selected metric'} vs interactivity, ` + + `then read off the value at every 10 tok/s/user step. Em-dash means that interactivity is outside the config's reachable range. ` + + `Best value per column is highlighted in green. ${directionHint}`; + + const ratioTooltip = higherBetter + ? 'other / baseline at each bucket, rendered as Nx. 
"∞" means the baseline cannot reach that interactivity but the other config can (green = good for other); "0×" the reverse (red); "—" means neither can. Color scale is centered at 1.00× and log-symmetric, saturating at 3.00× (green) and 0.33× (red).' + : 'other / baseline at each bucket, rendered as Nx. Since lower is better, color is INVERTED: ratios < 1 are green (other uses less than baseline = good) and ratios > 1 are red. "∞" means the baseline cannot reach that interactivity but the other config can — colored red (other is way worse / infinite cost relative to baseline); "0×" the reverse — colored green (other achieves zero relative to baseline = great); "—" means neither can. Saturation caps at 3.00× and 0.33×.'; return (
-

Per-GPU throughput at each interactivity bucket

- +

Per-GPU value at each interactivity bucket

+

- Linearly interpolated tok/s/gpu along each config's Pareto frontier. Reactive to model, - precision, sequence and the legend on/off toggles above. + Linearly interpolated {yLabel || 'metric value'} along each config's Pareto frontier. + Reactive to model, precision, sequence and the legend on/off toggles above. {directionHint}

{configs.length === 0 ? ( @@ -312,7 +371,7 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { {c.label} {buckets.map((b, ci) => { - const v = tputCells.grid[ri][ci]; + const v = valueCells.grid[ri][ci]; if (v === null) { return ( ); } - const isMax = tputCells.colMaxRow[ci] === v; + const isBest = valueCells.colBestRow[ci] === v; return ( - {formatInt(v)} + {formatValue(v)} ); })} @@ -348,11 +407,7 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) {

Ratio vs baseline

- +
{buckets.map((b, ci) => { - const other = tputCells.grid[ri][ci]; + const other = valueCells.grid[ri][ci]; const baseline = baselineRow ? baselineRow[ci] : null; const isSelf = c.hwKey === effectiveBaseline; @@ -415,30 +470,38 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) { ); } + // Baseline can't reach, other can: + // - higher-better: "infinite multiplier of throughput" → great for other → green + // - lower-better: "infinite multiplier of cost" → bad for other → red if (other !== null && baseline === null) { + const bg = higherBetter ? INFINITY_GREEN_BG : INFINITY_RED_BG; return ( ∞ ); } + // Other can't reach, baseline can: + // - higher-better: other is 0× → bad for other → red + // - lower-better: other is 0× cost → great for other → green if (other === null && baseline !== null) { + const bg = higherBetter ? INFINITY_RED_BG : INFINITY_GREEN_BG; return ( 0× ); } const ratio = other! / baseline!; - const { background, color } = ratioColor(ratio); + const { background, color } = ratioColor(ratio, direction); return ( { let globalMax = 0; for (const c of configs) { @@ -472,8 +544,15 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { }, [configs]); const aucs = useMemo( - () => configs.map((c) => aucUnderFrontier(c.frontier, 10, hi)), - [configs, hi], + () => configs.map((c) => aucUnderFrontier(c.frontier, 10, hi, direction)), + [configs, hi, direction], + ); + + // Per-config integration window — for lower-is-better this may shrink to + // the reachable x-range; for higher-is-better it's always [10, hi]. 
+ const aucWindows = useMemo( + () => configs.map((c) => aucWindow(c.frontier, 10, hi, direction)), + [configs, hi, direction], ); const enabledKeys = configs.map((c) => c.hwKey); @@ -511,27 +590,33 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { style: { backgroundColor: SELF_BG, color: '#0a0a0a' }, }; } - const { background, color } = ratioColor(ratio); + const { background, color } = ratioColor(ratio, direction); return { text: `${ratio.toFixed(2)}×`, style: { backgroundColor: background, color }, }; }; + const directionHint = higherBetter + ? 'Higher is better — a config that reaches both high interactivity AND high throughput-like value scores best.' + : 'Lower is better — a config that achieves low cost / energy across the reachable interactivity range scores best.'; + + const outOfRangeHint = higherBetter + ? "Outside a config's reachable interactivity range the integrand is treated as 0 (worst case for higher-is-better)." + : "Integration is restricted to each config's reachable interactivity range. The per-row window is shown below the AUC."; + + const aucTooltip = + `Trapezoidal area under each config's ${yLabel || 'selected metric'} vs interactivity Pareto frontier, integrated from 10 to ${hi} tok/s/user. ` + + `${outOfRangeHint} ${directionHint}`; + return (

Area under Pareto frontier (AUC summary)

- +

- Integration window: 10 → {hi} tok/s/user. + Integration window: 10 → {hi} tok/s/user. {directionHint}

{configs.length === 0 ? ( @@ -578,6 +663,9 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { Config AUC + {!higherBetter && ( + Window + )} Ratio vs primary @@ -592,6 +680,7 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { {configs.map((c, i) => { const auc = aucs[i]; + const win = aucWindows[i]; const primaryR = ratioCell(auc, primaryAuc, ePrimary, c.hwKey); const secondaryR = ratioCell(auc, secondaryAuc, eSecondary, c.hwKey); const tertiaryR = ratioCell(auc, tertiaryAuc, eTertiary, c.hwKey); @@ -600,7 +689,12 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { {c.label} - {formatInt(auc)} + {formatValue(auc)} + {!higherBetter && ( + + {win ? `${win.lo}→${win.hi}` : '—'} + + )} {primaryR.text} @@ -624,19 +718,26 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) { /** * Section that renders the two summary tables below the Pareto chart on the - * inference page. Only shown when the active y-axis metric is "Token - * Throughput per GPU" — the AUC + interactivity framing assumes that metric. + * inference page. Renders for all y-axis metrics; the "is higher better" + * direction is taken from the active metric's roofline direction on the + * interactivity chart definition. */ export default function InteractivityTables() { - const { selectedYAxisMetric } = useInference(); - const configs = useConfigSeries(); + const { selectedYAxisMetric, graphs } = useInference(); + + const interactivityGraph = graphs.find((g) => g.chartDefinition.chartType === 'interactivity'); + const direction: ParetoDirection = interactivityGraph + ? 
getMetricParetoDirection(interactivityGraph.chartDefinition, selectedYAxisMetric) + : 'higher'; + + const { configs, yLabel, yTitle } = useConfigSeries(direction); - if (selectedYAxisMetric !== 'y_tpPerGpu') return null; + if (!interactivityGraph) return null; return ( <> - - + + ); } diff --git a/packages/app/src/lib/metric-direction.ts b/packages/app/src/lib/metric-direction.ts new file mode 100644 index 00000000..6a4d0511 --- /dev/null +++ b/packages/app/src/lib/metric-direction.ts @@ -0,0 +1,60 @@ +/** + * Single source of truth for whether a chart Y-axis metric is "higher is + * better" or "lower is better". + * + * The chart config (inference-chart-config.json) already declares this per + * metric via the roofline direction field (`y__roofline`): + * - 'upper_right' / 'upper_left' → higher-is-better + * - 'lower_right' / 'lower_left' → lower-is-better + * + * This module exposes a helper for non-chart consumers (tables, AUC, etc) + * that need the same direction info without re-reading the JSON. + */ + +import type { ChartDefinition } from '@/components/inference/types'; + +import type { ParetoDirection } from './pareto'; + +export type RooflineDirection = 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right'; + +export function rooflineDirectionToPareto(dir: RooflineDirection | undefined): ParetoDirection { + if (dir === 'lower_left' || dir === 'lower_right') return 'lower'; + return 'higher'; +} + +export function isHigherBetter(dir: RooflineDirection | undefined): boolean { + return rooflineDirectionToPareto(dir) === 'higher'; +} + +/** + * Look up the roofline direction for a given Y-axis metric on a given chart + * definition. Returns the configured direction or undefined when the chart + * has no mapping for that metric. 
+ */ +export function getMetricRooflineDirection( + chartDef: ChartDefinition, + yAxisMetric: string, +): RooflineDirection | undefined { + const key = `${yAxisMetric}_roofline` as keyof ChartDefinition; + const val = chartDef[key]; + if ( + val === 'upper_right' || + val === 'upper_left' || + val === 'lower_left' || + val === 'lower_right' + ) { + return val; + } + return undefined; +} + +/** + * Convenience: pareto direction for a metric on a chart definition. + * Defaults to 'higher' when unknown. + */ +export function getMetricParetoDirection( + chartDef: ChartDefinition, + yAxisMetric: string, +): ParetoDirection { + return rooflineDirectionToPareto(getMetricRooflineDirection(chartDef, yAxisMetric)); +} diff --git a/packages/app/src/lib/pareto.test.ts b/packages/app/src/lib/pareto.test.ts index b63025b8..d6f67069 100644 --- a/packages/app/src/lib/pareto.test.ts +++ b/packages/app/src/lib/pareto.test.ts @@ -1,6 +1,12 @@ import { describe, expect, it } from 'vitest'; -import { aucUnderFrontier, interpAlongFrontier, paretoFrontier, type Point2D } from '@/lib/pareto'; +import { + aucUnderFrontier, + aucWindow, + interpAlongFrontier, + paretoFrontier, + type Point2D, +} from '@/lib/pareto'; import eightConfigData from './__fixtures__/eight_config_data.json'; @@ -57,7 +63,7 @@ describe('paretoFrontier', () => { expect(paretoFrontier([])).toEqual([]); }); - it('keeps only non-dominated points and sorts ascending x', () => { + it('keeps only non-dominated points and sorts ascending x (higher-is-better)', () => { const pts: Point2D[] = [ { x: 10, y: 100 }, { x: 20, y: 90 }, // dominated by (10,100)? no — x is higher @@ -73,6 +79,30 @@ describe('paretoFrontier', () => { expect(f.map((p) => p.x)).toEqual([5, 10, 20, 30]); expect(f.map((p) => p.y)).toEqual([110, 100, 90, 60]); }); + + // For lower-is-better, a point dominates iff x > other.x AND y < other.y. + // Frontier consists of points with no dominator. 
+ it('keeps only non-dominated points (lower-is-better)', () => { + // Cost-like metric where less is better. Higher x is still better. + const pts: Point2D[] = [ + { x: 5, y: 1 }, + { x: 10, y: 0.5 }, // dominates (5, 1.0)? x=10>5 AND y=0.5<1.0 → YES, dominates + { x: 15, y: 0.8 }, // not dominated by (10, 0.5) since y=0.8 > 0.5; dominated by (20, 0.3)? yes + { x: 20, y: 0.3 }, + { x: 25, y: 0.6 }, // dominated by (20, 0.3)? x=20<25 → no; dominator would need x>25 AND y<0.6 + { x: 30, y: 0.4 }, // dominates (25, 0.6)? x=30>25 AND y=0.4<0.6 → yes + ]; + const f = paretoFrontier(pts, 'lower'); + // Walking: keep points where no other has x>p.x AND y<p.y + // (5,1.0): dominated by (10,0.5)? yes → drop + // (10,0.5): dominated by (20,0.3)? x=20>10, y=0.3<0.5 → yes → drop + // (15,0.8): dominated by (20,0.3)? yes → drop + // (20,0.3): dominated? need x>20 AND y<0.3 — (30,0.4) no, (25,0.6) no → keep + // (25,0.6): dominated by (30,0.4)? yes → drop + // (30,0.4): dominated? need x>30 — none → keep + expect(f.map((p) => p.x)).toEqual([20, 30]); + expect(f.map((p) => p.y)).toEqual([0.3, 0.4]); + }); }); describe('interpAlongFrontier', () => { @@ -99,6 +129,18 @@ describe('interpAlongFrontier', () => { // 1/3 of the way (20→50, 0→1/3) at x=30 → y = 200 + (30-20)/(50-20) * (350-200) = 200 + 50 = 250 expect(interpAlongFrontier(f, 30)).toBeCloseTo(250, 9); }); + + it('linearly interpolates the same way for lower-is-better frontiers', () => { + // Direction only affects which y wins at duplicate-x ties; here all x's + // are unique so the result is identical.
+ const lf: Point2D[] = [ + { x: 10, y: 1 }, + { x: 20, y: 0.5 }, + { x: 50, y: 0.2 }, + ]; + expect(interpAlongFrontier(lf, 15, 'lower')).toBeCloseTo(0.75, 9); + expect(interpAlongFrontier(lf, 50, 'lower')).toBe(0.2); + }); }); describe('aucUnderFrontier', () => { @@ -111,7 +153,7 @@ describe('aucUnderFrontier', () => { expect(aucUnderFrontier(f, 0, 10)).toBeCloseTo(50, 9); }); - it('zeros the integrand outside the frontier x-range', () => { + it('zeros the integrand outside the frontier x-range (higher-better)', () => { // frontier only covers x in [10, 20], integrate [0, 30] const f = [ { x: 10, y: 5 }, @@ -129,36 +171,181 @@ describe('aucUnderFrontier', () => { expect(aucUnderFrontier(f, 30, 40)).toBe(0); }); - // Sanity-check the full pipeline (pareto → AUC) on the spec's 8-config - // sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8) using the production - // integration window: [10, floor(globalMax / 10) * 10]. - // - // We re-derive the expected AUC for each config from first principles — - // independent trapezoidal integration over the same Pareto frontier — and - // assert that aucUnderFrontier matches. Hard-coding numeric expectations - // would bake in whichever upper bound the test was written against; this - // way the test continues to be a meaningful sanity check if the window - // rule changes again. 
- describe('matches independent trapezoidal AUCs on spec sample data', () => { - const allXs = (Object.values(eightConfigData) as RawPoint[][]).flatMap((rows) => - rows.map((r) => r.Interactivity_tok_s_user), - ); - const globalMax = Math.max(...allXs); - const upperBound = Math.floor(globalMax / 10) * 10; - const window: [number, number] = [10, upperBound]; - - const names = Object.keys(eightConfigData as Record); - for (const name of names) { - it(`${name} matches independent reference`, () => { - const raw = (eightConfigData as Record)[name]; - expect(raw, `fixture missing ${name}`).toBeTruthy(); - const f = paretoFrontier(toPoints(raw)); - const auc = aucUnderFrontier(f, window[0], window[1]); - const expected = referenceAuc(f, window[0], window[1]); - // Both methods are trapezoidal on the same piecewise-linear function; - // they should agree to within tiny floating-point drift. - expect(Math.abs(auc - expected) / Math.max(expected, 1)).toBeLessThan(0.001); - }); - } + // For lower-is-better: integrate only over the reachable x-range. The + // result is identical to higher-better when the requested [lo, hi] is a + // strict subset of [minX, maxX] (no zero-pad region in either case), and + // differs only when the requested window extends beyond the frontier. + it('lower-better integrates only over reachable range', () => { + // frontier covers x in [10, 20] with constant y=2 + const f = [ + { x: 10, y: 2 }, + { x: 20, y: 2 }, + ]; + // Integrate the whole range — should give 20 (y=2 × span=10). + expect(aucUnderFrontier(f, 10, 20, 'lower')).toBeCloseTo(20, 9); + // Higher-better with window beyond range: zero-pads → still 20. + expect(aucUnderFrontier(f, 0, 30, 'higher')).toBeCloseTo(20, 9); + // Lower-better with the same window: clips to reachable [10, 20] → 20 too. 
+ expect(aucUnderFrontier(f, 0, 30, 'lower')).toBeCloseTo(20, 9); + }); + + it('lower-better AUC matches reachable-only window, not zero-padded', () => { + // Non-flat lower-better frontier: cost falls monotonically as x rises. + const f = [ + { x: 10, y: 1 }, + { x: 20, y: 0.5 }, + { x: 30, y: 0.4 }, + ]; + // Requested [0, 50]: lower-better should clip to [10, 30]. + // Trapezoid (10→20): (1.0+0.5)/2 * 10 = 7.5 + // Trapezoid (20→30): (0.5+0.4)/2 * 10 = 4.5 + // Total: 12 + expect(aucUnderFrontier(f, 0, 50, 'lower')).toBeCloseTo(12, 9); + + // Higher-better with the same window zero-pads [0,10] and [30,50], which + // adds nothing, so the total is also 12. The two directions agree + // numerically here; they differ only in the reported window: lower-better + // covers just the reachable [10,30] (see aucWindow), whereas higher-better + // covers the full requested [0,50] with y treated as 0 outside the frontier. + expect(aucUnderFrontier(f, 0, 50, 'higher')).toBeCloseTo(12, 9); + }); +}); + +describe('aucWindow', () => { + const f: Point2D[] = [ + { x: 10, y: 5 }, + { x: 30, y: 8 }, + ]; + + it('returns the requested window for higher-better', () => { + expect(aucWindow(f, 0, 50, 'higher')).toEqual({ lo: 0, hi: 50 }); + }); + + it('clips to reachable range for lower-better', () => { + expect(aucWindow(f, 0, 50, 'lower')).toEqual({ lo: 10, hi: 30 }); + expect(aucWindow(f, 15, 25, 'lower')).toEqual({ lo: 15, hi: 25 }); + }); + + it('returns null when reachable window is empty', () => { + expect(aucWindow(f, 40, 50, 'lower')).toBeNull(); + }); +}); + +// Sanity-check the full pipeline (pareto → AUC) on the spec's 8-config +// sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8) using the production +// integration window: [10, floor(globalMax / 10) * 10]. +// +// We re-derive the expected AUC for each config from first principles — +// independent trapezoidal integration over the same Pareto frontier — and +// assert that aucUnderFrontier matches.
Hard-coding numeric expectations +// would bake in whichever upper bound the test was written against; this +// way the test continues to be a meaningful sanity check if the window +// rule changes again. +describe('matches independent trapezoidal AUCs on spec sample data', () => { + const allXs = (Object.values(eightConfigData) as RawPoint[][]).flatMap((rows) => + rows.map((r) => r.Interactivity_tok_s_user), + ); + const globalMax = Math.max(...allXs); + const upperBound = Math.floor(globalMax / 10) * 10; + const window: [number, number] = [10, upperBound]; + + const names = Object.keys(eightConfigData as Record); + for (const name of names) { + it(`${name} matches independent reference (higher-better)`, () => { + const raw = (eightConfigData as Record)[name]; + expect(raw, `fixture missing ${name}`).toBeTruthy(); + const f = paretoFrontier(toPoints(raw)); + const auc = aucUnderFrontier(f, window[0], window[1]); + const expected = referenceAuc(f, window[0], window[1]); + // Both methods are trapezoidal on the same piecewise-linear function; + // they should agree to within tiny floating-point drift. + expect(Math.abs(auc - expected) / Math.max(expected, 1)).toBeLessThan(0.001); + }); + } +}); + +// Synthetic lower-is-better fixture — cost-per-token style metric across +// three configs. Verifies the direction-aware path end-to-end: +// pareto → interp → AUC and the window clipping. +describe('lower-is-better integration (synthetic cost fixture)', () => { + // Treat y as $/M tokens (lower = better). x is interactivity. + const configs: Record = { + // "Cheap-fast": low cost, broad interactivity range — should dominate. + cheap: [ + { x: 10, y: 0.5 }, + { x: 25, y: 0.4 }, + { x: 50, y: 0.6 }, + { x: 80, y: 1.2 }, + ], + // "Expensive-slow": consistently higher cost, narrower range. + expensive: [ + { x: 15, y: 1.5 }, + { x: 30, y: 1.2 }, + { x: 45, y: 1 }, + { x: 60, y: 1.3 }, + ], + // "Niche": only reaches very high interactivity. 
Cost dips then rises so + // the lower-better frontier keeps multiple points. + niche: [ + { x: 60, y: 0.9 }, + { x: 80, y: 0.5 }, + { x: 100, y: 0.7 }, + ], + }; + + it('pareto frontiers prune dominated points correctly', () => { + const cheap = paretoFrontier(configs.cheap, 'lower'); + // For 'cheap': dominator needs x>p.x AND y<p.y. + // (10,0.5): need x>10, y<0.5. (25,0.4) qualifies → drop (10,0.5)? Yes. + // (25,0.4): need x>25 AND y<0.4. (50,0.6) no, (80,1.2) no → keep + // (50,0.6): need x>50 AND y<0.6. (80,1.2) no → keep + // (80,1.2): need x>80 — none → keep + expect(cheap.map((p) => p.x)).toEqual([25, 50, 80]); + + const expensive = paretoFrontier(configs.expensive, 'lower'); + // (15,1.5): (30,1.2) dominates → drop + // (30,1.2): (45,1.0) dominates → drop + // (45,1.0): need x>45, y<1.0 — (60,1.3) no → keep + // (60,1.3): keep + expect(expensive.map((p) => p.x)).toEqual([45, 60]); + }); + + it('AUC is restricted to reachable window for each config', () => { + const cheap = paretoFrontier(configs.cheap, 'lower'); + const niche = paretoFrontier(configs.niche, 'lower'); + + // For cheap, reachable x: [25, 80]. Common window [10, 100] clips. + const cheapWin = aucWindow(cheap, 10, 100, 'lower'); + expect(cheapWin).toEqual({ lo: 25, hi: 80 }); + + // For niche, the lower-better frontier prunes the (60, 0.9) point + // (dominated by (80, 0.5)). Reachable x range becomes [80, 100]. + const nicheWin = aucWindow(niche, 10, 100, 'lower'); + expect(nicheWin).toEqual({ lo: 80, hi: 100 }); + + // AUCs: + // cheap: (25,0.4)→(50,0.6)→(80,1.2). Trapezoids: + // 25→50: (0.4+0.6)/2*25 = 12.5 + // 50→80: (0.6+1.2)/2*30 = 27 + // total = 39.5 + expect(aucUnderFrontier(cheap, 10, 100, 'lower')).toBeCloseTo(39.5, 6); + + // niche frontier: (80,0.5)→(100,0.7).
Trapezoid (80→100): + // (0.5+0.7)/2 * 20 = 12 + expect(aucUnderFrontier(niche, 10, 100, 'lower')).toBeCloseTo(12, 6); + }); + + it('interpolation respects lower-better best at duplicate x', () => { + // Construct a frontier with duplicate x to verify min vs max selection. + const f: Point2D[] = [ + { x: 10, y: 1 }, + { x: 20, y: 0.5 }, + { x: 20, y: 0.7 }, // wouldn't naturally appear post-frontier, but the + // helper should still return the better (min) y for lower-better. + ]; + // For lower direction at duplicate x, prefer min y. + expect(interpAlongFrontier(f, 20, 'lower')).toBe(0.5); + // For higher direction, prefer max y. + expect(interpAlongFrontier(f, 20, 'higher')).toBe(0.7); }); }); diff --git a/packages/app/src/lib/pareto.ts b/packages/app/src/lib/pareto.ts index 717a7b6a..0e1e8d1e 100644 --- a/packages/app/src/lib/pareto.ts +++ b/packages/app/src/lib/pareto.ts @@ -1,12 +1,20 @@ /** - * Shared 2-D Pareto-frontier utilities for "higher x AND higher y both better" - * curves, plus linear interpolation along the frontier and trapezoidal AUC. + * Shared 2-D Pareto-frontier utilities for both "higher y is better" and + * "lower y is better" curves over an x-axis where higher x is always better + * (e.g. interactivity tok/s/user — more is more responsive). * * The chart layer has its own metric-aware helpers (calculateRoofline et al) * that operate on full InferenceData points and `upper_left | upper_right | …` * directions. This module is the plain numeric core — it works on * `{ x, y }`-shaped points and is what tables / non-chart consumers should use. * + * Direction parameter: + * - 'higher' (default): a point dominates iff x and y are BOTH greater. The + * visible frontier on an interactivity vs throughput chart looks like + * "upper-left" because as concurrency rises x falls while y rises. + * - 'lower': a point dominates iff x is greater AND y is LOWER. Used for + * cost / J / power metrics where less is more. 
+ * * Sorting note: the frontier is always returned in ascending-x order so * downstream interp/AUC can treat the xs as a sorted grid. */ @@ -16,26 +24,40 @@ export interface Point2D { y: number; } +export type ParetoDirection = 'higher' | 'lower'; + /** - * Pareto frontier for "higher x AND higher y is better" (upper-right). Returns - * non-dominated points sorted by ascending x. + * Pareto frontier with direction control. Returns non-dominated points sorted + * by ascending x. * - * On the interactivity vs tok/s/gpu chart the visible frontier looks like - * "upper-left" because as concurrency rises x falls while y rises — but the - * non-domination relation is the same: a point is on the frontier when no - * other point has BOTH greater x AND greater y. So the same algorithm works. + * For 'higher': a point is kept when no other has BOTH greater x AND greater y. + * For 'lower': a point is kept when no other has greater x AND LESSER y. */ -export function paretoFrontier(points: readonly T[]): T[] { +export function paretoFrontier( + points: readonly T[], + direction: ParetoDirection = 'higher', +): T[] { if (points.length === 0) return []; // Sort by descending x. The point with max x is always kept; then walk down - // and keep any point whose y exceeds the running max y. + // and keep any point whose y "beats" the running best y (max for 'higher', + // min for 'lower'). const sorted = [...points].toSorted((a, b) => b.x - a.x); const front: T[] = []; - let maxY = -Infinity; - for (const p of sorted) { - if (p.y > maxY) { - front.push(p); - maxY = p.y; + if (direction === 'higher') { + let maxY = -Infinity; + for (const p of sorted) { + if (p.y > maxY) { + front.push(p); + maxY = p.y; + } + } + } else { + let minY = Infinity; + for (const p of sorted) { + if (p.y < minY) { + front.push(p); + minY = p.y; + } } } // Return ascending x for downstream consumers. 
@@ -45,8 +67,16 @@ export function paretoFrontier(points: readonly T[]): T[] { /** * Linear interpolation along a frontier that's already sorted by ascending x. * Returns null when x is outside [minX, maxX] of the frontier. + * + * Direction does not change the interpolation math — it only changes which + * vertex's y wins at duplicate-x ties (we pick whichever is "best" in the + * given direction). */ -export function interpAlongFrontier(frontier: readonly Point2D[], x: number): number | null { +export function interpAlongFrontier( + frontier: readonly Point2D[], + x: number, + direction: ParetoDirection = 'higher', +): number | null { const last = frontier.at(-1); if (frontier.length === 0 || !last) return null; const minX = frontier[0].x; @@ -63,23 +93,38 @@ export function interpAlongFrontier(frontier: readonly Point2D[], x: number): nu } const a = frontier[lo]; const b = frontier[hi]; - if (b.x === a.x) return Math.max(a.y, b.y); + if (b.x === a.x) return direction === 'higher' ? Math.max(a.y, b.y) : Math.min(a.y, b.y); const t = (x - a.x) / (b.x - a.x); return a.y + t * (b.y - a.y); } /** * Trapezoidal AUC under the linearly-interpolated frontier between [lo, hi]. - * Outside the frontier's x-range y is treated as 0, so a config that doesn't - * reach part of the integration range contributes 0 to that part. Matches the - * Python reference: np.interp on a fine grid with the out-of-range region - * zeroed, then np.trapezoid. + * + * Out-of-range semantics depend on direction: + * - 'higher': outside the frontier's x-range y is treated as 0 (worst case + * for higher-is-better — a config that doesn't reach that interactivity + * contributes 0). Matches the original behavior / spec. + * - 'lower': integrate ONLY over each config's reachable x-range. Treating + * out-of-range as 0 would inflate AUC because 0 is the BEST value for + * cost / J / power metrics — that's the opposite of what we want. 
Using + * "worst observed value" outside the range would penalize configs with + * short reachable spans more than necessary; restricting integration to + * the reachable window is the simplest interpretable choice and matches + * the natural reading "average value over what the config can actually + * do, scaled by the span it covers". Consumers should display the + * effective window so smaller-coverage configs can be spotted. * * Closed-form rather than 10 001-sample grid — same answer to machine * precision because the integrand is piecewise-linear, and avoids allocating * arrays on every render. */ -export function aucUnderFrontier(frontier: readonly Point2D[], lo: number, hi: number): number { +export function aucUnderFrontier( + frontier: readonly Point2D[], + lo: number, + hi: number, + direction: ParetoDirection = 'higher', +): number { const last = frontier.at(-1); if (frontier.length === 0 || !last || hi <= lo) return 0; const minX = frontier[0].x; @@ -88,8 +133,32 @@ export function aucUnderFrontier(frontier: readonly Point2D[], lo: number, hi: n const effHi = Math.min(hi, maxX); if (effHi <= effLo) return 0; - // Build the integration breakpoints: clip the frontier vertices to - // [effLo, effHi] and add the boundaries. + if (direction === 'higher') { + // Build the integration breakpoints: clip the frontier vertices to + // [effLo, effHi] and add the boundaries. Outside the frontier's x-range + // we want y=0; that's already handled because the integration range is + // clipped to [effLo, effHi] (a strict sub-range of the frontier span). + // The original [lo, hi] outside-frontier region contributes 0 because + // the integrand is 0 there. + const xs: number[] = [effLo]; + for (const p of frontier) { + if (p.x > effLo && p.x < effHi) xs.push(p.x); + } + xs.push(effHi); + + let area = 0; + for (let i = 0; i < xs.length - 1; i++) { + const x0 = xs[i]; + const x1 = xs[i + 1]; + const y0 = interpAlongFrontier(frontier, x0, direction) ?? 
0; + const y1 = interpAlongFrontier(frontier, x1, direction) ?? 0; + area += ((y0 + y1) / 2) * (x1 - x0); + } + return area; + } + + // direction === 'lower': integrate only over the reachable x-range. No + // padding outside [minX, maxX]; the effective window IS [effLo, effHi]. const xs: number[] = [effLo]; for (const p of frontier) { if (p.x > effLo && p.x < effHi) xs.push(p.x); @@ -100,9 +169,32 @@ export function aucUnderFrontier(frontier: readonly Point2D[], lo: number, hi: n for (let i = 0; i < xs.length - 1; i++) { const x0 = xs[i]; const x1 = xs[i + 1]; - const y0 = interpAlongFrontier(frontier, x0) ?? 0; - const y1 = interpAlongFrontier(frontier, x1) ?? 0; + const y0 = interpAlongFrontier(frontier, x0, direction) ?? 0; + const y1 = interpAlongFrontier(frontier, x1, direction) ?? 0; area += ((y0 + y1) / 2) * (x1 - x0); } return area; } + +/** + * Effective AUC integration window for a single frontier given a requested + * [lo, hi]. For 'higher' the window is always [lo, hi] (zero-pad outside). + * For 'lower' the window is clipped to the frontier's reachable span so + * callers can label which range was actually integrated. 
+ */ +export function aucWindow( + frontier: readonly Point2D[], + lo: number, + hi: number, + direction: ParetoDirection = 'higher', +): { lo: number; hi: number } | null { + const last = frontier.at(-1); + if (frontier.length === 0 || !last || hi <= lo) return null; + if (direction === 'higher') return { lo, hi }; + const minX = frontier[0].x; + const maxX = last.x; + const effLo = Math.max(lo, minX); + const effHi = Math.min(hi, maxX); + if (effHi <= effLo) return null; + return { lo: effLo, hi: effHi }; +} From 9ead18950a1074b0d075aeff9f3017c17b911058 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 17 May 2026 22:02:40 +0000 Subject: [PATCH 4/4] fix(interactivity): widen heatmap caps to 30x and use HSL ramp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ratio heatmap saturated at 3x, so anything from 5x to 33x collapsed to the same maximum green — common ratios like 7x and 20x looked identical. Bump the log-symmetric saturation caps to 30x / 1/30x and drive the color ramp through HSL (hue=142/0, lightness 0.97→0.28, saturation 0.6→0.78) so 2x / 5x / 10x / 20x land at perceptually distinct greens. Export ratioColor and add unit tests covering distinctness, monotonicity, clamping, log-symmetric reciprocals, lower-better inversion, and text contrast. 
Co-Authored-By: Claude Opus 4.7 --- .../inference/ui/InteractivityTables.test.ts | 70 +++++++++++++++ .../inference/ui/InteractivityTables.tsx | 88 ++++++++++++++----- 2 files changed, 134 insertions(+), 24 deletions(-) create mode 100644 packages/app/src/components/inference/ui/InteractivityTables.test.ts diff --git a/packages/app/src/components/inference/ui/InteractivityTables.test.ts b/packages/app/src/components/inference/ui/InteractivityTables.test.ts new file mode 100644 index 00000000..94b0d1d0 --- /dev/null +++ b/packages/app/src/components/inference/ui/InteractivityTables.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from 'vitest'; + +import { RATIO_CAP_HI, RATIO_CAP_LO, ratioColor } from './InteractivityTables'; + +describe('ratioColor', () => { + it('renders 1.0× as near-neutral and produces dark text', () => { + const { background, color } = ratioColor(1); + expect(background).toMatch(/^rgb\(/u); + expect(color).toBe('#0a0a0a'); + }); + + it('produces visibly distinct colors for common positive ratios', () => { + // The whole point of bumping the cap from 3× to 30× and switching to HSL: + // common ratios from 2× up through 20× must land at clearly different + // greens rather than all saturating to the same deep color. + const ratios = [2, 5, 7, 10, 20]; + const backgrounds = ratios.map((r) => ratioColor(r).background); + expect(new Set(backgrounds).size).toBe(ratios.length); + }); + + it('produces a monotonically darker green for higher ratios (higher-better)', () => { + // Each step up in ratio should reduce HSL lightness (=> lower luminance) + // until the saturation cap. Use a coarse luminance proxy via the green + // channel of the rgb() string. 
+ const greens = [1.5, 2, 5, 10, 20, 33].map((r) => { + const m = /rgb\((\d+),\s*(\d+),\s*(\d+)\)/u.exec(ratioColor(r).background); + if (!m) throw new Error('rgb parse failed'); + return Number(m[1]) + Number(m[2]) + Number(m[3]); // r+g+b as a luminance proxy + }); + for (let i = 1; i < greens.length; i++) { + expect(greens[i]).toBeLessThan(greens[i - 1]); + } + }); + + it('clamps beyond RATIO_CAP_HI / RATIO_CAP_LO', () => { + expect(ratioColor(RATIO_CAP_HI).background).toBe(ratioColor(RATIO_CAP_HI * 10).background); + expect(ratioColor(RATIO_CAP_LO).background).toBe(ratioColor(RATIO_CAP_LO / 10).background); + }); + + it('is log-symmetric: reciprocal ratios swap red/green at equal magnitude', () => { + // ratioColor(2) and ratioColor(0.5) should be mirror images (same lightness, + // opposite hues). Compare the dominant channel: 2× should be green-dominant + // (g > r), 0.5× should be red-dominant (r > g). + const up = /rgb\((\d+),\s*(\d+),\s*(\d+)\)/u.exec(ratioColor(2).background); + const down = /rgb\((\d+),\s*(\d+),\s*(\d+)\)/u.exec(ratioColor(0.5).background); + if (!up || !down) throw new Error('rgb parse failed'); + expect(Number(up[2])).toBeGreaterThan(Number(up[1])); + expect(Number(down[1])).toBeGreaterThan(Number(down[2])); + }); + + it("inverts hue for direction='lower'", () => { + // For lower-is-better, a ratio > 1 means "other is worse" → red. + const higher = ratioColor(5, 'higher'); + const lower = ratioColor(5, 'lower'); + const hi = /rgb\((\d+),\s*(\d+),\s*(\d+)\)/u.exec(higher.background); + const lo = /rgb\((\d+),\s*(\d+),\s*(\d+)\)/u.exec(lower.background); + if (!hi || !lo) throw new Error('rgb parse failed'); + // higher-better at 5× → green-dominant; lower-better at 5× → red-dominant. 
+ expect(Number(hi[2])).toBeGreaterThan(Number(hi[1])); + expect(Number(lo[1])).toBeGreaterThan(Number(lo[2])); + }); + + it('switches text color to white once background luminance drops', () => { + // Deep ratios should produce white text (background too dark for black). + expect(ratioColor(30).color).toBe('#ffffff'); + expect(ratioColor(1 / 30).color).toBe('#ffffff'); + // Near 1×, text should stay dark. + expect(ratioColor(1.5).color).toBe('#0a0a0a'); + }); +}); diff --git a/packages/app/src/components/inference/ui/InteractivityTables.tsx b/packages/app/src/components/inference/ui/InteractivityTables.tsx index 541514d5..511265fd 100644 --- a/packages/app/src/components/inference/ui/InteractivityTables.tsx +++ b/packages/app/src/components/inference/ui/InteractivityTables.tsx @@ -94,22 +94,74 @@ function relativeLuminance(r: number, g: number, b: number): number { return 0.2126 * srgbToLinear(r) + 0.7152 * srgbToLinear(g) + 0.0722 * srgbToLinear(b); } -const RATIO_CAP_HI = 3; -const RATIO_CAP_LO = 1 / 3; +// Saturation endpoints for the ratio→color ramp. The dataset can show ratios +// up to ~30× between best and worst configs (e.g. GB300 vs MI355X SGL), so +// caps must be wide enough that common ratios (2×, 5×, 10×, 20×) sit at +// visibly different points on the ramp rather than all clamping to the same +// extreme. Stays log-symmetric: t=+1 at RATIO_CAP_HI, t=-1 at RATIO_CAP_LO. +export const RATIO_CAP_HI = 30; +export const RATIO_CAP_LO = 1 / 30; + +/** HSL → RGB. h in [0, 360), s/l in [0, 1]. Returns integer [0,255] channels. 
*/ +function hslToRgb(h: number, s: number, l: number): { r: number; g: number; b: number } { + const c = (1 - Math.abs(2 * l - 1)) * s; + const hp = h / 60; + const x = c * (1 - Math.abs((hp % 2) - 1)); + let r1 = 0; + let g1 = 0; + let b1 = 0; + if (hp < 1) { + r1 = c; + g1 = x; + } else if (hp < 2) { + r1 = x; + g1 = c; + } else if (hp < 3) { + g1 = c; + b1 = x; + } else if (hp < 4) { + g1 = x; + b1 = c; + } else if (hp < 5) { + r1 = x; + b1 = c; + } else { + r1 = c; + b1 = x; + } + const m = l - c / 2; + return { + r: Math.round((r1 + m) * 255), + g: Math.round((g1 + m) * 255), + b: Math.round((b1 + m) * 255), + }; +} + +// HSL endpoints. Lightness ramps 0.97 (near-white at t=0) down to 0.28 (deep +// color at |t|=1); saturation eases up so the deep end stays vivid. RGB +// interpolation collapses perceptually between green-300 and green-700, so we +// drive the ramp in HSL instead — this is what gives 5× / 10× / 20× / 33× +// visibly different greens. +const HUE_GREEN = 142; // tailwind green-ish +const HUE_RED = 0; +const L_NEUTRAL = 0.97; +const L_DEEP = 0.28; +const S_NEUTRAL = 0.6; +const S_DEEP = 0.78; /** * Map a ratio (other / baseline) to a red→white→green color, centered at 1.0× * and log-symmetric. * - * For 'higher' (default): ratio = 1 → white; ratio ≥ 3 → fully green; ratio ≤ - * 1/3 → fully red. + * For 'higher' (default): ratio = 1 → near-white; ratio ≥ RATIO_CAP_HI → deep + * green; ratio ≤ RATIO_CAP_LO → deep red. * - * For 'lower': INVERT — ratio = 1 → white; ratio ≤ 1/3 → fully green (other - * uses 1/3 of baseline = great); ratio ≥ 3 → fully red. + * For 'lower': INVERT — ratio ≤ RATIO_CAP_LO → deep green; ratio ≥ + * RATIO_CAP_HI → deep red. * * Returns { background, color } with the WCAG-derived text color. 
*/ -function ratioColor( +export function ratioColor( ratio: number, direction: ParetoDirection = 'higher', ): { background: string; color: string } { @@ -118,23 +170,11 @@ function ratioColor( let t = Math.log(clamped) / Math.log(RATIO_CAP_HI); // For lower-is-better, flip the sign so ratio > 1 → red and ratio < 1 → green. if (direction === 'lower') t = -t; - let r: number; - let g: number; - let b: number; - if (t >= 0) { - // white → green - // green target: #15803d (rgb 21, 128, 61) — Tailwind green-700 - r = Math.round(255 + (21 - 255) * t); - g = Math.round(255 + (128 - 255) * t); - b = Math.round(255 + (61 - 255) * t); - } else { - // white → red - // red target: #b91c1c (rgb 185, 28, 28) — Tailwind red-700 - const u = -t; - r = Math.round(255 + (185 - 255) * u); - g = Math.round(255 + (28 - 255) * u); - b = Math.round(255 + (28 - 255) * u); - } + const magnitude = Math.abs(t); + const hue = t >= 0 ? HUE_GREEN : HUE_RED; + const lightness = L_NEUTRAL + (L_DEEP - L_NEUTRAL) * magnitude; + const saturation = S_NEUTRAL + (S_DEEP - S_NEUTRAL) * magnitude; + const { r, g, b } = hslToRgb(hue, saturation, lightness); const lum = relativeLuminance(r, g, b); const color = lum > 0.45 ? '#0a0a0a' : '#ffffff'; return { background: `rgb(${r}, ${g}, ${b})`, color };