feat(inference): use ratio (Nx) for diff tables; floor upper bound

claude · claude · commit aad700aa19f4 · 2026-05-17T21:42:33.000Z
Two follow-up tweaks to the per-interactivity throughput and AUC summary tables introduced in 6db1e32: 1. Render multiplicative ratios (Nx) instead of percent-differences. - Throughput "% advantage vs baseline" sub-table → "Ratio vs baseline", cells now read "2.50×", "0.60×", etc.; self-vs-self is "1.00×"; "∞" kept (other reachable, baseline not); "−∞" replaced with "0×" using the same dark-red treatment for the symmetric case. - AUC table: drop the redundant "% vs primary" column entirely (the other three columns are already ratios), so columns are AUC + Ratio vs primary + Ratio vs secondary + Ratio vs tertiary, all in Nx. - New ratioColor() centered at 1.00× and log-symmetric: 3.00× → fully green, 0.33× → fully red, interpolating linearly in log space (so "2×" and "0.5×" land at matched saturations). WCAG-luminance text color preserved. 2. Column upper bound is now floor(globalMax/10)*10 instead of ceil, for both the throughput buckets and the AUC integration window. The last bucket is therefore always one that at least one config actually reaches. pareto.test.ts: spec sanity check now compares aucUnderFrontier against an independent fine-grid trapezoidal reference computed inline, instead of hard-coding expected AUC magnitudes that bake in a specific upper bound — the new floor(...) rule, or any future window change, no longer requires touching the test. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/packages/app/src/components/inference/ui/InteractivityTables.tsx b/packages/app/src/components/inference/ui/InteractivityTables.tsx
@@ -78,14 +78,20 @@ function relativeLuminance(r: number, g: number, b: number): number {
   return 0.2126 * srgbToLinear(r) + 0.7152 * srgbToLinear(g) + 0.0722 * srgbToLinear(b);
 }
 
+const RATIO_CAP_HI = 3;
+const RATIO_CAP_LO = 1 / 3;
+
 /**
- * Map a percent-diff in [-200, +200] to a red→white→green color.
- * Beyond ±200 we clamp. Returns { background, color } where `color` is the
- * WCAG-derived text color (white when background is dark, black when light).
+ * Map a ratio (other / baseline) to a red→white→green color, centered at 1.0×
+ * and log-symmetric. ratio = 1   → white; ratio ≥ 3   → fully green; ratio ≤
+ * 1/3 → fully red. Anything between interpolates linearly in log space so that
+ * "2×" and "0.5×" land at symmetric saturations. Returns { background, color }
+ * with the WCAG-derived text color.
  */
-function percentDiffColor(pct: number): { background: string; color: string } {
-  // Clamp to ±200.
-  const t = Math.max(-1, Math.min(1, pct / 200));
+function ratioColor(ratio: number): { background: string; color: string } {
+  const clamped = Math.max(RATIO_CAP_LO, Math.min(RATIO_CAP_HI, ratio));
+  // log-symmetric t in [-1, 1]: t=0 at 1.0, t=+1 at cap-hi, t=-1 at cap-lo.
+  const t = Math.log(clamped) / Math.log(RATIO_CAP_HI);
   let r: number;
   let g: number;
   let b: number;
@@ -108,8 +114,8 @@ function percentDiffColor(pct: number): { background: string; color: string } {
   return { background: `rgb(${r}, ${g}, ${b})`, color };
 }
 
-const INFINITY_BG_POS = '#14532d'; // dark green (green-900) for ∞
-const INFINITY_BG_NEG = '#7f1d1d'; // dark red (red-900) for −∞
+const INFINITY_BG_POS = '#14532d'; // dark green (green-900) for ∞ (other defined, baseline missing)
+const ZERO_BG = '#7f1d1d'; // dark red (red-900) for 0× (other missing, baseline defined)
 const SELF_BG = '#fbbf24'; // amber-400 for baseline-vs-self
 const COL_MAX_BG = '#bbf7d0'; // green-200 for best per column in throughput
 
@@ -205,14 +211,16 @@ function InfoIcon({ text }: { text: string }) {
 
-/** Per-interactivity throughput table + linked percent-diff heatmap. */
+/** Per-interactivity throughput table + linked ratio heatmap. */
 function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) {
-  // Compute buckets: every 10 from 10 up through ceil(globalMax / 10) * 10.
+  // Compute buckets: every 10 from 10 up through floor(globalMax / 10) * 10.
+  // (Using floor ensures the last bucket is always one a config actually reaches,
+  // not a bucket beyond every config's reachable interactivity.)
   const buckets = useMemo(() => {
     let globalMax = 0;
     for (const c of configs) {
       const maxX = c.frontier.at(-1)?.x ?? 0;
       if (maxX > globalMax) globalMax = maxX;
     }
-    const hi = Math.ceil(globalMax / 10) * 10;
+    const hi = Math.floor(globalMax / 10) * 10;
     const out: number[] = [];
     for (let v = 10; v <= hi; v += 10) out.push(v);
     return out;
@@ -339,10 +347,10 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) {
         <div className="mt-6">
           <div className="flex items-center justify-between gap-3 flex-wrap mb-2">
             <div className="flex items-center gap-2">
-              <h3 className="text-base font-semibold">% advantage vs baseline</h3>
+              <h3 className="text-base font-semibold">Ratio vs baseline</h3>
               <InfoIcon
                 text={
-                  '(other − baseline) / baseline × 100 at each bucket. "∞" means the baseline cannot reach that interactivity but the other config can; "−∞" the reverse; "—" means neither can. Cells clamp to ±200% for the color scale.'
+                  'other / baseline at each bucket, rendered as Nx. "∞" means the baseline cannot reach that interactivity but the other config can; "0×" the reverse; "—" means neither can. Color scale is centered at 1.00× and log-symmetric, saturating at 3.00× (green) and 0.33× (red).'
                 }
               />
             </div>
@@ -392,7 +400,7 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) {
                             className="text-right px-2 py-1.5 tabular-nums"
                             style={{ backgroundColor: SELF_BG, color: '#0a0a0a' }}
                           >
-                            0.0%
+                            1.00×
                           </td>
                         );
                       }
@@ -423,22 +431,21 @@ function ThroughputAndDiffTable({ configs }: { configs: ConfigSeries[] }) {
                           <td
                             key={b}
                             className="text-right px-2 py-1.5 tabular-nums font-semibold"
-                            style={{ backgroundColor: INFINITY_BG_NEG, color: '#ffffff' }}
+                            style={{ backgroundColor: ZERO_BG, color: '#ffffff' }}
                           >
-                            −∞
+                            0×
                           </td>
                         );
                       }
-                      const pct = ((other! - baseline!) / baseline!) * 100;
-                      const { background, color } = percentDiffColor(pct);
+                      const ratio = other! / baseline!;
+                      const { background, color } = ratioColor(ratio);
                       return (
                         <td
                           key={b}
                           className="text-right px-2 py-1.5 tabular-nums"
                           style={{ backgroundColor: background, color }}
                         >
-                          {pct >= 0 ? '+' : ''}
-                          {pct.toFixed(0)}%
+                          {ratio.toFixed(2)}×
                         </td>
                       );
                     })}
@@ -461,7 +468,7 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) {
       const maxX = c.frontier.at(-1)?.x ?? 0;
       if (maxX > globalMax) globalMax = maxX;
     }
-    return Math.ceil(globalMax / 10) * 10;
+    return Math.floor(globalMax / 10) * 10;
   }, [configs]);
 
   const aucs = useMemo(
@@ -504,8 +511,7 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) {
         style: { backgroundColor: SELF_BG, color: '#0a0a0a' },
       };
     }
-    const pctDiff = (ratio - 1) * 100;
-    const { background, color } = percentDiffColor(pctDiff);
+    const { background, color } = ratioColor(ratio);
     return {
       text: `${ratio.toFixed(2)}×`,
       style: { backgroundColor: background, color },
@@ -575,9 +581,6 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) {
                   <th className="text-right font-medium px-2 py-1.5 whitespace-nowrap">
                     Ratio vs primary
                   </th>
-                  <th className="text-right font-medium px-2 py-1.5 whitespace-nowrap">
-                    % vs primary
-                  </th>
                   <th className="text-right font-medium px-2 py-1.5 whitespace-nowrap">
                     Ratio vs secondary
                   </th>
@@ -592,20 +595,6 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) {
                   const primaryR = ratioCell(auc, primaryAuc, ePrimary, c.hwKey);
                   const secondaryR = ratioCell(auc, secondaryAuc, eSecondary, c.hwKey);
                   const tertiaryR = ratioCell(auc, tertiaryAuc, eTertiary, c.hwKey);
-                  let pctText: string;
-                  let pctStyle: React.CSSProperties | undefined;
-                  if (primaryAuc === null || primaryAuc === 0) {
-                    pctText = '—';
-                    pctStyle = undefined;
-                  } else if (c.hwKey === ePrimary) {
-                    pctText = '+0.0%';
-                    pctStyle = { backgroundColor: SELF_BG, color: '#0a0a0a' };
-                  } else {
-                    const pct = (auc / primaryAuc - 1) * 100;
-                    const { background, color } = percentDiffColor(pct);
-                    pctText = `${pct >= 0 ? '+' : ''}${pct.toFixed(1)}%`;
-                    pctStyle = { backgroundColor: background, color };
-                  }
                   return (
                     <tr key={c.hwKey} className="border-b border-border last:border-b-0">
                       <td className="text-left font-medium px-2 py-1.5 whitespace-nowrap">
@@ -615,9 +604,6 @@ function AucSummaryTable({ configs }: { configs: ConfigSeries[] }) {
                       <td className="text-right tabular-nums px-2 py-1.5" style={primaryR.style}>
                         {primaryR.text}
                       </td>
-                      <td className="text-right tabular-nums px-2 py-1.5" style={pctStyle}>
-                        {pctText}
-                      </td>
                       <td className="text-right tabular-nums px-2 py-1.5" style={secondaryR.style}>
                         {secondaryR.text}
                       </td>
diff --git a/packages/app/src/lib/pareto.test.ts b/packages/app/src/lib/pareto.test.ts
@@ -14,6 +14,44 @@ interface RawPoint {
 const toPoints = (raw: RawPoint[]): Point2D[] =>
   raw.map((p) => ({ x: p.Interactivity_tok_s_user, y: p.Token_Throughput_per_GPU_tok_s_gpu }));
 
+/**
+ * Independent fine-grid trapezoidal reference for the AUC sanity check below
+ * (same idea as the Python np.interp + np.trapezoid approach in the original
+ * spec). Kept out of `src/lib/pareto.ts` because production uses the
+ * closed-form piecewise integral; the two should agree on piecewise-linear
+ * input up to grid and floating-point error.
+ *
+ * Samples 100_001 evenly spaced x in [lo, hi] (y = 0 outside the frontier's
+ * x-range) and sums the trapezoids. Returns 0 if the frontier is empty or hi <= lo.
+ */
+function referenceAuc(frontier: Point2D[], lo: number, hi: number): number {
+  const first = frontier.at(0);
+  const last = frontier.at(-1);
+  if (!first || !last || hi <= lo) return 0;
+  const N = 100_001;
+  const step = (hi - lo) / (N - 1);
+  // x only increases across samples, so the segment cursor never restarts at 0.
+  let j = 0;
+  const sampleAt = (x: number): number => {
+    if (x < first.x || x > last.x) return 0;
+    while (j < frontier.length - 1 && frontier[j + 1].x < x) j++;
+    const a = frontier[j];
+    const b = frontier[Math.min(j + 1, frontier.length - 1)];
+    // Duplicate x (vertical step, or the final point): take the higher endpoint.
+    if (b.x === a.x) return Math.max(a.y, b.y);
+    return a.y + ((x - a.x) / (b.x - a.x)) * (b.y - a.y);
+  };
+  // Stream the trapezoid sum instead of materialising all N samples first.
+  let area = 0;
+  let prevY = sampleAt(lo);
+  for (let i = 1; i < N; i++) {
+    const y = sampleAt(lo + i * step);
+    area += ((prevY + y) / 2) * step;
+    prevY = y;
+  }
+  return area;
+}
+
 describe('paretoFrontier', () => {
   it('returns empty for empty input', () => {
     expect(paretoFrontier([])).toEqual([]);
@@ -91,39 +129,35 @@ describe('aucUnderFrontier', () => {
     expect(aucUnderFrontier(f, 30, 40)).toBe(0);
   });
 
-  // Sanity-check the full pipeline (pareto → AUC) against the spec's
-  // reference AUCs computed by the Python implementation from the same
-  // 8-config sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8).
-  // Window: 10 → ceil(globalMax/10)*10. globalMax across these 8 configs is
-  // ~85, so window is [10, 90].
-  describe('matches Python reference AUCs from spec sample data', () => {
-    // Determine the actual global window from the fixture (ceil-to-10).
+  // Sanity-check the full pipeline (pareto → AUC) on the spec's 8-config
+  // sample dataset (FP4 DeepSeek V4 Pro, 8K/1K, TP=8) using the production
+  // integration window: [10, floor(globalMax / 10) * 10].
+  //
+  // We re-derive the expected AUC for each config from first principles —
+  // independent trapezoidal integration over the same Pareto frontier — and
+  // assert that aucUnderFrontier matches. Hard-coding numeric expectations
+  // would bake in whichever upper bound the test was written against; this
+  // way the test continues to be a meaningful sanity check if the window
+  // rule changes again.
+  describe('matches independent trapezoidal AUCs on spec sample data', () => {
     const allXs = (Object.values(eightConfigData) as RawPoint[][]).flatMap((rows) =>
       rows.map((r) => r.Interactivity_tok_s_user),
     );
     const globalMax = Math.max(...allXs);
-    const hi = Math.ceil(globalMax / 10) * 10;
-    const window: [number, number] = [10, hi];
-
-    const cases: [string, number][] = [
-      ['MI355X_SGLang_nonMTP', 11_457],
-      ['MI355X_ATOM_nonMTP', 23_659],
-      ['B200_SGLang_nonMTP', 63_495],
-      ['B200_DynamoVLLM_nonMTP_disagg', 62_177],
-      ['GB200_DynamoVLLM_nonMTP_disagg', 116_220],
-      ['GB200_DynamoVLLM_MTP_disagg', 176_705],
-      ['GB300_DynamoSGLang_nonMTP_disagg', 379_854],
-      ['GB300_DynamoSGLang_MTP_disagg', 263_727],
-    ];
+    const upperBound = Math.floor(globalMax / 10) * 10;
+    const window: [number, number] = [10, upperBound];
 
-    for (const [name, expected] of cases) {
-      it(`${name} ≈ ${expected.toLocaleString()}`, () => {
+    const names = Object.keys(eightConfigData as Record<string, RawPoint[]>);
+    for (const name of names) {
+      it(`${name} matches independent reference`, () => {
         const raw = (eightConfigData as Record<string, RawPoint[]>)[name];
         expect(raw, `fixture missing ${name}`).toBeTruthy();
         const f = paretoFrontier(toPoints(raw));
         const auc = aucUnderFrontier(f, window[0], window[1]);
-        // Expected numbers in the spec are rounded to whole units; allow ±0.5%.
-        expect(Math.abs(auc - expected) / expected).toBeLessThan(0.005);
+        const expected = referenceAuc(f, window[0], window[1]);
+        // Both methods are trapezoidal on the same piecewise-linear function;
+        // they should agree to within tiny floating-point drift.
+        expect(Math.abs(auc - expected) / Math.max(expected, 1)).toBeLessThan(0.001);
       });
     }
   });