Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions packages/app/src/components/inference/inference-chart-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@
"y_jInput_label": "All-in Provisioned J per Input Token (J/tok)",
"y_jInput_title": "All-in Provisioned Joules per Input Token",
"y_jInput_roofline": "lower_right",
"y_measuredAvgPower": "measuredAvgPower.y",
"y_measuredAvgPower_label": "Measured Avg Power per GPU (W)",
"y_measuredAvgPower_title": "Measured Average Power per GPU",
"y_measuredJPerOutputToken": "measuredJPerOutputToken.y",
"y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
"y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
"y_measuredJPerOutputToken_roofline": "lower_right",
"y_cost_limit": 5,
"y_latency_limit": 60
},
Expand Down Expand Up @@ -179,6 +186,13 @@
"y_jInput_label": "All-in Provisioned J per Input Token (J/tok)",
"y_jInput_title": "All-in Provisioned Joules per Input Token",
"y_jInput_roofline": "lower_left",
"y_measuredAvgPower": "measuredAvgPower.y",
"y_measuredAvgPower_label": "Measured Avg Power per GPU (W)",
"y_measuredAvgPower_title": "Measured Average Power per GPU",
"y_measuredJPerOutputToken": "measuredJPerOutputToken.y",
"y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
"y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
"y_measuredJPerOutputToken_roofline": "lower_left",
"y_cost_limit": 5,
"y_latency_limit": 60
}
Expand Down
27 changes: 26 additions & 1 deletion packages/app/src/components/inference/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ export interface AggDataEntry {
median_e2el: number;
std_e2el: number;
p99_e2el: number;
// Measured GPU telemetry (emitted by runner's aggregate_power.py).
// Optional because historical runs predate the field.
avg_power_w?: number;
joules_per_output_token?: number;
disagg: boolean;
num_prefill_gpu: number;
num_decode_gpu: number;
Expand Down Expand Up @@ -152,6 +156,12 @@ export interface InferenceData extends Partial<Omit<AggDataEntry, AggDataConflic
jTotal?: { y: number; roof: boolean };
jOutput?: { y: number; roof: boolean };
jInput?: { y: number; roof: boolean };

// Measured power / energy from runner GPU telemetry. Optional because
// pre-aggregate_power.py runs (and runs with monitoring disabled) won't
// emit these fields.
measuredAvgPower?: { y: number; roof: boolean };
measuredJPerOutputToken?: { y: number; roof: boolean };
}

/**
Expand All @@ -177,7 +187,9 @@ export type YAxisMetricKey =
| 'powerUser'
| 'jTotal'
| 'jOutput'
| 'jInput';
| 'jInput'
| 'measuredAvgPower'
| 'measuredJPerOutputToken';

/**
* Defines the configuration and labels for a specific chart.
Expand Down Expand Up @@ -277,6 +289,19 @@ export interface ChartDefinition {
y_jInput_label?: string;
y_jInput_title?: string;
y_jInput_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
// Measured power / energy from runner GPU telemetry
y_measuredAvgPower?: string;
y_measuredAvgPower_label?: string;
y_measuredAvgPower_title?: string;
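// Not explicitly set in the config — ScatterGraph falls back to lower_right
// (matches "lower power at the same interactivity is more efficient").
// NOTE(review): the second chart definition uses lower_left for its other
// energy rooflines (y_jInput, y_measuredJPerOutputToken), so confirm the
// lower_right fallback is also correct there, or set this field explicitly.
// The field stays in the type for parity with the other y_* metrics and
// so a future config can override the default.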
y_measuredAvgPower_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
y_measuredJPerOutputToken?: string;
y_measuredJPerOutputToken_label?: string;
y_measuredJPerOutputToken_title?: string;
y_measuredJPerOutputToken_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
y_cost_limit?: number;
y_latency_limit?: number;
}
Expand Down
63 changes: 44 additions & 19 deletions packages/app/src/components/inference/ui/ChartControls.tsx
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
'use client';

import { useState } from 'react';
import { useMemo, useState } from 'react';

import { track } from '@/lib/analytics';
import { useFeatureGate } from '@/lib/use-feature-gate';

import { useInference } from '@/components/inference/InferenceContext';
import {
Expand All @@ -26,8 +27,15 @@ import chartDefinitions from '@/components/inference/inference-chart-config.json
import type { ChartDefinition } from '@/components/inference/types';
import type { Model, Sequence } from '@/lib/data-mappings';

// Build Y-axis metric options from static chart config JSON — available immediately, no API wait
const METRIC_GROUPS = [
/**
* Y-axis metric options from static chart config JSON — available immediately, no API wait.
*
* Groups marked `gated: true` are hidden unless the ↑↑↓↓ feature gate is unlocked
* (see useFeatureGate). Use this for surfaces that are wired up but whose underlying
* data pipeline is still rolling out (e.g. measured-power telemetry waiting on a
* runner-side aggregation PR to start populating the DB).
*/
const METRIC_GROUPS: { label: string; metrics: string[]; gated?: boolean }[] = [
{
label: 'Throughput',
metrics: [
Expand All @@ -46,6 +54,11 @@ const METRIC_GROUPS = [
},
{ label: 'Cost per Million Input Tokens', metrics: ['y_costhi', 'y_costni', 'y_costri'] },
{ label: 'All-in Provisioned Energy per Token', metrics: ['y_jTotal', 'y_jOutput', 'y_jInput'] },
{
label: 'Measured Energy',
metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken'],
gated: true,
},
{ label: 'Custom User Values', metrics: ['y_costUser', 'y_powerUser'] },
];

Expand All @@ -61,18 +74,6 @@ const METRIC_TITLE_MAP = (() => {
return map;
})();

/** Lookup table from metric key to the label of its group (e.g. "Throughput"). */
const METRIC_GROUP_MAP = (() => {
  const labelByMetric = new Map<string, string>();
  for (const { label, metrics } of METRIC_GROUPS) {
    for (const metricKey of metrics) {
      // A key listed in several groups keeps the label of the last group, as with `new Map(entries)`.
      labelByMetric.set(metricKey, label);
    }
  }
  return labelByMetric;
})();

// Grouped dropdown options: one entry per metric group, keeping only metrics that have a
// title in METRIC_TITLE_MAP and dropping groups that end up with no options.
const GROUPED_Y_AXIS_OPTIONS = METRIC_GROUPS.flatMap(({ label: groupLabel, metrics }) => {
  const options = metrics
    .filter((metricKey) => METRIC_TITLE_MAP.has(metricKey))
    .map((metricKey) => ({ value: metricKey, label: METRIC_TITLE_MAP.get(metricKey)! }));
  return options.length > 0 ? [{ groupLabel, options }] : [];
});

interface ChartControlsProps {
/** Hide GPU Config selector and related date pickers (used by Historical Trends tab) */
hideGpuComparison?: boolean;
Expand Down Expand Up @@ -113,8 +114,32 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
setScaleType,
} = useInference();

// Y-axis metric options — built from static chart config JSON (no API dependency)
const groupedYAxisOptions = GROUPED_Y_AXIS_OPTIONS;
// Y-axis metric options — built from static chart config JSON (no API dependency).
// Gated groups (e.g. Measured Energy) are offered only while the feature gate is unlocked.
const featureGateUnlocked = useFeatureGate();
const visibleGroups = useMemo(
  () => METRIC_GROUPS.filter((g) => !g.gated || featureGateUnlocked),
  [featureGateUnlocked],
);
// Metric key → group label; in the visible code this only labels analytics events.
// Built from ALL groups rather than just the visible ones, so a gated metric that is
// still selected while the gate is locked (the gate can re-lock via the
// 'inferencex:feature-gate:locked' event) is reported under its real group instead
// of 'Unknown'. METRIC_GROUPS is a module constant, hence the empty dependency list.
const metricGroupMap = useMemo(
  () =>
    new Map<string, string>(
      METRIC_GROUPS.flatMap((g) => g.metrics.map((m) => [m, g.label] as const)),
    ),
  [],
);
// Dropdown options: visible groups only, restricted to metrics that have a title,
// with groups that end up empty removed.
const groupedYAxisOptions = useMemo(
  () =>
    visibleGroups
      .map((group) => ({
        groupLabel: group.label,
        options: group.metrics
          .filter((m) => METRIC_TITLE_MAP.has(m))
          .map((m) => ({ value: m, label: METRIC_TITLE_MAP.get(m)! })),
      }))
      .filter((g) => g.options.length > 0),
  [visibleGroups],
);

const trackCombinedFilters = () => {
if (selectedModel && selectedSequence && selectedPrecisions.length > 0 && selectedYAxisMetric) {
Expand All @@ -124,7 +149,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
precision: selectedPrecisions.join(','),
yAxisMetric: selectedYAxisMetric,
yAxisMetricLabel: METRIC_TITLE_MAP.get(selectedYAxisMetric) ?? selectedYAxisMetric,
yAxisMetricGroup: METRIC_GROUP_MAP.get(selectedYAxisMetric) ?? 'Unknown',
yAxisMetricGroup: metricGroupMap.get(selectedYAxisMetric) ?? 'Unknown',
});
}
};
Expand Down Expand Up @@ -159,7 +184,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
track('inference_y_axis_metric_selected', {
metric: value,
metric_label: METRIC_TITLE_MAP.get(value) ?? value,
metric_group: METRIC_GROUP_MAP.get(value) ?? 'Unknown',
metric_group: metricGroupMap.get(value) ?? 'Unknown',
});
setTimeout(trackCombinedFilters, 0);
};
Expand Down
51 changes: 2 additions & 49 deletions packages/app/src/components/tab-nav.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import { ChevronDown } from 'lucide-react';
import Link from 'next/link';
import { usePathname, useRouter } from 'next/navigation';
import { useContext, useEffect, useRef, useState } from 'react';
import { useContext, useEffect, useState } from 'react';

import { track } from '@/lib/analytics';
import { useFeatureGate } from '@/lib/use-feature-gate';
import { Card } from '@/components/ui/card';
import { Label } from '@/components/ui/label';
import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
Expand All @@ -22,54 +23,6 @@ import {
import { UnofficialRunContext } from '@/components/unofficial-run-provider';
import { cn } from '@/lib/utils';

// localStorage key under which the unlocked state is persisted (the value '1' means unlocked).
const FEATURE_GATE_KEY = 'inferencex-feature-gate';
// KeyboardEvent.key values that must be pressed, in this order, to unlock the gate.
const UNLOCK_SEQUENCE = ['ArrowUp', 'ArrowUp', 'ArrowDown', 'ArrowDown'];

/**
 * Tracks whether the hidden feature gate is unlocked.
 *
 * The gate unlocks when the UNLOCK_SEQUENCE keys are pressed in order anywhere in the
 * window, or when an 'inferencex:feature-gate:unlocked' window event is received; it
 * re-locks on an 'inferencex:feature-gate:locked' window event. An unlock is persisted
 * in localStorage under FEATURE_GATE_KEY and restored after mount.
 *
 * localStorage access is best-effort: browsers can throw when storage is blocked, and
 * an uncaught error inside an effect or event handler would otherwise break the
 * component using this hook or abort the unlock.
 *
 * @returns true while the gate is unlocked; always false on the first render.
 */
function useFeatureGate(): boolean {
  const [unlocked, setUnlocked] = useState(false);
  // Rolling buffer of the most recent key presses (at most UNLOCK_SEQUENCE.length entries).
  const sequenceRef = useRef<string[]>([]);

  // Restore a previously persisted unlock. Effects only run in the browser, so no
  // `typeof window` guard is needed here.
  useEffect(() => {
    try {
      if (localStorage.getItem(FEATURE_GATE_KEY) === '1') {
        setUnlocked(true);
      }
    } catch {
      // Storage unavailable (e.g. blocked by browser privacy settings): stay locked.
    }
  }, []);

  // Listen for the unlock key sequence, but only while the gate is locked.
  useEffect(() => {
    if (unlocked) return;
    const handleKeyDown = (e: KeyboardEvent) => {
      sequenceRef.current.push(e.key);
      if (sequenceRef.current.length > UNLOCK_SEQUENCE.length) {
        sequenceRef.current = sequenceRef.current.slice(-UNLOCK_SEQUENCE.length);
      }
      if (
        sequenceRef.current.length === UNLOCK_SEQUENCE.length &&
        sequenceRef.current.every((k, i) => k === UNLOCK_SEQUENCE[i])
      ) {
        try {
          localStorage.setItem(FEATURE_GATE_KEY, '1');
        } catch {
          // Persisting is best-effort; the gate still unlocks for this session.
        }
        setUnlocked(true);
        // Tell every listener of this event (including other instances of this hook).
        window.dispatchEvent(new Event('inferencex:feature-gate:unlocked'));
        track('feature_gate_unlocked');
      }
    };
    window.addEventListener('keydown', handleKeyDown);
    return () => window.removeEventListener('keydown', handleKeyDown);
  }, [unlocked]);

  // Mirror lock/unlock events dispatched on the window into local state.
  useEffect(() => {
    const handleLock = () => setUnlocked(false);
    const handleUnlock = () => setUnlocked(true);
    window.addEventListener('inferencex:feature-gate:locked', handleLock);
    window.addEventListener('inferencex:feature-gate:unlocked', handleUnlock);
    return () => {
      window.removeEventListener('inferencex:feature-gate:locked', handleLock);
      window.removeEventListener('inferencex:feature-gate:unlocked', handleUnlock);
    };
  }, []);

  return unlocked;
}

const VISIBLE_TABS = [
{ href: '/inference', label: 'Inference Performance', testId: 'tab-trigger-inference' },
{ href: '/evaluation', label: 'Accuracy Evals', testId: 'tab-trigger-evaluation' },
Expand Down
18 changes: 18 additions & 0 deletions packages/app/src/lib/benchmark-transform.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,24 @@ describe('rowToAggDataEntry', () => {
const entryNull = rowToAggDataEntry(makeRow({ image: null }));
expect(entryNull.image).toBeUndefined();
});

it('passes through measured power telemetry fields when present', () => {
  // Row whose metrics blob carries both measured-telemetry values.
  const row = makeRow({
    metrics: { tput_per_gpu: 100, avg_power_w: 685.5, joules_per_output_token: 8.4 },
  });
  const { avg_power_w, joules_per_output_token } = rowToAggDataEntry(row);

  expect(avg_power_w).toBe(685.5);
  expect(joules_per_output_token).toBe(8.4);
});

it('leaves measured power fields undefined for rows that predate the metric', () => {
  // "No measurement" has to stay distinguishable from "0 W": createChartDataPoint
  // uses typeof===number to decide whether to emit the measuredAvgPower field.
  const { avg_power_w, joules_per_output_token } = rowToAggDataEntry(makeRow({ metrics: {} }));

  expect(avg_power_w).toBeUndefined();
  expect(joules_per_output_token).toBeUndefined();
});
});

describe('transformBenchmarkRows', () => {
Expand Down
5 changes: 5 additions & 0 deletions packages/app/src/lib/benchmark-transform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
median_e2el: m.median_e2el ?? 0,
std_e2el: m.std_e2el ?? 0,
p99_e2el: m.p99_e2el ?? 0,
// Measured GPU telemetry (runner's aggregate_power.py). Left undefined for
// rows predating the field so downstream chart code can distinguish
// "no measurement" from "0 W" via createChartDataPoint's typeof guard.
avg_power_w: m.avg_power_w,
joules_per_output_token: m.joules_per_output_token,
disagg: row.disagg,
num_prefill_gpu: row.num_prefill_gpu,
num_decode_gpu: row.num_decode_gpu,
Expand Down
49 changes: 49 additions & 0 deletions packages/app/src/lib/chart-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,55 @@ describe('createChartDataPoint energy fields', () => {
});
});

// ===========================================================================
// createChartDataPoint — measured power / energy fields (from runner telemetry)
// ===========================================================================
describe('createChartDataPoint measured power fields', () => {
  // Every case calls createChartDataPoint with the same fixed arguments; only the entry varies.
  const pointFor = (e: ReturnType<typeof entry>) =>
    createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');

  it('emits measuredAvgPower when avg_power_w is present on the entry', () => {
    const { measuredAvgPower } = pointFor(entry({ avg_power_w: 685.5 }));
    expect(measuredAvgPower).toBeDefined();
    expect(measuredAvgPower!.y).toBe(685.5);
    expect(measuredAvgPower!.roof).toBe(false);
  });

  it('emits measuredJPerOutputToken when joules_per_output_token is present', () => {
    const { measuredJPerOutputToken } = pointFor(entry({ joules_per_output_token: 8.4 }));
    expect(measuredJPerOutputToken).toBeDefined();
    expect(measuredJPerOutputToken!.y).toBe(8.4);
  });

  it('omits both fields when neither is on the entry', () => {
    // Legacy runs that predate aggregate_power.py carry neither value.
    const { measuredAvgPower, measuredJPerOutputToken } = pointFor(entry());
    expect(measuredAvgPower).toBeUndefined();
    expect(measuredJPerOutputToken).toBeUndefined();
  });

  it('emits one and omits the other when only one is present', () => {
    // Defensive: aggregator can patch only avg_power_w if total_output_tokens=0.
    const { measuredAvgPower, measuredJPerOutputToken } = pointFor(entry({ avg_power_w: 500 }));
    expect(measuredAvgPower).toBeDefined();
    expect(measuredJPerOutputToken).toBeUndefined();
  });

  it('preserves a zero measured power value (not falsy-coerced away)', () => {
    // Fails if the gate is ever changed from typeof===number to a truthiness check.
    const { measuredAvgPower } = pointFor(entry({ avg_power_w: 0 }));
    expect(measuredAvgPower).toBeDefined();
    expect(measuredAvgPower!.y).toBe(0);
  });
});

// ===========================================================================
// createChartDataPoint — boolean narrowing for prefill/decode dp_attention, is_multinode
// ===========================================================================
Expand Down
Loading
Loading