From c08f65886b2bf4f0f513ef4ea4c10941666b475a Mon Sep 17 00:00:00 2001
From: Aryan <aryan@gupta-inc.com>
Date: Thu, 21 May 2026 16:50:51 -0700
Subject: [PATCH 1/3] feat(inference): measured-power Y-axis metrics on scatter
 chart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds two new options under a new "Measured Energy" dropdown group on
both the "vs. Interactivity" and "vs. E2E Latency" charts:

  - Measured Avg Power per GPU (W)        — no roofline (no universal
                                             "better" direction)
  - Measured J per Output Token (J/tok)   — roofline lower_right
                                             (interactivity) / lower_left
                                             (e2e)

Distinct from the existing y_jTotal/y_jOutput/y_jInput which derive
joules from each GPU's spec-sheet TDP. The new metrics are sourced
from runner GPU telemetry averaged over the exact bench load window
(see aggregate_power.py in semianalysisai/InferenceX).

Wiring:

  - packages/constants/src/metric-keys.ts: register avg_power_w,
    joules_per_output_token in the canonical metric key set so the ETL
    auto-capture warning doesn't fire.
  - packages/app/src/lib/benchmark-transform.ts: pass the two raw
    fields through rowToAggDataEntry. Left undefined when absent so
    downstream code can distinguish "no measurement" from "0 W".
  - packages/app/src/components/inference/types.ts: extend AggDataEntry,
    InferenceData, YAxisMetricKey, and ChartDefinition.
  - packages/app/src/lib/chart-utils.ts: extend Y_AXIS_METRICS,
    createChartDataPoint (gated on typeof===number), calculateRoofline
    and computeAllRooflines yKey union, markRooflinePoints init+mark
    blocks.
  - packages/app/src/components/inference/inference-chart-config.json:
    add y_measured* entries to both chartTypes.
  - packages/app/src/components/inference/ui/ChartControls.tsx: add
    "Measured Energy" group to METRIC_GROUPS.

The overlay (unofficial run) path is automatic — transformBenchmarkRows
is shared between official and overlay rendering, so the new metrics
flow to ?unofficialrun= URLs once the runner-side PR is merged and
benchmarks ingest the new fields.

For rows without measured-power data (historical runs, runs predating
aggregate_power.py, runs where the SMI poller didn't start), points
are simply omitted from the new charts — the existing TDP-derived
y_jTotal/y_jOutput/y_jInput stay visible as a comparable fallback.

Verification:

  - pnpm typecheck: clean
  - pnpm lint: 0 warnings, 0 errors
  - pnpm test:unit: 1921/1921 passing (+7 new tests covering
    rowToAggDataEntry pass-through, createChartDataPoint field gating,
    zero-value preservation, missing-field handling)
  - Dev-server smoke: confirmed "Measured Energy" group label and
    both metric labels are present in the served JS bundle at
    /_next/static/chunks/

Follow-up: Cypress E2E covering both the official path and
?unofficialrun= overlay path for the two new metrics, to be added
once the runner PR ships real data to the DB.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../inference/inference-chart-config.json     | 14 ++++++
 .../app/src/components/inference/types.ts     | 26 +++++++++-
 .../components/inference/ui/ChartControls.tsx |  4 ++
 .../app/src/lib/benchmark-transform.test.ts   | 18 +++++++
 packages/app/src/lib/benchmark-transform.ts   |  5 ++
 packages/app/src/lib/chart-utils.test.ts      | 49 +++++++++++++++++++
 packages/app/src/lib/chart-utils.ts           | 31 +++++++++++-
 packages/constants/src/metric-keys.ts         |  5 ++
 8 files changed, 149 insertions(+), 3 deletions(-)

diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json
index e26d237e..1ca1c440 100644
--- a/packages/app/src/components/inference/inference-chart-config.json
+++ b/packages/app/src/components/inference/inference-chart-config.json
@@ -88,6 +88,13 @@
     "y_jInput_label": "All-in Provisioned J per Input Token (J/tok)",
     "y_jInput_title": "All-in Provisioned Joules per Input Token",
     "y_jInput_roofline": "lower_right",
+    "y_measuredAvgPower": "measuredAvgPower.y",
+    "y_measuredAvgPower_label": "Measured Avg Power per GPU (W)",
+    "y_measuredAvgPower_title": "Measured Average Power per GPU",
+    "y_measuredJPerOutputToken": "measuredJPerOutputToken.y",
+    "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
+    "y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
+    "y_measuredJPerOutputToken_roofline": "lower_right",
     "y_cost_limit": 5,
     "y_latency_limit": 60
   },
@@ -179,6 +186,13 @@
     "y_jInput_label": "All-in Provisioned J per Input Token (J/tok)",
     "y_jInput_title": "All-in Provisioned Joules per Input Token",
     "y_jInput_roofline": "lower_left",
+    "y_measuredAvgPower": "measuredAvgPower.y",
+    "y_measuredAvgPower_label": "Measured Avg Power per GPU (W)",
+    "y_measuredAvgPower_title": "Measured Average Power per GPU",
+    "y_measuredJPerOutputToken": "measuredJPerOutputToken.y",
+    "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
+    "y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
+    "y_measuredJPerOutputToken_roofline": "lower_left",
     "y_cost_limit": 5,
     "y_latency_limit": 60
   }
diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts
index 5b5f9ec2..a33122c9 100644
--- a/packages/app/src/components/inference/types.ts
+++ b/packages/app/src/components/inference/types.ts
@@ -67,6 +67,10 @@ export interface AggDataEntry {
   median_e2el: number;
   std_e2el: number;
   p99_e2el: number;
+  // Measured GPU telemetry (emitted by runner's aggregate_power.py).
+  // Optional because historical runs predate the field.
+  avg_power_w?: number;
+  joules_per_output_token?: number;
   disagg: boolean;
   num_prefill_gpu: number;
   num_decode_gpu: number;
@@ -152,6 +156,12 @@ export interface InferenceData extends Partial<Omit<AggDataEntry, AggDataConflic
   jTotal?: { y: number; roof: boolean };
   jOutput?: { y: number; roof: boolean };
   jInput?: { y: number; roof: boolean };
+
+  // Measured power / energy from runner GPU telemetry. Optional because
+  // pre-aggregate_power.py runs (and runs with monitoring disabled) won't
+  // emit these fields.
+  measuredAvgPower?: { y: number; roof: boolean };
+  measuredJPerOutputToken?: { y: number; roof: boolean };
 }
 
 /**
@@ -177,7 +187,9 @@ export type YAxisMetricKey =
   | 'powerUser'
   | 'jTotal'
   | 'jOutput'
-  | 'jInput';
+  | 'jInput'
+  | 'measuredAvgPower'
+  | 'measuredJPerOutputToken';
 
 /**
  * Defines the configuration and labels for a specific chart.
@@ -277,6 +289,18 @@ export interface ChartDefinition {
   y_jInput_label?: string;
   y_jInput_title?: string;
   y_jInput_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
+  // Measured power / energy from runner GPU telemetry
+  y_measuredAvgPower?: string;
+  y_measuredAvgPower_label?: string;
+  y_measuredAvgPower_title?: string;
+  // Intentionally no roofline for avg power: there's no universal "better"
+  // direction for absolute draw. Omitting roofline causes computeAllRooflines
+  // to skip the metric (it requires a direction); points render plain.
+  y_measuredAvgPower_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
+  y_measuredJPerOutputToken?: string;
+  y_measuredJPerOutputToken_label?: string;
+  y_measuredJPerOutputToken_title?: string;
+  y_measuredJPerOutputToken_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
   y_cost_limit?: number;
   y_latency_limit?: number;
 }
diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx
index 0b1705b0..3b5926ca 100644
--- a/packages/app/src/components/inference/ui/ChartControls.tsx
+++ b/packages/app/src/components/inference/ui/ChartControls.tsx
@@ -46,6 +46,10 @@ const METRIC_GROUPS = [
   },
   { label: 'Cost per Million Input Tokens', metrics: ['y_costhi', 'y_costni', 'y_costri'] },
   { label: 'All-in Provisioned Energy per Token', metrics: ['y_jTotal', 'y_jOutput', 'y_jInput'] },
+  {
+    label: 'Measured Energy',
+    metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken'],
+  },
   { label: 'Custom User Values', metrics: ['y_costUser', 'y_powerUser'] },
 ];
 
diff --git a/packages/app/src/lib/benchmark-transform.test.ts b/packages/app/src/lib/benchmark-transform.test.ts
index be76438e..42d2ed5a 100644
--- a/packages/app/src/lib/benchmark-transform.test.ts
+++ b/packages/app/src/lib/benchmark-transform.test.ts
@@ -115,6 +115,24 @@ describe('rowToAggDataEntry', () => {
     const entryNull = rowToAggDataEntry(makeRow({ image: null }));
     expect(entryNull.image).toBeUndefined();
   });
+
+  it('passes through measured power telemetry fields when present', () => {
+    const entry = rowToAggDataEntry(
+      makeRow({
+        metrics: { tput_per_gpu: 100, avg_power_w: 685.5, joules_per_output_token: 8.4 },
+      }),
+    );
+    expect(entry.avg_power_w).toBe(685.5);
+    expect(entry.joules_per_output_token).toBe(8.4);
+  });
+
+  it('leaves measured power fields undefined for rows that predate the metric', () => {
+    // Distinguishing "no measurement" from "0 W" matters: createChartDataPoint
+    // uses typeof===number to decide whether to emit the measuredAvgPower field.
+    const entry = rowToAggDataEntry(makeRow({ metrics: {} }));
+    expect(entry.avg_power_w).toBeUndefined();
+    expect(entry.joules_per_output_token).toBeUndefined();
+  });
 });
 
 describe('transformBenchmarkRows', () => {
diff --git a/packages/app/src/lib/benchmark-transform.ts b/packages/app/src/lib/benchmark-transform.ts
index 107f0b12..adeea144 100644
--- a/packages/app/src/lib/benchmark-transform.ts
+++ b/packages/app/src/lib/benchmark-transform.ts
@@ -49,6 +49,11 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
     median_e2el: m.median_e2el ?? 0,
     std_e2el: m.std_e2el ?? 0,
     p99_e2el: m.p99_e2el ?? 0,
+    // Measured GPU telemetry (runner's aggregate_power.py). Left undefined for
+    // rows predating the field so downstream chart code can distinguish
+    // "no measurement" from "0 W" via createChartDataPoint's typeof guard.
+    avg_power_w: m.avg_power_w,
+    joules_per_output_token: m.joules_per_output_token,
     disagg: row.disagg,
     num_prefill_gpu: row.num_prefill_gpu,
     num_decode_gpu: row.num_decode_gpu,
diff --git a/packages/app/src/lib/chart-utils.test.ts b/packages/app/src/lib/chart-utils.test.ts
index 28147802..aee6f7eb 100644
--- a/packages/app/src/lib/chart-utils.test.ts
+++ b/packages/app/src/lib/chart-utils.test.ts
@@ -1218,6 +1218,55 @@ describe('createChartDataPoint energy fields', () => {
   });
 });
 
+// ===========================================================================
+// createChartDataPoint — measured power / energy fields (from runner telemetry)
+// ===========================================================================
+describe('createChartDataPoint measured power fields', () => {
+  it('emits measuredAvgPower when avg_power_w is present on the entry', () => {
+    const e = entry({ avg_power_w: 685.5 });
+    const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
+    expect(point.measuredAvgPower).toBeDefined();
+    expect(point.measuredAvgPower!.y).toBe(685.5);
+    expect(point.measuredAvgPower!.roof).toBe(false);
+  });
+
+  it('emits measuredJPerOutputToken when joules_per_output_token is present', () => {
+    const e = entry({ joules_per_output_token: 8.4 });
+    const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
+    expect(point.measuredJPerOutputToken).toBeDefined();
+    expect(point.measuredJPerOutputToken!.y).toBe(8.4);
+  });
+
+  it('omits both fields when neither is on the entry', () => {
+    // Legacy runs predating aggregate_power.py.
+    const point = createChartDataPoint(
+      '2025-01-01',
+      entry(),
+      'median_e2el',
+      'tput_per_gpu',
+      'h100',
+    );
+    expect(point.measuredAvgPower).toBeUndefined();
+    expect(point.measuredJPerOutputToken).toBeUndefined();
+  });
+
+  it('emits one and omits the other when only one is present', () => {
+    // Defensive: aggregator can patch only avg_power_w if total_output_tokens=0.
+    const e = entry({ avg_power_w: 500 });
+    const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
+    expect(point.measuredAvgPower).toBeDefined();
+    expect(point.measuredJPerOutputToken).toBeUndefined();
+  });
+
+  it('preserves a zero measured power value (not falsy-coerced away)', () => {
+    // Guards against a refactor switching the gate from typeof===number to truthiness.
+    const e = entry({ avg_power_w: 0 });
+    const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
+    expect(point.measuredAvgPower).toBeDefined();
+    expect(point.measuredAvgPower!.y).toBe(0);
+  });
+});
+
 // ===========================================================================
 // createChartDataPoint — boolean narrowing for prefill/decode dp_attention, is_multinode
 // ===========================================================================
diff --git a/packages/app/src/lib/chart-utils.ts b/packages/app/src/lib/chart-utils.ts
index 554bb7a5..c7b6e04b 100644
--- a/packages/app/src/lib/chart-utils.ts
+++ b/packages/app/src/lib/chart-utils.ts
@@ -148,6 +148,10 @@ export const Y_AXIS_METRICS = [
   'y_jTotal',
   'y_jOutput',
   'y_jInput',
+  // Measured power / energy (sourced from runner's aggregate_power.py output;
+  // distinct from the spec-sheet TDP-derived jTotal/jOutput/jInput above).
+  'y_measuredAvgPower',
+  'y_measuredJPerOutputToken',
 ] as const;
 
 export type YAxisMetric = (typeof Y_AXIS_METRICS)[number];
@@ -389,6 +393,16 @@ export function createChartDataPoint(
           },
         }
       : {}),
+
+    // Measured power / energy from runner's aggregate_power.py. Gated on the
+    // raw fields existing so points from runs predating the measurement land
+    // without these keys and the chart correctly filters them out.
+    ...(typeof entry.avg_power_w === 'number'
+      ? { measuredAvgPower: { y: entry.avg_power_w, roof: false } }
+      : {}),
+    ...(typeof entry.joules_per_output_token === 'number'
+      ? { measuredJPerOutputToken: { y: entry.joules_per_output_token, roof: false } }
+      : {}),
   };
 }
 
@@ -549,7 +563,9 @@ export const calculateRoofline = (
     | `costri.y`
     | `jTotal.y`
     | `jOutput.y`
-    | `jInput.y`,
+    | `jInput.y`
+    | `measuredAvgPower.y`
+    | `measuredJPerOutputToken.y`,
   rooflineDirection: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right',
 ): InferenceData[] => {
   const pointsForRoofline = points.map((p) => {
@@ -619,7 +635,9 @@ export function computeAllRooflines(
             | `costri.y`
             | `jTotal.y`
             | `jOutput.y`
-            | `jInput.y`,
+            | `jInput.y`
+            | `measuredAvgPower.y`
+            | `measuredJPerOutputToken.y`,
           rooflineDirection,
         );
       }
@@ -663,6 +681,8 @@ export function markRooflinePoints(
       if (newPoint.jTotal) newPoint.jTotal.roof = false;
       if (newPoint.jOutput) newPoint.jOutput.roof = false;
       if (newPoint.jInput) newPoint.jInput.roof = false;
+      if (newPoint.measuredAvgPower) newPoint.measuredAvgPower.roof = false;
+      if (newPoint.measuredJPerOutputToken) newPoint.measuredJPerOutputToken.roof = false;
 
       for (const chartDefYKey of Y_AXIS_METRICS) {
         const rooflinePoints = computedRooflines[hwKey]?.[chartDefYKey];
@@ -722,6 +742,13 @@ export function markRooflinePoints(
           newPoint.jOutput.roof = onCurrentRoofline;
         } else if (chartDefYKey === 'y_jInput' && newPoint.jInput) {
           newPoint.jInput.roof = onCurrentRoofline;
+        } else if (chartDefYKey === 'y_measuredAvgPower' && newPoint.measuredAvgPower) {
+          newPoint.measuredAvgPower.roof = onCurrentRoofline;
+        } else if (
+          chartDefYKey === 'y_measuredJPerOutputToken' &&
+          newPoint.measuredJPerOutputToken
+        ) {
+          newPoint.measuredJPerOutputToken.roof = onCurrentRoofline;
         }
       }
       finalProcessedData.push(newPoint);
diff --git a/packages/constants/src/metric-keys.ts b/packages/constants/src/metric-keys.ts
index cf2c4d0b..bc2a5e65 100644
--- a/packages/constants/src/metric-keys.ts
+++ b/packages/constants/src/metric-keys.ts
@@ -43,4 +43,9 @@ export const METRIC_KEYS = new Set([
   'p99_intvty',
   'p99.9_intvty',
   'std_intvty',
+  // measured power / energy (emitted by runner's aggregate_power.py)
+  // avg_power_w: mean per-GPU draw (W) during the load window
+  // joules_per_output_token: avg_power_w * num_gpus * duration / total_output_tokens
+  'avg_power_w',
+  'joules_per_output_token',
 ]);

From 19f9ab1f8779af7e2875732663c6ab57ed4c875c Mon Sep 17 00:00:00 2001
From: Aryan <aryan@gupta-inc.com>
Date: Thu, 21 May 2026 19:03:14 -0700
Subject: [PATCH 2/3] docs(inference): correct y_measuredAvgPower_roofline
 comment

The previous comment claimed avg-power has no roofline because the field
is omitted from the chart config. That's wrong: ScatterGraph.tsx:270-277
falls back to paretoFrontLowerRight when the direction is undefined, so
the chart actually draws a lower_right roofline by default.

The behavior happens to be sensible ("lower power at the same
interactivity is more efficient"), so leaving it as-is and just fixing
the comment to describe what the code actually does.
---
 packages/app/src/components/inference/types.ts | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts
index a33122c9..83926fe1 100644
--- a/packages/app/src/components/inference/types.ts
+++ b/packages/app/src/components/inference/types.ts
@@ -293,9 +293,10 @@ export interface ChartDefinition {
   y_measuredAvgPower?: string;
   y_measuredAvgPower_label?: string;
   y_measuredAvgPower_title?: string;
-  // Intentionally no roofline for avg power: there's no universal "better"
-  // direction for absolute draw. Omitting roofline causes computeAllRooflines
-  // to skip the metric (it requires a direction); points render plain.
+  // Not explicitly set in the config — ScatterGraph falls back to lower_right
+  // (matches "lower power at the same interactivity is more efficient").
+  // The field stays in the type for parity with the other y_* metrics and
+  // so a future config can override the default.
   y_measuredAvgPower_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
   y_measuredJPerOutputToken?: string;
   y_measuredJPerOutputToken_label?: string;

From aff6054cb472368fe72581bde4e61492bb7f0b7a Mon Sep 17 00:00:00 2001
From: Aryan <aryan@gupta-inc.com>
Date: Fri, 22 May 2026 13:19:23 -0700
Subject: [PATCH 3/3] feat(inference): gate measured-energy dropdown behind
 feature flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wires the new "Measured Energy" chart options into the existing
↑↑↓↓ feature-gate so they can be toggled without a deploy.

Two changes:

1. Extract useFeatureGate to a shared hook at lib/use-feature-gate.ts.
   It was previously private to tab-nav.tsx; three other components
   (gpu-power, submissions, feedback) read the same localStorage key
   and DOM events ad-hoc. Now there's one source of truth, and the
   key + event names are exported as named constants so other
   consumers can migrate when convenient. No behavior change for
   tab-nav.

2. ChartControls METRIC_GROUPS entries gain an optional `gated` flag.
   Inside the component, the group list, the metric→group map, and
   the dropdown options are derived via useMemo from useFeatureGate(),
   so toggling the gate flips visibility without a page reload. The
   Measured Energy group is marked `gated: true`; the rest stay
   as-is.

Verified:
- pnpm typecheck clean
- pnpm test:unit 1921/1921
- Pre-commit hook (lint + format + typecheck) passes
---
 .../components/inference/ui/ChartControls.tsx | 59 +++++++++++------
 packages/app/src/components/tab-nav.tsx       | 51 +-------------
 packages/app/src/lib/use-feature-gate.ts      | 66 +++++++++++++++++++
 3 files changed, 108 insertions(+), 68 deletions(-)
 create mode 100644 packages/app/src/lib/use-feature-gate.ts

diff --git a/packages/app/src/components/inference/ui/ChartControls.tsx b/packages/app/src/components/inference/ui/ChartControls.tsx
index 3b5926ca..316e4ae8 100644
--- a/packages/app/src/components/inference/ui/ChartControls.tsx
+++ b/packages/app/src/components/inference/ui/ChartControls.tsx
@@ -1,8 +1,9 @@
 'use client';
 
-import { useState } from 'react';
+import { useMemo, useState } from 'react';
 
 import { track } from '@/lib/analytics';
+import { useFeatureGate } from '@/lib/use-feature-gate';
 
 import { useInference } from '@/components/inference/InferenceContext';
 import {
@@ -26,8 +27,15 @@ import chartDefinitions from '@/components/inference/inference-chart-config.json
 import type { ChartDefinition } from '@/components/inference/types';
 import type { Model, Sequence } from '@/lib/data-mappings';
 
-// Build Y-axis metric options from static chart config JSON — available immediately, no API wait
-const METRIC_GROUPS = [
+/**
+ * Y-axis metric options from static chart config JSON — available immediately, no API wait.
+ *
+ * Groups marked `gated: true` are hidden unless the konami-code feature gate is unlocked
+ * (see useFeatureGate). Use this for surfaces that are wired but whose underlying data
+ * pipeline is in the rollout phase (e.g. measured-power telemetry waiting on a runner-
+ * side aggregation PR to start populating the DB).
+ */
+const METRIC_GROUPS: { label: string; metrics: string[]; gated?: boolean }[] = [
   {
     label: 'Throughput',
     metrics: [
@@ -49,6 +57,7 @@ const METRIC_GROUPS = [
   {
     label: 'Measured Energy',
     metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken'],
+    gated: true,
   },
   { label: 'Custom User Values', metrics: ['y_costUser', 'y_powerUser'] },
 ];
@@ -65,18 +74,6 @@ const METRIC_TITLE_MAP = (() => {
   return map;
 })();
 
-/** Map from metric key → group label (e.g. "Throughput", "Cost per Million Total Tokens") */
-const METRIC_GROUP_MAP = new Map<string, string>(
-  METRIC_GROUPS.flatMap((g) => g.metrics.map((m) => [m, g.label] as const)),
-);
-
-const GROUPED_Y_AXIS_OPTIONS = METRIC_GROUPS.map((group) => ({
-  groupLabel: group.label,
-  options: group.metrics
-    .filter((m) => METRIC_TITLE_MAP.has(m))
-    .map((m) => ({ value: m, label: METRIC_TITLE_MAP.get(m)! })),
-})).filter((g) => g.options.length > 0);
-
 interface ChartControlsProps {
   /** Hide GPU Config selector and related date pickers (used by Historical Trends tab) */
   hideGpuComparison?: boolean;
@@ -117,8 +114,32 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
     setScaleType,
   } = useInference();
 
-  // Y-axis metric options — built from static chart config JSON (no API dependency)
-  const groupedYAxisOptions = GROUPED_Y_AXIS_OPTIONS;
+  // Y-axis metric options — built from static chart config JSON (no API dependency).
+  // Hidden groups (Measured Energy) appear only after the ↑↑↓↓ feature gate unlocks.
+  const featureGateUnlocked = useFeatureGate();
+  const visibleGroups = useMemo(
+    () => METRIC_GROUPS.filter((g) => !g.gated || featureGateUnlocked),
+    [featureGateUnlocked],
+  );
+  const metricGroupMap = useMemo(
+    () =>
+      new Map<string, string>(
+        visibleGroups.flatMap((g) => g.metrics.map((m) => [m, g.label] as const)),
+      ),
+    [visibleGroups],
+  );
+  const groupedYAxisOptions = useMemo(
+    () =>
+      visibleGroups
+        .map((group) => ({
+          groupLabel: group.label,
+          options: group.metrics
+            .filter((m) => METRIC_TITLE_MAP.has(m))
+            .map((m) => ({ value: m, label: METRIC_TITLE_MAP.get(m)! })),
+        }))
+        .filter((g) => g.options.length > 0),
+    [visibleGroups],
+  );
 
   const trackCombinedFilters = () => {
     if (selectedModel && selectedSequence && selectedPrecisions.length > 0 && selectedYAxisMetric) {
@@ -128,7 +149,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
         precision: selectedPrecisions.join(','),
         yAxisMetric: selectedYAxisMetric,
         yAxisMetricLabel: METRIC_TITLE_MAP.get(selectedYAxisMetric) ?? selectedYAxisMetric,
-        yAxisMetricGroup: METRIC_GROUP_MAP.get(selectedYAxisMetric) ?? 'Unknown',
+        yAxisMetricGroup: metricGroupMap.get(selectedYAxisMetric) ?? 'Unknown',
       });
     }
   };
@@ -163,7 +184,7 @@ export default function ChartControls({ hideGpuComparison = false }: ChartContro
     track('inference_y_axis_metric_selected', {
       metric: value,
       metric_label: METRIC_TITLE_MAP.get(value) ?? value,
-      metric_group: METRIC_GROUP_MAP.get(value) ?? 'Unknown',
+      metric_group: metricGroupMap.get(value) ?? 'Unknown',
     });
     setTimeout(trackCombinedFilters, 0);
   };
diff --git a/packages/app/src/components/tab-nav.tsx b/packages/app/src/components/tab-nav.tsx
index 52c43c61..cb7142db 100644
--- a/packages/app/src/components/tab-nav.tsx
+++ b/packages/app/src/components/tab-nav.tsx
@@ -3,9 +3,10 @@
 import { ChevronDown } from 'lucide-react';
 import Link from 'next/link';
 import { usePathname, useRouter } from 'next/navigation';
-import { useContext, useEffect, useRef, useState } from 'react';
+import { useContext, useEffect, useState } from 'react';
 
 import { track } from '@/lib/analytics';
+import { useFeatureGate } from '@/lib/use-feature-gate';
 import { Card } from '@/components/ui/card';
 import { Label } from '@/components/ui/label';
 import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
@@ -22,54 +23,6 @@ import {
 import { UnofficialRunContext } from '@/components/unofficial-run-provider';
 import { cn } from '@/lib/utils';
 
-const FEATURE_GATE_KEY = 'inferencex-feature-gate';
-const UNLOCK_SEQUENCE = ['ArrowUp', 'ArrowUp', 'ArrowDown', 'ArrowDown'];
-
-function useFeatureGate(): boolean {
-  const [unlocked, setUnlocked] = useState(false);
-  const sequenceRef = useRef<string[]>([]);
-
-  useEffect(() => {
-    if (typeof window !== 'undefined' && localStorage.getItem(FEATURE_GATE_KEY) === '1') {
-      setUnlocked(true);
-    }
-  }, []);
-
-  useEffect(() => {
-    if (unlocked) return;
-    const handleKeyDown = (e: KeyboardEvent) => {
-      sequenceRef.current.push(e.key);
-      if (sequenceRef.current.length > UNLOCK_SEQUENCE.length) {
-        sequenceRef.current = sequenceRef.current.slice(-UNLOCK_SEQUENCE.length);
-      }
-      if (
-        sequenceRef.current.length === UNLOCK_SEQUENCE.length &&
-        sequenceRef.current.every((k, i) => k === UNLOCK_SEQUENCE[i])
-      ) {
-        localStorage.setItem(FEATURE_GATE_KEY, '1');
-        setUnlocked(true);
-        window.dispatchEvent(new Event('inferencex:feature-gate:unlocked'));
-        track('feature_gate_unlocked');
-      }
-    };
-    window.addEventListener('keydown', handleKeyDown);
-    return () => window.removeEventListener('keydown', handleKeyDown);
-  }, [unlocked]);
-
-  useEffect(() => {
-    const handleLock = () => setUnlocked(false);
-    const handleUnlock = () => setUnlocked(true);
-    window.addEventListener('inferencex:feature-gate:locked', handleLock);
-    window.addEventListener('inferencex:feature-gate:unlocked', handleUnlock);
-    return () => {
-      window.removeEventListener('inferencex:feature-gate:locked', handleLock);
-      window.removeEventListener('inferencex:feature-gate:unlocked', handleUnlock);
-    };
-  }, []);
-
-  return unlocked;
-}
-
 const VISIBLE_TABS = [
   { href: '/inference', label: 'Inference Performance', testId: 'tab-trigger-inference' },
   { href: '/evaluation', label: 'Accuracy Evals', testId: 'tab-trigger-evaluation' },
diff --git a/packages/app/src/lib/use-feature-gate.ts b/packages/app/src/lib/use-feature-gate.ts
new file mode 100644
index 00000000..d940b31e
--- /dev/null
+++ b/packages/app/src/lib/use-feature-gate.ts
@@ -0,0 +1,66 @@
+'use client';
+
+import { useEffect, useRef, useState } from 'react';
+
+import { track } from '@/lib/analytics';
+
+export const FEATURE_GATE_KEY = 'inferencex-feature-gate';
+export const FEATURE_GATE_UNLOCKED_EVENT = 'inferencex:feature-gate:unlocked';
+export const FEATURE_GATE_LOCKED_EVENT = 'inferencex:feature-gate:locked';
+
+const UNLOCK_SEQUENCE = ['ArrowUp', 'ArrowUp', 'ArrowDown', 'ArrowDown'];
+
+/**
+ * Konami-style ↑↑↓↓ unlock for hidden features. State persists in
+ * localStorage and is shared across components via custom DOM events
+ * (FEATURE_GATE_UNLOCKED_EVENT / FEATURE_GATE_LOCKED_EVENT) so all
+ * consumers flip together without each owning a keyboard listener.
+ *
+ * Used by tab-nav (GATED_TABS), gpu-power, submissions, feedback,
+ * and any chart surface that should be visible only to insiders
+ * until the underlying data is stable.
+ */
+export function useFeatureGate(): boolean {
+  const [unlocked, setUnlocked] = useState(false);
+  const sequenceRef = useRef<string[]>([]);
+
+  useEffect(() => {
+    if (typeof window !== 'undefined' && localStorage.getItem(FEATURE_GATE_KEY) === '1') {
+      setUnlocked(true);
+    }
+  }, []);
+
+  useEffect(() => {
+    if (unlocked) return;
+    const handleKeyDown = (e: KeyboardEvent) => {
+      sequenceRef.current.push(e.key);
+      if (sequenceRef.current.length > UNLOCK_SEQUENCE.length) {
+        sequenceRef.current = sequenceRef.current.slice(-UNLOCK_SEQUENCE.length);
+      }
+      if (
+        sequenceRef.current.length === UNLOCK_SEQUENCE.length &&
+        sequenceRef.current.every((k, i) => k === UNLOCK_SEQUENCE[i])
+      ) {
+        localStorage.setItem(FEATURE_GATE_KEY, '1');
+        setUnlocked(true);
+        window.dispatchEvent(new Event(FEATURE_GATE_UNLOCKED_EVENT));
+        track('feature_gate_unlocked');
+      }
+    };
+    window.addEventListener('keydown', handleKeyDown);
+    return () => window.removeEventListener('keydown', handleKeyDown);
+  }, [unlocked]);
+
+  useEffect(() => {
+    const handleLock = () => setUnlocked(false);
+    const handleUnlock = () => setUnlocked(true);
+    window.addEventListener(FEATURE_GATE_LOCKED_EVENT, handleLock);
+    window.addEventListener(FEATURE_GATE_UNLOCKED_EVENT, handleUnlock);
+    return () => {
+      window.removeEventListener(FEATURE_GATE_LOCKED_EVENT, handleLock);
+      window.removeEventListener(FEATURE_GATE_UNLOCKED_EVENT, handleUnlock);
+    };
+  }, []);
+
+  return unlocked;
+}