diff --git a/packages/app/src/components/inference/inference-chart-config.json b/packages/app/src/components/inference/inference-chart-config.json index e26d237e..1ca1c440 100644 --- a/packages/app/src/components/inference/inference-chart-config.json +++ b/packages/app/src/components/inference/inference-chart-config.json @@ -88,6 +88,13 @@ "y_jInput_label": "All-in Provisioned J per Input Token (J/tok)", "y_jInput_title": "All-in Provisioned Joules per Input Token", "y_jInput_roofline": "lower_right", + "y_measuredAvgPower": "measuredAvgPower.y", + "y_measuredAvgPower_label": "Measured Avg Power per GPU (W)", + "y_measuredAvgPower_title": "Measured Average Power per GPU", + "y_measuredJPerOutputToken": "measuredJPerOutputToken.y", + "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)", + "y_measuredJPerOutputToken_title": "Measured Joules per Output Token", + "y_measuredJPerOutputToken_roofline": "lower_right", "y_cost_limit": 5, "y_latency_limit": 60 }, @@ -179,6 +186,13 @@ "y_jInput_label": "All-in Provisioned J per Input Token (J/tok)", "y_jInput_title": "All-in Provisioned Joules per Input Token", "y_jInput_roofline": "lower_left", + "y_measuredAvgPower": "measuredAvgPower.y", + "y_measuredAvgPower_label": "Measured Avg Power per GPU (W)", + "y_measuredAvgPower_title": "Measured Average Power per GPU", + "y_measuredJPerOutputToken": "measuredJPerOutputToken.y", + "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)", + "y_measuredJPerOutputToken_title": "Measured Joules per Output Token", + "y_measuredJPerOutputToken_roofline": "lower_left", "y_cost_limit": 5, "y_latency_limit": 60 } diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index 5b5f9ec2..83926fe1 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -67,6 +67,10 @@ export interface AggDataEntry { median_e2el: number; std_e2el: number; 
p99_e2el: number; + // Measured GPU telemetry (emitted by runner's aggregate_power.py). + // Optional because historical runs predate these fields. + avg_power_w?: number; + joules_per_output_token?: number; disagg: boolean; num_prefill_gpu: number; num_decode_gpu: number; @@ -152,6 +156,12 @@ export interface InferenceData extends Partial