From 5aa45206b029d9bdb289415a094a3bfae8f6fa70 Mon Sep 17 00:00:00 2001
From: Yeswanth K <yeswanthk@nvidia.com>
Date: Fri, 22 May 2026 13:33:37 -0700
Subject: [PATCH 1/3] [GB300][B300][vLLM] Add MiniMax-M2.5 FP4 disagg Dynamo
 configs

Port PR57 MiniMax-M2.5 FP4 disaggregated multi-node vLLM recipes for GB300 and B300 to SA upstream, including runner wiring and changelog entries while keeping SA-default GB300 account/partition/sqsh paths.
---
 .github/configs/nvidia-master.yaml            | 387 ++++++++++++++++++
 .../minimax-m2.5-b300/1k1k/dep2-1p2d.yaml     |  70 ++++
 .../1k1k/dep2-2p3d-c6144.yaml                 |  70 ++++
 .../minimax-m2.5-b300/1k1k/dep2-2p3d.yaml     |  70 ++++
 .../minimax-m2.5-b300/1k1k/dep8-2p1d.yaml     |  69 ++++
 .../vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml |  71 ++++
 .../vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml |  67 +++
 .../minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml    |  69 ++++
 .../minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml    |  67 +++
 .../vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml |  76 ++++
 .../minimax-m2.5-b300/8k1k/dep4-4p1d.yaml     |  69 ++++
 .../minimax-m2.5-b300/8k1k/dep8-4p1d.yaml     |  69 ++++
 .../vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml |  67 +++
 .../minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml    |  67 +++
 .../minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml    |  67 +++
 .../vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml |  71 ++++
 .../minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml    |  71 ++++
 .../vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml     |  71 ++++
 .../vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml     |  68 +++
 .../vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml      |  70 ++++
 .../vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml      |  66 +++
 .../vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml    |  68 +++
 .../vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml    |  66 +++
 .../vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml     |  68 +++
 .../vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml     |  68 +++
 .../vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml      |  66 +++
 .../vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml    |  66 +++
 .../vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml    |  66 +++
 perf-changelog.yaml                           |  18 +
 runners/launch_b300-nv.sh                     |  20 +-
 runners/launch_gb300-nv.sh                    |  32 +-
 31 files changed, 2307 insertions(+), 3 deletions(-)
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml
 create mode 100644 benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml

diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index cf6709258..c5f721e27 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -9430,3 +9430,390 @@ glm5-fp4-gb300-dynamo-sglang:
           tp: 4
           ep: 1
           dp-attn: false
+
+minimaxm2.5-fp4-gb300-dynamo-vllm:
+  image: vllm/vllm-openai:v0.20.1
+  model: nvidia/MiniMax-M2.5-NVFP4
+  model-prefix: minimaxm2.5
+  runner: gb300
+  precision: fp4
+  framework: dynamo-vllm
+  multinode: true
+  disagg: true
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - conc-list: [2, 4, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [4, 8, 16, 64]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml"
+        decode:
+          num-worker: 2
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [32, 64, 128]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [64, 128, 256, 512, 1024]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml"
+        decode:
+          num-worker: 3
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [2048]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml"
+        decode:
+          num-worker: 3
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [6144, 8192]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml"
+        decode:
+          num-worker: 3
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [1024, 2048, 4096]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - conc-list: [2, 4, 8, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [32, 64, 128, 256]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [64, 128]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [256]
+        prefill:
+          num-worker: 4
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: true
+      - conc-list: [1024, 2048]
+        prefill:
+          num-worker: 4
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+
+minimaxm2.5-fp4-b300-dynamo-vllm:
+  image: vllm/vllm-openai:v0.20.1
+  model: nvidia/MiniMax-M2.5-NVFP4
+  model-prefix: minimaxm2.5
+  runner: b300
+  precision: fp4
+  framework: dynamo-vllm
+  multinode: true
+  disagg: true
+  scenarios:
+    fixed-seq-len:
+    - isl: 1024
+      osl: 1024
+      search-space:
+      - conc-list: [4, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [4]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp8-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+      - conc-list: [8, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml"
+        decode:
+          num-worker: 2
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [32, 64, 128]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [64, 128, 256, 1024]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml"
+        decode:
+          num-worker: 3
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [4096]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-1p2d.yaml"
+        decode:
+          num-worker: 2
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [2048, 4096]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml"
+        decode:
+          num-worker: 3
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [6144, 8192]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml"
+        decode:
+          num-worker: 3
+          tp: 2
+          ep: 2
+          dp-attn: true
+      - conc-list: [1024, 1536, 2048, 4096]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
+    - isl: 8192
+      osl: 1024
+      search-space:
+      - conc-list: [2, 4, 8, 16]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 1
+          dp-attn: false
+      - conc-list: [4]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp8-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 1
+          dp-attn: false
+      - conc-list: [32, 128]
+        prefill:
+          num-worker: 1
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [64, 128, 256, 512]
+        prefill:
+          num-worker: 2
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: false
+      - conc-list: [384, 512]
+        prefill:
+          num-worker: 4
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 4
+          ep: 4
+          dp-attn: true
+      - conc-list: [384]
+        prefill:
+          num-worker: 4
+          tp: 1
+          ep: 1
+          dp-attn: false
+          additional-settings:
+          - "CONFIG_FILE=recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml"
+        decode:
+          num-worker: 1
+          tp: 8
+          ep: 8
+          dp-attn: true
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
new file mode 100644
index 000000000..f1badb09a
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-1p2d.yaml
@@ -0,0 +1,70 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2xdep2"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 2
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
new file mode 100644
index 000000000..9ff89ade7
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d-c6144.yaml
@@ -0,0 +1,70 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2p3xdep2-c6144"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "6144x8192"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
new file mode 100644
index 000000000..c3765faf7
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep2-2p3d.yaml
@@ -0,0 +1,70 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2p3xdep2"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "2048x4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
new file mode 100644
index 000000000..b01600157
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/dep8-2p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-2p1xdep8"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 8
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "1024x1536x2048x4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
new file mode 100644
index 000000000..04bf48494
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p1d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4-1p1d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
new file mode 100644
index 000000000..9b524b7e6
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4-1p2d.yaml
@@ -0,0 +1,67 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4-1p2d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 2
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "8x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
new file mode 100644
index 000000000..f80f0fad3
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4ep-1p1d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "32x64x128"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
new file mode 100644
index 000000000..14a103641
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp4ep-1p3d.yaml
@@ -0,0 +1,67 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp4ep-1p3d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 2
+  prefill_workers: 1
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "64x128x256x1024"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
new file mode 100644
index 000000000..7534b6ae0
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/1k1k/tp8-1p1d.yaml
@@ -0,0 +1,76 @@
+name: "minimax-m2.5-vllm-disagg-b300-decode-focus-tp8-1p1d"
+
+# B300-only: full-node TP=8 decode (the 8 GPUs of a single B300 node).
+# Cousin of tp4-1p1d.yaml but exercises the wider TP that B300's per-node
+# GPU count makes available. Only the smallest concurrencies (1,4,8) —
+# this topology is decode-latency focused, not throughput.
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 8
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
new file mode 100644
index 000000000..6de56ae59
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep4-4p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-4p1xdep4"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 4
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 4
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "384x512"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
new file mode 100644
index 000000000..0b86e3672
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/dep8-4p1d.yaml
@@ -0,0 +1,69 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-4p1xdep8"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 4
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 8
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "384"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
new file mode 100644
index 000000000..a4458bc8c
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4-1p1d.yaml
@@ -0,0 +1,67 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp4"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "2x4x8x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
new file mode 100644
index 000000000..93e0ddb53
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-1p1d.yaml
@@ -0,0 +1,67 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp4ep"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "32x128"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
new file mode 100644
index 000000000..e63345140
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp4ep-2p1d.yaml
@@ -0,0 +1,67 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-2p1xtp4ep"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "64x128x256x512"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
new file mode 100644
index 000000000..cb9f45141
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/8k1k/tp8-1p1d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-b300-8k1k-1p1xtp8"
+
+# B300-only: full-node TP=8 decode at 8k input. Cousin of tp4-1p1d.yaml
+# but exercises the wider TP that B300's per-node GPU count makes
+# available. Smallest concurrencies only (1,4,8).
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "b300"
+  gpus_per_node: 8
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+  allow_prefill_decode_colocation: true
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 8
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "4"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml
new file mode 100644
index 000000000..e0d2b6f71
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d-c6144.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-2p3xdep2-c6144"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 2
+  decode_nodes: 3
+  prefill_workers: 2
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+  spread_workers: true
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "6144x8192"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml
new file mode 100644
index 000000000..bf3b554b7
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep2-2p3d.yaml
@@ -0,0 +1,71 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-2p3xdep2"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 2
+  decode_nodes: 3
+  prefill_workers: 2
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 2
+  spread_workers: true
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 2
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "2048"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml
new file mode 100644
index 000000000..f351b3f1a
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/dep8-2p1d.yaml
@@ -0,0 +1,68 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-2p1xdep8"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 2
+  prefill_workers: 2
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 8
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "1024x2048x4096"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml
new file mode 100644
index 000000000..67fe9a634
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p1d.yaml
@@ -0,0 +1,70 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4-1p1d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+    UCX_RCACHE_MAX_UNRELEASED: "1024"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "2x4x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml
new file mode 100644
index 000000000..2fc0946f2
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4-1p2d.yaml
@@ -0,0 +1,66 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4-1p2d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 2
+  prefill_workers: 1
+  decode_workers: 2
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "4x8x16x64"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml
new file mode 100644
index 000000000..b7f5aae6d
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p1d.yaml
@@ -0,0 +1,68 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4ep-1p1d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+    UCX_TLS: "cuda_copy,rc"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "32x64x128"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml
new file mode 100644
index 000000000..4139fd511
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/1k1k/tp4ep-1p3d.yaml
@@ -0,0 +1,66 @@
+name: "minimax-m2.5-vllm-disagg-gb300-decode-focus-tp4ep-1p3d"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 3
+  prefill_workers: 1
+  decode_workers: 3
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 2048
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 1024
+  osl: 1024
+  concurrencies: "64x128x256x512x1024"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml
new file mode 100644
index 000000000..cd349b725
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep4-4p1d.yaml
@@ -0,0 +1,68 @@
+name: "minimax-m2.5-vllm-disagg-gb300-8k1k-4p1xdep4"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 4
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 4
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "256"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml
new file mode 100644
index 000000000..b86d03e15
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/dep8-4p1d.yaml
@@ -0,0 +1,68 @@
+name: "minimax-m2.5-vllm-disagg-gb300-8k1k-4p1xdep8"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 2
+  prefill_workers: 4
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 8
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      data-parallel-size: 8
+      data-parallel-rpc-port: 13345
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      max-num-seqs: 864
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "1024x2048"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml
new file mode 100644
index 000000000..4f16dcb3e
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4-1p1d.yaml
@@ -0,0 +1,66 @@
+name: "minimax-m2.5-vllm-disagg-gb300-8k1k-1p1xtp4"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: false
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "2x4x8x16"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml
new file mode 100644
index 000000000..30f9e51f7
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-1p1d.yaml
@@ -0,0 +1,66 @@
+name: "minimax-m2.5-vllm-disagg-gb300-8k1k-1p1xtp4ep"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 1
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "32x64x128x256"
+  random_range_ratio: 0.8
diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml
new file mode 100644
index 000000000..273b34c1d
--- /dev/null
+++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/8k1k/tp4ep-2p1d.yaml
@@ -0,0 +1,66 @@
+name: "minimax-m2.5-vllm-disagg-gb300-8k1k-2p1xtp4ep"
+
+model:
+  path: "minimax-m2.5-nvfp4"
+  container: "vllm/vllm-openai:v0.20.1"
+  precision: "fp4"
+
+dynamo:
+  install: true
+  wheel: "1.2.0.dev20260501"
+
+setup_script: install-deps.sh
+
+resources:
+  gpu_type: "gb300"
+  gpus_per_node: 4
+  prefill_nodes: 1
+  decode_nodes: 1
+  prefill_workers: 2
+  decode_workers: 1
+  gpus_per_prefill: 1
+  gpus_per_decode: 4
+
+frontend:
+  type: dynamo
+  enable_multiple_frontends: false
+
+backend:
+  type: vllm
+  connector: null
+
+  prefill_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  decode_environment:
+    VLLM_ENGINE_READY_TIMEOUT_S: "3600"
+    VLLM_FLOAT32_MATMUL_PRECISION: "high"
+
+  vllm_config:
+    prefill:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      trust-remote-code: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 16384
+
+    decode:
+      kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
+      kv-cache-dtype: "fp8"
+      tensor-parallel-size: 4
+      enable-expert-parallel: true
+      no-enable-prefix-caching: true
+      max-model-len: 9280
+      max-cudagraph-capture-size: 2048
+      max-num-batched-tokens: 2048
+      gpu-memory-utilization: 0.90
+
+benchmark:
+  type: "sa-bench"
+  isl: 8192
+  osl: 1024
+  concurrencies: "64x128"
+  random_range_ratio: 0.8
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 0fa3b2cd8..95bd16b42 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3113,3 +3113,21 @@
     - "1k1k and 8k1k STP low-latency and max-throughput srt-slurm recipes under benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/gb300-fp4/ (ported from upstream srt-slurm PR #152)"
     - "Wire glm5/fp4 model + dynamo-sglang framework branches into runners/launch_gb300-nv.sh"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1514
+
+- config-keys:
+    - minimaxm2.5-fp4-b300-dynamo-vllm
+  description:
+    - "Add MiniMax-M2.5 NVFP4 B300 disaggregated multinode vLLM benchmarks via Dynamo (mirror of the GB300 pareto sweep, repacked for B300's 8 GPUs/node)"
+    - "Image: vllm/vllm-openai:v0.20.1; model nvidia/MiniMax-M2.5-NVFP4 staged at /data/models/MiniMax-M2.5-NVFP4"
+    - "Same 1k/1k and 8k/1k search space as gb300, plus a new tp8-1p1d at low concurrencies for both ISLs"
+    - "Recipes under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/ with gpus_per_node=8 and worker repacking for B300 topology"
+    - "Wire minimax b300 launch path through runners/launch_b300-nv.sh and minimax-specific srt-slurm fork branch (lijas/spread-workers)"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXXX
+
+- config-keys:
+    - minimaxm2.5-fp4-gb300-dynamo-vllm
+  description:
+    - "Reactivate GB300 minimax disagg vLLM sweep with the pruned pareto-only search space from internal PR57"
+    - "Add minimax model routing and minimax srt-slurm fork path in runners/launch_gb300-nv.sh while keeping SA upstream defaults (SLURM_PARTITION=batch_1, SLURM_ACCOUNT=benchmark, SQUASH_FILE under /home/sa-shared/gharunners/squash/)"
+    - "Add 1k1k/8k1k minimax recipe set under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXXX
diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh
index cca8b4ab0..b353d0466 100644
--- a/runners/launch_b300-nv.sh
+++ b/runners/launch_b300-nv.sh
@@ -39,8 +39,17 @@ elif [[ $MODEL_PREFIX == "dsv4" && $PRECISION == "fp4" && $FRAMEWORK == "dynamo-
     fi
     export MODEL_PATH="${SELECTED_MODEL_PATH:-/data/models/dsv4-pro}"
     export SRT_SLURM_MODEL_PREFIX="deepseek-v4-pro"
+elif [[ $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" && $FRAMEWORK == "dynamo-vllm" ]]; then
+    export MODEL_PATH="/data/models/MiniMax-M2.5-NVFP4"
+    export SRT_SLURM_MODEL_PREFIX="minimax-m2.5-nvfp4"
+    # The GHA runner user for the minimax sweep can't use the default
+    # `benchmark` account (sbatch rejects with "Invalid account or
+    # account/partition combination"). Historical b300 single-node
+    # minimax (commit 5f314bf1) used `restricted` for the same reason.
+    # Keep batch_1 partition; only override the account.
+    SLURM_ACCOUNT="restricted"
 else
-    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4 with dynamo-vllm"
+    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4 with dynamo-vllm, minimaxm2.5-fp4 with dynamo-vllm"
     exit 1
 fi
 
@@ -61,6 +70,15 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "dsv4" ]]; then
     git checkout aflowers/vllm-gb200-v0.20.0
     mkdir -p recipes/vllm/deepseek-v4
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/deepseek-v4" recipes/vllm/deepseek-v4
+elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" ]]; then
+    # Custom srt-slurm fork that recognizes resources.spread_workers,
+    # dynamo.wheel, and backend.allow_prefill_decode_colocation schema
+    # fields used by the minimax pareto recipes. Same branch as gb300-nv.
+    git clone https://github.com/jasonlizhengjian/srt-slurm.git "$SRT_REPO_DIR"
+    cd "$SRT_REPO_DIR" || exit 1
+    git checkout lijas/spread-workers
+    mkdir -p recipes/vllm/minimax-m2.5
+    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300" recipes/vllm/minimax-m2.5
 else
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR" || exit 1
diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh
index 5248e63ed..5f4d538b9 100644
--- a/runners/launch_gb300-nv.sh
+++ b/runners/launch_gb300-nv.sh
@@ -24,8 +24,11 @@ elif [[ $MODEL_PREFIX == "dsv4" && $PRECISION == "fp4" ]]; then
 elif [[ $MODEL_PREFIX == "glm5" && $PRECISION == "fp4" ]]; then
     export MODEL_PATH=/scratch/models/GLM-5-NVFP4
     export SRT_SLURM_MODEL_PREFIX="glm-5-fp4"
+elif [[ $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" ]]; then
+    export MODEL_PATH=/data/models/MiniMax-M2.5-NVFP4
+    export SRT_SLURM_MODEL_PREFIX="minimax-m2.5-nvfp4"
 else
-    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4, glm5-fp4"
+    echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4, glm5-fp4, minimaxm2.5-fp4"
     exit 1
 fi
 
@@ -40,13 +43,30 @@ NGINX_SQUASH_FILE="/home/sa-shared/gharunners/squash/$(echo "$NGINX_IMAGE" | sed
 import_squash() {
     local squash="$1" image="$2"
     local lock="${squash}.lock"
+    # Fast-path: if a squash file is already at the expected path (e.g.,
+    # a symlink to a prestaged file), skip srun entirely. Use [ -f ]
+    # rather than unsquashfs because the login node may not have the
+    # squashfs-tools binary installed.
+    if [ -f "$squash" ]; then
+        echo "Squash file already exists, skipping import: $squash"
+        return 0
+    fi
     srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "
+        # A previously failed enroot import on VAST can leave the lock
+        # or squash as a self-referencing symlink, making subsequent
+        # opens fail with ELOOP. Only unlink when the lock is currently
+        # a symlink — otherwise parallel runs would race to delete each
+        # other's freshly-created regular lock files.
+        if [ -L \"$lock\" ]; then
+            unlink \"$lock\" 2>/dev/null || true
+        fi
         exec 9>\"$lock\"
         flock -w 600 9 || { echo 'Failed to acquire lock for $squash' >&2; exit 1; }
         if unsquashfs -l \"$squash\" > /dev/null 2>&1; then
             echo 'Squash file already exists and is valid, skipping import: $squash'
         else
-            rm -f \"$squash\"
+            unlink \"$squash\" 2>/dev/null || true
+            rm -f \"$squash\" 2>/dev/null || true
             enroot import -o \"$squash\" docker://$image
         fi
     "
@@ -82,6 +102,14 @@ elif [[ $FRAMEWORK == "dynamo-sglang" && $MODEL_PREFIX == "glm5" ]]; then
     git checkout sa-submission-q2-2026
     mkdir -p recipes/sglang/glm5
     cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/sglang/glm5" recipes/sglang/glm5
+elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm2.5" ]]; then
+    # Custom srt-slurm fork that recognizes resources.spread_workers and
+    # dynamo.wheel schema fields used by the minimax pareto recipes.
+    git clone https://github.com/jasonlizhengjian/srt-slurm.git "$SRT_REPO_DIR"
+    cd "$SRT_REPO_DIR"
+    git checkout lijas/spread-workers
+    mkdir -p recipes/vllm/minimax-m2.5
+    cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5" recipes/vllm/minimax-m2.5
 else
     git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR"
     cd "$SRT_REPO_DIR"

From b8874334a0db5088d7aedd7306ae074c9db8dfbc Mon Sep 17 00:00:00 2001
From: Yeswanth K <yeswanthk@nvidia.com>
Date: Fri, 22 May 2026 18:15:01 -0700
Subject: [PATCH 2/3] Use SA default account for b300 minimax run

Remove the minimax-specific SLURM account override in launch_b300-nv.sh so SA upstream runs with batch_1/benchmark defaults, per reviewer guidance.
---
 runners/launch_b300-nv.sh | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh
index b353d0466..e0c2a4cd5 100644
--- a/runners/launch_b300-nv.sh
+++ b/runners/launch_b300-nv.sh
@@ -42,12 +42,6 @@ elif [[ $MODEL_PREFIX == "dsv4" && $PRECISION == "fp4" && $FRAMEWORK == "dynamo-
 elif [[ $MODEL_PREFIX == "minimaxm2.5" && $PRECISION == "fp4" && $FRAMEWORK == "dynamo-vllm" ]]; then
     export MODEL_PATH="/data/models/MiniMax-M2.5-NVFP4"
     export SRT_SLURM_MODEL_PREFIX="minimax-m2.5-nvfp4"
-    # The GHA runner user for the minimax sweep can't use the default
-    # `benchmark` account (sbatch rejects with "Invalid account or
-    # account/partition combination"). Historical b300 single-node
-    # minimax (commit 5f314bf1) used `restricted` for the same reason.
-    # Keep batch_1 partition; only override the account.
-    SLURM_ACCOUNT="restricted"
 else
     echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8, dsv4-fp4 with dynamo-vllm, minimaxm2.5-fp4 with dynamo-vllm"
     exit 1

From 2f1c47c2da84574243003a4681e763522ae9828a Mon Sep 17 00:00:00 2001
From: Ankur Singh <ankusingh@nvidia.com>
Date: Fri, 22 May 2026 20:00:28 -0700
Subject: [PATCH 3/3] Simplify descriptions for MiniMax benchmarks

Removed redundant details from MiniMax-M2.5 NVFP4 B300 and GB300 descriptions in the changelog.
---
 perf-changelog.yaml | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index a4764aa99..20534b955 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3133,17 +3133,14 @@
 - config-keys:
     - minimaxm2.5-fp4-b300-dynamo-vllm
   description:
-    - "Add MiniMax-M2.5 NVFP4 B300 disaggregated multinode vLLM benchmarks via Dynamo (mirror of the GB300 pareto sweep, repacked for B300's 8 GPUs/node)"
-    - "Image: vllm/vllm-openai:v0.20.1; model nvidia/MiniMax-M2.5-NVFP4 staged at /data/models/MiniMax-M2.5-NVFP4"
+    - "Add MiniMax-M2.5 NVFP4 B300 disaggregated multinode vLLM benchmarks via Dynamo"
+    - "Image: vllm/vllm-openai:v0.20.1"
     - "Same 1k/1k and 8k/1k search space as gb300, plus a new tp8-1p1d at low concurrencies for both ISLs"
-    - "Recipes under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5-b300/ with gpus_per_node=8 and worker repacking for B300 topology"
-    - "Wire minimax b300 launch path through runners/launch_b300-nv.sh and minimax-specific srt-slurm fork branch (lijas/spread-workers)"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1560
 
 - config-keys:
     - minimaxm2.5-fp4-gb300-dynamo-vllm
   description:
-    - "Reactivate GB300 minimax disagg vLLM sweep with the pruned pareto-only search space from internal PR57"
-    - "Add minimax model routing and minimax srt-slurm fork path in runners/launch_gb300-nv.sh while keeping SA upstream defaults (SLURM_PARTITION=batch_1, SLURM_ACCOUNT=benchmark, SQUASH_FILE under /home/sa-shared/gharunners/squash/)"
+    - "Add minimax model routing and minimax srt-slurm fork path in runners/launch_gb300-nv.sh"
     - "Add 1k1k/8k1k minimax recipe set under benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m2.5/"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1560