From c34271c8a4c303208a15375e79de83e08149fe52 Mon Sep 17 00:00:00 2001 From: yhyang201 Date: Mon, 18 May 2026 23:59:51 +0800 Subject: [PATCH 1/2] Update dpskv4 GB300 MTP SGLang image to nightly-20260518 and clean env vars --- .github/configs/nvidia-master.yaml | 2 +- .../disagg-low-latency-1p1d-tp4-tp4-mtp.yaml | 14 ++++------- .../disagg-low-latency-1p6d-dep4-tp4-mtp.yaml | 18 ++++----------- .../disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml | 23 ++++--------------- .../disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml | 23 ++++--------------- .../disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml | 23 ++++--------------- .../disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml | 23 ++++--------------- perf-changelog.yaml | 6 +++++ 8 files changed, 36 insertions(+), 96 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 2e1b43a97..ccc39d2d9 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -9034,7 +9034,7 @@ glm5-fp8-b200-dynamo-sglang: # MTP variant of dsv4-fp4-gb300-dynamo-sglang. dsv4-fp4-gb300-dynamo-sglang-mtp: - image: lmsysorg/sglang:nightly-dev-cu13-20260509-9ee83034 + image: lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: gb300-cw diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml index accb5e56f..fb1e8d402 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" precision: "mxfp4" sbatch_directives: @@ -31,14 +31,12 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" + SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" @@ -49,14 +47,12 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" + SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml index 3e8fca87b..b549cd102 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" precision: "mxfp4" sbatch_directives: @@ -31,23 +31,16 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" @@ -60,14 +53,11 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" NCCL_MNNVL_ENABLE: "1" NCCL_CUMEM_ENABLE: "1" SGLANG_MOONCAKE_CUSTOM_MEM_POOL: "True" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml index 6745aa37e..261d294b1 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" precision: "mxfp4" sbatch_directives: @@ -33,23 +33,16 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" @@ -62,22 +55,16 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml index 842fbb556..54ffacc55 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" precision: "mxfp4" sbatch_directives: @@ -33,23 +33,16 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" @@ -62,22 +55,16 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "2048" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml index 49b0d31c3..0c53794a4 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" precision: "mxfp4" sbatch_directives: @@ -33,23 +33,16 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" @@ -62,22 +55,16 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml index d907c369e..3ff382647 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260510-2473659e" + container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" precision: "mxfp4" sbatch_directives: @@ -33,23 +33,16 @@ backend: prefill_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "9216" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" @@ -62,22 +55,16 @@ backend: decode_environment: PYTHONUNBUFFERED: "1" - SGLANG_RADIX_DISABLE_REUSE: "1" + SGLANG_RADIX_FORCE_MISS: "1" SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "1" SGLANG_DEFAULT_THINKING: "1" SGLANG_DSV4_REASONING_EFFORT: "max" SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT: "1" - SGLANG_OPT_USE_JIT_NORM: "1" - SGLANG_OPT_USE_JIT_INDEXER_METADATA: "1" - SGLANG_OPT_USE_TOPK_V2: "1" - SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN: "1" - SGLANG_OPT_USE_FAST_MASK_EP: "1" + SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE: "1" - SGLANG_OPT_FIX_HASH_MEGA_MOE: "1" SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK: "4096" SGLANG_OPT_FIX_MEGA_MOE_MEMORY: "1" - SGLANG_OPT_FIX_NEXTN_MEGA_MOE: "1" SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK: "0" NCCL_MNNVL_ENABLE: "1" diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 9c4910b13..73217d355 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2934,3 +2934,9 @@ description: - "Update SGLang ROCm image from v0.5.11/v0.5.10rc0 to v0.5.12-rocm720-mi35x-20260517" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1440 + +- config-keys: + - dsv4-fp4-gb300-dynamo-sglang-mtp + description: + - "Update SGLang image to nightly-20260518 and remove obsolete env vars for MTP configs" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1478 From 77727b22ea671c4d728ddcd8bfa5ea377b31918d Mon Sep 17 00:00:00 2001 From: yhyang201 Date: Tue, 19 May 2026 21:23:41 +0800 Subject: [PATCH 2/2] Update dsv4 GB300 MTP SGLang image to nightly-20260519 --- .github/configs/nvidia-master.yaml | 2 +- .../deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml | 2 +- .../deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml | 2 +- .../deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml | 2 +- .../deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml | 2 +- .../deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml | 2 +- .../deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml | 2 +- perf-changelog.yaml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index ccc39d2d9..11175f0f5 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -9034,7 +9034,7 @@ glm5-fp8-b200-dynamo-sglang: # MTP variant of dsv4-fp4-gb300-dynamo-sglang. dsv4-fp4-gb300-dynamo-sglang-mtp: - image: lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870 + image: lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647 model: deepseek-ai/DeepSeek-V4-Pro model-prefix: dsv4 runner: gb300-cw diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml index fb1e8d402..8e4b47b6d 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p1d-tp4-tp4-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" + container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml index b549cd102..09619bedc 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-low-latency-1p6d-dep4-tp4-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" + container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml index 261d294b1..9357c0017 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep16-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" + container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml index 54ffacc55..5a2c99669 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-1p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" + container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml index 0c53794a4..7ada85405 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-2p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" + container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647" precision: "mxfp4" sbatch_directives: diff --git a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml index 3ff382647..833b44f33 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/sglang/deepseek-v4/8k1k/disagg-mid-curve-4p1d-dep4-dep8-mtp.yaml @@ -11,7 +11,7 @@ dynamo: model: path: "deepseek-v4-pro" - container: "lmsysorg/sglang:nightly-dev-cu13-20260518-c67b2870" + container: "lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647" precision: "mxfp4" sbatch_directives: diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 73217d355..df762b507 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2938,5 +2938,5 @@ - config-keys: - dsv4-fp4-gb300-dynamo-sglang-mtp description: - - "Update SGLang image to nightly-20260518 and remove obsolete env vars for MTP configs" + - "Update SGLang image to nightly-20260519 and remove obsolete env vars for MTP configs" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1478