diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 3d1a70d42..553e69e86 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2208,7 +2208,7 @@ qwen3.5-fp4-b200-sglang-mtp: - { tp: 2, ep: 1, conc-start: 4, conc-end: 64, spec-decoding: mtp } glm5-fp8-b200-sglang: - image: lmsysorg/sglang:v0.5.12-cu130 + image: lmsysorg/sglang:nightly-dev-cu13-20260523-c112f762 model: zai-org/GLM-5-FP8 model-prefix: glm5 runner: b200 @@ -2227,7 +2227,7 @@ glm5-fp8-b200-sglang: - { tp: 8, ep: 1, conc-start: 4, conc-end: 256 } glm5-fp8-b200-sglang-mtp: - image: lmsysorg/sglang:v0.5.12-cu130 + image: lmsysorg/sglang:nightly-dev-cu13-20260523-c112f762 model: zai-org/GLM-5-FP8 model-prefix: glm5 runner: b200 @@ -2307,7 +2307,7 @@ glm5-fp8-b300-sglang-mtp: - { tp: 8, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp } glm5-fp4-b200-sglang: - image: lmsysorg/sglang:v0.5.12-cu130 + image: lmsysorg/sglang:nightly-dev-cu13-20260523-c112f762 model: nvidia/GLM-5-NVFP4 model-prefix: glm5 runner: b200 @@ -2328,7 +2328,7 @@ glm5-fp4-b200-sglang: - { tp: 4, ep: 1, conc-start: 4, conc-end: 256 } glm5-fp4-b200-sglang-mtp: - image: lmsysorg/sglang:v0.5.12-cu130 + image: lmsysorg/sglang:nightly-dev-cu13-20260523-c112f762 model: nvidia/GLM-5-NVFP4 model-prefix: glm5 runner: b200 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 208a2da6f..4798ae026 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3129,3 +3129,15 @@ description: - "Add --use-chat-template to run_benchmark_serving so prompts are formatted with the Qwen chat template (matching the other Qwen MTP recipes)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1555 + +- config-keys: + - glm5-fp4-b200-sglang + description: + - "Update SGLang image from v0.5.12-cu130 to nightly-dev-cu13-20260523-c112f762" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1561 + +- config-keys: + - glm5-fp4-b200-sglang-mtp + description: 
+ - "Update SGLang image from v0.5.12-cu130 to nightly-dev-cu13-20260523-c112f762" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1561 \ No newline at end of file