From c813cb465ef790bfd38ae2f0c26e6285eabf750c Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Thu, 21 May 2026 13:41:59 -0400 Subject: [PATCH 1/2] [Klaud Cold] minimaxm2.5-fp8-mi325x: add VLLM_ROCM_SHUFFLE_KV_CACHE_LAYOUT=1 + --attention-backend ROCM_AITER_FA Adopts the AITER recipe pattern (shuffle KV-cache layout + AITER FA attention backend) used by the mi355x sibling. Matches AMD's recommended recipe for MiniMax-M2.x on ROCm vLLM. Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh | 4 +++- perf-changelog.yaml | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh index 13867ce7e..f63bba614 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_mi325x.sh @@ -24,9 +24,10 @@ if [ -n "$ROCR_VISIBLE_DEVICES" ]; then export HIP_VISIBLE_DEVICES="$ROCR_VISIBLE_DEVICES" fi -# following AMD andy's recipe +# following AMD andy's recipe # https://www.linkedin.com/posts/andyluo77_day-0-support-of-minimax-25-on-amd-gpu-activity-7428151527309025280-hXR8/ export VLLM_ROCM_USE_AITER=1 +export VLLM_ROCM_SHUFFLE_KV_CACHE_LAYOUT=1 SERVER_LOG=/workspace/server.log PORT=${PORT:-8888} @@ -52,6 +53,7 @@ $EP \ --max-model-len $MAX_MODEL_LEN \ --block-size=32 \ --no-enable-prefix-caching \ +--attention-backend "ROCM_AITER_FA" \ --trust-remote-code > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 684d40dcc..1588205de 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3068,3 +3068,9 @@ description: - "Bump image to rocm/sgl-dev:rocm720-mi35x-8c3b5aa-20260521-DSv4" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1548 + +- config-keys: + - minimaxm2.5-fp8-mi325x-vllm + description: + - "Add VLLM_ROCM_SHUFFLE_KV_CACHE_LAYOUT=1 + --attention-backend ROCM_AITER_FA (match AMD-recommended AITER recipe pattern used on mi355x)" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX From 0ca9cae3b405c8b97c27772733a18939a0de6924 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Thu, 21 May 2026 13:42:09 -0400 Subject: [PATCH 2/2] chore: fill pr-link for #1549 --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 1588205de..2ea351f76 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3073,4 +3073,4 @@ - minimaxm2.5-fp8-mi325x-vllm description: - "Add VLLM_ROCM_SHUFFLE_KV_CACHE_LAYOUT=1 + --attention-backend ROCM_AITER_FA (match AMD-recommended AITER recipe pattern used on mi355x)" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1549