From 5a65e295914282dd229d64dc6c1be0eca1017f85 Mon Sep 17 00:00:00 2001 From: F0undLinks <2862323246@qq.com> Date: Thu, 18 Jun 2026 09:39:12 +0800 Subject: [PATCH 1/2] Update answer pattern for COT chat prompt debug --- .../configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py b/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py index 9ee0996c..c0e24e08 100644 --- a/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py +++ b/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py @@ -108,7 +108,8 @@ evaluator=dict(type=AccEvaluator), pred_postprocessor=dict( type=match_answer_pattern, - answer_pattern=r'(?i)答案\s*:\s*[\W]*([A-D])[\W]*', + # answer_pattern=r'(?i)答案\s*:\s*[\W]*([A-D])[\W]*', + answer_pattern=r'(?i)答案\s*[::]\s*[\W]*([A-D])[\W]*', ) ) cmmlu_datasets.append( From ff5fc5b6d5bb4977fc2f720f7f07cb149590bdc2 Mon Sep 17 00:00:00 2001 From: F0undLinks <2862323246@qq.com> Date: Thu, 18 Jun 2026 09:47:52 +0800 Subject: [PATCH 2/2] Update ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py b/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py index c0e24e08..aabbe360 100644 --- a/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py +++ b/ais_bench/benchmark/configs/datasets/cmmlu/cmmlu_gen_0_shot_cot_chat_prompt.py @@ -108,7 +108,6 @@ evaluator=dict(type=AccEvaluator), pred_postprocessor=dict( type=match_answer_pattern, - # answer_pattern=r'(?i)答案\s*:\s*[\W]*([A-D])[\W]*', 
answer_pattern=r'(?i)答案\s*[::]\s*[\W]*([A-D])[\W]*', ) )