From b010574cf30896480f19f222d5b3fcebaf5e9e99 Mon Sep 17 00:00:00 2001 From: Vijay Sai Date: Mon, 22 Jun 2026 01:51:57 +0530 Subject: [PATCH] fix(rm_hub): guard deepscaler reward against a missing response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_deepscaler_rule_based_reward did 'if "</think>" in response' without checking response first. async_rm passes sample.response straight through, and the sibling reward functions in this package (gpqa, f1) already guard a missing response and return 0 — but deepscaler did not, so a None response raised 'TypeError: argument of type NoneType is not iterable' instead of scoring 0. Return 0 for a falsy response, matching the gpqa/f1 contract. An empty string already returned 0, so only the crash path changes. Adds a regression test for None / empty response. --- slime/rollout/rm_hub/deepscaler.py | 6 ++++++ tests/test_rm_deepscaler.py | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/slime/rollout/rm_hub/deepscaler.py b/slime/rollout/rm_hub/deepscaler.py index 39d4de383a..428c12cca2 100644 --- a/slime/rollout/rm_hub/deepscaler.py +++ b/slime/rollout/rm_hub/deepscaler.py @@ -2,6 +2,12 @@ def get_deepscaler_rule_based_reward(response, label): + # Guard against a missing response, mirroring the gpqa / f1 reward + # functions in this package. async_rm passes sample.response straight + # through, so a None response would otherwise raise a TypeError here + # instead of scoring 0. 
+ if not response: + return 0 if "</think>" in response: model_solution = response.split("</think>")[-1] elif "###Response" in response: diff --git a/tests/test_rm_deepscaler.py b/tests/test_rm_deepscaler.py index 0883a60622..828b8df37d 100644 --- a/tests/test_rm_deepscaler.py +++ b/tests/test_rm_deepscaler.py @@ -80,6 +80,16 @@ def test_label_with_boxed_marker_is_extracted_too(): assert get_deepscaler_rule_based_reward(r"\boxed{42}", r"\boxed{42}") == 1 +@pytest.mark.unit +def test_missing_response_returns_zero(): + """A None/empty response scores 0 instead of raising. ``async_rm`` + feeds ``sample.response`` straight in, and the sibling gpqa / f1 reward + functions already guard this; without the guard ``None`` raised + ``TypeError: argument of type 'NoneType' is not iterable``.""" + assert get_deepscaler_rule_based_reward(None, "42") == 0 + assert get_deepscaler_rule_based_reward("", "42") == 0 + + @pytest.mark.unit def test_wrong_answer_returns_zero(): """Sanity-check the negative side of the contract."""