diff --git a/slime/rollout/rm_hub/deepscaler.py b/slime/rollout/rm_hub/deepscaler.py
index 39d4de383a..428c12cca2 100644
--- a/slime/rollout/rm_hub/deepscaler.py
+++ b/slime/rollout/rm_hub/deepscaler.py
@@ -2,6 +2,12 @@
 
 
 def get_deepscaler_rule_based_reward(response, label):
+    # Guard against a missing response, mirroring the gpqa / f1 reward
+    # functions in this package. async_rm passes sample.response straight
+    # through, so a None response would otherwise raise a TypeError here
+    # instead of scoring 0.
+    if not response:
+        return 0
     if "" in response:
         model_solution = response.split("")[-1]
     elif "###Response" in response:
diff --git a/tests/test_rm_deepscaler.py b/tests/test_rm_deepscaler.py
index 0883a60622..828b8df37d 100644
--- a/tests/test_rm_deepscaler.py
+++ b/tests/test_rm_deepscaler.py
@@ -80,6 +80,16 @@ def test_label_with_boxed_marker_is_extracted_too():
     assert get_deepscaler_rule_based_reward(r"\boxed{42}", r"\boxed{42}") == 1
 
 
+@pytest.mark.unit
+def test_missing_response_returns_zero():
+    """A None/empty response scores 0 instead of raising. ``async_rm``
+    feeds ``sample.response`` straight in, and the sibling gpqa / f1 reward
+    functions already guard this; without the guard ``None`` raised
+    ``TypeError: argument of type 'NoneType' is not iterable``."""
+    assert get_deepscaler_rule_based_reward(None, "42") == 0
+    assert get_deepscaler_rule_based_reward("", "42") == 0
+
+
 @pytest.mark.unit
 def test_wrong_answer_returns_zero():
     """Sanity-check the negative side of the contract."""