From f207fd26f136a05c9dae9f623120cbe74d123136 Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Sat, 27 Jun 2026 12:32:27 +0200 Subject: [PATCH] Harden SWE agent test file parsing --- responses_api_agents/swe_agents/app.py | 33 +++++++++++--- .../swe_agents/tests/test_app.py | 44 +++++++++++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/responses_api_agents/swe_agents/app.py b/responses_api_agents/swe_agents/app.py index e248038bf..3df6d683c 100644 --- a/responses_api_agents/swe_agents/app.py +++ b/responses_api_agents/swe_agents/app.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import ast import asyncio import glob import importlib.util @@ -537,6 +538,28 @@ def get_run_command(self) -> ExecuteContainerCommandArgs: class NVInternalDatasetProcessor(BaseDatasetHarnessProcessor): + @staticmethod + def _parse_selected_test_files_to_run(raw_value: Any) -> list[str]: + parsed_value = raw_value + if isinstance(raw_value, str): + try: + parsed_value = json.loads(raw_value) + except json.JSONDecodeError: + try: + parsed_value = ast.literal_eval(raw_value) + except (SyntaxError, ValueError) as exc: + raise ValueError( + "selected_test_files_to_run must be a JSON array or Python literal list of strings" + ) from exc + + if isinstance(parsed_value, tuple): + parsed_value = list(parsed_value) + + if not isinstance(parsed_value, list) or not all(isinstance(test_file, str) for test_file in parsed_value): + raise ValueError("selected_test_files_to_run must decode to a list of strings") + + return parsed_value + def get_run_command(self) -> ExecuteContainerCommandArgs: instance_dict = json.loads(self.config.problem_info["instance_dict"]) base_dockerfile = instance_dict.get("base_dockerfile", "") @@ -572,11 +595,9 @@ def get_run_command(self) -> ExecuteContainerCommandArgs: repo_cmd = repo_cmd.split("\n")[-1] # Get test files - test_files_str = instance_dict.get("selected_test_files_to_run", "[]") - if isinstance(test_files_str, str): - test_files = ",".join(eval(test_files_str)) - else: - test_files = ",".join(test_files_str) + test_files = self._parse_selected_test_files_to_run(instance_dict.get("selected_test_files_to_run", "[]")) + test_files_arg = ",".join(test_files) + test_files_arg = f" {shlex.quote(test_files_arg)}" if test_files_arg else "" run_script = instance_dict["run_script.sh"] parsing_script = instance_dict["parsing_script.py"] @@ -608,7 +629,7 @@ def get_run_command(self) -> ExecuteContainerCommandArgs: {repo_cmd} # Run tests -bash /root/run_script.sh {test_files} > /root/stdout.log 2> /root/stderr.log || true +bash /root/run_script.sh{test_files_arg} > /root/stdout.log 2> /root/stderr.log || true # Parse results python /root/parsing_script.py /root/stdout.log /root/stderr.log /root/output.json diff --git a/responses_api_agents/swe_agents/tests/test_app.py b/responses_api_agents/swe_agents/tests/test_app.py index d8f6bcd04..e8554097e 100644 --- a/responses_api_agents/swe_agents/tests/test_app.py +++ b/responses_api_agents/swe_agents/tests/test_app.py @@ -530,6 +530,50 @@ def test_get_run_command_list_test_files(self) -> None: result = processor.get_run_command() assert "test_x.py,test_y.py" in result.command + def test_get_run_command_python_literal_test_files(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + processor = self._make_processor( + tmpdir, + { + "selected_test_files_to_run": "['test_x.py', 'test_y.py']", + }, + ) + result = processor.get_run_command() + assert "test_x.py,test_y.py" in result.command + + def test_get_run_command_quotes_test_files_arg(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + processor = self._make_processor( + tmpdir, + { + "selected_test_files_to_run": ["test_x.py", "test;touch /tmp/pwned"], + }, + ) + result = processor.get_run_command() + assert "bash /root/run_script.sh 'test_x.py,test;touch /tmp/pwned'" in result.command + + def test_get_run_command_rejects_executable_test_files_metadata(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + processor = self._make_processor( + tmpdir, + { + "selected_test_files_to_run": '__import__("os").system("touch /tmp/pwned")', + }, + ) + with pytest.raises(ValueError, match="selected_test_files_to_run"): + processor.get_run_command() + + def test_get_run_command_rejects_non_string_test_files(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + processor = self._make_processor( + tmpdir, + { + "selected_test_files_to_run": '["test_x.py", 1]', + }, + ) + with pytest.raises(ValueError, match="selected_test_files_to_run"): + processor.get_run_command() + def test_get_run_command_no_repo_cmd(self) -> None: with tempfile.TemporaryDirectory() as tmpdir: processor = self._make_processor(