Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions responses_api_agents/swe_agents/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ast
import asyncio
import glob
import importlib.util
Expand Down Expand Up @@ -537,6 +538,28 @@ def get_run_command(self) -> ExecuteContainerCommandArgs:


class NVInternalDatasetProcessor(BaseDatasetHarnessProcessor):
@staticmethod
def _parse_selected_test_files_to_run(raw_value: Any) -> list[str]:
parsed_value = raw_value
if isinstance(raw_value, str):
try:
parsed_value = json.loads(raw_value)
except json.JSONDecodeError:
try:
parsed_value = ast.literal_eval(raw_value)
except (SyntaxError, ValueError) as exc:
raise ValueError(
"selected_test_files_to_run must be a JSON array or Python literal list of strings"
) from exc

if isinstance(parsed_value, tuple):
parsed_value = list(parsed_value)

if not isinstance(parsed_value, list) or not all(isinstance(test_file, str) for test_file in parsed_value):
raise ValueError("selected_test_files_to_run must decode to a list of strings")

return parsed_value

def get_run_command(self) -> ExecuteContainerCommandArgs:
instance_dict = json.loads(self.config.problem_info["instance_dict"])
base_dockerfile = instance_dict.get("base_dockerfile", "")
Expand Down Expand Up @@ -572,11 +595,9 @@ def get_run_command(self) -> ExecuteContainerCommandArgs:
repo_cmd = repo_cmd.split("\n")[-1]

# Get test files
test_files_str = instance_dict.get("selected_test_files_to_run", "[]")
if isinstance(test_files_str, str):
test_files = ",".join(eval(test_files_str))
else:
test_files = ",".join(test_files_str)
test_files = self._parse_selected_test_files_to_run(instance_dict.get("selected_test_files_to_run", "[]"))
test_files_arg = ",".join(test_files)
test_files_arg = f" {shlex.quote(test_files_arg)}" if test_files_arg else ""

run_script = instance_dict["run_script.sh"]
parsing_script = instance_dict["parsing_script.py"]
Expand Down Expand Up @@ -608,7 +629,7 @@ def get_run_command(self) -> ExecuteContainerCommandArgs:
{repo_cmd}

# Run tests
bash /root/run_script.sh {test_files} > /root/stdout.log 2> /root/stderr.log || true
bash /root/run_script.sh{test_files_arg} > /root/stdout.log 2> /root/stderr.log || true

# Parse results
python /root/parsing_script.py /root/stdout.log /root/stderr.log /root/output.json
Expand Down
44 changes: 44 additions & 0 deletions responses_api_agents/swe_agents/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,50 @@ def test_get_run_command_list_test_files(self) -> None:
result = processor.get_run_command()
assert "test_x.py,test_y.py" in result.command

def test_get_run_command_python_literal_test_files(self) -> None:
    """A single-quoted Python-literal list string ends up comma-joined in the command."""
    metadata = {"selected_test_files_to_run": "['test_x.py', 'test_y.py']"}
    with tempfile.TemporaryDirectory() as workdir:
        run_args = self._make_processor(workdir, metadata).get_run_command()
        assert "test_x.py,test_y.py" in run_args.command

def test_get_run_command_quotes_test_files_arg(self) -> None:
    """Shell metacharacters in a file name stay inside one quoted run_script.sh argument."""
    metadata = {"selected_test_files_to_run": ["test_x.py", "test;touch /tmp/pwned"]}
    expected_invocation = "bash /root/run_script.sh 'test_x.py,test;touch /tmp/pwned'"
    with tempfile.TemporaryDirectory() as workdir:
        run_args = self._make_processor(workdir, metadata).get_run_command()
        assert expected_invocation in run_args.command

def test_get_run_command_rejects_executable_test_files_metadata(self) -> None:
    """A call expression in the metadata string is rejected with ValueError."""
    metadata = {"selected_test_files_to_run": '__import__("os").system("touch /tmp/pwned")'}
    with tempfile.TemporaryDirectory() as workdir:
        processor = self._make_processor(workdir, metadata)
        with pytest.raises(ValueError, match="selected_test_files_to_run"):
            processor.get_run_command()

def test_get_run_command_rejects_non_string_test_files(self) -> None:
    """A JSON array containing a non-string element is rejected with ValueError."""
    metadata = {"selected_test_files_to_run": '["test_x.py", 1]'}
    with tempfile.TemporaryDirectory() as workdir:
        processor = self._make_processor(workdir, metadata)
        with pytest.raises(ValueError, match="selected_test_files_to_run"):
            processor.get_run_command()

def test_get_run_command_no_repo_cmd(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
processor = self._make_processor(
Expand Down