sdevare-nv · sdevare-nv · May 6, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
diff --git a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+# Bench entry script: invoked by gym's OpenCodeHarnessProcessor.get_run_command()
+# to launch the opencode bench CLI for one selected instance.
+#
+# Positional arguments (order must match get_run_command in app.py):
+#   $1  COMMIT_HASH        opencode commit (informational; checkout is done at setup)
+#   $2  AGENT              agent class name (informational; default OpenCodeAgent)
+#   $3  MAX_ITER           max agent turns (default 100)
+#   $4  DATASET            dataset name (informational; gym already dispatched)
+#   $5  SPLIT              dataset split (informational; default test)
+#   $6  EVAL_OUTPUT_DIR    trajectory directory; a relative path is taken from the opencode dir
+#   $7  SELECTED_ID        instance_id to run (required)
+#   $8  INSTANCE_DICT_PATH single-line JSONL (default /root/dataset/data.jsonl)
+#   $9  CONFIG_FILE        opencode model config JSON written by gym (default /tmp/oc_config.json)
+#   $10 WORKSPACE_ROOT     repo path inside the SIF, resolved on the gym side (required)
+#   $11 USER_MESSAGE_PATH  pre-rendered user prompt file, workspace baked in (required)
+#   $12 SYSTEM_PROMPT_PATH optional system-prompt override
+#
+# Environment (set by gym):
+#   NEMO_GYM_MODEL_SERVER_NAME      proxy name on the gym head server (required)
+#   NEMO_GYM_MODEL_SERVER_BASE_URL  base http://host:port for the model server (required)
+#   NEMO_GYM_METRICS_FPATH          path to the metrics JSON to update
+#   NEMO_GYM_CONFIG_DICT            (informational) the gym YAML config blob
+#   COMMAND_EXEC_TIMEOUT            per-bash-command timeout in seconds
+#   DIVERSIFY_TOOL_NAMES            optional: rename tools for RL diversity
+#   CAMEL_CASE_TOOL_NAMES           optional: camelCase tool names
+#   ENABLE_SUBAGENTS                optional: "1" or "true" adds --enable-subagents
+#
+# Exit status: 64 SELECTED_ID missing, 65 WORKSPACE_ROOT missing, 66 USER_MESSAGE_PATH
+# missing, 67/68 model-server name/base URL unset, 69 bench cli.ts not found, 70 bun
+# not on PATH; once every check passes, exec hands the process over to bun.
+
+set -eo pipefail
+
+# die STATUS MESSAGE: print MESSAGE on stdout, then exit with STATUS.
+die() {
+    echo "$2"
+    exit "$1"
+}
+
+# Bind the positional arguments, falling back to defaults when unset or empty.
+COMMIT_HASH="${1:-}"
+AGENT="${2:-OpenCodeAgent}"
+MAX_ITER="${3:-100}"
+DATASET="${4:-}"
+SPLIT="${5:-test}"
+EVAL_OUTPUT_DIR="${6:-evaluation/oh}"
+SELECTED_ID="${7:-}"
+INSTANCE_DICT_PATH="${8:-/root/dataset/data.jsonl}"
+CONFIG_FILE="${9:-/tmp/oc_config.json}"
+WORKSPACE_ROOT="${10:-}"
+USER_MESSAGE_PATH="${11:-}"
+SYSTEM_PROMPT_PATH="${12:-}"
+
+# Required inputs; each missing one has its own exit status.
+[[ -n "$SELECTED_ID" ]] || die 64 "ERROR: SELECTED_ID (\$7) is required."
+[[ -n "$WORKSPACE_ROOT" ]] || die 65 "ERROR: WORKSPACE_ROOT (\$10) is required — gym side resolves the dataset-aware repo path."
+[[ -n "$USER_MESSAGE_PATH" ]] || die 66 "ERROR: USER_MESSAGE_PATH (\$11) is required — gym side renders the user prompt."
+[[ -n "${NEMO_GYM_MODEL_SERVER_NAME:-}" ]] || die 67 "ERROR: NEMO_GYM_MODEL_SERVER_NAME not set in env."
+[[ -n "${NEMO_GYM_MODEL_SERVER_BASE_URL:-}" ]] || die 68 "ERROR: NEMO_GYM_MODEL_SERVER_BASE_URL not set in env."
+
+# This file lives in evaluation/benchmarks/swe_bench/scripts/, so the opencode
+# root is four directories above SCRIPT_DIR.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OPENCODE_DIR="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
+BENCH_CLI="$OPENCODE_DIR/packages/opencode/src/bench/cli.ts"
+
+# Both the bench CLI source and the bun runtime must be available.
+[[ -f "$BENCH_CLI" ]] || die 69 "ERROR: bench cli.ts not found at $BENCH_CLI"
+command -v bun >/dev/null 2>&1 || die 70 "ERROR: bun not on PATH (expected /opencode_setup/bun/bin/bun)"
+
+# An absolute EVAL_OUTPUT_DIR is used as given; otherwise it is anchored at OPENCODE_DIR.
+if [[ "$EVAL_OUTPUT_DIR" == /* ]]; then
+    ABS_OUTPUT_DIR="$EVAL_OUTPUT_DIR"
+else
+    ABS_OUTPUT_DIR="$OPENCODE_DIR/$EVAL_OUTPUT_DIR"
+fi
+mkdir -p "$ABS_OUTPUT_DIR"
+
+# Log the resolved settings, one per line.
+printf '%s\n' \
+    "OPENCODE_DIR: $OPENCODE_DIR" \
+    "BENCH_CLI: $BENCH_CLI" \
+    "AGENT: $AGENT  COMMIT: $COMMIT_HASH  MAX_ITER: $MAX_ITER" \
+    "DATASET: $DATASET  SPLIT: $SPLIT  SELECTED_ID: $SELECTED_ID" \
+    "EVAL_OUTPUT_DIR: $ABS_OUTPUT_DIR" \
+    "INSTANCE_DICT_PATH: $INSTANCE_DICT_PATH" \
+    "CONFIG_FILE: $CONFIG_FILE" \
+    "WORKSPACE_ROOT: $WORKSPACE_ROOT" \
+    "USER_MESSAGE_PATH: $USER_MESSAGE_PATH" \
+    "SYSTEM_PROMPT_PATH: $SYSTEM_PROMPT_PATH" \
+    "MODEL_SERVER: $NEMO_GYM_MODEL_SERVER_NAME @ $NEMO_GYM_MODEL_SERVER_BASE_URL"
+
+# Assemble the bench CLI invocation as an array so every value stays one argument.
+bench_cmd=(
+    bun "$BENCH_CLI"
+    --instance-dict-path "$INSTANCE_DICT_PATH"
+    --output-dir "$ABS_OUTPUT_DIR"
+    --config "$CONFIG_FILE"
+    --max-turns "$MAX_ITER"
+    --agent-cls "$AGENT"
+    --dataset "$DATASET"
+    --split "$SPLIT"
+    --selected-id "$SELECTED_ID"
+    --workspace-root "$WORKSPACE_ROOT"
+    --user-message-file "$USER_MESSAGE_PATH"
+)
+# Optional flags.
+if [[ -n "$SYSTEM_PROMPT_PATH" ]]; then
+    bench_cmd+=(--system-prompt "$SYSTEM_PROMPT_PATH")
+fi
+case "${ENABLE_SUBAGENTS:-}" in
+    1 | true) bench_cmd+=(--enable-subagents) ;;
+esac
+
+# exec replaces this shell with bun.
+echo "Executing: ${bench_cmd[*]}"
+exec "${bench_cmd[@]}"
diff --git a/packages/opencode/src/bench/bootstrap_repo.ts b/packages/opencode/src/bench/bootstrap_repo.ts
@@ -0,0 +1,78 @@
+/**
+ * Bootstrap a git repository inside the workspace when the SIF ships a flat
+ * source tree without a `.git` directory.
+ *
+ * Some dataset SIFs (notably `swe-bench-ext`, and certain SWE-rebench variants)
+ * copy the repo contents into `/workspace/repo` (or the dataset-specific path)
+ * without preserving git history. Without `.git`, `runDeepReset` is a silent
+ * no-op (its `git rev-parse` fails under the outer `|| true`) and
+ * `captureGitDiff` returns "" — every rollout is recorded as `patch=0 bytes`
+ * regardless of what the agent did. Port of nv-OpenHands'
+ * `evaluation/benchmarks/swe_bench/run_infer.py:1142-1156`.
+ *
+ * If `.git` already exists, this is a no-op. Otherwise a pristine baseline
+ * commit is created and tagged `opencode_bench_baseline`. Callers should skip
+ * `runDeepReset` when this returns `{ freshInit: true }` — the dataset's
+ * upstream `base_commit` SHA does not exist in the fresh repo, so deep_reset
+ * would just fail rev-parse and noisily fall through to its nuclear pass.
+ */
+
+import { spawn } from "node:child_process"
+import { existsSync } from "node:fs"
+import path from "node:path"
+
+/** Returns the first of the well-known bash/sh paths that exists on disk, or `null` when none does. */
+function detectShell(): string | null {
+  const candidates = ["/bin/bash", "/usr/bin/bash", "/bin/sh", "/usr/bin/sh"]
+  const firstPresent = candidates.find((candidate) => existsSync(candidate))
+  return firstPresent ?? null
+}
+
+function shellQuote(s: string): string {
+  return "'" + s.split("'").join("'\\''") + "'" // POSIX single-quoting: each embedded ' becomes '\'' (close, escaped quote, reopen)
+}
+
+/** Builds the `&&`-chained shell script that makes `workspaceRoot` a git repo with one commit tagged `opencode_bench_baseline`. */
+function buildBootstrapCmd(workspaceRoot: string): string {
+  const q = shellQuote(workspaceRoot)
+  return [
+    `cd ${q}`,
+    `echo '[bootstrap_repo] initializing git repo at '${q}`, // path is shell-quoted: pasted raw inside "..." a `"`, `$` or backtick in it would be interpreted by the shell
+    `git config --global --add safe.directory ${q}`,
+    `git init -q`,
+    `git config user.email 'bench@opencode.local' && git config user.name 'opencode bench'`, // repo-local identity used by the commit below
+    `git add -A`,
+    `git commit -q --allow-empty -m 'opencode bench baseline'`,
+    `git tag -f opencode_bench_baseline HEAD`,
+    `echo "[bootstrap_repo] done; HEAD=$(git rev-parse --short HEAD)"`,
+  ].join(" && ")
+}
+
+export interface BootstrapResult { // value resolved by bootstrapRepoIfMissing
+  freshInit: boolean // true only when the bootstrap script was run and its recorded exit code was 0
+}
+
+/**
+ * Creates a baseline git repo in `workspaceRoot` when it has no `.git` entry; does nothing when one exists.
+ * @returns `{ freshInit: true }` only when the bootstrap script ran and exited 0 (not on skip, spawn error, non-zero exit or signal).
+ */
+export async function bootstrapRepoIfMissing(workspaceRoot: string): Promise<BootstrapResult> {
+  if (existsSync(path.join(workspaceRoot, ".git"))) return { freshInit: false }
+  const shell = detectShell()
+  if (!shell) {
+    console.warn(`[bench] bootstrap_repo skipped: no shell found at /bin/{bash,sh} or /usr/bin/{bash,sh}`)
+    return { freshInit: false }
+  }
+  console.log(`[bench] bootstrap_repo workspace=${workspaceRoot} shell=${shell}`)
+  const exitCode = await new Promise<number>((resolve) => {
+    const child = spawn(shell, ["-c", buildBootstrapCmd(workspaceRoot)], { stdio: ["ignore", "inherit", "inherit"] })
+    // Node passes `code === null` when a signal terminated the child; count that as a failure, not as exit 0.
+    child.on("close", (code) => resolve(code ?? 1))
+    child.on("error", (err) => {
+      console.warn(`[bench] bootstrap_repo spawn error: ${err}`)
+      resolve(1)
+    })
+  })
+  console.log(`[bench] bootstrap_repo exit=${exitCode}`)
+  return { freshInit: exitCode === 0 }
+}