From 6f8e3d3311e1ab8a8a7db8ba1bcc86715dc072bb Mon Sep 17 00:00:00 2001
From: Arti Jain <artij@nvidia.com>
Date: Wed, 24 Jun 2026 00:04:35 +0000
Subject: [PATCH 1/7] feat(sandbox): add Apptainer sandbox provider

Add an ApptainerProvider implementing the SandboxProvider protocol via the
local apptainer CLI: persistent instance lifecycle, exec with user/fakeroot
mapping, bind-mount file transfer, status, readiness probe, and teardown.
Register it under the name "apptainer" and add unit tests plus a README.

Signed-off-by: Arti Jain <artij@nvidia.com>
---
 .../sandbox/providers/apptainer/README.md     | 279 ++++++++
 .../sandbox/providers/apptainer/__init__.py   |  34 +
 .../sandbox/providers/apptainer/provider.py   | 559 +++++++++++++++
 nemo_gym/sandbox/providers/registry.py        |   7 +
 tests/unit_tests/test_apptainer_provider.py   | 662 ++++++++++++++++++
 5 files changed, 1541 insertions(+)
 create mode 100644 nemo_gym/sandbox/providers/apptainer/README.md
 create mode 100644 nemo_gym/sandbox/providers/apptainer/__init__.py
 create mode 100644 nemo_gym/sandbox/providers/apptainer/provider.py
 create mode 100644 tests/unit_tests/test_apptainer_provider.py

diff --git a/nemo_gym/sandbox/providers/apptainer/README.md b/nemo_gym/sandbox/providers/apptainer/README.md
new file mode 100644
index 0000000000..21f3279465
--- /dev/null
+++ b/nemo_gym/sandbox/providers/apptainer/README.md
@@ -0,0 +1,279 @@
+# Apptainer Sandbox Provider
+
+A [NeMo Gym](../../../../README.md) sandbox provider backed by the local
+[Apptainer](https://apptainer.org/) (formerly Singularity) CLI. It runs each sandbox as
+a persistent Apptainer *instance* on the host and shells out to the `apptainer` binary —
+no daemon, no network service, no Kubernetes.
+
+Use it when you want lightweight, container-based isolation on a single machine or HPC
+node where Apptainer is already the supported container runtime (common on clusters
+where Docker is unavailable).
+
+> **Provider name:** `apptainer` (select it via the sandbox config; see below).
+
+## Requirements
+
+- The **`apptainer` binary** must be installed and on `PATH`. The provider does **not**
+  auto-install it; constructing the provider raises `RuntimeError` if it is missing.
+  See the [Apptainer install guide](https://apptainer.org/docs/admin/main/installation.html).
+- A container **image**, supplied per sandbox as either:
+  - a local `.sif` file path, or
+  - a remote URI that Apptainer can pull: `docker://`, `oras://`, or `library://`.
+- A couple of features — **running commands as a different user** and **enforcing
+  CPU/memory limits** — only work if your machine's administrator has enabled Apptainer's
+  **`--fakeroot`** support (Linux user namespaces, and cgroups v2 delegation for limits).
+  On many HPC clusters this is on by default; where it isn't, those features are quietly
+  skipped. Everything else works without it. See [Limitations](#limitations).
+
+## Quick start
+
+The provider is used through NeMo Gym's provider-neutral sandbox API
+(`nemo_gym.sandbox.api`). You pick the provider with a single-key mapping and describe
+the sandbox with a `SandboxSpec`.
+
+### Synchronous
+
+```python
+from nemo_gym.sandbox.api import Sandbox
+from nemo_gym.sandbox.providers import SandboxSpec
+
+spec = SandboxSpec(
+    image="docker://ubuntu:22.04",   # or "/path/to/image.sif"
+    workdir="/sandbox",
+    env={"GREETING": "hello"},
+    files={"/sandbox/input.txt": "some seed content"},
+    resources={"cpu": 2, "memory_mib": 4096},
+)
+
+with Sandbox({"apptainer": {}}, spec) as sandbox:
+    sandbox.start()
+
+    result = sandbox.exec("echo $GREETING && cat /sandbox/input.txt")
+    print(result.return_code, result.stdout)
+
+    sandbox.upload("./local_script.sh", "/sandbox/script.sh")
+    sandbox.download("/sandbox/result.txt", "./result.txt")
+# leaving the `with` block stops the instance and cleans up
+```
+
+### Asynchronous
+
+```python
+from nemo_gym.sandbox.api import AsyncSandbox
+from nemo_gym.sandbox.providers import SandboxSpec
+
+async def run():
+    spec = SandboxSpec(image="docker://ubuntu:22.04", workdir="/sandbox")
+    async with AsyncSandbox({"apptainer": {}}, spec) as sandbox:
+        await sandbox.start()
+        result = await sandbox.exec("uname -a")
+        print(result.stdout)
+```
+
+> **Lifecycle contract:** download anything you want to keep *before* the sandbox is
+> stopped. Stopping is teardown — it stops the (ephemeral) instance and deletes the host
+> staging directory, leaving nothing behind.
+
+## Selecting and configuring the provider
+
+The provider config is a single-key mapping: `{"apptainer": {<kwargs>}}`. The kwargs are
+grouped into three optional sections, each of which accepts a plain mapping (e.g. from
+Hydra YAML) or the corresponding dataclass:
+
+```yaml
+# Provider config (the value passed as the sandbox provider)
+apptainer:
+  exec:
+    fakeroot_for_root: true
+    default_binds: ["/tmp"]
+    extra_exec_args: ["--writable-tmpfs"]
+    default_timeout_s: 180
+    concurrency: 32
+  create:
+    mount_point: /sandbox
+    start_timeout_s: 600
+    extra_start_args: []
+  probe:
+    command: printf apptainer-sandbox-ready
+    expected_stdout: apptainer-sandbox-ready
+    deadline_s: 120
+```
+
+### `create` — `ApptainerCreateConfig`
+
+Settings for starting the instance (`apptainer instance start`).
+
+| Field | Default | Meaning |
+|---|---|---|
+| `mount_point` | `/sandbox` | Absolute path inside the container where the host staging dir is bind-mounted. Powers the file-transfer fast path. |
+| `start_timeout_s` | `600` | Max seconds to wait for `instance start` (`None` = no timeout). |
+| `extra_start_args` | `[]` | Extra raw flags appended to `instance start`. |
+
+### `exec` — `ApptainerExecConfig`
+
+Settings for running commands (`apptainer exec`) and global provider behavior.
+
+| Field | Default | Meaning |
+|---|---|---|
+| `default_timeout_s` | `180` | Default per-command timeout when the caller doesn't pass one (`None` = no timeout). |
+| `fakeroot_for_root` | `true` | When running as root, add `--fakeroot` (map the host user to root inside the container). |
+| `default_binds` | `[]` | Extra `--bind host:container` mounts added at instance start. |
+| `extra_exec_args` | `[]` | Extra raw flags appended to every `apptainer exec` (e.g. `--no-home`, `--writable-tmpfs`, `--contain`). |
+| `concurrency` | `32` | Upper bound on concurrent `apptainer` subprocesses (shared semaphore). |
+
+### `probe` — `ApptainerProbeConfig`
+
+Readiness-probe knobs. After starting an instance, `create` runs `command` and checks
+its output before returning the sandbox, so callers never receive a sandbox that can't
+actually run commands. Set `command: null` to skip the probe.
+
+| Field | Default | Meaning |
+|---|---|---|
+| `command` | `printf apptainer-sandbox-ready` | Probe command (`None` disables the probe). |
+| `expected_stdout` | `apptainer-sandbox-ready` | Output the probe must produce to count as ready. |
+| `timeout_s` | `30` | Per-probe-attempt timeout. |
+| `deadline_s` | `None` | Overall time budget for the probe loop (`None` = no retry after a failed attempt). |
+| `stable_count` | `1` | Consecutive successes required before the sandbox is considered ready. |
+| `stable_delay_s` | `0.0` | Delay between probe attempts. |
+
+### Relevant `SandboxSpec` fields
+
+The spec is provider-neutral; the Apptainer provider uses these fields:
+
+| Field | Used for |
+|---|---|
+| `image` | Image source — local `.sif` path or remote `docker://` / `oras://` / `library://` URI. Required. |
+| `env` | Each entry becomes `--env KEY=VALUE` at instance start. |
+| `workdir` | Default working directory for `exec` (applied as `--pwd`). |
+| `files` | Seed files written into the sandbox at `start()` (handled by the sandbox API via `upload`). |
+| `resources` | Mapped to cgroup flags (see below). |
+| `ttl_s` | **Not supported** — ignored with a warning. Tear down via `stop()`/`close()` instead. |
+
+## How it works
+
+### Lifecycle: one persistent instance per sandbox
+
+| Step | Apptainer command |
+|---|---|
+| Create | `apptainer instance start --bind <staging>:<mount_point> [...] <image> <name>` |
+| Exec | `apptainer exec [flags] instance://<name> sh -c <command>` |
+| Status | `apptainer instance list --json` |
+| Close | `apptainer instance stop <name>` |
+
+Instances are named `nemo-gym-<uuid>` and persist across `exec` calls, so state written
+by one command is visible to the next — agents rely on this.
+
+### File transfer: a shared bind-mounted directory
+
+On create, the provider makes a temporary host directory and bind-mounts it into the
+container at `mount_point` (default `/sandbox`). This shared folder is the transfer
+channel:
+
+- **Fast path** — if the target/source path is *inside* `mount_point`, the provider
+  reads/writes the host side of the shared folder directly (no container call).
+- **Fallback** — for arbitrary in-container paths, the provider stages the bytes in the
+  shared folder and runs an in-container `cp` (as root) to move them to/from the target.
+
+### Running as a specific user
+
+The neutral `user` argument to `exec` maps onto Apptainer like this:
+
+| `user` | Behavior |
+|---|---|
+| `None` | Run as the default (launching) user. |
+| `"root"` or `0` | Add `--fakeroot` (gated by `exec.fakeroot_for_root`). |
+| other name / uid | Add `--fakeroot` and wrap the command in `su -s /bin/sh -c '<cmd>' <user>`. |
+
+### Resource limits
+
+`SandboxResources` is translated to cgroup flags on `instance start`:
+
+| Resource | Flag |
+|---|---|
+| `cpu` | `--cpus <n>` |
+| `memory_mib` | `--memory <n>m` |
+| `gpu` (truthy) | `--nv` (NVIDIA passthrough) |
+| `disk_gib`, `gpu_type` | No direct Apptainer flag — ignored. |
+
+### Status mapping
+
+`apptainer instance list --json` only lists *live* instances, so:
+
+- name present → `RUNNING` (unless a `state` field says otherwise),
+- name absent → `STOPPED`,
+- timeout / non-zero / unparseable output → `UNKNOWN`.
+
+### Error reporting
+
+`exec` never raises for command failure; it returns a `SandboxExecResult`:
+
+- **Normal** — the command's real `return_code`, `error_type=None`.
+- **Timeout** — `return_code=125`, `error_type="timeout"`.
+- **Apptainer runtime failure** (instance gone, etc., detected via stderr markers like
+  `FATAL:`) — `return_code=125`, `error_type="sandbox"`.
+
+`125` is the sentinel `SANDBOX_RUNTIME_RETURN_CODE`, signaling "the sandbox runtime
+failed" rather than "the command exited 125".
+
+## Limitations
+
+- **No `ttl_s`.** Apptainer has no native auto-expiry; the field is ignored (a warning is
+  logged on every create that sets it). Manage lifetime with `stop()` / `close()`.
+- **Numeric uids.** The `su`-based user switch expects a *username*; a bare numeric uid
+  may not resolve. Prefer named users.
+- **`--fakeroot` on exec.** Whether `--fakeroot` works on `exec` into an instance that
+  was started *without* fakeroot varies by Apptainer version and host configuration.
+- **Resource enforcement.** cgroup limits may require cgroups v2 delegation and/or
+  `--fakeroot` on the host; limits are best-effort and silently ignored where the host
+  can't enforce them.
+- **Runtime-failure detection is heuristic.** It keys off stderr markers, so a user
+  command whose own output contains `FATAL:` could be misclassified as a sandbox error.
+
+## Development
+
+Source: [`provider.py`](./provider.py). The provider implements the
+`SandboxProvider` protocol from [`../base.py`](../base.py) structurally (no subclassing)
+and is registered under the name `apptainer` in [`../registry.py`](../registry.py).
+
+### Running the tests
+
+The unit tests live in
+[`tests/unit_tests/test_apptainer_provider.py`](../../../../tests/unit_tests/test_apptainer_provider.py)
+and run as part of the core library test suite — no `apptainer` binary required:
+
+```bash
+uv venv && uv sync --extra dev      # one-time environment setup
+pytest tests/unit_tests/test_apptainer_provider.py -q
+```
+
+Async tests need no decorator because the repo sets `asyncio_mode = "auto"` in
+`pyproject.toml`.
+
+### How the tests avoid needing Apptainer
+
+The suite mocks at the **subprocess boundary** so it is fully hermetic:
+
+- `_require_apptainer` is monkeypatched to return a fake path, so constructing the
+  provider never checks for a real binary.
+- `ApptainerProvider._run` (the single chokepoint every CLI call goes through) is
+  replaced with a small recorder that captures the `argv` / `timeout_s` and returns
+  canned `(return_code, stdout, stderr)`. Tests then assert the exact command line built
+  for `create` / `exec` / `upload` / `download` / `status` / `close`.
+
+This mirrors how [`test_opensandbox_provider.py`](../../../../tests/unit_tests/test_opensandbox_provider.py)
+works: that suite mocks at the **SDK boundary** instead — it calls
+`pytest.importorskip("tenacity")` to skip when the optional dependency is absent, then
+monkeypatches `_require_opensandbox_sdk` to hand back fake `Sandbox` classes, so no real
+SDK or network is ever used. Same idea, different seam: OpenSandbox talks to a remote SDK
+(mock the SDK), Apptainer shells out to a local CLI (mock the subprocess). The Apptainer
+provider has no optional Python dependency, so it needs no `importorskip`.
+
+A few tests do exercise the real subprocess plumbing in `_run` using harmless system
+binaries (`echo`, `cat`, `sleep`). Tests that need a binary skip where it is missing, e.g.:
+
+```python
+import shutil, pytest
+
+@pytest.mark.skipif(shutil.which("apptainer") is None, reason="apptainer not installed")
+def test_real_apptainer(): ...
+```
diff --git a/nemo_gym/sandbox/providers/apptainer/__init__.py b/nemo_gym/sandbox/providers/apptainer/__init__.py
new file mode 100644
index 0000000000..196d4a71ae
--- /dev/null
+++ b/nemo_gym/sandbox/providers/apptainer/__init__.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Apptainer provider package."""
+
+from nemo_gym.sandbox.providers.apptainer.provider import (
+    ApptainerCreateConfig,
+    ApptainerCreateError,
+    ApptainerCreateVerificationError,
+    ApptainerExecConfig,
+    ApptainerProbeConfig,
+    ApptainerProvider,
+)
+
+
+__all__ = [
+    "ApptainerCreateConfig",
+    "ApptainerCreateError",
+    "ApptainerCreateVerificationError",
+    "ApptainerExecConfig",
+    "ApptainerProbeConfig",
+    "ApptainerProvider",
+]
diff --git a/nemo_gym/sandbox/providers/apptainer/provider.py b/nemo_gym/sandbox/providers/apptainer/provider.py
new file mode 100644
index 0000000000..ff7d280464
--- /dev/null
+++ b/nemo_gym/sandbox/providers/apptainer/provider.py
@@ -0,0 +1,559 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Apptainer provider implementation."""
+
+import asyncio
+import contextlib
+import json
+import logging
+import os
+import posixpath
+import shlex
+import shutil
+import signal
+import tempfile
+import uuid
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from nemo_gym.sandbox.providers.base import (
+    SandboxCreateError,
+    SandboxCreateVerificationError,
+    SandboxExecResult,
+    SandboxHandle,
+    SandboxResources,
+    SandboxSpec,
+    SandboxStatus,
+)
+
+
+LOGGER = logging.getLogger(__name__)
+
+DEFAULT_MOUNT_POINT = "/sandbox"
+INSTANCE_NAME_PREFIX = "nemo-gym-"
+READY_PROBE_COMMAND = "printf apptainer-sandbox-ready"
+READY_PROBE_EXPECTED = "apptainer-sandbox-ready"
+SANDBOX_RUNTIME_RETURN_CODE = 125
+# Best-effort stderr markers indicating apptainer itself (not the user's command)
+# failed to run the command. Apptainer prefixes its own fatal errors with "FATAL:".
+APPTAINER_RUNTIME_ERROR_MARKERS = ("fatal:", "no instance found", "instance not found", "does not exist")
+
+
+class ApptainerCreateError(SandboxCreateError):
+    """Raised when an Apptainer instance cannot be started (missing image, start failure, or timeout)."""
+
+
+class ApptainerCreateVerificationError(SandboxCreateVerificationError):
+    """Raised when a newly started instance fails the post-create readiness probe."""
+
+
+def _require_apptainer() -> str:
+    """Locate the `apptainer` executable on PATH; raise RuntimeError when absent."""
+    binary = shutil.which("apptainer")
+    if binary is not None:
+        return binary
+    raise RuntimeError(
+        "The 'apptainer' binary is required for the apptainer sandbox provider. "
+        "Install Apptainer before using env.sandbox.provider.name=apptainer."
+    )
+
+
+def _coerce_config(value: Any, config_cls: type[Any]) -> Any:
+    """Normalize `value` into a `config_cls`: None -> defaults, mapping -> kwargs."""
+    if isinstance(value, config_cls):
+        return value
+    if value is None:
+        return config_cls()
+    if isinstance(value, Mapping):
+        return config_cls(**value)
+    raise TypeError(f"{config_cls.__name__} must be a mapping or {config_cls.__name__} instance")
+
+
+@dataclass(frozen=True)
+class ApptainerCreateConfig:
+    """Settings for creating an Apptainer sandbox instance (validated on construction)."""
+
+    mount_point: str = DEFAULT_MOUNT_POINT  # in-container path where the host staging dir is bound
+    start_timeout_s: float | None = 600  # limit for `instance start`; None disables the timeout
+    extra_start_args: list[str] = field(default_factory=list)  # raw flags appended to `instance start`
+
+    def __post_init__(self) -> None:
+        if self.start_timeout_s is not None and self.start_timeout_s <= 0:
+            raise ValueError("create.start_timeout_s must be > 0")
+        if not self.mount_point.startswith("/"):
+            raise ValueError("create.mount_point must be an absolute path")
+
+
+@dataclass(frozen=True)
+class ApptainerExecConfig:
+    """Settings for running commands inside an Apptainer sandbox (validated on construction)."""
+
+    default_timeout_s: float | None = 180  # used when exec() gets no timeout; None = no timeout
+    fakeroot_for_root: bool = True  # add --fakeroot when a command is run as root
+    default_binds: list[str] = field(default_factory=list)  # extra --bind specs applied at instance start
+    extra_exec_args: list[str] = field(default_factory=list)  # raw flags appended to every `apptainer exec`
+    concurrency: int = 32  # size of the semaphore bounding concurrent apptainer subprocesses
+
+    def __post_init__(self) -> None:
+        if self.default_timeout_s is not None and self.default_timeout_s <= 0:
+            raise ValueError("exec.default_timeout_s must be > 0")
+        if self.concurrency < 1:
+            raise ValueError("exec.concurrency must be >= 1")
+
+
+@dataclass(frozen=True)
+class ApptainerProbeConfig:
+    """Post-create probe settings: a test command confirming the sandbox is usable."""
+
+    command: str | None = READY_PROBE_COMMAND  # None disables the readiness probe
+    expected_stdout: str | None = READY_PROBE_EXPECTED  # substring required in stdout; None = exit code only
+    timeout_s: int = 30  # timeout for each probe attempt
+    deadline_s: float | None = None  # overall polling budget; None = fail on the first failed attempt
+    stable_count: int = 1  # consecutive passing attempts required
+    stable_delay_s: float = 0.0  # pause between attempts
+
+    def __post_init__(self) -> None:
+        if self.command is not None and self.timeout_s <= 0:  # timeout only matters when the probe runs
+            raise ValueError("probe.timeout_s must be > 0")
+        if self.deadline_s is not None and self.deadline_s <= 0:
+            raise ValueError("probe.deadline_s must be > 0")
+        if self.stable_count < 1:
+            raise ValueError("probe.stable_count must be >= 1")
+        if self.stable_delay_s < 0:
+            raise ValueError("probe.stable_delay_s must be >= 0")
+
+
+@dataclass
+class _ApptainerInstance:
+    """Provider-private state stashed on SandboxHandle.raw."""
+
+    name: str  # apptainer instance name (also used as the handle's sandbox_id)
+    staging_dir: Path  # host directory bind-mounted into the container
+    mount_point: str  # in-container path where staging_dir appears
+    image: str  # image reference the instance was started from
+
+
+def _resource_flags(resources: SandboxResources) -> list[str]:
+    """Build the `instance start` flags for the CPU, memory, and GPU requests."""
+    cpu, memory_mib = resources.cpu, resources.memory_mib
+    argv: list[str] = []
+    if cpu is not None:
+        argv.extend(("--cpus", str(cpu)))
+    if memory_mib is not None:
+        argv.extend(("--memory", f"{memory_mib}m"))
+    # disk_gib / gpu_type are deliberately dropped: apptainer has no matching flag.
+    argv.extend(["--nv"] if resources.gpu else [])
+    return argv
+
+
+def _to_sandbox_status(state: str | None) -> SandboxStatus:
+    """Convert an apptainer state string (any case, possibly None) to SandboxStatus."""
+    key = str(state or "").lower()
+    # Each row pairs the accepted spellings with the neutral status they denote;
+    # anything not listed (including the empty string) falls through to UNKNOWN.
+    table = (
+        ({"running", "active", "ready"}, SandboxStatus.RUNNING),
+        ({"starting", "creating", "pending"}, SandboxStatus.STARTING),
+        ({"stopped", "exited", "terminated"}, SandboxStatus.STOPPED),
+        ({"error", "failed", "unhealthy"}, SandboxStatus.ERROR),
+    )
+    return next((status for names, status in table if key in names), SandboxStatus.UNKNOWN)
+
+
+def _path_under_mount(mount_point: str, path: str) -> str | None:
+    """Return `path` relative to the mount ("" for the mount root), else None.
+
+    Both are normalized so ".." or "//" segments cannot escape the mount.
+    """
+    mp = posixpath.normpath(mount_point).rstrip("/")
+    norm = posixpath.normpath(path)
+    under = norm.startswith(mp + "/")
+    return norm[len(mp) + 1 :] if under else ("" if norm == mp else None)
+
+
+def _is_runtime_failure(stderr: str) -> bool:
+    """Heuristic: True when stderr carries a marker that apptainer itself failed."""
+    lowered = stderr.lower()
+    return next((True for marker in APPTAINER_RUNTIME_ERROR_MARKERS if marker in lowered), False)
+
+
+class ApptainerProvider:
+    """Sandbox provider backed by the local Apptainer CLI."""
+
+    name = "apptainer"
+
+    def __init__(
+        self,
+        *,
+        exec: ApptainerExecConfig | Mapping[str, Any] | None = None,  # None -> ApptainerExecConfig defaults
+        create: ApptainerCreateConfig | Mapping[str, Any] | None = None,  # None -> ApptainerCreateConfig defaults
+        probe: ApptainerProbeConfig | Mapping[str, Any] | None = None,  # None -> ApptainerProbeConfig defaults
+    ) -> None:
+        self._exec_config = _coerce_config(exec, ApptainerExecConfig)
+        self._create_config = _coerce_config(create, ApptainerCreateConfig)
+        self._probe = _coerce_config(probe, ApptainerProbeConfig)
+        self._binary = _require_apptainer()  # raises RuntimeError if apptainer is not on PATH
+        self._semaphore = asyncio.Semaphore(self._exec_config.concurrency)  # caps concurrent CLI subprocesses
+
+    async def _run(
+        self,
+        argv: list[str],
+        *,
+        timeout_s: float | None,
+        stdin: bytes | None = None,
+    ) -> tuple[int, str, str]:
+        """Run an apptainer CLI command. Returns (return_code, stdout, stderr).
+
+        Concurrency is bounded by the shared semaphore and output is decoded with
+        errors="replace". If the wait ends early (timeout -> TimeoutError, or task
+        cancellation), the whole process group is killed so no child lingers.
+        """
+        async with self._semaphore:
+            proc = await asyncio.create_subprocess_exec(
+                *argv,
+                stdin=asyncio.subprocess.PIPE if stdin is not None else None,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                start_new_session=True,
+            )
+            try:
+                pending = proc.communicate(input=stdin)
+                stdout_b, stderr_b = await asyncio.wait_for(pending, timeout=timeout_s)
+            except asyncio.TimeoutError as e:
+                raise TimeoutError(f"apptainer command timed out after {timeout_s:g}s: {argv}") from e
+            finally:
+                if proc.returncode is None:  # still running: timed out or cancelled
+                    with contextlib.suppress(ProcessLookupError):
+                        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+                    with contextlib.suppress(Exception):
+                        await proc.wait()
+
+            return_code = proc.returncode if proc.returncode is not None else SANDBOX_RUNTIME_RETURN_CODE
+            return return_code, stdout_b.decode(errors="replace"), stderr_b.decode(errors="replace")
+
+    async def create(self, spec: SandboxSpec) -> SandboxHandle:
+        """Start an apptainer instance and return a ready handle.
+
+        Steps:
+        1. Log a warning if spec.ttl_s is set (unsupported by apptainer; ignored).
+        2. Use spec.image as-is (local .sif path or remote docker://, oras://,
+           library:// URI); raise ApptainerCreateError if it is None.
+        3. Make a host staging dir (tempfile.mkdtemp) and generate a unique
+           name = INSTANCE_NAME_PREFIX + uuid4().hex.
+        4. Build argv: [binary, "instance", "start", <--bind staging:mount_point>,
+           <config default_binds>, <--env ...>, _resource_flags(spec.resources),
+           <extra_start_args>, image, name].
+        5. Run it with create.start_timeout_s. On timeout or non-zero exit, remove
+           the staging dir and raise ApptainerCreateError; on any other error
+           (including cancellation), remove the staging dir and re-raise.
+        6. Wrap the state in SandboxHandle(sandbox_id=name, provider_name=self.name,
+           raw=_ApptainerInstance(name, staging_dir, mount_point, image)).
+        7. Run self._verify_created_handle(handle); on any failure stop the
+           instance, remove the staging dir, and re-raise.
+        8. Return the handle.
+        """
+        # ttl_s has no apptainer equivalent; warn, then ignore it.
+        if spec.ttl_s is not None:
+            LOGGER.warning("ttl_s is not supported by the apptainer provider; it will be ignored.")
+
+        image = spec.image
+        if image is None:
+            raise ApptainerCreateError("spec.image is required for the apptainer provider")
+
+        # Host staging dir (bind-mounted in), mount point, unique name.
+        mount_point = self._create_config.mount_point
+        staging_dir = Path(tempfile.mkdtemp(prefix="nemo-gym-apptainer-"))
+        name = INSTANCE_NAME_PREFIX + uuid.uuid4().hex
+
+        # Build the `apptainer instance start` command line.
+        argv: list[str] = [self._binary, "instance", "start"]
+        argv += ["--bind", f"{staging_dir}:{mount_point}"]
+        for bind in self._exec_config.default_binds:
+            argv += ["--bind", bind]
+        for key, value in spec.env.items():
+            argv += ["--env", f"{key}={value}"]
+        argv += _resource_flags(spec.resources)
+        argv += list(self._create_config.extra_start_args)
+        argv += [image, name]
+
+        # Start the instance; the staging dir must not outlive a failed start.
+        try:
+            code, _out, err = await self._run(argv, timeout_s=self._create_config.start_timeout_s)
+        except TimeoutError as e:
+            shutil.rmtree(staging_dir, ignore_errors=True)
+            raise ApptainerCreateError(f"apptainer instance start timed out for image={image!r}: {e}") from e
+        except BaseException:
+            # Spawn errors (OSError) and cancellation would otherwise leak the dir.
+            shutil.rmtree(staging_dir, ignore_errors=True)
+            raise
+        if code != 0:
+            shutil.rmtree(staging_dir, ignore_errors=True)
+            raise ApptainerCreateError(
+                f"apptainer instance start failed (code={code}) for image={image!r}: {err.strip()}"
+            )
+
+        # Wrap provider-private state on the handle.
+        handle = SandboxHandle(
+            sandbox_id=name,
+            provider_name=self.name,
+            raw=_ApptainerInstance(name=name, staging_dir=staging_dir, mount_point=mount_point, image=image),
+        )
+
+        # Verify the sandbox can actually run a command before handing it back.
+        # On any failure (cancellation included), tear down the half-created
+        # sandbox so we don't leak a running instance / staging dir.
+        try:
+            await self._verify_created_handle(handle)
+        except BaseException:
+            await self._cleanup_failed_create_handle(handle)
+            raise
+
+        return handle
+
+    async def _verify_created_handle(self, handle: SandboxHandle) -> None:
+        """Run the readiness probe until the sandbox responds, or raise.
+
+        - probe.command is None      -> skip (no verification).
+        - probe.deadline_s is None   -> no retry: the first failed attempt raises.
+        - probe.deadline_s is set    -> poll until the probe passes `stable_count`
+          consecutive times; raise ApptainerCreateVerificationError at the deadline.
+        """
+        probe = self._probe
+        if probe.command is None:
+            return
+
+        loop = asyncio.get_running_loop()
+        deadline = loop.time() + probe.deadline_s if probe.deadline_s is not None else None
+        consecutive = 0  # length of the current run of passing attempts
+        last_detail = "no probe attempt completed"
+
+        while True:
+            result = await self.exec(handle, probe.command, timeout_s=probe.timeout_s)
+            passed = result.return_code == 0 and (  # exit 0 and, if configured, expected text in stdout
+                probe.expected_stdout is None or probe.expected_stdout in (result.stdout or "")
+            )
+            if passed:
+                consecutive += 1
+                if consecutive >= probe.stable_count:
+                    return
+            else:
+                consecutive = 0  # a failed attempt resets the streak
+                last_detail = f"return_code={result.return_code}, stderr={(result.stderr or '').strip()!r}"
+                if deadline is None:
+                    raise ApptainerCreateVerificationError(
+                        f"sandbox {handle.sandbox_id!r} failed readiness probe: {last_detail}"
+                    )
+
+            if deadline is not None and loop.time() >= deadline:  # checked after every attempt, pass or fail
+                raise ApptainerCreateVerificationError(
+                    f"sandbox {handle.sandbox_id!r} did not pass readiness probe within "
+                    f"{probe.deadline_s:g}s: {last_detail}"
+                )
+            if probe.stable_delay_s > 0:
+                await asyncio.sleep(probe.stable_delay_s)
+
+    async def _cleanup_failed_create_handle(self, handle: SandboxHandle) -> None:
+        """Stop the instance and delete its staging dir, ignoring stop failures."""
+        instance = handle.raw
+        with contextlib.suppress(Exception):
+            stop_argv = [self._binary, "instance", "stop", instance.name]
+            stop_timeout = self._exec_config.default_timeout_s
+            await self._run(stop_argv, timeout_s=stop_timeout)
+        # Runs even when the stop above failed, so the host dir never leaks.
+        shutil.rmtree(instance.staging_dir, ignore_errors=True)
+
+    async def exec(
+        self,
+        handle: SandboxHandle,
+        command: str,
+        *,
+        cwd: str | None = None,
+        env: dict[str, str] | None = None,
+        timeout_s: int | float | None = None,
+        user: str | int | None = None,
+    ) -> SandboxExecResult:
+        """Run `sh -c <command>` in the instance; command failures/timeouts are returned, not raised.
+
+        ``user``: None -> launching user; "root" / 0 -> --fakeroot (if enabled by
+        exec.fakeroot_for_root); other name/uid -> --fakeroot plus an ``su`` wrapper.
+        Timeouts and apptainer runtime failures (non-zero exit with a stderr marker)
+        return SANDBOX_RUNTIME_RETURN_CODE with error_type "timeout" / "sandbox".
+        """
+        inst = handle.raw
+
+        flags: list[str] = []
+        if cwd is not None:
+            flags += ["--pwd", cwd]
+        if env:
+            for key, value in env.items():
+                flags += ["--env", f"{key}={value}"]
+
+        effective_command = command
+        is_root = user == "root" or user == 0
+        if is_root:
+            if self._exec_config.fakeroot_for_root:
+                flags.append("--fakeroot")
+        elif user is not None:
+            # Need root inside the container to switch users, then su to the target.
+            flags.append("--fakeroot")
+            effective_command = f"su -s /bin/sh -c {shlex.quote(command)} {shlex.quote(str(user))}"
+
+        flags += list(self._exec_config.extra_exec_args)  # configured extras go after per-call flags
+
+        argv = [self._binary, "exec", *flags, f"instance://{inst.name}", "sh", "-c", effective_command]
+        effective_timeout = timeout_s if timeout_s is not None else self._exec_config.default_timeout_s
+
+        try:
+            code, out, err = await self._run(argv, timeout_s=effective_timeout)
+        except TimeoutError as e:
+            return SandboxExecResult(
+                stdout=None,
+                stderr=str(e),
+                return_code=SANDBOX_RUNTIME_RETURN_CODE,
+                error_type="timeout",
+            )
+
+        if code != 0 and _is_runtime_failure(err):  # apptainer itself failed, not the user's command
+            return SandboxExecResult(
+                stdout=out or None,
+                stderr=err or None,
+                return_code=SANDBOX_RUNTIME_RETURN_CODE,
+                error_type="sandbox",
+            )
+        return SandboxExecResult(stdout=out or None, stderr=err or None, return_code=code, error_type=None)
+
+    async def upload_file(self, handle: SandboxHandle, source_path: Path, target_path: str) -> None:
+        """Upload one host file into the sandbox.
+
+        Fast path (target under the bind mount): write directly to the host side
+        of the shared folder. Fallback (arbitrary path): stage into the shared
+        folder, then cp inside the container.
+        """
+        inst = handle.raw
+
+        rel = _path_under_mount(inst.mount_point, target_path)
+        if rel is not None:
+            dest = inst.staging_dir / rel
+            dest.parent.mkdir(parents=True, exist_ok=True)
+            dest.write_bytes(source_path.read_bytes())
+            return
+
+        tmp_name = uuid.uuid4().hex
+        host_tmp = inst.staging_dir / tmp_name
+        host_tmp.write_bytes(source_path.read_bytes())
+        try:
+            container_tmp = f"{inst.mount_point.rstrip('/')}/{tmp_name}"
+            parent = posixpath.dirname(target_path)
+            script = f"mkdir -p {shlex.quote(parent)} && cp {shlex.quote(container_tmp)} {shlex.quote(target_path)}"
+            result = await self.exec(handle, script, user="root")
+            if result.return_code != 0:
+                raise RuntimeError(f"apptainer upload to {target_path!r} failed: {result.stderr}")
+        finally:
+            host_tmp.unlink(missing_ok=True)
+
+    async def download_file(self, handle: SandboxHandle, source_path: str, target_path: Path) -> None:
+        """Download one sandbox file to the host.
+
+        Fast path (source under the bind mount): read directly from the host side
+        of the shared folder. Fallback (arbitrary path): cp inside the container
+        into the shared folder, then read the host side.
+        """
+        inst = handle.raw
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+
+        rel = _path_under_mount(inst.mount_point, source_path)
+        if rel is not None:
+            target_path.write_bytes((inst.staging_dir / rel).read_bytes())
+            return
+
+        tmp_name = uuid.uuid4().hex
+        host_tmp = inst.staging_dir / tmp_name
+        try:
+            container_tmp = f"{inst.mount_point.rstrip('/')}/{tmp_name}"
+            script = f"cp {shlex.quote(source_path)} {shlex.quote(container_tmp)}"
+            result = await self.exec(handle, script, user="root")
+            if result.return_code != 0:
+                raise RuntimeError(f"apptainer download from {source_path!r} failed: {result.stderr}")
+            target_path.write_bytes(host_tmp.read_bytes())
+        finally:
+            host_tmp.unlink(missing_ok=True)
+
+    async def status(self, handle: SandboxHandle) -> SandboxStatus:
+        """Return the instance's lifecycle status by querying ``apptainer instance list``.
+        Runs ``apptainer instance list --json``.
+        On timeout, non-zero exit code, or unparseable JSON --> UNKNOWN.
+        Looks up this instance by name: listed --> mapped from its ``state`` (RUNNING if absent); not listed --> STOPPED.
+        """
+        inst = handle.raw
+
+        try:
+            code, out, _err = await self._run(
+                [self._binary, "instance", "list", "--json"],
+                timeout_s=self._exec_config.default_timeout_s,
+            )
+        except TimeoutError:
+            return SandboxStatus.UNKNOWN
+
+        if code != 0:
+            return SandboxStatus.UNKNOWN
+
+        try:
+            instances = json.loads(out).get("instances", [])
+        except (json.JSONDecodeError, AttributeError):
+            return SandboxStatus.UNKNOWN
+
+        for entry in instances:
+            if entry.get("instance") == inst.name:
+                # apptainer's list output has no explicit state field for a listed (i.e. live) instance, so being present means it is running.
+                return _to_sandbox_status(entry.get("state") or "running")
+
+        # Not listed -> it has been stopped (or never existed).
+        return SandboxStatus.STOPPED
+
+    async def close(self, handle: SandboxHandle) -> None:
+        """Stop the instance and clean up the host staging dir.
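+        Runs ``apptainer instance stop <name>``; a missing instance counts as success.
+        Any other stop failure, or a stop timeout, is re-raised after cleanup.
+        The host staging dir is removed either way (removal errors are only logged).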
+        """
+        inst = handle.raw
+
+        stop_error: Exception | None = None
+        try:
+            code, _out, err = await self._run(
+                [self._binary, "instance", "stop", inst.name],
+                timeout_s=self._exec_config.default_timeout_s,
+            )
+            if code != 0 and not _is_runtime_failure(err):
+                stop_error = RuntimeError(
+                    f"apptainer instance stop failed (code={code}) for {inst.name!r}: {err.strip()}"
+                )
+        except TimeoutError as e:
+            stop_error = e
+
+        # Always best-effort remove the host staging dir, even if stop failed.
+        try:
+            shutil.rmtree(inst.staging_dir, ignore_errors=False)
+        except OSError as e:
+            LOGGER.warning("failed to remove staging dir %s: %s", inst.staging_dir, e)
+
+        if stop_error is not None:
+            raise stop_error
+
+    async def aclose(self) -> None:
+        """No provider-wide resources to close."""
+        return None
diff --git a/nemo_gym/sandbox/providers/registry.py b/nemo_gym/sandbox/providers/registry.py
index 8ec7ea81a8..8c4e39e577 100644
--- a/nemo_gym/sandbox/providers/registry.py
+++ b/nemo_gym/sandbox/providers/registry.py
@@ -75,4 +75,11 @@ def _load_opensandbox_provider() -> ProviderClass:
     return OpenSandboxProvider
 
 
+def _load_apptainer_provider() -> ProviderClass:
+    from nemo_gym.sandbox.providers.apptainer import ApptainerProvider
+
+    return ApptainerProvider
+
+
+_BUILTIN_PROVIDER_LOADERS["apptainer"] = _load_apptainer_provider
 _BUILTIN_PROVIDER_LOADERS["opensandbox"] = _load_opensandbox_provider
diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py
new file mode 100644
index 0000000000..c595738200
--- /dev/null
+++ b/tests/unit_tests/test_apptainer_provider.py
@@ -0,0 +1,662 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import shlex
+import shutil
+from pathlib import Path
+from typing import Any, Callable
+
+import pytest
+
+from nemo_gym.sandbox.providers.apptainer import provider as apptainer_provider
+from nemo_gym.sandbox.providers.base import (
+    SandboxExecResult,
+    SandboxHandle,
+    SandboxResources,
+    SandboxSpec,
+    SandboxStatus,
+)
+
+
+FAKE_BINARY = "/usr/bin/apptainer"
+
+
+# --------------------------------------------------------------------------- #
+# Test helpers
+# --------------------------------------------------------------------------- #
+class RunRecorder:
+    """Stand-in for ApptainerProvider._run that records argv and returns canned output."""
+
+    def __init__(self, responder: Callable[[list[str]], tuple[int, str, str]]) -> None:
+        self.calls: list[dict[str, Any]] = []
+        self._responder = responder
+
+    async def __call__(
+        self, argv: list[str], *, timeout_s: float | None, stdin: bytes | None = None
+    ) -> tuple[int, str, str]:
+        self.calls.append({"argv": list(argv), "timeout_s": timeout_s, "stdin": stdin})
+        return self._responder(list(argv))
+
+
+def _contains_seq(haystack: list[str], needle: list[str]) -> bool:
+    return any(haystack[i : i + len(needle)] == needle for i in range(len(haystack) - len(needle) + 1))
+
+
+def _make_handle(staging: Path, *, name: str = "nemo-gym-x", mount: str = "/sandbox") -> SandboxHandle:
+    inst = apptainer_provider._ApptainerInstance(
+        name=name,
+        staging_dir=staging,
+        mount_point=mount,
+        image="docker://img",
+    )
+    return SandboxHandle(sandbox_id=name, provider_name="apptainer", raw=inst)
+
+
+@pytest.fixture
+def fake_binary(monkeypatch: pytest.MonkeyPatch) -> str:
+    monkeypatch.setattr(apptainer_provider, "_require_apptainer", lambda: FAKE_BINARY)
+    return FAKE_BINARY
+
+
+def _make_provider(
+    monkeypatch: pytest.MonkeyPatch, responder: Callable[[list[str]], tuple[int, str, str]], **kwargs: Any
+) -> tuple[Any, RunRecorder]:
+    provider = apptainer_provider.ApptainerProvider(**kwargs)
+    rec = RunRecorder(responder)
+    monkeypatch.setattr(provider, "_run", rec)
+    return provider, rec
+
+
+# --------------------------------------------------------------------------- #
+# Pure helpers
+# --------------------------------------------------------------------------- #
+def test_require_apptainer(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(apptainer_provider.shutil, "which", lambda _name: "/opt/apptainer")
+    assert apptainer_provider._require_apptainer() == "/opt/apptainer"
+
+    monkeypatch.setattr(apptainer_provider.shutil, "which", lambda _name: None)
+    with pytest.raises(RuntimeError, match="apptainer"):
+        apptainer_provider._require_apptainer()
+
+
+def test_coerce_config() -> None:
+    coerce = apptainer_provider._coerce_config
+    cls = apptainer_provider.ApptainerExecConfig
+
+    assert coerce(None, cls) == cls()
+    existing = cls(concurrency=4)
+    assert coerce(existing, cls) is existing
+    assert coerce({"concurrency": 7}, cls).concurrency == 7
+    with pytest.raises(TypeError):
+        coerce(123, cls)
+
+
+def test_config_validation() -> None:
+    with pytest.raises(ValueError, match="start_timeout_s"):
+        apptainer_provider.ApptainerCreateConfig(start_timeout_s=0)
+    with pytest.raises(ValueError, match="absolute"):
+        apptainer_provider.ApptainerCreateConfig(mount_point="relative")
+    with pytest.raises(ValueError, match="default_timeout_s"):
+        apptainer_provider.ApptainerExecConfig(default_timeout_s=-1)
+    with pytest.raises(ValueError, match="concurrency"):
+        apptainer_provider.ApptainerExecConfig(concurrency=0)
+    with pytest.raises(ValueError, match="timeout_s"):
+        apptainer_provider.ApptainerProbeConfig(timeout_s=0)
+    with pytest.raises(ValueError, match="deadline_s"):
+        apptainer_provider.ApptainerProbeConfig(deadline_s=0)
+    with pytest.raises(ValueError, match="stable_count"):
+        apptainer_provider.ApptainerProbeConfig(stable_count=0)
+    with pytest.raises(ValueError, match="stable_delay_s"):
+        apptainer_provider.ApptainerProbeConfig(stable_delay_s=-1)
+    # command=None disables the timeout_s validation gate.
+    assert apptainer_provider.ApptainerProbeConfig(command=None, timeout_s=0).command is None
+
+
+def test_resource_flags() -> None:
+    flags = apptainer_provider._resource_flags(
+        SandboxResources(cpu=2, memory_mib=1024, gpu=1, disk_gib=50, gpu_type="h100")
+    )
+    assert _contains_seq(flags, ["--cpus", "2"])
+    assert _contains_seq(flags, ["--memory", "1024m"])
+    assert "--nv" in flags
+    # disk_gib and gpu_type have no flag.
+    assert "50" not in flags and "h100" not in flags
+
+    assert apptainer_provider._resource_flags(SandboxResources()) == []
+
+
+def test_to_sandbox_status() -> None:
+    to_status = apptainer_provider._to_sandbox_status
+    assert to_status("running") is SandboxStatus.RUNNING
+    assert to_status("active") is SandboxStatus.RUNNING
+    assert to_status("starting") is SandboxStatus.STARTING
+    assert to_status("stopped") is SandboxStatus.STOPPED
+    assert to_status("failed") is SandboxStatus.ERROR
+    assert to_status("nonsense") is SandboxStatus.UNKNOWN
+    assert to_status(None) is SandboxStatus.UNKNOWN
+
+
+def test_path_under_mount() -> None:
+    under = apptainer_provider._path_under_mount
+    assert under("/sandbox", "/sandbox/a/b.txt") == "a/b.txt"
+    assert under("/sandbox", "/sandbox") == ""
+    assert under("/sandbox/", "/sandbox/x") == "x"
+    assert under("/sandbox", "/etc/passwd") is None
+
+
+def test_is_runtime_failure() -> None:
+    assert apptainer_provider._is_runtime_failure("FATAL: no instance found") is True
+    assert apptainer_provider._is_runtime_failure("instance not found") is True
+    assert apptainer_provider._is_runtime_failure("ls: cannot access") is False
+
+
+def test_constructor_requires_binary(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(apptainer_provider.shutil, "which", lambda _name: None)
+    with pytest.raises(RuntimeError):
+        apptainer_provider.ApptainerProvider()
+
+
+# --------------------------------------------------------------------------- #
+# create
+# --------------------------------------------------------------------------- #
+async def test_create_builds_argv_and_runs_probe(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        if "start" in argv:
+            return (0, "", "")
+        if "exec" in argv:
+            return (0, apptainer_provider.READY_PROBE_EXPECTED, "")
+        return (0, "", "")
+
+    provider, rec = _make_provider(
+        monkeypatch,
+        responder,
+        exec={"default_binds": ["/data:/data"], "extra_exec_args": ["--contain"]},
+        create={"extra_start_args": ["--cleanenv"]},
+    )
+
+    spec = SandboxSpec(
+        image="docker://ubuntu:22.04",
+        env={"FOO": "bar"},
+        resources={"cpu": 2, "memory_mib": 1024, "gpu": 1},
+        ttl_s=60,
+    )
+
+    with caplog.at_level("WARNING"):
+        handle = await provider.create(spec)
+
+    assert "ttl_s is not supported" in caplog.text
+    assert handle.provider_name == "apptainer"
+    assert handle.sandbox_id.startswith(apptainer_provider.INSTANCE_NAME_PREFIX)
+    assert handle.raw.staging_dir == staging
+    assert handle.raw.mount_point == "/sandbox"
+
+    start_argv = rec.calls[0]["argv"]
+    assert start_argv[:3] == [FAKE_BINARY, "instance", "start"]
+    assert _contains_seq(start_argv, ["--bind", f"{staging}:/sandbox"])
+    assert _contains_seq(start_argv, ["--bind", "/data:/data"])
+    assert _contains_seq(start_argv, ["--env", "FOO=bar"])
+    assert _contains_seq(start_argv, ["--cpus", "2.0"])
+    assert _contains_seq(start_argv, ["--memory", "1024m"])
+    assert "--nv" in start_argv
+    assert "--cleanenv" in start_argv
+    assert start_argv[-2:] == ["docker://ubuntu:22.04", handle.sandbox_id]
+
+    probe_argv = rec.calls[1]["argv"]
+    assert "exec" in probe_argv
+    assert f"instance://{handle.sandbox_id}" in probe_argv
+    assert probe_argv[-1] == apptainer_provider.READY_PROBE_COMMAND
+    assert rec.calls[1]["timeout_s"] == 30
+
+
+async def test_create_requires_image(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    with pytest.raises(apptainer_provider.ApptainerCreateError, match="image is required"):
+        await provider.create(SandboxSpec(image=None))
+
+
+async def test_create_start_failure_cleans_up(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (1, "", "boom"))
+
+    with pytest.raises(apptainer_provider.ApptainerCreateError, match="failed"):
+        await provider.create(SandboxSpec(image="docker://img"))
+    assert not staging.exists()
+
+
+async def test_create_start_timeout_cleans_up(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        raise TimeoutError("slow")
+
+    provider, _rec = _make_provider(monkeypatch, responder)
+    with pytest.raises(apptainer_provider.ApptainerCreateError, match="timed out"):
+        await provider.create(SandboxSpec(image="docker://img"))
+    assert not staging.exists()
+
+
+async def test_create_probe_failure_cleans_up(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        if "start" in argv:
+            return (0, "", "")
+        if "exec" in argv:
+            return (1, "", "probe broke")
+        return (0, "", "")  # instance stop during cleanup
+
+    provider, rec = _make_provider(monkeypatch, responder)
+    with pytest.raises(apptainer_provider.ApptainerCreateVerificationError):
+        await provider.create(SandboxSpec(image="docker://img"))
+
+    assert not staging.exists()
+    assert any("stop" in call["argv"] for call in rec.calls)
+
+
+# --------------------------------------------------------------------------- #
+# exec
+# --------------------------------------------------------------------------- #
+async def test_exec_normal_with_cwd_and_env(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    provider, rec = _make_provider(monkeypatch, lambda argv: (0, "hello", ""))
+    handle = _make_handle(tmp_path)
+
+    result = await provider.exec(handle, "echo hi", cwd="/work", env={"A": "b"})
+
+    assert result.return_code == 0
+    assert result.stdout == "hello"
+    assert result.error_type is None
+
+    argv = rec.calls[0]["argv"]
+    assert argv[:2] == [FAKE_BINARY, "exec"]
+    assert _contains_seq(argv, ["--pwd", "/work"])
+    assert _contains_seq(argv, ["--env", "A=b"])
+    assert argv[-4:] == ["instance://nemo-gym-x", "sh", "-c", "echo hi"]
+    assert rec.calls[0]["timeout_s"] == 180  # default exec timeout
+
+
+@pytest.mark.parametrize(
+    "user,fakeroot_for_root,expect_fakeroot,expect_su",
+    [
+        (None, True, False, False),
+        ("root", True, True, False),
+        (0, True, True, False),
+        ("root", False, False, False),
+        ("alice", True, True, True),
+    ],
+)
+async def test_exec_user_mapping(
+    fake_binary: str,
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    user: Any,
+    fakeroot_for_root: bool,
+    expect_fakeroot: bool,
+    expect_su: bool,
+) -> None:
+    provider, rec = _make_provider(
+        monkeypatch, lambda argv: (0, "", ""), exec={"fakeroot_for_root": fakeroot_for_root}
+    )
+    handle = _make_handle(tmp_path)
+
+    await provider.exec(handle, "whoami", user=user)
+    argv = rec.calls[0]["argv"]
+
+    assert ("--fakeroot" in argv) is expect_fakeroot
+    if expect_su:
+        expected = f"su -s /bin/sh -c {shlex.quote('whoami')} {shlex.quote(str(user))}"
+        assert argv[-1] == expected
+    else:
+        assert argv[-1] == "whoami"
+
+
+async def test_exec_timeout(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        raise TimeoutError("too slow")
+
+    provider, _rec = _make_provider(monkeypatch, responder)
+    result = await provider.exec(_make_handle(tmp_path), "sleep 99", timeout_s=1)
+
+    assert result.return_code == apptainer_provider.SANDBOX_RUNTIME_RETURN_CODE
+    assert result.error_type == "timeout"
+    assert result.stdout is None
+
+
+async def test_exec_runtime_failure(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (1, "", "FATAL: no instance found"))
+    result = await provider.exec(_make_handle(tmp_path), "echo hi")
+
+    assert result.return_code == apptainer_provider.SANDBOX_RUNTIME_RETURN_CODE
+    assert result.error_type == "sandbox"
+    assert "FATAL" in result.stderr
+
+
+async def test_exec_command_failure_is_not_runtime_error(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (2, "", "ls: cannot access"))
+    result = await provider.exec(_make_handle(tmp_path), "ls /nope")
+
+    assert result.return_code == 2
+    assert result.error_type is None
+
+
+# --------------------------------------------------------------------------- #
+# upload / download
+# --------------------------------------------------------------------------- #
+async def test_upload_fast_path(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(staging)
+
+    src = tmp_path / "src.txt"
+    src.write_bytes(b"payload")
+    await provider.upload_file(handle, src, "/sandbox/sub/dest.txt")
+
+    assert (staging / "sub" / "dest.txt").read_bytes() == b"payload"
+    assert rec.calls == []  # fast path never shells out
+
+
+async def test_upload_fallback(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(staging)
+
+    captured: dict[str, Any] = {}
+
+    async def fake_exec(h: SandboxHandle, command: str, *, user: Any = None, **_: Any) -> SandboxExecResult:
+        captured["command"] = command
+        captured["user"] = user
+        return SandboxExecResult(stdout="", stderr="", return_code=0)
+
+    monkeypatch.setattr(provider, "exec", fake_exec)
+
+    src = tmp_path / "src.txt"
+    src.write_bytes(b"payload")
+    await provider.upload_file(handle, src, "/etc/app.conf")
+
+    assert "cp" in captured["command"]
+    assert "/etc/app.conf" in captured["command"]
+    assert captured["user"] == "root"
+    assert list(staging.iterdir()) == []  # temp staging file cleaned up
+
+
+async def test_upload_fallback_error(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(staging)
+
+    async def fake_exec(h: SandboxHandle, command: str, *, user: Any = None, **_: Any) -> SandboxExecResult:
+        return SandboxExecResult(stdout="", stderr="denied", return_code=1)
+
+    monkeypatch.setattr(provider, "exec", fake_exec)
+
+    src = tmp_path / "src.txt"
+    src.write_bytes(b"payload")
+    with pytest.raises(RuntimeError, match="upload"):
+        await provider.upload_file(handle, src, "/etc/app.conf")
+    assert list(staging.iterdir()) == []
+
+
+async def test_download_fast_path(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    (staging / "out").mkdir(parents=True)
+    (staging / "out" / "r.txt").write_bytes(b"result")
+    provider, rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(staging)
+
+    dest = tmp_path / "local.txt"
+    await provider.download_file(handle, "/sandbox/out/r.txt", dest)
+
+    assert dest.read_bytes() == b"result"
+    assert rec.calls == []
+
+
+async def test_download_fallback(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(staging)
+
+    async def fake_exec(h: SandboxHandle, command: str, *, user: Any = None, **_: Any) -> SandboxExecResult:
+        # Simulate the in-container `cp` by writing the host side of the staging file.
+        container_tmp = shlex.split(command)[-1]
+        (staging / Path(container_tmp).name).write_bytes(b"remote-bytes")
+        return SandboxExecResult(stdout="", stderr="", return_code=0)
+
+    monkeypatch.setattr(provider, "exec", fake_exec)
+
+    dest = tmp_path / "local.txt"
+    await provider.download_file(handle, "/var/log/app.log", dest)
+
+    assert dest.read_bytes() == b"remote-bytes"
+    assert list(staging.iterdir()) == []  # temp staging file cleaned up
+
+
+async def test_download_fallback_error(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(staging)
+
+    async def fake_exec(h: SandboxHandle, command: str, *, user: Any = None, **_: Any) -> SandboxExecResult:
+        return SandboxExecResult(stdout="", stderr="missing", return_code=1)
+
+    monkeypatch.setattr(provider, "exec", fake_exec)
+
+    with pytest.raises(RuntimeError, match="download"):
+        await provider.download_file(handle, "/var/log/app.log", tmp_path / "local.txt")
+    assert list(staging.iterdir()) == []
+
+
+# --------------------------------------------------------------------------- #
+# status
+# --------------------------------------------------------------------------- #
+async def test_status_running(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    out = json.dumps({"instances": [{"instance": "nemo-gym-x"}]})
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, out, ""))
+    assert await provider.status(_make_handle(tmp_path)) is SandboxStatus.RUNNING
+
+
+async def test_status_stopped_when_absent(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    out = json.dumps({"instances": [{"instance": "other"}]})
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, out, ""))
+    assert await provider.status(_make_handle(tmp_path)) is SandboxStatus.STOPPED
+
+
+async def test_status_explicit_state(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    out = json.dumps({"instances": [{"instance": "nemo-gym-x", "state": "stopped"}]})
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, out, ""))
+    assert await provider.status(_make_handle(tmp_path)) is SandboxStatus.STOPPED
+
+
+async def test_status_unknown_paths(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    handle = _make_handle(tmp_path)
+
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (1, "", "err"))
+    assert await provider.status(handle) is SandboxStatus.UNKNOWN
+
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "not-json", ""))
+    assert await provider.status(handle) is SandboxStatus.UNKNOWN
+
+    def timeout_responder(argv: list[str]) -> tuple[int, str, str]:
+        raise TimeoutError("slow")
+
+    provider, _rec = _make_provider(monkeypatch, timeout_responder)
+    assert await provider.status(handle) is SandboxStatus.UNKNOWN
+
+
+# --------------------------------------------------------------------------- #
+# close / aclose
+# --------------------------------------------------------------------------- #
+async def test_close_success(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    await provider.close(_make_handle(staging))
+
+    assert not staging.exists()
+    assert _contains_seq(rec.calls[0]["argv"], [FAKE_BINARY, "instance", "stop", "nemo-gym-x"])
+
+
+async def test_close_missing_instance_is_success(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (1, "", "no instance found"))
+    await provider.close(_make_handle(staging))  # does not raise
+    assert not staging.exists()
+
+
+async def test_close_real_failure_raises_but_cleans_up(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (1, "", "permission denied"))
+    with pytest.raises(RuntimeError, match="stop failed"):
+        await provider.close(_make_handle(staging))
+    assert not staging.exists()
+
+
+async def test_close_timeout_raises(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        raise TimeoutError("slow")
+
+    provider, _rec = _make_provider(monkeypatch, responder)
+    with pytest.raises(TimeoutError):
+        await provider.close(_make_handle(staging))
+    assert not staging.exists()
+
+
+async def test_close_staging_removal_failure_is_logged(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, caplog: pytest.LogCaptureFixture
+) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+
+    def boom(path: Any, ignore_errors: bool = False) -> None:
+        raise OSError("locked")
+
+    monkeypatch.setattr(apptainer_provider.shutil, "rmtree", boom)
+    with caplog.at_level("WARNING"):
+        await provider.close(_make_handle(staging))  # does not raise
+    assert "failed to remove staging dir" in caplog.text
+
+
+async def test_aclose(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    assert await provider.aclose() is None
+
+
+# --------------------------------------------------------------------------- #
+# readiness probe
+# --------------------------------------------------------------------------- #
+async def test_verify_skipped_when_command_none(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""), probe={"command": None})
+
+    async def boom(*_a: Any, **_k: Any) -> SandboxExecResult:
+        raise AssertionError("exec should not be called when probe is disabled")
+
+    monkeypatch.setattr(provider, "exec", boom)
+    await provider._verify_created_handle(_make_handle(tmp_path))  # returns without exec
+
+
+async def test_verify_polls_until_stable(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    provider, _rec = _make_provider(
+        monkeypatch,
+        lambda argv: (0, "", ""),
+        probe={"deadline_s": 5, "stable_count": 2, "stable_delay_s": 0},
+    )
+
+    results = iter(
+        [
+            SandboxExecResult(stdout="", stderr="warming up", return_code=1),
+            SandboxExecResult(stdout=apptainer_provider.READY_PROBE_EXPECTED, stderr="", return_code=0),
+            SandboxExecResult(stdout=apptainer_provider.READY_PROBE_EXPECTED, stderr="", return_code=0),
+        ]
+    )
+
+    async def fake_exec(*_a: Any, **_k: Any) -> SandboxExecResult:
+        return next(results)
+
+    monkeypatch.setattr(provider, "exec", fake_exec)
+    await provider._verify_created_handle(_make_handle(tmp_path))  # 2 consecutive passes
+
+
+async def test_verify_deadline_exceeded(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    provider, _rec = _make_provider(
+        monkeypatch,
+        lambda argv: (0, "", ""),
+        probe={"deadline_s": 0.01, "stable_delay_s": 0.02},
+    )
+
+    async def always_fail(*_a: Any, **_k: Any) -> SandboxExecResult:
+        return SandboxExecResult(stdout="", stderr="nope", return_code=1)
+
+    monkeypatch.setattr(provider, "exec", always_fail)
+    with pytest.raises(apptainer_provider.ApptainerCreateVerificationError, match="within"):
+        await provider._verify_created_handle(_make_handle(tmp_path))
+
+
+# --------------------------------------------------------------------------- #
+# _run against real lightweight binaries (exercises subprocess plumbing)
+# --------------------------------------------------------------------------- #
+@pytest.mark.skipif(shutil.which("echo") is None, reason="echo not available")
+async def test_run_real_echo(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None:
+    provider = apptainer_provider.ApptainerProvider()
+    code, out, err = await provider._run([shutil.which("echo"), "hi"], timeout_s=10)
+    assert code == 0
+    assert out.strip() == "hi"
+    assert err == ""
+
+
+@pytest.mark.skipif(shutil.which("cat") is None, reason="cat not available")
+async def test_run_real_stdin(fake_binary: str) -> None:
+    provider = apptainer_provider.ApptainerProvider()
+    code, out, _err = await provider._run([shutil.which("cat")], timeout_s=10, stdin=b"piped")
+    assert code == 0
+    assert out == "piped"
+
+
+@pytest.mark.skipif(shutil.which("sleep") is None, reason="sleep not available")
+async def test_run_real_timeout(fake_binary: str) -> None:
+    provider = apptainer_provider.ApptainerProvider()
+    with pytest.raises(TimeoutError):
+        await provider._run([shutil.which("sleep"), "5"], timeout_s=0.1)

From ee2153a9010ef030cd23828f15093111c29580db Mon Sep 17 00:00:00 2001
From: Arti Jain <artij@nvidia.com>
Date: Wed, 24 Jun 2026 00:16:40 +0000
Subject: [PATCH 2/7] update to allow piping input via stdin for longer inputs

Signed-off-by: Arti Jain <artij@nvidia.com>
---
 nemo_gym/sandbox/providers/apptainer/README.md   | 15 +++++++++++++++
 nemo_gym/sandbox/providers/apptainer/provider.py |  7 ++++++-
 tests/unit_tests/test_apptainer_provider.py      |  6 ++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/nemo_gym/sandbox/providers/apptainer/README.md b/nemo_gym/sandbox/providers/apptainer/README.md
index 21f3279465..fd902f3592 100644
--- a/nemo_gym/sandbox/providers/apptainer/README.md
+++ b/nemo_gym/sandbox/providers/apptainer/README.md
@@ -184,6 +184,21 @@ The neutral `user` argument to `exec` maps onto Apptainer like this:
 | `"root"` or `0` | Add `--fakeroot` (gated by `exec.fakeroot_for_root`). |
 | other name / uid | Add `--fakeroot` and wrap the command in `su -s /bin/sh -c '<cmd>' <user>`. |
 
+### Piping input via `stdin`
+
+`exec` accepts an optional `stdin: bytes | None` argument (an apptainer-provider
+extension beyond the neutral `SandboxProvider` protocol). When set, the bytes are piped
+to the command's standard input:
+
+```python
+await provider.exec(handle, "cat > /work/out.txt", stdin=b"large payload")
+```
+
+This is the right way to pass inputs that would exceed the Linux kernel's per-argument
+size limit (`MAX_ARG_STRLEN`, 128 KiB with 4 KiB pages) — e.g. a long prompt — which
+would otherwise fail with `E2BIG` if passed as a command-line argument. It defaults to
+`None`, so existing callers are unaffected.
+
 ### Resource limits
 
 `SandboxResources` is translated to cgroup flags on `instance start`:
diff --git a/nemo_gym/sandbox/providers/apptainer/provider.py b/nemo_gym/sandbox/providers/apptainer/provider.py
index ff7d280464..339e833871 100644
--- a/nemo_gym/sandbox/providers/apptainer/provider.py
+++ b/nemo_gym/sandbox/providers/apptainer/provider.py
@@ -385,6 +385,7 @@ async def exec(
         env: dict[str, str] | None = None,
         timeout_s: int | float | None = None,
         user: str | int | None = None,
+        stdin: bytes | None = None,
     ) -> SandboxExecResult:
         """Run a command inside the instance.
 
@@ -392,6 +393,10 @@ async def exec(
         - None            -> run as the default (launching) user.
         - "root" / 0      -> add --fakeroot (root inside the container).
         - other user/uid  -> --fakeroot + wrap in ``su`` to switch to that user.
+
+        ``stdin``, when given, is piped to the command's standard input. This is an
+        apptainer-provider extension to the base protocol, useful for feeding large
+        inputs (e.g. prompts) that would exceed the kernel's argv length limit.
         """
         inst = handle.raw
 
@@ -418,7 +423,7 @@ async def exec(
         effective_timeout = timeout_s if timeout_s is not None else self._exec_config.default_timeout_s
 
         try:
-            code, out, err = await self._run(argv, timeout_s=effective_timeout)
+            code, out, err = await self._run(argv, timeout_s=effective_timeout, stdin=stdin)
         except TimeoutError as e:
             return SandboxExecResult(
                 stdout=None,
diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py
index c595738200..e705d95191 100644
--- a/tests/unit_tests/test_apptainer_provider.py
+++ b/tests/unit_tests/test_apptainer_provider.py
@@ -336,6 +336,12 @@ async def test_exec_user_mapping(
         assert argv[-1] == "whoami"
 
 
+async def test_exec_passes_stdin(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    provider, rec = _make_provider(monkeypatch, lambda argv: (0, "ok", ""))
+    await provider.exec(_make_handle(tmp_path), "cat", stdin=b"prompt-bytes")
+    assert rec.calls[0]["stdin"] == b"prompt-bytes"
+
+
 async def test_exec_timeout(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
     def responder(argv: list[str]) -> tuple[int, str, str]:
         raise TimeoutError("too slow")

From be7cf87d611b1ddbeae78f61c3faba2da088f091 Mon Sep 17 00:00:00 2001
From: Arti Jain <artij@nvidia.com>
Date: Wed, 24 Jun 2026 19:57:28 +0000
Subject: [PATCH 3/7] added a function that accepts one or many binds via
 provider_options.

Signed-off-by: Arti Jain <artij@nvidia.com>
---
 .../sandbox/providers/apptainer/README.md     |  1 +
 .../sandbox/providers/apptainer/provider.py   | 25 +++++++++-
 tests/unit_tests/test_apptainer_provider.py   | 49 +++++++++++++++++++
 3 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/nemo_gym/sandbox/providers/apptainer/README.md b/nemo_gym/sandbox/providers/apptainer/README.md
index fd902f3592..efaedc0eb2 100644
--- a/nemo_gym/sandbox/providers/apptainer/README.md
+++ b/nemo_gym/sandbox/providers/apptainer/README.md
@@ -147,6 +147,7 @@ The spec is provider-neutral; the Apptainer provider uses these fields:
 | `workdir` | Default working directory for `exec` (applied as `--pwd`). |
 | `files` | Seed files written into the sandbox at `start()` (handled by the sandbox API via `upload`). |
 | `resources` | Mapped to cgroup flags (see below). |
+| `provider_options` | `binds`: a `"src:dst[:opts]"` string or list of them — extra per-sandbox `--bind` mounts added at instance start (on top of the staging mount and `exec.default_binds`). |
 | `ttl_s` | **Not supported** — ignored with a warning. Tear down via `stop()`/`close()` instead. |
 
 ## How it works
diff --git a/nemo_gym/sandbox/providers/apptainer/provider.py b/nemo_gym/sandbox/providers/apptainer/provider.py
index 339e833871..0b89f94f24 100644
--- a/nemo_gym/sandbox/providers/apptainer/provider.py
+++ b/nemo_gym/sandbox/providers/apptainer/provider.py
@@ -191,6 +191,22 @@ def _is_runtime_failure(stderr: str) -> bool:
     return any(marker in low for marker in APPTAINER_RUNTIME_ERROR_MARKERS)
 
 
+def _coerce_binds(value: Any) -> list[str]:
+    """Normalize ``spec.provider_options['binds']`` into a list of bind strings.
+
+    Accepts ``None`` (no extra binds), one ``"src:dst[:opts]"`` string, or a
+    list/tuple of such strings; the binds are added on top of the staging mount
+    and ``exec.default_binds``. Anything else raises ``ApptainerCreateError``.
+    """
+    if value is None:
+        return []
+    if isinstance(value, str):
+        return [value]
+    if isinstance(value, (list, tuple)) and all(isinstance(v, str) for v in value):
+        return list(value)
+    raise ApptainerCreateError(f"provider_options['binds'] must be a string or list of strings, got {value!r}")
+
+
 class ApptainerProvider:
     """Sandbox provider backed by the local Apptainer CLI."""
 
@@ -256,8 +272,8 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
            mount_point = self._create_config.mount_point, generate a unique
            name = INSTANCE_NAME_PREFIX + uuid4().hex.
         4. Build argv: [binary, "instance", "start", <--bind staging:mount_point>,
-           <config default_binds>, <--env ...>, _resource_flags(spec.resources),
-           <extra_start_args>, image, name].
+           <config default_binds>, <spec.provider_options["binds"]>, <--env ...>,
+           _resource_flags(spec.resources), <extra_start_args>, image, name].
         5. await self._run(argv, timeout_s=self._create_config.start_timeout_s);
            on non-zero return, clean up the staging dir and raise
            ApptainerCreateError(stderr).
@@ -276,6 +292,9 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
         if image is None:
             raise ApptainerCreateError("spec.image is required for the apptainer provider")
 
+        # Extra per-sandbox bind mounts (validated before we allocate anything).
+        extra_binds = _coerce_binds(spec.provider_options.get("binds"))
+
         # host staging dir (bind-mounted in), mount point, unique name.
         mount_point = self._create_config.mount_point
         staging_dir = Path(
@@ -288,6 +307,8 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
         argv += ["--bind", f"{staging_dir}:{mount_point}"]
         for bind in self._exec_config.default_binds:
             argv += ["--bind", bind]
+        for bind in extra_binds:
+            argv += ["--bind", bind]
         for key, value in spec.env.items():
             argv += ["--env", f"{key}={value}"]
         argv += _resource_flags(spec.resources)
diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py
index e705d95191..09aa80bb98 100644
--- a/tests/unit_tests/test_apptainer_provider.py
+++ b/tests/unit_tests/test_apptainer_provider.py
@@ -226,6 +226,55 @@ def responder(argv: list[str]) -> tuple[int, str, str]:
     assert rec.calls[1]["timeout_s"] == 30
 
 
+async def test_create_extra_binds_from_provider_options(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        if "exec" in argv:
+            return (0, apptainer_provider.READY_PROBE_EXPECTED, "")
+        return (0, "", "")
+
+    provider, rec = _make_provider(monkeypatch, responder, exec={"default_binds": ["/data:/data"]})
+
+    spec = SandboxSpec(
+        image="docker://img",
+        provider_options={"binds": ["/host/a:/code/a", "/host/b:/code/b:ro"]},
+    )
+    await provider.create(spec)
+
+    start_argv = rec.calls[0]["argv"]
+    # staging + default_binds + the two per-sandbox binds are all present
+    assert _contains_seq(start_argv, ["--bind", f"{staging}:/sandbox"])
+    assert _contains_seq(start_argv, ["--bind", "/data:/data"])
+    assert _contains_seq(start_argv, ["--bind", "/host/a:/code/a"])
+    assert _contains_seq(start_argv, ["--bind", "/host/b:/code/b:ro"])
+
+
+async def test_create_extra_binds_accepts_single_string(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        if "exec" in argv:
+            return (0, apptainer_provider.READY_PROBE_EXPECTED, "")
+        return (0, "", "")
+
+    provider, rec = _make_provider(monkeypatch, responder)
+    await provider.create(SandboxSpec(image="docker://img", provider_options={"binds": "/host/x:/code/x"}))
+    assert _contains_seq(rec.calls[0]["argv"], ["--bind", "/host/x:/code/x"])
+
+
+async def test_create_extra_binds_invalid_type_raises(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    with pytest.raises(apptainer_provider.ApptainerCreateError, match="must be a string or list"):
+        await provider.create(SandboxSpec(image="docker://img", provider_options={"binds": 123}))
+
+
 async def test_create_requires_image(fake_binary: str, monkeypatch: pytest.MonkeyPatch) -> None:
     provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
     with pytest.raises(apptainer_provider.ApptainerCreateError, match="image is required"):

From d29eee70874487005e2e6e4ee8bb208532b00241 Mon Sep 17 00:00:00 2001
From: Arti Jain <artij@nvidia.com>
Date: Wed, 24 Jun 2026 22:40:37 +0000
Subject: [PATCH 4/7] added support for daemon running

Signed-off-by: Arti Jain <artij@nvidia.com>
---
 .../sandbox/providers/apptainer/README.md     | 20 ++++++++
 .../sandbox/providers/apptainer/provider.py   | 47 ++++++++++++++++++-
 tests/unit_tests/test_apptainer_provider.py   | 42 ++++++++++++++++-
 3 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/nemo_gym/sandbox/providers/apptainer/README.md b/nemo_gym/sandbox/providers/apptainer/README.md
index efaedc0eb2..8344f000da 100644
--- a/nemo_gym/sandbox/providers/apptainer/README.md
+++ b/nemo_gym/sandbox/providers/apptainer/README.md
@@ -164,6 +164,26 @@ The spec is provider-neutral; the Apptainer provider uses these fields:
 Instances are named `nemo-gym-<uuid>` and persist across `exec` calls, so state written
 by one command is visible to the next — agents rely on this.
 
+### Why `create` runs `instance start` in "daemonize" mode
+
+`apptainer instance start` is different from every other command the provider runs: it
+**launches a long-lived background instance**, then the foreground process returns. The
+started instance inherits the foreground process's stdout/stderr. If we captured output
+the normal way (`communicate()`, which reads the pipes until they close), the call would
+block until the *instance* exits — i.e. it would appear to hang for the full
+`start_timeout_s` (default 600s) even though the container came up in ~1s.
+
+To avoid this, `_run` takes a `daemonize` flag:
+
+| `daemonize` | Used by | Behavior |
+|---|---|---|
+| `False` (default) | `exec`, `status`, `close`, file copies | Pipe-captures stdout/stderr and waits for the command to finish. Supports `stdin`. |
+| `True` | `create` (`instance start`) only | Captures stdout/stderr to temp files (which the lingering instance may inherit harmlessly) and waits only for the **foreground** process to exit. No `stdin`. |
+
+`daemonize=True` is **not** a general default — it intentionally drops `stdin` support (a
+background launcher has nothing to read), which commands like `exec` need to pipe in large
+prompts. Only flip it on for a command that spawns a process which outlives the call.
+
 ### File transfer: a shared bind-mounted directory
 
 On create, the provider makes a temporary host directory and bind-mounts it into the
diff --git a/nemo_gym/sandbox/providers/apptainer/provider.py b/nemo_gym/sandbox/providers/apptainer/provider.py
index 0b89f94f24..a3143d28e4 100644
--- a/nemo_gym/sandbox/providers/apptainer/provider.py
+++ b/nemo_gym/sandbox/providers/apptainer/provider.py
@@ -231,14 +231,27 @@ async def _run(
         *,
         timeout_s: float | None,
         stdin: bytes | None = None,
+        daemonize: bool = False,
     ) -> tuple[int, str, str]:
         """Run an apptainer CLI command. Returns (return_code, stdout, stderr).
 
         Enforces timeout via asyncio.wait_for and kills the whole process group
         on timeout so child processes do not linger. Bounds concurrency with a
         shared semaphore. Decodes output with errors="replace".
+
+        Set ``daemonize=True`` for commands that fork a long-lived background
+        process (``apptainer instance start``). Such commands hand the started
+        instance a copy of the child's stdout/stderr, so reading those pipes to
+        EOF (``communicate()``) blocks until the *instance* exits — i.e. the call
+        appears to hang until ``timeout_s`` even though the foreground process
+        finished in under a second. In that mode we capture output to temp files
+        (which the instance may inherit harmlessly) and only wait for the
+        foreground process to exit.
         """
         async with self._semaphore:
+            if daemonize:
+                return await self._run_daemonizing(argv, timeout_s=timeout_s)
+
             proc = await asyncio.create_subprocess_exec(
                 *argv,
                 stdin=asyncio.subprocess.PIPE if stdin is not None else None,
@@ -261,6 +274,38 @@ async def _run(
             return_code = proc.returncode if proc.returncode is not None else SANDBOX_RUNTIME_RETURN_CODE
             return return_code, stdout_b.decode(errors="replace"), stderr_b.decode(errors="replace")
 
+    async def _run_daemonizing(self, argv: list[str], *, timeout_s: float | None) -> tuple[int, str, str]:
+        """Run a command that daemonizes a child (e.g. ``apptainer instance start``).
+
+        Captures stdout/stderr to temp files instead of pipes so the long-lived
+        instance inheriting those descriptors cannot wedge the read, then waits
+        only for the foreground process to exit.
+        """
+        with tempfile.TemporaryFile() as out_f, tempfile.TemporaryFile() as err_f:
+            proc = await asyncio.create_subprocess_exec(
+                *argv,
+                stdin=asyncio.subprocess.DEVNULL,
+                stdout=out_f,
+                stderr=err_f,
+                start_new_session=True,
+            )
+            try:
+                await asyncio.wait_for(proc.wait(), timeout=timeout_s)
+            except asyncio.TimeoutError as e:
+                with contextlib.suppress(ProcessLookupError):
+                    os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
+                with contextlib.suppress(Exception):
+                    await proc.wait()
+                raise TimeoutError(f"apptainer command timed out after {timeout_s:g}s: {argv}") from e
+
+            out_f.seek(0)
+            err_f.seek(0)
+            stdout_b = out_f.read()
+            stderr_b = err_f.read()
+
+        return_code = proc.returncode if proc.returncode is not None else SANDBOX_RUNTIME_RETURN_CODE
+        return return_code, stdout_b.decode(errors="replace"), stderr_b.decode(errors="replace")
+
     async def create(self, spec: SandboxSpec) -> SandboxHandle:
         """Start an apptainer instance and return a ready handle.
 
@@ -317,7 +362,7 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
 
         # start the instance; clean up the staging dir on any failure.
         try:
-            code, _out, err = await self._run(argv, timeout_s=self._create_config.start_timeout_s)
+            code, _out, err = await self._run(argv, timeout_s=self._create_config.start_timeout_s, daemonize=True)
         except TimeoutError as e:
             shutil.rmtree(staging_dir, ignore_errors=True)
             raise ApptainerCreateError(f"apptainer instance start timed out for image={image!r}: {e}") from e
diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py
index 09aa80bb98..2e5cc0b1a3 100644
--- a/tests/unit_tests/test_apptainer_provider.py
+++ b/tests/unit_tests/test_apptainer_provider.py
@@ -45,9 +45,9 @@ def __init__(self, responder: Callable[[list[str]], tuple[int, str, str]]) -> No
         self._responder = responder
 
     async def __call__(
-        self, argv: list[str], *, timeout_s: float | None, stdin: bytes | None = None
+        self, argv: list[str], *, timeout_s: float | None, stdin: bytes | None = None, daemonize: bool = False
     ) -> tuple[int, str, str]:
-        self.calls.append({"argv": list(argv), "timeout_s": timeout_s, "stdin": stdin})
+        self.calls.append({"argv": list(argv), "timeout_s": timeout_s, "stdin": stdin, "daemonize": daemonize})
         return self._responder(list(argv))
 
 
@@ -715,3 +715,41 @@ async def test_run_real_timeout(fake_binary: str) -> None:
     provider = apptainer_provider.ApptainerProvider()
     with pytest.raises(TimeoutError):
         await provider._run([shutil.which("sleep"), "5"], timeout_s=0.1)
+
+
+@pytest.mark.skipif(shutil.which("sh") is None, reason="sh not available")
+async def test_run_daemonizing_returns_despite_lingering_child(fake_binary: str) -> None:
+    """Regression: a backgrounded child inheriting stdout must not wedge the read.
+
+    Mirrors ``apptainer instance start``, which forks a long-lived instance that
+    keeps the child's stdout/stderr open. The pipe-based path would block on EOF
+    until timeout; the daemonizing path waits only for the foreground process.
+    """
+    provider = apptainer_provider.ApptainerProvider()
+    # Foreground prints and exits immediately; the backgrounded `sleep` holds the
+    # inherited stdout fd open well past the (generous) timeout.
+    argv = [shutil.which("sh"), "-c", "sleep 30 & printf started"]
+    code, out, _err = await provider._run(argv, timeout_s=10, daemonize=True)
+    assert code == 0
+    assert out == "started"
+
+
+async def test_create_uses_daemonize_for_instance_start(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        if "exec" in argv:
+            return (0, apptainer_provider.READY_PROBE_EXPECTED, "")
+        return (0, "", "")
+
+    provider, rec = _make_provider(monkeypatch, responder)
+    await provider.create(SandboxSpec(image="docker://ubuntu:22.04"))
+
+    start_call = next(c for c in rec.calls if "start" in c["argv"])
+    assert start_call["daemonize"] is True
+    # The readiness probe (exec) must NOT use the daemonizing path.
+    exec_calls = [c for c in rec.calls if "exec" in c["argv"]]
+    assert exec_calls and all(c["daemonize"] is False for c in exec_calls)

From 41c86d09fde4356fabb7aec3a3418b806f800ebb Mon Sep 17 00:00:00 2001
From: Arti Jain <artij@nvidia.com>
Date: Thu, 25 Jun 2026 17:55:20 +0000
Subject: [PATCH 5/7] feat(claude_code_agent): surface num_turns in parsed
 metadata

Parse Claude Code's authoritative num_turns from the stream-json result
event and include it in the returned metadata.

Signed-off-by: Arti Jain <artij@nvidia.com>
---
 responses_api_agents/claude_code_agent/app.py          |  9 ++++++++-
 .../claude_code_agent/tests/test_app.py                | 10 ++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/responses_api_agents/claude_code_agent/app.py b/responses_api_agents/claude_code_agent/app.py
index 2d41e24c1d..3acf5ca06a 100644
--- a/responses_api_agents/claude_code_agent/app.py
+++ b/responses_api_agents/claude_code_agent/app.py
@@ -86,6 +86,7 @@ def parse_stream_json(stdout: str) -> tuple[list[Any], dict]:
     buffered_think: str | None = None
     total_input = 0
     total_output = 0
+    num_turns: Optional[int] = None
 
     for event in raw_events:
         etype = event.get("type")
@@ -94,6 +95,9 @@ def parse_stream_json(stdout: str) -> tuple[list[Any], dict]:
             usage = event.get("usage") or {}
             total_input += int(usage.get("input_tokens") or 0)
             total_output += int(usage.get("output_tokens") or 0)
+            # Claude Code's authoritative turn counter (what --max-turns bounds).
+            if event.get("num_turns") is not None:
+                num_turns = int(event["num_turns"])
 
         elif etype == "assistant":
             message = event.get("message", {})
@@ -168,7 +172,10 @@ def parse_stream_json(stdout: str) -> tuple[list[Any], dict]:
                     )
                 )
 
-    return output_items, {"input_tokens": total_input, "output_tokens": total_output}
+    metadata: dict = {"input_tokens": total_input, "output_tokens": total_output}
+    if num_turns is not None:
+        metadata["num_turns"] = num_turns
+    return output_items, metadata
 
 
 def _extract_instruction(body_input) -> tuple[str, Optional[str]]:
diff --git a/responses_api_agents/claude_code_agent/tests/test_app.py b/responses_api_agents/claude_code_agent/tests/test_app.py
index 5c234f9092..dc9f6caeb0 100644
--- a/responses_api_agents/claude_code_agent/tests/test_app.py
+++ b/responses_api_agents/claude_code_agent/tests/test_app.py
@@ -345,6 +345,16 @@ def test_result_event_accumulates_usage(self) -> None:
         assert usage["input_tokens"] == 100
         assert usage["output_tokens"] == 50
 
+    def test_result_event_exposes_num_turns(self) -> None:
+        result = _event("result", num_turns=9, usage={"input_tokens": 1, "output_tokens": 1})
+        _, usage = parse_stream_json(result)
+        assert usage["num_turns"] == 9
+
+    def test_num_turns_absent_when_no_result_event(self) -> None:
+        assistant = self._assistant([{"type": "text", "text": "hi"}])
+        _, usage = parse_stream_json(assistant)
+        assert "num_turns" not in usage
+
 
 class TestConfigYaml:
     def test_module_parses(self) -> None:

From 7d62f44bc11b667b1d65d3761089ce3ab64a034f Mon Sep 17 00:00:00 2001
From: Arti Jain <artij@nvidia.com>
Date: Thu, 25 Jun 2026 17:58:30 +0000
Subject: [PATCH 6/7] feat(cvdp): Apptainer-based CVDP resources server and
 agent

Add the CVDP code-generation environment built on the Apptainer sandbox
provider: resources server with harness execution, non-agentic and
agentic cvdp_agent harnesses, configs, tests, and example dataset.

Signed-off-by: Arti Jain <artij@nvidia.com>
---
 .gitignore                                    |   3 +
 README.md                                     |   1 -
 resources_servers/cvdp/README.md              | 114 ++-
 resources_servers/cvdp/app.py                 | 577 +--------------
 resources_servers/cvdp/configs/cvdp.yaml      |  28 +-
 .../cvdp/data/example_agentic.jsonl           |   5 +
 resources_servers/cvdp/harness.py             | 606 ++++++++++++++++
 resources_servers/cvdp/tests/test_app.py      | 316 ++++++---
 .../cvdp_agent/agentic_app.py                 | 654 ++++++++++++++++++
 .../cvdp_agent/configs/cvdp_agent.yaml        |  13 +
 .../configs/cvdp_agent_agentic.yaml           |  29 +
 .../cvdp_agent/tests/test_agentic_app.py      | 504 ++++++++++++++
 12 files changed, 2173 insertions(+), 677 deletions(-)
 create mode 100644 resources_servers/cvdp/data/example_agentic.jsonl
 create mode 100644 resources_servers/cvdp/harness.py
 create mode 100644 responses_api_agents/cvdp_agent/agentic_app.py
 create mode 100644 responses_api_agents/cvdp_agent/configs/cvdp_agent_agentic.yaml
 create mode 100644 responses_api_agents/cvdp_agent/tests/test_agentic_app.py

diff --git a/.gitignore b/.gitignore
index 522c150ba8..c78e6df9bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -237,3 +237,6 @@ env.yaml
 
 # Backup files
 *.backup
+
+# Claude Code agent runtime artifacts
+**/.claude_node/
diff --git a/README.md b/README.md
index 71d6e511af..560b6a4811 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,6 @@ The Dataset column links to publicly available datasets (e.g., on HuggingFace).
 | Code Gen                                      | coding                | Model must submit the right code to solve a problem                                                                                                                                                                          | Improve competitive coding capabilities                                                                                               | ✓     | ✓          | Apache 2.0                                                | <a href='resources_servers/code_gen/configs/code_gen.yaml'>code_gen.yaml</a>                                                                                                                                                | <a href='https://huggingface.co/datasets/nvidia/nemotron-RL-coding-competitive_coding'>nemotron-RL-coding-competitive_coding</a>                               |
 | Competitive Coding Challenges                 | coding                | Execution of competitive programming competition questions                                                                                                                                                                   | Improve competitive coding capabilities on contest-style problems                                                                     | -     | -          | -                                                         | <a href='resources_servers/competitive_coding_challenges/configs/competitive_coding_challenges.yaml'>competitive_coding_challenges.yaml</a>                                                                                 | -                                                                                                                                                              |
 | Critpt                                        | other                 | Research-level physics problems scored by the Artificial Analysis API                                                                                                                                                        | Evaluate model performance on research-level physics reasoning                                                                        | -     | -          | -                                                         | <a href='resources_servers/critpt/configs/critpt.yaml'>critpt.yaml</a>                                                                                                                                                      | -                                                                                                                                                              |
-| Cvdp                                          | coding                | CVDP benchmark dataset for code generation                                                                                                                                                                                   | Evaluate RTL code generation capabilities                                                                                             | -     | ✓          | -                                                         | <a href='resources_servers/cvdp/configs/cvdp.yaml'>cvdp.yaml</a>                                                                                                                                                            | -                                                                                                                                                              |
 | Equivalence Llm Judge                         | agent                 | Short bash command generation questions with LLM-as-a-judge                                                                                                                                                                  | Improve foundational bash and IF capabilities                                                                                         | ✓     | ✓          | GNU General Public License v3.0                           | <a href='resources_servers/equivalence_llm_judge/configs/nl2bash-equivalency.yaml'>nl2bash-equivalency.yaml</a>                                                                                                             | -                                                                                                                                                              |
 | Equivalence Llm Judge                         | knowledge             | Short answer questions with LLM-as-a-judge                                                                                                                                                                                   | Improve knowledge-related benchmarks like GPQA / HLE                                                                                  | -     | -          | -                                                         | <a href='resources_servers/equivalence_llm_judge/configs/equivalence_llm_judge.yaml'>equivalence_llm_judge.yaml</a>                                                                                                         | -                                                                                                                                                              |
 | Equivalence Rule                              | knowledge             | Question - Answering with rule-based reward                                                                                                                                                                                  | Improve retrieval and counting capabilities                                                                                           | -     | -          | -                                                         | <a href='resources_servers/equivalence_rule/configs/lc.yaml'>lc.yaml</a>                                                                                                                                                    | -                                                                                                                                                              |
diff --git a/resources_servers/cvdp/README.md b/resources_servers/cvdp/README.md
index 67db483f47..0bd177ec2d 100644
--- a/resources_servers/cvdp/README.md
+++ b/resources_servers/cvdp/README.md
@@ -1,4 +1,4 @@
-# CVDP Benchmark 
+# CVDP Benchmark
 
 This resources server is for model evaluation purposes. It is reproducing [CVDP](https://github.com/NVlabs/cvdp_benchmark).
 
@@ -14,31 +14,116 @@ JSONL entry so the server is self-contained.
 
 Mirrors `repository.py` in the [CVDP source](https://github.com/NVlabs/cvdp_benchmark):
 
-1. Parse model response via `ModelHelpers.parse_model_response()`
+1. Obtain the candidate RTL: grade the files the agent wrote on disk (`rtl_files` in the verify request, agentic flow) when present, otherwise parse the model's text response via `ModelHelpers.parse_model_response()`
 2. Write harness files to temp workspace — applies image placeholder substitutions
 3. Write extracted RTL to `workdir/rtl/`
-4. For each service in `docker-compose.yml`, pull the Docker image as a cached SIF file and run via `apptainer exec` with `--bind` mounts for `rtl/`, `verif/`, `docs/`, `src/`, `rundir/`
+4. For each service in `docker-compose.yml`, pull the Docker image as a cached SIF file and run it through the Apptainer sandbox provider (`instance start` + `exec`) with `--bind` mounts for `rtl/`, `verif/`, `docs/`, `src/`, `rundir/`
 5. Exit code `0` across all services → reward `1.0`; any failure → reward `0.0`
 
+Code layout: `app.py` owns the HTTP `verify` contract and reward scoring; the sandbox execution (docker-compose → Apptainer translation, SIF cache, provider lifecycle) lives in `harness.py`'s `HarnessRunner`.
+
+> **Note:** Both the verification harness here and the agentic agent share the same `ApptainerProvider`. Because `apptainer instance start` launches a long-lived instance, the provider starts it in "daemonize" mode (captures output to temp files and waits only for the foreground process) so the call returns immediately instead of blocking until the instance exits. This is internal to `create()` — nothing to configure here. See the [provider README](../../nemo_gym/sandbox/providers/apptainer/README.md#why-create-runs-instance-start-in-daemonize-mode).
+
 ## Configuration
 
 
-| Field                     | Default                 | Description                                                                                       |
-| ------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------- |
-| `oss_sim_image`           | `ghcr.io/hdl/sim/osvb`  | Container image for open-source simulation (Icarus)                                               |
-| `oss_pnr_image`           | `""`                    | Container image for place-and-route problems                                                      |
-| `eda_sim_image`           | `""`                    | Commercial EDA image (Cadence Xcelium etc.)                                                       |
-| `container_timeout`       | `600`                   | Seconds before an Apptainer run is killed                                                         |
-| `num_processes`           | `4`                     | Max concurrent Apptainer jobs                                                                     |
-| `sif_cache_dir`           | `~/.cache/nemo-gym/sif` | Directory for cached SIF images pulled from Docker registries                                     |
-| `harness_workspace_dir`   | `""`                    | Optional host directory where per-rollout temp workspaces are created (default: system temp)      |
+| Field                     | Default                 | Description                                                                                                                                                                                                                                                            |
+| ------------------------- | ----------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `oss_sim_image`           | `ghcr.io/hdl/sim/osvb`  | Container image for open-source simulation (Icarus)                                                                                                                                                                                                                    |
+| `oss_pnr_image`           | `""`                    | Container image for place-and-route problems                                                                                                                                                                                                                           |
+| `eda_sim_image`           | `""`                    | Commercial EDA image (Cadence Xcelium etc.)                                                                                                                                                                                                                            |
+| `container_timeout`       | `600`                   | Seconds before an Apptainer run is killed                                                                                                                                                                                                                              |
+| `num_processes`           | `4`                     | Max concurrent Apptainer jobs                                                                                                                                                                                                                                          |
+| `sif_cache_dir`           | `~/.cache/nemo-gym/sif` | Directory for cached SIF images pulled from Docker registries                                                                                                                                                                                                          |
+| `harness_workspace_dir`   | `""`                    | Optional host directory where per-rollout temp workspaces are created (default: system temp)                                                                                                                                                                           |
 | `container_tmp_bind_path` | `""`                    | If set, redirects in-container temp (e.g. `/tmp`) to per-rollout host storage and forces temp env vars (`TMPDIR`, `XCELIUM_TMPDIR`, `CDS_LOCK`, `JAVA_TOOL_OPTIONS`) — useful when default `/tmp` is too small or tools (Cadence/Java) write large temp/lock artifacts |
 
+
 **Note**: To run the commercial subset, pass the EDA image name in the yaml config file (/scratch/artij/Gym/resources_servers/cvdp/configs/cvdp.yaml).
+
 ```
 eda_sim_image: cvdp-cadence-verif:latest
 ```
 
+## Agents
+
+There are two ways to drive this resources server:
+
+- **Non-agentic** (`cvdp_agent`, `responses_api_agents/cvdp_agent/app.py`, config `configs/cvdp_agent.yaml`): the model emits the RTL directly in its text response; the server parses it out and runs the harness.
+- **Agentic** (`cvdp_agent_agentic`, `responses_api_agents/cvdp_agent/agentic_app.py`, config `configs/cvdp_agent_agentic.yaml`): runs Claude Code **inside** the EDA sim container so it can edit files on disk and self-test with the in-container EDA tools, then reports the files it wrote back to the server as `rtl_files` for grading. See [`responses_api_agents/cvdp_agent/`](../../responses_api_agents/cvdp_agent/).
+
+### Agentic agent settings (`configs/cvdp_agent_agentic.yaml`)
+
+
+| Field                | Default                   | Description                                                                                    |
+| -------------------- | ------------------------- | ---------------------------------------------------------------------------------------------- |
+| `model`              | `${anthropic_model_name}` | Claude model used inside the container                                                         |
+| `anthropic_api_key`  | `${anthropic_api_key}`    | API key for Claude (set via env.yaml)                                                          |
+| `anthropic_base_url` | `${anthropic_base_url}`   | Anthropic-compatible endpoint                                                                  |
+| `sim_image`          | `nvidia/cvdp-sim:v1.0.0`  | EDA sim image Claude runs inside (pulled/converted to a cached `.sif`)                         |
+| `sif_path`           | `null`                    | Explicit `.sif` to use instead of pulling `sim_image`                                          |
+| `sif_cache_dir`      | `""`                      | SIF cache dir (defaults to `~/.cache/nemo-gym/sif`)                                            |
+| `claude_node_dir`    | `""`                      | Host Node+Claude prefix to bind into the container (defaults to a built-in self-contained one) |
+| `container_workdir`  | `/code`                   | Workspace mount point + cwd + `HOME` inside the container                                      |
+| `max_turns`          | `30`                      | Max Claude Code turns                                                                          |
+| `timeout`            | `900`                     | Per-task wall-clock budget (seconds)                                                           |
+| `concurrency`        | `4`                       | Max concurrent agent runs                                                                      |
+| `max_context_tokens` | `1000000`                 | Sets `CLAUDE_CODE_MAX_CONTEXT_TOKENS` inside the container                                     |
+
+
+`system_prompt`, `allowed_tools`, `disallowed_tools`, and `claude_code_version` are inherited Claude Code knobs (leave `null` for defaults).
+
+Add the Claude settings to your repo-root `env.yaml`:
+
+```yaml
+anthropic_model_name: <claude-model>
+anthropic_api_key: <your-api-key>
+anthropic_base_url: https://api.anthropic.com
+```
+
+To run the agentic variant, swap the agent config in and target the agent by name (no separate model server — the agent calls Claude itself):
+
+```bash
+ng_run "+config_paths=[resources_servers/cvdp/configs/cvdp.yaml,responses_api_agents/cvdp_agent/configs/cvdp_agent_agentic.yaml]"
+
+ng_collect_rollouts \
+    +agent_name=cvdp_agent_agentic \
+    +input_jsonl_fpath=resources_servers/cvdp/data/<dataset>.jsonl \
+    +output_jsonl_fpath=results/rollouts.jsonl \
+    +num_repeats=5 \
+    +num_samples_in_parallel=4 \
+    "+config_paths=[resources_servers/cvdp/configs/cvdp.yaml,responses_api_agents/cvdp_agent/configs/cvdp_agent_agentic.yaml]"
+```
+
+## Build the Open-Source Simulation Image
+
+If you're using the CVDP v1.1.0 data (e.g. `data/example_agentic.jsonl`), build the open-source
+simulation image **once** before collecting rollouts. CVDP v1.1.0 uses a dedicated open-source
+simulation image for non-commercial simulation tasks:
+
+```bash
+cd /path/to/cvdp_benchmark
+docker build -f docker/Dockerfile.sim -t nvidia/cvdp-sim:v1.0.0 .
+```
+
+This image provides the default `OSS_SIM_IMAGE` environment used by dataset harnesses via
+`__OSS_SIM_IMAGE__`. CVDP v1.1.0 no longer uses the legacy third-party simulation images for this
+default open-source simulation flow. The build includes cocotb 2.0.1, pytest 8.3.2, Icarus Verilog
+v13_0, Yosys yosys-0.40, and Verilator v5.038.
+
+If you tag the image differently, set the matching value in `.env`:
+
+```bash
+OSS_SIM_IMAGE=nvidia/cvdp-sim:v1.0.0
+```
+
+Open-source place-and-route tasks still use the separate `OSS_PNR_IMAGE` setting, but in CVDP
+v1.1.0 its default points at the same `nvidia/cvdp-sim:v1.0.0` image:
+
+```bash
+OSS_PNR_IMAGE=nvidia/cvdp-sim:v1.0.0
+```
+
 ## Download Dataset
 
 The data can be found [on Hugging Face](https://huggingface.co/datasets/nvidia/cvdp-benchmark-dataset). 
@@ -113,6 +198,7 @@ pre-commit install
 ```
 
 To install apptainer:
+
 ```bash
 wget https://github.com/apptainer/apptainer/releases/download/v1.3.1/apptainer_1.3.1_amd64.deb                                               
 apt install -y ./apptainer_1.3.1_amd64.deb 
@@ -123,7 +209,7 @@ apt install -y ./apptainer_1.3.1_amd64.deb
 ### Step 1 — Start servers
 
 ```bash
-ng_run "+config_paths=[resources_servers/cvdp/configs/cvdp.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+ng_run "+config_paths=[resources_servers/cvdp/configs/cvdp.yaml,responses_api_agents/cvdp_agent/configs/cvdp_agent.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
 ```
 
 ### Step 2 — Run rollout collection
@@ -138,7 +224,7 @@ ng_collect_rollouts \
     +num_repeats=5 \
     +num_samples_in_parallel=4 \
     "+responses_create_params={max_output_tokens: 4096, temperature: 0.2, top_p: 0.7}" \
-    "+config_paths=[resources_servers/cvdp/configs/cvdp.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+    "+config_paths=[resources_servers/cvdp/configs/cvdp.yaml,responses_api_agents/cvdp_agent/configs/cvdp_agent.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" \
     "+resume_from_cache=True"
 ```
 
diff --git a/resources_servers/cvdp/app.py b/resources_servers/cvdp/app.py
index a5ac4c1275..25ef46180d 100644
--- a/resources_servers/cvdp/app.py
+++ b/resources_servers/cvdp/app.py
@@ -13,18 +13,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""CVDP resources server.
+
+This module owns the *policy* side of CVDP verification: the HTTP ``verify``
+contract, the request/response schemas, and how a model/agent answer is turned
+into a reward. ``verify`` routes by category — code-comprehension tasks are
+scored with BLEU/ROUGE against a reference answer (``_verify_subjective``),
+while code-generation tasks are graded by actually running the task's test
+harness (``_verify_objective``). The objective path either grades the files an
+agent already wrote (``rtl_files``) or parses RTL out of the model's text, then
+delegates execution to :class:`resources_servers.cvdp.harness.HarnessRunner`,
+which owns the *mechanism* (docker-compose → Apptainer translation, the SIF
+cache, and the sandbox provider). Keeping execution in ``harness.py`` lets this
+file stay focused on the contract and scoring.
+"""
+
 import asyncio
-import hashlib
 import logging
-import os
-import shlex
-import signal
-import tempfile
 import time
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional
 
-import yaml
 from cvdp_lib.cvdp_constants import (
     BLEU_THRESHOLD,
     CODE_COMPREHENSION_CATEGORIES,
@@ -44,6 +52,7 @@
     BaseVerifyResponse,
     SimpleResourcesServer,
 )
+from resources_servers.cvdp.harness import HarnessRunner
 
 
 _helpers = ModelHelpers()
@@ -86,6 +95,9 @@ class CVDPRunRequest(BaseRunRequest):
 
 class CVDPVerifyRequest(CVDPRunRequest, BaseVerifyRequest):
     verifier_metadata: Dict[str, Any]
+    rtl_files: Optional[Dict[str, str]] = (
+        None  # files the agent already wrote to disk in the sandbox (agentic flow). When present, these are graded directly instead of re-parsing RTL out of the model's chat text.
+    )
 
 
 class CVDPVerifyResponse(BaseVerifyResponse):
@@ -142,230 +154,6 @@ def _parse_model_response(res: str, target_files: List[str]) -> Optional[Dict[st
     return result if result else None
 
 
-# ----------------------------
-# Apptainer harness helpers
-# ----------------------------
-
-
-def _apply_substitutions(content: str, config: CVDPResourcesServerConfig) -> str:
-    """
-    Replace image placeholders in harness file content — mirrors repository.apply_template_substitution() but with Apptainer syntax.
-    """
-    substitutions = {
-        "__VERIF_EDA_IMAGE__": config.eda_sim_image,
-        "__OSS_SIM_IMAGE__": config.oss_sim_image,
-        "__OSS_PNR_IMAGE__": config.oss_pnr_image,
-    }
-    for placeholder, value in substitutions.items():
-        if value and placeholder in content:
-            content = content.replace(placeholder, value)
-    return content
-
-
-def _resolve_image_for_service(
-    compose_data: dict,
-    service_name: str,
-    harness_files: Dict[str, Optional[str]],
-    config: CVDPResourcesServerConfig,
-) -> Tuple[str, List[str]]:
-    """
-    Resolve the container image for a service that uses ``build:`` instead of
-    ``image:`` in its docker-compose definition.
-
-    Docker Compose handles ``build:`` natively by reading a Dockerfile and
-    building an image on the fly.  Apptainer cannot do this directly, so we
-    parse the Dockerfile to extract the base image (FROM) and any RUN / ADD
-    commands, then replay them via ``apptainer build`` with a def file.
-
-    Returns (base_image, post_commands) where *post_commands* are shell
-    commands for the ``%post`` section of an Apptainer definition file.
-    If the service already has ``image:``, returns (image, []).
-    """
-    svc = (compose_data.get("services") or {}).get(service_name, {})
-    image = svc.get("image", "")
-    if image:
-        return image, []
-
-    # Determine Dockerfile path from build: config
-    build_cfg = svc.get("build", {})
-    if isinstance(build_cfg, str):
-        dockerfile_path = os.path.join(build_cfg, "Dockerfile")
-    elif isinstance(build_cfg, dict):
-        dockerfile_path = build_cfg.get("dockerfile", "Dockerfile")
-    else:
-        return "", []
-
-    # Look for the Dockerfile in harness_files (try multiple path variants)
-    dockerfile_content = None
-    candidates = [
-        dockerfile_path,
-        f"src/{dockerfile_path}",
-        dockerfile_path.replace("src/", ""),
-    ]
-    for candidate in candidates:
-        for hf_path, hf_content in harness_files.items():
-            if hf_content and (hf_path == candidate or hf_path.endswith(os.path.basename(candidate))):
-                dockerfile_content = _apply_substitutions(hf_content, config)
-                break
-        if dockerfile_content:
-            break
-
-    if not dockerfile_content:
-        return "", []
-
-    # Parse Dockerfile: extract FROM base image and RUN/ADD commands
-    base_image = ""
-    post_commands: List[str] = []
-    for line in dockerfile_content.splitlines():
-        line = line.strip()
-        if not line or line.startswith("#"):
-            continue
-        if line.upper().startswith("FROM "):
-            parts = line.split()
-            base_image = parts[1] if len(parts) > 1 else ""
-            if " AS " in base_image.upper():
-                base_image = base_image.split()[0]
-        elif line.upper().startswith("RUN "):
-            post_commands.append(line[4:].strip())
-        elif line.upper().startswith("ADD ") and "http" in line.lower():
-            # Convert ADD <url> <dest> to wget/curl
-            parts = line.split()
-            if len(parts) >= 3:
-                url, dest = parts[1], parts[2]
-                post_commands.append(f"wget -q -O {dest} {url} || curl -sL -o {dest} {url}")
-
-    return base_image, post_commands
-
-
-def _parse_compose_service(compose_content: str, service_name: str) -> Dict[str, Any]:
-    """
-    Extract image, command, entrypoint, volumes, working_dir, and environment
-    from a docker-compose service definition.  The compose YAML is only used as
-    metadata — Apptainer handles the actual execution.
-    """
-    data = yaml.safe_load(compose_content) or {}
-    service = (data.get("services") or {}).get(service_name, {})
-    return {
-        "image": service.get("image", ""),
-        "command": service.get("command", ""),
-        "entrypoint": service.get("entrypoint"),
-        "volumes": service.get("volumes", []),
-        "working_dir": service.get("working_dir", "/code/rundir"),
-        "environment": service.get("environment", {}),
-    }
-
-
-def _build_bind_args(workdir: str, compose_volumes: List[str]) -> List[str]:
-    """
-    Build --bind arguments for Apptainer from:
-    1. The standard /code/* workspace mounts
-    2. Non-/code volumes from the docker-compose service definition
-    """
-    bind_args: List[str] = []
-
-    # Standard /code/* mounts
-    for vol in ["docs", "rundir", "rtl", "verif", "src"]:
-        bind_args += ["--bind", f"{workdir}/{vol}:/code/{vol}"]
-
-    # Compose-defined volumes (skip /code mounts — handled above)
-    for vol_str in compose_volumes:
-        parts = vol_str.split(":")
-        host_path = parts[0]
-        container_path = parts[1] if len(parts) > 1 else host_path
-        opts = parts[2] if len(parts) > 2 else ""
-
-        if "/code" in container_path:
-            continue
-
-        # Resolve relative paths against workdir
-        if host_path.startswith("./") or host_path.startswith("../") or not os.path.isabs(host_path):
-            host_path = os.path.normpath(os.path.join(workdir, host_path))
-
-        bind_spec = f"{host_path}:{container_path}"
-        if opts:
-            bind_spec += f":{opts}"
-        bind_args += ["--bind", bind_spec]
-
-    return bind_args
-
-
-def _load_dot_env(workdir: str) -> Dict[str, str]:
-    """
-    Parse the src/.env file (KEY=value lines) from the workspace.
-    Docker Compose auto-loads env_file directives; Apptainer does not,
-    so we read them ourselves and pass them via --env.
-    """
-    env_path = os.path.join(workdir, "src", ".env")
-    env_vars: Dict[str, str] = {}
-    if not os.path.isfile(env_path):
-        return env_vars
-    with open(env_path, encoding="utf-8") as f:
-        for line in f:
-            line = line.strip()
-            if not line or line.startswith("#"):
-                continue
-            if "=" in line:
-                key, _, val = line.partition("=")
-                env_vars[key.strip()] = val.strip()
-    return env_vars
-
-
-def _build_env_args(environment: Any, dot_env: Optional[Dict[str, str]] = None) -> List[str]:
-    """Build --env arguments for Apptainer from a compose environment field
-    and any variables loaded from the workspace src/.env file."""
-    env_args: List[str] = []
-    # Load dot_env first so compose environment can override
-    if dot_env:
-        for key, val in dot_env.items():
-            env_args += ["--env", f"{key}={val}"]
-    if isinstance(environment, dict):
-        for key, val in environment.items():
-            env_args += ["--env", f"{key}={val}"]
-    elif isinstance(environment, list):
-        for item in environment:
-            env_args += ["--env", str(item)]
-    return env_args
-
-
-def _build_runtime_tmp_env_args(container_tmp_path: str) -> List[str]:
-    """
-    Force simulator temp and lock files into writable per-rollout container storage.
-    """
-    runtime_env = {
-        "TMPDIR": container_tmp_path,
-        "TMP": container_tmp_path,
-        "TEMP": container_tmp_path,
-        "TEMPDIR": container_tmp_path,
-        "XCELIUM_TMPDIR": container_tmp_path,
-        "CDS_LOCK": f"{container_tmp_path}/.cdslock",
-        # imc/Java can still hit /tmp unless java.io.tmpdir is forced.
-        "JAVA_TOOL_OPTIONS": f"-Djava.io.tmpdir={container_tmp_path}",
-    }
-    env_args: List[str] = []
-    for key, value in runtime_env.items():
-        env_args += ["--env", f"{key}={value}"]
-    return env_args
-
-
-def _build_command(entrypoint: Any, command: Any) -> List[str]:
-    """Build the command list from compose entrypoint + command fields."""
-    cmd_parts: List[str] = []
-
-    if entrypoint:
-        if isinstance(entrypoint, str):
-            cmd_parts = shlex.split(entrypoint)
-        else:
-            cmd_parts = list(entrypoint)
-
-    if command:
-        if isinstance(command, str):
-            cmd_parts += shlex.split(command)
-        else:
-            cmd_parts += list(command)
-
-    return cmd_parts
-
-
 # ----------------------------
 # Server
 # ----------------------------
@@ -376,13 +164,9 @@ class CVDPResourcesServer(SimpleResourcesServer):
 
     def model_post_init(self, context: Any) -> None:
         self._semaphore = asyncio.Semaphore(value=self.config.num_processes)
-        self._sif_locks: Dict[str, asyncio.Lock] = {}
-        self._sif_lock_guard = asyncio.Lock()
-        cache = self.config.sif_cache_dir
-        if not cache:
-            cache = os.path.join(Path.home(), ".cache", "nemo-gym", "sif")
-        self._sif_cache_dir = cache
-        os.makedirs(self._sif_cache_dir, exist_ok=True)
+        # Sandbox execution (SIF cache, provider, compose translation) lives in
+        # the harness runner; this server only owns the HTTP contract + scoring.
+        self._harness = HarnessRunner(self.config)
 
         # Warn if commercial EDA image is not configured.
         # Categories 12, 13, 14 require a commercial EDA image (e.g. Cadence Xcelium).
@@ -400,8 +184,10 @@ def model_post_init(self, context: Any) -> None:
     async def verify(self, body: CVDPVerifyRequest) -> CVDPVerifyResponse:
         meta = CVDPVerifierMetadata.model_validate(body.verifier_metadata)
 
-        # categories is [category_id, difficulty], e.g. ["cid003", "medium"]
-        category, difficulty = meta.categories[0], meta.categories[1]
+        category, difficulty = (
+            meta.categories[0],
+            meta.categories[1],
+        )  # categories is [category_id, difficulty], e.g. ["cid003", "medium"]
         category_num = int(category[3:])  # "cid003" -> 3
 
         model_out = body.response.output_text
@@ -504,7 +290,13 @@ async def _verify_objective(
         """
         Objective scoring for code-generation categories via docker-compose harness.
         """
-        rtl_files = _parse_model_response(model_out, meta.target_files)
+        # Agentic flow: the agent ran in its own sandbox and reports the files it
+        # wrote on disk. Grade those directly. Model-only flow: fall back to
+        # parsing RTL out of the model's text response.
+        if body.rtl_files:
+            rtl_files = dict(body.rtl_files)
+        else:
+            rtl_files = _parse_model_response(model_out, meta.target_files)
 
         # If model produced output but parsing failed, signal parse_failed so the
         # agent can retry with a fresh model completion — mirrors CVDP's
@@ -526,7 +318,7 @@ async def _verify_objective(
 
         async with self._semaphore:
             t0 = time.time()
-            exit_code, stderr, service_results = await self._run_harness(
+            exit_code, stderr, service_results = await self._harness.run(
                 rtl_files=rtl_files or {},
                 harness_files=meta.harness_files,
                 task_id=meta.task_id,
@@ -547,305 +339,6 @@ async def _verify_objective(
             execution_time=execution_time,
         )
 
-    async def _run_harness(
-        self,
-        rtl_files: Dict[str, str],
-        harness_files: Dict[str, Optional[str]],
-        task_id: str,
-        context_files: Optional[Dict[str, str]] = None,
-    ) -> Tuple[int, str, List[Dict]]:
-        """
-        Write harness + RTL to a temp workspace and run verification via Apptainer.
-
-        Mirrors repository.py prepare() + obj_harness():
-          Workspace layout:
-            workdir/
-              docker-compose.yml   (parsed for service metadata, not executed directly)
-              src/                 (test scripts and .env from harness_files)
-              rtl/                 (model-generated RTL, bound as /code/rtl)
-              verif/               (empty, bound as /code/verif)
-              docs/                (empty, bound as /code/docs)
-              rundir/              (execution output, bound as /code/rundir)
-        """
-        context_files = context_files or {}
-        tmp_root = self.config.harness_workspace_dir.strip()
-        if tmp_root:
-            os.makedirs(tmp_root, exist_ok=True)
-        with tempfile.TemporaryDirectory(prefix=f"cvdp_{task_id}_", dir=tmp_root or None) as workdir:
-            workdir_path = Path(workdir)
-
-            # Create all mount dirs — mirrors repository.create_folders()
-            for d in ["rtl", "verif", "docs", "src", "rundir"]:
-                (workdir_path / d).mkdir()
-            # Optional per-rollout temp storage; cleaned when TemporaryDirectory exits.
-            if self.config.container_tmp_bind_path:
-                (workdir_path / "rundir" / "tmp").mkdir(parents=True, exist_ok=True)
-
-            # Write harness files — mirrors repository.restore_files()
-            compose_content: Optional[str] = None
-            for filepath, content in harness_files.items():
-                if content is None:
-                    continue
-                content = _apply_substitutions(content, self.config)
-                if filepath.endswith("docker-compose.yml"):
-                    compose_content = content
-                dest = workdir_path / filepath
-                dest.parent.mkdir(parents=True, exist_ok=True)
-                try:
-                    with open(str(dest), "w+", encoding="utf-8") as f:
-                        f.write(content)
-                except Exception:
-                    print(f"Failed to write file: {filepath}")
-
-            if compose_content is None:
-                return 1, "No docker-compose.yml found in harness_files", []
-
-            # Write companion files from input.context — mirrors
-            # repository.restore_files(self.context). Preserves the full
-            # target path (e.g. verif/tb_foo.sv -> workdir/verif/tb_foo.sv).
-            for filepath, code in context_files.items():
-                dest = workdir_path / filepath
-                dest.parent.mkdir(parents=True, exist_ok=True)
-                try:
-                    with open(str(dest), "w+", encoding="utf-8") as f:
-                        f.write(code)
-                except Exception:
-                    print(f"Failed to write context file: {filepath}")
-
-            # Write model-generated files (overwrites context files for target slots).
-            # Preserves the full target path, matching CVDP's restore_files().
-            for filepath, code in rtl_files.items():
-                dest = workdir_path / filepath
-                dest.parent.mkdir(parents=True, exist_ok=True)
-                try:
-                    with open(str(dest), "w+", encoding="utf-8") as f:
-                        f.write(code)
-                except Exception:
-                    print(f"Failed to write file: {filepath}")
-
-            # Run each service — mirrors repository.obj_harness()
-            compose_data = yaml.safe_load(compose_content)
-            services = list((compose_data.get("services") or {}).keys())
-
-            service_results: List[Dict] = []
-            for service in services:
-                exit_code, output = await self._run_service(workdir, service, task_id, compose_content, harness_files)
-                service_results.append({"service": service, "exit_code": exit_code, "stderr": output})
-
-            final_exit_code = 0 if all(r["exit_code"] == 0 for r in service_results) else 1
-            combined_stderr = "\n".join(f"[{r['service']}] {r['stderr']}" for r in service_results if r["stderr"])
-            return final_exit_code, combined_stderr, service_results
-
-    async def _run_service(
-        self,
-        workdir: str,
-        service: str,
-        task_id: str,
-        compose_content: str,
-        harness_files: Optional[Dict[str, Optional[str]]] = None,
-    ) -> Tuple[int, str]:
-        """
-        Run a single service from the compose definition using Apptainer. — mirrors repository.log_docker().
-
-        Pulls the Docker image as a SIF (cached), then executes with
-        --bind mounts equivalent to the original Docker volume mappings.
-        Apptainer uses host networking by default, so no network setup is needed.
-
-        """
-        path = os.path.abspath(workdir)
-        svc = _parse_compose_service(compose_content, service)
-
-        # Resolve image — handles both image: and build: services.
-        # Docker Compose builds from Dockerfiles automatically; for Apptainer
-        # we parse the Dockerfile and build a SIF with the equivalent commands.
-        image = svc["image"]
-        post_commands: List[str] = []
-        if not image and harness_files:
-            compose_data = yaml.safe_load(compose_content)
-            image, post_commands = _resolve_image_for_service(compose_data, service, harness_files, self.config)
-        if not image:
-            return 1, f"No image defined for service '{service}'"
-
-        try:
-            if post_commands:
-                sif_path = await self._ensure_built_sif(image, post_commands)
-            else:
-                sif_path = await self._ensure_sif(image)
-        except RuntimeError as exc:
-            return 1, str(exc)
-
-        bind_args = _build_bind_args(path, svc["volumes"])
-        dot_env = _load_dot_env(path)
-        env_args = _build_env_args(svc["environment"], dot_env)
-        if self.config.container_tmp_bind_path:
-            bind_args += ["--bind", f"{path}/rundir/tmp:{self.config.container_tmp_bind_path}"]
-            env_args += _build_runtime_tmp_env_args(self.config.container_tmp_bind_path)
-        cmd_parts = _build_command(svc["entrypoint"], svc["command"])
-
-        # Fix working_dir paths that don't exist under Apptainer's bind mounts.
-        # Some compose files use /src/rundir/ which exists in Docker (via volume
-        # mount) but not in Apptainer (which only binds to /code/*).
-        working_dir = svc["working_dir"] or "/code/rundir"
-        if "/code/" not in working_dir:
-            working_dir = "/code/rundir"
-
-        if cmd_parts:
-            cmd = [
-                "apptainer",
-                "exec",
-                "--writable-tmpfs",
-                "--home",
-                "/code/rundir",
-                *bind_args,
-                *env_args,
-                "--pwd",
-                working_dir,
-                sif_path,
-                *cmd_parts,
-            ]
-        else:
-            # No explicit command — use the container's default runscript
-            cmd = [
-                "apptainer",
-                "run",
-                "--writable-tmpfs",
-                "--home",
-                "/code/rundir",
-                *bind_args,
-                *env_args,
-                "--pwd",
-                working_dir,
-                sif_path,
-            ]
-
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            cwd=workdir,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-            start_new_session=True,  # Create new process group so we can kill all children
-        )
-        exit_code = -1
-        stdout_bytes = b""
-        stderr_bytes = b""
-        try:
-            stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                proc.communicate(),
-                timeout=self.config.container_timeout,
-            )
-            exit_code = proc.returncode
-        except asyncio.TimeoutError:
-            # Kill the entire process group (apptainer + vvp and other children)
-            try:
-                os.killpg(proc.pid, signal.SIGKILL)
-            except ProcessLookupError:
-                pass
-            stdout_bytes, stderr_bytes = await proc.communicate()
-            exit_code = -1
-            stderr_bytes = f"apptainer exec timed out after {self.config.container_timeout}s".encode()
-
-        combined = (stderr_bytes + stdout_bytes).decode("utf-8", errors="replace")
-        return exit_code, combined
-
-    async def _ensure_built_sif(self, base_image: str, post_commands: List[str]) -> str:
-        """
-        Build a SIF that extends a base image with extra commands from a Dockerfile.
-
-        This replicates what ``docker compose build`` does: take a base image,
-        run additional commands (pip install, etc.), and produce a new image.
-        For Apptainer we generate a definition file and run ``apptainer build``.
-        Results are cached by a hash of the commands.
-        """
-        if not post_commands:
-            return await self._ensure_sif(base_image)
-
-        cmd_hash = hashlib.md5("\n".join(post_commands).encode()).hexdigest()[:12]
-        safe_name = base_image.replace("/", "_").replace(":", "_") + f"__built_{cmd_hash}.sif"
-        sif_path = os.path.join(self._sif_cache_dir, safe_name)
-
-        if os.path.exists(sif_path):
-            return sif_path
-
-        # Reuse the per-image locking pattern
-        async with self._sif_lock_guard:
-            if safe_name not in self._sif_locks:
-                self._sif_locks[safe_name] = asyncio.Lock()
-            lock = self._sif_locks[safe_name]
-
-        async with lock:
-            if os.path.exists(sif_path):
-                return sif_path
-
-            base_sif = await self._ensure_sif(base_image)
-
-            post_section = "\n    ".join(post_commands)
-            def_content = f"Bootstrap: localimage\nFrom: {base_sif}\n\n%post\n    {post_section}\n"
-            tmp_def = sif_path + ".def"
-            tmp_sif = sif_path + ".building"
-            with open(tmp_def, "w") as f:
-                f.write(def_content)
-
-            proc = await asyncio.create_subprocess_exec(
-                "apptainer",
-                "build",
-                "--force",
-                tmp_sif,
-                tmp_def,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-            )
-            _, stderr = await proc.communicate()
-            os.unlink(tmp_def)
-            if proc.returncode != 0:
-                if os.path.exists(tmp_sif):
-                    os.unlink(tmp_sif)
-                raise RuntimeError(f"apptainer build failed: {stderr.decode(errors='replace')}")
-            os.rename(tmp_sif, sif_path)
-            return sif_path
-
-    async def _ensure_sif(self, image: str) -> str:
-        """
-        Return the path to a cached SIF file for the given Docker image,
-        pulling it from the registry if not already cached.
-        Mirrors the cleanup() trap in repository.log_docker()'s generated shell script.
-        """
-        safe_name = image.replace("/", "_").replace(":", "_") + ".sif"
-        sif_path = os.path.join(self._sif_cache_dir, safe_name)
-
-        if os.path.exists(sif_path):
-            return sif_path
-
-        # Per-image lock to avoid concurrent pulls of the same image
-        async with self._sif_lock_guard:
-            if image not in self._sif_locks:
-                self._sif_locks[image] = asyncio.Lock()
-            lock = self._sif_locks[image]
-
-        async with lock:
-            # Double-check after acquiring lock
-            if os.path.exists(sif_path):
-                return sif_path
-
-            tmp_path = sif_path + ".pulling"
-            proc = await asyncio.create_subprocess_exec(
-                "apptainer",
-                "pull",
-                "--force",
-                tmp_path,
-                f"docker://{image}",
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.PIPE,
-            )
-            stdout, stderr = await proc.communicate()
-            if proc.returncode != 0:
-                if os.path.exists(tmp_path):
-                    os.unlink(tmp_path)
-                raise RuntimeError(
-                    f"apptainer pull failed for {image} (exit {proc.returncode}): {stderr.decode(errors='replace')}"
-                )
-            os.rename(tmp_path, sif_path)
-            return sif_path
-
 
 if __name__ == "__main__":
     CVDPResourcesServer.run_webserver()
diff --git a/resources_servers/cvdp/configs/cvdp.yaml b/resources_servers/cvdp/configs/cvdp.yaml
index 1a18212eea..8e8cf26834 100644
--- a/resources_servers/cvdp/configs/cvdp.yaml
+++ b/resources_servers/cvdp/configs/cvdp.yaml
@@ -4,34 +4,10 @@ cvdp:
       entrypoint: app.py
       domain: coding
       verified: false
-      oss_sim_image: ghcr.io/hdl/sim/osvb
+      oss_sim_image: ${cvdp_sim_image}
       eda_sim_image: ""
       container_timeout: 600
       num_processes: 16
       primary_service: direct
       description: CVDP benchmark dataset for code generation
-      value: Evaluate RTL code generation capabilities
-
-cvdp_agent:
-  responses_api_agents:
-    cvdp_agent:
-      entrypoint: app.py
-      resources_server:
-        type: resources_servers
-        name: cvdp
-      model_server:
-        type: responses_api_models
-        name: policy_model
-      datasets:
-      - name: example
-        type: example
-        jsonl_fpath: resources_servers/cvdp/data/example.jsonl
-      - name: validation
-        type: validation
-        jsonl_fpath: resources_servers/cvdp/data/gym_cvdp_1.0.4_nonagentic_code_generation_no_commercial.jsonl
-        gitlab_identifier:
-          dataset_name: cvdp_nonagentic_code_gen_no_commercial
-          version: 0.0.2
-          artifact_fpath: gym_cvdp_1.0.4_nonagentic_code_generation_no_commercial.jsonl
-        license: Apache 2.0
-        num_repeats: 5
\ No newline at end of file
+      value: Evaluate RTL code generation capabilities
\ No newline at end of file
diff --git a/resources_servers/cvdp/data/example_agentic.jsonl b/resources_servers/cvdp/data/example_agentic.jsonl
new file mode 100644
index 0000000000..d9a2a76466
--- /dev/null
+++ b/resources_servers/cvdp/data/example_agentic.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "system", "content": "You are a language model that has the following file operations available at your disposal:\n  - **List files in a directory** by running one of the following commands: \n    - `ls`\n    - `tree`\n  - **Read files** by using:\n    - `cat <filename>`\n  - **Write files** by using:\n    - `echo <content> > <filename>`\n  - **Compile Verilog** by using `iverilog` such as:\n    - `iverilog -o <output_filename>.out -g2012 <verilog_code_file> <verilog_testbench_file>`\n  - **Run Simulation** by using:\n    - `vvp <output_filename>.out`\n  - **Find current working directory** by using:\n    - `pwd`\n\n  Your task is to create a Verilog module based on the provided specifications and integrate it into an existing system using proper module instantiation and connections. At the end, please prepare a Linux patch file for me to finalize the request. \n\n  You will solve the problem step by step using the following approach of \n  - thought (thinking process of the step you're going to take\n  - action (the command you will be running to get more details/context that's helpful to solve the problem)\n  - observation (the output from the action you will observe based on which you will take your next step)\n\n  The last step will be the final output summary and the patch itself in the following format \n  - thought (the summary of what you did and some introduction of the patch file itself)\n  - patch (a Linux-based patch that needs to be applied to reach the relevant solution)\n\n  The patch file should only be applied to a single file to reach the required solution."}, {"role": "user", "content": "\nProvide me one answer for this request: I need to implement a **64b/66b top-level codec module** that integrates a **data encoder (`encoder_data_64b66b`), a control encoder (`encoder_control_64b66b`), and a combined data/control decoder (`decoder_data_control_64b66b`)**. 
The top-level module should be created at **`/code/rtl/top_64b66b_codec.sv`** and must manage the full encode-decode flow for 64b/66b encoding, supporting data, and control paths.\n\nThe encoder must select between data and control encoding based on the `enc_control_in` value and produce a 66-bit encoded output (`enc_data_out`). The decoder must process incoming 66-bit data and output 64-bit decoded data, associated control signals, and any sync or decoding errors.\n\nThe RTL source files are located as follows:\n- `/code/rtl/encoder_data_64b66b.sv`\n- `/code/rtl/encoder_control_64b66b.sv`\n- `/code/rtl/decoder_data_control_64b66b.sv`\n\nThe documentation, located under the`/code/docs/specification.md` directory, provides design requirements and behavior specifications. \n\nThis integrated module should operate with **minimal latency and full protocol compliance**, as defined in the provided documentation.\n\n\nPlease provide your response as plain text without any JSON formatting. Your response will be saved directly to: rtl/top_64b66b_codec.sv."}]}, "verifier_metadata": {"task_id": "cvdp_agentic_64b66b_codec_0001", "categories": ["cid005", "medium"], "difficulty": "medium", "target_files": ["rtl/top_64b66b_codec.sv"], "harness_files": {"docker-compose.yml": "services:\n\n  direct:\n    image: __OSS_SIM_IMAGE__\n    volumes:\n      - ./src/:/src/:ro # Infrastructure location\n    working_dir : /code/rundir\n    env_file    : ./src/.env\n    command     : pytest -o cache_dir=/code/rundir/.cache /src/test_runner.py -v -s\n", "src/.env": "SIM             = icarus\nTOPLEVEL_LANG   = verilog\nVERILOG_SOURCES = /code/rtl/top_64b66b_codec.sv /code/rtl/encoder_data_64b66b.sv /code/rtl/encoder_control_64b66b.sv /code/rtl/decoder_data_control_64b66b.sv\nTOPLEVEL        = top_64b66b_codec\nMODULE          = test_top_64b66b_codec\nPYTHONPATH      = /src\nHASH            = 5ae28b08977dcf54c572c129fd28f61a708ef1ff\n", "src/test_runner.py": "import os\nfrom cocotb_tools.runner 
import get_runner\nimport pytest\n\n# Fetch environment variables for Verilog source setup\nverilog_sources = os.getenv(\"VERILOG_SOURCES\").split()\ntoplevel_lang   = os.getenv(\"TOPLEVEL_LANG\")\nsim             = os.getenv(\"SIM\", \"icarus\")\ntoplevel        = os.getenv(\"TOPLEVEL\")\nmodule          = os.getenv(\"MODULE\")\nwave            = os.getenv(\"WAVE\")\n\n# Runner to execute tests\ndef test_runner():\n    runner = get_runner(sim)\n\n    runner.build(\n        sources=verilog_sources,\n        hdl_toplevel=toplevel,\n        always=True,\n        clean=True,\n        verbose=True,\n        timescale=(\"1ns\", \"1ns\"),\n        log_file=\"sim.log\"\n    )\n    runner.test(hdl_toplevel=toplevel, test_module=module,waves=True)\n\nif __name__ == \"__main__\":\n    test_runner()\n", "src/test_top_64b66b_codec.py": "import cocotb\nfrom cocotb.triggers import RisingEdge, Timer\nfrom cocotb.clock import Clock\nimport random\n\n# Helper function to initialize DUT inputs\nasync def dut_initialization(dut):\n    \"\"\" Initialize all inputs for DUT \"\"\"\n    dut.rst_in.value = 1\n    dut.dec_data_valid_in.value = 0\n    dut.dec_data_in.value = 0\n    dut.enc_data_in.value = 0\n    dut.enc_control_in.value = 0\n    await RisingEdge(dut.clk_in)  # Wait for one clock cycle\n    await RisingEdge(dut.clk_in)  # Wait for one clock cycle\n    await RisingEdge(dut.clk_in)  # Wait for one clock cycle\n\n# Helper function to check the output with debug logging\nasync def check_output_encoder(dut, expected_sync, expected_data):\n    await RisingEdge(dut.clk_in)\n    actual_output = int(dut.enc_data_out.value)\n    expected_output = (expected_sync << 64) | expected_data\n\n    # Log the actual and expected outputs\n    dut._log.info(f\"Checking output:\\n\"\n                  f\"  Actual enc_data_out: {hex(actual_output)}\\n\"\n                  f\"  Expected enc_data_out: {hex(expected_output)}\\n\")\n\n    assert actual_output == expected_output, \\\n        f\"Test 
failed: enc_data_out={hex(actual_output)} (expected {hex(expected_output)})\"\n\n\n# Helper function to check the output with debug logging\nasync def check_output_decoder(dut, expected_data, expected_dec_sync_error, expected_control_out=0, expected_dec_error_out=0):\n    \"\"\"Check DUT output against expected values\"\"\"\n    await RisingEdge(dut.clk_in)  # Wait for the output latency of 1 cycle\n    actual_data_out = int(dut.dec_data_out.value)\n    actual_dec_sync_error = int(dut.dec_sync_error.value)\n    actual_control_out = int(dut.dec_control_out.value)\n    actual_dec_error_out = int(dut.dec_error_out.value)\n    dec_data_in = int(dut.dec_data_in.value)\n\n    # Log the actual and expected outputs\n    dut._log.info(f\"Checking output - Input: {hex(dec_data_in)},  Actual dec_data_out: {hex(actual_data_out)}, Expected dec_data_out: {hex(expected_data)}\\n\"\n                  f\"  Actual dec_sync_error: {actual_dec_sync_error}, Expected dec_sync_error: {expected_dec_sync_error}\\n\"\n                  f\"  Actual dec_control_out: {hex(actual_control_out)}, Expected dec_control_out: {hex(expected_control_out)}\\n\"\n                  f\"  Actual dec_error_out: {actual_dec_error_out}, Expected dec_error_out: {expected_dec_error_out}\\n\")\n\n    # Always check dec_sync_error and dec_error_out\n    assert actual_dec_sync_error == expected_dec_sync_error, \\\n        f\"Sync error mismatch: dec_sync_error={actual_dec_sync_error} (expected {expected_dec_sync_error})\"\n    assert actual_dec_error_out == expected_dec_error_out, \\\n        f\"Decoder error mismatch: dec_error_out={actual_dec_error_out} (expected {expected_dec_error_out})\"\n\n    # Check data and control output only if both dec_sync_error and dec_error_out are 0\n    if expected_dec_sync_error == 0 and expected_dec_error_out == 0:\n        assert actual_data_out == expected_data, \\\n            f\"Data mismatch: dec_data_out={hex(actual_data_out)} (expected {hex(expected_data)})\"\n        
assert actual_control_out == expected_control_out, \\\n            f\"Control output mismatch: dec_control_out={hex(actual_control_out)} (expected {hex(expected_control_out)})\"\n\n@cocotb.test()\nasync def top_reset_test(dut):\n    \"\"\" Test the reset behavior of the decoder \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n    \n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n    await RisingEdge(dut.clk_in)\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 1\n    await RisingEdge(dut.clk_in)\n    await RisingEdge(dut.clk_in)\n\n    # Log the output after reset\n    dut._log.info(f\"Reset Test:\\n  dec_data_out: {hex(int(dut.dec_data_out.value))}\\n  Expected: 0\")\n\n    # Check that output is zero after reset\n    assert dut.dec_data_out.value == 0, \"Reset test failed: dec_data_out should be zero after reset\"\n    assert dut.dec_sync_error.value == 0, \"Reset test failed: dec_sync_error should be zero after reset\"\n     # Check that output is zero after reset\n    assert int(dut.enc_data_out.value) == 0, \"Reset test failed: enc_data_out should be zero after reset\"\n\n@cocotb.test()\nasync def encoder_fixed_pattern_test(dut):\n    \"\"\" Test encoding when all data octets are pure data \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n    dut.enc_data_in.value = 0xA5A5A5A5A5A5A5A5\n    dut.enc_control_in.value = 0x00  # All data\n\n    await RisingEdge(dut.clk_in)\n    # Log inputs for data encoding test\n    dut._log.info(f\"Data Encoding Test:\\n\"\n                  f\"  enc_data_in: 
{hex(int(dut.enc_data_in.value))}\\n\"\n                  f\"  enc_control_in: {bin(int(dut.enc_control_in.value))}\")\n\n    # Apply test and check output\n    await check_output_encoder(dut, expected_sync=0b01, expected_data=0xA5A5A5A5A5A5A5A5)\n\n@cocotb.test()\nasync def encoder_control_encoding_test(dut):\n    \"\"\" Test encoding when control characters are in the last four octets \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    \n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n    # Set test inputs\n    dut.enc_data_in.value = 0xFFFFFFFFFFFFFFFF\n    dut.enc_control_in.value = 0x0F  # Control in last four octets\n\n    await RisingEdge(dut.clk_in)\n    # Log inputs for control encoding test\n    dut._log.info(f\"Control Encoding Test:\\n\"\n                  f\"  enc_data_in: {hex(int(dut.enc_data_in.value))}\\n\"\n                  f\"  enc_control_in: {bin(int(dut.enc_control_in.value))}\")\n\n    # Apply test and check output\n    await check_output_encoder(dut, expected_sync=0b10, expected_data=0x0000000000000000)  # Expected data output is zero\n\n@cocotb.test()\nasync def encoder_mixed_data_control_test(dut):\n    \"\"\" Test encoding when control characters are mixed in the data \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Set test inputs\n    dut.enc_data_in.value = 0x123456789ABCDEF0\n    dut.enc_control_in.value = 0x81  # Control in first and last octets\n\n    await RisingEdge(dut.clk_in)\n    # Log inputs for mixed data and control test\n    
dut._log.info(f\"Mixed Data and Control Test:\\n\"\n                  f\"  enc_data_in: {hex(int(dut.enc_data_in.value))}\\n\"\n                  f\"  enc_control_in: {bin(int(dut.enc_control_in.value))}\")\n\n    # Apply test and check output\n    await RisingEdge(dut.clk_in)\n    await check_output_encoder(dut, expected_sync=0b10, expected_data=0x0000000000000000)  # Expected data output is zero\n\n@cocotb.test()\nasync def encoder_all_control_symbols_test(dut):\n    \"\"\" Test encoding when all characters are control \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Set test inputs\n    dut.enc_data_in.value = 0xA5A5A5A5A5A5A5A5\n    dut.enc_control_in.value = 0xFF  # All control\n\n    await RisingEdge(dut.clk_in)\n    # Log inputs for all control symbols test\n    dut._log.info(f\"All Control Symbols Test:\\n\"\n                  f\"  enc_data_in: {hex(int(dut.enc_data_in.value))}\\n\"\n                  f\"  enc_control_in: {bin(int(dut.enc_control_in.value))}\")\n\n    # Apply test and check output\n    await check_output_encoder(dut, expected_sync=0b10, expected_data=0x0000000000000000)  # Expected data output is zero\n\n@cocotb.test()\nasync def encoder_random_data_control_test(dut):\n    \"\"\" Test encoding with random data and control inputs \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    \n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    for i in range(500):  # Run 5 random tests\n        # Generate random data and control inputs\n        
random_data = random.getrandbits(64)\n        random_control = 0\n\n        dut.enc_data_in.value = random_data\n        dut.enc_control_in.value = random_control\n\n        # Determine expected sync word and data based on control input\n        expected_sync = 0b01 if random_control == 0 else 0b10\n        expected_data = random_data if random_control == 0 else 0x0000000000000000\n\n        await RisingEdge(dut.clk_in)\n        # Log inputs for each random test\n        dut._log.info(f\"Random Test {i+1}:\\n\"\n                      f\"  enc_data_in: {hex(int(dut.enc_data_in.value))}\\n\"\n                      f\"  enc_control_in: {bin(int(dut.enc_control_in.value))}\")\n\n        await check_output_encoder(dut, expected_sync=expected_sync, expected_data=expected_data)\n\n        await Timer(10, unit=\"ns\")  # Wait for next random test\n\n    dut._log.info(\"Randomized tests completed successfully\")\n\n@cocotb.test()\nasync def encoder_random_data_only_test(dut):\n    \"\"\" Test encoding with random data and control inputs \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    \n    await Timer(20, unit=\"ns\")  # hold reset for 20ns\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    dut.enc_control_in.value = 0  # All data\n    await RisingEdge(dut.clk_in)\n\n    for i in range(50):  # Run 5 random tests\n        # Generate random data\n        random_data = random.getrandbits(64)\n        dut.enc_data_in.value = random_data\n\n        # Determine expected sync word and data\n        expected_sync = 0b01\n        expected_data = random_data\n\n        await RisingEdge(dut.clk_in)\n        # Log inputs for each random test\n        dut._log.info(f\"Random Test {i+1}:\\n\"\n                      f\"  enc_data_in: {hex(int(dut.enc_data_in.value))}\\n\"\n                      f\"  enc_control_in: 
{bin(int(dut.enc_control_in.value))}\")\n\n        await check_output_encoder(dut, expected_sync=expected_sync, expected_data=expected_data)\n\n        await Timer(10, unit=\"ns\")  # Wait for next random test\n\n    dut._log.info(\"Randomized tests completed successfully\")\n\n@cocotb.test()\nasync def encoder_test_all_control_combinations(dut):\n    \"\"\"Cocotb test for 64b/66b encoder with full test cases and expected outputs\"\"\"\n\n    # Start the clock\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await RisingEdge(dut.clk_in)\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Test cases with expected values\n    test_cases = [\n        (0x0707070707070707, 0b11111111, 0x21e00000000000000),\n        (0x070707070707FDAE, 0b11111110, 0x299000000000000ae),\n        (0x0707070707FDA5A5, 0b11111100, 0x2aa0000000000a5a5),\n        (0x07070707FDFEED55, 0b11111000, 0x2b400000000feed55),\n        (0x070707FD99887766, 0b11110000, 0x2cc00000099887766),\n        (0x0707FDAABBCCDDEE, 0b11100000, 0x2d20000aabbccddee),\n        (0x07FDAAAAAA555555, 0b11000000, 0x2e100aaaaaa555555),\n        (0xFD773388229911AA, 0b10000000, 0x2ff773388229911aa),\n        (0xDDCCBBFB07070707, 0b00011111, 0x233ddccbb00000000),\n        (0x0707079C0707079C, 0b00010001, 0x255070707ff070707),\n        (0x3456789ABCDEF0FB, 0b00000001, 0x2783456789abcdef0),\n        (0x777777FBDEEDDE9C, 0b00010001, 0x2667777770fdeedde),\n        (0x07070707ABCDEF9C, 0b11110001, 0x24b0000000abcdeff),\n        (0xAAAAAA9C07070707, 0b00011111, 0x22daaaaaaf0000000),\n        (0xFEFEFEFEFEFEFEFE, 0b11111111, 0x21e3c78f1e3c78f1e),\n        (0x07070707070707FD, 0b11111111, 0x28700000000000000),\n    ]\n\n    # Apply test cases and compare DUT output with expected values\n    for idx, (data_in, control_in, expected_output) in enumerate(test_cases):\n  
      # Apply inputs\n        await RisingEdge(dut.clk_in)\n        dut.enc_data_in.value = data_in\n        dut.enc_control_in.value = control_in\n\n        # Wait for a clock cycle\n        await RisingEdge(dut.clk_in)\n        await RisingEdge(dut.clk_in)\n\n        # Get DUT output\n        dut_output = int(dut.enc_data_out.value)\n\n        # Compare DUT output with expected output\n        assert dut_output == expected_output, (\n            f\"Test case {idx+1} failed: \"\n            f\"Data: {hex(data_in)}, Control: {bin(control_in)}, \"\n            f\"Expected: {hex(expected_output)}, Got: {hex(dut_output)}\"\n        )\n\n        dut._log.info(\n            f\"Test Case {idx + 1}:\\n\"\n            f\"  enc_data_in: {hex(data_in)}\\n\"\n            f\"  enc_control_in: {bin(control_in)}\\n\"\n            f\"  enc_data_out (DUT): {hex(dut_output)}\\n\"\n            f\"  Expected: {hex(expected_output)}\"\n        )\n\n@cocotb.test()\nasync def encoder_test_all_octets_control(dut):\n    \"\"\"Cocotb test for 64b/66b encoder with full test cases and expected outputs\"\"\"\n\n    # Start the clock\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await RisingEdge(dut.clk_in)\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Test cases with expected values\n    test_cases = [\n        (0x0707070707070707, 0b11111111, 0x21e00000000000000),\n        (0xFEFEFEFEFEFEFEFE, 0b11111111, 0x21e3c78f1e3c78f1e),\n        (0x07070707070707FD, 0b11111111, 0x28700000000000000),\n    ]\n\n    # Apply test cases and compare DUT output with expected values\n    for idx, (data_in, control_in, expected_output) in enumerate(test_cases):\n        # Apply inputs\n        await RisingEdge(dut.clk_in)\n        dut.enc_data_in.value = data_in\n        dut.enc_control_in.value = control_in\n\n        # Wait for a 
clock cycle\n        await RisingEdge(dut.clk_in)\n        await RisingEdge(dut.clk_in)\n\n        # Get DUT output\n        dut_output = int(dut.enc_data_out.value)\n\n        # Compare DUT output with expected output\n        assert dut_output == expected_output, (\n            f\"Test case {idx+1} failed: \"\n            f\"Data: {hex(data_in)}, Control: {bin(control_in)}, \"\n            f\"Expected: {hex(expected_output)}, Got: {hex(dut_output)}\"\n        )\n\n        dut._log.info(\n            f\"Test Case {idx + 1}:\\n\"\n            f\"  enc_data_in: {hex(data_in)}\\n\"\n            f\"  enc_control_in: {bin(control_in)}\\n\"\n            f\"  enc_data_out (DUT): {hex(dut_output)}\\n\"\n            f\"  Expected: {hex(expected_output)}\"\n        )\n\n@cocotb.test()\nasync def encoder_test_mixed_data_control_octets(dut):\n    \"\"\"Cocotb test for 64b/66b encoder with full test cases and expected outputs\"\"\"\n\n    # Start the clock\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n\n    await RisingEdge(dut.clk_in)\n    await RisingEdge(dut.clk_in)\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Test cases with expected values\n    test_cases = [\n        (0x070707070707FDAE, 0b11111110, 0x299000000000000ae),\n        (0x0707070707FDA5A5, 0b11111100, 0x2aa0000000000a5a5),\n        (0x07070707FDFEED55, 0b11111000, 0x2b400000000feed55),\n        (0x070707FD99887766, 0b11110000, 0x2cc00000099887766),\n        (0x0707FDAABBCCDDEE, 0b11100000, 0x2d20000aabbccddee),\n        (0x07FDAAAAAA555555, 0b11000000, 0x2e100aaaaaa555555),\n        (0xFD773388229911AA, 0b10000000, 0x2ff773388229911aa),\n        (0xDDCCBBFB07070707, 0b00011111, 0x233ddccbb00000000),\n        (0x0707079C0707079C, 0b00010001, 0x255070707ff070707),\n        (0x3456789ABCDEF0FB, 0b00000001, 0x2783456789abcdef0),\n        (0x777777FBDEEDDE9C, 0b00010001, 
0x2667777770fdeedde),\n        (0x07070707ABCDEF9C, 0b11110001, 0x24b0000000abcdeff),\n        (0xAAAAAA9C07070707, 0b00011111, 0x22daaaaaaf0000000),\n    ]\n\n\n    # Apply test cases and compare DUT output with expected values\n    for idx, (data_in, control_in, expected_output) in enumerate(test_cases):\n        # Apply inputs\n        await RisingEdge(dut.clk_in)\n        dut.enc_data_in.value = data_in\n        dut.enc_control_in.value = control_in\n\n        # Wait for a clock cycle\n        await RisingEdge(dut.clk_in)\n        await RisingEdge(dut.clk_in)\n\n        # Get DUT output\n        dut_output = int(dut.enc_data_out.value)\n\n        # Compare DUT output with expected output\n        assert dut_output == expected_output, (\n            f\"Test case {idx+1} failed: \"\n            f\"Data: {hex(data_in)}, Control: {bin(control_in)}, \"\n            f\"Expected: {hex(expected_output)}, Got: {hex(dut_output)}\"\n        )\n\n        dut._log.info(\n            f\"Test Case {idx + 1}:\\n\"\n            f\"  enc_data_in: {hex(data_in)}\\n\"\n            f\"  enc_control_in: {bin(control_in)}\\n\"\n            f\"  enc_data_out (DUT): {hex(dut_output)}\\n\"\n            f\"  Expected: {hex(expected_output)}\"\n        )\n    dut._log.info(\"All test cases passed!\")\n\n\n@cocotb.test()\nasync def decoder_random_any_sync_header_data_test(dut):\n    \"\"\" Test decoding with random sync headers and data \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    await RisingEdge(dut.clk_in)\n\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    for i in range(5):  # Run 5 random tests\n        random_sync_header = random.choice([0b01, 0b00, 0b11])\n        random_data = random.getrandbits(64)\n\n        dut.dec_data_in.value = (random_sync_header << 64) | random_data\n        dut.dec_data_valid_in.value = 1\n\n        
expected_data = random_data if random_sync_header == 0b01 else 0x0000000000000000\n        expected_dec_sync_error = 0 if random_sync_header == 0b01 else 1\n\n        # Apply test and check output\n        await Timer(5, unit=\"ns\")  # Wait before next random test\n        await RisingEdge(dut.clk_in)\n        dut.dec_data_valid_in.value = 0\n        dut._log.info(f\"Random Test {i+1}:\\n\"\n                      f\"  dec_data_in: {hex(int(dut.dec_data_in.value))}\")\n        await check_output_decoder(dut, expected_data=expected_data, expected_dec_sync_error=expected_dec_sync_error)\n\n@cocotb.test()\nasync def decoder_random_valid_data_test(dut):\n    \"\"\" Test decoding with random sync headers and data \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    await RisingEdge(dut.clk_in)\n\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    for i in range(5):  # Run 5 random tests\n        random_sync_header = random.choice([0b01])\n        random_data = random.getrandbits(64)\n\n        dut.dec_data_in.value = (random_sync_header << 64) | random_data\n        dut.dec_data_valid_in.value = 1\n\n        expected_data = random_data if random_sync_header == 0b01 else 0x0000000000000000\n        expected_dec_sync_error = 0 if random_sync_header == 0b01 else 1\n\n        # Apply test and check output\n        await Timer(5, unit=\"ns\")  # Wait before next random test\n        await RisingEdge(dut.clk_in)\n        dut.dec_data_valid_in.value = 0\n        dut._log.info(f\"Random Test {i+1}:\\n\"\n                      f\"  dec_data_in: {hex(int(dut.dec_data_in.value))}\")\n        await check_output_decoder(dut, expected_data=expected_data, expected_dec_sync_error=expected_dec_sync_error)\n\n@cocotb.test()\nasync def decoder_control_only_test(dut):\n    \"\"\" Test decoding for control-only mode \"\"\"\n    clock = Clock(dut.clk_in, 
10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    await RisingEdge(dut.clk_in)\n\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Control-only mode test cases\n    test_cases = [\n        (0b10, 0x1E, 0x3C78F1E3C78F1E, 0xFEFEFEFEFEFEFEFE, 0, 0),  # All control characters\n        (0b10, 0x1E, 0x00000000000000, 0x0707070707070707, 0, 0),  # All control characters\n        (0b00, 0x1E, 0x00000000000000, 0x0707070707070707, 1, 0),  # All control characters\n        (0b10, 0x11, 0x3C78F1E3C78F1E, 0xFEFEFEFEFEFEFEFE, 0, 1),  # All control characters\n    ]\n\n    for sync_header, type_field, data_in, expected_data, expected_dec_sync_error, expected_dec_error_out in test_cases:\n        dut.dec_data_in.value = (sync_header << 64) | (type_field << 56) | data_in\n        dut.dec_data_valid_in.value = 1\n\n        await Timer(5, unit=\"ns\")\n        await RisingEdge(dut.clk_in)\n        dut.dec_data_valid_in.value = 0\n        dut._log.info(f\"Control-Only Test:\\n\"\n                      f\"  dec_data_in: {hex(int(dut.dec_data_in.value))}\")\n        await check_output_decoder(dut, expected_data=expected_data, expected_dec_sync_error=expected_dec_sync_error,\n                           expected_control_out=0xFF, expected_dec_error_out=expected_dec_error_out)\n\n@cocotb.test()\nasync def decoder_mixed_mode_test(dut):\n    \"\"\" Test decoding for mixed mode \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    await RisingEdge(dut.clk_in)\n\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Mixed mode test cases\n    test_cases = [\n        # Format: (sync_header, type_field, data_in, expected_data, expected_control_out, expected_dec_sync_error, expected_dec_error_out)\n        (0b10, 0x33, 0xDDCCBB00000000, 0xDDCCBBFB07070707, 
0x1F, 0, 0),  # Mixed mode example\n        (0b10, 0x78, 0x3456789ABCDEF0, 0x3456789ABCDEF0FB, 0x01, 0, 0),  # Mixed mode example\n        (0b10, 0x87, 0x00000000000000, 0x07070707070707FD, 0xFE, 0, 0),  # Mixed mode example\n        (0b10, 0x99, 0x000000000000AE, 0x070707070707FDAE, 0xFE, 0, 0),  # Mixed mode example\n        (0b10, 0xAA, 0x0000000000A5A5, 0x0707070707FDA5A5, 0xFC, 0, 0),  # Mixed mode example\n        (0b10, 0xB4, 0x00000000FEED55, 0x07070707FDFEED55, 0xF8, 0, 0),  # Mixed mode example\n        (0b10, 0xCC, 0x00000099887766, 0x070707FD99887766, 0xF0, 0, 0),  # Mixed mode example\n        (0b10, 0xD2, 0x00001234567890, 0x0707FD1234567890, 0xE0, 0, 0),  # Mixed mode example\n        (0b10, 0xE1, 0x00FFEEDDCCBBAA, 0x07FDFFEEDDCCBBAA, 0xC0, 0, 0),  # Mixed mode example\n        (0b10, 0xFF, 0x773388229911AA, 0xFD773388229911AA, 0x80, 0, 0),  # Mixed mode example\n        (0b10, 0x55, 0x070707FF070707, 0x0707079C0707079C, 0x11, 0, 0),  # Mixed mode example\n        (0b10, 0x66, 0x7777770FDEEDDE, 0x777777FBDEEDDE9C, 0x11, 0, 0),  # Mixed mode example\n        (0b10, 0x4B, 0x0000000ABCDEFF, 0x0707070755E6F79C, 0xF1, 0, 0),  # Mixed mode example\n        (0b10, 0x2D, 0xAAAAAAF0000000, 0xAAAAAA9C07070707, 0x1F, 0, 0),  # Mixed mode example\n    ]\n\n    for sync_header, type_field, data_in, expected_data, expected_control_out, expected_dec_sync_error, expected_dec_error_out in test_cases:\n        # Set inputs\n        dut.dec_data_in.value = (sync_header << 64) | (type_field << 56) | data_in\n        dut.dec_data_valid_in.value = 1\n\n        # Wait for the output to stabilize\n        await Timer(5, unit=\"ns\")\n        await RisingEdge(dut.clk_in)\n\n        # Check outputs\n        await check_output_decoder(dut, expected_data=expected_data, expected_dec_sync_error=expected_dec_sync_error,\n                           expected_control_out=expected_control_out, expected_dec_error_out=expected_dec_error_out)\n\n        # Deassert valid signal\n        
dut.dec_data_valid_in.value = 0\n        await RisingEdge(dut.clk_in)\n\n\n@cocotb.test()\nasync def decoder_control_mixed_mode_dec_sync_error_test(dut):\n    \"\"\" Test decoding for mixed mode \"\"\"\n    clock = Clock(dut.clk_in, 10, unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    await RisingEdge(dut.clk_in)\n\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Mixed mode test cases\n    test_cases = [\n        # Format: (sync_header, type_field, data_in, expected_data, expected_control_out, expected_dec_sync_error, expected_dec_error_out)\n        (0b11, 0x33, 0xDDCCBB00000000, 0x0000000000000000, 0x00, 1, 0),  # Mixed mode example\n        (0b00, 0x78, 0x3456789ABCDEF0, 0x0000000000000000, 0x00, 1, 0),  # Mixed mode example\n        (0b11, 0x87, 0x00000000000000, 0x0000000000000000, 0x00, 1, 0),  # Mixed mode example\n        (0b00, 0x99, 0x000000000000AE, 0x0000000000000000, 0x00, 1, 0),  # Mixed mode example\n    ]\n\n    for sync_header, type_field, data_in, expected_data, expected_control_out, expected_dec_sync_error, expected_dec_error_out in test_cases:\n        # Set inputs\n        dut.dec_data_in.value = (sync_header << 64) | (type_field << 56) | data_in\n        dut.dec_data_valid_in.value = 1\n\n        # Wait for the output to stabilize\n        await Timer(5, unit=\"ns\")\n        await RisingEdge(dut.clk_in)\n\n\n        # Check outputs\n        await check_output_decoder(dut, expected_data=expected_data, expected_dec_sync_error=expected_dec_sync_error,\n                           expected_control_out=expected_control_out, expected_dec_error_out=expected_dec_error_out)\n\n        # Deassert valid signal\n        dut.dec_data_valid_in.value = 0\n        await RisingEdge(dut.clk_in)\n\n\n@cocotb.test()\nasync def decoder_control_mixed_mode_decoder_error_test(dut):\n    \"\"\" Test decoding for mixed mode \"\"\"\n    clock = Clock(dut.clk_in, 10, 
unit=\"ns\")  # 100 MHz\n    cocotb.start_soon(clock.start())\n\n    # Initialize DUT inputs\n    await dut_initialization(dut)\n    await RisingEdge(dut.clk_in)\n\n    dut.rst_in.value = 0\n    await RisingEdge(dut.clk_in)\n\n    # Mixed mode test cases\n    test_cases = [\n        # Format: (sync_header, type_field, data_in, expected_data, expected_control_out, expected_dec_sync_error, expected_dec_error_out)\n        (0b10, 0x13, 0xDDCCBB00000000, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0x18, 0x3456789ABCDEF0, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0x27, 0x00000000000000, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0x79, 0x000000000000AE, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0x0A, 0x0000000000A5A5, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0xD4, 0x00000000FEED55, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0x0C, 0x00000099887766, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n        (0b10, 0x22, 0x00001234567890, 0x0000000000000000, 0x00, 0, 1),  # Mixed mode example\n    ]\n\n    for sync_header, type_field, data_in, expected_data, expected_control_out, expected_dec_sync_error, expected_dec_error_out in test_cases:\n        # Set inputs\n        dut.dec_data_in.value = (sync_header << 64) | (type_field << 56) | data_in\n        dut.dec_data_valid_in.value = 1\n\n        # Wait for the output to stabilize\n        await Timer(5, unit=\"ns\")\n        await RisingEdge(dut.clk_in)\n\n\n        # Check outputs\n        await check_output_decoder(dut, expected_data=expected_data, expected_dec_sync_error=expected_dec_sync_error,\n                           expected_control_out=expected_control_out, expected_dec_error_out=expected_dec_error_out)\n\n        # Deassert valid signal\n        dut.dec_data_valid_in.value = 0\n        await RisingEdge(dut.clk_in)\n\n\n"}, 
"context_files": {"docs/specification.md": "# **64b/66b Codec Specification Document**\n\n## **1. Overview**\nThe 64b/66b encoding scheme is a line coding technique defined by the IEEE 802.3 standard for high-speed serial communication (e.g., 10GbE, PCIe). It addresses two primary transmission challenges:\n- **Clock recovery**: Ensuring frequent transitions to maintain synchronization.\n- **DC balance**: Avoiding long sequences of identical bits that might skew signal integrity.\n\nThe encoder maps 64-bit data along with optional control indicators into a 66-bit encoded format. The decoder reconstructs the original 64-bit data and control information, detecting synchronization and format errors.\n\n## **2. Module Hierarchy**\n```\ntop_64b66b_codec (Top-level)\n\u251c\u2500\u2500 encoder_data_64b66b (Data path encoder)\n\u251c\u2500\u2500 encoder_control_64b66b (Control path encoder)\n\u2514\u2500\u2500 decoder_data_control_64b66b (Data and control path decoder)\n```\n\n## **3. Top-Level Module**\n\n### **3.1 top_64b66b_codec**\nThe system integrator instantiates and connects all submodules. 
Routes signals based on control inputs and handles data flow between encoder/decoder paths.\n\n#### **I/O Port List**\n| Port                  | Direction | Width | Description                      |\n|-----------------------|-----------|-------|----------------------------------|\n| `clk_in`              | input     | 1     | System clock (rising-edge)       |\n| `rst_in`              | input     | 1     | Active-high synchronous reset    |\n| `enc_data_in`         | input     | 64    | Data input for encoding          |\n| `enc_control_in`      | input     | 8     | Control input for encoding       |\n| `enc_data_out`        | output    | 66    | Encoded output                   |\n| `dec_data_valid_in`   | input     | 1     | Decoder input valid signal       |\n| `dec_data_in`         | input     | 66    | Encoded input for decoding       |\n| `dec_data_out`        | output    | 64    | Decoded data output              |\n| `dec_control_out`     | output    | 8     | Decoded control output           |\n| `dec_sync_error`      | output    | 1     | Sync header error flag           |\n| `dec_error_out`       | output    | 1     | Comprehensive error indicator    |\n\n## **4. 
Submodules**\n\n### **4.1 encoder_data_64b66b**\nHandles pure data path encoding with \"01\" sync headers.\n\n#### **Key Features**\n- Processes 64-bit data words\n- Generates 2'b01 sync header\n- Zero-latency data pass-through\n- No type field insertion\n\n#### **I/O Port List**\n| Port                  | Direction | Width | Description                      |\n|-----------------------|-----------|-------|----------------------------------|\n| `clk_in`              | input     | 1     | System clock                     |\n| `rst_in`              | input     | 1     | Active-high reset                |\n| `encoder_data_in`     | input     | 64    | Input data word                  |\n| `encoder_control_in`  | input     | 8     | Control mask                     |\n| `encoder_data_out`    | output    | 66    | Encoded output (01 + data)       |\n\n### **4.2 encoder_control_64b66b**\nEncodes control sequences based on both the control flags and matching data patterns.\n\n- Adds sync header `10`\n- Appends an **8-bit type field** to classify the control pattern\n- Encodes remaining 56 bits based on predefined mappings\n- Detects and encodes special sequences such as:\n  - Idle sequences\n  - Start/End of packet delimiters\n  - Custom application codes\n\nControl encoding ensures:\n- Consistent mapping for control events\n- Valid type field generation\n- Zero padding or data substitution to enforce format\n\n#### **I/O Port List**\n| Port                  | Direction | Width | Description                      |\n|-----------------------|-----------|-------|----------------------------------|\n| `clk_in`              | input     | 1     | System clock                     |\n| `rst_in`              | input     | 1     | Active-high reset                |\n| `encoder_data_in`     | input     | 64    | Input data/control word          |\n| `encoder_control_in`  | input     | 8     | Control mask                     |\n| `encoder_data_out`    | output    | 66    | Encoded 
output (10 + type + data)|\n\n#### **Design Specification**\nThe encoder_control_64b66b converts 64-bit data words and 8-bit control words into 66-bit encoded output with three operational modes:\n\n1. **Control-Only Mode**:  \n   - Activated when `encoder_control_in` = 8'hFF\n   - Sync word set to 2'b10\n   - Full control character replacement\n\n2. **Mixed Mode**:  \n   - Activated for 0 < `encoder_control_in` < 8'hFF\n   - Sync word set to 2'b10\n   - Combines data bytes and control characters\n\n#### **Control Character Encoding**\n| Control Character | Hex Value | Encoded Value | Usage                |\n|-------------------|-----------|---------------|----------------------|\n| Idle (/I/)        | 0x07      | 7'h00         | Link synchronization |\n| Start (/S/)       | 0xFB      | 4'b0000       | Packet delineation   |\n| Terminate (/T/)   | 0xFD      | 4'b0000       | End-of-packet        |\n| Error (/E/)       | 0xFE      | 7'h1E         | Error propagation    |\n| Ordered Set (/Q/) | 0x9C      | 4'b1111       | Configuration        |\n\n\n#### **Valid Control Input Combinations with Type Field Lookup Table**\n\n| **Data Input [63:0]**            | **Control Input**| **Output [65:64]**| **Output [63:56]**| **Output [55:0]**                       |\n|----------------------------------|------------------|-------------------|-------------------|-----------------------------------------|\n| `I7, I6, I5, I4, I3, I2, I1, I0` | `8'b11111111`    | `2'b10`           | `0x1e`            | `C7, C6, C5, C4, C3, C2, C1, C0`        |\n| `E7, E6, E5, E4, E3, E2, E1, E0` | `8'b11111111`    | `2'b10`           | `0x1e`            | `C7, C6, C5, C4, C3, C2, C1, C0`        |\n| `D7, D6, D5, S4, I3, I2, I1, I0` | `8'b00011111`    | `2'b10`           | `0x33`            | `D7, D6, D5, 4'b0000, C3, C2, C1, C0`   |\n| `D7, D6, D5, D4, D3, D2, D1, S0` | `8'b00000001`    | `2'b10`           | `0x78`            | `D7, D6, D5, D4, D3, D2, D1, D0`        |\n| `I7, I6, I5, I4, I3, I2, 
I1, T0` | `8'b11111110`    | `2'b10`           | `0x87`            | `C7, C6, C5, C4, C3, C2, C1, 7'b0000000`|\n| `I7, I6, I5, I4, I3, I2, T1, D0` | `8'b11111110`    | `2'b10`           | `0x99`            | `C7, C6, C5, C4, C3, C2, 6'b000000, D0` |\n| `I7, I6, I5, I4, I3, T2, D1, D0` | `8'b11111100`    | `2'b10`           | `0xaa`            | `C7, C6, C5, C4, C3, 5'b00000, D1, D0`  |\n| `I7, I6, I5, I4, T3, D2, D1, D0` | `8'b11111000`    | `2'b10`           | `0xb4`            | `C7, C6, C5, C4, 4'b0000, D2, D1, D0`   |\n| `I7, I6, I5, T4, D3, D2, D1, D0` | `8'b11110000`    | `2'b10`           | `0xcc`            | `C7, C6, C5, 3'b000, D3, D2, D1, D0`    |\n| `I7, I6, T5, D4, D3, D2, D1, D0` | `8'b11100000`    | `2'b10`           | `0xd2`            | `C7, C6, 2'b00, D4, D3, D2, D1, D0`     |\n| `I7, T6, D5, D4, D3, D2, D1, D0` | `8'b11000000`    | `2'b10`           | `0xe1`            | `C7, 1'b0, D5, D4, D3, D2, D1, D0`      |\n| `T7, D6, D5, D4, D3, D2, D1, D0` | `8'b10000000`    | `2'b10`           | `0xff`            | `D6, D5, D4, D3, D2, D1, D0`            |\n| `D7, D6, D5, Q4, I3, I2, I1, I0` | `8'b00011111`    | `2'b10`           | `0x2d`            | `D7, D6, D5, 4'b1111, C3, C2, C1, C0`   |\n| `I7, I6, I5, I4, D3, D2, D1, Q0` | `8'b11110001`    | `2'b10`           | `0x4b`            | `C7, C6, C5, C4, D3, D2, D1, 4'b1111`   |\n| `D7, D6, D5, Q4, D3, D2, D1, Q0` | `8'b00010001`    | `2'b10`           | `0x55`            | `D7, D6, D5, 8'b11111111, D3, D2, D1`   |\n| `D7, D6, D5, S4, D3, D2, D1, Q0` | `8'b00010001`    | `2'b10`           | `0x66`            | `D7, D6, D5, 8'b00001111, D3, D2, D1`   |\n\n### **4.3 decoder_data_control_64b66b**\nCombined decoder handling both data and control paths. 
The decoder handles the full 66-bit word and interprets it based on the sync header.\n\n- **Sync header `01`**: Interpreted as raw data\n- **Sync header `10`**: Parsed using the type field to reconstruct original data and control meaning\n\n#### Functionality:\n- Extracts and checks sync headers\n- Maps type fields back to original control flags\n- Reconstructs data based on encoding format\n- Detects invalid sync headers and unknown control types\n- Performs data validation for encoded formats\n\n#### Error Detection:\n- **Sync Error**: Raised for invalid sync headers (neither `01` nor `10`)\n- **Format Error**: Raised if control types do not match expected format\n\n#### **I/O Port List**\n| Port                      | Direction | Width | Description                      |\n|---------------------------|-----------|-------|----------------------------------|\n| `clk_in`                  | input     | 1     | System clock                     |\n| `rst_in`                  | input     | 1     | Active-high reset                |\n| `decoder_data_valid_in`   | input     | 1     | Input data valid                 |\n| `decoder_data_in`         | input     | 66    | Encoded input                    |\n| `decoder_data_out`        | output    | 64    | Decoded data                     |\n| `decoder_control_out`     | output    | 8     | Decoded control mask             |\n| `sync_error`              | output    | 1     | Header error flag                |\n| `decoder_error_out`       | output    | 1     | Composite error indicator        |\n\n\n#### **Control Character Mapping**\n\n| Character | Hex | Usage                     |\n|-----------|-----|---------------------------|\n| /I/       | 0x07| Idle sequence             |\n| /S/       | 0xFB| Start of packet           |\n| /T/       | 0xFD| End of packet             |\n| /E/       | 0xFE| Error indication          |\n| /Q/       | 0x9C| Ordered set               |\n\n#### **Decoding Table**\n| **Type Field** | 
**decoder_control_out**  | **decoder_data_out**              |\n|----------------|--------------------------|-----------------------------------|\n| `0x1E`         | `8'b11111111`            | `{E7, E6, E5, E4, E3, E2, E1, E0}`|\n| `0x33`         | `8'b00011111`            | `{D6, D5, D4, S4, I3, I2, I1, I0}`|\n| `0x78`         | `8'b00000001`            | `{D6, D5, D4, D3, D2, D1, D0, S0}`|\n| `0x87`         | `8'b11111110`            | `{I7, I6, I5, I4, I3, I2, I1, T0}`|\n| `0x99`         | `8'b11111110`            | `{I7, I6, I5, I4, I3, I2, T1, D0}`|\n| `0xAA`         | `8'b11111100`            | `{I7, I6, I5, I4, I3, T2, D1, D0}`|\n| `0xB4`         | `8'b11111000`            | `{I7, I6, I5, I4, T3, D2, D1, D0}`|\n| `0xCC`         | `8'b11110000`            | `{I7, I6, I5, T4, D3, D2, D1, D0}`|\n| `0xD2`         | `8'b11100000`            | `{I7, I6, T5, D4, D3, D2, D1, D0}`|\n| `0xE1`         | `8'b11000000`            | `{I7, T6, D5, D4, D3, D2, D1, D0}`|\n| `0xFF`         | `8'b10000000`            | `{T7, D6, D5, D4, D3, D2, D1, D0}`|\n| `0x2D`         | `8'b00011111`            | `{D6, D5, D4, Q4, I3, I2, I1, I0}`|\n| `0x4B`         | `8'b11110001`            | `{I7, I6, I5, I4, D2, D1, D0, Q0}`|\n| `0x55`         | `8'b00010001`            | `{D6, D5, D4, Q4, D2, D1, D0, Q0}`|\n| `0x66`         | `8'b00010001`            | `{D6, D5, D4, S4, D2, D1, D0, Q0}`|\n\n- **Explanation**:\n     - `Dx`: Represents data bits from the input.\n     - `Ix`: Represents idle control characters (`/I/`).\n     - `Sx`: Represents start-of-frame control characters (`/S/`).\n     - `Tx`: Represents end-of-frame control characters (`/T/`).\n     - `Ex`: Represents error control characters (`/E/`).\n     - `Qx`: Represents ordered-set control characters (`/Q/`).\n\n#### **Error Signal Implementation**:\n   - The module generates two error signals:\n     1. 
**`sync_error`**:\n        - Asserted HIGH when the sync header is invalid (neither `2'b01` nor `2'b10`).\n        - This indicates a synchronization error, meaning the input data is not properly aligned or formatted.\n     2. **`decoder_error_out`**:\n        - Asserted HIGH when either:\n          - The type field is invalid (not in the predefined list of valid type fields).\n          - The control data (`data_in`) does not match the expected pattern for the given type field.\n        - This indicates a decoding error, meaning the input data cannot be properly decoded.\n        - The `decoder_error_out` signal is generated by combining the above two conditions.\n\n## **5. Latency**\n| Module                  | Latency |\n|-------------------------|---------|\n| encoder_data_64b66b     | 1 cycle |\n| encoder_control_64b66b  | 1 cycle |\n| decoder_data_control_64b66b | 1 cycle |\n\n## **6. Operational Notes**\n1. **Clock Domain**:\n   - All modules synchronous to clk_in\n   - No cross-clock domain handling\n\n2. **Reset Behavior**:\n   - Clears all registers\n   - Outputs forced to zero\n   - Error flags cleared\n\n3. 
**Performance Tradeoffs**:\n   - Fixed 1-cycle latency\n   - Balanced pipeline design\n   - Critical path optimization", "rtl/decoder_data_control_64b66b.sv": "module decoder_data_control_64b66b (\n    input  logic         clk_in,              // Clock signal\n    input  logic         rst_in,              // Asynchronous reset (active high)\n    input  logic         decoder_data_valid_in, // Input data valid signal\n    input  logic [65:0]  decoder_data_in,     // 66-bit encoded input\n    output logic [63:0]  decoder_data_out,    // Decoded 64-bit data output\n    output logic [7:0]   decoder_control_out, // Decoded 8-bit control output\n    output logic         sync_error,          // Sync error flag\n    output logic         decoder_error_out    // Type field error flag\n);\n\n    logic [1:0] sync_header;\n    logic [7:0] type_field;\n    logic [63:0] data_in;\n    logic type_field_valid;\n    logic decoder_wrong_ctrl_received;\n    logic decoder_wrong_type_field;\n\n    assign sync_header = decoder_data_in[65:64];\n    assign type_field = decoder_data_in[63:56];\n    assign data_in = decoder_data_in[55:0];\n\n    always_comb begin\n        type_field_valid = 1'b0;\n        if (sync_header == 2'b10) begin\n            case (type_field)\n                8'h1E, 8'h33, 8'h78, 8'h87, 8'h99, 8'hAA, 8'hB4, \n                8'hCC, 8'hD2, 8'hE1, 8'hFF, 8'h2D, 8'h4B, 8'h55, 8'h66: \n                    type_field_valid = 1'b1;\n                default: type_field_valid = 1'b0;\n            endcase\n        end\n    end\n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            decoder_control_out <= 8'b0;\n        end \n        else if (decoder_data_valid_in) begin\n            if (sync_header == 2'b10) begin\n                case (type_field)\n                    8'h1E: decoder_control_out <= 8'b11111111;\n                    8'h33: decoder_control_out <= 8'b00011111;\n                    8'h78: decoder_control_out <= 
8'b00000001;\n                    8'h87: decoder_control_out <= 8'b11111110;\n                    8'h99: decoder_control_out <= 8'b11111110;\n                    8'hAA: decoder_control_out <= 8'b11111100;\n                    8'hB4: decoder_control_out <= 8'b11111000;\n                    8'hCC: decoder_control_out <= 8'b11110000;\n                    8'hD2: decoder_control_out <= 8'b11100000;\n                    8'hE1: decoder_control_out <= 8'b11000000;\n                    8'hFF: decoder_control_out <= 8'b10000000;\n                    8'h2D: decoder_control_out <= 8'b00011111;\n                    8'h4B: decoder_control_out <= 8'b11110001;\n                    8'h55: decoder_control_out <= 8'b00010001;\n                    8'h66: decoder_control_out <= 8'b00010001;\n                    default: decoder_control_out <= 8'b0;\n                endcase\n            end\n            else begin\n                decoder_control_out <= 8'b0;\n            end\n        end\n    end\n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            decoder_data_out <= 64'b0;\n        end \n        else if (decoder_data_valid_in) begin\n            case (sync_header)\n                2'b01: begin\n                    decoder_data_out <= decoder_data_in[63:0];\n                end\n                2'b10: begin\n                    case (type_field)\n                        8'h1E: if (data_in[55:0] == {8{7'h1E}}) decoder_data_out <= {8{8'hFE}};\n                               else decoder_data_out <= {8{8'h07}};\n                        8'h33: decoder_data_out <= {data_in[55:32], 8'hFB, {4{8'h07}}};\n                        8'h78: decoder_data_out <= {data_in[55:0], 8'hFB};\n                        8'h87: decoder_data_out <= {{7{8'h07}},8'hFD};\n                        8'h99: decoder_data_out <= {{6{8'h07}}, 8'hFD, data_in[7:0]};\n                        8'hAA: decoder_data_out <= {{5{8'h07}}, 8'hFD, data_in[15:0]};\n                        
8'hB4: decoder_data_out <= {{4{8'h07}}, 8'hFD, data_in[23:0]};\n                        8'hCC: decoder_data_out <= {{3{8'h07}}, 8'hFD, data_in[31:0]};\n                        8'hD2: decoder_data_out <= {{2{8'h07}}, 8'hFD, data_in[39:0]};\n                        8'hE1: decoder_data_out <= {8'h07, 8'hFD, data_in[47:0]};\n                        8'hFF: decoder_data_out <= {8'hFD, data_in[55:0]};\n                        8'h2D: decoder_data_out <= {data_in[55:32], 8'h9C, {4{8'h07}}};\n                        8'h4B: decoder_data_out <= {{4{8'h07}}, data_in[28:5], 8'h9C};\n                        8'h55: decoder_data_out <= {data_in[55:32], 8'h9C, data_in[23:0], 8'h9C};\n                        8'h66: decoder_data_out <= {data_in[55:32], 8'hFB, data_in[23:0], 8'h9C};\n                        default: decoder_data_out <= 64'b0;\n                    endcase\n                end\n                default: decoder_data_out <= 64'b0;\n            endcase\n        end\n    end\n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            sync_error <= 1'b0;\n        end \n        else if (decoder_data_valid_in) begin\n            sync_error <= (sync_header != 2'b01 && sync_header != 2'b10);\n        end\n    end\n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            decoder_wrong_type_field <= 1'b0;\n        end \n        else if (decoder_data_valid_in) begin\n            if (sync_header == 2'b10) begin\n                decoder_wrong_type_field <= ~type_field_valid;\n            end\n            else begin\n                decoder_wrong_type_field <= 1'b0;\n            end\n        end\n    end\n    \n    assign decoder_error_out = decoder_wrong_ctrl_received || decoder_wrong_type_field;\n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            decoder_wrong_ctrl_received <= 1'b0;\n        end \n        else if (decoder_data_valid_in) begin\n            if 
(sync_header == 2'b10) begin\n                case (type_field)\n                    8'h1E: if ((data_in[55:0] == {8{7'h1E}}) || (data_in[55:0] == {8{7'h00}})) decoder_wrong_ctrl_received <= 1'b0;\n                           else decoder_wrong_ctrl_received <= 1'b1;\n                    8'h33: if (data_in [31:0] != 32'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'h87: if (data_in [55:0] != 56'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'h99: if (data_in [55:8] != 48'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'hAA: if (data_in [55:16] != 40'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'hB4: if (data_in [55:24] != 32'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'hCC: if (data_in [55:32] != 24'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'hD2: if (data_in [55:40] != 16'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'hE1: if (data_in [55:48] != 8'd0) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'h2D: if (data_in [31:0] != 32'hF0000000) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;\n                    8'h4B: if (data_in[55:28] != {4{7'h00}} && data_in[3:0] != 4'b1111) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0;              \n                    8'h55: if 
(data_in[31:24] != 8'hFF) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0; \n                    8'h66: if (data_in[31:24] != 8'h0F) decoder_wrong_ctrl_received <= 1'b1;\n                           else decoder_wrong_ctrl_received <= 1'b0; \n                    default: decoder_wrong_ctrl_received <= 1'b0; \n                endcase\n            end\n            else begin\n                decoder_wrong_ctrl_received <= 1'b0;\n            end\n        end\n    end\n\nendmodule", "rtl/encoder_control_64b66b.sv": "module encoder_control_64b66b (\n    input  logic         clk_in,              // Clock signal\n    input  logic         rst_in,              // Asynchronous reset (active high)\n    input  logic [63:0]  encoder_data_in,     // 64-bit data input\n    input  logic [7:0]   encoder_control_in,  // 8-bit control input\n    output logic [65:0]  encoder_data_out     // 66-bit encoded output\n);\n\n\n    function [7:0] get_output(input [63:0] data_in, input [7:0] control_input);\n        case (control_input)\n            8'b11111111: begin\n                if (data_in == 64'h0707070707070707) get_output = 8'h1e;\n                else if (data_in == 64'hFEFEFEFEFEFEFEFE) get_output = 8'h1e;\n                else if (data_in == 64'h07070707070707FD) get_output = 8'h87;\n                else get_output = 8'b0;\n            end\n            8'b00011111: begin\n                if (data_in[39:0] == 40'hFB07070707) get_output = 8'h33;\n                else if (data_in[39:0] == 40'h9C07070707) get_output = 8'h2d;\n                else get_output = 8'b0;\n            end\n            8'b00000001: begin\n                if (data_in[7:0] == 8'hFB) get_output = 8'h78;\n                else get_output = 8'b0;\n            end\n            8'b11111110: begin\n                if (data_in[63:8] == 56'h070707070707FD) get_output = 8'h99;\n                else get_output = 8'b0;\n            end\n            8'b11111100: 
begin\n                if (data_in[63:16] == 48'h0707070707FD) get_output = 8'haa;\n                else get_output = 8'b0;\n            end\n            8'b11111000: begin\n                if (data_in[63:24] == 40'h07070707FD) get_output = 8'hb4;\n                else get_output = 8'b0;\n            end\n            8'b11110000: begin\n                if (data_in[63:32] == 32'h070707FD) get_output = 8'hcc;\n                else get_output = 8'b0;\n            end\n            8'b11100000: begin\n                if (data_in[63:40] == 24'h0707FD) get_output = 8'hd2;\n                else get_output = 8'b0;\n            end\n            8'b11000000: begin\n                if (data_in[63:48] == 16'h07FD) get_output = 8'he1;\n                else get_output = 8'b0;\n            end\n            8'b10000000: begin\n                if (data_in[63:56] == 8'hFD) get_output = 8'hff;\n                else get_output = 8'b0;\n            end\n            8'b11110001: begin\n                if ({data_in[63:32], data_in[7:0]} == 40'h070707079C) get_output = 8'h4b;\n                else get_output = 8'b0;\n            end\n            8'b00010001: begin\n                if ({data_in[39:32], data_in[7:0]} == 16'h9C9C) get_output = 8'h55;\n                else if ({data_in[39:32], data_in[7:0]} == 16'hFB9C) get_output = 8'h66;\n                else get_output = 8'b0;\n            end\n            default: get_output = 8'b0;\n        endcase\n    endfunction\n\n    logic [1:0] sync_ctrl_word;\n    logic [7:0] type_field;\n    logic [55:0] encoded_ctrl_words;\n\n    always @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            encoded_ctrl_words <= 56'b0;\n            sync_ctrl_word <= 2'b00;\n            type_field <= 8'b0;\n        end else begin\n            sync_ctrl_word <= 2'b10;\n            type_field <= get_output(encoder_data_in, encoder_control_in);\n\n            case (encoder_control_in)\n                8'b11111111: begin\n                    
if (encoder_data_in == 64'h0707070707070707) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00};\n                    else if (encoder_data_in == 64'hFEFEFEFEFEFEFEFE) encoded_ctrl_words <= {7'h1E, 7'h1E, 7'h1E, 7'h1E, 7'h1E, 7'h1E, 7'h1E, 7'h1E};\n                    else if (encoder_data_in == 64'h07070707070707FD) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b00011111: begin\n                    if (encoder_data_in[39:0] == 40'hFB07070707) encoded_ctrl_words <= {encoder_data_in[63:40], 4'h0, 7'h00, 7'h00, 7'h00, 7'h00};\n                    else if (encoder_data_in[39:0] == 40'h9C07070707) encoded_ctrl_words <= {encoder_data_in[63:40], 4'hF, 7'h00, 7'h00, 7'h00, 7'h00};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b00000001: begin\n                    if (encoder_data_in[7:0] == 8'hFB) encoded_ctrl_words <= {encoder_data_in[63:8]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b11111110: begin\n                    if (encoder_data_in[63:8] == 56'h070707070707FD) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 6'b000000, encoder_data_in[7:0]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b11111100: begin\n                    if (encoder_data_in[63:16] == 48'h0707070707FD) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 7'h00, 7'h00, 5'b00000, encoder_data_in[15:0]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b11111000: begin\n                    if (encoder_data_in[63:24] == 40'h07070707FD) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 7'h00, 4'b0000, encoder_data_in[23:0]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n               
 8'b11110000: begin\n                    if (encoder_data_in[63:32] == 32'h070707FD) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 3'b000, encoder_data_in[31:0]};\n                    else encoded_ctrl_words <= 56'hFFFFFFF;\n                end\n                8'b11100000: begin\n                    if (encoder_data_in[63:40] == 24'h0707FD) encoded_ctrl_words <= {7'h00, 7'h00, 2'b00, encoder_data_in[39:0]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b11000000: begin\n                    if (encoder_data_in[63:48] == 16'h07FD) encoded_ctrl_words <= {7'h00, 1'b0, encoder_data_in[47:0]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b10000000: begin\n                    if (encoder_data_in[63:56] == 8'hFD) encoded_ctrl_words <= encoder_data_in[55:0];\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b11110001: begin\n                    if ({encoder_data_in[63:32], encoder_data_in[7:0]} == 40'h070707079C) encoded_ctrl_words <= {7'h00, 7'h00, 7'h00, 7'h00, encoder_data_in[31:8], 4'b1111};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                8'b00010001: begin\n                    if ({encoder_data_in[39:32], encoder_data_in[7:0]} == 16'h9C9C) encoded_ctrl_words <= {encoder_data_in[63:40], 8'hFF, encoder_data_in[31:8]};\n                    else if ({encoder_data_in[39:32], encoder_data_in[7:0]} == 16'hFB9C) encoded_ctrl_words <= {encoder_data_in[63:40], 8'h0F, encoder_data_in[31:8]};\n                    else encoded_ctrl_words <= 56'h0000000;\n                end\n                default: encoded_ctrl_words <= 56'h0000000;\n            endcase\n        end\n    end\n\n    assign encoder_data_out = {sync_ctrl_word, type_field, encoded_ctrl_words};\n\nendmodule", "rtl/encoder_data_64b66b.sv": "module encoder_data_64b66b (\n    input  logic         clk_in,           
   // Clock signal\n    input  logic         rst_in,              // Asynchronous reset (active high)\n    input  logic [63:0]  encoder_data_in,     // 64-bit data input\n    input  logic [7:0]   encoder_control_in,  // 8-bit control input\n    output logic [65:0]  encoder_data_out     // 66-bit encoded output\n);\n\n    logic [1:0] sync_word;     \n    logic [63:0] encoded_data; \n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            sync_word <= 2'b00;            \n        end \n        else begin\n            if (encoder_control_in == 8'b00000000) begin\n                sync_word <= 2'b01;         \n            end \n            else begin\n                sync_word <= 2'b10;         \n            end\n        end\n    end\n\n    always_ff @(posedge clk_in or posedge rst_in) begin\n        if (rst_in) begin\n            encoded_data <= 64'b0;         \n        end \n        else begin\n            if (encoder_control_in == 8'b00000000) begin\n                encoded_data <= encoder_data_in; \n            end\n            else begin\n                encoded_data <= 64'b0; \n            end\n        end\n    end\n\n    assign encoder_data_out = {sync_word, encoded_data};\n\nendmodule"}}}
+{"responses_create_params": {"input": [{"role": "system", "content": "You are a language model that has the following file operations available at your disposal:\n  - **List files in a directory** by running one of the following commands: \n    - `ls`\n    - `tree`\n  - **Read files** by using:\n    - `cat <filename>`\n  - **Write files** by using:\n    - `echo <content> > <filename>`\n  - **Compile Verilog** by using `iverilog` such as:\n    - `iverilog -o <output_filename>.out -g2012 <verilog_code_file> <verilog_testbench_file>`\n  - **Run Simulation** by using:\n    - `vvp <output_filename>.out`\n  - **Find current working directory** by using:\n    - `pwd`\n  - **Update the contents of a text file from a old content to new content**\n    - `sed -i  \"problematic_line_number s/problematic_statement/non_problematic_statement/\" Buggy_RTL_code.sv`\n  - **To access a specific line of the file**\n     - `awk 'NR==line_number' file_name.sv`\n\nYou will be given a prompt and your task is to understand it and solve the given issue by using the above-mentioned commands as needed. 
In the final step, you should create a Linux patch to highlight the necessary file updates to achieve the targeted goal.\n\n  You will solve the problem step by step using the following approach of \n  - thought (thinking process of the step you're going to take)\n  - action (the command you will be running to get more details/context that's helpful to solve the problem)\n  - observation (the output from the action you will observe based on which you will take your next step)\n\n  The last step will be the final output summary and the patch itself in the following format \n  - thought (the summary of what you did and some introduction of the patch file itself)\n  - patch (a Linux-based patch that needs to be applied to reach the relevant solution)"}, {"role": "user", "content": "\nProvide me one answer for this request: The `aes128_encrypt` module in `rtl` folder performs **AES-128 encryption** by first generating **11 round keys** (one for the initial state and 10 rounds) from the **128-bit cipher key** using a **recursive key expansion process**. It begins by treating the key as **four 32-bit words** (`W[0]` to `W[3]`) and deriving new words using the **previously generated ones**. Every **fourth word (`W[i]`)** undergoes the **key schedule core transformation**, which includes a **byte-wise left rotation (`RotWord`)**, substitution via the **S-box (`SubWord`)**, and XOR of the left-most byte of `SubWord` with a **round constant (`Rcon`)**. The transformed word is XORed with the word from **four positions earlier (`W[i-4]`)** to produce the next word. Each remaining word is generated by XORing the previous word with the word four positions earlier. The key expansion process does not run serially; instead, it **generates the first necessary round keys, allowing encryption to start in parallel** while the remaining keys continue to be derived. 
This process continues until all **44 words (`W[0]` to `W[43]`)** are generated and grouped into **11 round keys**.\n\n# AES-128 Encryption Overview\n\nThe encryption process begins by **loading the 128-bit plaintext block** into a **4\u00d74 state matrix**, which undergoes **11 transformations** (one for the initial round and 10 for encryption rounds). In the **initial round**, the state is XORed with the first round key.  \n\nEach of the **10 encryption rounds** consists of four main transformations:  \n\n- **SubBytes** \u2013 Replaces each byte using the **AES S-box** to introduce non-linearity.  \n- **ShiftRows** \u2013 Cyclically shifts the rows of the state matrix to introduce diffusion.  \n- **MixColumns** \u2013 Transforms each column of the state matrix by multiplying it with a fixed **GF(2\u2078) polynomial matrix** (over **Rijndael\u2019s finite field**) to diffuse data across bytes. This operation ensures that changes in one byte affect all four bytes of a column.  \n\n  The transformation is defined as a **matrix multiplication** where each column of the state is multiplied by the following constant matrix:\n\n     |  8'h02  |  8'h03  |  8'h01  |  8'h01  |\n     |:-------:|:-------:|:-------:|:-------:|\n     |  8'h01  |  8'h02  |  8'h03  |  8'h01  |\n     |  8'h01  |  8'h01  |  8'h02  |  8'h03  |\n     |  8'h03  |  8'h01  |  8'h01  |  8'h02  |\n\n  ## **Mathematical Basis in GF(2\u2078)**\n  - **Rijndael\u2019s finite field (GF(2\u2078))** is defined by the **irreducible polynomial**: $`x^8 + x^4 + x^3 + x + 1`$ **or** `0x11B` in hexadecimal. This polynomial is used for modular reduction when performing field operations.  \n  - **Addition in GF(2\u2078)** is simply **bitwise XOR**.  \n  - **Multiplication in GF(2\u2078)** follows standard polynomial multiplication, but results are reduced **modulo ($`x^8 + x^4 + x^3 + x + 1`$)** to ensure results stay within the field.  
\n  - To implement multiplication by `{02}` (0x02) in hardware, a left shift (`x << 1`) is used, followed by XOR with `0x1B` if the most significant bit was set (to ensure modular reduction).  \n  - Multiplication by `{03}` (0x03) is computed as `{02} \u2295 {01}`, This is expressed as `{03} * x = ({02} * x) \u2295 x`.\n\n  This operation is **skipped in the final round** to maintain proper decryption symmetry.  \n\n- **AddRoundKey** \u2013 XORs the state matrix with the corresponding round key.  \n\nAfter **10 rounds**, the final state matrix is transformed into the **128-bit ciphertext output**, completing the AES-128 encryption process.\n\n\nThe key expansion and the encryption can happen simultaneously, by asserting both `i_start` and `i_update_key`. Also, if the stored expanded key is the one that should be used for the encryption, only an `i_start` signal may be asserted and the key will not be updated.\n\nDuring testing, the module failed to produce the expected output, leading to incorrect results. The `sbox_enc` module and the `Rcon` values were previously tested, and they have no errors.\nThe module and its testbench are available in the current working directory for debugging, and the expected output is available in the testbench. Could you help debug and fix the RTL to ensure correct functionality?\n\n\nPlease provide your response as plain text without any JSON formatting. 
Your response will be saved directly to: rtl/aes128_encrypt.sv."}]}, "verifier_metadata": {"task_id": "cvdp_agentic_AES_encryption_decryption_0003", "categories": ["cid016", "medium"], "difficulty": "medium", "target_files": ["rtl/aes128_encrypt.sv"], "harness_files": {"docker-compose.yml": "services:\n  sanity:\n    image: __OSS_SIM_IMAGE__\n    volumes:\n      - ./src:/src/      \n    working_dir : /code/rundir\n    env_file    : ./src/.env\n    command     : pytest /src/test_runner.py -s -v -o cache_dir=/rundir/harness/.cache\n", "src/.env": "VERILOG_SOURCES = /code/rtl/aes128_encrypt.sv \nTOPLEVEL        = aes128_encrypt\nMODULE          = test_aes128_encrypt\nSIM             = icarus\nTOPLEVEL_LANG   = verilog\nPYTHONPATH      = /src\nHASH            = 3-debug-and-fix-aes128_encrypt\nWAVE            = true", "src/harness_library.py": "import cocotb\nfrom cocotb.triggers import FallingEdge, RisingEdge, Timer\nfrom collections import deque\n\nasync def dut_init(dut):\n    # iterate all the input signals and initialize with 0\n    for signal in dut:\n        try:\n            signal.value = 0\n        except Exception:\n            pass\n\nclass aes128_encrypt:\n    # AES S-Box\n    S_BOX = [\n        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,\n        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,\n        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,\n        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,\n        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,\n        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,\n        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,\n        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 
0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,\n        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,\n        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,\n        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,\n        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,\n        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,\n        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,\n        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,\n        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16\n    ]\n    \n    # AES Rcon (Round constants)\n    RCON = [0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36]\n\n    def __init__(self):\n        self.expanded_key = [0] * 44\n        self.data = 0\n    \n    def reset(self):\n        self.expanded_key = [0] * 44\n        self.data = 0\n\n    def sub_word(self, word):\n        return (self.S_BOX[(word >> 24) & 0xFF] << 24) |\\\n               (self.S_BOX[(word >> 16) & 0xFF] << 16) |\\\n               (self.S_BOX[(word >> 8) & 0xFF] << 8) |\\\n               (self.S_BOX[word & 0xFF])\n\n    def rot_word(self, word):\n        return ((word << 8) & 0xFFFFFFFF) | (word >> 24)\n\n    def update_key(self, key):\n        assert 0 <= key < (1 << 128), \"Key must be a 128-bit integer.\"\n        \n        for i in range(4):\n            self.expanded_key[i] = (key >> (96 - i * 32)) & 0xFFFFFFFF\n        \n        for i in range(4, 44):\n            temp = self.expanded_key[i - 1]\n            if i % 4 == 0:\n                temp = self.sub_word(self.rot_word(temp)) ^ (self.RCON[i // 4] << 24)\n            self.expanded_key[i] = 
self.expanded_key[i - 4] ^ temp\n\n    def update(self, data):\n        assert 0 <= data < (1 << 128), \"Data must be a 128-bit integer.\"\n        \n        # Convert data to 4x4 state matrix (column-major order)\n        state = [[(data >> (120 - 8 * (i + 4 * j))) & 0xFF for j in range(4)] for i in range(4)]\n\n        # Initial AddRoundKey\n        self.add_round_key(state, 0)\n\n        # 9 main rounds\n        for round in range(1, 10):\n            self.sub_bytes(state)\n            self.shift_rows(state)\n            self.mix_columns(state)\n            self.add_round_key(state, round)\n\n        # Final round (no MixColumns)\n        self.sub_bytes(state)\n        self.shift_rows(state)\n        self.add_round_key(state, 10)\n\n        # Convert state matrix back to 128-bit integer\n        encrypted_data = 0\n        for j in range(4):\n            for i in range(4):\n                encrypted_data = (encrypted_data << 8) | state[i][j]\n        \n        self.data = encrypted_data\n\n    def add_round_key(self, state, round):\n        for j in range(4):\n            word = self.expanded_key[round * 4 + j]\n            for i in range(4):\n                state[i][j] ^= (word >> (24 - 8 * i)) & 0xFF\n\n    def sub_bytes(self, state):\n        for i in range(4):\n            for j in range(4):\n                state[i][j] = self.S_BOX[state[i][j]]\n\n    def shift_rows(self, state):\n        state[1][0], state[1][1], state[1][2], state[1][3] = state[1][1], state[1][2], state[1][3], state[1][0]\n        state[2][0], state[2][1], state[2][2], state[2][3] = state[2][2], state[2][3], state[2][0], state[2][1]\n        state[3][0], state[3][1], state[3][2], state[3][3] = state[3][3], state[3][0], state[3][1], state[3][2]\n\n    def mix_columns(self, state):\n        for j in range(4):\n            a = state[0][j], state[1][j], state[2][j], state[3][j]\n            t = a[0] ^ a[1] ^ a[2] ^ a[3]\n            u = a[0]\n            state[0][j] ^= t ^ self.xtime(a[0] ^ 
a[1])\n            state[1][j] ^= t ^ self.xtime(a[1] ^ a[2])\n            state[2][j] ^= t ^ self.xtime(a[2] ^ a[3])\n            state[3][j] ^= t ^ self.xtime(a[3] ^ u)\n\n    def xtime(self, x):\n        return ((x << 1) ^ 0x1B) & 0xFF if x & 0x80 else x << 1\n", "src/test_aes128_encrypt.py": "import cocotb\nfrom cocotb.clock import Clock\nfrom cocotb.triggers import RisingEdge, Timer\nimport harness_library as hrs_lb\nimport random\n\ndef compare_values(dut, model, debug=0):\n    dut_data  = int(dut.o_data.value)\n\n    model_data = model.data\n\n    if debug == 1:\n        print(\"\\nOUTPUTS\")\n        print(f\"DUT o_data  = {hex(dut_data)} \\nMODEL o_data  = {hex(model_data)}\")\n    \n    assert dut_data == model_data,  f\"[ERROR] DUT o_data does not match model o_data: {hex(dut_data)} != {hex(model_data)}\"\n\n@cocotb.test()\nasync def test_aes128_encrypt(dut):\n    \"\"\"Test the aes128_encrypt module with edge cases and random data.\"\"\"\n    cocotb.start_soon(Clock(dut.clk, 10, unit='ns').start())\n\n    model = hrs_lb.aes128_encrypt()\n\n    resets = 4\n    runs = 1000\n\n    data_min = 0\n    data_max = 2**128 - 1\n    \n    await hrs_lb.dut_init(dut)\n\n    for i in range(resets):\n        # Reset DUT\n        # Set all inputs to 0\n        dut.i_start.value      = 0\n        dut.i_update_key.value = 0\n        dut.i_key.value        = 0\n        dut.i_data.value       = 0\n        dut.rst_async_n.value  = 0\n        await RisingEdge(dut.clk)\n        dut.rst_async_n.value  = 1\n        await RisingEdge(dut.clk)\n\n        model.reset()\n\n        compare_values(dut, model)\n\n        for j in range(runs):\n            key = random.randint(data_min, data_max)\n            data = random.randint(data_min, data_max)\n            if j == 0:\n                update_key = 1\n            else:\n                update_key = random.randint(0,1)\n\n            dut.i_update_key.value = update_key\n            dut.i_key.value        = key\n            
dut.i_data.value       = data\n            dut.i_start.value      = 1\n\n            if update_key:\n                model.update_key(key)\n            \n            model.update(data)\n\n            await RisingEdge(dut.clk)\n            dut.i_start.value = 0\n            dut.i_key.value   = 0\n            dut.i_update_key.value = 0\n            dut.i_data.value       = 0\n            await RisingEdge(dut.clk)\n            while dut.o_done.value == 0:\n                await RisingEdge(dut.clk)\n\n            compare_values(dut, model)\n", "src/test_runner.py": "import cocotb\nimport os\nimport pytest\nimport random\nfrom cocotb_tools.runner import get_runner\n\n# Environment configuration\nverilog_sources = os.getenv(\"VERILOG_SOURCES\").split()\ntoplevel_lang   = os.getenv(\"TOPLEVEL_LANG\")\nsim             = os.getenv(\"SIM\", \"icarus\")\ntoplevel        = os.getenv(\"TOPLEVEL\")\nmodule          = os.getenv(\"MODULE\")\nwave            = bool(os.getenv(\"WAVE\"))\n\ndef runner():\n    # Configure and run the simulation\n    sim_runner = get_runner(sim)\n    sim_runner.build(\n        sources=verilog_sources,\n        hdl_toplevel=toplevel,\n        always=True,\n        clean=True,\n        verbose=True,\n        timescale=(\"1ns\", \"1ns\"),\n        log_file=\"sim.log\"\n    )\n\n    # Run the test\n    sim_runner.test(hdl_toplevel=toplevel, test_module=module, waves=True)\n\n\ndef test_data():\n    # Run the simulation with specified parameters\n    runner()"}, "context_files": {"verif/tb_aes128_enc.sv": "module tb_aes128_enc;\n\nlocalparam NBW_KEY  = 'd128;\nlocalparam NBW_DATA = 'd128;\n\nlogic                clk;\nlogic                rst_async_n;\nlogic                i_update_key;\nlogic [NBW_KEY-1:0]  i_key;\nlogic                i_start;\nlogic [NBW_DATA-1:0] i_data;\nlogic                o_done;\nlogic [NBW_DATA-1:0] o_data;\n\naes128_encrypt #(\n    .NBW_KEY(NBW_KEY),\n    .NBW_DATA(NBW_DATA)\n) uu_aes128_encrypt (\n    .clk(clk),\n    
.rst_async_n(rst_async_n),\n    .i_update_key(i_update_key),\n    .i_key(i_key),\n    .i_start(i_start),\n    .i_data(i_data),\n    .o_done(o_done),\n    .o_data(o_data)\n);\n\ntask Simple_test(logic update_key);\n    @(negedge clk);\n    i_key = 128'h2b7e151628aed2a6abf7158809cf4f3c;\n    i_data = 128'h3243f6a8885a308d313198a2e0370734;\n    i_update_key = update_key;\n    i_start = 1;\n\n    @(negedge clk);\n    i_start = 0;\n    i_update_key = 0;\n    i_key = 0;\n\n    @(posedge o_done);\n    @(negedge clk);\n\n    if(o_data == 128'h3925841d02dc09fbdc118597196a0b32) begin\n        $display(\"PASS\");\n    end else begin\n        $display(\"FAIL\");\n        $display(\"Expected output: %h\", 128'h3925841d02dc09fbdc118597196a0b32);\n        $display(\"Observed output: %h\", o_data);\n    end\nendtask\n\ninitial begin\n    $dumpfile(\"test.vcd\");\n    $dumpvars(0,tb_aes128_enc);\nend\n\nalways #5 clk = ~clk;\n\ninitial begin\n    clk = 0;\n    i_start = 0;\n    rst_async_n = 1;\n    #1;\n    rst_async_n = 0;\n    #2;\n    rst_async_n = 1;\n    @(negedge clk);\n\n    // Tasks go here\n    Simple_test(1'b1);\n    Simple_test(1'b0);\n\n    @(negedge clk);\n    @(negedge clk);\n\n    $finish();\nend\n\nendmodule"}}}
+{"responses_create_params": {"input": [{"role": "system", "content": "You are a language model that has the following file operations available at your disposal:\n  - **List files in a directory** by running one of the following commands: \n    - `ls`\n    - `tree`\n  - **Read files** by using:\n    - `cat <filename>`\n  - **Write files** by using:\n    - `echo <content> > <filename>`\n  - **Compile Verilog** by using `iverilog` such as:\n    - `iverilog -o <output_filename>.out -g2012 <verilog_code_file> <verilog_testbench_file>`\n  - **Run Simulation** by using:\n    - `vvp <output_filename>.out`\n  - **Find current working directory** by using:\n    - `pwd`\n  - **Update the contents of a text file from a old content to new content**\n    - `sed -i  \"problematic_line_number s/problematic_statement/non_problematic_statement/\" Buggy_RTL_code.sv`\n  - **To access a specific line of the file**\n     - `awk 'NR==line_number' file_name.sv`\n\nYou will be given a prompt and your task is to understand it and solve the given issue by using the above-mentioned commands as needed. 
In the final step, you should create a Linux patch to highlight the necessary file updates to achieve the targeted goal.\n\n  You will solve the problem step by step using the following approach of \n  - thought (thinking process of the step you're going to take)\n  - action (the command you will be running to get more details/context that's helpful to solve the problem)\n  - observation (the output from the action you will observe based on which you will take your next step)\n\n  The last step will be the final output summary and the patch itself in the following format \n  - thought (the summary of what you did and some introduction of the patch file itself)\n  - patch (a Linux-based patch that needs to be applied to reach the relevant solution)"}, {"role": "user", "content": "\nProvide me one answer for this request: The `aes128_decrypt` module in the `rtl` folder performs **AES-128 decryption** by first using the `aes128_key_expansion` module to generate **11 round keys** (one for the initial state and 10 rounds) from the **128-bit cipher key** using a **recursive key expansion process**. It begins by treating the key as **four 32-bit words** (`W[0]` to `W[3]`) and deriving new words using the **previously generated ones**. Every **fourth word (`W[i]`)** undergoes the **key schedule core transformation**, which includes a **byte-wise left rotation (`RotWord`)**, substitution via the **S-box (`SubWord`)**, and XOR of the left-most byte of `SubWord` with a **round constant (`Rcon`)**. The transformed word is XORed with the word from **four positions earlier (`W[i-4]`)** to produce the next word. Each remaining word is generated by XORing the previous word with the word four positions earlier. The key expansion process runs serially, that is, if a key update is required, it first needs to expand it so only then can it perform the decryption operation. 
This process continues until all **44 words (`W[0]` to `W[43]`)** are generated and grouped into **11 round keys**.\n\n# AES-128 Decryption Overview\n\nAES-128 decryption operation is done in the `aes128_decrypt` module and it is a process that **recovers the original plaintext** from a **128-bit ciphertext** by applying a series of transformations. The **ciphertext** is loaded into a **4\u00d74 state matrix**, which undergoes **11 transformations** (one initial AddRoundKey step and 10 decryption rounds).  \n\n## **Decryption Process**\n\n1. **Initial Step**:  \n   - **AddRoundKey** \u2013 The state matrix is XORed with a round key derived from the original secret key (cipher key).  \n\n2. **9 Main Decryption Rounds** (Repeated 9 times):  \n   - **InvShiftRows** \u2013 The last three rows of the state matrix are **cyclically shifted to the right** by 1, 2, and 3 bytes, respectively, keeping the first row unchanged.  \n   - **InvSubBytes** \u2013 Each byte in the state is replaced using a **fixed substitution table** known as the **inverse AES S-box**, which maps each byte to a new value in a non-linear way.  \n   - **AddRoundKey** \u2013 The state matrix is XORed with a round key derived from the original secret key.  \n   - **InvMixColumns** \u2013 A mathematical transformation that modifies each column of the state by multiplying it with a fixed **4\u00d74 matrix** in **GF(2\u2078)**, ensuring that the data is spread across multiple bytes.  \n\n3. 
**Final Round** (10th Round, Without InvMixColumns):  \n   - **InvShiftRows**  \n   - **InvSubBytes**  \n   - **AddRoundKey**  \n\n## **Mathematical Basis of InvMixColumns in GF(2\u2078)**  \n\nTo apply **InvMixColumns**, each column of the state matrix is multiplied by the following **fixed matrix**:\n\n|  8'h0E  |  8'h0B  |  8'h0D  |  8'h09  |\n|:-------:|:-------:|:-------:|:-------:|\n|  8'h09  |  8'h0E  |  8'h0B  |  8'h0D  |\n|  8'h0D  |  8'h09  |  8'h0E  |  8'h0B  |\n|  8'h0B  |  8'h0D  |  8'h09  |  8'h0E  |\n\nThis multiplication follows special rules defined in **GF(2\u2078)** (Galois Field of 256 elements), a mathematical structure that allows modular arithmetic on 8-bit values.\n\n  ## **Mathematical Basis in GF(2\u2078)**\n  - **Rijndael\u2019s finite field (GF(2\u2078))** is defined by the **irreducible polynomial**: $`x^8 + x^4 + x^3 + x + 1`$ **or** `0x11B` in hexadecimal. This polynomial is used for modular reduction when performing field operations.  \n  - **Addition in GF(2\u2078)** is simply **bitwise XOR**.  \n  - **Multiplication in GF(2\u2078)** follows standard polynomial multiplication, but results are reduced **modulo ($`x^8 + x^4 + x^3 + x + 1`$)** to ensure results stay within the field.  \n  - To implement multiplication by `{02}` (0x02) in hardware, a left shift (`x << 1`) is used, followed by XOR with `0x1B` if the most significant bit was set (to ensure modular reduction).  \n  - **Multiplication by constants** follows these rules:  \n    - Multiplication by `{0E}` (0x0E) is computed as `{02} \u2295 {04} \u2295 {08}`  \n    - Multiplication by `{0B}` (0x0B) is computed as `{02} \u2295 {08} \u2295 1`  \n    - Multiplication by `{0D}` (0x0D) is computed as `{04} \u2295 {08} \u2295 1`  \n    - Multiplication by `{09}` (0x09) is computed as `{08} \u2295 1`  \n\n  This operation is **skipped in the final round** to maintain proper decryption symmetry.  \n\n- **AddRoundKey** \u2013 XORs the state matrix with the corresponding round key.  
\n\nThese rules ensure that **InvMixColumns** correctly modifies the state matrix so that information from each byte is distributed across multiple columns.\n\n## **Final Decryption Steps**\nAfter **10 decryption rounds**, the state matrix is transformed back into the **128-bit plaintext**, completing the AES-128 decryption process.\n\n---\n\nThe key expansion, when asserted by `i_update_key` and `i_start`, must happen before the decryption algorithm, so the proper key is used in the operation. If the stored expanded key is the one that should be used for the decryption, only an `i_start` signal may be asserted and the key will not be updated.\n\nDuring testing, the module failed to produce the expected output, leading to incorrect results. The `sbox`, `inv_sbox` modules and the `Rcon` values were previously tested, and they have no errors.\nThe modules and the testbench are available in the current working directory for debugging, and the expected output is available in the testbench. Could you help debug and fix the RTL to ensure correct functionality? 
You can change the interface of internal blocks as you see fit, but you may not change the interface from `aes128_decrypt`.\n\n\nName the files as: ['rtl/aes128_decrypt.sv', 'rtl/aes128_key_expansion.sv']."}]}, "verifier_metadata": {"task_id": "cvdp_agentic_AES_encryption_decryption_0005", "categories": ["cid016", "medium"], "difficulty": "medium", "target_files": ["rtl/aes128_decrypt.sv", "rtl/aes128_key_expansion.sv"], "harness_files": {"docker-compose.yml": "services:\n  sanity:\n    image: __OSS_SIM_IMAGE__\n    volumes:\n      - ./src:/src/      \n    working_dir : /code/rundir\n    env_file    : ./src/.env\n    command     : pytest /src/test_runner.py -s -v -o cache_dir=/rundir/harness/.cache\n", "src/.env": "VERILOG_SOURCES = /code/rtl/aes128_decrypt.sv /code/rtl/aes128_key_expansion.sv /code/rtl/sbox.sv /code/rtl/inv_sbox.sv \nTOPLEVEL        = aes128_decrypt\nMODULE          = test_aes128_decrypt\nSIM             = icarus\nTOPLEVEL_LANG   = verilog\nPYTHONPATH      = /src\nHASH            = 5-debug-and-fix-aes128_decrypt\nWAVE            = true", "src/harness_library.py": "import cocotb\nfrom cocotb.triggers import FallingEdge, RisingEdge, Timer\nfrom collections import deque\n\nasync def dut_init(dut):\n    # iterate all the input signals and initialize with 0\n    for signal in dut:\n        try:\n            signal.value = 0\n        except Exception:\n            pass\n\nclass aes128_decrypt:\n    # AES S-Box\n    S_BOX = [\n        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,\n        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,\n        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,\n        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,\n        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,\n        
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,\n        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,\n        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,\n        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,\n        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,\n        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,\n        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,\n        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,\n        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,\n        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,\n        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16\n    ]\n    \n    # AES Rcon (Round constants)\n    RCON = [0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36]\n\n    def __init__(self):\n        self.expanded_key = [0] * 44\n        self.decrypted_data = 0\n        self.inv_s_box = [self.S_BOX.index(i) for i in range(256)]\n    \n    def reset(self):\n        self.expanded_key = [0] * 44\n        self.decrypted_data = 0\n\n    def sub_word(self, word):\n        return (self.S_BOX[(word >> 24) & 0xFF] << 24) |\\\n               (self.S_BOX[(word >> 16) & 0xFF] << 16) |\\\n               (self.S_BOX[(word >> 8) & 0xFF] << 8) |\\\n               (self.S_BOX[word & 0xFF])\n\n    def rot_word(self, word):\n        return ((word << 8) & 0xFFFFFFFF) | (word >> 24)\n\n    def update_key(self, key):\n        assert 0 <= key < (1 << 128), \"Key must be a 128-bit integer.\"\n        \n 
       for i in range(4):\n            self.expanded_key[i] = (key >> (96 - i * 32)) & 0xFFFFFFFF\n        \n        for i in range(4, 44):\n            temp = self.expanded_key[i - 1]\n            if i % 4 == 0:\n                temp = self.sub_word(self.rot_word(temp)) ^ (self.RCON[i // 4] << 24)\n            self.expanded_key[i] = self.expanded_key[i - 4] ^ temp\n\n    def get_key(self):\n        return sum(self.expanded_key[i] << (32 * (43 - i)) for i in range(44))\n    \n    def decrypt(self, ciphertext):\n        assert 0 <= ciphertext < (1 << 128), \"Ciphertext must be a 128-bit integer.\"\n\n        # Convert ciphertext to 4x4 state matrix (column-major order)\n        state = [[(ciphertext >> (120 - 8 * (i + 4 * j))) & 0xFF for j in range(4)] for i in range(4)]\n\n        # Initial AddRoundKey\n        self.add_round_key(state, 10)\n\n        # 9 main rounds (reverse order)\n        for round in range(9, 0, -1):\n            self.inv_shift_rows(state)\n            self.inv_sub_bytes(state)\n            self.add_round_key(state, round)\n            self.inv_mix_columns(state)\n\n        # Final round (no MixColumns)\n        self.inv_shift_rows(state)\n        self.inv_sub_bytes(state)\n        self.add_round_key(state, 0)\n\n        # Convert state matrix back to 128-bit integer\n        self.decrypted_data = 0\n        for j in range(4):\n            for i in range(4):\n                self.decrypted_data = (self.decrypted_data << 8) | state[i][j]\n    \n    def add_round_key(self, state, round):\n        for j in range(4):\n            word = self.expanded_key[round * 4 + j]\n            for i in range(4):\n                state[i][j] ^= (word >> (24 - 8 * i)) & 0xFF\n\n    def inv_sub_bytes(self, state):\n        for i in range(4):\n            for j in range(4):\n                state[i][j] = self.inv_s_box[state[i][j]]\n\n    def inv_shift_rows(self, state):\n        state[1][0], state[1][1], state[1][2], state[1][3] = state[1][3], state[1][0], 
state[1][1], state[1][2]\n        state[2][0], state[2][1], state[2][2], state[2][3] = state[2][2], state[2][3], state[2][0], state[2][1]\n        state[3][0], state[3][1], state[3][2], state[3][3] = state[3][1], state[3][2], state[3][3], state[3][0]\n\n    def inv_mix_columns(self, state):\n        for j in range(4):\n            a = state[0][j], state[1][j], state[2][j], state[3][j]\n            state[0][j] = self.gmul(a[0], 0x0E) ^ self.gmul(a[1], 0x0B) ^ self.gmul(a[2], 0x0D) ^ self.gmul(a[3], 0x09)\n            state[1][j] = self.gmul(a[0], 0x09) ^ self.gmul(a[1], 0x0E) ^ self.gmul(a[2], 0x0B) ^ self.gmul(a[3], 0x0D)\n            state[2][j] = self.gmul(a[0], 0x0D) ^ self.gmul(a[1], 0x09) ^ self.gmul(a[2], 0x0E) ^ self.gmul(a[3], 0x0B)\n            state[3][j] = self.gmul(a[0], 0x0B) ^ self.gmul(a[1], 0x0D) ^ self.gmul(a[2], 0x09) ^ self.gmul(a[3], 0x0E)\n\n    def gmul(self, a, b):\n        \"\"\"Galois Field multiplication of two bytes\"\"\"\n        p = 0\n        for _ in range(8):\n            if b & 1:\n                p ^= a\n            hi_bit_set = a & 0x80\n            a = (a << 1) & 0xFF\n            if hi_bit_set:\n                a ^= 0x1B\n            b >>= 1\n        return p\n", "src/test_aes128_decrypt.py": "import cocotb\nfrom cocotb.clock import Clock\nfrom cocotb.triggers import RisingEdge, Timer\nimport harness_library as hrs_lb\nimport random\n\ndef compare_values(dut, model, debug=0):\n    dut_data  = int(dut.o_data.value)\n\n    model_data = model.decrypted_data\n\n    if debug == 1:\n        print(\"\\nOUTPUTS\")\n        print(f\"DUT o_data  = {hex(dut_data)} \\nMODEL o_data  = {hex(model_data)}\")\n    \n    assert dut_data == model_data,  f\"[ERROR] DUT o_data does not match model o_data: {hex(dut_data)} != {hex(model_data)}\"\n\n@cocotb.test()\nasync def test_aes128_decrypt(dut):\n    \"\"\"Test the aes128_decrypt module with edge cases and random data.\"\"\"\n    cocotb.start_soon(Clock(dut.clk, 10, unit='ns').start())\n\n    
model = hrs_lb.aes128_decrypt()\n\n    resets = 4\n    runs = 1000\n\n    data_min = 0\n    data_max = 2**128 - 1\n    \n    await hrs_lb.dut_init(dut)\n\n    for i in range(resets):\n        # Reset DUT\n        # Set all inputs to 0\n        dut.i_update_key.value = 0\n        dut.i_start.value      = 0\n        dut.i_key.value        = 0\n        dut.i_data.value       = 0\n        dut.rst_async_n.value  = 0\n        await RisingEdge(dut.clk)\n        dut.rst_async_n.value = 1\n        await RisingEdge(dut.clk)\n\n        model.reset()\n\n        compare_values(dut, model)\n\n        for j in range(runs):\n            if j % 100 == 0:\n                print(f\"Reset {i}, Run {j}\")\n\n            key = random.randint(data_min, data_max)\n            data = random.randint(data_min, data_max)\n\n            dut.i_key.value        = key\n            dut.i_data.value       = data\n            if j == 0:\n                update_key = 1\n            else:\n                update_key = random.randint(0,1)\n            dut.i_update_key.value = update_key\n            dut.i_start.value = 1\n            \n            if update_key:\n                model.update_key(key)\n            \n            model.decrypt(data)\n\n            await RisingEdge(dut.clk)\n            dut.i_update_key.value = 0\n            dut.i_start.value      = 0\n            dut.i_key.value        = 0\n            dut.i_data.value       = 0\n            await RisingEdge(dut.clk)\n            while dut.o_done.value == 0:\n                await RisingEdge(dut.clk)\n            \n            compare_values(dut, model)\n            ", "src/test_runner.py": "import cocotb\nimport os\nimport pytest\nimport random\nfrom cocotb_tools.runner import get_runner\n\n# Environment configuration\nverilog_sources = os.getenv(\"VERILOG_SOURCES\").split()\ntoplevel_lang   = os.getenv(\"TOPLEVEL_LANG\")\nsim             = os.getenv(\"SIM\", \"icarus\")\ntoplevel        = os.getenv(\"TOPLEVEL\")\nmodule          = 
os.getenv(\"MODULE\")\nwave            = bool(os.getenv(\"WAVE\"))\n\ndef runner():\n    # Configure and run the simulation\n    sim_runner = get_runner(sim)\n    sim_runner.build(\n        sources=verilog_sources,\n        hdl_toplevel=toplevel,\n        always=True,\n        clean=True,\n        verbose=True,\n        timescale=(\"1ns\", \"1ns\"),\n        log_file=\"sim.log\"\n    )\n\n    # Run the test\n    sim_runner.test(hdl_toplevel=toplevel, test_module=module, waves=True)\n\n\ndef test_data():\n    # Run the simulation with specified parameters\n    runner()"}, "context_files": {"rtl/inv_sbox.sv": "module inv_sbox (\n    input  logic [7:0] i_data,\n    output logic [7:0] o_data\n);\n\nalways_comb begin\n    case (i_data)\n        8'h00: o_data = 8'h52;\n        8'h01: o_data = 8'h09;\n        8'h02: o_data = 8'h6a;\n        8'h03: o_data = 8'hd5;\n        8'h04: o_data = 8'h30;\n        8'h05: o_data = 8'h36;\n        8'h06: o_data = 8'ha5;\n        8'h07: o_data = 8'h38;\n        8'h08: o_data = 8'hbf;\n        8'h09: o_data = 8'h40;\n        8'h0a: o_data = 8'ha3;\n        8'h0b: o_data = 8'h9e;\n        8'h0c: o_data = 8'h81;\n        8'h0d: o_data = 8'hf3;\n        8'h0e: o_data = 8'hd7;\n        8'h0f: o_data = 8'hfb;\n        8'h10: o_data = 8'h7c;\n        8'h11: o_data = 8'he3;\n        8'h12: o_data = 8'h39;\n        8'h13: o_data = 8'h82;\n        8'h14: o_data = 8'h9b;\n        8'h15: o_data = 8'h2f;\n        8'h16: o_data = 8'hff;\n        8'h17: o_data = 8'h87;\n        8'h18: o_data = 8'h34;\n        8'h19: o_data = 8'h8e;\n        8'h1a: o_data = 8'h43;\n        8'h1b: o_data = 8'h44;\n        8'h1c: o_data = 8'hc4;\n        8'h1d: o_data = 8'hde;\n        8'h1e: o_data = 8'he9;\n        8'h1f: o_data = 8'hcb;\n        8'h20: o_data = 8'h54;\n        8'h21: o_data = 8'h7b;\n        8'h22: o_data = 8'h94;\n        8'h23: o_data = 8'h32;\n        8'h24: o_data = 8'ha6;\n        8'h25: o_data = 8'hc2;\n        8'h26: o_data = 8'h23;\n        
8'h27: o_data = 8'h3d;\n        8'h28: o_data = 8'hee;\n        8'h29: o_data = 8'h4c;\n        8'h2a: o_data = 8'h95;\n        8'h2b: o_data = 8'h0b;\n        8'h2c: o_data = 8'h42;\n        8'h2d: o_data = 8'hfa;\n        8'h2e: o_data = 8'hc3;\n        8'h2f: o_data = 8'h4e;\n        8'h30: o_data = 8'h08;\n        8'h31: o_data = 8'h2e;\n        8'h32: o_data = 8'ha1;\n        8'h33: o_data = 8'h66;\n        8'h34: o_data = 8'h28;\n        8'h35: o_data = 8'hd9;\n        8'h36: o_data = 8'h24;\n        8'h37: o_data = 8'hb2;\n        8'h38: o_data = 8'h76;\n        8'h39: o_data = 8'h5b;\n        8'h3a: o_data = 8'ha2;\n        8'h3b: o_data = 8'h49;\n        8'h3c: o_data = 8'h6d;\n        8'h3d: o_data = 8'h8b;\n        8'h3e: o_data = 8'hd1;\n        8'h3f: o_data = 8'h25;\n        8'h40: o_data = 8'h72;\n        8'h41: o_data = 8'hf8;\n        8'h42: o_data = 8'hf6;\n        8'h43: o_data = 8'h64;\n        8'h44: o_data = 8'h86;\n        8'h45: o_data = 8'h68;\n        8'h46: o_data = 8'h98;\n        8'h47: o_data = 8'h16;\n        8'h48: o_data = 8'hd4;\n        8'h49: o_data = 8'ha4;\n        8'h4a: o_data = 8'h5c;\n        8'h4b: o_data = 8'hcc;\n        8'h4c: o_data = 8'h5d;\n        8'h4d: o_data = 8'h65;\n        8'h4e: o_data = 8'hb6;\n        8'h4f: o_data = 8'h92;\n        8'h50: o_data = 8'h6c;\n        8'h51: o_data = 8'h70;\n        8'h52: o_data = 8'h48;\n        8'h53: o_data = 8'h50;\n        8'h54: o_data = 8'hfd;\n        8'h55: o_data = 8'hed;\n        8'h56: o_data = 8'hb9;\n        8'h57: o_data = 8'hda;\n        8'h58: o_data = 8'h5e;\n        8'h59: o_data = 8'h15;\n        8'h5a: o_data = 8'h46;\n        8'h5b: o_data = 8'h57;\n        8'h5c: o_data = 8'ha7;\n        8'h5d: o_data = 8'h8d;\n        8'h5e: o_data = 8'h9d;\n        8'h5f: o_data = 8'h84;\n        8'h60: o_data = 8'h90;\n        8'h61: o_data = 8'hd8;\n        8'h62: o_data = 8'hab;\n        8'h63: o_data = 8'h00;\n        8'h64: o_data = 8'h8c;\n        8'h65: o_data = 
8'hbc;\n        8'h66: o_data = 8'hd3;\n        8'h67: o_data = 8'h0a;\n        8'h68: o_data = 8'hf7;\n        8'h69: o_data = 8'he4;\n        8'h6a: o_data = 8'h58;\n        8'h6b: o_data = 8'h05;\n        8'h6c: o_data = 8'hb8;\n        8'h6d: o_data = 8'hb3;\n        8'h6e: o_data = 8'h45;\n        8'h6f: o_data = 8'h06;\n        8'h70: o_data = 8'hd0;\n        8'h71: o_data = 8'h2c;\n        8'h72: o_data = 8'h1e;\n        8'h73: o_data = 8'h8f;\n        8'h74: o_data = 8'hca;\n        8'h75: o_data = 8'h3f;\n        8'h76: o_data = 8'h0f;\n        8'h77: o_data = 8'h02;\n        8'h78: o_data = 8'hc1;\n        8'h79: o_data = 8'haf;\n        8'h7a: o_data = 8'hbd;\n        8'h7b: o_data = 8'h03;\n        8'h7c: o_data = 8'h01;\n        8'h7d: o_data = 8'h13;\n        8'h7e: o_data = 8'h8a;\n        8'h7f: o_data = 8'h6b;\n        8'h80: o_data = 8'h3a;\n        8'h81: o_data = 8'h91;\n        8'h82: o_data = 8'h11;\n        8'h83: o_data = 8'h41;\n        8'h84: o_data = 8'h4f;\n        8'h85: o_data = 8'h67;\n        8'h86: o_data = 8'hdc;\n        8'h87: o_data = 8'hea;\n        8'h88: o_data = 8'h97;\n        8'h89: o_data = 8'hf2;\n        8'h8a: o_data = 8'hcf;\n        8'h8b: o_data = 8'hce;\n        8'h8c: o_data = 8'hf0;\n        8'h8d: o_data = 8'hb4;\n        8'h8e: o_data = 8'he6;\n        8'h8f: o_data = 8'h73;\n        8'h90: o_data = 8'h96;\n        8'h91: o_data = 8'hac;\n        8'h92: o_data = 8'h74;\n        8'h93: o_data = 8'h22;\n        8'h94: o_data = 8'he7;\n        8'h95: o_data = 8'had;\n        8'h96: o_data = 8'h35;\n        8'h97: o_data = 8'h85;\n        8'h98: o_data = 8'he2;\n        8'h99: o_data = 8'hf9;\n        8'h9a: o_data = 8'h37;\n        8'h9b: o_data = 8'he8;\n        8'h9c: o_data = 8'h1c;\n        8'h9d: o_data = 8'h75;\n        8'h9e: o_data = 8'hdf;\n        8'h9f: o_data = 8'h6e;\n        8'ha0: o_data = 8'h47;\n        8'ha1: o_data = 8'hf1;\n        8'ha2: o_data = 8'h1a;\n        8'ha3: o_data = 8'h71;\n        
8'ha4: o_data = 8'h1d;\n        8'ha5: o_data = 8'h29;\n        8'ha6: o_data = 8'hc5;\n        8'ha7: o_data = 8'h89;\n        8'ha8: o_data = 8'h6f;\n        8'ha9: o_data = 8'hb7;\n        8'haa: o_data = 8'h62;\n        8'hab: o_data = 8'h0e;\n        8'hac: o_data = 8'haa;\n        8'had: o_data = 8'h18;\n        8'hae: o_data = 8'hbe;\n        8'haf: o_data = 8'h1b;\n        8'hb0: o_data = 8'hfc;\n        8'hb1: o_data = 8'h56;\n        8'hb2: o_data = 8'h3e;\n        8'hb3: o_data = 8'h4b;\n        8'hb4: o_data = 8'hc6;\n        8'hb5: o_data = 8'hd2;\n        8'hb6: o_data = 8'h79;\n        8'hb7: o_data = 8'h20;\n        8'hb8: o_data = 8'h9a;\n        8'hb9: o_data = 8'hdb;\n        8'hba: o_data = 8'hc0;\n        8'hbb: o_data = 8'hfe;\n        8'hbc: o_data = 8'h78;\n        8'hbd: o_data = 8'hcd;\n        8'hbe: o_data = 8'h5a;\n        8'hbf: o_data = 8'hf4;\n        8'hc0: o_data = 8'h1f;\n        8'hc1: o_data = 8'hdd;\n        8'hc2: o_data = 8'ha8;\n        8'hc3: o_data = 8'h33;\n        8'hc4: o_data = 8'h88;\n        8'hc5: o_data = 8'h07;\n        8'hc6: o_data = 8'hc7;\n        8'hc7: o_data = 8'h31;\n        8'hc8: o_data = 8'hb1;\n        8'hc9: o_data = 8'h12;\n        8'hca: o_data = 8'h10;\n        8'hcb: o_data = 8'h59;\n        8'hcc: o_data = 8'h27;\n        8'hcd: o_data = 8'h80;\n        8'hce: o_data = 8'hec;\n        8'hcf: o_data = 8'h5f;\n        8'hd0: o_data = 8'h60;\n        8'hd1: o_data = 8'h51;\n        8'hd2: o_data = 8'h7f;\n        8'hd3: o_data = 8'ha9;\n        8'hd4: o_data = 8'h19;\n        8'hd5: o_data = 8'hb5;\n        8'hd6: o_data = 8'h4a;\n        8'hd7: o_data = 8'h0d;\n        8'hd8: o_data = 8'h2d;\n        8'hd9: o_data = 8'he5;\n        8'hda: o_data = 8'h7a;\n        8'hdb: o_data = 8'h9f;\n        8'hdc: o_data = 8'h93;\n        8'hdd: o_data = 8'hc9;\n        8'hde: o_data = 8'h9c;\n        8'hdf: o_data = 8'hef;\n        8'he0: o_data = 8'ha0;\n        8'he1: o_data = 8'he0;\n        8'he2: o_data = 
8'h3b;\n        8'he3: o_data = 8'h4d;\n        8'he4: o_data = 8'hae;\n        8'he5: o_data = 8'h2a;\n        8'he6: o_data = 8'hf5;\n        8'he7: o_data = 8'hb0;\n        8'he8: o_data = 8'hc8;\n        8'he9: o_data = 8'heb;\n        8'hea: o_data = 8'hbb;\n        8'heb: o_data = 8'h3c;\n        8'hec: o_data = 8'h83;\n        8'hed: o_data = 8'h53;\n        8'hee: o_data = 8'h99;\n        8'hef: o_data = 8'h61;\n        8'hf0: o_data = 8'h17;\n        8'hf1: o_data = 8'h2b;\n        8'hf2: o_data = 8'h04;\n        8'hf3: o_data = 8'h7e;\n        8'hf4: o_data = 8'hba;\n        8'hf5: o_data = 8'h77;\n        8'hf6: o_data = 8'hd6;\n        8'hf7: o_data = 8'h26;\n        8'hf8: o_data = 8'he1;\n        8'hf9: o_data = 8'h69;\n        8'hfa: o_data = 8'h14;\n        8'hfb: o_data = 8'h63;\n        8'hfc: o_data = 8'h55;\n        8'hfd: o_data = 8'h21;\n        8'hfe: o_data = 8'h0c;\n        8'hff: o_data = 8'h7d;\n        default: o_data = 8'h00;\n    endcase\nend\n\nendmodule : inv_sbox", "rtl/sbox.sv": "module sbox (\n    input  logic [7:0] i_data,\n    output logic [7:0] o_data\n);\n\nalways_comb begin\n    case (i_data)\n        8'h00: o_data = 8'h63;\n        8'h01: o_data = 8'h7C;\n        8'h02: o_data = 8'h77;\n        8'h03: o_data = 8'h7B;\n        8'h04: o_data = 8'hF2;\n        8'h05: o_data = 8'h6B;\n        8'h06: o_data = 8'h6F;\n        8'h07: o_data = 8'hC5;\n        8'h08: o_data = 8'h30;\n        8'h09: o_data = 8'h01;\n        8'h0A: o_data = 8'h67;\n        8'h0B: o_data = 8'h2B;\n        8'h0C: o_data = 8'hFE;\n        8'h0D: o_data = 8'hD7;\n        8'h0E: o_data = 8'hAB;\n        8'h0F: o_data = 8'h76;\n        8'h10: o_data = 8'hCA;\n        8'h11: o_data = 8'h82;\n        8'h12: o_data = 8'hC9;\n        8'h13: o_data = 8'h7D;\n        8'h14: o_data = 8'hFA;\n        8'h15: o_data = 8'h59;\n        8'h16: o_data = 8'h47;\n        8'h17: o_data = 8'hF0;\n        8'h18: o_data = 8'hAD;\n        8'h19: o_data = 8'hD4;\n        8'h1A: 
o_data = 8'hA2;\n        8'h1B: o_data = 8'hAF;\n        8'h1C: o_data = 8'h9C;\n        8'h1D: o_data = 8'hA4;\n        8'h1E: o_data = 8'h72;\n        8'h1F: o_data = 8'hC0;\n        8'h20: o_data = 8'hB7;\n        8'h21: o_data = 8'hFD;\n        8'h22: o_data = 8'h93;\n        8'h23: o_data = 8'h26;\n        8'h24: o_data = 8'h36;\n        8'h25: o_data = 8'h3F;\n        8'h26: o_data = 8'hF7;\n        8'h27: o_data = 8'hCC;\n        8'h28: o_data = 8'h34;\n        8'h29: o_data = 8'hA5;\n        8'h2A: o_data = 8'hE5;\n        8'h2B: o_data = 8'hF1;\n        8'h2C: o_data = 8'h71;\n        8'h2D: o_data = 8'hD8;\n        8'h2E: o_data = 8'h31;\n        8'h2F: o_data = 8'h15;\n        8'h30: o_data = 8'h04;\n        8'h31: o_data = 8'hC7;\n        8'h32: o_data = 8'h23;\n        8'h33: o_data = 8'hC3;\n        8'h34: o_data = 8'h18;\n        8'h35: o_data = 8'h96;\n        8'h36: o_data = 8'h05;\n        8'h37: o_data = 8'h9A;\n        8'h38: o_data = 8'h07;\n        8'h39: o_data = 8'h12;\n        8'h3A: o_data = 8'h80;\n        8'h3B: o_data = 8'hE2;\n        8'h3C: o_data = 8'hEB;\n        8'h3D: o_data = 8'h27;\n        8'h3E: o_data = 8'hB2;\n        8'h3F: o_data = 8'h75;\n        8'h40: o_data = 8'h09;\n        8'h41: o_data = 8'h83;\n        8'h42: o_data = 8'h2C;\n        8'h43: o_data = 8'h1A;\n        8'h44: o_data = 8'h1B;\n        8'h45: o_data = 8'h6E;\n        8'h46: o_data = 8'h5A;\n        8'h47: o_data = 8'hA0;\n        8'h48: o_data = 8'h52;\n        8'h49: o_data = 8'h3B;\n        8'h4A: o_data = 8'hD6;\n        8'h4B: o_data = 8'hB3;\n        8'h4C: o_data = 8'h29;\n        8'h4D: o_data = 8'hE3;\n        8'h4E: o_data = 8'h2F;\n        8'h4F: o_data = 8'h84;\n        8'h50: o_data = 8'h53;\n        8'h51: o_data = 8'hD1;\n        8'h52: o_data = 8'h00;\n        8'h53: o_data = 8'hED;\n        8'h54: o_data = 8'h20;\n        8'h55: o_data = 8'hFC;\n        8'h56: o_data = 8'hB1;\n        8'h57: o_data = 8'h5B;\n        8'h58: o_data = 
8'h6A;\n        8'h59: o_data = 8'hCB;\n        8'h5A: o_data = 8'hBE;\n        8'h5B: o_data = 8'h39;\n        8'h5C: o_data = 8'h4A;\n        8'h5D: o_data = 8'h4C;\n        8'h5E: o_data = 8'h58;\n        8'h5F: o_data = 8'hCF;\n        8'h60: o_data = 8'hD0;\n        8'h61: o_data = 8'hEF;\n        8'h62: o_data = 8'hAA;\n        8'h63: o_data = 8'hFB;\n        8'h64: o_data = 8'h43;\n        8'h65: o_data = 8'h4D;\n        8'h66: o_data = 8'h33;\n        8'h67: o_data = 8'h85;\n        8'h68: o_data = 8'h45;\n        8'h69: o_data = 8'hF9;\n        8'h6A: o_data = 8'h02;\n        8'h6B: o_data = 8'h7F;\n        8'h6C: o_data = 8'h50;\n        8'h6D: o_data = 8'h3C;\n        8'h6E: o_data = 8'h9F;\n        8'h6F: o_data = 8'hA8;\n        8'h70: o_data = 8'h51;\n        8'h71: o_data = 8'hA3;\n        8'h72: o_data = 8'h40;\n        8'h73: o_data = 8'h8F;\n        8'h74: o_data = 8'h92;\n        8'h75: o_data = 8'h9D;\n        8'h76: o_data = 8'h38;\n        8'h77: o_data = 8'hF5;\n        8'h78: o_data = 8'hBC;\n        8'h79: o_data = 8'hB6;\n        8'h7A: o_data = 8'hDA;\n        8'h7B: o_data = 8'h21;\n        8'h7C: o_data = 8'h10;\n        8'h7D: o_data = 8'hFF;\n        8'h7E: o_data = 8'hF3;\n        8'h7F: o_data = 8'hD2;\n        8'h80: o_data = 8'hCD;\n        8'h81: o_data = 8'h0C;\n        8'h82: o_data = 8'h13;\n        8'h83: o_data = 8'hEC;\n        8'h84: o_data = 8'h5F;\n        8'h85: o_data = 8'h97;\n        8'h86: o_data = 8'h44;\n        8'h87: o_data = 8'h17;\n        8'h88: o_data = 8'hC4;\n        8'h89: o_data = 8'hA7;\n        8'h8A: o_data = 8'h7E;\n        8'h8B: o_data = 8'h3D;\n        8'h8C: o_data = 8'h64;\n        8'h8D: o_data = 8'h5D;\n        8'h8E: o_data = 8'h19;\n        8'h8F: o_data = 8'h73;\n        8'h90: o_data = 8'h60;\n        8'h91: o_data = 8'h81;\n        8'h92: o_data = 8'h4F;\n        8'h93: o_data = 8'hDC;\n        8'h94: o_data = 8'h22;\n        8'h95: o_data = 8'h2A;\n        8'h96: o_data = 8'h90;\n        
8'h97: o_data = 8'h88;\n        8'h98: o_data = 8'h46;\n        8'h99: o_data = 8'hEE;\n        8'h9A: o_data = 8'hB8;\n        8'h9B: o_data = 8'h14;\n        8'h9C: o_data = 8'hDE;\n        8'h9D: o_data = 8'h5E;\n        8'h9E: o_data = 8'h0B;\n        8'h9F: o_data = 8'hDB;\n        8'hA0: o_data = 8'hE0;\n        8'hA1: o_data = 8'h32;\n        8'hA2: o_data = 8'h3A;\n        8'hA3: o_data = 8'h0A;\n        8'hA4: o_data = 8'h49;\n        8'hA5: o_data = 8'h06;\n        8'hA6: o_data = 8'h24;\n        8'hA7: o_data = 8'h5C;\n        8'hA8: o_data = 8'hC2;\n        8'hA9: o_data = 8'hD3;\n        8'hAA: o_data = 8'hAC;\n        8'hAB: o_data = 8'h62;\n        8'hAC: o_data = 8'h91;\n        8'hAD: o_data = 8'h95;\n        8'hAE: o_data = 8'hE4;\n        8'hAF: o_data = 8'h79;\n        8'hB0: o_data = 8'hE7;\n        8'hB1: o_data = 8'hC8;\n        8'hB2: o_data = 8'h37;\n        8'hB3: o_data = 8'h6D;\n        8'hB4: o_data = 8'h8D;\n        8'hB5: o_data = 8'hD5;\n        8'hB6: o_data = 8'h4E;\n        8'hB7: o_data = 8'hA9;\n        8'hB8: o_data = 8'h6C;\n        8'hB9: o_data = 8'h56;\n        8'hBA: o_data = 8'hF4;\n        8'hBB: o_data = 8'hEA;\n        8'hBC: o_data = 8'h65;\n        8'hBD: o_data = 8'h7A;\n        8'hBE: o_data = 8'hAE;\n        8'hBF: o_data = 8'h08;\n        8'hC0: o_data = 8'hBA;\n        8'hC1: o_data = 8'h78;\n        8'hC2: o_data = 8'h25;\n        8'hC3: o_data = 8'h2E;\n        8'hC4: o_data = 8'h1C;\n        8'hC5: o_data = 8'hA6;\n        8'hC6: o_data = 8'hB4;\n        8'hC7: o_data = 8'hC6;\n        8'hC8: o_data = 8'hE8;\n        8'hC9: o_data = 8'hDD;\n        8'hCA: o_data = 8'h74;\n        8'hCB: o_data = 8'h1F;\n        8'hCC: o_data = 8'h4B;\n        8'hCD: o_data = 8'hBD;\n        8'hCE: o_data = 8'h8B;\n        8'hCF: o_data = 8'h8A;\n        8'hD0: o_data = 8'h70;\n        8'hD1: o_data = 8'h3E;\n        8'hD2: o_data = 8'hB5;\n        8'hD3: o_data = 8'h66;\n        8'hD4: o_data = 8'h48;\n        8'hD5: o_data = 
8'h03;\n        8'hD6: o_data = 8'hF6;\n        8'hD7: o_data = 8'h0E;\n        8'hD8: o_data = 8'h61;\n        8'hD9: o_data = 8'h35;\n        8'hDA: o_data = 8'h57;\n        8'hDB: o_data = 8'hB9;\n        8'hDC: o_data = 8'h86;\n        8'hDD: o_data = 8'hC1;\n        8'hDE: o_data = 8'h1D;\n        8'hDF: o_data = 8'h9E;\n        8'hE0: o_data = 8'hE1;\n        8'hE1: o_data = 8'hF8;\n        8'hE2: o_data = 8'h98;\n        8'hE3: o_data = 8'h11;\n        8'hE4: o_data = 8'h69;\n        8'hE5: o_data = 8'hD9;\n        8'hE6: o_data = 8'h8E;\n        8'hE7: o_data = 8'h94;\n        8'hE8: o_data = 8'h9B;\n        8'hE9: o_data = 8'h1E;\n        8'hEA: o_data = 8'h87;\n        8'hEB: o_data = 8'hE9;\n        8'hEC: o_data = 8'hCE;\n        8'hED: o_data = 8'h55;\n        8'hEE: o_data = 8'h28;\n        8'hEF: o_data = 8'hDF;\n        8'hF0: o_data = 8'h8C;\n        8'hF1: o_data = 8'hA1;\n        8'hF2: o_data = 8'h89;\n        8'hF3: o_data = 8'h0D;\n        8'hF4: o_data = 8'hBF;\n        8'hF5: o_data = 8'hE6;\n        8'hF6: o_data = 8'h42;\n        8'hF7: o_data = 8'h68;\n        8'hF8: o_data = 8'h41;\n        8'hF9: o_data = 8'h99;\n        8'hFA: o_data = 8'h2D;\n        8'hFB: o_data = 8'h0F;\n        8'hFC: o_data = 8'hB0;\n        8'hFD: o_data = 8'h54;\n        8'hFE: o_data = 8'hBB;\n        8'hFF: o_data = 8'h16;\n        default: o_data = 8'h00;\n    endcase\nend\n\nendmodule : sbox", "verif/tb_aes128_dec.sv": "module tb_aes128_dec;\n\nlocalparam NBW_KEY  = 'd128;\nlocalparam NBW_DATA = 'd128;\n\nlogic                clk;\nlogic                rst_async_n;\nlogic                i_update_key;\nlogic [NBW_KEY-1:0]  i_key;\nlogic                i_start;\nlogic [NBW_DATA-1:0] i_data;\nlogic                o_done;\nlogic [NBW_DATA-1:0] o_data;\n\naes128_decrypt #(\n    .NBW_KEY (NBW_KEY),\n    .NBW_DATA(NBW_DATA)\n) uu_aes128_decrypt (\n    .clk(clk),\n    .rst_async_n(rst_async_n),\n    .i_update_key(i_update_key),\n    .i_key(i_key),\n    
.i_start(i_start),\n    .i_data(i_data),\n    .o_done(o_done),\n    .o_data(o_data)\n);\n\ntask Simple_test(logic update_key);\n    @(negedge clk);\n    i_key = 128'h2b7e151628aed2a6abf7158809cf4f3c;\n    i_data = 128'h3925841d02dc09fbdc118597196a0b32;\n    // Data is stored in the RTL as a 4x4 matrix. With this i_data, the matrix should be:\n    // Col  : 0  | 1  | 2  | 3\n    //       -------------------\n    // Row 0: 39 | 02 | dc | 19\n    //       -------------------\n    // Row 1: 25 | dc | 11 | 6a\n    //       -------------------\n    // Row 2: 84 | 09 | 85 | 0b\n    //       -------------------\n    // Row 3: 1d | fb | 97 | 32\n    //       -------------------\n\n    i_update_key = update_key;\n    i_start = 1;\n\n    @(negedge clk);\n    i_start = 0;\n    i_update_key = 0;\n    i_key = 0;\n    i_data = 0;\n\n    @(posedge o_done);\n    @(negedge clk);\n\n    if(o_data == 128'h3243f6a8885a308d313198a2e0370734) begin\n        $display(\"PASS\");\n    end else begin\n        $display(\"FAIL\");\n        $display(\"Expected output: %h\", 128'h3243f6a8885a308d313198a2e0370734);\n        $display(\"Observed output: %h\", o_data);\n    end\nendtask\n\ninitial begin\n    $dumpfile(\"test.vcd\");\n    $dumpvars(0,tb_aes128_dec);\nend\n\nalways #5 clk = ~clk;\n\ninitial begin\n    clk = 0;\n    i_start = 0;\n    rst_async_n = 1;\n    #1;\n    rst_async_n = 0;\n    #2;\n    rst_async_n = 1;\n    @(negedge clk);\n\n    // Tasks go here\n    Simple_test(1'b1);\n    Simple_test(1'b0);\n\n    @(negedge clk);\n    @(negedge clk);\n\n    $finish();\nend\n\nendmodule"}}}
+{"responses_create_params": {"input": [{"role": "system", "content": "You are a language model that has the following file operations available at your disposal:\n  - **List files in a directory** by running one of the following commands: \n    - `ls`\n    - `tree`\n  - **Read files** by using:\n    - `cat <filename>`\n  - **Write files** by using:\n    - `echo <content> > <filename>`\n  - **Compile Verilog** by using `iverilog` such as:\n    - `iverilog -o <output_filename>.out -g2012 <verilog_code_file> <verilog_testbench_file>`\n  - **Run Simulation** by using:\n    - `vvp <output_filename>.out`\n  - **Find current working directory** by using:\n    - `pwd`\n  - **Update the contents of a text file from a old content to new content**\n    - `sed -i  \"line_number s/old_statement/new_statement/\" file.sv`\n  - **To access a specific line of the file**\n     - `awk 'NR==line_number' file_name.sv`\n\nYou will be given a prompt and your task is to understand it and solve the given issue by using the above-mentioned commands as needed. 
In the final step, you should create a Linux patch to highlight the necessary file updates to achieve the targeted goal.\n\n  You will solve the problem step by step using the following approach of \n  - thought (thinking process of the step you're going to take)\n  - action (the command you will be running to get more details/context that's helpful to solve the problem)\n  - observation (the output from the action you will observe based on which you will take your next step)\n\n  The last step will be the final output summary and the patch itself in the following format \n  - thought (the summary of what you did and some introduction of the patch file itself)\n  - patch (a Linux-based patch that needs to be applied to reach the relevant solution)"}, {"role": "user", "content": "\nProvide me one answer for this request: Modify the `aes_encrypt` module in the `rtl` directory, which originally performs an AES-128 encryption, to perform only an AES-256 encryption. A testbench to test the updated design is provided in `verif` directory, and the `sbox` module does not need to be changed. The AES-128 version takes a 128-bit key and a 128-bit data and encrypts it, while the AES-256 version receives a 256-bit key and a 128-bit data and encrypts it. Below is a description of the changes that need to be made:\n\n### 1. **Update Interface Parameters**\n\n- Change the key input size from 128 to 256 bits: Instead of copying 4 32-bit words into the first part of the expanded key, copy 8 32-bit words from the 256-bit input key.\n\n### 2. 
**Modify Key Expansion Loop**\n\n- In AES-128, for each 32-bit word `w[i]` where `i` is a multiple of `4`, you apply:\n  - For each `i >= 4`:\n    - `Temp = RotWord(w[i-1])`\n    - `Temp = SubWord(Temp)`\n    - `Temp = Temp XOR Rcon[i/4 - 1]`\n    - `w[i] = w[i - 4] XOR Temp`\n\n(`Temp` is used to demonstrate intermediate calculation storage during each step of calculation)\n\n- In **AES-256**, the logic changes:\n  - For each `i >= 8`:\n    - If `i % 8 == 0`:\n      - `Temp = RotWord(w[i-1])`\n      - `Temp = SubWord(Temp)`\n      - `Temp = Temp XOR Rcon[i/8 - 1]`\n    - Else if `i % 8 == 4`:\n      - `Temp = SubWord(w[i-1])`\n      - **No rotation, no Rcon**\n    - Else:\n      - `Temp = w[i-1]`\n    - Then:\n      - `w[i] = w[i - 8] XOR Temp`\n\nMake sure to implement this conditional branching properly in the loop.\n\n### 3. **Rcon Handling**\n\n- Rcon is only applied when `i % 8 == 0` (i.e., every 8 words in AES-256).\n- Do **not** apply Rcon when `i % 8 == 4`.\n- **If any Rcon value is not needed, remove it from the code**.\n\n### 4. **Update Encryption Flow**\n\n- **Increase round counter** to go up to 14.\n- **Expand the key schedule** to generate and store **15 round keys**, each 128 bits (i.e., 240 bytes or 60 words of 32 bits total).\n- Update loops that iterate over rounds so they only use 128 bits of the expanded key for each round.\n\n### 5. **Initial Round Key Addition**\n- Ensure the first round key is generated correctly from the first 128 bits of the expanded 256-bit key.\n\n### 6. **Internal Buffers and Registers**\n- Update the size of any registers or memory arrays that store round keys from 44 32-bit words (AES-128) to 60 32-bit words (AES-256)\n\n\n\nPlease provide your response as plain text without any JSON formatting. 
Your response will be saved directly to: rtl/aes_encrypt.sv."}]}, "verifier_metadata": {"task_id": "cvdp_agentic_AES_encryption_decryption_0009", "categories": ["cid004", "medium"], "difficulty": "medium", "target_files": ["rtl/aes_encrypt.sv"], "harness_files": {"docker-compose.yml": "services:\n  sanity:\n    image: __OSS_SIM_IMAGE__\n    volumes:\n      - ./src:/src/      \n    working_dir : /code/rundir\n    env_file    : ./src/.env\n    command     : pytest /src/test_runner.py -s -v -o cache_dir=/rundir/harness/.cache\n", "src/.env": "VERILOG_SOURCES = /code/rtl/aes_encrypt.sv /code/rtl/sbox.sv\nTOPLEVEL        = aes_encrypt\nMODULE          = test_aes_encrypt\nSIM             = icarus\nTOPLEVEL_LANG   = verilog\nPYTHONPATH      = /src\nHASH            = 9-modify-aes-encrypt-to-256\nWAVE            = true", "src/harness_library.py": "import cocotb\nfrom cocotb.triggers import FallingEdge, RisingEdge, Timer\nfrom collections import deque\n\nasync def dut_init(dut):\n    # iterate all the input signals and initialize with 0\n    for signal in dut:\n        try:\n            signal.value = 0\n        except Exception:\n            pass\n\nclass aes_encrypt:\n    RCON = [\n        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36\n    ]\n    \n    SBOX = [\n        # S-box table used in AES\n        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,\n        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,\n        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,\n        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,\n        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,\n        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,\n        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 
0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,\n        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,\n        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,\n        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,\n        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,\n        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,\n        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,\n        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,\n        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,\n        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16\n    ]\n    \n    def __init__(self):\n        self.expanded_key = 0\n        self.data_out = 0\n    \n    def reset(self):\n        self.expanded_key = 0\n        self.data_out = 0\n    \n    def update_key(self, key):\n        key_bytes = key.to_bytes(32, 'big')  # Convert 256-bit key to bytes\n        self.expanded_key = self.expand_key(key_bytes)\n    \n    def expand_key(self, key):\n        words = [list(key[i:i+4]) for i in range(0, 32, 4)]\n        \n        for i in range(8, 60):\n            temp = words[i - 1]\n            \n            if i % 8 == 0:\n                temp = self.sub_word(self.rot_word(temp))\n                temp[0] ^= self.RCON[i // 8 - 1]\n            elif i % 8 == 4:\n                temp = self.sub_word(temp)\n            \n            words.append([words[i - 8][j] ^ temp[j] for j in range(4)])\n        \n        expanded_key_bytes = b''.join(bytes(word) for word in words)\n        return int.from_bytes(expanded_key_bytes, 'big')\n    \n    def 
sub_word(self, word):\n        return [self.SBOX[b] for b in word]\n    \n    def rot_word(self, word):\n        return word[1:] + word[:1]\n    \n    def gmul(self, a, b):\n        p = 0\n        for _ in range(8):\n            if b & 1:\n                p ^= a\n            hi = a & 0x80\n            a = (a << 1) & 0xFF\n            if hi:\n                a ^= 0x1b\n            b >>= 1\n        return p\n\n    def sub_bytes(self, state):\n        for i in range(16):\n            state[i] = self.SBOX[state[i]]\n\n    def shift_rows(self, state):\n        state[1], state[5], state[9], state[13] = state[5], state[9], state[13], state[1]\n        state[2], state[6], state[10], state[14] = state[10], state[14], state[2], state[6]\n        state[3], state[7], state[11], state[15] = state[15], state[3], state[7], state[11]\n\n    def mix_columns(self, s):\n        for i in range(4):\n            a = s[i*4:(i+1)*4]\n            s[i*4+0] = self.gmul(a[0],2)^self.gmul(a[1],3)^a[2]^a[3]\n            s[i*4+1] = a[0]^self.gmul(a[1],2)^self.gmul(a[2],3)^a[3]\n            s[i*4+2] = a[0]^a[1]^self.gmul(a[2],2)^self.gmul(a[3],3)\n            s[i*4+3] = self.gmul(a[0],3)^a[1]^a[2]^self.gmul(a[3],2)\n\n    def add_round_key(self, state, round_key_words):\n        for col in range(4):\n            word = round_key_words[col]\n            for row in range(4):\n                state[col * 4 + row] ^= (word >> (24 - 8 * row)) & 0xFF\n\n    def get_round_keys(self):\n        expanded_bytes = self.expanded_key.to_bytes(240, 'big')\n        round_keys = []\n        for i in range(0, 240, 16):  # Each round key is 16 bytes (4 words)\n            words = [int.from_bytes(expanded_bytes[i + j*4 : i + (j+1)*4], 'big') for j in range(4)]\n            round_keys.append(words)\n        return round_keys\n\n    def encrypt(self, data):\n        state = [(data >> (8 * (15 - i))) & 0xFF for i in range(16)]\n        round_keys = self.get_round_keys()\n        \n        self.add_round_key(state, 
round_keys[0])\n\n        for rnd in range(1, 14):\n            self.sub_bytes(state)\n            self.shift_rows(state)\n            self.mix_columns(state)\n            self.add_round_key(state, round_keys[rnd])\n\n        self.sub_bytes(state)\n        self.shift_rows(state)\n        self.add_round_key(state, round_keys[14])\n\n        self.data_out = 0\n        for b in state:\n            self.data_out = (self.data_out << 8) | b\n", "src/test_aes_encrypt.py": "import cocotb\nfrom cocotb.clock import Clock\nfrom cocotb.triggers import RisingEdge, Timer\nimport harness_library as hrs_lb\nimport random\n\ndef compare_values(dut, model, debug=0):\n    dut_data  = int(dut.o_data.value)\n\n    model_data = model.data_out\n\n    if debug == 1:\n        print(\"\\nOUTPUTS\")\n        print(f\"DUT o_data  = {hex(dut_data)} \\nMODEL o_data  = {hex(model_data)}\")\n    \n    assert dut_data == model_data,  f\"[ERROR] DUT o_data does not match model o_data: {hex(dut_data)} != {hex(model_data)}\"\n\n@cocotb.test()\nasync def test_aes_encrypt(dut):\n    \"\"\"Test the aes_encrypt module with edge cases and random data.\"\"\"\n    cocotb.start_soon(Clock(dut.clk, 10, unit='ns').start())\n\n    model = hrs_lb.aes_encrypt()\n\n    resets = 4\n    runs = 1000\n\n    data_min = 0\n    data_max = 2**128 - 1\n\n    key_min = 0\n    key_max = 2**256 - 1\n    \n    await hrs_lb.dut_init(dut)\n\n    for i in range(resets):\n        # Reset DUT\n        # Set all inputs to 0\n        dut.i_update_key.value = 0\n        dut.i_key.value        = 0\n        dut.i_start.value      = 0\n        dut.i_data.value       = 0\n        dut.rst_async_n.value  = 0\n        await RisingEdge(dut.clk)\n        dut.rst_async_n.value  = 1\n        await RisingEdge(dut.clk)\n\n        model.reset()\n\n        compare_values(dut, model)\n\n        for j in range(runs):\n            if j%100 == 0:\n                print(f'Reset {i}, run {j}')\n                \n            data = random.randint(data_min, 
data_max)\n            key = random.randint(key_min, key_max)\n            if j == 0:\n                update_key = 1\n            else:\n                update_key = random.randint(0,1)\n            \n            dut.i_update_key.value = update_key\n            dut.i_start.value      = 1\n            dut.i_key.value        = key\n            dut.i_data.value       = data\n\n            if update_key == 1:\n                model.update_key(key)\n            \n            model.encrypt(data)\n\n            await RisingEdge(dut.clk)\n            dut.i_update_key.value = 0\n            dut.i_start.value      = 0\n            dut.i_data.value       = 0\n            dut.i_key.value        = 0\n            await RisingEdge(dut.clk)\n            while dut.o_done.value == 0:\n                await RisingEdge(dut.clk)\n            \n            compare_values(dut, model)\n            ", "src/test_runner.py": "import cocotb\nimport os\nimport pytest\nimport random\nfrom cocotb_tools.runner import get_runner\n\n# Environment configuration\nverilog_sources = os.getenv(\"VERILOG_SOURCES\").split()\ntoplevel_lang   = os.getenv(\"TOPLEVEL_LANG\")\nsim             = os.getenv(\"SIM\", \"icarus\")\ntoplevel        = os.getenv(\"TOPLEVEL\")\nmodule          = os.getenv(\"MODULE\")\nwave            = bool(os.getenv(\"WAVE\"))\n\ndef runner():\n    # Configure and run the simulation\n    sim_runner = get_runner(sim)\n    sim_runner.build(\n        sources=verilog_sources,\n        hdl_toplevel=toplevel,\n        always=True,\n        clean=True,\n        verbose=True,\n        timescale=(\"1ns\", \"1ns\"),\n        log_file=\"sim.log\"\n    )\n\n    # Run the test\n    sim_runner.test(hdl_toplevel=toplevel, test_module=module, waves=True)\n\n\ndef test_data():\n    # Run the simulation with specified parameters\n    runner()"}, "context_files": {"rtl/sbox.sv": "module sbox (\n    input  logic [7:0] i_data,\n    output logic [7:0] o_data\n);\n\nalways_comb begin\n    case (i_data)\n    
    8'h00: o_data = 8'h63;\n        8'h01: o_data = 8'h7C;\n        8'h02: o_data = 8'h77;\n        8'h03: o_data = 8'h7B;\n        8'h04: o_data = 8'hF2;\n        8'h05: o_data = 8'h6B;\n        8'h06: o_data = 8'h6F;\n        8'h07: o_data = 8'hC5;\n        8'h08: o_data = 8'h30;\n        8'h09: o_data = 8'h01;\n        8'h0A: o_data = 8'h67;\n        8'h0B: o_data = 8'h2B;\n        8'h0C: o_data = 8'hFE;\n        8'h0D: o_data = 8'hD7;\n        8'h0E: o_data = 8'hAB;\n        8'h0F: o_data = 8'h76;\n        8'h10: o_data = 8'hCA;\n        8'h11: o_data = 8'h82;\n        8'h12: o_data = 8'hC9;\n        8'h13: o_data = 8'h7D;\n        8'h14: o_data = 8'hFA;\n        8'h15: o_data = 8'h59;\n        8'h16: o_data = 8'h47;\n        8'h17: o_data = 8'hF0;\n        8'h18: o_data = 8'hAD;\n        8'h19: o_data = 8'hD4;\n        8'h1A: o_data = 8'hA2;\n        8'h1B: o_data = 8'hAF;\n        8'h1C: o_data = 8'h9C;\n        8'h1D: o_data = 8'hA4;\n        8'h1E: o_data = 8'h72;\n        8'h1F: o_data = 8'hC0;\n        8'h20: o_data = 8'hB7;\n        8'h21: o_data = 8'hFD;\n        8'h22: o_data = 8'h93;\n        8'h23: o_data = 8'h26;\n        8'h24: o_data = 8'h36;\n        8'h25: o_data = 8'h3F;\n        8'h26: o_data = 8'hF7;\n        8'h27: o_data = 8'hCC;\n        8'h28: o_data = 8'h34;\n        8'h29: o_data = 8'hA5;\n        8'h2A: o_data = 8'hE5;\n        8'h2B: o_data = 8'hF1;\n        8'h2C: o_data = 8'h71;\n        8'h2D: o_data = 8'hD8;\n        8'h2E: o_data = 8'h31;\n        8'h2F: o_data = 8'h15;\n        8'h30: o_data = 8'h04;\n        8'h31: o_data = 8'hC7;\n        8'h32: o_data = 8'h23;\n        8'h33: o_data = 8'hC3;\n        8'h34: o_data = 8'h18;\n        8'h35: o_data = 8'h96;\n        8'h36: o_data = 8'h05;\n        8'h37: o_data = 8'h9A;\n        8'h38: o_data = 8'h07;\n        8'h39: o_data = 8'h12;\n        8'h3A: o_data = 8'h80;\n        8'h3B: o_data = 8'hE2;\n        8'h3C: o_data = 8'hEB;\n        8'h3D: o_data = 8'h27;\n        8'h3E: 
o_data = 8'hB2;\n        8'h3F: o_data = 8'h75;\n        8'h40: o_data = 8'h09;\n        8'h41: o_data = 8'h83;\n        8'h42: o_data = 8'h2C;\n        8'h43: o_data = 8'h1A;\n        8'h44: o_data = 8'h1B;\n        8'h45: o_data = 8'h6E;\n        8'h46: o_data = 8'h5A;\n        8'h47: o_data = 8'hA0;\n        8'h48: o_data = 8'h52;\n        8'h49: o_data = 8'h3B;\n        8'h4A: o_data = 8'hD6;\n        8'h4B: o_data = 8'hB3;\n        8'h4C: o_data = 8'h29;\n        8'h4D: o_data = 8'hE3;\n        8'h4E: o_data = 8'h2F;\n        8'h4F: o_data = 8'h84;\n        8'h50: o_data = 8'h53;\n        8'h51: o_data = 8'hD1;\n        8'h52: o_data = 8'h00;\n        8'h53: o_data = 8'hED;\n        8'h54: o_data = 8'h20;\n        8'h55: o_data = 8'hFC;\n        8'h56: o_data = 8'hB1;\n        8'h57: o_data = 8'h5B;\n        8'h58: o_data = 8'h6A;\n        8'h59: o_data = 8'hCB;\n        8'h5A: o_data = 8'hBE;\n        8'h5B: o_data = 8'h39;\n        8'h5C: o_data = 8'h4A;\n        8'h5D: o_data = 8'h4C;\n        8'h5E: o_data = 8'h58;\n        8'h5F: o_data = 8'hCF;\n        8'h60: o_data = 8'hD0;\n        8'h61: o_data = 8'hEF;\n        8'h62: o_data = 8'hAA;\n        8'h63: o_data = 8'hFB;\n        8'h64: o_data = 8'h43;\n        8'h65: o_data = 8'h4D;\n        8'h66: o_data = 8'h33;\n        8'h67: o_data = 8'h85;\n        8'h68: o_data = 8'h45;\n        8'h69: o_data = 8'hF9;\n        8'h6A: o_data = 8'h02;\n        8'h6B: o_data = 8'h7F;\n        8'h6C: o_data = 8'h50;\n        8'h6D: o_data = 8'h3C;\n        8'h6E: o_data = 8'h9F;\n        8'h6F: o_data = 8'hA8;\n        8'h70: o_data = 8'h51;\n        8'h71: o_data = 8'hA3;\n        8'h72: o_data = 8'h40;\n        8'h73: o_data = 8'h8F;\n        8'h74: o_data = 8'h92;\n        8'h75: o_data = 8'h9D;\n        8'h76: o_data = 8'h38;\n        8'h77: o_data = 8'hF5;\n        8'h78: o_data = 8'hBC;\n        8'h79: o_data = 8'hB6;\n        8'h7A: o_data = 8'hDA;\n        8'h7B: o_data = 8'h21;\n        8'h7C: o_data = 
8'h10;\n        8'h7D: o_data = 8'hFF;\n        8'h7E: o_data = 8'hF3;\n        8'h7F: o_data = 8'hD2;\n        8'h80: o_data = 8'hCD;\n        8'h81: o_data = 8'h0C;\n        8'h82: o_data = 8'h13;\n        8'h83: o_data = 8'hEC;\n        8'h84: o_data = 8'h5F;\n        8'h85: o_data = 8'h97;\n        8'h86: o_data = 8'h44;\n        8'h87: o_data = 8'h17;\n        8'h88: o_data = 8'hC4;\n        8'h89: o_data = 8'hA7;\n        8'h8A: o_data = 8'h7E;\n        8'h8B: o_data = 8'h3D;\n        8'h8C: o_data = 8'h64;\n        8'h8D: o_data = 8'h5D;\n        8'h8E: o_data = 8'h19;\n        8'h8F: o_data = 8'h73;\n        8'h90: o_data = 8'h60;\n        8'h91: o_data = 8'h81;\n        8'h92: o_data = 8'h4F;\n        8'h93: o_data = 8'hDC;\n        8'h94: o_data = 8'h22;\n        8'h95: o_data = 8'h2A;\n        8'h96: o_data = 8'h90;\n        8'h97: o_data = 8'h88;\n        8'h98: o_data = 8'h46;\n        8'h99: o_data = 8'hEE;\n        8'h9A: o_data = 8'hB8;\n        8'h9B: o_data = 8'h14;\n        8'h9C: o_data = 8'hDE;\n        8'h9D: o_data = 8'h5E;\n        8'h9E: o_data = 8'h0B;\n        8'h9F: o_data = 8'hDB;\n        8'hA0: o_data = 8'hE0;\n        8'hA1: o_data = 8'h32;\n        8'hA2: o_data = 8'h3A;\n        8'hA3: o_data = 8'h0A;\n        8'hA4: o_data = 8'h49;\n        8'hA5: o_data = 8'h06;\n        8'hA6: o_data = 8'h24;\n        8'hA7: o_data = 8'h5C;\n        8'hA8: o_data = 8'hC2;\n        8'hA9: o_data = 8'hD3;\n        8'hAA: o_data = 8'hAC;\n        8'hAB: o_data = 8'h62;\n        8'hAC: o_data = 8'h91;\n        8'hAD: o_data = 8'h95;\n        8'hAE: o_data = 8'hE4;\n        8'hAF: o_data = 8'h79;\n        8'hB0: o_data = 8'hE7;\n        8'hB1: o_data = 8'hC8;\n        8'hB2: o_data = 8'h37;\n        8'hB3: o_data = 8'h6D;\n        8'hB4: o_data = 8'h8D;\n        8'hB5: o_data = 8'hD5;\n        8'hB6: o_data = 8'h4E;\n        8'hB7: o_data = 8'hA9;\n        8'hB8: o_data = 8'h6C;\n        8'hB9: o_data = 8'h56;\n        8'hBA: o_data = 8'hF4;\n        
8'hBB: o_data = 8'hEA;\n        8'hBC: o_data = 8'h65;\n        8'hBD: o_data = 8'h7A;\n        8'hBE: o_data = 8'hAE;\n        8'hBF: o_data = 8'h08;\n        8'hC0: o_data = 8'hBA;\n        8'hC1: o_data = 8'h78;\n        8'hC2: o_data = 8'h25;\n        8'hC3: o_data = 8'h2E;\n        8'hC4: o_data = 8'h1C;\n        8'hC5: o_data = 8'hA6;\n        8'hC6: o_data = 8'hB4;\n        8'hC7: o_data = 8'hC6;\n        8'hC8: o_data = 8'hE8;\n        8'hC9: o_data = 8'hDD;\n        8'hCA: o_data = 8'h74;\n        8'hCB: o_data = 8'h1F;\n        8'hCC: o_data = 8'h4B;\n        8'hCD: o_data = 8'hBD;\n        8'hCE: o_data = 8'h8B;\n        8'hCF: o_data = 8'h8A;\n        8'hD0: o_data = 8'h70;\n        8'hD1: o_data = 8'h3E;\n        8'hD2: o_data = 8'hB5;\n        8'hD3: o_data = 8'h66;\n        8'hD4: o_data = 8'h48;\n        8'hD5: o_data = 8'h03;\n        8'hD6: o_data = 8'hF6;\n        8'hD7: o_data = 8'h0E;\n        8'hD8: o_data = 8'h61;\n        8'hD9: o_data = 8'h35;\n        8'hDA: o_data = 8'h57;\n        8'hDB: o_data = 8'hB9;\n        8'hDC: o_data = 8'h86;\n        8'hDD: o_data = 8'hC1;\n        8'hDE: o_data = 8'h1D;\n        8'hDF: o_data = 8'h9E;\n        8'hE0: o_data = 8'hE1;\n        8'hE1: o_data = 8'hF8;\n        8'hE2: o_data = 8'h98;\n        8'hE3: o_data = 8'h11;\n        8'hE4: o_data = 8'h69;\n        8'hE5: o_data = 8'hD9;\n        8'hE6: o_data = 8'h8E;\n        8'hE7: o_data = 8'h94;\n        8'hE8: o_data = 8'h9B;\n        8'hE9: o_data = 8'h1E;\n        8'hEA: o_data = 8'h87;\n        8'hEB: o_data = 8'hE9;\n        8'hEC: o_data = 8'hCE;\n        8'hED: o_data = 8'h55;\n        8'hEE: o_data = 8'h28;\n        8'hEF: o_data = 8'hDF;\n        8'hF0: o_data = 8'h8C;\n        8'hF1: o_data = 8'hA1;\n        8'hF2: o_data = 8'h89;\n        8'hF3: o_data = 8'h0D;\n        8'hF4: o_data = 8'hBF;\n        8'hF5: o_data = 8'hE6;\n        8'hF6: o_data = 8'h42;\n        8'hF7: o_data = 8'h68;\n        8'hF8: o_data = 8'h41;\n        8'hF9: o_data = 
8'h99;\n        8'hFA: o_data = 8'h2D;\n        8'hFB: o_data = 8'h0F;\n        8'hFC: o_data = 8'hB0;\n        8'hFD: o_data = 8'h54;\n        8'hFE: o_data = 8'hBB;\n        8'hFF: o_data = 8'h16;\n        default: o_data = 8'h00;\n    endcase\nend\n\nendmodule : sbox", "verif/tb_aes_encrypt.sv": "module tb_aes_encrypt;\n\nlocalparam NBW_KEY  = 'd256;\nlocalparam NBW_DATA = 'd128;\n\nlogic                clk;\nlogic                rst_async_n;\nlogic                i_update_key;\nlogic [NBW_KEY-1:0]  i_key;\nlogic                i_start;\nlogic [NBW_DATA-1:0] i_data;\nlogic                o_done;\nlogic [NBW_DATA-1:0] o_data;\n\naes_encrypt #(\n    .NBW_KEY(NBW_KEY),\n    .NBW_DATA(NBW_DATA)\n) uu_aes_encrypt (\n    .clk(clk),\n    .rst_async_n(rst_async_n),\n    .i_update_key(i_update_key),\n    .i_key(i_key),\n    .i_start(i_start),\n    .i_data(i_data),\n    .o_done(o_done),\n    .o_data(o_data)\n);\n\ntask Simple_test(logic update_key);\n    @(negedge clk);\n    i_key  = 256'h000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f;\n    i_data = 128'h00112233445566778899aabbccddeeff;\n    i_update_key = update_key;\n    i_start = 1;\n\n    @(negedge clk);\n    i_start = 0;\n    i_update_key = 0;\n    i_key = 0;\n\n    @(posedge o_done);\n    @(negedge clk);\n\n    if(o_data == 128'h8ea2b7ca516745bfeafc49904b496089) begin\n        $display(\"PASS\");\n    end else begin\n        $display(\"FAIL\");\n        $display(\"Expected output: %h\", 128'h8ea2b7ca516745bfeafc49904b496089);\n        $display(\"Observed output: %h\", o_data);\n    end\nendtask\n\ninitial begin\n    $dumpfile(\"test.vcd\");\n    $dumpvars(0,tb_aes_encrypt);\nend\n\nalways #5 clk = ~clk;\n\ninitial begin\n    clk = 0;\n    i_start = 0;\n    rst_async_n = 1;\n    #1;\n    rst_async_n = 0;\n    #2;\n    rst_async_n = 1;\n    @(negedge clk);\n\n    // Tasks go here\n    Simple_test(1'b1);\n    Simple_test(1'b0);\n\n    @(negedge clk);\n    @(negedge clk);\n\n    
$finish();\nend\n\nendmodule"}}}
+{"responses_create_params": {"input": [{"role": "system", "content": "You are a language model that has the following file operations available at your disposal:\n  - **List files in a directory** by running one of the following commands: \n    - `ls`\n    - `tree`\n  - **Read files** by using:\n    - `cat <filename>`\n  - **Write files** by using:\n    - `echo <content> > <filename>`\n  - **Compile Verilog** by using `iverilog` such as:\n    - `iverilog -o <output_filename>.out -g2012 <verilog_code_file> <verilog_testbench_file>`\n  - **Run Simulation** by using:\n    - `vvp <output_filename>.out`\n  - **Find current working directory** by using:\n    - `pwd`\n  - **Update the contents of a text file from a old content to new content**\n    - `sed -i  \"line_number s/old_statement/new_statement/\" file.sv`\n  - **To access a specific line of the file**\n     - `awk 'NR==line_number' file_name.sv`\n\nYou will be given a prompt and your task is to understand it and solve the given issue by using the above-mentioned commands as needed. 
In the final step, you should create a Linux patch to highlight the necessary file updates to achieve the targeted goal.\n\n  You will solve the problem step by step using the following approach of \n  - thought (thinking process of the step you're going to take)\n  - action (the command you will be running to get more details/context that's helpful to solve the problem)\n  - observation (the output from the action you will observe based on which you will take your next step)\n\n  The last step will be the final output summary and the patch itself in the following format \n  - thought (the summary of what you did and some introduction of the patch file itself)\n  - patch (a Linux-based patch that needs to be applied to reach the relevant solution)"}, {"role": "user", "content": "\nProvide me one answer for this request: Modify the `aes_decrypt` and `aes_ke` modules in the `rtl` directory, which originally perform an AES-128 decryption and AES-128 key expansion, to perform an AES-256 decryption and an AES-256 key expansion. A testbench to test the updated design is provided in the `verif` directory, and the `sbox` and `inv_sbox` modules do not need to be changed. The AES-128 version takes a 128-bit key and a 128-bit data and decrypts it, while the AES-256 version receives a 256-bit key and a 128-bit data and decrypts it. Below is a description of the changes that need to be made:\n\n### 1. **Update Interface Parameters**\n\n- Change the key input size from 128 to 256 bits: Instead of copying 4 32-bit words into the first part of the expanded key, copy 8 32-bit words from the 256-bit input key.\n\n### 2. 
**Modify Key Expansion Loop**\n\n- In AES-128, for each 32-bit word `w[i]` where `i` is a multiple of `4`, you apply:\n  - For each `i >= 4`:\n    - `Temp = RotWord(w[i-1])`\n    - `Temp = SubWord(Temp)`\n    - `Temp = Temp XOR Rcon[i/4 - 1]`\n    - `w[i] = w[i - 4] XOR Temp`\n\n(`Temp` is used to demonstrate intermediate calculation storage during each step of calculation)\n\n- In **AES-256**, the logic changes:\n  - For each `i >= 8`:\n    - If `i % 8 == 0`:\n      - `Temp = RotWord(w[i-1])`\n      - `Temp = SubWord(Temp)`\n      - `Temp = Temp XOR Rcon[i/8 - 1]`\n    - Else if `i % 8 == 4`:\n      - `Temp = SubWord(w[i-1])`\n      - **No rotation, no Rcon**\n    - Else:\n      - `Temp = w[i-1]`\n    - Then:\n      - `w[i] = w[i - 8] XOR Temp`\n\nMake sure to implement this conditional branching properly in the loop.\n\n### 3. **Rcon Handling**\n\n- Rcon is only applied when `i % 8 == 0` (i.e., every 8 words in AES-256).\n- Do **not** apply Rcon when `i % 8 == 4`.\n- **If any Rcon value is not needed, remove it from the code**.\n\n### 4. **Update Decryption Flow**\n\n- **Increase round counter** of the decryption operation to go up to 14. Make sure to wait while the key is being expanded.\n- **Expand the key schedule** to generate and store **15 round keys**, each 128 bits (i.e., 240 bytes or 60 words of 32 bits total).\n- Update loops that iterate over rounds so they use the appropriate 128-bit portion of the expanded key in **reverse order**, starting from the last round and moving toward the first.\n- Ensure the decryption steps are correctly sequenced:\n  - Initial AddRoundKey\n  - 13 rounds of: ShiftRows \u2192 SubBytes \u2192 AddRoundKey \u2192 MixColumns\n  - Final round: ShiftRows \u2192 SubBytes \u2192 AddRoundKey (no MixColumns)\n\n### 5. **Initial Round Key Addition**\n- Ensure the first round key added corresponds to the last round key from the AES-256 key schedule.\n\n### 6. 
**Internal Buffers and Registers**\n- Update the size of any registers or memory arrays that store round keys from 44 32-bit words (AES-128) to 60 32-bit words (AES-256)\n\n\nName the files as: ['rtl/aes_decrypt.sv', 'rtl/aes_ke.sv']."}]}, "verifier_metadata": {"task_id": "cvdp_agentic_AES_encryption_decryption_0012", "categories": ["cid004", "hard"], "difficulty": "hard", "target_files": ["rtl/aes_decrypt.sv", "rtl/aes_ke.sv"], "harness_files": {"docker-compose.yml": "services:\n  sanity:\n    image: __OSS_SIM_IMAGE__\n    volumes:\n      - ./src:/src/      \n    working_dir : /code/rundir\n    env_file    : ./src/.env\n    command     : pytest /src/test_runner.py -s -v -o cache_dir=/rundir/harness/.cache\n", "src/.env": "VERILOG_SOURCES = /code/rtl/aes_decrypt.sv /code/rtl/aes_ke.sv /code/rtl/inv_sbox.sv /code/rtl/sbox.sv\nTOPLEVEL        = aes_decrypt\nMODULE          = test_aes_decrypt\nSIM             = icarus\nTOPLEVEL_LANG   = verilog\nPYTHONPATH      = /src\nHASH            = 12-modify-aes-decrypt-to-256\nWAVE            = true", "src/harness_library.py": "import cocotb\nfrom cocotb.triggers import FallingEdge, RisingEdge, Timer\nfrom collections import deque\n\nasync def dut_init(dut):\n    # iterate all the input signals and initialize with 0\n    for signal in dut:\n        try:\n            signal.value = 0\n        except Exception:\n            pass\n\nclass aes_decrypt:\n    RCON = [\n        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36\n    ]\n    \n    SBOX = [\n        # S-box table used in AES\n        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,\n        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,\n        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,\n        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,\n        0x09, 0x83, 0x2c, 
0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,\n        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,\n        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,\n        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,\n        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,\n        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,\n        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,\n        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,\n        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,\n        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,\n        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,\n        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16\n    ]\n    \n    def __init__(self):\n        self.expanded_key = 0\n        self.data_out = 0\n        # Compute inverse S-Box\n        self.inv_s_box = [0] * 256\n        for i, val in enumerate(self.SBOX):\n            self.inv_s_box[val] = i\n    \n    def reset(self):\n        self.expanded_key = 0\n        self.data_out = 0\n    \n    def update_key(self, key):\n        key_bytes = key.to_bytes(32, 'big')  # Convert 256-bit key to bytes\n        self.expanded_key = self.expand_key(key_bytes)\n    \n    def expand_key(self, key):\n        words = [list(key[i:i+4]) for i in range(0, 32, 4)]\n        \n        for i in range(8, 60):\n            temp = words[i - 1]\n            \n            if i % 8 == 0:\n                temp = 
self.sub_word(self.rot_word(temp))\n                temp[0] ^= self.RCON[i // 8 - 1]\n            elif i % 8 == 4:\n                temp = self.sub_word(temp)\n            \n            words.append([words[i - 8][j] ^ temp[j] for j in range(4)])\n        \n        expanded_key_bytes = b''.join(bytes(word) for word in words)\n        return int.from_bytes(expanded_key_bytes, 'big')\n    \n    def sub_word(self, word):\n        return [self.SBOX[b] for b in word]\n    \n    def rot_word(self, word):\n        return word[1:] + word[:1]\n    \n    def gmul(self, a, b):\n        p = 0\n        for _ in range(8):\n            if b & 1:\n                p ^= a\n            hi = a & 0x80\n            a = (a << 1) & 0xFF\n            if hi:\n                a ^= 0x1b\n            b >>= 1\n        return p\n\n    def inv_sub_bytes(self, state):\n        for i in range(16):\n            state[i] = self.inv_s_box[state[i]]\n\n    def inv_shift_rows(self, state):\n        state[1], state[5], state[9], state[13] = state[13], state[1], state[5], state[9]\n        state[2], state[6], state[10], state[14] = state[10], state[14], state[2], state[6]\n        state[3], state[7], state[11], state[15] = state[7], state[11], state[15], state[3]\n\n    def inv_mix_columns(self, s):\n        for i in range(4):\n            a = s[i*4:(i+1)*4]\n            s[i*4+0] = self.gmul(a[0],14)^self.gmul(a[1],11)^self.gmul(a[2],13)^self.gmul(a[3],9)\n            s[i*4+1] = self.gmul(a[0],9)^self.gmul(a[1],14)^self.gmul(a[2],11)^self.gmul(a[3],13)\n            s[i*4+2] = self.gmul(a[0],13)^self.gmul(a[1],9)^self.gmul(a[2],14)^self.gmul(a[3],11)\n            s[i*4+3] = self.gmul(a[0],11)^self.gmul(a[1],13)^self.gmul(a[2],9)^self.gmul(a[3],14)\n\n    def add_round_key(self, state, round_key_words):\n        for col in range(4):\n            rk = round_key_words[col]\n            for row in range(4):\n                state[col * 4 + row] ^= (rk >> (24 - 8 * row)) & 0xFF\n\n    def get_round_keys(self):\n 
       expanded_bytes = self.expanded_key.to_bytes(240, 'big')\n        round_keys = []\n        for i in range(0, 240, 16):  # Each round key is 16 bytes (4 words)\n            words = [int.from_bytes(expanded_bytes[i + j*4 : i + (j+1)*4], 'big') for j in range(4)]\n            round_keys.append(words)\n        return round_keys\n\n    def decrypt(self, data):\n        state = [(data >> (8 * (15 - i))) & 0xFF for i in range(16)]\n        round_keys = self.get_round_keys()\n\n        self.add_round_key(state, round_keys[14])\n\n        for rnd in range(13, 0, -1):\n            self.inv_shift_rows(state)\n            self.inv_sub_bytes(state)\n            self.add_round_key(state, round_keys[rnd])\n            self.inv_mix_columns(state)\n\n        self.inv_shift_rows(state)\n        self.inv_sub_bytes(state)\n        self.add_round_key(state, round_keys[0])\n\n        self.data_out = 0\n        for b in state:\n            self.data_out = (self.data_out << 8) | b", "src/test_aes_decrypt.py": "import cocotb\nfrom cocotb.clock import Clock\nfrom cocotb.triggers import RisingEdge, Timer\nimport harness_library as hrs_lb\nimport random\n\ndef compare_values(dut, model, debug=0):\n    dut_data  = int(dut.o_data.value)\n\n    model_data = model.data_out\n\n    if debug == 1:\n        print(\"\\nOUTPUTS\")\n        print(f\"DUT o_data  = {hex(dut_data)} \\nMODEL o_data  = {hex(model_data)}\")\n    \n    assert dut_data == model_data,  f\"[ERROR] DUT o_data does not match model o_data: {hex(dut_data)} != {hex(model_data)}\"\n\n@cocotb.test()\nasync def test_aes_decrypt(dut):\n    \"\"\"Test the aes_decrypt module with edge cases and random data.\"\"\"\n    cocotb.start_soon(Clock(dut.clk, 10, unit='ns').start())\n\n    model = hrs_lb.aes_decrypt()\n\n    resets = 4\n    runs = 1000\n\n    data_min = 0\n    data_max = 2**128 - 1\n\n    key_min = 0\n    key_max = 2**256 - 1\n    \n    await hrs_lb.dut_init(dut)\n\n    for i in range(resets):\n        # Reset DUT\n        # 
Set all inputs to 0\n        dut.i_update_key.value = 0\n        dut.i_key.value        = 0\n        dut.i_start.value      = 0\n        dut.i_data.value       = 0\n        dut.rst_async_n.value  = 0\n        await RisingEdge(dut.clk)\n        dut.rst_async_n.value  = 1\n        await RisingEdge(dut.clk)\n\n        model.reset()\n\n        compare_values(dut, model)\n\n        for j in range(runs):\n            if j%100 == 0:\n                print(f'Reset {i}, run {j}')\n                \n            data = random.randint(data_min, data_max)\n            key = random.randint(key_min, key_max)\n            if j == 0:\n                update_key = 1\n            else:\n                update_key = random.randint(0,1)\n            \n            dut.i_update_key.value = update_key\n            dut.i_start.value      = 1\n            dut.i_key.value        = key\n            dut.i_data.value       = data\n\n            if update_key == 1:\n                model.update_key(key)\n            \n            model.decrypt(data)\n\n            await RisingEdge(dut.clk)\n            dut.i_update_key.value = 0\n            dut.i_start.value      = 0\n            dut.i_data.value       = 0\n            dut.i_key.value        = 0\n            await RisingEdge(dut.clk)\n            while dut.o_done.value == 0:\n                await RisingEdge(dut.clk)\n            \n            compare_values(dut, model)\n            ", "src/test_runner.py": "import cocotb\nimport os\nimport pytest\nimport random\nfrom cocotb_tools.runner import get_runner\n\n# Environment configuration\nverilog_sources = os.getenv(\"VERILOG_SOURCES\").split()\ntoplevel_lang   = os.getenv(\"TOPLEVEL_LANG\")\nsim             = os.getenv(\"SIM\", \"icarus\")\ntoplevel        = os.getenv(\"TOPLEVEL\")\nmodule          = os.getenv(\"MODULE\")\nwave            = bool(os.getenv(\"WAVE\"))\n\ndef runner():\n    # Configure and run the simulation\n    sim_runner = get_runner(sim)\n    sim_runner.build(\n        
sources=verilog_sources,\n        hdl_toplevel=toplevel,\n        always=True,\n        clean=True,\n        verbose=True,\n        timescale=(\"1ns\", \"1ns\"),\n        log_file=\"sim.log\"\n    )\n\n    # Run the test\n    sim_runner.test(hdl_toplevel=toplevel, test_module=module, waves=True)\n\n\ndef test_data():\n    # Run the simulation with specified parameters\n    runner()"}, "context_files": {"rtl/inv_sbox.sv": "module inv_sbox (\n    input  logic [7:0] i_data,\n    output logic [7:0] o_data\n);\n\nalways_comb begin\n    case (i_data)\n        8'h00: o_data = 8'h52;\n        8'h01: o_data = 8'h09;\n        8'h02: o_data = 8'h6a;\n        8'h03: o_data = 8'hd5;\n        8'h04: o_data = 8'h30;\n        8'h05: o_data = 8'h36;\n        8'h06: o_data = 8'ha5;\n        8'h07: o_data = 8'h38;\n        8'h08: o_data = 8'hbf;\n        8'h09: o_data = 8'h40;\n        8'h0a: o_data = 8'ha3;\n        8'h0b: o_data = 8'h9e;\n        8'h0c: o_data = 8'h81;\n        8'h0d: o_data = 8'hf3;\n        8'h0e: o_data = 8'hd7;\n        8'h0f: o_data = 8'hfb;\n        8'h10: o_data = 8'h7c;\n        8'h11: o_data = 8'he3;\n        8'h12: o_data = 8'h39;\n        8'h13: o_data = 8'h82;\n        8'h14: o_data = 8'h9b;\n        8'h15: o_data = 8'h2f;\n        8'h16: o_data = 8'hff;\n        8'h17: o_data = 8'h87;\n        8'h18: o_data = 8'h34;\n        8'h19: o_data = 8'h8e;\n        8'h1a: o_data = 8'h43;\n        8'h1b: o_data = 8'h44;\n        8'h1c: o_data = 8'hc4;\n        8'h1d: o_data = 8'hde;\n        8'h1e: o_data = 8'he9;\n        8'h1f: o_data = 8'hcb;\n        8'h20: o_data = 8'h54;\n        8'h21: o_data = 8'h7b;\n        8'h22: o_data = 8'h94;\n        8'h23: o_data = 8'h32;\n        8'h24: o_data = 8'ha6;\n        8'h25: o_data = 8'hc2;\n        8'h26: o_data = 8'h23;\n        8'h27: o_data = 8'h3d;\n        8'h28: o_data = 8'hee;\n        8'h29: o_data = 8'h4c;\n        8'h2a: o_data = 8'h95;\n        8'h2b: o_data = 8'h0b;\n        8'h2c: o_data = 8'h42;\n        
8'h2d: o_data = 8'hfa;\n        8'h2e: o_data = 8'hc3;\n        8'h2f: o_data = 8'h4e;\n        8'h30: o_data = 8'h08;\n        8'h31: o_data = 8'h2e;\n        8'h32: o_data = 8'ha1;\n        8'h33: o_data = 8'h66;\n        8'h34: o_data = 8'h28;\n        8'h35: o_data = 8'hd9;\n        8'h36: o_data = 8'h24;\n        8'h37: o_data = 8'hb2;\n        8'h38: o_data = 8'h76;\n        8'h39: o_data = 8'h5b;\n        8'h3a: o_data = 8'ha2;\n        8'h3b: o_data = 8'h49;\n        8'h3c: o_data = 8'h6d;\n        8'h3d: o_data = 8'h8b;\n        8'h3e: o_data = 8'hd1;\n        8'h3f: o_data = 8'h25;\n        8'h40: o_data = 8'h72;\n        8'h41: o_data = 8'hf8;\n        8'h42: o_data = 8'hf6;\n        8'h43: o_data = 8'h64;\n        8'h44: o_data = 8'h86;\n        8'h45: o_data = 8'h68;\n        8'h46: o_data = 8'h98;\n        8'h47: o_data = 8'h16;\n        8'h48: o_data = 8'hd4;\n        8'h49: o_data = 8'ha4;\n        8'h4a: o_data = 8'h5c;\n        8'h4b: o_data = 8'hcc;\n        8'h4c: o_data = 8'h5d;\n        8'h4d: o_data = 8'h65;\n        8'h4e: o_data = 8'hb6;\n        8'h4f: o_data = 8'h92;\n        8'h50: o_data = 8'h6c;\n        8'h51: o_data = 8'h70;\n        8'h52: o_data = 8'h48;\n        8'h53: o_data = 8'h50;\n        8'h54: o_data = 8'hfd;\n        8'h55: o_data = 8'hed;\n        8'h56: o_data = 8'hb9;\n        8'h57: o_data = 8'hda;\n        8'h58: o_data = 8'h5e;\n        8'h59: o_data = 8'h15;\n        8'h5a: o_data = 8'h46;\n        8'h5b: o_data = 8'h57;\n        8'h5c: o_data = 8'ha7;\n        8'h5d: o_data = 8'h8d;\n        8'h5e: o_data = 8'h9d;\n        8'h5f: o_data = 8'h84;\n        8'h60: o_data = 8'h90;\n        8'h61: o_data = 8'hd8;\n        8'h62: o_data = 8'hab;\n        8'h63: o_data = 8'h00;\n        8'h64: o_data = 8'h8c;\n        8'h65: o_data = 8'hbc;\n        8'h66: o_data = 8'hd3;\n        8'h67: o_data = 8'h0a;\n        8'h68: o_data = 8'hf7;\n        8'h69: o_data = 8'he4;\n        8'h6a: o_data = 8'h58;\n        8'h6b: o_data = 
8'h05;\n        8'h6c: o_data = 8'hb8;\n        8'h6d: o_data = 8'hb3;\n        8'h6e: o_data = 8'h45;\n        8'h6f: o_data = 8'h06;\n        8'h70: o_data = 8'hd0;\n        8'h71: o_data = 8'h2c;\n        8'h72: o_data = 8'h1e;\n        8'h73: o_data = 8'h8f;\n        8'h74: o_data = 8'hca;\n        8'h75: o_data = 8'h3f;\n        8'h76: o_data = 8'h0f;\n        8'h77: o_data = 8'h02;\n        8'h78: o_data = 8'hc1;\n        8'h79: o_data = 8'haf;\n        8'h7a: o_data = 8'hbd;\n        8'h7b: o_data = 8'h03;\n        8'h7c: o_data = 8'h01;\n        8'h7d: o_data = 8'h13;\n        8'h7e: o_data = 8'h8a;\n        8'h7f: o_data = 8'h6b;\n        8'h80: o_data = 8'h3a;\n        8'h81: o_data = 8'h91;\n        8'h82: o_data = 8'h11;\n        8'h83: o_data = 8'h41;\n        8'h84: o_data = 8'h4f;\n        8'h85: o_data = 8'h67;\n        8'h86: o_data = 8'hdc;\n        8'h87: o_data = 8'hea;\n        8'h88: o_data = 8'h97;\n        8'h89: o_data = 8'hf2;\n        8'h8a: o_data = 8'hcf;\n        8'h8b: o_data = 8'hce;\n        8'h8c: o_data = 8'hf0;\n        8'h8d: o_data = 8'hb4;\n        8'h8e: o_data = 8'he6;\n        8'h8f: o_data = 8'h73;\n        8'h90: o_data = 8'h96;\n        8'h91: o_data = 8'hac;\n        8'h92: o_data = 8'h74;\n        8'h93: o_data = 8'h22;\n        8'h94: o_data = 8'he7;\n        8'h95: o_data = 8'had;\n        8'h96: o_data = 8'h35;\n        8'h97: o_data = 8'h85;\n        8'h98: o_data = 8'he2;\n        8'h99: o_data = 8'hf9;\n        8'h9a: o_data = 8'h37;\n        8'h9b: o_data = 8'he8;\n        8'h9c: o_data = 8'h1c;\n        8'h9d: o_data = 8'h75;\n        8'h9e: o_data = 8'hdf;\n        8'h9f: o_data = 8'h6e;\n        8'ha0: o_data = 8'h47;\n        8'ha1: o_data = 8'hf1;\n        8'ha2: o_data = 8'h1a;\n        8'ha3: o_data = 8'h71;\n        8'ha4: o_data = 8'h1d;\n        8'ha5: o_data = 8'h29;\n        8'ha6: o_data = 8'hc5;\n        8'ha7: o_data = 8'h89;\n        8'ha8: o_data = 8'h6f;\n        8'ha9: o_data = 8'hb7;\n        
8'haa: o_data = 8'h62;\n        8'hab: o_data = 8'h0e;\n        8'hac: o_data = 8'haa;\n        8'had: o_data = 8'h18;\n        8'hae: o_data = 8'hbe;\n        8'haf: o_data = 8'h1b;\n        8'hb0: o_data = 8'hfc;\n        8'hb1: o_data = 8'h56;\n        8'hb2: o_data = 8'h3e;\n        8'hb3: o_data = 8'h4b;\n        8'hb4: o_data = 8'hc6;\n        8'hb5: o_data = 8'hd2;\n        8'hb6: o_data = 8'h79;\n        8'hb7: o_data = 8'h20;\n        8'hb8: o_data = 8'h9a;\n        8'hb9: o_data = 8'hdb;\n        8'hba: o_data = 8'hc0;\n        8'hbb: o_data = 8'hfe;\n        8'hbc: o_data = 8'h78;\n        8'hbd: o_data = 8'hcd;\n        8'hbe: o_data = 8'h5a;\n        8'hbf: o_data = 8'hf4;\n        8'hc0: o_data = 8'h1f;\n        8'hc1: o_data = 8'hdd;\n        8'hc2: o_data = 8'ha8;\n        8'hc3: o_data = 8'h33;\n        8'hc4: o_data = 8'h88;\n        8'hc5: o_data = 8'h07;\n        8'hc6: o_data = 8'hc7;\n        8'hc7: o_data = 8'h31;\n        8'hc8: o_data = 8'hb1;\n        8'hc9: o_data = 8'h12;\n        8'hca: o_data = 8'h10;\n        8'hcb: o_data = 8'h59;\n        8'hcc: o_data = 8'h27;\n        8'hcd: o_data = 8'h80;\n        8'hce: o_data = 8'hec;\n        8'hcf: o_data = 8'h5f;\n        8'hd0: o_data = 8'h60;\n        8'hd1: o_data = 8'h51;\n        8'hd2: o_data = 8'h7f;\n        8'hd3: o_data = 8'ha9;\n        8'hd4: o_data = 8'h19;\n        8'hd5: o_data = 8'hb5;\n        8'hd6: o_data = 8'h4a;\n        8'hd7: o_data = 8'h0d;\n        8'hd8: o_data = 8'h2d;\n        8'hd9: o_data = 8'he5;\n        8'hda: o_data = 8'h7a;\n        8'hdb: o_data = 8'h9f;\n        8'hdc: o_data = 8'h93;\n        8'hdd: o_data = 8'hc9;\n        8'hde: o_data = 8'h9c;\n        8'hdf: o_data = 8'hef;\n        8'he0: o_data = 8'ha0;\n        8'he1: o_data = 8'he0;\n        8'he2: o_data = 8'h3b;\n        8'he3: o_data = 8'h4d;\n        8'he4: o_data = 8'hae;\n        8'he5: o_data = 8'h2a;\n        8'he6: o_data = 8'hf5;\n        8'he7: o_data = 8'hb0;\n        8'he8: o_data = 
8'hc8;\n        8'he9: o_data = 8'heb;\n        8'hea: o_data = 8'hbb;\n        8'heb: o_data = 8'h3c;\n        8'hec: o_data = 8'h83;\n        8'hed: o_data = 8'h53;\n        8'hee: o_data = 8'h99;\n        8'hef: o_data = 8'h61;\n        8'hf0: o_data = 8'h17;\n        8'hf1: o_data = 8'h2b;\n        8'hf2: o_data = 8'h04;\n        8'hf3: o_data = 8'h7e;\n        8'hf4: o_data = 8'hba;\n        8'hf5: o_data = 8'h77;\n        8'hf6: o_data = 8'hd6;\n        8'hf7: o_data = 8'h26;\n        8'hf8: o_data = 8'he1;\n        8'hf9: o_data = 8'h69;\n        8'hfa: o_data = 8'h14;\n        8'hfb: o_data = 8'h63;\n        8'hfc: o_data = 8'h55;\n        8'hfd: o_data = 8'h21;\n        8'hfe: o_data = 8'h0c;\n        8'hff: o_data = 8'h7d;\n        default: o_data = 8'h00;\n    endcase\nend\n\nendmodule : inv_sbox", "rtl/sbox.sv": "module sbox (\n    input  logic [7:0] i_data,\n    output logic [7:0] o_data\n);\n\nalways_comb begin\n    case (i_data)\n        8'h00: o_data = 8'h63;\n        8'h01: o_data = 8'h7C;\n        8'h02: o_data = 8'h77;\n        8'h03: o_data = 8'h7B;\n        8'h04: o_data = 8'hF2;\n        8'h05: o_data = 8'h6B;\n        8'h06: o_data = 8'h6F;\n        8'h07: o_data = 8'hC5;\n        8'h08: o_data = 8'h30;\n        8'h09: o_data = 8'h01;\n        8'h0A: o_data = 8'h67;\n        8'h0B: o_data = 8'h2B;\n        8'h0C: o_data = 8'hFE;\n        8'h0D: o_data = 8'hD7;\n        8'h0E: o_data = 8'hAB;\n        8'h0F: o_data = 8'h76;\n        8'h10: o_data = 8'hCA;\n        8'h11: o_data = 8'h82;\n        8'h12: o_data = 8'hC9;\n        8'h13: o_data = 8'h7D;\n        8'h14: o_data = 8'hFA;\n        8'h15: o_data = 8'h59;\n        8'h16: o_data = 8'h47;\n        8'h17: o_data = 8'hF0;\n        8'h18: o_data = 8'hAD;\n        8'h19: o_data = 8'hD4;\n        8'h1A: o_data = 8'hA2;\n        8'h1B: o_data = 8'hAF;\n        8'h1C: o_data = 8'h9C;\n        8'h1D: o_data = 8'hA4;\n        8'h1E: o_data = 8'h72;\n        8'h1F: o_data = 8'hC0;\n        8'h20: 
o_data = 8'hB7;\n        8'h21: o_data = 8'hFD;\n        8'h22: o_data = 8'h93;\n        8'h23: o_data = 8'h26;\n        8'h24: o_data = 8'h36;\n        8'h25: o_data = 8'h3F;\n        8'h26: o_data = 8'hF7;\n        8'h27: o_data = 8'hCC;\n        8'h28: o_data = 8'h34;\n        8'h29: o_data = 8'hA5;\n        8'h2A: o_data = 8'hE5;\n        8'h2B: o_data = 8'hF1;\n        8'h2C: o_data = 8'h71;\n        8'h2D: o_data = 8'hD8;\n        8'h2E: o_data = 8'h31;\n        8'h2F: o_data = 8'h15;\n        8'h30: o_data = 8'h04;\n        8'h31: o_data = 8'hC7;\n        8'h32: o_data = 8'h23;\n        8'h33: o_data = 8'hC3;\n        8'h34: o_data = 8'h18;\n        8'h35: o_data = 8'h96;\n        8'h36: o_data = 8'h05;\n        8'h37: o_data = 8'h9A;\n        8'h38: o_data = 8'h07;\n        8'h39: o_data = 8'h12;\n        8'h3A: o_data = 8'h80;\n        8'h3B: o_data = 8'hE2;\n        8'h3C: o_data = 8'hEB;\n        8'h3D: o_data = 8'h27;\n        8'h3E: o_data = 8'hB2;\n        8'h3F: o_data = 8'h75;\n        8'h40: o_data = 8'h09;\n        8'h41: o_data = 8'h83;\n        8'h42: o_data = 8'h2C;\n        8'h43: o_data = 8'h1A;\n        8'h44: o_data = 8'h1B;\n        8'h45: o_data = 8'h6E;\n        8'h46: o_data = 8'h5A;\n        8'h47: o_data = 8'hA0;\n        8'h48: o_data = 8'h52;\n        8'h49: o_data = 8'h3B;\n        8'h4A: o_data = 8'hD6;\n        8'h4B: o_data = 8'hB3;\n        8'h4C: o_data = 8'h29;\n        8'h4D: o_data = 8'hE3;\n        8'h4E: o_data = 8'h2F;\n        8'h4F: o_data = 8'h84;\n        8'h50: o_data = 8'h53;\n        8'h51: o_data = 8'hD1;\n        8'h52: o_data = 8'h00;\n        8'h53: o_data = 8'hED;\n        8'h54: o_data = 8'h20;\n        8'h55: o_data = 8'hFC;\n        8'h56: o_data = 8'hB1;\n        8'h57: o_data = 8'h5B;\n        8'h58: o_data = 8'h6A;\n        8'h59: o_data = 8'hCB;\n        8'h5A: o_data = 8'hBE;\n        8'h5B: o_data = 8'h39;\n        8'h5C: o_data = 8'h4A;\n        8'h5D: o_data = 8'h4C;\n        8'h5E: o_data = 
8'h58;\n        8'h5F: o_data = 8'hCF;\n        8'h60: o_data = 8'hD0;\n        8'h61: o_data = 8'hEF;\n        8'h62: o_data = 8'hAA;\n        8'h63: o_data = 8'hFB;\n        8'h64: o_data = 8'h43;\n        8'h65: o_data = 8'h4D;\n        8'h66: o_data = 8'h33;\n        8'h67: o_data = 8'h85;\n        8'h68: o_data = 8'h45;\n        8'h69: o_data = 8'hF9;\n        8'h6A: o_data = 8'h02;\n        8'h6B: o_data = 8'h7F;\n        8'h6C: o_data = 8'h50;\n        8'h6D: o_data = 8'h3C;\n        8'h6E: o_data = 8'h9F;\n        8'h6F: o_data = 8'hA8;\n        8'h70: o_data = 8'h51;\n        8'h71: o_data = 8'hA3;\n        8'h72: o_data = 8'h40;\n        8'h73: o_data = 8'h8F;\n        8'h74: o_data = 8'h92;\n        8'h75: o_data = 8'h9D;\n        8'h76: o_data = 8'h38;\n        8'h77: o_data = 8'hF5;\n        8'h78: o_data = 8'hBC;\n        8'h79: o_data = 8'hB6;\n        8'h7A: o_data = 8'hDA;\n        8'h7B: o_data = 8'h21;\n        8'h7C: o_data = 8'h10;\n        8'h7D: o_data = 8'hFF;\n        8'h7E: o_data = 8'hF3;\n        8'h7F: o_data = 8'hD2;\n        8'h80: o_data = 8'hCD;\n        8'h81: o_data = 8'h0C;\n        8'h82: o_data = 8'h13;\n        8'h83: o_data = 8'hEC;\n        8'h84: o_data = 8'h5F;\n        8'h85: o_data = 8'h97;\n        8'h86: o_data = 8'h44;\n        8'h87: o_data = 8'h17;\n        8'h88: o_data = 8'hC4;\n        8'h89: o_data = 8'hA7;\n        8'h8A: o_data = 8'h7E;\n        8'h8B: o_data = 8'h3D;\n        8'h8C: o_data = 8'h64;\n        8'h8D: o_data = 8'h5D;\n        8'h8E: o_data = 8'h19;\n        8'h8F: o_data = 8'h73;\n        8'h90: o_data = 8'h60;\n        8'h91: o_data = 8'h81;\n        8'h92: o_data = 8'h4F;\n        8'h93: o_data = 8'hDC;\n        8'h94: o_data = 8'h22;\n        8'h95: o_data = 8'h2A;\n        8'h96: o_data = 8'h90;\n        8'h97: o_data = 8'h88;\n        8'h98: o_data = 8'h46;\n        8'h99: o_data = 8'hEE;\n        8'h9A: o_data = 8'hB8;\n        8'h9B: o_data = 8'h14;\n        8'h9C: o_data = 8'hDE;\n        
8'h9D: o_data = 8'h5E;\n        8'h9E: o_data = 8'h0B;\n        8'h9F: o_data = 8'hDB;\n        8'hA0: o_data = 8'hE0;\n        8'hA1: o_data = 8'h32;\n        8'hA2: o_data = 8'h3A;\n        8'hA3: o_data = 8'h0A;\n        8'hA4: o_data = 8'h49;\n        8'hA5: o_data = 8'h06;\n        8'hA6: o_data = 8'h24;\n        8'hA7: o_data = 8'h5C;\n        8'hA8: o_data = 8'hC2;\n        8'hA9: o_data = 8'hD3;\n        8'hAA: o_data = 8'hAC;\n        8'hAB: o_data = 8'h62;\n        8'hAC: o_data = 8'h91;\n        8'hAD: o_data = 8'h95;\n        8'hAE: o_data = 8'hE4;\n        8'hAF: o_data = 8'h79;\n        8'hB0: o_data = 8'hE7;\n        8'hB1: o_data = 8'hC8;\n        8'hB2: o_data = 8'h37;\n        8'hB3: o_data = 8'h6D;\n        8'hB4: o_data = 8'h8D;\n        8'hB5: o_data = 8'hD5;\n        8'hB6: o_data = 8'h4E;\n        8'hB7: o_data = 8'hA9;\n        8'hB8: o_data = 8'h6C;\n        8'hB9: o_data = 8'h56;\n        8'hBA: o_data = 8'hF4;\n        8'hBB: o_data = 8'hEA;\n        8'hBC: o_data = 8'h65;\n        8'hBD: o_data = 8'h7A;\n        8'hBE: o_data = 8'hAE;\n        8'hBF: o_data = 8'h08;\n        8'hC0: o_data = 8'hBA;\n        8'hC1: o_data = 8'h78;\n        8'hC2: o_data = 8'h25;\n        8'hC3: o_data = 8'h2E;\n        8'hC4: o_data = 8'h1C;\n        8'hC5: o_data = 8'hA6;\n        8'hC6: o_data = 8'hB4;\n        8'hC7: o_data = 8'hC6;\n        8'hC8: o_data = 8'hE8;\n        8'hC9: o_data = 8'hDD;\n        8'hCA: o_data = 8'h74;\n        8'hCB: o_data = 8'h1F;\n        8'hCC: o_data = 8'h4B;\n        8'hCD: o_data = 8'hBD;\n        8'hCE: o_data = 8'h8B;\n        8'hCF: o_data = 8'h8A;\n        8'hD0: o_data = 8'h70;\n        8'hD1: o_data = 8'h3E;\n        8'hD2: o_data = 8'hB5;\n        8'hD3: o_data = 8'h66;\n        8'hD4: o_data = 8'h48;\n        8'hD5: o_data = 8'h03;\n        8'hD6: o_data = 8'hF6;\n        8'hD7: o_data = 8'h0E;\n        8'hD8: o_data = 8'h61;\n        8'hD9: o_data = 8'h35;\n        8'hDA: o_data = 8'h57;\n        8'hDB: o_data = 
8'hB9;\n        8'hDC: o_data = 8'h86;\n        8'hDD: o_data = 8'hC1;\n        8'hDE: o_data = 8'h1D;\n        8'hDF: o_data = 8'h9E;\n        8'hE0: o_data = 8'hE1;\n        8'hE1: o_data = 8'hF8;\n        8'hE2: o_data = 8'h98;\n        8'hE3: o_data = 8'h11;\n        8'hE4: o_data = 8'h69;\n        8'hE5: o_data = 8'hD9;\n        8'hE6: o_data = 8'h8E;\n        8'hE7: o_data = 8'h94;\n        8'hE8: o_data = 8'h9B;\n        8'hE9: o_data = 8'h1E;\n        8'hEA: o_data = 8'h87;\n        8'hEB: o_data = 8'hE9;\n        8'hEC: o_data = 8'hCE;\n        8'hED: o_data = 8'h55;\n        8'hEE: o_data = 8'h28;\n        8'hEF: o_data = 8'hDF;\n        8'hF0: o_data = 8'h8C;\n        8'hF1: o_data = 8'hA1;\n        8'hF2: o_data = 8'h89;\n        8'hF3: o_data = 8'h0D;\n        8'hF4: o_data = 8'hBF;\n        8'hF5: o_data = 8'hE6;\n        8'hF6: o_data = 8'h42;\n        8'hF7: o_data = 8'h68;\n        8'hF8: o_data = 8'h41;\n        8'hF9: o_data = 8'h99;\n        8'hFA: o_data = 8'h2D;\n        8'hFB: o_data = 8'h0F;\n        8'hFC: o_data = 8'hB0;\n        8'hFD: o_data = 8'h54;\n        8'hFE: o_data = 8'hBB;\n        8'hFF: o_data = 8'h16;\n        default: o_data = 8'h00;\n    endcase\nend\n\nendmodule : sbox", "verif/tb_aes_decrypt.sv": "module tb_aes_decrypt;\n\nlocalparam NBW_KEY  = 'd256;\nlocalparam NBW_DATA = 'd128;\n\nlogic                clk;\nlogic                rst_async_n;\nlogic                i_update_key;\nlogic [NBW_KEY-1:0]  i_key;\nlogic                i_start;\nlogic [NBW_DATA-1:0] i_data;\nlogic                o_done;\nlogic [NBW_DATA-1:0] o_data;\n\naes_decrypt #(\n    .NBW_KEY (NBW_KEY),\n    .NBW_DATA(NBW_DATA)\n) uu_aes_decrypt (\n    .clk(clk),\n    .rst_async_n(rst_async_n),\n    .i_update_key(i_update_key),\n    .i_key(i_key),\n    .i_start(i_start),\n    .i_data(i_data),\n    .o_done(o_done),\n    .o_data(o_data)\n);\n\ntask Simple_test(logic update_key);\n    @(negedge clk);\n    i_key  = 
256'h000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f;\n    i_data = 128'h8ea2b7ca516745bfeafc49904b496089;\n    i_update_key = update_key;\n    i_start = 1;\n\n    @(negedge clk);\n    i_start = 0;\n    i_update_key = 0;\n    i_key = 0;\n\n    @(posedge o_done);\n    @(negedge clk);\n\n    if(o_data == 128'h00112233445566778899aabbccddeeff) begin\n        $display(\"PASS\");\n    end else begin\n        $display(\"FAIL\");\n        $display(\"Expected output: %h\", 128'h00112233445566778899aabbccddeeff);\n        $display(\"Observed output: %h\", o_data);\n    end\nendtask\n\ninitial begin\n    $dumpfile(\"test.vcd\");\n    $dumpvars(0,tb_aes_decrypt);\nend\n\nalways #5 clk = ~clk;\n\ninitial begin\n    clk = 0;\n    i_start = 0;\n    rst_async_n = 1;\n    #1;\n    rst_async_n = 0;\n    #2;\n    rst_async_n = 1;\n    @(negedge clk);\n\n    // Tasks go here\n    Simple_test(1'b1);\n    Simple_test(1'b0);\n\n    @(negedge clk);\n    @(negedge clk);\n\n    $finish();\nend\n\nendmodule"}}}
diff --git a/resources_servers/cvdp/harness.py b/resources_servers/cvdp/harness.py
new file mode 100644
index 0000000000..f77fb34a6e
--- /dev/null
+++ b/resources_servers/cvdp/harness.py
@@ -0,0 +1,606 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Apptainer verification harness.
+
+This module owns the *mechanism* of CVDP verification: translating a dataset's
+docker-compose harness into Apptainer calls and executing it in a sandbox. The
+resources server (``app.py``) owns the *policy* (the HTTP contract and reward
+scoring) and delegates execution to :class:`HarnessRunner`.
+
+Layout:
+- module-level pure functions: compose -> Apptainer translation (stateless).
+- :class:`HarnessRunner`: stateful executor (SIF cache, per-image locks, the
+  lazily-built sandbox provider).
+"""
+
+import asyncio
+import contextlib
+import hashlib
+import os
+import shlex
+import tempfile
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+
+import yaml
+
+from nemo_gym.sandbox.providers.apptainer import (
+    ApptainerCreateConfig,
+    ApptainerExecConfig,
+    ApptainerProbeConfig,
+    ApptainerProvider,
+)
+from nemo_gym.sandbox.providers.base import SandboxCreateError, SandboxSpec
+
+
+if TYPE_CHECKING:
+    from resources_servers.cvdp.app import CVDPResourcesServerConfig
+
+
+# ----------------------------
+# Compose -> Apptainer translation (pure helpers)
+# ----------------------------
+
+
+def _apply_substitutions(content: str, config: "CVDPResourcesServerConfig") -> str:
+    """
+    Swap image placeholder tokens in *content* for the configured images; tokens whose image is unset stay as-is.
+    """
+    image_by_token = (
+        ("__VERIF_EDA_IMAGE__", config.eda_sim_image),
+        ("__OSS_SIM_IMAGE__", config.oss_sim_image),
+        ("__OSS_PNR_IMAGE__", config.oss_pnr_image),
+    )
+    for token, image_ref in image_by_token:
+        if image_ref and token in content:
+            content = content.replace(token, image_ref)
+    return content
+
+
+def _resolve_image_for_service(
+    compose_data: dict,
+    service_name: str,
+    harness_files: Dict[str, Optional[str]],
+    config: "CVDPResourcesServerConfig",
+) -> Tuple[str, List[str]]:
+    """
+    Resolve the container image for a service that uses ``build:`` instead of
+    ``image:`` in its docker-compose definition.
+
+    Docker Compose handles ``build:`` natively by reading a Dockerfile and
+    building an image on the fly.  Apptainer cannot do this directly, so we
+    parse the Dockerfile to extract the base image (FROM) and any RUN / ADD
+    commands, then replay them via ``apptainer build`` with a def file.
+    Backslash-continued instructions are joined into one logical line and
+    ``FROM`` option flags (e.g. ``--platform=...``) are skipped.
+
+    Returns (base_image, post_commands) where *post_commands* are shell
+    commands for the ``%post`` section of an Apptainer definition file.
+    If the service already has ``image:``, returns (image, []); returns
+    ("", []) when neither an image nor a Dockerfile can be resolved.
+    """
+    svc = ((compose_data or {}).get("services") or {}).get(service_name) or {}
+    image = svc.get("image", "")
+    if image:
+        return image, []
+
+    # Determine Dockerfile path from build: config
+    build_cfg = svc.get("build", {})
+    if isinstance(build_cfg, str):
+        dockerfile_path = os.path.join(build_cfg, "Dockerfile")
+    elif isinstance(build_cfg, dict):
+        dockerfile_path = build_cfg.get("dockerfile") or "Dockerfile"
+    else:
+        return "", []
+
+    # Look for the Dockerfile in harness_files: exact path variants first,
+    # then fall back to any file whose path ends with the same basename.
+    present = {p: c for p, c in harness_files.items() if c}
+    candidates = [dockerfile_path, f"src/{dockerfile_path}", dockerfile_path.replace("src/", "")]
+    match = next((c for c in candidates if c in present), None)
+    if match is None:
+        basename = os.path.basename(dockerfile_path)
+        match = next((p for p in present if p.endswith(basename)), None)
+    if match is None:
+        return "", []
+    dockerfile_content = _apply_substitutions(present[match], config)
+
+    # Drop blank/comment lines, then join backslash-continued physical lines
+    # so a multi-line RUN is replayed as one complete command.
+    kept = [s for s in map(str.strip, dockerfile_content.splitlines()) if s and not s.startswith("#")]
+    logical_lines = "\n".join(kept).replace("\\\n", " ").splitlines()
+
+    # Parse Dockerfile: extract FROM base image and RUN/ADD commands
+    base_image = ""
+    post_commands: List[str] = []
+    for line in logical_lines:
+        keyword, _, rest = line.partition(" ")
+        keyword, rest = keyword.upper(), rest.strip()
+        if keyword == "FROM" and rest:
+            # Skip option flags such as --platform=...; "AS <stage>" trails the image.
+            images = [tok for tok in rest.split() if not tok.startswith("--")]
+            base_image = images[0] if images else ""
+        elif keyword == "RUN" and rest:
+            post_commands.append(rest)
+        elif keyword == "ADD" and "http" in line.lower():
+            # Convert ADD <url> <dest> to wget/curl
+            parts = line.split()
+            if len(parts) >= 3:
+                url, dest = parts[1], parts[2]
+                post_commands.append(f"wget -q -O {dest} {url} || curl -sL -o {dest} {url}")
+
+    return base_image, post_commands
+
+
+def _parse_compose_service(compose_content: str, service_name: str) -> Dict[str, Any]:
+    """
+    Pull one service's image, command, entrypoint, volumes, working_dir and
+    environment out of docker-compose YAML text, filling defaults for absent keys.
+    """
+    services = (yaml.safe_load(compose_content) or {}).get("services") or {}
+    entry = services.get(service_name, {})
+    defaults: Dict[str, Any] = {
+        "image": "",
+        "command": "",
+        "entrypoint": None,
+        "volumes": [],
+        "working_dir": "/code/rundir",
+        "environment": {},
+    }
+    return {field: entry.get(field, fallback) for field, fallback in defaults.items()}
+
+
+def _build_binds(workdir: str, compose_volumes: List[str]) -> List[str]:
+    """
+    Build a list of Apptainer bind specs ("src:dst[:opts]") from:
+    1. The standard /code/* workspace mounts
+    2. Non-/code volumes from the docker-compose service definition
+
+    This is the provider-facing form (one string per mount), passed through
+    ``SandboxSpec.provider_options['binds']``.  Relative host paths are resolved
+    against *workdir*; non-string (long-syntax) volume entries are ignored.
+    """
+    # Standard /code/* mounts
+    binds = [f"{workdir}/{vol}:/code/{vol}" for vol in ("docs", "rundir", "rtl", "verif", "src")]
+
+    # Compose-defined volumes (skip /code mounts — handled above)
+    for vol_str in compose_volumes or []:
+        if not isinstance(vol_str, str):
+            continue
+        host_path, _, remainder = vol_str.partition(":")
+        container_path, _, opts = remainder.partition(":")
+        container_path = container_path or host_path
+
+        # Match /code itself or paths below it, not any path merely containing "/code".
+        if container_path.rstrip("/") == "/code" or container_path.startswith("/code/"):
+            continue
+
+        # Resolve relative paths against workdir
+        if not os.path.isabs(host_path):
+            host_path = os.path.normpath(os.path.join(workdir, host_path))
+
+        bind_spec = f"{host_path}:{container_path}"
+        if opts:
+            bind_spec += f":{opts}"
+        binds.append(bind_spec)
+
+    return binds
+
+
+def _load_dot_env(workdir: str) -> Dict[str, str]:
+    """
+    Read KEY=value pairs from ``<workdir>/src/.env``, skipping blank lines,
+    ``#`` comments and lines without ``=``.  Keys and values are whitespace-
+    stripped; a missing file yields an empty dict.
+    """
+    dot_env_file = os.path.join(workdir, "src", ".env")
+    if not os.path.isfile(dot_env_file):
+        return {}
+    with open(dot_env_file, encoding="utf-8") as handle:
+        entries = [raw.strip() for raw in handle]
+    parsed: Dict[str, str] = {}
+    for entry in entries:
+        if entry.startswith("#") or "=" not in entry:
+            continue
+        # partition() splits at the first "=" so values may themselves contain "=".
+        name, _, value = entry.partition("=")
+        parsed[name.strip()] = value.strip()
+    return parsed
+
+
+def _build_env(environment: Any, dot_env: Optional[Dict[str, str]] = None) -> Dict[str, str]:
+    """Merge workspace src/.env vars with a compose ``environment`` field into a
+    plain {key: value} dict. dot_env is applied first so compose values win.
+    Value-less entries (mapping ``KEY:`` / list ``KEY``) are not set here, never "None"."""
+    env: Dict[str, str] = dict(dot_env or {})
+    if isinstance(environment, dict):
+        for key, val in environment.items():
+            # A YAML null carries no value; str(None) would export the literal text "None".
+            if val is not None:
+                env[str(key)] = str(val)
+    elif isinstance(environment, list):
+        for item in environment:
+            key, sep, val = str(item).partition("=")
+            if sep:
+                env[key] = val
+    return env
+
+
+def _build_runtime_tmp_env(container_tmp_path: str) -> Dict[str, str]:
+    """
+    Return the env vars that point temp-dir and lock-file settings at
+    *container_tmp_path*.
+    """
+    # Every temp-dir style variable gets the same directory.
+    tmp_vars = ("TMPDIR", "TMP", "TEMP", "TEMPDIR", "XCELIUM_TMPDIR")
+    redirected = dict.fromkeys(tmp_vars, container_tmp_path)
+
+    redirected["CDS_LOCK"] = f"{container_tmp_path}/.cdslock"
+    # imc/Java can still hit /tmp unless java.io.tmpdir is forced.
+    redirected["JAVA_TOOL_OPTIONS"] = f"-Djava.io.tmpdir={container_tmp_path}"
+
+    return redirected
+
+
+def _build_command(entrypoint: Any, command: Any) -> List[str]:
+    """Concatenate compose ``entrypoint`` and ``command`` into one argv list.
+
+    String fields are tokenised with ``shlex.split``; sequence fields are
+    copied as-is.  Falsy fields contribute nothing.
+    """
+    argv: List[str] = []
+    # Entrypoint tokens come first, followed by the command tokens.
+    for field in (entrypoint, command):
+        if not field:
+            continue
+        if isinstance(field, str):
+            argv.extend(shlex.split(field))
+        else:
+            argv.extend(field)
+
+    return argv
+
+
+# ----------------------------
+# Stateful executor
+# ----------------------------
+
+
+class HarnessRunner:
+    """Runs a dataset's docker-compose harness inside Apptainer.
+
+    Owns the SIF cache, per-image pull/build locks, and the lazily-constructed
+    sandbox provider. Construct once per server (the apptainer binary is only
+    required when a sandbox is actually started, so this can be built on hosts
+    without apptainer).
+    """
+
+    def __init__(self, config: "CVDPResourcesServerConfig") -> None:
+        self.config = config
+        self._sif_locks: Dict[str, asyncio.Lock] = {}
+        self._sif_lock_guard = asyncio.Lock()
+        # Apptainer sandbox provider — built lazily on first use so this can be
+        # constructed on hosts without apptainer.
+        self._provider: Optional[ApptainerProvider] = None
+        self._provider_lock = asyncio.Lock()
+        cache = config.sif_cache_dir
+        if not cache:
+            cache = os.path.join(Path.home(), ".cache", "nemo-gym", "sif")
+        self._sif_cache_dir = cache
+        os.makedirs(self._sif_cache_dir, exist_ok=True)
+
+    @staticmethod
+    def _write_workspace_file(workdir_path: Path, filepath: str, content: str, label: str = "file") -> None:
+        """Best-effort write of *content* to ``workdir_path / filepath``.
+
+        Paths resolving outside the workspace (absolute or ``..``) are refused,
+        since file names may come from model output; failures are only printed.
+        """
+        try:
+            root = workdir_path.resolve()
+            dest = (root / filepath).resolve()
+            if root not in dest.parents:
+                raise ValueError(f"path escapes workspace: {filepath}")
+            dest.parent.mkdir(parents=True, exist_ok=True)
+            with open(str(dest), "w+", encoding="utf-8") as f:
+                f.write(content)
+        except Exception:
+            print(f"Failed to write {label}: {filepath}")
+
+    async def run(
+        self,
+        rtl_files: Dict[str, str],
+        harness_files: Dict[str, Optional[str]],
+        task_id: str,
+        context_files: Optional[Dict[str, str]] = None,
+    ) -> Tuple[int, str, List[Dict]]:
+        """
+        Write harness + RTL to a temp workspace and run verification via Apptainer.
+
+        Mirrors repository.py prepare() + obj_harness(). Workspace layout:
+            workdir/
+              docker-compose.yml   (parsed for service metadata, not executed directly)
+              src/                 (test scripts and .env from harness_files)
+              rtl/ verif/ docs/    (bound as /code/rtl, /code/verif, /code/docs)
+              rundir/              (execution output, bound as /code/rundir)
+
+        Returns (exit_code, combined_output, service_results); exit_code is 0 only if all services exit 0.
+        """
+        context_files = context_files or {}
+        tmp_root = self.config.harness_workspace_dir.strip()
+        if tmp_root:
+            os.makedirs(tmp_root, exist_ok=True)
+        with tempfile.TemporaryDirectory(prefix=f"cvdp_{task_id}_", dir=tmp_root or None) as workdir:
+            workdir_path = Path(workdir)
+
+            # Create all mount dirs — mirrors repository.create_folders()
+            for d in ["rtl", "verif", "docs", "src", "rundir"]:
+                (workdir_path / d).mkdir()
+            # Optional per-rollout temp storage; cleaned when TemporaryDirectory exits.
+            if self.config.container_tmp_bind_path:
+                (workdir_path / "rundir" / "tmp").mkdir(parents=True, exist_ok=True)
+
+            # Write harness files — mirrors repository.restore_files()
+            compose_content: Optional[str] = None
+            for filepath, content in harness_files.items():
+                if content is None:
+                    continue
+                content = _apply_substitutions(content, self.config)
+                if filepath.endswith("docker-compose.yml"):
+                    compose_content = content
+                self._write_workspace_file(workdir_path, filepath, content)
+
+            if compose_content is None:
+                return 1, "No docker-compose.yml found in harness_files", []
+
+            # Write companion files from input.context — mirrors
+            # repository.restore_files(self.context). Preserves the full
+            # target path (e.g. verif/tb_foo.sv -> workdir/verif/tb_foo.sv).
+            for filepath, code in context_files.items():
+                self._write_workspace_file(workdir_path, filepath, code, "context file")
+
+            # Write model-generated files (overwrites context files for target slots).
+            # Preserves the full target path, matching CVDP's restore_files().
+            for filepath, code in rtl_files.items():
+                self._write_workspace_file(workdir_path, filepath, code)
+
+            # Run each service — mirrors repository.obj_harness()
+            compose_data = yaml.safe_load(compose_content) or {}
+            services = list((compose_data.get("services") or {}).keys())
+            if not services:
+                return 1, "No services defined in docker-compose.yml", []
+            service_results: List[Dict] = []
+            for service in services:
+                exit_code, output = await self._run_service(workdir, service, compose_content, harness_files)
+                service_results.append({"service": service, "exit_code": exit_code, "stderr": output})
+
+            final_exit_code = 0 if all(r["exit_code"] == 0 for r in service_results) else 1
+            combined_stderr = "\n".join(f"[{r['service']}] {r['stderr']}" for r in service_results if r["stderr"])
+            return final_exit_code, combined_stderr, service_results
+
+    async def _get_provider(self) -> ApptainerProvider:
+        """Build (once) and return the Apptainer sandbox provider.
+
+        The provider is configured for one-shot harness runs:
+        - ``--writable-tmpfs`` on the instance so EDA tools can write to the
+          container rootfs (matches the old ``apptainer exec --writable-tmpfs``).
+        - readiness probe disabled — we exec the real command immediately and
+          surface its failure directly, so an extra probe round-trip is wasted.
+        - timeouts pinned to ``container_timeout``; concurrency comfortably above
+          the outer ``num_processes`` gate so the provider never becomes the
+          bottleneck.
+        """
+        if self._provider is None:
+            async with self._provider_lock:
+                if self._provider is None:
+                    self._provider = ApptainerProvider(
+                        create=ApptainerCreateConfig(
+                            start_timeout_s=self.config.container_timeout,
+                            extra_start_args=["--writable-tmpfs"],
+                        ),
+                        exec=ApptainerExecConfig(
+                            default_timeout_s=self.config.container_timeout,
+                            concurrency=max(32, self.config.num_processes * 4),
+                        ),
+                        probe=ApptainerProbeConfig(command=None),
+                    )
+        return self._provider
+
+    async def _run_service(
+        self,
+        workdir: str,
+        service: str,
+        compose_content: str,
+        harness_files: Optional[Dict[str, Optional[str]]] = None,
+    ) -> Tuple[int, str]:
+        """
+        Run a single compose service via the Apptainer sandbox provider — mirrors
+        repository.log_docker().
+
+        The Docker image is pulled/built into a cached SIF first (the provider
+        does not pull or build), then the provider starts an instance with the
+        workspace ``/code/*`` mounts (and any compose-defined volumes) bound in,
+        execs the service command, and tears the instance down. Apptainer uses
+        host networking by default, so no network setup is needed.
+        """
+        path = os.path.abspath(workdir)
+        svc = _parse_compose_service(compose_content, service)
+
+        # Resolve image — handles both image: and build: services.
+        # Docker Compose builds from Dockerfiles automatically; for Apptainer
+        # we parse the Dockerfile and build a SIF with the equivalent commands.
+        image = svc["image"]
+        post_commands: List[str] = []
+        if not image and harness_files:
+            compose_data = yaml.safe_load(compose_content) or {}
+            image, post_commands = _resolve_image_for_service(compose_data, service, harness_files, self.config)
+        if not image:
+            return 1, f"No image defined for service '{service}'"
+
+        try:
+            if post_commands:
+                sif_path = await self._ensure_built_sif(image, post_commands)
+            else:
+                sif_path = await self._ensure_sif(image)
+        except RuntimeError as exc:
+            return 1, str(exc)
+
+        # Per-service bind mounts and environment.
+        binds = _build_binds(path, svc["volumes"])
+        env = _build_env(svc["environment"], _load_dot_env(path))
+        if self.config.container_tmp_bind_path:
+            binds.append(f"{path}/rundir/tmp:{self.config.container_tmp_bind_path}")
+            env.update(_build_runtime_tmp_env(self.config.container_tmp_bind_path))
+
+        # Fix working_dir paths that don't exist under Apptainer's bind mounts.
+        # Some compose files use /src/rundir/ which exists in Docker (via volume
+        # mount) but not in Apptainer (which only binds to /code/*).
+        working_dir = svc["working_dir"] or "/code/rundir"
+        if "/code/" not in working_dir:
+            working_dir = "/code/rundir"
+
+        cmd_parts = _build_command(svc["entrypoint"], svc["command"])
+        # No explicit command -> run the image's default runscript (equivalent to
+        # the old ``apptainer run``). HOME is exported in-shell to mirror the old
+        # ``--home /code/rundir`` (apptainer refuses HOME via --env).
+        inner = shlex.join(cmd_parts) if cmd_parts else "/.singularity.d/runscript"
+        command = f"export HOME=/code/rundir; exec {inner}"
+
+        provider = await self._get_provider()
+        spec = SandboxSpec(image=sif_path, provider_options={"binds": binds})
+
+        try:
+            handle = await provider.create(spec)
+        except SandboxCreateError as exc:
+            return 1, f"apptainer instance start failed for service '{service}': {exc}"
+
+        try:
+            result = await provider.exec(
+                handle,
+                command,
+                cwd=working_dir,
+                env=env,
+                timeout_s=self.config.container_timeout,
+            )
+        finally:
+            with contextlib.suppress(Exception):
+                await provider.close(handle)
+
+        if result.error_type == "timeout":
+            return -1, f"apptainer exec timed out after {self.config.container_timeout}s"
+
+        # Mirror the old (stderr + stdout) ordering for combined diagnostics.
+        combined = (result.stderr or "") + (result.stdout or "")
+        return result.return_code, combined
+
+    async def _ensure_built_sif(self, base_image: str, post_commands: List[str]) -> str:
+        """
+        Build a SIF that extends a base image with extra commands from a Dockerfile.
+
+        Replicates ``docker compose build``: generate a definition file on top of
+        the base SIF and run ``apptainer build``. Results are cached by base image
+        name plus a hash of the commands. Raises RuntimeError when the build fails
+        or ``apptainer`` cannot be launched (e.g. binary missing).
+        """
+        if not post_commands:
+            return await self._ensure_sif(base_image)
+
+        cmd_hash = hashlib.md5("\n".join(post_commands).encode()).hexdigest()[:12]
+        safe_name = base_image.replace("/", "_").replace(":", "_") + f"__built_{cmd_hash}.sif"
+        sif_path = os.path.join(self._sif_cache_dir, safe_name)
+        if os.path.exists(sif_path):
+            return sif_path
+
+        # Reuse the per-image locking pattern
+        async with self._sif_lock_guard:
+            lock = self._sif_locks.setdefault(safe_name, asyncio.Lock())
+
+        async with lock:
+            if os.path.exists(sif_path):
+                return sif_path
+
+            base_sif = await self._ensure_sif(base_image)
+            post_section = "\n    ".join(post_commands)
+            def_content = f"Bootstrap: localimage\nFrom: {base_sif}\n\n%post\n    {post_section}\n"
+            tmp_def = sif_path + ".def"
+            tmp_sif = sif_path + ".building"
+            with open(tmp_def, "w", encoding="utf-8") as f:
+                f.write(def_content)
+            try:
+                proc = await asyncio.create_subprocess_exec(
+                    "apptainer",
+                    "build",
+                    "--force",
+                    tmp_sif,
+                    tmp_def,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
+                )
+                _, stderr = await proc.communicate()
+            except OSError as exc:
+                raise RuntimeError(f"apptainer build could not be started: {exc}") from exc
+            finally:
+                with contextlib.suppress(OSError):
+                    os.unlink(tmp_def)
+            if proc.returncode != 0:
+                if os.path.exists(tmp_sif):
+                    os.unlink(tmp_sif)
+                raise RuntimeError(f"apptainer build failed: {stderr.decode(errors='replace')}")
+            os.rename(tmp_sif, sif_path)
+            return sif_path
+
+    async def _ensure_sif(self, image: str) -> str:
+        """
+        Return the path to a cached SIF file for the given Docker image,
+        pulling it from the registry if not already cached. Raises RuntimeError
+        when the pull fails or ``apptainer`` cannot be launched.
+        """
+        safe_name = image.replace("/", "_").replace(":", "_") + ".sif"
+        sif_path = os.path.join(self._sif_cache_dir, safe_name)
+
+        if os.path.exists(sif_path):
+            return sif_path
+
+        # Per-image lock to avoid concurrent pulls of the same image
+        async with self._sif_lock_guard:
+            lock = self._sif_locks.setdefault(image, asyncio.Lock())
+
+        async with lock:
+            # Double-check after acquiring lock
+            if os.path.exists(sif_path):
+                return sif_path
+            tmp_path = sif_path + ".pulling"
+            try:
+                proc = await asyncio.create_subprocess_exec(
+                    "apptainer",
+                    "pull",
+                    "--force",
+                    tmp_path,
+                    f"docker://{image}",
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
+                )
+                _, stderr = await proc.communicate()
+            except OSError as exc:
+                raise RuntimeError(f"apptainer pull could not be started for {image}: {exc}") from exc
+            if proc.returncode != 0:
+                if os.path.exists(tmp_path):
+                    os.unlink(tmp_path)
+                raise RuntimeError(
+                    f"apptainer pull failed for {image} (exit {proc.returncode}): {stderr.decode(errors='replace')}"
+                )
+            os.rename(tmp_path, sif_path)
+            return sif_path
diff --git a/resources_servers/cvdp/tests/test_app.py b/resources_servers/cvdp/tests/test_app.py
index 03683e08d5..c44819aa66 100644
--- a/resources_servers/cvdp/tests/test_app.py
+++ b/resources_servers/cvdp/tests/test_app.py
@@ -19,20 +19,23 @@
 
 import pytest
 
+from nemo_gym.sandbox.providers.base import SandboxCreateError, SandboxExecResult
 from nemo_gym.server_utils import ServerClient
 from resources_servers.cvdp.app import (
     CVDPResourcesServer,
     CVDPResourcesServerConfig,
+    _parse_model_response,
+)
+from resources_servers.cvdp.cvdp_lib.subjective import calculate_BLEU, calculate_ROUGE
+from resources_servers.cvdp.harness import (
     _apply_substitutions,
-    _build_bind_args,
+    _build_binds,
     _build_command,
-    _build_env_args,
-    _build_runtime_tmp_env_args,
+    _build_env,
+    _build_runtime_tmp_env,
     _load_dot_env,
     _parse_compose_service,
-    _parse_model_response,
 )
-from resources_servers.cvdp.cvdp_lib.subjective import calculate_BLEU, calculate_ROUGE
 
 
 # ---------------------------------------------------------------------------
@@ -219,85 +222,10 @@ def test_missing_service_returns_defaults(self):
 
 
 # ---------------------------------------------------------------------------
-# Unit tests: _build_bind_args
+# Unit tests: _load_dot_env
 # ---------------------------------------------------------------------------
 
 
-class TestBuildBindArgs:
-    def test_includes_code_mounts(self):
-        args = _build_bind_args("/tmp/work", [])
-        assert "--bind" in args
-        assert "/tmp/work/rtl:/code/rtl" in args
-        assert "/tmp/work/rundir:/code/rundir" in args
-        assert "/tmp/work/src:/code/src" in args
-
-    def test_includes_compose_volumes(self):
-        args = _build_bind_args("/tmp/work", ["./src/:/src/:ro"])
-        # Compose volume should be resolved relative to workdir
-        assert any("/src/:ro" in a for a in args)
-
-    def test_skips_code_volumes_from_compose(self):
-        args = _build_bind_args("/tmp/work", ["./rtl:/code/rtl:ro"])
-        # The /code/rtl from compose should be skipped (we mount it ourselves)
-        code_rtl_compose = [a for a in args if a == "./rtl:/code/rtl:ro"]
-        assert len(code_rtl_compose) == 0
-
-
-# ---------------------------------------------------------------------------
-# Unit tests: _build_env_args
-# ---------------------------------------------------------------------------
-
-
-class TestBuildEnvArgs:
-    def test_empty_environment_returns_empty(self):
-        args = _build_env_args({})
-        assert args == []
-
-    def test_dict_environment(self):
-        args = _build_env_args({"SIM": "icarus", "TOPLEVEL": "foo"})
-        assert "SIM=icarus" in args
-        assert "TOPLEVEL=foo" in args
-
-    def test_list_environment(self):
-        args = _build_env_args(["SIM=icarus", "TOPLEVEL=foo"])
-        assert "SIM=icarus" in args
-        assert "TOPLEVEL=foo" in args
-
-    def test_dot_env_vars_included(self):
-        dot_env = {"VERILOG_SOURCES": "/code/rtl/foo.sv", "SIM": "icarus"}
-        args = _build_env_args({}, dot_env)
-        assert "VERILOG_SOURCES=/code/rtl/foo.sv" in args
-        assert "SIM=icarus" in args
-
-    def test_compose_env_overrides_dot_env(self):
-        dot_env = {"SIM": "icarus"}
-        args = _build_env_args({"SIM": "verilator"}, dot_env)
-        # dot_env SIM comes first, then compose SIM overrides
-        sim_values = [a for a in args if a.startswith("SIM=")]
-        assert sim_values[-1] == "SIM=verilator"
-
-
-class TestBuildRuntimeTmpEnvArgs:
-    def test_emits_expected_env_flags(self):
-        args = _build_runtime_tmp_env_args("/tmp")
-        # Pairs of "--env" "KEY=value" — assert each expected pair appears.
-        flags = [args[i + 1] for i in range(0, len(args), 2) if args[i] == "--env"]
-        assert "TMPDIR=/tmp" in flags
-        assert "TMP=/tmp" in flags
-        assert "TEMP=/tmp" in flags
-        assert "TEMPDIR=/tmp" in flags
-        assert "XCELIUM_TMPDIR=/tmp" in flags
-        assert "CDS_LOCK=/tmp/.cdslock" in flags
-        assert "JAVA_TOOL_OPTIONS=-Djava.io.tmpdir=/tmp" in flags
-
-    def test_uses_custom_path(self):
-        args = _build_runtime_tmp_env_args("/scratch/run/tmp")
-        flags = [args[i + 1] for i in range(0, len(args), 2) if args[i] == "--env"]
-        assert "TMPDIR=/scratch/run/tmp" in flags
-        assert "CDS_LOCK=/scratch/run/tmp/.cdslock" in flags
-        assert "JAVA_TOOL_OPTIONS=-Djava.io.tmpdir=/scratch/run/tmp" in flags
-
-
 class TestLoadDotEnv:
     def test_loads_env_file(self, tmp_path):
         src = tmp_path / "src"
@@ -363,8 +291,8 @@ async def test_verify_empty_output_returns_zero_reward(self):
     async def test_verify_plain_text_goes_to_harness(self):
         body_dict = _make_body(output_text="I am unable to generate this design.")
         with patch.object(
-            self.server,
-            "_run_harness",
+            self.server._harness,
+            "run",
             new_callable=AsyncMock,
             return_value=(1, "FAILED", []),
         ):
@@ -377,8 +305,8 @@ async def test_verify_plain_text_goes_to_harness(self):
     async def test_verify_harness_pass_returns_one_reward(self):
         body_dict = _make_body(output_text=f"```systemverilog\n{SAMPLE_RTL}\n```")
         with patch.object(
-            self.server,
-            "_run_harness",
+            self.server._harness,
+            "run",
             new_callable=AsyncMock,
             return_value=(0, "", []),
         ):
@@ -391,8 +319,8 @@ async def test_verify_harness_pass_returns_one_reward(self):
     async def test_verify_harness_fail_returns_zero_reward(self):
         body_dict = _make_body(output_text=f"```systemverilog\n{SAMPLE_RTL}\n```")
         with patch.object(
-            self.server,
-            "_run_harness",
+            self.server._harness,
+            "run",
             new_callable=AsyncMock,
             return_value=(1, "FAILED: assertion error", [{"service": "direct", "exit_code": 1, "stderr": "FAILED"}]),
         ):
@@ -404,8 +332,8 @@ async def test_verify_harness_fail_returns_zero_reward(self):
     async def test_verify_harness_timeout_returns_zero_reward(self):
         body_dict = _make_body(output_text=f"```systemverilog\n{SAMPLE_RTL}\n```")
         with patch.object(
-            self.server,
-            "_run_harness",
+            self.server._harness,
+            "run",
             new_callable=AsyncMock,
             return_value=(-1, "apptainer exec timed out after 30s", []),
         ):
@@ -436,8 +364,8 @@ async def test_verify_multi_file_json_response(self):
             target_files=["rtl/a.sv", "rtl/b.sv"],
         )
         with patch.object(
-            self.server,
-            "_run_harness",
+            self.server._harness,
+            "run",
             new_callable=AsyncMock,
             return_value=(0, "", []),
         ):
@@ -590,8 +518,8 @@ async def test_code_gen_category_does_not_use_subjective(self):
         body_dict = _make_body(output_text=f"```systemverilog\n{SAMPLE_RTL}\n```")
         body_dict["verifier_metadata"]["categories"] = ["cid003", "medium"]
         with patch.object(
-            self.server,
-            "_run_harness",
+            self.server._harness,
+            "run",
             new_callable=AsyncMock,
             return_value=(0, "", []),
         ):
@@ -619,7 +547,7 @@ def setup_method(self):
 
     @pytest.mark.asyncio
     async def test_missing_compose_returns_error_exit_code(self):
-        exit_code, stderr, services = await self.server._run_harness(
+        exit_code, stderr, services = await self.server._harness.run(
             rtl_files={"rtl/foo.sv": SAMPLE_RTL},
             harness_files={},  # no compose file
             task_id="test",
@@ -637,3 +565,203 @@ def _make_request(body_dict: dict):
     from resources_servers.cvdp.app import CVDPVerifyRequest
 
     return CVDPVerifyRequest.model_validate(body_dict)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: agentic rtl_files path (grade files written on disk)
+# ---------------------------------------------------------------------------
+
+
+class TestVerifyConsumesRtlFiles:
+    def setup_method(self):
+        self.server = _make_server()
+
+    @pytest.mark.asyncio
+    async def test_rtl_files_take_precedence_over_text_parse(self):
+        # Model chat text says one thing; the agent reports different files on
+        # disk. The on-disk files must win.
+        body_dict = _make_body(output_text="```systemverilog\nmodule from_text;\nendmodule\n```")
+        body_dict["rtl_files"] = {"rtl/foo.sv": "module from_disk;\nendmodule"}
+        captured = {}
+
+        async def fake_harness(*, rtl_files, harness_files, task_id, context_files=None):
+            captured["rtl_files"] = rtl_files
+            return (0, "", [])
+
+        with patch.object(self.server._harness, "run", side_effect=fake_harness):
+            result = await self.server.verify(_make_request(body_dict))
+
+        assert result.reward == 1.0
+        assert result.extracted_rtl == {"rtl/foo.sv": "module from_disk;\nendmodule"}
+        assert captured["rtl_files"] == {"rtl/foo.sv": "module from_disk;\nendmodule"}
+
+    @pytest.mark.asyncio
+    async def test_falls_back_to_text_parse_without_rtl_files(self):
+        body_dict = _make_body(output_text=f"```systemverilog\n{SAMPLE_RTL}\n```")
+        # No rtl_files key -> text parsing path.
+        with patch.object(self.server._harness, "run", new_callable=AsyncMock, return_value=(0, "", [])):
+            result = await self.server.verify(_make_request(body_dict))
+        assert result.reward == 1.0
+        assert "module foo" in result.extracted_rtl["rtl/foo.sv"]
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: provider-facing bind/env helpers
+# ---------------------------------------------------------------------------
+
+
+class TestBuildBinds:
+    def test_includes_code_mounts(self):
+        binds = _build_binds("/tmp/work", [])
+        assert "/tmp/work/rtl:/code/rtl" in binds
+        assert "/tmp/work/rundir:/code/rundir" in binds
+        assert "/tmp/work/src:/code/src" in binds
+
+    def test_includes_compose_volumes_resolved(self):
+        binds = _build_binds("/tmp/work", ["./extra:/data:ro"])
+        assert "/tmp/work/extra:/data:ro" in binds
+
+    def test_skips_code_volumes_from_compose(self):
+        binds = _build_binds("/tmp/work", ["./rtl:/code/rtl:ro"])
+        assert all("/code/rtl:ro" not in b or b == "/tmp/work/rtl:/code/rtl" for b in binds)
+
+
+class TestBuildEnv:
+    def test_dict_environment(self):
+        env = _build_env({"SIM": "icarus"})
+        assert env["SIM"] == "icarus"
+
+    def test_list_environment(self):
+        env = _build_env(["SIM=icarus", "TOPLEVEL=foo"])
+        assert env == {"SIM": "icarus", "TOPLEVEL": "foo"}
+
+    def test_compose_overrides_dot_env(self):
+        env = _build_env({"SIM": "verilator"}, {"SIM": "icarus", "X": "1"})
+        assert env["SIM"] == "verilator"
+        assert env["X"] == "1"
+
+
+class TestBuildRuntimeTmpEnv:
+    def test_keys(self):
+        env = _build_runtime_tmp_env("/scratch/tmp")
+        assert env["TMPDIR"] == "/scratch/tmp"
+        assert env["CDS_LOCK"] == "/scratch/tmp/.cdslock"
+        assert env["JAVA_TOOL_OPTIONS"] == "-Djava.io.tmpdir=/scratch/tmp"
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: _run_service runs through the Apptainer provider
+# ---------------------------------------------------------------------------
+
+
+class _FakeHandle:
+    sandbox_id = "inst-fake"
+
+
+class _FakeProvider:
+    def __init__(self, exec_result, create_error=None):
+        self._exec_result = exec_result
+        self._create_error = create_error
+        self.created = []
+        self.execs = []
+        self.closed = []
+
+    async def create(self, spec):
+        if self._create_error is not None:
+            raise self._create_error
+        self.created.append(spec)
+        return _FakeHandle()
+
+    async def exec(self, handle, command, *, cwd=None, env=None, timeout_s=None):
+        self.execs.append({"command": command, "cwd": cwd, "env": env, "timeout_s": timeout_s})
+        return self._exec_result
+
+    async def close(self, handle):
+        self.closed.append(handle)
+
+
+_COMPOSE_WITH_CMD = """
+services:
+  direct:
+    image: ghcr.io/hdl/sim/osvb
+    volumes:
+      - ./extra:/data:ro
+    working_dir: /code/rundir
+    command: /bin/sh -c "echo hi"
+"""
+
+_COMPOSE_NO_CMD = """
+services:
+  direct:
+    image: ghcr.io/hdl/sim/osvb
+    working_dir: /code/rundir
+"""
+
+
+class TestRunServiceProvider:
+    def setup_method(self):
+        self.server = _make_server()
+
+    async def _run(self, tmp_path, fake, compose):
+        with (
+            patch.object(self.server._harness, "_ensure_sif", new_callable=AsyncMock, return_value="/cache/img.sif"),
+            patch.object(self.server._harness, "_get_provider", new_callable=AsyncMock, return_value=fake),
+        ):
+            return await self.server._harness._run_service(str(tmp_path), "direct", compose)
+
+    @pytest.mark.asyncio
+    async def test_create_exec_close_and_payload(self, tmp_path):
+        fake = _FakeProvider(SandboxExecResult(stdout="hi\n", stderr="", return_code=0))
+        exit_code, output = await self._run(tmp_path, fake, _COMPOSE_WITH_CMD)
+
+        assert exit_code == 0
+        assert output == "hi\n"
+        # create got the cached SIF and the workspace + compose binds.
+        spec = fake.created[0]
+        assert spec.image == "/cache/img.sif"
+        binds = spec.provider_options["binds"]
+        assert any(b.endswith(":/code/rtl") for b in binds)
+        assert any(b.endswith(":/data:ro") for b in binds)
+        # exec wraps the command with HOME export, sets cwd + timeout.
+        call = fake.execs[0]
+        assert "export HOME=/code/rundir" in call["command"]
+        assert "echo hi" in call["command"]
+        assert call["cwd"] == "/code/rundir"
+        assert call["timeout_s"] == 30
+        # instance is always torn down.
+        assert len(fake.closed) == 1
+
+    @pytest.mark.asyncio
+    async def test_no_command_uses_runscript(self, tmp_path):
+        fake = _FakeProvider(SandboxExecResult(stdout="", stderr="", return_code=0))
+        await self._run(tmp_path, fake, _COMPOSE_NO_CMD)
+        assert "/.singularity.d/runscript" in fake.execs[0]["command"]
+
+    @pytest.mark.asyncio
+    async def test_timeout_maps_to_negative_one(self, tmp_path):
+        fake = _FakeProvider(SandboxExecResult(stdout=None, stderr="timed out", return_code=125, error_type="timeout"))
+        exit_code, output = await self._run(tmp_path, fake, _COMPOSE_WITH_CMD)
+        assert exit_code == -1
+        assert "timed out after 30s" in output
+        assert len(fake.closed) == 1
+
+    @pytest.mark.asyncio
+    async def test_create_failure_returns_error(self, tmp_path):
+        fake = _FakeProvider(
+            SandboxExecResult(stdout="", stderr="", return_code=0),
+            create_error=SandboxCreateError("boom"),
+        )
+        exit_code, output = await self._run(tmp_path, fake, _COMPOSE_WITH_CMD)
+        assert exit_code == 1
+        assert "instance start failed" in output
+        assert fake.execs == []
+        assert fake.closed == []
+
+    @pytest.mark.asyncio
+    async def test_tmp_bind_path_added(self, tmp_path):
+        self.server.config.container_tmp_bind_path = "/container/tmp"
+        fake = _FakeProvider(SandboxExecResult(stdout="", stderr="", return_code=0))
+        await self._run(tmp_path, fake, _COMPOSE_WITH_CMD)
+        spec = fake.created[0]
+        assert any(b.endswith(":/container/tmp") for b in spec.provider_options["binds"])
+        assert fake.execs[0]["env"]["TMPDIR"] == "/container/tmp"
diff --git a/responses_api_agents/cvdp_agent/agentic_app.py b/responses_api_agents/cvdp_agent/agentic_app.py
new file mode 100644
index 0000000000..662cd4ed53
--- /dev/null
+++ b/responses_api_agents/cvdp_agent/agentic_app.py
@@ -0,0 +1,654 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""CVDP agentic agent: runs Claude Code inside an Apptainer sandbox.
+
+This agent reuses the host-only ``claude_code_agent`` for everything except the
+sandboxed execution path. For tasks that declare ``target_files`` (RTL the agent
+must edit on disk), it:
+
+1. asks the :class:`ApptainerProvider` for a fresh sandbox whose host staging dir
+   is bind mounted at ``container_workdir`` (default ``/code``),
+2. seeds that workspace with the task's ``context_files`` only (never the hidden
+   grading harness),
+3. runs ``claude`` inside the container (the host Node+Claude prefix is bind
+   mounted at ``/opt/claude_node``; the prompt is fed via stdin so large CVDP
+   specs do not hit the kernel argv limit),
+4. reads back the HDL files the agent produced and passes them to ``/verify`` as
+   ``rtl_files`` so the resources server grades the actual on-disk artifact.
+
+Tasks without ``target_files`` (e.g. code-comprehension Q&A) fall back to the
+host-only flow inherited from ``ClaudeCodeAgent``.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import shlex
+import shutil
+import subprocess
+import tarfile
+import tempfile
+import urllib.request
+from pathlib import Path
+from time import time
+from typing import Any, Dict, Optional
+from uuid import uuid4
+
+from fastapi import Request
+from pydantic import ConfigDict
+
+from nemo_gym.openai_utils import (
+    NeMoGymResponse,
+    NeMoGymResponseInputTokensDetails,
+    NeMoGymResponseOutputTokensDetails,
+    NeMoGymResponseUsage,
+)
+from nemo_gym.sandbox.providers.apptainer import (
+    ApptainerCreateConfig,
+    ApptainerExecConfig,
+    ApptainerProvider,
+)
+from nemo_gym.sandbox.providers.base import SandboxSpec
+from nemo_gym.server_utils import get_response_json, raise_for_status
+from responses_api_agents.claude_code_agent.app import (
+    ClaudeCodeAgent,
+    ClaudeCodeAgentConfig,
+    ClaudeCodeAgentRunRequest,
+    ClaudeCodeAgentVerifyResponse,
+    _extract_instruction,
+    parse_stream_json,
+)
+
+
+LOG = logging.getLogger(__name__)
+
+# Source files the agent may legitimately create or modify, captured for grading.
+# Scoped so we don't sweep in build artifacts (.out, .vcd, ...) or the on-disk
+# claude config.
+_HDL_EXTENSIONS = (".sv", ".svh", ".v", ".vh")
+_AGENT_SOURCE_DIRS = ("rtl", "verif")
+
+# Self-contained Node.js + Claude Code prefix, bind-mounted into the sandbox at
+# /opt/claude_node (the sim image has no Node/claude of its own). Built once on the
+# host, reused by every rollout. Override via config.claude_node_dir.
+_CLAUDE_PKG = "@anthropic-ai/claude-code"
+_NODE_VERSION = "22.15.0"
+_NODE_DIST_URL = f"https://nodejs.org/dist/v{_NODE_VERSION}/node-v{_NODE_VERSION}-linux-x64.tar.xz"
+_NODE_PREFIX = Path(__file__).parent / ".claude_node"
+
+# Claude's mutable state (config, sessions, tool-results, caches) lives here —
+# deliberately OUTSIDE the task workspace (container_workdir). If it lived under
+# the workdir, the model's first `find <workdir>` would surface Claude's own
+# internal files and it would rabbit-hole into them instead of doing the task.
+# This path is in the container's writable tmpfs (instances start --writable-tmpfs).
+_CONTAINER_STATE_DIR = "/tmp/claude_state"
+
+# Claude's stream-json stdout is tee'd to this dir, which is bind-mounted from a
+# host temp dir. It lives outside container_workdir (same rationale as state dir)
+# so the model never sees it. On a timeout the provider discards the captured
+# pipe output, so this on-disk copy is the only way to recover the partial
+# trajectory of a run that ran over its wall-clock budget.
+_CONTAINER_LOG_DIR = "/tmp/claude_logs"
+_STREAM_LOG_NAME = "stream.jsonl"
+
+
+def _install_node_locally() -> Path:
+    """Download a relocatable Node.js (incl. npm) into ``_NODE_PREFIX``; return its bin dir."""
+    if (_NODE_PREFIX / "bin" / "node").is_file():
+        return _NODE_PREFIX / "bin"
+    _NODE_PREFIX.mkdir(parents=True, exist_ok=True)
+    tarball = _NODE_PREFIX / "node.tar.xz"
+    LOG.info("downloading Node.js %s", _NODE_VERSION)
+    urllib.request.urlretrieve(_NODE_DIST_URL, tarball)  # noqa: S310
+    with tarfile.open(tarball, "r:xz") as tf:
+        tf.extractall(_NODE_PREFIX, filter="data")
+    nested = next((p for p in _NODE_PREFIX.iterdir() if p.is_dir() and p.name.startswith("node-")), None)
+    if nested is None:  # a bare next() would leak an opaque StopIteration here
+        raise RuntimeError(f"Node.js archive from {_NODE_DIST_URL} did not contain a node-* directory")
+    for item in nested.iterdir():  # hoist the archive's top-level dir contents into the prefix
+        item.rename(_NODE_PREFIX / item.name)
+    nested.rmdir()
+    tarball.unlink(missing_ok=True)
+    return _NODE_PREFIX / "bin"
+
+
+def ensure_claude_node_prefix(version: str | None = None) -> Path:
+    """Ensure a self-contained Node.js + Claude Code prefix exists; return its path."""
+    claude_bin = _NODE_PREFIX / "bin" / "claude"
+    if claude_bin.is_file():
+        return _NODE_PREFIX
+    # NOTE: an existing bin/claude short-circuits above before ``version`` is consulted.
+    bin_dir = _install_node_locally()
+    npm_bin = bin_dir / "npm"
+    pkg = f"{_CLAUDE_PKG}@{version}" if version else f"{_CLAUDE_PKG}@latest"
+    # Install into the same prefix (npm global prefix == node prefix) so claude is
+    # self-contained: bin/claude -> lib/node_modules/@anthropic-ai/claude-code.
+    env = {**os.environ, "PATH": f"{bin_dir}{os.pathsep}{os.environ.get('PATH', '')}"}
+    subprocess.run([str(npm_bin), "install", "-g", "--prefix", str(_NODE_PREFIX), pkg], check=True, env=env)
+    # npm exiting 0 is not trusted on its own; confirm the binary actually exists.
+    if not claude_bin.is_file():
+        raise RuntimeError(f"claude-code install did not produce a self-contained binary at {claude_bin}")
+    LOG.info("self-contained claude-code prefix ready at %s", _NODE_PREFIX)
+    return _NODE_PREFIX
+
+
+def _safe_workspace_path(base: Path, rel: str) -> Optional[Path]:
+    """Map a workspace-relative path onto ``base``, resolving symlinks and ``..``.
+    Yields the resolved absolute path when it is ``base`` itself or lies beneath
+    it; yields None for an empty ``rel`` or a path that escapes ``base``."""
+    if not rel:
+        return None
+    try:
+        root = base.resolve()
+        target = (base / rel).resolve()
+    except (OSError, ValueError, RuntimeError):
+        return None
+    # Containment is judged on fully resolved paths so symlinks cannot escape.
+    contained = target == root or root in target.parents
+    return target if contained else None
+
+
+def _is_harness_path(rel: str) -> bool:
+    """True for paths that belong to the hidden grading harness and must never be
+    seeded into the agent workspace (the test scripts in ``src/`` and the compose
+    file). Normalized first, so ``./src/x`` and ``rtl/../src/x`` are caught too."""
+    import posixpath
+
+    norm = posixpath.normpath(rel.replace("\\", "/")).strip("/")
+    return (
+        norm == "src" or norm.startswith("src/")
+        or posixpath.basename(norm) == "docker-compose.yml"
+    )
+
+
+def _summarize_claude_failure(stdout: str) -> str:
+    """Condense claude's stream-json stdout into a one-line failure reason.
+
+    Tallies ``api_retry`` system events by status/error and describes the last
+    ``result`` event; returns an empty string when neither kind is present."""
+    from collections import Counter
+
+    retries: Counter = Counter()
+    last_result = ""
+    for raw in stdout.splitlines():
+        text = raw.strip()
+        if not text.startswith("{"):
+            continue
+        try:
+            event = json.loads(text)
+        except (json.JSONDecodeError, ValueError):
+            continue
+        kind = event.get("type")
+        if kind == "system" and event.get("subtype") == "api_retry":
+            status, err = event.get("error_status"), event.get("error")
+            retries[f"{status}/{err}" if err else str(status)] += 1
+        elif kind == "result":
+            # A later result event replaces the summary of an earlier one.
+            fields = []
+            if event.get("subtype"):
+                fields.append(f"subtype={event['subtype']}")
+            if event.get("is_error") is not None:
+                fields.append(f"is_error={event['is_error']}")
+            if event.get("api_error_status"):
+                fields.append(f"api_error_status={event['api_error_status']}")
+            if event.get("result"):
+                fields.append(f"result={str(event['result'])[:200]}")
+            last_result = " ".join(fields)
+    summary = []
+    if retries:
+        summary.append("api_retry=" + ", ".join(f"{k} x{v}" for k, v in retries.items()))
+    if last_result:
+        summary.append(f"result_line[{last_result}]")
+    return "; ".join(summary)
+
+
+class CvdpAgentConfig(ClaudeCodeAgentConfig):
+    """Config for the sandboxed CVDP agent (extends the host-only agent's config)."""
+
+    # Context window Claude Code should assume for the active model. Claude only
+    # auto-detects the window for first-party (api.anthropic.com) base URLs; when
+    # routing through anthropic_base_url it falls back to a 200K default, which
+    # triggers premature auto-compaction on large-window models. Setting this
+    # exports CLAUDE_CODE_MAX_CONTEXT_TOKENS (+ DISABLE_AUTO_COMPACT, required for
+    # the override to take effect). None leaves Claude's default behavior.
+    max_context_tokens: Optional[int] = 1_000_000
+
+    # --- Apptainer execution ---
+    # claude runs inside an Apptainer container built from ``sim_image`` (or an
+    # explicit ``sif_path``) so it can self-test with the in-container EDA tools.
+    # The host Node+Claude prefix is bind mounted in (claude itself is not in the
+    # sim image).
+    sim_image: str = "nvidia/cvdp-sim:v1.0.0"  # Docker ref, pulled as docker://<sim_image> when sif_path is unset
+    sif_path: Optional[str] = None  # explicit .sif; if unset, pull/convert sim_image
+    sif_cache_dir: str = ""  # defaults to ~/.cache/nemo-gym/sif
+    claude_node_dir: str = ""  # host Node+Claude prefix to bind (defaults to a built-in one)
+    container_workdir: str = "/code"  # workspace mount point + cwd + HOME inside the container
+
+
+class CvdpAgent(ClaudeCodeAgent):
+    """Claude Code agent that executes inside an Apptainer sandbox for RTL tasks."""
+
+    config: CvdpAgentConfig
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    def model_post_init(self, __context: Any) -> None:
+        super().model_post_init(__context)
+        # The apptainer provider, host Node prefix, and SIF download are resolved
+        # lazily on the first sandboxed run so the host-only path (and startup on
+        # machines without apptainer) keeps working.
+        self._provider: Optional[ApptainerProvider] = None
+        self._node_bind_dir: Optional[str] = None
+        self._provider_guard = asyncio.Lock()  # serializes first-time provider construction
+        self._sif_locks: dict[str, asyncio.Lock] = {}  # per-image pull locks, keyed by image reference
+        self._sif_lock_guard = asyncio.Lock()  # held while looking up/creating a _sif_locks entry
+        self._sif_cache_dir = self.config.sif_cache_dir or os.path.join(Path.home(), ".cache", "nemo-gym", "sif")
+        os.makedirs(self._sif_cache_dir, exist_ok=True)  # done eagerly, unlike the lazy pieces above
+
+    def _resolve_node_dir(self) -> str:
+        """Return the host Node + Claude Code prefix to bind in: the configured one, else the built-in one."""
+        node_dir = self.config.claude_node_dir
+        if not node_dir:
+            return str(ensure_claude_node_prefix(self.config.claude_code_version))
+        if os.path.isdir(os.path.join(node_dir, "bin")):
+            return node_dir
+        raise RuntimeError(
+            f"claude_node_dir not found at {node_dir!r}; expected a self-contained "
+            "Node+Claude prefix (bin/, lib/)."
+        )
+
+    async def _ensure_provider(self) -> ApptainerProvider:
+        """Build the apptainer provider once, on first use, and return the shared instance."""
+        if self._provider is not None:
+            return self._provider
+        async with self._provider_guard:
+            if self._provider is None:  # re-check: another task may have built it while we waited
+                node_dir = await asyncio.to_thread(self._resolve_node_dir)  # may download/install; keep off the loop
+                self._node_bind_dir = node_dir
+                self._provider = ApptainerProvider(
+                    create=ApptainerCreateConfig(
+                        mount_point=self.config.container_workdir,
+                        extra_start_args=["--writable-tmpfs"],
+                    ),
+                    exec=ApptainerExecConfig(
+                        default_timeout_s=self.config.timeout,
+                        default_binds=[f"{node_dir}:/opt/claude_node:ro"],
+                        concurrency=self.config.concurrency,
+                    ),
+                )
+                LOG.info("apptainer provider ready: node bind=%s, sif cache=%s", node_dir, self._sif_cache_dir)
+        return self._provider
+
+    async def _ensure_sif(self, image: str) -> str:
+        """Return a cached SIF for the Docker image, pulling it on first use (RuntimeError if the pull fails)."""
+        safe_name = image.replace("/", "_").replace(":", "_") + ".sif"
+        sif_path = os.path.join(self._sif_cache_dir, safe_name)
+        if os.path.exists(sif_path):
+            return sif_path
+        async with self._sif_lock_guard:
+            lock = self._sif_locks.setdefault(image, asyncio.Lock())
+        async with lock:
+            if os.path.exists(sif_path):
+                return sif_path
+            tmp_path = f"{sif_path}.{os.getpid()}.pulling"  # PID-unique: other processes may share the cache
+            pull_cmd = ("apptainer", "pull", "--force", tmp_path, f"docker://{image}")
+            proc = await asyncio.create_subprocess_exec(
+                *pull_cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+            try:
+                _, stderr = await proc.communicate()
+                if proc.returncode == 0:
+                    os.replace(tmp_path, sif_path)  # atomic publish into the cache
+            finally:
+                if proc.returncode is None:  # interrupted mid-pull: do not orphan apptainer
+                    proc.kill()
+                if os.path.exists(tmp_path):
+                    os.unlink(tmp_path)
+            if proc.returncode != 0:
+                raise RuntimeError(
+                    f"apptainer pull failed for {image} (exit {proc.returncode}): {stderr.decode(errors='replace')}"
+                )
+            return sif_path
+
+    async def _resolve_sif(self) -> str:
+        """Return the configured ``sif_path`` if set, else the cached SIF for ``sim_image``."""
+        explicit = self.config.sif_path
+        return explicit if explicit else await self._ensure_sif(self.config.sim_image)
+
+    def _seed_workspace(
+        self,
+        workdir: Path,
+        context_files: Dict[str, str],
+        harness_files: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Create the CVDP workspace layout and write context files only (no harness/tests).
+
+        Defends the hidden-test guarantee: any context path that is also a declared
+        harness file, or that looks like harness (``src/**`` or a compose file), is
+        skipped. Paths that escape the workspace (absolute or ``..``) are rejected."""
+        for d in ("rtl", "verif", "docs", "src", "rundir"):
+            (workdir / d).mkdir(parents=True, exist_ok=True)
+        forbidden = set(harness_files or {})
+        for filepath, content in (context_files or {}).items():
+            if content is None:
+                continue
+            if filepath in forbidden or _is_harness_path(filepath):
+                LOG.warning("skipping harness-like context file %s", filepath)
+                continue
+            dest = _safe_workspace_path(workdir, filepath)
+            if dest is None:
+                LOG.warning("skipping unsafe context file path %s", filepath)
+                continue
+            try:  # mkdir is inside the try so a parent that is a regular file only skips this entry
+                dest.parent.mkdir(parents=True, exist_ok=True)
+                dest.write_text(content, encoding="utf-8")
+            except Exception:  # best-effort: one bad file must not abort seeding the rest
+                LOG.warning("failed to seed context file %s", filepath, exc_info=True)
+
+    def _collect_produced_files(
+        self, workdir: Path, context_files: Dict[str, str], target_files: list
+    ) -> Dict[str, str]:
+        """Capture every HDL source the agent created or modified.
+
+        Returns {relpath: content} for files under the source dirs (``rtl``,
+        ``verif``) with an HDL extension that are new or differ from what we seeded,
+        plus any declared target files present on disk. Build artifacts, unchanged
+        context files and symlinks that resolve outside ``workdir`` are skipped."""
+        produced: Dict[str, str] = {}
+        root = workdir.resolve()  # the agent controls the tree: never follow links out of it
+        for src_dir in _AGENT_SOURCE_DIRS:
+            base = workdir / src_dir
+            if not base.is_dir():
+                continue
+            for fpath in base.rglob("*"):
+                if not fpath.is_file() or fpath.suffix.lower() not in _HDL_EXTENSIONS:
+                    continue
+                rel = fpath.relative_to(workdir).as_posix()
+                if not fpath.resolve().is_relative_to(root):
+                    LOG.warning("skipping produced file outside workspace %s", rel)
+                    continue
+                try:
+                    content = fpath.read_text(encoding="utf-8")
+                except Exception:
+                    LOG.warning("could not read produced file %s", rel, exc_info=True)
+                    continue
+                # New file, or modified relative to what we seeded.
+                if context_files.get(rel) != content:
+                    produced[rel] = content
+        # Always include declared targets that exist (even if unchanged or outside the scanned dirs).
+        for tf in target_files:
+            if tf in produced:
+                continue
+            fpath = _safe_workspace_path(workdir, tf)
+            if fpath is None:
+                LOG.warning("skipping unsafe target file path %s", tf)
+                continue
+            if fpath.is_file() and fpath.resolve().is_relative_to(root):
+                try:
+                    produced[tf] = fpath.read_text(encoding="utf-8")
+                except Exception:
+                    LOG.warning("could not read produced target file %s", tf, exc_info=True)
+        return produced
+
+    def _build_claude_args(self, model: str, system_prompt: Optional[str]) -> list[str]:
+        """Assemble the ``claude`` CLI flags.
+
+        The prompt is not part of the result: it is fed via stdin, never as a positional."""
+        cfg = self.config
+        # Always-on flags.
+        argv = ["-p", "--output-format", "stream-json", "--verbose", "--dangerously-skip-permissions"]
+        if cfg.bare:
+            argv.append("--bare")
+        argv.extend(["--max-turns", str(cfg.max_turns), "--model", model])
+        # Optional value flags, emitted in this fixed order and only when truthy.
+        optional_flags = (
+            ("--mcp-config", cfg.mcp_config),
+            ("--append-system-prompt", system_prompt),
+            ("--allowedTools", cfg.allowed_tools),
+            ("--disallowedTools", cfg.disallowed_tools),
+            ("--thinking", cfg.thinking),
+        )
+        for flag, value in optional_flags:
+            if value:
+                argv.extend([flag, value])
+        # Compared against None rather than truthiness, so a limit of 0 is still passed.
+        if cfg.max_thinking_tokens is not None:
+            argv.extend(["--max-thinking-tokens", str(cfg.max_thinking_tokens)])
+        return argv
+
+    def _container_env(self, model: str, base_url: str, api_key: str) -> dict[str, str]:
+        """Build the environment passed to the provider for the claude exec.
+
+        HOME is deliberately left out; apptainer rejects overriding HOME via
+        ``--env``, so the shell command exports it instead."""
+        # Every model-selection variable is pointed at the same model.
+        model_vars = (
+            "ANTHROPIC_MODEL",
+            "ANTHROPIC_DEFAULT_HAIKU_MODEL",
+            "ANTHROPIC_DEFAULT_SONNET_MODEL",
+            "ANTHROPIC_DEFAULT_OPUS_MODEL",
+            "CLAUDE_CODE_SUBAGENT_MODEL",
+        )
+        env = {"ANTHROPIC_API_KEY": api_key, **dict.fromkeys(model_vars, model)}  # pragma: allowlist secret
+        env["IS_SANDBOX"] = "1"
+        # Keep Claude's config/session tree out of the task workspace.
+        env["CLAUDE_CONFIG_DIR"] = f"{_CONTAINER_STATE_DIR}/.claude_config"
+        env["PATH"] = "/opt/claude_node/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+        limit = self.config.max_context_tokens
+        if limit is not None:
+            env.update(CLAUDE_CODE_MAX_CONTEXT_TOKENS=str(limit), DISABLE_AUTO_COMPACT="1")
+        if base_url:
+            env.update(ANTHROPIC_BASE_URL=base_url, ANTHROPIC_AUTH_TOKEN=api_key or "local")
+        return env
+
+    @staticmethod
+    def _read_partial_stream(log_dir: Optional[Path]) -> str:
+        """Return the stream log saved under ``log_dir``; "" if ``log_dir`` is None or the file is unreadable."""
+        if log_dir is not None:
+            # Best-effort: a missing or unreadable log just means nothing was captured.
+            try:
+                return (log_dir / _STREAM_LOG_NAME).read_text(errors="replace")
+            except OSError:
+                pass
+        return ""
+
+    async def _run_claude_in_sandbox(
+        self, handle: Any, instruction: str, system_prompt: Optional[str], log_dir: Optional[Path] = None
+    ) -> tuple[str, str]:
+        """Run claude inside the sandbox; return (stdout, model_name).
+
+        When ``log_dir`` (a host dir bind-mounted at ``_CONTAINER_LOG_DIR``) is
+        given, claude's stdout is tee'd to a file there so a timed-out run can still
+        surface its partial trajectory; the shell still exits with claude's status.
+        """
+        provider = await self._ensure_provider()
+        base_url = self._resolve_base_url()
+        # Keep full model name for local/custom endpoints; strip provider prefix for real Anthropic API.
+        model = self.config.model if base_url else self.config.model.split("/")[-1]
+
+        wd = self.config.container_workdir
+        # Claude's config dir lives in the container's tmpfs, NOT under the task workspace,
+        # so `find <wd>` only shows task files. settings.json is seeded there in-shell (the
+        # dir isn't host-visible); shlex.quote makes embedding the JSON in the command safe.
+        config_dir = f"{_CONTAINER_STATE_DIR}/.claude_config"
+        settings_json = json.dumps(self._build_settings())
+
+        claude_args = self._build_claude_args(model, system_prompt)
+        inner = shlex.join(["/opt/claude_node/bin/claude", *claude_args])
+        if log_dir is not None:
+            # Tee stdout to a host-visible log: the provider discards pipe output on timeout.
+            # A pipeline exits with tee's status, so claude's is parked in a file and re-raised.
+            log_q = shlex.quote(f"{_CONTAINER_LOG_DIR}/{_STREAM_LOG_NAME}")
+            rc_q = shlex.quote(f"{_CONTAINER_STATE_DIR}/.claude_exit_status")
+            inner = f'{{ {{ {inner}; echo "$?" > {rc_q}; }} | tee {log_q}; exit "$(cat {rc_q})"; }}'
+        # HOME is set in-shell (apptainer forbids overriding HOME via --env) and points
+        # outside the workspace too, so stray dotfiles don't pollute it. The prompt is
+        # fed via stdin, not argv, since CVDP prompts can exceed the kernel's per-arg
+        # limit (MAX_ARG_STRLEN, ~128KB -> E2BIG).
+        command = (
+            f"export HOME={shlex.quote(_CONTAINER_STATE_DIR)} && "
+            f"mkdir -p {shlex.quote(config_dir)} && "
+            f"printf %s {shlex.quote(settings_json)} > {shlex.quote(config_dir + '/settings.json')} && "
+            f"cd {shlex.quote(wd)} && {inner}"
+        )
+
+        result = await provider.exec(
+            handle,
+            command,
+            env=self._container_env(model, base_url, self.config.anthropic_api_key),
+            stdin=instruction.encode(),
+            timeout_s=self.config.timeout,
+        )
+
+        if result.error_type == "timeout":
+            partial = self._read_partial_stream(log_dir)
+            if partial:
+                LOG.warning(
+                    "claude-code timed out after %ds; recovered %d bytes of partial trajectory from %s",
+                    self.config.timeout,
+                    len(partial),
+                    log_dir,
+                )
+            else:
+                LOG.warning("claude-code timed out after %ds (no partial trajectory captured)", self.config.timeout)
+            return partial, model
+
+        stdout = result.stdout or ""
+        if result.return_code != 0:
+            reason = _summarize_claude_failure(stdout)
+            LOG.warning(
+                "claude-code exited %s: %s (stderr=%r)",
+                result.return_code,
+                reason or stdout[-800:],
+                (result.stderr or "")[:400],
+            )
+        return stdout, model
+
+    async def run(self, request: Request, body: ClaudeCodeAgentRunRequest) -> ClaudeCodeAgentVerifyResponse:
+        """Run sandboxed if ``verifier_metadata.target_files`` is set; else defer to the inherited host-only flow."""
+        verifier_meta = (body.model_extra or {}).get("verifier_metadata") or {}
+        declared_targets = verifier_meta.get("target_files") or []
+        if declared_targets:
+            async with self.sem:
+                return await self._run_sandboxed(request, body, verifier_meta, declared_targets)
+        return await super().run(request, body)
+
+    async def _run_sandboxed(
+        self,
+        request: Request,
+        body: ClaudeCodeAgentRunRequest,
+        meta: dict,
+        target_files: list,
+    ) -> ClaudeCodeAgentVerifyResponse:
+        """Seed a workspace with context files only, run claude in the container so
+        it can read the spec / companion RTL and self-test with the in-container EDA
+        tools, then read the produced files back and grade them via ``/verify``."""
+        cookies = request.cookies
+        context_files = meta.get("context_files") or {}
+
+        inp = body.responses_create_params.input
+        if isinstance(inp, str):
+            user_message, input_system = inp, None
+        else:
+            user_message, input_system = _extract_instruction(inp)
+        system_parts = [p for p in [self.config.system_prompt, input_system] if p]
+        system_prompt = "\n\n".join(system_parts) if system_parts else None
+
+        provider = await self._ensure_provider()
+        sif = await self._resolve_sif()
+        # Host dir for claude's tee'd stream, bind-mounted into the container outside
+        # the workspace. Always removed on exit: a timed-out run's partial trajectory
+        # has already been read back into ``stdout`` by then.
+        log_dir = Path(tempfile.mkdtemp(prefix="cvdp_traj_"))
+        handle = None  # stays None if create() fails, so cleanup knows not to close it
+        try:
+            handle = await provider.create(
+                SandboxSpec(image=sif, provider_options={"binds": [f"{log_dir}:{_CONTAINER_LOG_DIR}"]})
+            )
+            workdir = handle.raw.staging_dir
+            self._seed_workspace(workdir, context_files, meta.get("harness_files"))
+            stdout, model_name = await self._run_claude_in_sandbox(
+                handle, user_message, system_prompt, log_dir=log_dir
+            )
+            output_items, usage = parse_stream_json(stdout)
+            # Prefer Claude Code's authoritative num_turns (what --max-turns bounds); fall
+            # back to counting assistant messages if the result event is absent (older CLI,
+            # truncated stream). That undercounts: tool-only turns produce no text message.
+            turns = usage.get("num_turns")
+            if turns is None:
+                turns = sum(
+                    1 for it in output_items
+                    if getattr(it, "type", None) == "message" and getattr(it, "role", None) == "assistant"
+                )
+
+            rtl_files = self._collect_produced_files(workdir, context_files, target_files)
+            produced_targets = [tf for tf in target_files if tf in rtl_files]  # safe path, on disk, readable
+            naturally = bool(target_files) and len(produced_targets) == len(target_files)
+            if not rtl_files:
+                LOG.warning(
+                    "claude-code produced no HDL files on disk (targets %s); "
+                    "falling back to text extraction from the response",
+                    target_files,
+                )
+
+            input_tokens = usage.get("input_tokens", 0)
+            output_tokens = usage.get("output_tokens", 0)
+            response = NeMoGymResponse(
+                id=f"resp_{uuid4().hex}",
+                created_at=int(time()),
+                model=model_name,
+                object="response",
+                output=output_items,
+                tool_choice=body.responses_create_params.tool_choice,
+                tools=body.responses_create_params.tools,
+                parallel_tool_calls=body.responses_create_params.parallel_tool_calls,
+                usage=NeMoGymResponseUsage(
+                    input_tokens=input_tokens,
+                    input_tokens_details=NeMoGymResponseInputTokensDetails(cached_tokens=0),
+                    output_tokens=output_tokens,
+                    output_tokens_details=NeMoGymResponseOutputTokensDetails(reasoning_tokens=0),
+                    total_tokens=input_tokens + output_tokens,
+                ),
+            )
+
+            # Only send rtl_files when claude actually wrote files; otherwise omit it
+            # so the resources server falls back to parsing RTL from the response text.
+            verify_payload = body.model_dump() | {"response": response.model_dump()}
+            if rtl_files:
+                verify_payload["rtl_files"] = rtl_files
+
+            verify_resp = await self.server_client.post(
+                server_name=self.config.resources_server.name,
+                url_path="/verify",
+                json=verify_payload,
+                cookies=cookies,
+            )
+            await raise_for_status(verify_resp)
+            verify_json = await get_response_json(verify_resp)
+
+            return ClaudeCodeAgentVerifyResponse.model_validate(
+                verify_json | {"turns_used": turns, "finished_naturally": naturally}
+            )
+        finally:
+            try:
+                if handle is not None:
+                    await provider.close(handle)
+            finally:
+                shutil.rmtree(log_dir, ignore_errors=True)
+
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    CvdpAgent.run_webserver()
diff --git a/responses_api_agents/cvdp_agent/configs/cvdp_agent.yaml b/responses_api_agents/cvdp_agent/configs/cvdp_agent.yaml
index 39f66ec9ab..df950005b4 100644
--- a/responses_api_agents/cvdp_agent/configs/cvdp_agent.yaml
+++ b/responses_api_agents/cvdp_agent/configs/cvdp_agent.yaml
@@ -8,3 +8,16 @@ cvdp_agent:
       model_server:
         type: responses_api_models
         name: policy_model
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: resources_servers/cvdp/data/example.jsonl
+      - name: validation
+        type: validation
+        jsonl_fpath: resources_servers/cvdp/data/gym_cvdp_1.0.4_nonagentic_code_generation_no_commercial.jsonl
+        gitlab_identifier:
+          dataset_name: cvdp_nonagentic_code_gen_no_commercial
+          version: 0.0.2
+          artifact_fpath: gym_cvdp_1.0.4_nonagentic_code_generation_no_commercial.jsonl
+        license: Apache 2.0
+        num_repeats: 5
diff --git a/responses_api_agents/cvdp_agent/configs/cvdp_agent_agentic.yaml b/responses_api_agents/cvdp_agent/configs/cvdp_agent_agentic.yaml
new file mode 100644
index 0000000000..42429e134b
--- /dev/null
+++ b/responses_api_agents/cvdp_agent/configs/cvdp_agent_agentic.yaml
@@ -0,0 +1,29 @@
+cvdp_agent_agentic:
+  responses_api_agents:
+    cvdp_agent:
+      entrypoint: agentic_app.py
+      resources_server:
+        type: resources_servers
+        name: cvdp
+      concurrency: 4
+      model: ${anthropic_model_name}
+      anthropic_api_key: ${anthropic_api_key}
+      anthropic_base_url: ${anthropic_base_url}
+      max_turns: 30
+      timeout: 900
+
+      # --- Apptainer agentic execution ---
+      # Runs claude inside the EDA sim container so it can edit files on disk and
+      # self-test with the in-container EDA tools. Leave sif_path unset to
+      # pull/convert sim_image to a cached .sif; claude_node_dir defaults to a
+      # built-in self-contained Node+Claude prefix that is bind-mounted in.
+      sim_image: ${cvdp_sim_image}
+      sif_path: null
+      sif_cache_dir: ""
+      claude_node_dir: ""
+      container_workdir: /code
+
+      system_prompt: null
+      allowed_tools: null
+      disallowed_tools: null
+      claude_code_version: null
\ No newline at end of file
diff --git a/responses_api_agents/cvdp_agent/tests/test_agentic_app.py b/responses_api_agents/cvdp_agent/tests/test_agentic_app.py
new file mode 100644
index 0000000000..c962214926
--- /dev/null
+++ b/responses_api_agents/cvdp_agent/tests/test_agentic_app.py
@@ -0,0 +1,504 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import json
+import shlex
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import yaml
+
+from nemo_gym.openai_utils import NeMoGymResponseCreateParamsNonStreaming
+from nemo_gym.sandbox.providers.base import SandboxExecResult
+from nemo_gym.server_utils import ServerClient
+from responses_api_agents.claude_code_agent.app import (
+    ClaudeCodeAgentVerifyResponse,
+    ResourcesServerRef,
+)
+from responses_api_agents.cvdp_agent import agentic_app
+from responses_api_agents.cvdp_agent.agentic_app import (
+    CvdpAgent,
+    CvdpAgentConfig,
+    _is_harness_path,
+    _safe_workspace_path,
+    _summarize_claude_failure,
+)
+
+
+def _config(**kwargs) -> CvdpAgentConfig:
+    """Build a CvdpAgentConfig from fixed server fields plus any extra ``kwargs``."""
+    # Values shared by every test; agent-specific fields come in through kwargs.
+    fixed = {"host": "0.0.0.0", "port": 8080, "entrypoint": "", "name": "cvdp_agent"}
+    return CvdpAgentConfig(
+        **fixed,
+        resources_server=ResourcesServerRef(type="resources_servers", name="cvdp"),
+        **kwargs,
+    )
+
+
+def _make_agent(provider=None, **kwargs) -> CvdpAgent:
+    """Build a CvdpAgent with ``model_post_init`` patched out and its runtime state set by hand."""
+    with patch("responses_api_agents.cvdp_agent.agentic_app.CvdpAgent.model_post_init"):
+        agent = CvdpAgent(config=_config(**kwargs), server_client=MagicMock(spec=ServerClient))
+    agent.sem = asyncio.Semaphore(agent.config.concurrency)
+    # A supplied provider gets the "/node" bind dir; with no provider both stay None.
+    agent._provider, agent._node_bind_dir = provider, (None if provider is None else "/node")
+    agent._provider_guard, agent._sif_lock_guard = asyncio.Lock(), asyncio.Lock()
+    agent._sif_locks = {}
+    agent._sif_cache_dir = "/tmp/sif"
+    return agent
+
+
+class FakeHandle:
+    """Minimal sandbox handle for tests: fixed ids plus a ``raw`` namespace holding ``staging_dir``."""
+    def __init__(self, staging_dir: Path) -> None:
+        self.sandbox_id, self.provider_name = "nemo-gym-test", "apptainer"
+        self.raw = SimpleNamespace(staging_dir=staging_dir, name="nemo-gym-test", mount_point="/code", image="x.sif")
+
+
+class FakeProvider:
+    """Records create/exec/close calls and replays one canned exec result."""
+    def __init__(self, staging_dir: Path, exec_result: SandboxExecResult, on_exec=None) -> None:
+        self._staging, self._exec_result, self._on_exec = staging_dir, exec_result, on_exec
+        self.created = self.closed = False
+        self.exec_calls: list[dict] = []
+
+    async def create(self, spec):
+        """Remember ``spec`` and hand back a FakeHandle over the staging dir."""
+        self.spec, self.created = spec, True
+        return FakeHandle(self._staging)
+
+    async def exec(self, handle, command, *, env=None, stdin=None, timeout_s=None, **kwargs):
+        """Log the call, fire ``on_exec(handle)`` if set, and return the canned result."""
+        call = dict(command=command, env=env, stdin=stdin, timeout_s=timeout_s)
+        self.exec_calls.append(call)
+        if self._on_exec is not None:
+            self._on_exec(handle)
+        return self._exec_result
+
+    async def close(self, handle):
+        self.closed = True
+
+
+def _assistant_line(text: str) -> str:
+    """Serialize one stream-json ``assistant`` event: a single text part plus fixed usage."""
+    text_part = {"type": "text", "text": text}
+    message = {
+        "content": [text_part],
+        "usage": {"input_tokens": 7, "output_tokens": 3},
+    }
+    event = {"type": "assistant", "message": message}
+    # json.dumps emits no newlines here, so the event stays on one line.
+    return json.dumps(event)
+
+
+def _result_line(num_turns: int) -> str:
+    """Serialize a stream-json ``result`` event (success, not an error) reporting ``num_turns``."""
+    event = dict(
+        type="result",
+        subtype="success",
+        is_error=False,
+        num_turns=num_turns,
+        usage=dict(input_tokens=1, output_tokens=1),
+    )
+    return json.dumps(event)
+
+
+class TestConfig:
+    """Default field values of a CvdpAgentConfig built with no overrides."""
+    def test_sandbox_defaults(self) -> None:
+        cfg = _config()
+        assert (cfg.sim_image, cfg.container_workdir) == ("nvidia/cvdp-sim:v1.0.0", "/code")
+        assert cfg.sif_path is None
+        assert cfg.max_context_tokens == 1_000_000
+
+    def test_inherits_claude_defaults(self) -> None:
+        cfg = _config()
+        assert cfg.bare is True
+        assert cfg.max_turns == 30
+
+
+class TestBuildClaudeArgs:
+    """``_build_claude_args`` emits each value flag immediately followed by its value."""
+    def test_no_positional_prompt_and_bare(self) -> None:
+        argv = _make_agent()._build_claude_args("m", None)
+        assert "--bare" in argv
+        assert "--" not in argv  # prompt is fed via stdin, never as a positional
+        for flag, value in {"--model": "m", "--max-turns": "30"}.items():
+            assert argv[argv.index(flag) + 1] == value
+
+    def test_optional_flags(self) -> None:
+        agent = _make_agent(allowed_tools="Bash", thinking="enabled", max_thinking_tokens=64)
+        argv = agent._build_claude_args("m", "be terse")
+        expected = {"--append-system-prompt": "be terse", "--allowedTools": "Bash", "--thinking": "enabled"}
+        expected["--max-thinking-tokens"] = "64"
+        for flag, value in expected.items():
+            assert argv[argv.index(flag) + 1] == value
+
+
+class TestContainerEnv:
+    """Contents of the environment dict returned by ``_container_env``."""
+    def test_home_not_set_and_paths(self) -> None:
+        env = _make_agent()._container_env("m", "", "key")
+        # HOME must NOT be set via env (apptainer rejects it; it's exported in-shell).
+        assert "HOME" not in env
+        assert env["CLAUDE_CONFIG_DIR"] == f"{agentic_app._CONTAINER_STATE_DIR}/.claude_config"
+        assert env["PATH"].startswith("/opt/claude_node/bin:")
+        fixed = {"IS_SANDBOX": "1", "CLAUDE_CODE_MAX_CONTEXT_TOKENS": "1000000", "DISABLE_AUTO_COMPACT": "1"}
+        for key, value in fixed.items():
+            assert env[key] == value
+
+    def test_base_url_branch(self) -> None:
+        url = "http://host:9000"
+        env = _make_agent()._container_env("m", url, "key")
+        assert env["ANTHROPIC_BASE_URL"] == url
+        assert env["ANTHROPIC_AUTH_TOKEN"] == "key"
+
+    def test_max_context_none_omitted(self) -> None:
+        env = _make_agent(max_context_tokens=None)._container_env("m", "", "key")
+        # With no limit configured, neither context variable is exported.
+        assert "CLAUDE_CODE_MAX_CONTEXT_TOKENS" not in env
+        assert "DISABLE_AUTO_COMPACT" not in env
+
+
+class TestSeedWorkspace:
+    """``_seed_workspace`` writes context files but never harness-like or escaping paths."""
+
+    def test_seeds_context_only(self, tmp_path: Path) -> None:
+        context = {
+            "rtl/a.sv": "module a;endmodule",
+            "docs/spec.md": "# spec",
+        }
+        _make_agent()._seed_workspace(tmp_path, context, harness_files=None)
+        for rel, text in context.items():
+            assert (tmp_path / rel).read_text() == text
+        # standard layout dirs created
+        layout = ("rtl", "verif", "docs", "src", "rundir")
+        assert all((tmp_path / d).is_dir() for d in layout)
+
+    def test_skips_harness_and_unsafe(self, tmp_path: Path) -> None:
+        context = {
+            "src/test_a.py": "secret",
+            "docker-compose.yml": "secret",
+            "../escape.sv": "secret",
+            "verif/declared.sv": "ok",
+        }
+        declared_harness = {"verif/declared.sv": "..."}
+        _make_agent()._seed_workspace(tmp_path, context, harness_files=declared_harness)
+        # harness-like paths: a file under src/ and a compose file
+        assert not (tmp_path / "src" / "test_a.py").exists()
+        assert not (tmp_path / "docker-compose.yml").exists()
+        # a ``..`` path must not land next to the workspace
+        assert not (tmp_path.parent / "escape.sv").exists()
+        # declared as a harness file -> skipped even though it's an HDL path
+        assert not (tmp_path / "verif" / "declared.sv").exists()
+
+
+class TestCollectProducedFiles:
+    """``_collect_produced_files`` returns new/changed HDL files plus declared targets."""
+    def test_collects_new_and_modified(self, tmp_path: Path) -> None:
+        for sub in ("rtl", "verif"):
+            (tmp_path / sub).mkdir()
+        on_disk = {"rtl/new.sv": "module new;endmodule", "rtl/same.sv": "unchanged", "rtl/build.out": "artifact"}
+        for rel, text in on_disk.items():
+            (tmp_path / rel).write_text(text)
+        seeded = {"rtl/same.sv": "unchanged"}
+        produced = _make_agent()._collect_produced_files(tmp_path, seeded, target_files=[])
+        assert "rtl/new.sv" in produced
+        assert "rtl/same.sv" not in produced  # unchanged context file
+        assert "rtl/build.out" not in produced  # not an HDL extension
+
+    def test_includes_declared_targets(self, tmp_path: Path) -> None:
+        target = tmp_path / "rtl" / "target.sv"
+        target.parent.mkdir()
+        target.write_text("module t;endmodule")
+        produced = _make_agent()._collect_produced_files(tmp_path, {}, target_files=["rtl/target.sv"])
+        assert produced["rtl/target.sv"] == "module t;endmodule"
+
+
+class TestRunClaudeInSandbox:
+    """``_run_claude_in_sandbox``: the shell command it builds and how it handles a timeout."""
+
+    @staticmethod
+    def _invoke(tmp_path: Path, result: SandboxExecResult, *args, **kwargs):
+        """Call the method through a FakeProvider; return ((stdout, model), provider)."""
+        provider = FakeProvider(tmp_path, result)
+        agent = _make_agent(provider=provider)
+        outcome = asyncio.run(agent._run_claude_in_sandbox(FakeHandle(tmp_path), *args, **kwargs))
+        return outcome, provider
+
+    def test_command_exports_home_and_feeds_stdin(self, tmp_path: Path) -> None:
+        result = SandboxExecResult(stdout=_assistant_line("done"), stderr=None, return_code=0)
+        (stdout, model), provider = self._invoke(tmp_path, result, "the prompt", "be terse")
+
+        assert model == "claude-sonnet-4-6"
+        assert "done" in stdout
+        command = provider.exec_calls[0]["command"]
+        # prompt is fed via stdin, not the command
+        assert provider.exec_calls[0]["stdin"] == b"the prompt"
+        # HOME points outside the task workspace so Claude's state doesn't pollute it.
+        assert command.startswith(f"export HOME={shlex.quote(agentic_app._CONTAINER_STATE_DIR)}")
+        assert "cd /code" in command
+        assert "/opt/claude_node/bin/claude" in command
+        # settings are seeded in-shell into the config dir (outside the workspace),
+        # not written into the bound workspace on the host.
+        assert f"{agentic_app._CONTAINER_STATE_DIR}/.claude_config" in command
+        assert "settings.json" in command
+        assert not (tmp_path / ".claude_config").exists()
+
+    def test_timeout_returns_empty(self, tmp_path: Path) -> None:
+        result = SandboxExecResult(stdout=None, stderr="timed out", return_code=125, error_type="timeout")
+        # No log_dir is passed, so there is no tee'd file to recover from.
+        (stdout, model), _ = self._invoke(tmp_path, result, "p", None)
+        assert stdout == ""
+        assert model == "claude-sonnet-4-6"
+
+    def test_command_tees_stream_when_log_dir_given(self, tmp_path: Path) -> None:
+        result = SandboxExecResult(stdout=_assistant_line("done"), stderr=None, return_code=0)
+        _, provider = self._invoke(tmp_path, result, "p", None, log_dir=tmp_path / "log")
+        cmd = provider.exec_calls[0]["command"]
+        assert "tee" in cmd
+        assert agentic_app._CONTAINER_LOG_DIR in cmd
+
+    def test_timeout_recovers_partial_trajectory_from_log(self, tmp_path: Path) -> None:
+        # The provider drops pipe output on timeout, but the tee'd file survives,
+        # so the partial trajectory is recovered and returned as stdout.
+        log_dir = tmp_path / "log"
+        log_dir.mkdir()
+        (log_dir / agentic_app._STREAM_LOG_NAME).write_text(_assistant_line("partial work before kill"))
+        result = SandboxExecResult(stdout=None, stderr="timed out", return_code=125, error_type="timeout")
+
+        (stdout, _model), _ = self._invoke(tmp_path, result, "p", None, log_dir=log_dir)
+
+        assert "partial work before kill" in stdout
+
+
+class TestRunSandboxed:
+    def test_full_flow_sends_rtl_files(self, tmp_path: Path) -> None:
+        """Happy path: the file written during exec is posted to ``/verify`` as ``rtl_files``."""
+        target = "rtl/target.sv"
+
+        def write_target(handle):
+            (handle.raw.staging_dir / target).write_text("module target;endmodule")
+
+        exec_result = SandboxExecResult(stdout=_assistant_line("edited the file"), stderr=None, return_code=0)
+        provider = FakeProvider(tmp_path, exec_result, on_exec=write_target)
+        agent = _make_agent(provider=provider, sif_path="/cached/x.sif")
+
+        body = MagicMock()
+        body.responses_create_params = NeMoGymResponseCreateParamsNonStreaming(input="solve it")
+        body.model_dump.return_value = {"responses_create_params": {"input": "solve it"}}
+        request = SimpleNamespace(cookies={})
+        meta = {"target_files": [target], "context_files": {}}
+
+        captured = {}
+
+        async def fake_post(*, server_name, url_path, json, cookies):
+            captured.update(url_path=url_path, json=json)
+            return SimpleNamespace()
+
+        agent.server_client.post = AsyncMock(side_effect=fake_post)
+        module = "responses_api_agents.cvdp_agent.agentic_app"
+
+        with (
+            patch(f"{module}.raise_for_status", AsyncMock()),
+            patch(f"{module}.get_response_json", AsyncMock(return_value={"reward": 1.0})),
+            patch.object(ClaudeCodeAgentVerifyResponse, "model_validate", side_effect=lambda d: SimpleNamespace(**d)),
+        ):
+            resp = asyncio.run(agent._run_sandboxed(request, body, meta, [target]))
+
+        assert provider.created and provider.closed
+        assert captured["url_path"] == "/verify"
+        assert target in captured["json"]["rtl_files"]
+        assert resp.reward == 1.0
+        # The canned stream holds one assistant message and no result event.
+        assert resp.turns_used == 1
+        assert resp.finished_naturally is True
+
+    def test_turns_used_prefers_num_turns_from_result_event(self, tmp_path: Path) -> None:
+        # One assistant text message but the result event reports 7 turns: the
+        # authoritative num_turns must win over the message count (which is 1).
+        target = "rtl/target.sv"
+        stream = "\n".join([_assistant_line("did several tool-only turns"), _result_line(7)])
+
+        def write_target(handle):
+            (handle.raw.staging_dir / target).write_text("module target;endmodule")
+
+        exec_result = SandboxExecResult(stdout=stream, stderr=None, return_code=0)
+        provider = FakeProvider(tmp_path, exec_result, on_exec=write_target)
+        agent = _make_agent(provider=provider, sif_path="/cached/x.sif")
+
+        body = MagicMock()
+        body.responses_create_params = NeMoGymResponseCreateParamsNonStreaming(input="solve it")
+        body.model_dump.return_value = {"responses_create_params": {"input": "solve it"}}
+        request = SimpleNamespace(cookies={})
+        meta = {"target_files": [target], "context_files": {}}
+
+        async def fake_post(*, server_name, url_path, json, cookies):
+            return SimpleNamespace()
+
+        agent.server_client.post = AsyncMock(side_effect=fake_post)
+        module = "responses_api_agents.cvdp_agent.agentic_app"
+
+        with (
+            patch(f"{module}.raise_for_status", AsyncMock()),
+            patch(f"{module}.get_response_json", AsyncMock(return_value={"reward": 1.0})),
+            patch.object(ClaudeCodeAgentVerifyResponse, "model_validate", side_effect=lambda d: SimpleNamespace(**d)),
+        ):
+            resp = asyncio.run(agent._run_sandboxed(request, body, meta, [target]))
+
+        assert resp.turns_used == 7
+
+    def test_binds_log_dir_and_cleans_up(self, tmp_path: Path) -> None:
+        def writer(handle):  # passed as on_exec below; writes the target file into the staging dir
+            (handle.raw.staging_dir / "rtl" / "target.sv").write_text("module target;endmodule")
+
+        result = SandboxExecResult(stdout=_assistant_line("ok"), stderr=None, return_code=0)
+        provider = FakeProvider(tmp_path, result, on_exec=writer)
+        agent = _make_agent(provider=provider, sif_path="/cached/x.sif")
+
+        body = MagicMock()
+        body.responses_create_params = NeMoGymResponseCreateParamsNonStreaming(input="solve it")
+        body.model_dump.return_value = {"responses_create_params": {"input": "solve it"}}
+        request = SimpleNamespace(cookies={})
+        meta = {"target_files": ["rtl/target.sv"], "context_files": {}}
+
+        async def fake_post(*, server_name, url_path, json, cookies):  # stub for server_client.post
+            return SimpleNamespace()
+
+        agent.server_client.post = AsyncMock(side_effect=fake_post)
+
+        with (
+            patch("responses_api_agents.cvdp_agent.agentic_app.raise_for_status", AsyncMock()),
+            patch(
+                "responses_api_agents.cvdp_agent.agentic_app.get_response_json",
+                AsyncMock(return_value={"reward": 1.0}),
+            ),
+            patch.object(ClaudeCodeAgentVerifyResponse, "model_validate", side_effect=lambda d: SimpleNamespace(**d)),
+        ):
+            asyncio.run(agent._run_sandboxed(request, body, meta, ["rtl/target.sv"]))
+
+        binds = provider.spec.provider_options["binds"]  # bind strings recorded on the spec
+        assert len(binds) == 1
+        host_dir, container_dir = binds[0].split(":")  # bind string has the form "host:container"
+        assert container_dir == agentic_app._CONTAINER_LOG_DIR
+        # temp log dir is removed once its contents have been folded into the response
+        assert not Path(host_dir).exists()
+
+    def test_no_produced_files_omits_rtl_files(self, tmp_path: Path) -> None:
+        result = SandboxExecResult(stdout=_assistant_line("nothing written"), stderr=None, return_code=0)
+        provider = FakeProvider(tmp_path, result)  # no writer -> no files produced
+        agent = _make_agent(provider=provider, sif_path="/cached/x.sif")
+
+        body = MagicMock()
+        body.responses_create_params = NeMoGymResponseCreateParamsNonStreaming(input="solve it")
+        body.model_dump.return_value = {"responses_create_params": {"input": "solve it"}}
+        request = SimpleNamespace(cookies={})
+        meta = {"target_files": ["rtl/target.sv"], "context_files": {}}
+
+        captured = {}  # filled by fake_post with the JSON body sent to the server
+
+        async def fake_post(*, server_name, url_path, json, cookies):
+            captured["json"] = json
+            return SimpleNamespace()
+
+        agent.server_client.post = AsyncMock(side_effect=fake_post)
+
+        with (
+            patch("responses_api_agents.cvdp_agent.agentic_app.raise_for_status", AsyncMock()),
+            patch(
+                "responses_api_agents.cvdp_agent.agentic_app.get_response_json",
+                AsyncMock(return_value={"reward": 0.0}),
+            ),
+            patch.object(ClaudeCodeAgentVerifyResponse, "model_validate", side_effect=lambda d: SimpleNamespace(**d)),
+        ):
+            resp = asyncio.run(agent._run_sandboxed(request, body, meta, ["rtl/target.sv"]))
+
+        assert "rtl_files" not in captured["json"]  # nothing produced -> key is omitted entirely
+        assert resp.finished_naturally is False
+        assert provider.closed  # the sandbox is still torn down
+
+
+class TestRunDispatch:  # run() picks the host or sandbox path from verifier_metadata["target_files"]
+    def test_no_target_files_uses_host_path(self) -> None:
+        agent = _make_agent()
+        body = MagicMock()
+        body.model_extra = {"verifier_metadata": {}}  # no target_files key
+        request = SimpleNamespace(cookies={})
+
+        host_run = AsyncMock(return_value="host-result")
+        with patch("responses_api_agents.claude_code_agent.app.ClaudeCodeAgent.run", host_run):
+            result = asyncio.run(agent.run(request, body))
+
+        assert result == "host-result"
+        host_run.assert_awaited_once()
+
+    def test_target_files_uses_sandbox_path(self) -> None:
+        agent = _make_agent()
+        body = MagicMock()
+        body.model_extra = {"verifier_metadata": {"target_files": ["rtl/a.sv"]}}
+        request = SimpleNamespace(cookies={})
+
+        agent._run_sandboxed = AsyncMock(return_value="sandbox-result")  # replace the sandbox path with a stub
+        result = asyncio.run(agent.run(request, body))
+
+        assert result == "sandbox-result"
+        agent._run_sandboxed.assert_awaited_once()
+
+
+class TestSummarizeFailure:  # _summarize_claude_failure over newline-delimited JSON events
+    def test_collects_retries_and_result(self) -> None:
+        stdout = "\n".join(
+            [
+                json.dumps({"type": "system", "subtype": "api_retry", "error_status": 529}),
+                json.dumps({"type": "system", "subtype": "api_retry", "error_status": 529}),
+                json.dumps({"type": "result", "subtype": "error_max_turns", "is_error": True}),
+            ]
+        )
+        summary = _summarize_claude_failure(stdout)
+        assert "api_retry=529 x2" in summary  # the two identical retries are reported as one count
+        assert "error_max_turns" in summary
+
+    def test_empty(self) -> None:
+        assert _summarize_claude_failure("not-json\n") == ""  # unparseable input yields an empty summary
+
+
+class TestHelpers:  # path-classification helpers
+    def test_is_harness_path(self) -> None:
+        assert _is_harness_path("src/test.py")
+        assert _is_harness_path("docker-compose.yml")
+        assert not _is_harness_path("rtl/a.sv")
+
+    def test_safe_workspace_path_rejects_escape(self, tmp_path: Path) -> None:
+        assert _safe_workspace_path(tmp_path, "../x") is None  # would escape the workspace root
+        assert _safe_workspace_path(tmp_path, "") is None  # empty path is rejected
+        assert _safe_workspace_path(tmp_path, "rtl/a.sv") == (tmp_path / "rtl" / "a.sv").resolve()
+
+
+class TestConfigYaml:  # smoke checks: the app module is valid Python and the config has the expected keys
+    def test_module_compiles(self) -> None:
+        app_path = Path(__file__).resolve().parent.parent / "agentic_app.py"
+        compile(app_path.read_text(encoding="utf-8"), str(app_path), "exec")  # syntax check only; nothing runs
+
+    def test_config_yaml_parses(self) -> None:
+        cfg_path = Path(__file__).resolve().parent.parent / "configs" / "cvdp_agent_agentic.yaml"
+        data = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))  # explicit UTF-8, not the locale default
+        # Top-level key is the instance name; nested key is the server *directory*
+        # (responses_api_agents/cvdp_agent), which holds agentic_app.py.
+        inner = data["cvdp_agent_agentic"]["responses_api_agents"]["cvdp_agent"]
+        assert inner["entrypoint"] == "agentic_app.py"
+        assert inner["container_workdir"] == "/code"

From e7dd0663425b3155586f178ac793128d86fa71d0 Mon Sep 17 00:00:00 2001
From: Christian Munley <cmunley@nvidia.com>
Date: Thu, 25 Jun 2026 12:53:28 -0700
Subject: [PATCH 7/7] trim

Signed-off-by: Christian Munley <cmunley@nvidia.com>
---
 .../sandbox/providers/apptainer/README.md     | 24 +++---
 .../sandbox/providers/apptainer/provider.py   | 82 +++++++++++++++----
 tests/unit_tests/test_apptainer_provider.py   | 78 +++++++++++++++++-
 3 files changed, 151 insertions(+), 33 deletions(-)

diff --git a/nemo_gym/sandbox/providers/apptainer/README.md b/nemo_gym/sandbox/providers/apptainer/README.md
index 8344f000da..ca7b36f2a2 100644
--- a/nemo_gym/sandbox/providers/apptainer/README.md
+++ b/nemo_gym/sandbox/providers/apptainer/README.md
@@ -16,14 +16,10 @@ where Docker is unavailable).
 - The **`apptainer` binary** must be installed and on `PATH`. The provider does **not**
   auto-install it; constructing the provider raises `RuntimeError` if it is missing.
   See the [Apptainer install guide](https://apptainer.org/docs/admin/main/installation.html).
-- A container **image**, supplied per sandbox as either:
-  - a local `.sif` file path, or
-  - a remote URI that Apptainer can pull: `docker://`, `oras://`, or `library://`.
-- A couple of features — **running commands as a different user** and **enforcing
-  CPU/memory limits** — only work if your machine's administrator has enabled Apptainer's
-  **`--fakeroot`** support (Linux user namespaces, and cgroups v2 delegation for limits).
-  On many HPC clusters this is on by default; where it isn't, those features are quietly
-  skipped. Everything else works without it. See [Limitations](#limitations).
+- A container **image**: a local `.sif`, a URI Apptainer can pull, or a bare Docker
+  image name such as `ubuntu:22.04`.
+- Running commands as a different user needs Apptainer **`--fakeroot`** support.
+  CPU/memory limits need cgroups v2 delegation. See [Limitations](#limitations).
 
 ## Quick start
 
@@ -93,6 +89,7 @@ apptainer:
     mount_point: /sandbox
     start_timeout_s: 600
     extra_start_args: []
+    apply_resource_limits: true
   probe:
     command: printf apptainer-sandbox-ready
     expected_stdout: apptainer-sandbox-ready
@@ -108,6 +105,7 @@ Settings for starting the instance (`apptainer instance start`).
 | `mount_point` | `/sandbox` | Absolute path inside the container where the host staging dir is bind-mounted. Powers the file-transfer fast path. |
 | `start_timeout_s` | `600` | Max seconds to wait for `instance start` (`None` = no timeout). |
 | `extra_start_args` | `[]` | Extra raw flags appended to `instance start`. |
+| `apply_resource_limits` | `true` | Add CPU/memory cgroup flags from `SandboxSpec.resources`. Skipped (with a warning) when `extra_start_args` contains `--fakeroot`. |
 
 ### `exec` — `ApptainerExecConfig`
 
@@ -142,8 +140,8 @@ The spec is provider-neutral; the Apptainer provider uses these fields:
 
 | Field | Used for |
 |---|---|
-| `image` | Image source — local `.sif` path or remote `docker://` / `oras://` / `library://` URI. Required. |
-| `env` | Each entry becomes `--env KEY=VALUE` at instance start. |
+| `image` | Bare Docker image name, local `.sif` path, or URI Apptainer can pull. Required. |
+| `env` | Passed as `--env KEY=VALUE` at instance start and on every `exec`; a per-call `env` value overrides the spec value for the same key. |
 | `workdir` | Default working directory for `exec` (applied as `--pwd`). |
 | `files` | Seed files written into the sandbox at `start()` (handled by the sandbox API via `upload`). |
 | `resources` | Mapped to cgroup flags (see below). |
@@ -231,6 +229,7 @@ are unaffected.
 | `gpu` (truthy) | `--nv` (NVIDIA passthrough) |
 | `disk_gib`, `gpu_type` | No direct Apptainer flag — ignored. |
 
+
 ### Status mapping
 
 `apptainer instance list --json` only lists *live* instances, so:
@@ -259,9 +258,8 @@ failed" rather than "the command exited 125".
   may not resolve. Prefer named users.
 - **`--fakeroot` on exec.** Whether `--fakeroot` works on `exec` into an instance that
   was started *without* fakeroot varies by Apptainer version and host configuration.
-- **Resource enforcement.** cgroup limits may require cgroups v2 delegation and/or
-  `--fakeroot` on the host; limits are best-effort and silently ignored where the host
-  can't enforce them.
+- **Resource enforcement.** CPU/memory cgroup flags require cgroups v2 delegation.
+  Disable them with `create.apply_resource_limits: false`.
 - **Runtime-failure detection is heuristic.** It keys off stderr markers, so a user
   command whose own output contains `FATAL:` could be misclassified as a sandbox error.
 
diff --git a/nemo_gym/sandbox/providers/apptainer/provider.py b/nemo_gym/sandbox/providers/apptainer/provider.py
index a3143d28e4..0605b1a805 100644
--- a/nemo_gym/sandbox/providers/apptainer/provider.py
+++ b/nemo_gym/sandbox/providers/apptainer/provider.py
@@ -51,6 +51,7 @@
 # Best-effort stderr markers indicating apptainer itself (not the user's command)
 # failed to run the command. Apptainer prefixes its own fatal errors with "FATAL:".
 APPTAINER_RUNTIME_ERROR_MARKERS = ("fatal:", "no instance found", "instance not found", "does not exist")
+APPTAINER_MISSING_INSTANCE_MARKERS = ("no instance found", "instance not found", "does not exist")
 
 
 class ApptainerCreateError(SandboxCreateError):
@@ -90,6 +91,7 @@ class ApptainerCreateConfig:
     mount_point: str = DEFAULT_MOUNT_POINT
     start_timeout_s: float | None = 600
     extra_start_args: list[str] = field(default_factory=list)
+    apply_resource_limits: bool = True
 
     def __post_init__(self) -> None:
         if self.start_timeout_s is not None and self.start_timeout_s <= 0:
@@ -145,21 +147,36 @@ class _ApptainerInstance:
     staging_dir: Path  # the shared folder on the host
     mount_point: str  # where the folder shows up inside
     image: str  # what it was built from
+    env: dict[str, str] = field(default_factory=dict)
 
 
 def _resource_flags(resources: SandboxResources) -> list[str]:
     """Translate neutral resources into apptainer CLI flags."""
+    return _resource_limit_flags(resources) + _resource_passthrough_flags(resources)
+
+
+def _resource_limit_flags(resources: SandboxResources) -> list[str]:
     flags: list[str] = []
     if resources.cpu is not None:
         flags += ["--cpus", str(resources.cpu)]
     if resources.memory_mib is not None:
         flags += ["--memory", f"{resources.memory_mib}m"]
+    return flags
+
+
+def _resource_passthrough_flags(resources: SandboxResources) -> list[str]:
+    flags: list[str] = []
     if resources.gpu:
         flags.append("--nv")
-    # disk_gib / gpu_type have no direct apptainer flag; intentionally ignored.
     return flags
 
 
+def _resolve_image(image: str) -> str:
+    if "://" in image or image.startswith(("/", ".")) or image.endswith(".sif"):
+        return image  # has a URI scheme, starts with "/" or ".", or ends in ".sif": pass through unchanged
+    return f"docker://{image}"  # anything else (e.g. "ubuntu:22.04") is treated as a Docker image name
+
+
 def _to_sandbox_status(state: str | None) -> SandboxStatus:
     """Map an apptainer-reported state string to the neutral status enum."""
     normalized = str(state or "").lower()
@@ -176,13 +193,20 @@ def _to_sandbox_status(state: str | None) -> SandboxStatus:
 
 def _path_under_mount(mount_point: str, path: str) -> str | None:
     """If `path` is inside the mount, return its path relative to the mount; else None."""
-    mp = mount_point.rstrip("/")
-    if path == mp:
+    if not path.startswith("/"):
+        return None
+    mp = posixpath.normpath(mount_point.rstrip("/") or "/")
+    normalized = posixpath.normpath(path)
+    if normalized == mp:
         return ""
-    prefix = mp + "/"
-    if path.startswith(prefix):
-        return path[len(prefix) :]
-    return None
+    try:
+        if posixpath.commonpath([mp, normalized]) != mp:
+            return None
+    except ValueError:
+        return None
+    if mp == "/":
+        return normalized.lstrip("/")
+    return normalized[len(mp) + 1 :]
 
 
 def _is_runtime_failure(stderr: str) -> bool:
@@ -191,6 +215,11 @@ def _is_runtime_failure(stderr: str) -> bool:
     return any(marker in low for marker in APPTAINER_RUNTIME_ERROR_MARKERS)
 
 
+def _is_missing_instance(stderr: str) -> bool:
+    low = stderr.lower()  # markers are lowercase, so the match is case-insensitive
+    return any(marker in low for marker in APPTAINER_MISSING_INSTANCE_MARKERS)  # substring match on any marker
+
+
 def _coerce_binds(value: Any) -> list[str]:
     """Normalize ``spec.provider_options['binds']`` into a list of bind strings.
 
@@ -333,9 +362,9 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
         if spec.ttl_s is not None:
             LOGGER.warning("ttl_s is not supported by the apptainer provider; it will be ignored.")
 
-        image = spec.image
-        if image is None:
+        if spec.image is None:
             raise ApptainerCreateError("spec.image is required for the apptainer provider")
+        image = _resolve_image(spec.image)
 
         # Extra per-sandbox bind mounts (validated before we allocate anything).
         extra_binds = _coerce_binds(spec.provider_options.get("binds"))
@@ -356,8 +385,17 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
             argv += ["--bind", bind]
         for key, value in spec.env.items():
             argv += ["--env", f"{key}={value}"]
-        argv += _resource_flags(spec.resources)
-        argv += list(self._create_config.extra_start_args)
+        start_args = list(self._create_config.extra_start_args)
+        resource_limit_flags = _resource_limit_flags(spec.resources)
+        if resource_limit_flags and self._create_config.apply_resource_limits:
+            if "--fakeroot" in start_args:
+                LOGGER.warning(
+                    "Skipping apptainer CPU/memory resource flags because create.extra_start_args contains --fakeroot."
+                )
+            else:
+                argv += resource_limit_flags
+        argv += _resource_passthrough_flags(spec.resources)
+        argv += start_args
         argv += [image, name]
 
         # start the instance; clean up the staging dir on any failure.
@@ -376,7 +414,13 @@ async def create(self, spec: SandboxSpec) -> SandboxHandle:
         handle = SandboxHandle(
             sandbox_id=name,
             provider_name=self.name,
-            raw=_ApptainerInstance(name=name, staging_dir=staging_dir, mount_point=mount_point, image=image),
+            raw=_ApptainerInstance(
+                name=name,
+                staging_dir=staging_dir,
+                mount_point=mount_point,
+                image=image,
+                env=dict(spec.env),
+            ),
         )
 
         # Verify the sandbox can actually run a command before handing it back.
@@ -469,8 +513,11 @@ async def exec(
         flags: list[str] = []
         if cwd is not None:
             flags += ["--pwd", cwd]
+        merged_env = dict(getattr(inst, "env", {}))
         if env:
-            for key, value in env.items():
+            merged_env.update(env)
+        if merged_env:
+            for key, value in merged_env.items():
                 flags += ["--env", f"{key}={value}"]
 
         effective_command = command
@@ -500,12 +547,12 @@ async def exec(
 
         if code != 0 and _is_runtime_failure(err):
             return SandboxExecResult(
-                stdout=out or None,
-                stderr=err or None,
+                stdout=out,
+                stderr=err,
                 return_code=SANDBOX_RUNTIME_RETURN_CODE,
                 error_type="sandbox",
             )
-        return SandboxExecResult(stdout=out or None, stderr=err or None, return_code=code, error_type=None)
+        return SandboxExecResult(stdout=out, stderr=err, return_code=code, error_type=None)
 
     async def upload_file(self, handle: SandboxHandle, source_path: Path, target_path: str) -> None:
         """Upload one host file into the sandbox.
@@ -589,7 +636,6 @@ async def status(self, handle: SandboxHandle) -> SandboxStatus:
 
         for entry in instances:
             if entry.get("instance") == inst.name:
-                # apptainer's list output has no explicit state field for a listed (i.e. live) instance, so being present means it is running.
                 return _to_sandbox_status(entry.get("state") or "running")
 
         # Not listed -> it has been stopped (or never existed anymore).
@@ -609,7 +655,7 @@ async def close(self, handle: SandboxHandle) -> None:
                 [self._binary, "instance", "stop", inst.name],
                 timeout_s=self._exec_config.default_timeout_s,
             )
-            if code != 0 and not _is_runtime_failure(err):
+            if code != 0 and not _is_missing_instance(err):
                 stop_error = RuntimeError(
                     f"apptainer instance stop failed (code={code}) for {inst.name!r}: {err.strip()}"
                 )
diff --git a/tests/unit_tests/test_apptainer_provider.py b/tests/unit_tests/test_apptainer_provider.py
index 2e5cc0b1a3..fea5f8ae82 100644
--- a/tests/unit_tests/test_apptainer_provider.py
+++ b/tests/unit_tests/test_apptainer_provider.py
@@ -55,12 +55,19 @@ def _contains_seq(haystack: list[str], needle: list[str]) -> bool:
     return any(haystack[i : i + len(needle)] == needle for i in range(len(haystack) - len(needle) + 1))
 
 
-def _make_handle(staging: Path, *, name: str = "nemo-gym-x", mount: str = "/sandbox") -> SandboxHandle:
+def _make_handle(
+    staging: Path,
+    *,
+    name: str = "nemo-gym-x",
+    mount: str = "/sandbox",
+    env: dict[str, str] | None = None,
+) -> SandboxHandle:
     inst = apptainer_provider._ApptainerInstance(
         name=name,
         staging_dir=staging,
         mount_point=mount,
         image="docker://img",
+        env=env or {},
     )
     return SandboxHandle(sandbox_id=name, provider_name="apptainer", raw=inst)
 
@@ -138,6 +145,13 @@ def test_resource_flags() -> None:
     assert apptainer_provider._resource_flags(SandboxResources()) == []
 
 
+def test_resolve_image() -> None:
+    resolve = apptainer_provider._resolve_image
+    assert resolve("ubuntu:22.04") == "docker://ubuntu:22.04"  # bare name gets the docker:// scheme
+    assert resolve("oras://registry.example/image:tag") == "oras://registry.example/image:tag"  # URI kept
+    assert resolve("/tmp/image.sif") == "/tmp/image.sif"  # local path kept
+
+
 def test_to_sandbox_status() -> None:
     to_status = apptainer_provider._to_sandbox_status
     assert to_status("running") is SandboxStatus.RUNNING
@@ -154,6 +168,7 @@ def test_path_under_mount() -> None:
     assert under("/sandbox", "/sandbox/a/b.txt") == "a/b.txt"
     assert under("/sandbox", "/sandbox") == ""
     assert under("/sandbox/", "/sandbox/x") == "x"
+    assert under("/sandbox", "/sandbox/../outside.txt") is None
     assert under("/sandbox", "/etc/passwd") is None
 
 
@@ -193,7 +208,7 @@ def responder(argv: list[str]) -> tuple[int, str, str]:
     )
 
     spec = SandboxSpec(
-        image="docker://ubuntu:22.04",
+        image="ubuntu:22.04",
         env={"FOO": "bar"},
         resources={"cpu": 2, "memory_mib": 1024, "gpu": 1},
         ttl_s=60,
@@ -207,6 +222,8 @@ def responder(argv: list[str]) -> tuple[int, str, str]:
     assert handle.sandbox_id.startswith(apptainer_provider.INSTANCE_NAME_PREFIX)
     assert handle.raw.staging_dir == staging
     assert handle.raw.mount_point == "/sandbox"
+    assert handle.raw.image == "docker://ubuntu:22.04"
+    assert handle.raw.env == {"FOO": "bar"}
 
     start_argv = rec.calls[0]["argv"]
     assert start_argv[:3] == [FAKE_BINARY, "instance", "start"]
@@ -226,6 +243,27 @@ def responder(argv: list[str]) -> tuple[int, str, str]:
     assert rec.calls[1]["timeout_s"] == 30
 
 
+@pytest.mark.parametrize("create_config", [{"apply_resource_limits": False}, {"extra_start_args": ["--fakeroot"]}])
+async def test_create_skips_cgroup_resource_limits(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, create_config: dict[str, Any]
+) -> None:
+    staging = tmp_path / "staging"
+    monkeypatch.setattr(apptainer_provider.tempfile, "mkdtemp", lambda prefix: str(staging.mkdir() or staging))
+
+    def responder(argv: list[str]) -> tuple[int, str, str]:
+        if "exec" in argv:  # answer the readiness probe with the expected stdout
+            return (0, apptainer_provider.READY_PROBE_EXPECTED, "")
+        return (0, "", "")
+
+    provider, rec = _make_provider(monkeypatch, responder, create=create_config)
+    await provider.create(SandboxSpec(image="ubuntu:22.04", resources={"cpu": 2, "memory_mib": 1024, "gpu": 1}))
+
+    start_argv = rec.calls[0]["argv"]  # first recorded call is the instance start
+    assert "--cpus" not in start_argv  # CPU limit flag suppressed
+    assert "--memory" not in start_argv  # memory limit flag suppressed
+    assert "--nv" in start_argv  # GPU passthrough is not a cgroup limit, so it stays
+
+
 async def test_create_extra_binds_from_provider_options(
     fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
 ) -> None:
@@ -350,6 +388,31 @@ async def test_exec_normal_with_cwd_and_env(fake_binary: str, monkeypatch: pytes
     assert rec.calls[0]["timeout_s"] == 180  # default exec timeout
 
 
+async def test_exec_reapplies_create_env_and_overrides_call_env(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    provider, rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))
+    handle = _make_handle(tmp_path, env={"A": "from-create", "B": "base"})  # env stored on the handle
+
+    await provider.exec(handle, "env", env={"A": "from-call"})  # per-call env must win for key A
+
+    argv = rec.calls[0]["argv"]
+    assert _contains_seq(argv, ["--env", "A=from-call"])
+    assert _contains_seq(argv, ["--env", "B=base"])  # untouched handle env is re-applied
+    assert "A=from-create" not in argv  # the handle's value for A was overridden, not duplicated
+
+
+async def test_exec_empty_streams_are_strings(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (0, "", ""))  # command emits no output
+    result = await provider.exec(_make_handle(tmp_path), "true")
+
+    assert result.return_code == 0
+    assert result.stdout == ""  # empty string, not None
+    assert result.stderr == ""  # empty string, not None
+
+
 @pytest.mark.parametrize(
     "user,fakeroot_for_root,expect_fakeroot,expect_su",
     [
@@ -604,6 +667,17 @@ async def test_close_real_failure_raises_but_cleans_up(
     assert not staging.exists()
 
 
+async def test_close_fatal_permission_denied_raises(
+    fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    staging = tmp_path / "staging"
+    staging.mkdir()
+    provider, _rec = _make_provider(monkeypatch, lambda argv: (1, "", "FATAL: permission denied"))
+    with pytest.raises(RuntimeError, match="stop failed"):  # FATAL alone is not a missing-instance marker
+        await provider.close(_make_handle(staging))
+    assert not staging.exists()  # staging dir is removed even though close() raised
+
+
 async def test_close_timeout_raises(fake_binary: str, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
     staging = tmp_path / "staging"
     staging.mkdir()