From 2fc65e900754f054cfcb474b8bf159e1f33889f5 Mon Sep 17 00:00:00 2001 From: PRAteek-singHWY Date: Tue, 9 Jun 2026 17:17:18 +0530 Subject: [PATCH 1/2] =?UTF-8?q?week=5F1:=20Module=20C=20(The=20Librarian)?= =?UTF-8?q?=20=E2=80=94=20RFC=20contracts=20+=20eval=20harness=20+=20golde?= =?UTF-8?q?n=20=20=20dataset?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contracts + regression ruler before any pipeline code, per the OIE RFC's 'test before the code' directive. RFC #734 envelopes (KnowledgeItem in, LinkProposal/ReviewItem out) as Pydantic v2, drift-guarded against the vendored owasp-graph schemas; TRACT hub-firewall + multi-link scoring; 319-row golden dataset derived from standards_cache.sqlite with --check drift detection. One prod edit: pydantic>=2,<3 pin. --- application/tests/librarian/__init__.py | 0 .../tests/librarian/config_loader_test.py | 57 + application/tests/librarian/dataset_test.py | 123 + .../librarian/fixtures/golden_dataset.json | 6373 +++++++++++++++++ .../fixtures/golden_dataset.schema.json | 85 + .../fixtures/sample_knowledge_queue.jsonl | 3 + .../tests/librarian/hub_firewall_test.py | 41 + application/tests/librarian/schemas_test.py | 384 + application/tests/librarian/scoring_test.py | 32 + application/utils/librarian/__init__.py | 26 + .../_rfc_schemas/knowledge-item.json | 95 + .../librarian/_rfc_schemas/link-proposal.json | 91 + .../utils/librarian/_rfc_schemas/locator.json | 30 + .../librarian/_rfc_schemas/proposed-link.json | 14 + .../librarian/_rfc_schemas/review-item.json | 45 + .../librarian/_rfc_schemas/source-ref.json | 26 + application/utils/librarian/config_loader.py | 33 + application/utils/librarian/hub_firewall.py | 40 + .../utils/librarian/knowledge_source.py | 34 + application/utils/librarian/schemas.py | 364 + application/utils/librarian/scoring.py | 35 + requirements.txt | 2 +- scripts/build_golden_dataset.py | 463 ++ scripts/evaluate_librarian.py | 95 + 24 files changed, 8490 
insertions(+), 1 deletion(-) create mode 100644 application/tests/librarian/__init__.py create mode 100644 application/tests/librarian/config_loader_test.py create mode 100644 application/tests/librarian/dataset_test.py create mode 100644 application/tests/librarian/fixtures/golden_dataset.json create mode 100644 application/tests/librarian/fixtures/golden_dataset.schema.json create mode 100644 application/tests/librarian/fixtures/sample_knowledge_queue.jsonl create mode 100644 application/tests/librarian/hub_firewall_test.py create mode 100644 application/tests/librarian/schemas_test.py create mode 100644 application/tests/librarian/scoring_test.py create mode 100644 application/utils/librarian/__init__.py create mode 100644 application/utils/librarian/_rfc_schemas/knowledge-item.json create mode 100644 application/utils/librarian/_rfc_schemas/link-proposal.json create mode 100644 application/utils/librarian/_rfc_schemas/locator.json create mode 100644 application/utils/librarian/_rfc_schemas/proposed-link.json create mode 100644 application/utils/librarian/_rfc_schemas/review-item.json create mode 100644 application/utils/librarian/_rfc_schemas/source-ref.json create mode 100644 application/utils/librarian/config_loader.py create mode 100644 application/utils/librarian/hub_firewall.py create mode 100644 application/utils/librarian/knowledge_source.py create mode 100644 application/utils/librarian/schemas.py create mode 100644 application/utils/librarian/scoring.py create mode 100644 scripts/build_golden_dataset.py create mode 100644 scripts/evaluate_librarian.py diff --git a/application/tests/librarian/__init__.py b/application/tests/librarian/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/application/tests/librarian/config_loader_test.py b/application/tests/librarian/config_loader_test.py new file mode 100644 index 000000000..1d22d751b --- /dev/null +++ b/application/tests/librarian/config_loader_test.py @@ -0,0 +1,57 @@ +import os 
+import unittest +from unittest import mock + +from application.utils.librarian.config_loader import LibrarianConfig, load_config + + +class TestConfigLoaderDefaults(unittest.TestCase): + def test_defaults_when_env_unset(self): + with mock.patch.dict(os.environ, {}, clear=True): + cfg = load_config() + self.assertEqual(cfg.crossencoder_model, "cross-encoder/ms-marco-MiniLM-L-6-v2") + self.assertEqual(cfg.top_k_retrieval, 20) + self.assertEqual(cfg.top_k_rerank, 5) + self.assertEqual(cfg.link_threshold, 0.8) + self.assertEqual(cfg.batch_size, 32) + self.assertEqual(cfg.ece_target, 0.10) + self.assertEqual(cfg.conformal_alpha, 0.10) + + def test_config_is_frozen(self): + cfg = load_config() + with self.assertRaises(Exception): + cfg.link_threshold = 0.5 # type: ignore[misc] + + +class TestConfigLoaderOverrides(unittest.TestCase): + OVERRIDES = { + "CRE_LIBRARIAN_CROSSENCODER_MODEL": "cross-encoder/other", + "CRE_LIBRARIAN_TOP_K_RETRIEVAL": "50", + "CRE_LIBRARIAN_TOP_K_RERANK": "10", + "CRE_LIBRARIAN_LINK_THRESHOLD": "0.7", + "CRE_LIBRARIAN_BATCH_SIZE": "64", + "CRE_LIBRARIAN_ECE_TARGET": "0.05", + "CRE_LIBRARIAN_CONFORMAL_ALPHA": "0.20", + } + + def test_env_overrides_apply(self): + with mock.patch.dict(os.environ, self.OVERRIDES, clear=True): + cfg = load_config() + self.assertEqual(cfg.crossencoder_model, "cross-encoder/other") + self.assertEqual(cfg.top_k_retrieval, 50) + self.assertEqual(cfg.top_k_rerank, 10) + self.assertAlmostEqual(cfg.link_threshold, 0.7) + self.assertEqual(cfg.batch_size, 64) + self.assertAlmostEqual(cfg.ece_target, 0.05) + self.assertAlmostEqual(cfg.conformal_alpha, 0.20) + + def test_bad_int_env_raises(self): + with mock.patch.dict( + os.environ, {"CRE_LIBRARIAN_TOP_K_RETRIEVAL": "not-an-int"}, clear=True + ): + with self.assertRaises(ValueError): + load_config() + + +if __name__ == "__main__": + unittest.main() diff --git a/application/tests/librarian/dataset_test.py b/application/tests/librarian/dataset_test.py new file mode 100644 index 
000000000..bf37c858d --- /dev/null +++ b/application/tests/librarian/dataset_test.py @@ -0,0 +1,123 @@ +"""Sanity tests for the populated golden dataset. + +These tests run in CI against the committed JSON. They do NOT require the DB — +the DB-driven derivation is covered by ``scripts/build_golden_dataset.py``'s +own ``--check`` mode, which the determinism test invokes when the DB is present. +""" + +import json +import os +import subprocess +import sys +import unittest +from collections import Counter + +import jsonschema +from pydantic import ValidationError + +from application.utils.librarian.schemas import GoldenDatasetRow + +_HERE = os.path.dirname(__file__) +_REPO_ROOT = os.path.abspath(os.path.join(_HERE, "..", "..", "..")) +_DATASET = os.path.join(_HERE, "fixtures", "golden_dataset.json") +_JSON_SCHEMA = os.path.join(_HERE, "fixtures", "golden_dataset.schema.json") +_BUILD_SCRIPT = os.path.join(_REPO_ROOT, "scripts", "build_golden_dataset.py") +_DB = os.path.join(_REPO_ROOT, "standards_cache.sqlite") + + +def _load(path): + with open(path, encoding="utf-8") as fh: + return json.load(fh) + + +class TestGoldenDatasetShape(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.rows = _load(_DATASET) + cls.json_schema = _load(_JSON_SCHEMA) + + def test_has_at_least_the_asvs_core(self): + # Master guide §9.3 + golden-dataset-plan §3: at least 277 ASVS-core rows. + positive_asvs = [ + r + for r in self.rows + if r["slice"] == "positive" + and r["provenance"]["ground_truth_source"].startswith( + "OpenCRE DB mapping (cre_node_links)" + ) + ] + self.assertGreaterEqual(len(positive_asvs), 277) + + def test_all_five_slices_present(self): + slices = Counter(r["slice"] for r in self.rows) + self.assertEqual( + set(slices), + {"explicit", "positive", "hard_negative", "update", "ambiguous"}, + ) + # Every slice has at least a few rows so the harness can stratify. 
+ for s, n in slices.items(): + self.assertGreaterEqual(n, 5, msg=f"slice {s} only has {n} rows") + + def test_multilink_positive_rows_exist(self): + # The Q-D scoring rule has nothing to exercise unless multi-link + # ground truth is actually present somewhere. + multi = [ + r + for r in self.rows + if r["slice"] == "positive" + and len(r.get("expected", {}).get("cre_ids") or []) > 1 + ] + self.assertGreater(len(multi), 0, "no multi-link positive rows present") + + def test_every_row_validates_against_pydantic_model(self): + errors = [] + for r in self.rows: + try: + GoldenDatasetRow.model_validate(r) + except ValidationError as e: + errors.append((r.get("id"), str(e))) + self.assertEqual(errors, [], msg=f"first failure: {errors[:1]}") + + def test_every_row_validates_against_json_schema(self): + validator = jsonschema.Draft202012Validator(self.json_schema) + first_errors = [] + for r in self.rows: + errs = sorted(validator.iter_errors(r), key=str) + if errs: + first_errors.append((r.get("id"), [str(e) for e in errs[:1]])) + if len(first_errors) >= 3: + break + self.assertEqual(first_errors, []) + + def test_provenance_is_recorded_per_row(self): + for r in self.rows: + self.assertTrue( + r["provenance"].get("ground_truth_source"), + msg=f"row {r.get('id')} missing ground_truth_source", + ) + + def test_ids_are_unique(self): + ids = [r["id"] for r in self.rows] + self.assertEqual(len(ids), len(set(ids))) + + +class TestDatasetDeterminism(unittest.TestCase): + """The committed JSON must re-derive identically from the DB.""" + + def test_build_check_matches_committed_dataset(self): + if not os.path.exists(_DB): + self.skipTest("standards_cache.sqlite not present") + result = subprocess.run( + [sys.executable, _BUILD_SCRIPT, "--check"], + capture_output=True, + text=True, + ) + self.assertEqual( + result.returncode, + 0, + msg=f"stdout={result.stdout}\nstderr={result.stderr}", + ) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/application/tests/librarian/fixtures/golden_dataset.json b/application/tests/librarian/fixtures/golden_dataset.json new file mode 100644 index 000000000..816539b4a --- /dev/null +++ b/application/tests/librarian/fixtures/golden_dataset.json @@ -0,0 +1,6373 @@ +[ + { + "id": "gold:explicit:V2.1.1", + "schema_version": "0.1.0", + "slice": "explicit", + "input": { + "text": "Per CRE 027-555, verify that user-set passwords are at least 12 characters in length after removing leading and trailing whitespace.", + "title_hint": "Password length policy", + "explicit_cre_ref": "027-555", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "027-555" + ] + }, + "provenance": { + "section_path": "V2.1.1", + "ground_truth_source": "synthesised text citing the real cre.external_id from the OpenCRE DB" + } + }, + { + "id": "gold:explicit:V3.4.1", + "schema_version": "0.1.0", + "slice": "explicit", + "input": { + "text": "Refer to CRE 688-081. Verify that cookie-based session tokens have the Secure attribute set.", + "title_hint": "Session cookie security", + "explicit_cre_ref": "688-081", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "688-081" + ] + }, + "provenance": { + "section_path": "V3.4.1", + "ground_truth_source": "synthesised text citing the real cre.external_id from the OpenCRE DB" + } + }, + { + "id": "gold:explicit:V2.4.1", + "schema_version": "0.1.0", + "slice": "explicit", + "input": { + "text": "This control corresponds to CRE 622-203: passwords shall be stored using an approved key derivation function.", + "title_hint": "Credential storage", + "explicit_cre_ref": "622-203", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "622-203" + ] + }, + "provenance": { + "section_path": "V2.4.1", + "ground_truth_source": "synthesised text citing the real cre.external_id from the OpenCRE DB" + } + }, + { + "id": "gold:explicit:V4.1.1", + 
"schema_version": "0.1.0", + "slice": "explicit", + "input": { + "text": "See CRE 650-560 for the canonical guidance on enforcing access control rules at a trusted service layer.", + "title_hint": "Access control enforcement", + "explicit_cre_ref": "650-560", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "650-560" + ] + }, + "provenance": { + "section_path": "V4.1.1", + "ground_truth_source": "synthesised text citing the real cre.external_id from the OpenCRE DB" + } + }, + { + "id": "gold:explicit:V8.3.1", + "schema_version": "0.1.0", + "slice": "explicit", + "input": { + "text": "Per CRE 186-540, sensitive data shall be sent to the server in the HTTP message body or headers, never in the URL query string.", + "title_hint": "Sensitive data in transit", + "explicit_cre_ref": "186-540", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "186-540" + ] + }, + "provenance": { + "section_path": "V8.3.1", + "ground_truth_source": "synthesised text citing the real cre.external_id from the OpenCRE DB" + } + }, + { + "id": "gold:asvs:V1.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the use of a secure software development lifecycle that addresses security in all stages of development.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "616-305" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all user stories and features contain functional security constraints, such as \"As a user, I should be able to view and edit my profile. 
I should not be able to view or edit anyone else's profile\"", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "822-100" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify documentation and justification of all the application's trust boundaries, components, and significant data flows.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "820-878" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify definition and security analysis of the application's high-level architecture and all connected remote services.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.1.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify implementation of centralized, simple (economy of design), vetted, secure, and reusable security controls to avoid duplicate, missing, ineffective, or insecure controls.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "344-611" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.1.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.1.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify availability 
of a secure coding checklist, security requirements, guideline, or policy to all developers and testers.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "036-275" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.1.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.10.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a source code control system is in use, with procedures to ensure that check-ins are accompanied by issues or change tickets. The source code control system should have access control and identifiable users to allow traceability of any changes.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "757-271" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.10.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.11.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the definition and documentation of all application components in terms of the business or security functions they provide.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "162-655" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.11.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.11.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all high-value business logic flows, including authentication, session management and access control, do not share unsynchronized state.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "670-660" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.11.2", + "ground_truth_source": "OpenCRE DB 
mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.11.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all high-value business logic flows, including authentication, session management and access control are thread safe and resistant to time-of-check and time-of-use race conditions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "380-540" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.11.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.12.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that user-uploaded files - if required to be displayed or downloaded from the application - are served by either octet stream downloads, or from an unrelated domain, such as a cloud file storage bucket. Implement a suitable Content Security Policy (CSP) to reduce the risk from XSS vectors or other attacks from the uploaded file.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "384-344" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.12.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.14.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the segregation of components of differing trust levels through well-defined security controls, firewall rules, API gateways, reverse proxies, cloud-based security groups, or similar mechanisms.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "273-600" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.14.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.14.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + 
"input": { + "text": "Verify that binary signatures, trusted connections, and verified endpoints are used to deploy binaries to remote devices.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "171-222" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.14.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.14.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the build pipeline warns of out-of-date or insecure components and takes appropriate actions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "053-751" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.14.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.14.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the build pipeline contains a build step to automatically build and verify the secure deployment of the application, particularly if the application infrastructure is software defined, such as cloud environment build scripts.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "263-184" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.14.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.14.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that application deployments adequately sandbox, containerize and/or isolate at the network level to delay and deter attackers from attacking other applications, especially when they are performing sensitive or dangerous actions such as deserialization.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "515-021" + ] + }, + 
"provenance": { + "standard_version": "4.0", + "section_path": "V1.14.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.14.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application does not use unsupported, insecure, or deprecated client-side technologies such as NSAPI plugins, Flash, Shockwave, ActiveX, Silverlight, NACL, or client-side Java applets.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "673-475" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.14.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the use of unique or special low-privilege operating system accounts for all application components, services, and servers.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "330-281" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that communications between application components, including APIs, middleware and data layers, are authenticated. 
Components should have the least necessary privileges needed.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "278-413" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application uses a single vetted authentication mechanism that is known to be secure, can be extended to include strong authentication, and has sufficient logging and monitoring to detect account abuse or breaches.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "113-133" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all authentication pathways and identity management APIs implement consistent authentication security control strength, such that there are no weaker alternatives per the risk of the application.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "576-042" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that trusted enforcement points, such as access control gateways, servers, and serverless functions, enforce access controls. 
Never enforce access controls on the client.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "640-364" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.4.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application uses a single and well-vetted access control mechanism for accessing protected data and resources. All requests must pass through this single mechanism to avoid copy and paste or insecure alternative paths.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "117-371" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.4.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.4.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that attribute or feature-based access control is used whereby the code checks the user's authorization for a feature/data item rather than just their role. 
Permissions should still be allocated using roles.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "801-310" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.4.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.5.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that input and output requirements clearly define how to handle and process data based on type, content, and applicable laws, regulations, and other policy compliance.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "782-234" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.5.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.5.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that serialization is not used when communicating with untrusted clients. If this is not possible, ensure that adequate integrity controls (and possibly encryption if sensitive data is sent) are enforced to prevent deserialization attacks including object injection.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "736-554" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.5.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.5.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that input validation is enforced on a trusted service layer. 
([C5](https://owasp.org/www-project-proactive-controls/#div-numbering))", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "848-711" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.5.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.5.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that output encoding occurs close to or by the interpreter for which it is intended.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "806-367" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.5.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.6.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that there is an explicit policy for management of cryptographic keys and that a cryptographic key lifecycle follows a key management standard such as NIST SP 800-57.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "287-305" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.6.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.6.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that consumers of cryptographic services protect key material and other secrets by using key vaults or API based alternatives.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "508-702" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.6.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.6.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all keys and 
passwords are replaceable and are part of a well-defined process to re-encrypt sensitive data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "821-832" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.6.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.6.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the architecture treats client-side secrets--such as symmetric keys, passwords, or API tokens--as insecure and never uses them to protect or access sensitive data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "232-325" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.6.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.7.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a common logging format and approach is used across the system.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "260-200" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.7.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.7.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that logs are securely transmitted to a preferably remote system for analysis, detection, alerting, and escalation.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "026-280" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.7.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.8.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all sensitive 
data is identified and classified into protection levels.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "765-788" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.8.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.8.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all protection levels have an associated set of protection requirements, such as encryption requirements, integrity requirements, retention, privacy and other confidentiality requirements, and that these are applied in the architecture.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "731-120" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.8.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.9.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application encrypts communications between components, particularly when these components are in different containers, systems, sites, or cloud providers.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "527-034" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.9.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V1.9.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that application components verify the authenticity of each side in a communication link to prevent person-in-the-middle attacks. 
For example, application components should validate TLS certificates and chains.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "530-671" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V1.9.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a code analysis tool is in use that can detect potentially malicious code, such as time functions, unsafe file operations and network connections.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "611-158" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application source code and third party libraries do not contain unauthorized phone home or data collection capabilities. 
Where such functionality exists, obtain the user's permission for it to operate before collecting any data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "834-645" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application does not ask for unnecessary or excessive permissions to privacy related features or sensors, such as contacts, cameras, microphones, or location.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "540-566" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application source code and third party libraries do not contain back doors, such as hard-coded or additional undocumented accounts or keys, code obfuscation, undocumented binary blobs, rootkits, or anti-debugging, insecure debugging features, or otherwise out of date, insecure, or hidden functionality that could be used maliciously if discovered.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "838-636" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application source code and third party libraries do not contain time bombs by searching for date and time related functions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + 
"cre_ids": [ + "418-525" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application source code and third party libraries do not contain malicious code, such as salami attacks, logic bypasses, or logic bombs.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "265-800" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.2.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application source code and third party libraries do not contain Easter eggs or any other potentially unwanted functionality.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "154-031" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.2.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if the application has a client or server auto-update feature, updates should be obtained over secure channels and digitally signed. 
The update code must validate the digital signature of the update before installing or executing the update.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "028-254" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application employs integrity protections, such as code signing or subresource integrity. The application must not load or execute code from untrusted sources, such as loading includes, modules, plugins, code, or libraries from untrusted sources or the Internet.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "307-507" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V10.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application has protection from subdomain takeovers if the application relies upon DNS entries or DNS subdomains, such as expired domain names, out of date DNS pointers or CNAMEs, expired projects at public source code repos, or transient cloud APIs, serverless functions, or storage buckets (*autogen-bucket-id*.cloud.example.com) or similar. 
Protections can include ensuring that DNS names used by applications are regularly checked for expiry or change.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "336-512" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V10.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application will only process business logic flows for the same user in sequential step order and without skipping steps.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "534-605" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application will only process business logic flows with all steps being processed in realistic human time, i.e. 
transactions are not submitted too quickly.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "456-535" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application has appropriate limits for specific business actions or transactions which are correctly enforced on a per user basis.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "746-705" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application has anti-automation controls to protect against excessive calls such as mass data exfiltration, business logic requests, file uploads or denial of service attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "630-573" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application has business logic limits or validation to protect against likely business risks or threats, identified using threat modeling or similar methodologies.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "660-867" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.6:positive", + 
"schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application does not suffer from \"Time Of Check to Time Of Use\" (TOCTOU) issues or other race conditions for sensitive operations.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "134-412" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application monitors for unusual events or activity from a business logic perspective. For example, attempts to perform actions out of order or actions which a normal user would never attempt.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "418-853" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V11.1.8:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application has configurable alerting when automated attacks or unusual activity is detected.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "725-682" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V11.1.8", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application will not accept large files that could fill up storage or cause a denial of service.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "660-052" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.1.1", + "ground_truth_source": 
"OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application checks compressed files (e.g. zip, gz, docx, odt) against maximum allowed uncompressed size and against maximum number of files before uncompressing the file.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "163-518" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a file size quota and maximum number of files per user is enforced to ensure that a single user cannot fill up the storage with too many files, or excessively large files.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "463-820" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that files obtained from untrusted sources are validated to be of expected type based on the file's content.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "175-235" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that user-submitted filename metadata is not used directly by system or framework filesystems and that a URL API is used to protect against path traversal.", + "source_standard": "ASVS" + }, + "expected": { + 
"decision": "linked", + "cre_ids": [ + "675-168" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that user-submitted filename metadata is validated or ignored to prevent the disclosure, creation, updating or removal of local files (LFI).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "737-086" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that user-submitted filename metadata is validated or ignored to prevent the disclosure or execution of remote files via Remote File Inclusion (RFI) or Server-side Request Forgery (SSRF) attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "742-056" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.3.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against Reflective File Download (RFD) by validating or ignoring user-submitted filenames in a JSON, JSONP, or URL parameter, the response Content-Type header should be set to text/plain, and the Content-Disposition header should have a fixed filename.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "421-513" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.3.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": 
"gold:asvs:V12.3.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that untrusted file metadata is not used directly with system API or libraries, to protect against OS command injection.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "683-722" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.3.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.3.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application does not include and execute functionality from untrusted sources, such as unverified content distribution networks, JavaScript libraries, node npm libraries, or server-side DLLs.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "777-470" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.3.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that files obtained from untrusted sources are stored outside the web root, with limited permissions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "307-111" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that files obtained from untrusted sources are scanned by antivirus scanners to prevent upload and serving of known malicious content.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "112-273" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": 
"V12.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.5.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the web tier is configured to serve only files with specific file extensions to prevent unintentional information and source code leakage. For example, backup files (e.g. .bak), temporary working files (e.g. .swp), compressed files (.zip, .tar.gz, etc) and other extensions commonly used by editors should be blocked unless required.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "314-701" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.5.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.5.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that direct requests to uploaded files will never be executed as HTML/JavaScript content.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "545-243" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.5.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V12.6.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the web or application server is configured with an allow list of resources or systems to which the server can send requests or load data/files from.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "814-322" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V12.6.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all application components use the same encodings 
and parsers to avoid parsing attacks that exploit different URI or file parsing behavior that could be used in SSRF and RFI attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "061-186" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify API URLs do not expose sensitive information, such as the API key, session tokens etc.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "333-888" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that authorization decisions are made at both the URI, enforced by programmatic or declarative security at the controller or router, and at the resource level, enforced by model-based permissions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "664-080" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that requests containing unexpected or missing content types are rejected with appropriate headers (HTTP response status 406 Unacceptable or 415 Unsupported Media Type).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "377-680" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + 
{ + "id": "gold:asvs:V13.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that enabled RESTful HTTP methods are a valid choice for the user or action, such as preventing normal users using DELETE or PUT on protected API or resources.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "532-878" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that JSON schema validation is in place and verified before accepting input.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "146-706" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that RESTful web services that utilize cookies are protected from Cross-Site Request Forgery via the use of at least one or more of the following: double submit cookie pattern, CSRF nonces, or Origin request header checks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "464-084" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that REST services explicitly check the incoming Content-Type to be the expected one, such as application/xml or application/json.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "543-512" + ] + }, + "provenance": { + 
"standard_version": "4.0", + "section_path": "V13.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.2.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the message headers and payload are trustworthy and not modified in transit. Requiring strong encryption for transport (TLS only) may be sufficient in many cases as it provides both confidentiality and integrity protection. Per-message digital signatures can provide additional assurance on top of the transport protections for high-security applications but bring with them additional complexity and risks to weigh against the benefits.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "426-842" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.2.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that XSD schema validation takes place to ensure a properly formed XML document, followed by validation of each input field before any processing of that data takes place.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "611-051" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the message payload is signed using WS-Security to ensure reliable transport between client and service.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "456-636" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } 
+ }, + { + "id": "gold:asvs:V13.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a query allow list or a combination of depth limiting and amount limiting is used to prevent GraphQL or data layer expression Denial of Service (DoS) as a result of expensive, nested queries. For more advanced scenarios, query cost analysis should be used.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "268-088" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V13.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that GraphQL or other data layer authorization logic should be implemented at the business logic layer instead of the GraphQL layer.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "612-252" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V13.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application build and deployment processes are performed in a secure and repeatable way, such as CI / CD automation, automated configuration management, and automated deployment scripts.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "253-452" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that compiler flags are configured to enable all available buffer overflow protections and warnings, including stack 
randomization, data execution prevention, and to break the build if an unsafe pointer, memory, format string, integer, or string operations are found.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "314-131" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that server configuration is hardened as per the recommendations of the application server and frameworks in use.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "180-488" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application, configuration, and all dependencies can be re-deployed using automated deployment scripts, built from a documented and tested runbook in a reasonable time, or restored from backups in a timely fashion.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "208-355" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that authorized administrators can verify the integrity of all security-relevant configurations to detect tampering.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "347-352" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.1.5", + "ground_truth_source": "OpenCRE DB mapping 
(cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all components are up to date, preferably using a dependency checker during build or compile time.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "715-334" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all unneeded features, documentation, sample applications and configurations are removed.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "462-245" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if application assets, such as JavaScript libraries, CSS or web fonts, are hosted externally on a Content Delivery Network (CDN) or external provider, Subresource Integrity (SRI) is used to validate the integrity of the asset.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "577-260" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that third party components come from pre-defined, trusted and continually maintained repositories.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "715-223" + ] + }, + "provenance": { + "standard_version": "4.0", + 
"section_path": "V14.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a Software Bill of Materials (SBOM) is maintained of all third party libraries in use.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "863-521" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.2.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the attack surface is reduced by sandboxing or encapsulating third party libraries to expose only the required behaviour into the application.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "860-084" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.2.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that web or application server and application framework debug modes are disabled in production to eliminate debug features, developer consoles, and unintended security disclosures.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "208-805" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the HTTP headers or any part of the HTTP response do not expose detailed version information of system components.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + 
"cre_ids": [ + "743-110" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that every HTTP response contains a Content-Type header. Also specify a safe character set (e.g., UTF-8, ISO-8859-1) if the content types are text/*, /+xml and application/xml. Content must match with the provided Content-Type header.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "036-725" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all API responses contain a Content-Disposition: attachment; filename=\"api.json\" header (or other appropriate filename for the content type).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "736-237" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a Content Security Policy (CSP) response header is in place that helps mitigate impact for XSS attacks like HTML, DOM, JSON, and JavaScript injection vulnerabilities.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "257-668" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.4.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + 
"text": "Verify that all responses contain a X-Content-Type-Options: nosniff header.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "065-388" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.4.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a Strict-Transport-Security header is included on all responses and for all subdomains, such as Strict-Transport-Security: max-age=15724800; includeSubdomains.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "036-147" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.4.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a suitable Referrer-Policy header is included to avoid exposing sensitive information in the URL through the Referer header to untrusted parties.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "268-100" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.4.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.4.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the content of a web application cannot be embedded in a third-party site by default and that embedding of the exact resources is only allowed where necessary by using suitable Content-Security-Policy: frame-ancestors and X-Frame-Options response headers.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "480-071" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": 
"V14.4.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.5.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application server only accepts the HTTP methods in use by the application/API, including pre-flight OPTIONS, and logs/alerts on any requests that are not valid for the application context.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "483-715" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.5.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.5.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the supplied Origin header is not used for authentication or access control decisions, as the Origin header can easily be changed by an attacker.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "405-411" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.5.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.5.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the Cross-Origin Resource Sharing (CORS) Access-Control-Allow-Origin header uses a strict allow list of trusted domains and subdomains to match against and does not support the \"null\" origin.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "316-272" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.5.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V14.5.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that HTTP headers added by a trusted proxy or SSO devices, such as a bearer token, are 
authenticated by the application.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "820-421" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V14.5.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that user set passwords are at least 12 characters in length (after multiple spaces are combined).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "027-555" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.10:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that there are no periodic credential rotation or password history requirements.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "338-370" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.10", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.11:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that \"paste\" functionality, browser password helpers, and external password managers are permitted.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "630-577" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.11", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.12:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the user can choose to either temporarily view the entire masked password, or temporarily view the last typed character of the password on 
platforms that do not have this as built-in functionality.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "487-305" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.12", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that passwords of at least 64 characters are permitted, and that passwords of more than 128 characters are denied.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "158-874" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that password truncation is not performed. However, consecutive multiple spaces may be replaced by a single space.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "715-681" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that any printable Unicode character, including language neutral characters such as spaces and Emojis are permitted in passwords.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "103-707" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify users can change their password.", + 
"source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "751-176" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that password change functionality requires the user's current and new password.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "327-505" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that passwords submitted during account registration, login, and password change are checked against a set of breached passwords either locally (such as the top 1,000 or 10,000 most common passwords which match the system's password policy) or using an external API. If using an API a zero knowledge proof or other mechanism should be used to ensure that the plain text password is not sent or used in verifying the breach status of the password. If the password is breached, the application must require the user to set a new non-breached password. 
([C6](https://owasp.org/www-project-proactive-controls/#div-numbering))", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "576-651" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.8:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a password strength meter is provided to help users set a stronger password.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "604-025" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.8", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.1.9:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that there are no password composition rules limiting the type of characters permitted. 
There should be no requirement for upper or lower case or numbers or special characters.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "807-565" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.1.9", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.10.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that intra-service secrets do not rely on unchanging credentials such as passwords, API keys or shared accounts with privileged access.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "813-610" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.10.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.10.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if passwords are required for service authentication, the service account used is not a default credential. (e.g. 
root/root or admin/admin are default in some services during installation).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "065-183" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.10.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.10.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that passwords are stored with sufficient protection to prevent offline recovery attacks, including local system access.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "881-321" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.10.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.10.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify passwords, integrations with databases and third-party systems, seeds and internal secrets, and API keys are managed securely and not included in the source code or stored within source code repositories. Such storage SHOULD resist offline attacks. The use of a secure software key store (L1), hardware TPM, or an HSM (L3) is recommended for password storage.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "774-888" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.10.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that anti-automation controls are effective at mitigating breached credential testing, brute force, and account lockout attacks. 
Such controls include blocking the most common breached passwords, soft lockouts, rate limiting, CAPTCHA, ever increasing delays between attempts, IP address restrictions, or risk-based restrictions such as location, first login on a device, recent attempts to unlock the account, or similar. Verify that no more than 100 failed attempts per hour is possible on a single account.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "802-056" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the use of weak authenticators (such as SMS and email) is limited to secondary verification and transaction approval and not as a replacement for more secure authentication methods. Verify that stronger methods are offered before weak methods, users are aware of the risks, or that proper measures are in place to limit the risks of account compromise.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "354-752" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that secure notifications are sent to users after updates to authentication details, such as credential resets, email or address changes, logging in from unknown or risky locations. 
The use of push notifications - rather than SMS or email - is preferred, but in the absence of push notifications, SMS or email is acceptable as long as no sensitive information is disclosed in the notification.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "808-425" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify impersonation resistance against phishing, such as the use of multi-factor authentication, cryptographic devices with intent (such as connected keys with a push to authenticate), or at higher AAL levels, client-side certificates.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "333-858" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that where a Credential Service Provider (CSP) and the application verifying authentication are separated, mutually authenticated TLS is in place between the two endpoints.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "558-807" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify replay resistance through the mandated use of One-time Passwords (OTP) devices, cryptographic authenticators, or lookup codes.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "524-446" + ] + }, 
+ "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.2.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify intent to authenticate by requiring the entry of an OTP token or user-initiated action such as a button press on a FIDO hardware key.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "525-361" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.2.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify system generated initial passwords or activation codes SHOULD be securely randomly generated, SHOULD be at least 6 characters long, and MAY contain letters and numbers, and expire after a short period of time. These initial secrets must not be permitted to become the long term password.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "622-835" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that enrollment and use of user-provided authentication devices are supported, such as a U2F or FIDO tokens.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "553-413" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that renewal instructions are sent with sufficient 
time to renew time bound authenticators.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "138-448" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that passwords are stored in a form that is resistant to offline attacks. Passwords SHALL be salted and hashed using an approved one-way key derivation or password hashing function. Key derivation and password hashing functions take a password, a salt, and a cost factor as inputs when generating a password hash.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "622-203" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the salt is at least 32 bits in length and be chosen arbitrarily to minimize salt value collisions among stored hashes. 
For each credential, a unique salt value and the resulting hash SHALL be stored.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "082-530" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.4.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if PBKDF2 is used, the iteration count SHOULD be as large as verification server performance will allow, typically at least 100,000 iterations.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "767-435" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.4.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.4.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if bcrypt is used, the work factor SHOULD be as large as verification server performance will allow, with a minimum of 10.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "078-427" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.4.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.4.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that an additional iteration of a key derivation function is performed, using a salt value that is secret and known only to the verifier. Generate the salt value using an approved random bit generator [SP 800-90Ar1] and provide at least the minimum security strength specified in the latest revision of SP 800-131A. 
The secret salt value SHALL be stored separately from the hashed passwords (e.g., in a specialized device like a hardware security module).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "077-781" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.4.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a system generated initial activation or recovery secret is not sent in clear text to the user.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "270-634" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify password hints or knowledge-based authentication (so-called \"secret questions\") are not present.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "772-358" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify password credential recovery does not reveal the current password in any way.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "543-621" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify shared or default accounts are not present (e.g. 
\"root\", \"admin\", or \"sa\").", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "623-347" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if an authentication factor is changed or replaced, that the user is notified of this event.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "235-658" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify forgotten password, and other recovery paths use a secure recovery mechanism, such as time-based OTP (TOTP) or other soft token, mobile push, or another offline recovery mechanism.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "581-525" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.5.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if OTP or multi-factor authentication factors are lost, that evidence of identity proofing is performed at the same level as during enrollment.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "358-860" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.5.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.6.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that 
lookup secrets can be used only once.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "101-217" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.6.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.6.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that lookup secrets have sufficient randomness (112 bits of entropy), or if less than 112 bits of entropy, salted with a unique and random 32-bit salt and hashed with an approved one-way hash.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "346-640" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.6.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.6.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that lookup secrets are resistant to offline attacks, such as predictable values.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "513-845" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.6.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.7.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that clear text out of band (NIST \"restricted\") authenticators, such as SMS or PSTN, are not offered by default, and stronger alternatives such as push notifications are offered first.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "354-753" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.7.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.7.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + 
"input": { + "text": "Verify that the out of band verifier expires out of band authentication requests, codes, or tokens after 10 minutes.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "816-631" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.7.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.7.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the out of band verifier authentication requests, codes, or tokens are only usable once, and only for the original authentication request.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "168-186" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.7.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.7.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the out of band authenticator and verifier communicates over a secure independent channel.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "102-811" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.7.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.7.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the out of band verifier retains only a hashed version of the authentication code.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "342-764" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.7.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.7.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that 
the initial authentication code is generated by a secure random number generator, containing at least 20 bits of entropy (typically a six digital random number is sufficient).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "206-254" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.7.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that time-based OTPs have a defined lifetime before expiring.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "681-823" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that symmetric keys used to verify submitted OTPs are highly protected, such as by using a hardware security module or secure operating system based key storage.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "543-428" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that approved cryptographic algorithms are used in the generation, seeding, and verification of OTPs.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "841-757" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.4:positive", + "schema_version": "0.1.0", + "slice": "positive", 
+ "input": { + "text": "Verify that time-based OTP can be used only once within the validity period.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "404-126" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if a time-based multi-factor OTP token is re-used during the validity period, it is logged and rejected with secure notifications being sent to the holder of the device.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "646-227" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify physical single-factor OTP generator can be revoked in case of theft or other loss. 
Ensure that revocation is immediately effective across logged in sessions, regardless of location.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "440-361" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.8.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that biometric authenticators are limited to use only as secondary factors in conjunction with either something you have and something you know.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "076-470" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.8.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.9.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that cryptographic keys used in verification are stored securely and protected against disclosure, such as using a Trusted Platform Module (TPM) or Hardware Security Module (HSM), or an OS service that can use this secure storage.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "783-255" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.9.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.9.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the challenge nonce is at least 64 bits in length, and statistically unique or unique over the lifetime of the cryptographic device.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "287-251" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.9.2", + "ground_truth_source": "OpenCRE DB mapping 
(cre_node_links)" + } + }, + { + "id": "gold:asvs:V2.9.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that approved cryptographic algorithms are used in the generation, seeding, and verification.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "002-801" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V2.9.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application never reveals session tokens in URL parameters.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "402-133" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application generates a new session token on user authentication.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "002-630" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that session tokens possess at least 64 bits of entropy.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "704-530" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application only stores session 
tokens in the browser using secure methods such as appropriately secured cookies (see section 3.4) or HTML 5 session storage.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "455-358" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that session tokens are generated using approved cryptographic algorithms.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "727-043" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that logout and expiration invalidate the session token, such that the back button or a downstream relying party does not resume an authenticated session, including across relying parties.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "457-165" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "If authenticators permit users to remain logged in, verify that re-authentication occurs periodically both when actively used or after an idle period.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "065-782" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.3.3:positive", + "schema_version": 
"0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application gives the option to terminate all other active sessions after a successful password change (including change via password reset/recovery), and that this is effective across the application, federated login (if present), and any relying parties.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "238-346" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.3.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that users are able to view and (having re-entered login credentials) log out of any or all currently active sessions and devices.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "673-736" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.3.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that cookie-based session tokens have the 'Secure' attribute set.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "688-081" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that cookie-based session tokens have the 'HttpOnly' attribute set.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "804-220" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + 
{ + "id": "gold:asvs:V3.4.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that cookie-based session tokens utilize the 'SameSite' attribute to limit exposure to cross-site request forgery attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "342-055" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.4.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.4.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that cookie-based session tokens use the \"__Host-\" prefix so cookies are only sent to the host that initially set the cookie.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "232-034" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.4.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.4.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that if the application is published under a domain name with other applications that set or use session cookies that might disclose the session cookies, set the path attribute in cookie-based session tokens using the most precise path possible.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "705-182" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.4.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.5.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application allows users to revoke OAuth tokens that form trust relationships with linked applications.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "551-400" + ] + }, + "provenance": 
{ + "standard_version": "4.0", + "section_path": "V3.5.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.5.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application uses session tokens rather than static API secrets and keys, except with legacy implementations.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "551-054" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.5.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.5.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that stateless session tokens use digital signatures, encryption, and other countermeasures to protect against tampering, enveloping, replay, null cipher, and key substitution attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "483-883" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.5.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.6.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that Relying Parties (RPs) specify the maximum authentication time to Credential Service Providers (CSPs) and that CSPs re-authenticate the user if they haven't used a session within that period.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "618-403" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.6.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.6.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that Credential Service Providers (CSPs) inform Relying Parties (RPs) of the last authentication event, 
to allow RPs to determine if they need to re-authenticate the user.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "052-821" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.6.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V3.7.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application ensures a full, valid login session or requires re-authentication or secondary verification before allowing any sensitive transactions or account modifications.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "582-541" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V3.7.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application enforces access control rules on a trusted service layer, especially if client-side access control is present and could be bypassed.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "650-560" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all user and data attributes and policy information used by access controls cannot be manipulated by end users unless specifically authorized.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "524-603" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.1.3:positive", + 
"schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the principle of least privilege exists - users should only be able to access functions, data files, URLs, controllers, services, and other resources, for which they possess specific authorization. This implies protection against spoofing and elevation of privilege. ([C7](https://owasp.org/www-project-proactive-controls/#div-numbering))", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "368-633" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that access controls fail securely including when an exception occurs.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "166-151" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that sensitive data and APIs are protected against Insecure Direct Object Reference (IDOR) attacks targeting creation, reading, updating and deletion of records, such as creating or updating someone else's record, viewing everyone's records, or deleting all records.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "304-667" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application or framework enforces a strong anti-CSRF mechanism to protect 
authenticated functionality, and effective anti-automation or anti-CSRF protects unauthenticated functionality.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "060-472" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify administrative interfaces use appropriate multi-factor authentication to prevent unauthorized use.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "201-246" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that directory browsing is disabled unless deliberately desired. 
Additionally, applications should not allow discovery or disclosure of file or directory metadata, such as Thumbs.db, .DS_Store, .git or .svn folders.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "615-744" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V4.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application has additional authorization (such as step up or adaptive authentication) for lower value systems, and / or segregation of duties for high value applications to enforce anti-fraud controls as per the risk of application and past fraud.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "284-521" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V4.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application has defenses against HTTP parameter pollution attacks, particularly if the application framework makes no distinction about the source of request parameters (GET, POST, cookies, headers, or environment variables).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "743-237" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that frameworks protect against mass parameter assignment attacks, or that the application has countermeasures to protect against unsafe parameter assignment, such as marking fields private or similar.", + 
"source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "042-550" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all input (HTML form fields, REST requests, URL parameters, HTTP headers, cookies, batch files, RSS feeds, etc) is validated using positive validation (allow lists).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "031-447" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that structured data is strongly typed and validated against a defined schema including allowed characters, length and pattern (e.g. 
credit card numbers, e-mail addresses, telephone numbers, or validating that two related fields are reasonable, such as checking that suburb and zip/postcode match).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "653-242" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that URL redirects and forwards only allow destinations which appear on an allow list, or show a warning when redirecting to potentially untrusted content.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "232-217" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all untrusted HTML input from WYSIWYG editors or similar is properly sanitized with an HTML sanitizer library or framework feature. 
([C5](https://owasp.org/www-project-proactive-controls/#div-numbering))", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "542-445" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that unstructured data is sanitized to enforce safety measures such as allowed characters and length.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "538-446" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application sanitizes user input before passing to mail systems to protect against SMTP or IMAP injection.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "881-434" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application avoids the use of eval() or other dynamic code execution features. 
Where there is no alternative, any user input being included must be sanitized or sandboxed before being executed.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "317-743" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against template injection attacks by ensuring that any user input being included is sanitized or sandboxed.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "422-005" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against SSRF attacks, by validating or sanitizing untrusted data or HTTP file metadata, such as filenames and URL input fields, and uses allow lists of protocols, domains, paths and ports.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "657-084" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application sanitizes, disables, or sandboxes user-supplied Scalable Vector Graphics (SVG) scriptable content, especially as they relate to XSS resulting from inline scripts, and foreignObject.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "145-310" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": 
"V5.2.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.2.8:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application sanitizes, disables, or sandboxes user-supplied scriptable or expression template language content, such as Markdown, CSS or XSL stylesheets, BBCode, or similar.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "646-462" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.2.8", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that output encoding is relevant for the interpreter and context required. For example, use encoders specifically for HTML values, HTML attributes, JavaScript, URL parameters, HTTP headers, SMTP, and others as the context requires, especially from untrusted inputs (e.g. 
names with Unicode or apostrophes, such as ねこ or O'Hara).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "620-101" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.10:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against XPath injection or XML injection attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "134-207" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.10", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that output encoding preserves the user's chosen character set and locale, such that any Unicode character point is valid and safely handled.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "533-516" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that context-aware, preferably automated - or at worst, manual - output escaping protects against reflected, stored, and DOM based XSS.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "366-835" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that data selection or database queries (e.g.
SQL, HQL, ORM, NoSQL) use parameterized queries, ORMs, entity frameworks, or are otherwise protected from database injection attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "732-873" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that where parameterized or safer mechanisms are not present, context-specific output encoding is used to protect against injection attacks, such as the use of SQL escaping to protect against SQL injection.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "064-808" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against JSON injection attacks, JSON eval attacks, and JavaScript expression evaluation.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "607-671" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against LDAP injection vulnerabilities, or that specific security controls to prevent LDAP injection have been implemented.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "531-558" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.7", + "ground_truth_source": "OpenCRE DB mapping 
(cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.8:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against OS command injection and that operating system calls use parameterized OS queries or use contextual command line output encoding.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "857-718" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.8", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.3.9:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application protects against Local File Inclusion (LFI) or Remote File Inclusion (RFI) attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "547-283" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.3.9", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application uses memory-safe string, safer memory copy and pointer arithmetic to detect or prevent stack, buffer, or heap overflows.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "831-570" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that format strings do not take potentially hostile input, and are constant.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "824-732" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.4.2", + "ground_truth_source": 
"OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.4.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that sign, range, and input validation techniques are used to prevent integer overflows.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "482-771" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.4.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.5.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that serialized objects use integrity checks or are encrypted to prevent hostile object creation or data tampering.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "762-616" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.5.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.5.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application correctly restricts XML parsers to only use the most restrictive configuration possible and to ensure that unsafe features such as resolving external entities are disabled to prevent XML eXternal Entity (XXE) attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "764-507" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.5.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.5.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that deserialization of untrusted data is avoided or is protected in both custom code and third-party libraries (such as JSON, XML and YAML parsers).", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": 
[ + "831-563" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.5.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V5.5.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that when parsing JSON in browsers or JavaScript-based backends, JSON.parse is used to parse the JSON document. Do not use eval() to parse JSON.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "387-848" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V5.5.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that regulated private data is stored encrypted while at rest, such as Personally Identifiable Information (PII), sensitive personal information, or data assessed likely to be subject to EU's GDPR.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "482-866" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that regulated health data is stored encrypted while at rest, such as medical records, medical device details, or de-anonymized research records.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "224-321" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that regulated financial data is stored encrypted while at rest, such 
as financial accounts, defaults or credit history, tax records, pay history, beneficiaries, or de-anonymized market or research records.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "267-468" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all cryptographic modules fail securely, and errors are handled in a way that does not enable Padding Oracle attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "036-810" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that industry proven or government approved cryptographic algorithms, modes, and libraries are used, instead of custom coded cryptography.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "742-431" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that encryption initialization vector, cipher configuration, and block modes are configured securely using the latest advice.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "674-425" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.4:positive", + "schema_version": "0.1.0", 
+ "slice": "positive", + "input": { + "text": "Verify that random number, encryption or hashing algorithms, key lengths, rounds, ciphers or modes, can be reconfigured, upgraded, or swapped at any time, to protect against cryptographic breaks. ([C8](https://owasp.org/www-project-proactive-controls/#div-numbering))", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "122-287" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that known insecure block modes (i.e. ECB, etc.), padding modes (i.e. PKCS#1 v1.5, etc.), ciphers with small block sizes (i.e. Triple-DES, Blowfish, etc.), and weak hashing algorithms (i.e. MD5, SHA1, etc.) are not used unless required for backwards compatibility.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "441-132" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that nonces, initialization vectors, and other single use numbers must not be used more than once with a given encryption key. 
The method of generation must be appropriate for the algorithm being used.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "433-122" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that encrypted data is authenticated via signatures, authenticated cipher modes, or HMAC to ensure that ciphertext is not altered by an unauthorized party.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "786-224" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.2.8:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all cryptographic operations are constant-time, with no 'short-circuit' operations in comparisons, calculations, or returns, to avoid leaking information.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "878-880" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.2.8", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all random numbers, random file names, random GUIDs, and random strings are generated using the cryptographic module's approved cryptographically secure random number generator when these random values are intended to be not guessable by an attacker.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "542-488" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.3.1", + 
"ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that random GUIDs are created using the GUID v4 algorithm, and a Cryptographically-secure Pseudo-random Number Generator (CSPRNG). GUIDs created using other pseudo-random number generators may be predictable.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "027-210" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that random numbers are created with proper entropy even when the application is under heavy load, or that the application degrades gracefully in such circumstances.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "664-571" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a secrets management solution such as a key vault is used to securely create, store, control access to and destroy secrets.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "340-375" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V6.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that key material is not exposed to the application but instead uses an isolated security module like a vault for cryptographic operations.", + 
"source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "032-213" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V6.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application does not log credentials or payment details. Session tokens should only be stored in logs in an irreversible, hashed form.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "067-050" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application does not log other sensitive data as defined under local privacy laws or relevant security policy.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "240-274" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that the application logs security relevant events including successful and failed authentication events, access control failures, deserialization failures and input validation failures.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "184-284" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that each log event 
includes necessary information that would allow for a detailed investigation of the timeline when an event happens.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "555-048" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.1.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all authentication decisions are logged, without storing sensitive session tokens or passwords. This should include requests with relevant metadata needed for security investigations.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "841-710" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all access control decisions can be logged and all failed decisions are logged. 
This should include requests with relevant metadata needed for security investigations.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "443-447" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all logging components appropriately encode data to prevent log injection.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "048-612" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that security logs are protected from unauthorized access and modification.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "713-683" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.3.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that time sources are synchronized to the correct time and time zone. 
Strongly consider logging only in UTC if systems are global to assist with post-incident forensic analysis.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "770-361" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.3.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.4.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a generic message is shown when an unexpected or security sensitive error occurs, potentially with a unique ID which support personnel can use to investigate.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "612-435" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.4.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.4.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that exception handling (or a functional equivalent) is used across the codebase to account for expected and unexpected error conditions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "863-636" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.4.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V7.4.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that a \"last resort\" error handler is defined which will catch all unhandled exceptions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "118-602" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V7.4.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + 
"input": { + "text": "Verify the application protects sensitive data from being cached in server components such as load balancers and application caches.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "846-302" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all cached or temporary copies of sensitive data stored on the server are protected from unauthorized access or purged/invalidated after the authorized user accesses the sensitive data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "157-430" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application minimizes the number of parameters in a request, such as hidden fields, Ajax variables, cookies and header values.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "217-112" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.1.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application can detect and alert on abnormal numbers of requests, such as by IP, user, total per hour or day, or whatever makes sense for the application.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "176-154" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.1.4", + "ground_truth_source": 
"OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.1.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that regular backups of important data are performed and that test restoration of data is performed.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "257-117" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.1.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.1.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that backups are stored securely to prevent data from being stolen or corrupted.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "614-353" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.1.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify the application sets sufficient anti-caching headers so that sensitive data is not cached in modern browsers.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "473-758" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that data stored in browser storage (such as localStorage, sessionStorage, IndexedDB, or cookies) does not contain sensitive data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "617-524" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + 
{ + "id": "gold:asvs:V8.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that authenticated data is cleared from client storage, such as the browser DOM, after the client or session is terminated.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "046-257" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that sensitive data is sent to the server in the HTTP message body or headers, and that query string parameters from any HTTP verb do not contain sensitive data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "186-540" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that users have a method to remove or export their data on demand.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "762-451" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that users are provided clear language regarding collection and use of supplied personal information and that users have provided opt-in consent for the use of that data before it is used in any way.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "082-327" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": 
"V8.3.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all sensitive data created and processed by the application has been identified, and ensure that a policy is in place on how to deal with sensitive data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "227-045" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify accessing sensitive data is audited (without logging the sensitive data itself), if the data is collected under relevant data protection directives or where logging of access is required.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "015-063" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.6:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that sensitive information contained in memory is overwritten as soon as it is no longer required to mitigate memory dumping attacks, using zeroes or random data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "715-304" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.6", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.7:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that sensitive or private information that is required to be encrypted, is encrypted using approved algorithms that provide both 
confidentiality and integrity.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "504-340" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.7", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V8.3.8:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that sensitive personal information is subject to data retention classification, such that old or out of date data is deleted automatically, on a schedule, or as the situation requires.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "268-272" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V8.3.8", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.1.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that TLS is used for all client connectivity, and does not fall back to insecure or unencrypted communications.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "745-045" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.1.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.1.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify using up to date TLS testing tools that only strong cipher suites are enabled, with the strongest cipher suites set as preferred.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "767-701" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.1.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.1.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that only 
the latest recommended versions of the TLS protocol are enabled, such as TLS 1.2 and TLS 1.3. The latest version of the TLS protocol should be the preferred option.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "248-646" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.1.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.2.1:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that connections to and from the server use trusted TLS certificates. Where internally generated or self-signed certificates are used, the server must be configured to only trust specific internal CAs and specific self-signed certificates. All others should be rejected.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "430-636" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.2.1", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.2.2:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that encrypted communications such as TLS is used for all inbound and outbound connections, including for management ports, monitoring, authentication, API, or web service calls, database, cloud, serverless, mainframe, external, and partner connections. 
The server must not fall back to insecure or unencrypted protocols.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "636-854" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.2.2", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.2.3:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that all encrypted connections to external systems that involve sensitive information or functions are authenticated.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "605-735" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.2.3", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.2.4:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that proper certification revocation, such as Online Certificate Status Protocol (OCSP) Stapling, is enabled and configured.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "537-367" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.2.4", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:asvs:V9.2.5:positive", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Verify that backend TLS connection failures are logged.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "668-364" + ] + }, + "provenance": { + "standard_version": "4.0", + "section_path": "V9.2.5", + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)" + } + }, + { + "id": "gold:cwe:1004:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Sensitive Cookie Without 'HttpOnly' Flag", + "source_standard": "OTHER" + }, + "expected": { + "decision": 
"linked", + "cre_ids": [ + "284-521", + "804-220" + ] + }, + "provenance": { + "section_path": "1004", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1021:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Improper Restriction of Rendered UI Layers or Frames", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "257-668", + "480-071" + ] + }, + "provenance": { + "section_path": "1021", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1053:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Missing Documentation for Design", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102", + "162-655", + "820-878" + ] + }, + "provenance": { + "section_path": "1053", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1059:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Insufficient Technical Documentation", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102", + "162-655", + "820-878" + ] + }, + "provenance": { + "section_path": "1059", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1110:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Incomplete Design Documentation", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102", + "162-655", + "820-878", + "822-100" + ] + }, + "provenance": { + "section_path": "1110", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1111:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": 
"Incomplete I/O Documentation", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102", + "162-655", + "820-878" + ] + }, + "provenance": { + "section_path": "1111", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1112:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Incomplete Documentation of Program Execution", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102", + "162-655", + "820-878" + ] + }, + "provenance": { + "section_path": "1112", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1118:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Insufficient Documentation of Error Handling Techniques", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "068-102", + "162-655", + "820-878" + ] + }, + "provenance": { + "section_path": "1118", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1173:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Improper Use of Validation Framework", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "031-447", + "146-706", + "611-051", + "653-242" + ] + }, + "provenance": { + "section_path": "1173", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1174:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "ASP.NET Misconfiguration: Improper Model Validation", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "031-447", + "146-706", + "611-051", + "653-242" + ] + }, + "provenance": { + "section_path": "1174", + "ground_truth_source": "OpenCRE DB 
mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1191:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "On-Chip Debug and Test Interface With Improper Access Control", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "117-371", + "757-271", + "801-310" + ] + }, + "provenance": { + "section_path": "1191", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:120:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Buffer Copy without Checking Size of Input ('Classic Buffer Overflow')", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "314-131", + "831-570" + ] + }, + "provenance": { + "section_path": "120", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1204:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Generation of Weak Initialization Vector (IV)", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "206-254", + "287-251", + "346-640", + "622-835" + ] + }, + "provenance": { + "section_path": "1204", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1220:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Insufficient Granularity of Access Control", + "source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "117-371", + "757-271", + "801-310" + ] + }, + "provenance": { + "section_path": "1220", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:cwe:1222:positive_multi", + "schema_version": "0.1.0", + "slice": "positive", + "input": { + "text": "Insufficient Granularity of Address Regions Protected by Register Locks", + 
"source_standard": "OTHER" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "117-371", + "757-271", + "801-310" + ] + }, + "provenance": { + "section_path": "1222", + "ground_truth_source": "OpenCRE DB mapping (multi-CRE node from CWE)" + } + }, + { + "id": "gold:asvs:V1.1.3:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that all user stories and features contain functional security constraints, such as \"As a user, I should be able to view and edit my profile. I should not be able to view or edit anyone else's profile\"", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "822-100" + ] + }, + "provenance": { + "section_path": "V1.1.3", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V1.11.2:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that all high-value business logic flows, including authentication, session management and access control, do not share unsynchronized state.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "670-660" + ] + }, + "provenance": { + "section_path": "V1.11.2", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V1.14.6:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify the application does not use unsupported, insecure, or deprecated client-side technologies such as NSAPI plugins, Flash, Shockwave, ActiveX, Silverlight, NACL, or client-side Java applets.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "673-475" + ] + }, + "provenance": { + "section_path": "V1.14.6", + "ground_truth_source": "OpenCRE DB mapping; 
negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V10.2.1:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application source code and third party libraries do not contain unauthorized phone home or data collection capabilities. Where such functionality exists, obtain the user's permission for it to operate before collecting any data.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "834-645" + ] + }, + "provenance": { + "section_path": "V10.2.1", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V10.2.2:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application does not ask for unnecessary or excessive permissions to privacy related features or sensors, such as contacts, cameras, microphones, or location.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "540-566" + ] + }, + "provenance": { + "section_path": "V10.2.2", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V10.2.3:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application source code and third party libraries do not contain back doors, such as hard-coded or additional undocumented accounts or keys, code obfuscation, undocumented binary blobs, rootkits, or anti-debugging, insecure debugging features, or otherwise out of date, insecure, or hidden functionality that could be used maliciously if discovered.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "838-636" + ] + }, + "provenance": { + 
"section_path": "V10.2.3", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V10.2.4:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application source code and third party libraries do not contain time bombs by searching for date and time related functions.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "418-525" + ] + }, + "provenance": { + "section_path": "V10.2.4", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V10.2.5:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application source code and third party libraries do not contain malicious code, such as salami attacks, logic bypasses, or logic bombs.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "265-800" + ] + }, + "provenance": { + "section_path": "V10.2.5", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V10.2.6:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application source code and third party libraries do not contain Easter eggs or any other potentially unwanted functionality.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "154-031" + ] + }, + "provenance": { + "section_path": "V10.2.6", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V11.1.6:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + 
"text": "Verify that the application does not suffer from \"Time Of Check to Time Of Use\" (TOCTOU) issues or other race conditions for sensitive operations.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "134-412" + ] + }, + "provenance": { + "section_path": "V11.1.6", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V12.3.6:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify that the application does not include and execute functionality from untrusted sources, such as unverified content distribution networks, JavaScript libraries, node npm libraries, or server-side DLLs.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "777-470" + ] + }, + "provenance": { + "section_path": "V12.3.6", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:asvs:V13.1.3:hard_negative", + "schema_version": "0.1.0", + "slice": "hard_negative", + "input": { + "text": "Verify API URLs do not expose sensitive information, such as the API key, session tokens etc.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "333-888" + ] + }, + "provenance": { + "section_path": "V13.1.3", + "ground_truth_source": "OpenCRE DB mapping; negation phrasing (cross-encoder must beat cosine without losing the correct CRE)" + } + }, + { + "id": "gold:update:V2.1.1", + "schema_version": "0.1.0", + "slice": "update", + "input": { + "text": "Verify that user-set passwords are at least 12 characters in length after removing leading and trailing spaces.", + "prior_text": "Verify that user-set passwords are at least 12 characters in length.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": 
[ + "027-555" + ], + "is_update": true + }, + "provenance": { + "section_path": "V2.1.1", + "ground_truth_source": "synthesised before/after wording of the real ASVS requirement; ground-truth CRE from the OpenCRE DB" + } + }, + { + "id": "gold:update:V3.4.1", + "schema_version": "0.1.0", + "slice": "update", + "input": { + "text": "Verify that cookie-based session tokens have the Secure and SameSite attributes set.", + "prior_text": "Verify that cookie-based session tokens have the Secure attribute set.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "688-081" + ], + "is_update": true + }, + "provenance": { + "section_path": "V3.4.1", + "ground_truth_source": "synthesised before/after wording of the real ASVS requirement; ground-truth CRE from the OpenCRE DB" + } + }, + { + "id": "gold:update:V2.2.1", + "schema_version": "0.1.0", + "slice": "update", + "input": { + "text": "Verify that anti-automation controls are effective at mitigating breached credential testing, brute force, account lockout, and credential stuffing attacks.", + "prior_text": "Verify that anti-automation controls are effective at mitigating breached credential testing, brute force, and account lockout attacks.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "802-056" + ], + "is_update": true + }, + "provenance": { + "section_path": "V2.2.1", + "ground_truth_source": "synthesised before/after wording of the real ASVS requirement; ground-truth CRE from the OpenCRE DB" + } + }, + { + "id": "gold:update:V4.1.1", + "schema_version": "0.1.0", + "slice": "update", + "input": { + "text": "Verify that the application enforces access control rules on a trusted service layer, with mandatory denials on missing context.", + "prior_text": "Verify that the application enforces access control rules on a trusted service layer.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + 
"650-560" + ], + "is_update": true + }, + "provenance": { + "section_path": "V4.1.1", + "ground_truth_source": "synthesised before/after wording of the real ASVS requirement; ground-truth CRE from the OpenCRE DB" + } + }, + { + "id": "gold:update:V8.3.1", + "schema_version": "0.1.0", + "slice": "update", + "input": { + "text": "Verify that sensitive data is sent to the server in the HTTP message body or headers, and that any URL-borne parameters do not contain sensitive data.", + "prior_text": "Verify that sensitive data is sent to the server in the HTTP message body or headers.", + "source_standard": "ASVS" + }, + "expected": { + "decision": "linked", + "cre_ids": [ + "186-540" + ], + "is_update": true + }, + "provenance": { + "section_path": "V8.3.1", + "ground_truth_source": "synthesised before/after wording of the real ASVS requirement; ground-truth CRE from the OpenCRE DB" + } + }, + { + "id": "gold:ambiguous:sdlc", + "schema_version": "0.1.0", + "slice": "ambiguous", + "input": { + "text": "Verify the use of a secure software development lifecycle that addresses security in all stages of development.", + "title_hint": "Secure SDLC" + }, + "expected": { + "decision": "review", + "reason_code": "BELOW_THRESHOLD" + }, + "provenance": { + "ground_truth_source": "manually synthesised broad statement that should route to human review (no single clear CRE target)" + } + }, + { + "id": "gold:ambiguous:culture", + "schema_version": "0.1.0", + "slice": "ambiguous", + "input": { + "text": "Security is everyone's responsibility and should be considered throughout the organization." 
+ }, + "expected": { + "decision": "review", + "reason_code": "NO_CANDIDATES" + }, + "provenance": { + "ground_truth_source": "manually synthesised broad statement that should route to human review (no single clear CRE target)" + } + }, + { + "id": "gold:ambiguous:governance", + "schema_version": "0.1.0", + "slice": "ambiguous", + "input": { + "text": "Verify that the organization has a documented information security policy approved by senior management.", + "title_hint": "Governance" + }, + "expected": { + "decision": "review", + "reason_code": "BELOW_THRESHOLD" + }, + "provenance": { + "ground_truth_source": "manually synthesised broad statement that should route to human review (no single clear CRE target)" + } + }, + { + "id": "gold:ambiguous:training", + "schema_version": "0.1.0", + "slice": "ambiguous", + "input": { + "text": "Verify that developers receive security training appropriate to their role and responsibilities." + }, + "expected": { + "decision": "review", + "reason_code": "BELOW_THRESHOLD" + }, + "provenance": { + "ground_truth_source": "manually synthesised broad statement that should route to human review (no single clear CRE target)" + } + }, + { + "id": "gold:ambiguous:risk", + "schema_version": "0.1.0", + "slice": "ambiguous", + "input": { + "text": "Risk assessment processes are followed at every major design decision." 
+ }, + "expected": { + "decision": "review", + "reason_code": "BELOW_THRESHOLD" + }, + "provenance": { + "ground_truth_source": "manually synthesised broad statement that should route to human review (no single clear CRE target)" + } + } +] diff --git a/application/tests/librarian/fixtures/golden_dataset.schema.json b/application/tests/librarian/fixtures/golden_dataset.schema.json new file mode 100644 index 000000000..7edb4de2d --- /dev/null +++ b/application/tests/librarian/fixtures/golden_dataset.schema.json @@ -0,0 +1,85 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/golden-dataset-row.json", + "title": "GoldenDatasetRow", + "description": "One labelled test case for the Module C regression harness. The dataset is an array of these. schema_version 0.1.0 (Module C internal — NOT a pipeline wire contract).", + "type": "object", + "additionalProperties": false, + "required": ["id", "schema_version", "slice", "input", "expected", "provenance"], + "properties": { + "id": { "type": "string", "minLength": 1, "description": "Stable golden-case id, e.g. 
gold:asvs:V2.1.1:explicit" }, + "schema_version": { "type": "string", "pattern": "^0\\.\\d+\\.\\d+$" }, + "slice": { + "type": "string", + "enum": ["explicit", "positive", "hard_negative", "update", "ambiguous"] + }, + "input": { + "type": "object", + "additionalProperties": false, + "required": ["text"], + "properties": { + "text": { "type": "string", "minLength": 1, "description": "Section text under test (same role as KnowledgeItem.content.text)" }, + "title_hint": { "type": "string" }, + "explicit_cre_ref": { "type": "string", "description": "Required for slice=explicit: the CRE id literally present in the source" }, + "prior_text": { "type": "string", "description": "Required for slice=update: the previously mapped version of this content" }, + "source_standard": { + "type": "string", + "enum": ["ASVS", "WSTG", "NIST_800_53", "PCI_DSS", "OWASP_CHEATSHEET", "OTHER"] + } + } + }, + "expected": { + "type": "object", + "additionalProperties": false, + "required": ["decision"], + "properties": { + "decision": { "type": "string", "enum": ["linked", "review"] }, + "cre_ids": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Ground-truth CRE target(s). May be >1 (multi-link)." + }, + "reason_code": { + "type": "string", + "enum": ["BELOW_THRESHOLD", "NO_CANDIDATES", "ADVERSARIAL_FLAG", "UPDATE_AMBIGUOUS"], + "description": "Required when decision=review" + }, + "is_update": { "type": "boolean", "description": "Used by slice=update" } + } + }, + "provenance": { + "type": "object", + "additionalProperties": false, + "required": ["ground_truth_source"], + "properties": { + "standard_version": { "type": "string" }, + "section_path": { "type": "string", "description": "e.g. V2.1.1" }, + "ground_truth_source": { "type": "string", "description": "How the label was established, e.g. 
'existing OpenCRE DB mapping' or 'manual, reviewer=spyros'" } + } + }, + "notes": { "type": "string" } + }, + "allOf": [ + { + "if": { "properties": { "slice": { "const": "explicit" } } }, + "then": { "properties": { "input": { "required": ["text", "explicit_cre_ref"] } } } + }, + { + "if": { "properties": { "slice": { "const": "update" } } }, + "then": { + "properties": { + "input": { "required": ["text", "prior_text"] }, + "expected": { "required": ["decision", "is_update"] } + } + } + }, + { + "if": { "properties": { "expected": { "properties": { "decision": { "const": "review" } } } } }, + "then": { "properties": { "expected": { "required": ["decision", "reason_code"] } } } + }, + { + "if": { "properties": { "expected": { "properties": { "decision": { "const": "linked" } } } } }, + "then": { "properties": { "expected": { "required": ["decision", "cre_ids"] } } } + } + ] +} diff --git a/application/tests/librarian/fixtures/sample_knowledge_queue.jsonl b/application/tests/librarian/fixtures/sample_knowledge_queue.jsonl new file mode 100644 index 000000000..de82c456a --- /dev/null +++ b/application/tests/librarian/fixtures/sample_knowledge_queue.jsonl @@ -0,0 +1,3 @@ +{"id":"4a8c1b2e-1d2f-4e3a-9b4c-5d6e7f8a9b0c","source_repo":"OWASP/ASVS","source_path":"4.0/en/0x11-V2-Authentication.md","source_commit_sha":"abc123def456789012345678901234567890abcd","text":"Verify that user-set passwords are at least 12 characters in length.","confidence":0.93,"llm_label":"KNOWLEDGE","llm_reasoning":"clear security requirement on password length","created_at":"2026-05-25T02:25:00Z","consumed_at":null} +{"id":"5b9d2c3f-2e3a-4f4b-ac5d-6e7f8a9b0c1d","source_repo":"OWASP/ASVS","source_path":"4.0/en/0x11-V2-Authentication.md","source_commit_sha":"abc123def456789012345678901234567890abcd","text":"Do NOT use MD5 for password hashing; it is cryptographically broken for this purpose.","confidence":0.88,"llm_label":"KNOWLEDGE","llm_reasoning":"explicit security guidance against a deprecated 
import unittest

from application.utils.librarian.hub_firewall import HubRep, firewall, leaks


class TestHubFirewall(unittest.TestCase):
    """Checks ``leaks`` and ``firewall`` against a small in-memory hub."""

    def setUp(self):
        # A held-out row, plus a hub in which only the first representation
        # embeds that row's text verbatim.
        self.row = (
            "Verify that user-set passwords are at least 12 characters in length."
        )
        embedding_rep = HubRep(
            "764-507", "Some CRE text. " + self.row + " More context."
        )
        unrelated_rep = HubRep(
            "311-369", "Session tokens must have the Secure attribute set."
        )
        self.hub = [embedding_rep, unrelated_rep]

    def test_test_row_text_leaks_before_firewall(self):
        """The untouched hub is reported as leaking the row."""
        leaked = leaks(self.row, self.hub)
        self.assertTrue(leaked)

    def test_firewall_removes_leaking_rep_only(self):
        """Only the representation embedding the row is dropped."""
        survivors = firewall(self.row, self.hub)
        surviving_ids = [rep.cre_id for rep in survivors]
        self.assertEqual(surviving_ids, ["311-369"])
        # The whole point: the row's text is absent from the hub afterwards.
        self.assertFalse(leaks(self.row, survivors))

    def test_firewall_is_whitespace_and_case_insensitive(self):
        """An upper-cased prefix in the hub text still counts as a leak."""
        shouting_hub = [
            HubRep(
                "1-2",
                "VERIFY THAT user-set passwords are at least 12 characters in length.",
            )
        ]
        self.assertTrue(leaks(self.row, shouting_hub))
        self.assertEqual(firewall(self.row, shouting_hub), [])

    def test_empty_row_is_a_noop(self):
        """An empty row neither leaks nor removes anything from the hub."""
        empty_row = ""
        self.assertFalse(leaks(empty_row, self.hub))
        self.assertEqual(firewall(empty_row, self.hub), self.hub)


if __name__ == "__main__":
    unittest.main()
"""Contract tests for the Module C envelopes.

Every Pydantic model in ``schemas.py`` is dumped to JSON and validated against
the **vendored canonical JSON Schema** kept under
``application/utils/librarian/_rfc_schemas/`` (the RFC #734 contract). Should
the vendored schema and a Pydantic model ever drift apart, these tests fail.
"""

import json
import os
import unittest

import jsonschema
from pydantic import ValidationError
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT202012

from application.utils.librarian.schemas import (
    SCHEMA_VERSION,
    CreCandidate,
    Filter,
    FilterStage,
    GoldenDatasetRow,
    KnowledgeContent,
    KnowledgeItem,
    KnowledgeQueueItem,
    KnowledgeSnapshot,
    KnowledgeStatus,
    LinkProposal,
    Locator,
    LocatorKind,
    ProposedLink,
    Rejection,
    RetrievalAudit,
    ReviewItem,
    SourceRef,
    SourceType,
    UpdateDetection,
)

_HERE = os.path.dirname(__file__)
_REPO_ROOT = os.path.abspath(os.path.join(_HERE, "..", "..", ".."))
_RFC_DIR = os.path.join(_REPO_ROOT, "application", "utils", "librarian", "_rfc_schemas")
_GOLDEN_SCHEMA = os.path.join(_HERE, "fixtures", "golden_dataset.schema.json")


def _load(path: str) -> dict:
    """Read the UTF-8 JSON document at *path* and return the parsed value."""
    with open(path, encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed


def _build_registry() -> Registry:
    """Register every vendored RFC schema under its $id so $refs resolve."""
    documents = (
        _load(os.path.join(_RFC_DIR, entry))
        for entry in os.listdir(_RFC_DIR)
        if entry.endswith(".json")
    )
    # Each schema is keyed by its own "$id".
    keyed_resources = [
        (document["$id"], DRAFT202012.create_resource(document))
        for document in documents
    ]
    return Registry().with_resources(keyed_resources)


_REGISTRY = _build_registry()


def _validator_for(schema_filename: str) -> jsonschema.Draft202012Validator:
    """Build a Draft 2020-12 validator for one vendored schema file."""
    schema_path = os.path.join(_RFC_DIR, schema_filename)
    return jsonschema.Draft202012Validator(_load(schema_path), registry=_REGISTRY)


def _round_trip_through_canonical(self, model_instance, schema_filename: str):
    """Dump the Pydantic model to plain JSON and assert canonical schema accepts it.

    ``self`` is the calling ``unittest.TestCase``; the assertion is reported
    through it. ``None`` fields are omitted from the dump before validation.
    """
    dumped = json.loads(model_instance.model_dump_json(exclude_none=True))
    validator = _validator_for(schema_filename)
    # Sorting by str() gives a stable order for the failure message.
    errors = sorted(validator.iter_errors(dumped), key=str)
    self.assertEqual(
        errors,
        [],
        msg=f"{type(model_instance).__name__} failed canonical schema: {errors}",
    )
# Shared fixtures for envelope tests
GITHUB_SOURCE = SourceRef(
    type=SourceType.github,
    repo="OWASP/ASVS",
    commit_sha="abc1234",
    committed_at="2026-02-01T01:00:00Z",
)
REPO_LOCATOR = Locator(
    kind=LocatorKind.repo_path,
    id="4.0/en/0x11-V2-Authentication.md",
    path="4.0/en/0x11-V2-Authentication.md",
)
KNOWLEDGE_SNAPSHOT = KnowledgeSnapshot(
    text="Verify MFA.", source=GITHUB_SOURCE, locator=REPO_LOCATOR
)
RETRIEVAL = RetrievalAudit(
    retriever="pgvector+cross-encoder/0.1.0",
    candidates=[
        CreCandidate(
            cre_id="123-456", cre_name="Auth", score_vector=0.72, score_rerank=0.76
        )
    ],
    reranked=[CreCandidate(cre_id="123-456", score_rerank=0.76)],
    threshold=0.8,
)
UPDATE_NEW = UpdateDetection(is_update=False)


class TestSourceRef(unittest.TestCase):
    """Conditional-field and extra-field rules of ``SourceRef``."""

    def test_github_requires_repo_and_commit_sha(self):
        """A github source is rejected without repo, and without commit_sha."""
        missing_both = dict(
            type=SourceType.github, committed_at="2026-01-01T00:00:00Z"
        )
        with self.assertRaises(ValidationError):
            SourceRef(**missing_both)

        missing_sha = dict(
            type=SourceType.github, repo="r", committed_at="2026-01-01T00:00:00Z"
        )
        with self.assertRaises(ValidationError):
            SourceRef(**missing_sha)

    def test_url_type_does_not_require_repo(self):
        """A url source constructs without repo / commit_sha."""
        url_only = dict(
            type=SourceType.url,
            url="https://x",
            committed_at="2026-01-01T00:00:00Z",
        )
        SourceRef(**url_only)

    def test_extra_field_forbidden(self):
        """An unknown keyword is a validation error."""
        with_unknown_key = dict(
            type=SourceType.url,
            url="https://x",
            committed_at="2026-01-01T00:00:00Z",
            surprise=1,
        )
        with self.assertRaises(ValidationError):
            SourceRef(**with_unknown_key)


class TestLocator(unittest.TestCase):
    """Per-kind required fields of ``Locator``."""

    def test_repo_path_requires_path(self):
        """kind=repo_path without ``path`` is rejected."""
        with self.assertRaises(ValidationError):
            Locator(kind=LocatorKind.repo_path, id="x")

    def test_url_kind_requires_url(self):
        """kind=url is rejected without ``url`` and accepted with it."""
        with self.assertRaises(ValidationError):
            Locator(kind=LocatorKind.url, id="x")
        # Positive case: the same locator constructs once a url is supplied.
        Locator(kind=LocatorKind.url, id="x", url="https://x")

    def test_feed_item_requires_url(self):
        """kind=feed_item without ``url`` is rejected."""
        with self.assertRaises(ValidationError):
            Locator(kind=LocatorKind.feed_item, id="x")
class TestKnowledgeItemRFC(unittest.TestCase):
    """``KnowledgeItem`` envelope checked against ``knowledge-item.json``."""

    def _accepted(self, **over):
        """Build an accepted ``KnowledgeItem``; keyword arguments override defaults."""
        relevance_filter = Filter(
            stages=[FilterStage(name="llm_relevance", passed=True)],
            is_security_knowledge=True,
            confidence=0.9,
        )
        fields = {
            "schema_version": SCHEMA_VERSION,
            "chunk_id": "chk:1",
            "artifact_id": "art:1",
            "event_id": "evt:1",
            "pipeline_run_id": "20260201T020000Z",
            "filtered_at": "2026-02-01T02:00:00Z",
            "status": KnowledgeStatus.accepted,
            "source": GITHUB_SOURCE,
            "locator": REPO_LOCATOR,
            "content": KnowledgeContent(text="x"),
            "filter": relevance_filter,
            **over,
        }
        return KnowledgeItem(**fields)

    def test_accepted_round_trips_canonical(self):
        """The default accepted item passes the canonical schema."""
        item = self._accepted()
        _round_trip_through_canonical(self, item, "knowledge-item.json")

    def test_accepted_requires_content(self):
        """An accepted item with content=None is rejected."""
        with self.assertRaises(ValidationError):
            self._accepted(content=None)

    def test_rejected_requires_rejection(self):
        """A rejected item without a ``rejection`` payload is rejected."""
        with self.assertRaises(ValidationError):
            self._accepted(status=KnowledgeStatus.rejected, content=None)

    def test_rejected_round_trips_canonical(self):
        """A rejected item carrying a ``Rejection`` passes the canonical schema."""
        rejection = Rejection(reason_code="NOISE", reason_message="ext denylisted")
        item = self._accepted(
            status=KnowledgeStatus.rejected,
            content=None,
            rejection=rejection,
        )
        _round_trip_through_canonical(self, item, "knowledge-item.json")

    def test_schema_version_pattern_enforced(self):
        """A schema_version of "bad" is a validation error."""
        with self.assertRaises(ValidationError):
            self._accepted(schema_version="bad")


class TestLinkProposalRFC(unittest.TestCase):
    """``LinkProposal`` envelope checked against ``link-proposal.json``."""

    def _proposal(self, **over):
        """Build a single-link ``LinkProposal``; keyword arguments override defaults."""
        fields = {
            "schema_version": SCHEMA_VERSION,
            "chunk_id": "chk:1",
            "artifact_id": "art:1",
            "pipeline_run_id": "20260201T020000Z",
            "classified_at": "2026-02-01T02:25:00Z",
            "knowledge": KNOWLEDGE_SNAPSHOT,
            "retrieval": RETRIEVAL,
            "links": [
                ProposedLink(cre_id="123-456", link_type="Related", confidence=0.94)
            ],
            "update_detection": UPDATE_NEW,
            **over,
        }
        return LinkProposal(**fields)

    def test_round_trips_canonical(self):
        """The default proposal passes the canonical schema."""
        proposal = self._proposal()
        _round_trip_through_canonical(self, proposal, "link-proposal.json")

    def test_status_is_fixed_to_linked(self):
        """``status`` reads "linked" without being supplied."""
        proposal = self._proposal()
        self.assertEqual(proposal.status, "linked")

    def test_links_min_length(self):
        """An empty ``links`` list is a validation error."""
        with self.assertRaises(ValidationError):
            self._proposal(links=[])

    def test_pipeline_run_id_required(self):
        """pipeline_run_id=None is a validation error."""
        with self.assertRaises(ValidationError):
            self._proposal(pipeline_run_id=None)

    def test_extra_field_forbidden(self):
        """The canonical schema reports errors for an unknown top-level key."""
        # Building a proposal and tacking an extra key onto the JSON should fail
        # canonical validation (extra="forbid" + additionalProperties:false).
        dumped = self._proposal().model_dump_json(exclude_none=True)
        payload = json.loads(dumped)
        payload["surprise"] = 1
        validator = _validator_for("link-proposal.json")
        errors = list(validator.iter_errors(payload))
        self.assertTrue(errors, "canonical schema must reject extra fields")
self.assertEqual(self._proposal().status, "linked") + + def test_links_min_length(self): + with self.assertRaises(ValidationError): + self._proposal(links=[]) + + def test_pipeline_run_id_required(self): + with self.assertRaises(ValidationError): + self._proposal(pipeline_run_id=None) + + def test_extra_field_forbidden(self): + # Building a proposal and tacking an extra key onto the JSON should fail + # canonical validation (extra="forbid" + additionalProperties:false). + payload = json.loads(self._proposal().model_dump_json(exclude_none=True)) + payload["surprise"] = 1 + errors = list(_validator_for("link-proposal.json").iter_errors(payload)) + self.assertTrue(errors, "canonical schema must reject extra fields") + + +class TestReviewItemRFC(unittest.TestCase): + def _review(self, **over): + base = dict( + schema_version=SCHEMA_VERSION, + review_id="rev_1", + chunk_id="chk:1", + artifact_id="art:1", + pipeline_run_id="20260201T020000Z", + created_at="2026-02-01T02:40:00Z", + reason_code="BELOW_THRESHOLD", + knowledge=KNOWLEDGE_SNAPSHOT, + retrieval=RETRIEVAL, + update_detection=UPDATE_NEW, + ) + base.update(over) + return ReviewItem(**base) + + def test_round_trips_canonical(self): + _round_trip_through_canonical(self, self._review(), "review-item.json") + + def test_status_is_fixed_to_review_required(self): + self.assertEqual(self._review().status, "review_required") + + def test_pipeline_run_id_required(self): + with self.assertRaises(ValidationError): + self._review(pipeline_run_id=None) + + def test_module_c_librarian_md_example_round_trips(self): + """Re-validate the literal example from docs/owasp-graph/apis/module-c-librarian.md.""" + example = { + "schema_version": "0.2.0", + "review_id": "rev_20260201_00042", + "chunk_id": "chk:art:OWASP/wstg:x:4", + "artifact_id": "art:OWASP/wstg:document/4-Web_Application_Security_Testing/x", + "pipeline_run_id": "20260201T020000Z", + "created_at": "2026-02-01T02:40:00Z", + "status": "review_required", + "reason_code": 
"BELOW_THRESHOLD", + "knowledge": { + "text": "Do not use MD5 for password hashing.", + "source": { + "type": "github", + "repo": "OWASP/wstg", + "commit_sha": "def7890", + "committed_at": "2026-02-01T01:30:00Z", + }, + "locator": { + "kind": "repo_path", + "id": "document/4-Web_Application_Security_Testing/x", + "path": "document/4-Web_Application_Security_Testing/x", + }, + }, + "retrieval": { + "retriever": "pgvector+cross-encoder/0.1.0", + "threshold": 0.8, + "candidates": [ + { + "cre_id": "123-456", + "cre_name": "Password storage", + "score_vector": 0.72, + "score_rerank": 0.76, + } + ], + "reranked": [{"cre_id": "123-456", "score_rerank": 0.76}], + }, + "suggested_links": [ + {"cre_id": "123-456", "link_type": "Related", "confidence": 0.76} + ], + "update_detection": {"is_update": False, "adversarial_flags": []}, + } + # Pydantic round-trip + review = ReviewItem.model_validate(example) + self.assertEqual(review.review_id, "rev_20260201_00042") + # canonical round-trip on the Pydantic dump + _round_trip_through_canonical(self, review, "review-item.json") + + +class TestKnowledgeQueueItem(unittest.TestCase): + """Internal model — mirrors B's SQL row. 
Not an RFC contract.""" + + def test_minimal_row(self): + item = KnowledgeQueueItem( + id="uuid-1", + source_repo="OWASP/ASVS", + source_path="4.0/en/0x11.md", + source_commit_sha="abc1234567890", + text="Verify X.", + confidence=0.9, + llm_label="KNOWLEDGE", + created_at="2026-05-25T02:25:00Z", + ) + self.assertIsNone(item.consumed_at) + + def test_confidence_bounds(self): + with self.assertRaises(ValidationError): + KnowledgeQueueItem( + id="x", + source_repo="r", + source_path="p", + source_commit_sha="c", + text="t", + confidence=1.5, + llm_label="KNOWLEDGE", + created_at="2026-05-25T02:25:00Z", + ) + + +class TestGoldenDataset(unittest.TestCase): + """The internal harness row mirrors fixtures/golden_dataset.schema.json.""" + + def _row(self, **over): + row = { + "id": "gold:test", + "schema_version": "0.1.0", + "slice": "positive", + "input": {"text": "x"}, + "expected": {"decision": "linked", "cre_ids": ["1-2"]}, + "provenance": {"ground_truth_source": "test"}, + } + row.update(over) + return row + + def test_explicit_requires_explicit_cre_ref(self): + with self.assertRaises(ValidationError): + GoldenDatasetRow.model_validate( + self._row(slice="explicit", input={"text": "x"}) + ) + + def test_update_requires_prior_text_and_is_update(self): + with self.assertRaises(ValidationError): + GoldenDatasetRow.model_validate( + self._row(slice="update", input={"text": "x"}) + ) + + def test_review_requires_reason_code(self): + with self.assertRaises(ValidationError): + GoldenDatasetRow.model_validate(self._row(expected={"decision": "review"})) + + def test_linked_requires_cre_ids(self): + with self.assertRaises(ValidationError): + GoldenDatasetRow.model_validate(self._row(expected={"decision": "linked"})) + + def test_valid_row_round_trips(self): + GoldenDatasetRow.model_validate(self._row()) + + +if __name__ == "__main__": + unittest.main() diff --git a/application/tests/librarian/scoring_test.py b/application/tests/librarian/scoring_test.py new file mode 100644 
index 000000000..98882c5d6 --- /dev/null +++ b/application/tests/librarian/scoring_test.py @@ -0,0 +1,32 @@ +import unittest + +from application.utils.librarian.scoring import jaccard, score_case + + +class TestScoring(unittest.TestCase): + def test_jaccard_values(self): + self.assertEqual(jaccard([], []), 1.0) + self.assertEqual(jaccard(["a"], []), 0.0) + self.assertEqual(jaccard(["a", "b"], ["a", "b"]), 1.0) + self.assertEqual(jaccard(["a", "b"], ["a"]), 0.5) + + def test_exact_match_is_correct(self): + self.assertTrue(score_case(["a", "b"], ["a", "b"])) + + def test_jaccard_boundary_with_top1_in_set_is_correct(self): + # expected {a,b}, predicted [a,c] -> jaccard = 1/3 < 0.5 -> incorrect + self.assertFalse(score_case(["a", "b"], ["a", "c"])) + # expected {a,b,c}, predicted [a,b] -> jaccard = 2/3 >= 0.5, top1 in set + self.assertTrue(score_case(["a", "b", "c"], ["a", "b"])) + + def test_top1_outside_set_is_incorrect(self): + # jaccard >= 0.5 but top-1 (z) not expected + self.assertFalse(score_case(["a", "b"], ["z", "a", "b"])) + + def test_empty_prediction(self): + self.assertTrue(score_case([], [])) + self.assertFalse(score_case(["a"], [])) + + +if __name__ == "__main__": + unittest.main() diff --git a/application/utils/librarian/__init__.py b/application/utils/librarian/__init__.py new file mode 100644 index 000000000..379c131d1 --- /dev/null +++ b/application/utils/librarian/__init__.py @@ -0,0 +1,26 @@ +"""Module C — The Librarian. + +Maps accepted knowledge chunks (from Module B) to OpenCRE nodes: either +auto-links them or routes them to human review. + +Contracts (v0.2.0, RFC #734): + B -> C : KnowledgeItem (RFC envelope — what B emits) + internal: KnowledgeQueueItem (mirror of B's SQL row, master guide §1.2) + C -> graph : LinkProposal (confident auto-link, status=linked) + C -> D : ReviewItem (low-confidence / flagged, routed to HITL) + +Week 1-1 scope: contracts + config + tests only. No linking logic yet. 
+ +Vendored RFC JSON schemas live under ``_rfc_schemas/``. They are pinned to +upstream/owasp-graph @ 2b1437987768d5ed20fe9ee721ab9a898c4b84af (PR #734). +Resync by running: + + git fetch upstream owasp-graph + for f in link-proposal review-item knowledge-item proposed-link \\ + source-ref locator; do + git show upstream/owasp-graph:docs/owasp-graph/apis/schemas/$f.json \\ + > application/utils/librarian/_rfc_schemas/$f.json + done + +Update the SHA above, then re-run the schemas test suite. +""" diff --git a/application/utils/librarian/_rfc_schemas/knowledge-item.json b/application/utils/librarian/_rfc_schemas/knowledge-item.json new file mode 100644 index 000000000..d12f7cadf --- /dev/null +++ b/application/utils/librarian/_rfc_schemas/knowledge-item.json @@ -0,0 +1,95 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/knowledge-item.json", + "title": "KnowledgeItem", + "type": "object", + "additionalProperties": false, + "required": [ + "schema_version", + "chunk_id", + "artifact_id", + "event_id", + "pipeline_run_id", + "filtered_at", + "status", + "source", + "locator", + "filter" + ], + "properties": { + "schema_version": { "type": "string", "pattern": "^0\\.\\d+\\.\\d+$" }, + "chunk_id": { "type": "string" }, + "artifact_id": { "type": "string" }, + "event_id": { "type": "string" }, + "pipeline_run_id": { "type": "string" }, + "filtered_at": { "type": "string", "format": "date-time" }, + "status": { + "type": "string", + "enum": ["accepted", "rejected", "deferred"] + }, + "source": { "$ref": "source-ref.json" }, + "locator": { "$ref": "locator.json" }, + "content": { + "type": "object", + "additionalProperties": false, + "required": ["text"], + "properties": { + "text": { "type": "string", "minLength": 1 }, + "title_hint": { "type": "string" }, + "keywords": { + "type": "array", + "items": { "type": "string" } + }, + "language": { "type": "string", "default": "en" } + } + }, + "filter": { + "type": 
"object", + "additionalProperties": false, + "required": ["stages"], + "properties": { + "stages": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "required": ["name", "passed"], + "properties": { + "name": { + "type": "string", + "enum": ["regex_path", "regex_content", "llm_relevance"] + }, + "passed": { "type": "boolean" }, + "reason": { "type": "string" }, + "model": { "type": "string" }, + "latency_ms": { "type": "integer", "minimum": 0 } + } + } + }, + "is_security_knowledge": { "type": "boolean" }, + "security_summary": { "type": "string" }, + "confidence": { "type": "number", "minimum": 0, "maximum": 1 } + } + }, + "rejection": { + "type": "object", + "additionalProperties": false, + "required": ["reason_code"], + "properties": { + "reason_code": { "type": "string" }, + "reason_message": { "type": "string" } + } + } + }, + "allOf": [ + { + "if": { "properties": { "status": { "const": "accepted" } } }, + "then": { "required": ["content"] } + }, + { + "if": { "properties": { "status": { "const": "rejected" } } }, + "then": { "required": ["rejection"] } + } + ] +} diff --git a/application/utils/librarian/_rfc_schemas/link-proposal.json b/application/utils/librarian/_rfc_schemas/link-proposal.json new file mode 100644 index 000000000..ee6bd5687 --- /dev/null +++ b/application/utils/librarian/_rfc_schemas/link-proposal.json @@ -0,0 +1,91 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/link-proposal.json", + "title": "LinkProposal", + "type": "object", + "additionalProperties": false, + "required": [ + "schema_version", + "chunk_id", + "artifact_id", + "pipeline_run_id", + "classified_at", + "status", + "knowledge", + "retrieval", + "links", + "update_detection" + ], + "properties": { + "schema_version": { "type": "string", "pattern": "^0\\.\\d+\\.\\d+$" }, + "chunk_id": { "type": "string" }, + "artifact_id": { "type": "string" }, + 
"pipeline_run_id": { "type": "string" }, + "classified_at": { "type": "string", "format": "date-time" }, + "status": { "const": "linked" }, + "knowledge": { "$ref": "#/$defs/knowledge_snapshot" }, + "retrieval": { "$ref": "#/$defs/retrieval_audit" }, + "links": { + "type": "array", + "minItems": 1, + "items": { "$ref": "proposed-link.json" } + }, + "update_detection": { "$ref": "#/$defs/update_detection" } + }, + "$defs": { + "knowledge_snapshot": { + "type": "object", + "additionalProperties": false, + "required": ["text", "source", "locator"], + "properties": { + "text": { "type": "string" }, + "source": { "$ref": "source-ref.json" }, + "locator": { "$ref": "locator.json" }, + "security_summary": { "type": "string" } + } + }, + "cre_candidate": { + "type": "object", + "additionalProperties": false, + "required": ["cre_id"], + "properties": { + "cre_id": { "type": "string" }, + "cre_name": { "type": "string" }, + "score_vector": { "type": "number" }, + "score_rerank": { "type": "number" }, + "score_hybrid": { "type": "number" } + } + }, + "retrieval_audit": { + "type": "object", + "additionalProperties": false, + "required": ["retriever", "candidates", "reranked", "threshold"], + "properties": { + "retriever": { "type": "string" }, + "candidates": { + "type": "array", + "items": { "$ref": "#/$defs/cre_candidate" } + }, + "reranked": { + "type": "array", + "items": { "$ref": "#/$defs/cre_candidate" } + }, + "threshold": { "type": "number", "minimum": 0, "maximum": 1 } + } + }, + "update_detection": { + "type": "object", + "additionalProperties": false, + "required": ["is_update"], + "properties": { + "is_update": { "type": "boolean" }, + "prior_chunk_id": { "type": "string" }, + "prior_document_ref": { "type": "string" }, + "adversarial_flags": { + "type": "array", + "items": { "type": "string" } + } + } + } + } +} diff --git a/application/utils/librarian/_rfc_schemas/locator.json b/application/utils/librarian/_rfc_schemas/locator.json new file mode 100644 index 
000000000..eb977e866 --- /dev/null +++ b/application/utils/librarian/_rfc_schemas/locator.json @@ -0,0 +1,30 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/locator.json", + "title": "Locator", + "type": "object", + "additionalProperties": false, + "required": ["kind", "id"], + "properties": { + "kind": { + "type": "string", + "enum": ["repo_path", "url", "feed_item"] + }, + "id": { "type": "string", "minLength": 1 }, + "path": { "type": "string" }, + "url": { "type": "string", "format": "uri" }, + "title": { "type": "string" } + }, + "allOf": [ + { + "if": { "properties": { "kind": { "const": "repo_path" } } }, + "then": { "required": ["path"] } + }, + { + "if": { + "properties": { "kind": { "enum": ["url", "feed_item"] } } + }, + "then": { "required": ["url"] } + } + ] +} diff --git a/application/utils/librarian/_rfc_schemas/proposed-link.json b/application/utils/librarian/_rfc_schemas/proposed-link.json new file mode 100644 index 000000000..fd27d13a1 --- /dev/null +++ b/application/utils/librarian/_rfc_schemas/proposed-link.json @@ -0,0 +1,14 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/proposed-link.json", + "title": "ProposedLink", + "type": "object", + "additionalProperties": false, + "required": ["cre_id", "link_type", "confidence"], + "properties": { + "cre_id": { "type": "string", "minLength": 1 }, + "link_type": { "type": "string" }, + "confidence": { "type": "number", "minimum": 0, "maximum": 1 }, + "rationale": { "type": "string" } + } +} diff --git a/application/utils/librarian/_rfc_schemas/review-item.json b/application/utils/librarian/_rfc_schemas/review-item.json new file mode 100644 index 000000000..f5a1de19c --- /dev/null +++ b/application/utils/librarian/_rfc_schemas/review-item.json @@ -0,0 +1,45 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/review-item.json", 
+ "title": "ReviewItem", + "type": "object", + "additionalProperties": false, + "required": [ + "schema_version", + "review_id", + "chunk_id", + "artifact_id", + "pipeline_run_id", + "created_at", + "status", + "reason_code", + "knowledge", + "retrieval", + "update_detection" + ], + "properties": { + "schema_version": { "type": "string", "pattern": "^0\\.\\d+\\.\\d+$" }, + "review_id": { "type": "string" }, + "chunk_id": { "type": "string" }, + "artifact_id": { "type": "string" }, + "pipeline_run_id": { "type": "string" }, + "created_at": { "type": "string", "format": "date-time" }, + "status": { "const": "review_required" }, + "reason_code": { + "type": "string", + "enum": [ + "BELOW_THRESHOLD", + "NO_CANDIDATES", + "ADVERSARIAL_FLAG", + "UPDATE_AMBIGUOUS" + ] + }, + "knowledge": { "$ref": "link-proposal.json#/$defs/knowledge_snapshot" }, + "retrieval": { "$ref": "link-proposal.json#/$defs/retrieval_audit" }, + "suggested_links": { + "type": "array", + "items": { "$ref": "proposed-link.json" } + }, + "update_detection": { "$ref": "link-proposal.json#/$defs/update_detection" } + } +} diff --git a/application/utils/librarian/_rfc_schemas/source-ref.json b/application/utils/librarian/_rfc_schemas/source-ref.json new file mode 100644 index 000000000..68ce821f3 --- /dev/null +++ b/application/utils/librarian/_rfc_schemas/source-ref.json @@ -0,0 +1,26 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opencre.org/schemas/oie/source-ref.json", + "title": "SourceRef", + "type": "object", + "additionalProperties": false, + "required": ["type", "committed_at"], + "properties": { + "type": { + "type": "string", + "enum": ["github", "url", "rss"] + }, + "repo": { "type": "string" }, + "url": { "type": "string", "format": "uri" }, + "commit_sha": { "type": "string", "minLength": 7 }, + "commit_message": { "type": "string" }, + "committed_at": { "type": "string", "format": "date-time" }, + "author_login": { "type": "string" } + }, + "allOf": [ 
+ { + "if": { "properties": { "type": { "const": "github" } } }, + "then": { "required": ["repo", "commit_sha"] } + } + ] +} diff --git a/application/utils/librarian/config_loader.py b/application/utils/librarian/config_loader.py new file mode 100644 index 000000000..a6e03232d --- /dev/null +++ b/application/utils/librarian/config_loader.py @@ -0,0 +1,33 @@ +"""Loads CRE_LIBRARIAN_* environment variables into a typed config. + +Loader only — nothing consumes these yet. Defaults match the OIE design doc so +later weeks (retriever W3, cross-encoder W4, SafetyGuard W5) read one source. +""" + +import os +from dataclasses import dataclass + + +@dataclass(frozen=True) +class LibrarianConfig: + crossencoder_model: str + top_k_retrieval: int + top_k_rerank: int + link_threshold: float + batch_size: int + ece_target: float + conformal_alpha: float + + +def load_config() -> LibrarianConfig: + return LibrarianConfig( + crossencoder_model=os.getenv( + "CRE_LIBRARIAN_CROSSENCODER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2" + ), + top_k_retrieval=int(os.getenv("CRE_LIBRARIAN_TOP_K_RETRIEVAL", "20")), + top_k_rerank=int(os.getenv("CRE_LIBRARIAN_TOP_K_RERANK", "5")), + link_threshold=float(os.getenv("CRE_LIBRARIAN_LINK_THRESHOLD", "0.8")), + batch_size=int(os.getenv("CRE_LIBRARIAN_BATCH_SIZE", "32")), + ece_target=float(os.getenv("CRE_LIBRARIAN_ECE_TARGET", "0.10")), + conformal_alpha=float(os.getenv("CRE_LIBRARIAN_CONFORMAL_ALPHA", "0.10")), + ) diff --git a/application/utils/librarian/hub_firewall.py b/application/utils/librarian/hub_firewall.py new file mode 100644 index 000000000..bd4493bdb --- /dev/null +++ b/application/utils/librarian/hub_firewall.py @@ -0,0 +1,40 @@ +"""TRACT-style hub-firewall for honest evaluation. + +Many golden standards (ASVS, WSTG, ...) are already linked into OpenCRE, so the +CRE "hub" representation can contain the exact text under test — retrieval then +echoes it back and inflates accuracy. 
Before scoring a row, strip that row's +text from the hub. + +The real CRE vector hub arrives W3; W1 models the hub as (cre_id, text) reps so +the methodology and its test exist before any accuracy number is claimed. +""" + +import re +from dataclasses import dataclass +from typing import Iterable, List + + +@dataclass(frozen=True) +class HubRep: + cre_id: str + text: str + + +def _norm(text: str) -> str: + return re.sub(r"\s+", " ", text).strip().lower() + + +def leaks(row_text: str, hub: Iterable[HubRep]) -> bool: + """True if any hub rep contains the row's text (i.e. would leak).""" + needle = _norm(row_text) + if not needle: + return False + return any(needle in _norm(rep.text) for rep in hub) + + +def firewall(row_text: str, hub: Iterable[HubRep]) -> List[HubRep]: + """Return the hub with every rep echoing row_text removed.""" + needle = _norm(row_text) + if not needle: + return list(hub) + return [rep for rep in hub if needle not in _norm(rep.text)] diff --git a/application/utils/librarian/knowledge_source.py b/application/utils/librarian/knowledge_source.py new file mode 100644 index 000000000..be7f97f65 --- /dev/null +++ b/application/utils/librarian/knowledge_source.py @@ -0,0 +1,34 @@ +"""Where Module C reads accepted chunks from. + +Defines the source interface plus a fixture-backed stub for testing. The real +DB-backed source (polling Module B's knowledge_queue table) lands W8 and yields +the same KnowledgeQueueItem rows; C synthesizes the RFC KnowledgeItem envelope +from each row at processing time (master guide §1.2). 
+""" + +import json +from abc import ABC, abstractmethod +from typing import Iterator + +from application.utils.librarian.schemas import KnowledgeQueueItem + + +class KnowledgeSource(ABC): + @abstractmethod + def items(self) -> Iterator[KnowledgeQueueItem]: + """Yield knowledge_queue rows awaiting classification.""" + raise NotImplementedError + + +class FixtureKnowledgeSource(KnowledgeSource): + """Reads knowledge_queue rows from a JSONL fixture (one JSON object per line).""" + + def __init__(self, jsonl_path: str) -> None: + self._path = jsonl_path + + def items(self) -> Iterator[KnowledgeQueueItem]: + with open(self._path, encoding="utf-8") as fh: + for line in fh: + line = line.strip() + if line: + yield KnowledgeQueueItem.model_validate_json(line) diff --git a/application/utils/librarian/schemas.py b/application/utils/librarian/schemas.py new file mode 100644 index 000000000..d1395f013 --- /dev/null +++ b/application/utils/librarian/schemas.py @@ -0,0 +1,364 @@ +"""Pydantic v2 contracts for Module C — aligned to RFC PR #734. + +Every RFC envelope below is round-tripped against its canonical JSON Schema in +``schemas_test.py`` (vendored under ``_rfc_schemas/``); any drift breaks the +build, not the next mentor review. + +Internal models (``KnowledgeQueueItem``, ``GoldenDatasetRow``) are not part of +the RFC — they mirror Module B's SQL row (master guide §1.2) and the regression +harness golden row, respectively. 
+""" + +from __future__ import annotations + +import re +from enum import Enum +from typing import List, Literal, Optional + +from pydantic import BaseModel, ConfigDict, Field, model_validator + +SCHEMA_VERSION = "0.2.0" +_SCHEMA_VERSION_RE = re.compile(r"^0\.\d+\.\d+$") + + +# ---------- Enums (RFC) ---------- + + +class KnowledgeStatus(str, Enum): + accepted = "accepted" + rejected = "rejected" + deferred = "deferred" + + +class SourceType(str, Enum): + github = "github" + url = "url" + rss = "rss" + + +class LocatorKind(str, Enum): + repo_path = "repo_path" + url = "url" + feed_item = "feed_item" + + +class FilterStageName(str, Enum): + regex_path = "regex_path" + regex_content = "regex_content" + llm_relevance = "llm_relevance" + + +class ReasonCode(str, Enum): + below_threshold = "BELOW_THRESHOLD" + no_candidates = "NO_CANDIDATES" + adversarial_flag = "ADVERSARIAL_FLAG" + update_ambiguous = "UPDATE_AMBIGUOUS" + + +# ---------- RFC sub-models ---------- + + +class SourceRef(BaseModel): + """RFC source-ref.json — required `committed_at`; github requires repo+sha.""" + + model_config = ConfigDict(extra="forbid") + type: SourceType + repo: Optional[str] = None + url: Optional[str] = None + commit_sha: Optional[str] = Field(default=None, min_length=7) + commit_message: Optional[str] = None + committed_at: str + author_login: Optional[str] = None + + @model_validator(mode="after") + def _conditional_github(self) -> "SourceRef": + if self.type == SourceType.github and (not self.repo or not self.commit_sha): + raise ValueError("type=github requires repo and commit_sha") + return self + + +class Locator(BaseModel): + """RFC locator.json — kind drives whether `path` or `url` is required.""" + + model_config = ConfigDict(extra="forbid") + kind: LocatorKind + id: str = Field(min_length=1) + path: Optional[str] = None + url: Optional[str] = None + title: Optional[str] = None + + @model_validator(mode="after") + def _conditional_kind(self) -> "Locator": + if self.kind == 
LocatorKind.repo_path and not self.path: + raise ValueError("kind=repo_path requires path") + if self.kind in (LocatorKind.url, LocatorKind.feed_item) and not self.url: + raise ValueError("kind=url|feed_item requires url") + return self + + +class KnowledgeContent(BaseModel): + """RFC knowledge-item.json#/properties/content.""" + + model_config = ConfigDict(extra="forbid") + text: str = Field(min_length=1) + title_hint: Optional[str] = None + keywords: Optional[List[str]] = None + language: Optional[str] = None + + +class FilterStage(BaseModel): + """RFC knowledge-item.json#/properties/filter/properties/stages[*].""" + + model_config = ConfigDict(extra="forbid") + name: FilterStageName + passed: bool + reason: Optional[str] = None + model: Optional[str] = None + latency_ms: Optional[int] = Field(default=None, ge=0) + + +class Filter(BaseModel): + """RFC knowledge-item.json#/properties/filter.""" + + model_config = ConfigDict(extra="forbid") + stages: List[FilterStage] = Field(min_length=1) + is_security_knowledge: Optional[bool] = None + security_summary: Optional[str] = None + confidence: Optional[float] = Field(default=None, ge=0, le=1) + + +class Rejection(BaseModel): + """RFC knowledge-item.json#/properties/rejection.""" + + model_config = ConfigDict(extra="forbid") + reason_code: str + reason_message: Optional[str] = None + + +class CreCandidate(BaseModel): + """RFC link-proposal.json#/$defs/cre_candidate (shared by candidates+reranked).""" + + model_config = ConfigDict(extra="forbid") + cre_id: str + cre_name: Optional[str] = None + score_vector: Optional[float] = None + score_rerank: Optional[float] = None + score_hybrid: Optional[float] = None + + +class RetrievalAudit(BaseModel): + """RFC link-proposal.json#/$defs/retrieval_audit.""" + + model_config = ConfigDict(extra="forbid") + retriever: str + candidates: List[CreCandidate] + reranked: List[CreCandidate] + threshold: float = Field(ge=0, le=1) + + +class ProposedLink(BaseModel): + """RFC 
proposed-link.json — used by both LinkProposal.links and ReviewItem.suggested_links.""" + + model_config = ConfigDict(extra="forbid") + cre_id: str = Field(min_length=1) + link_type: str + confidence: float = Field(ge=0, le=1) + rationale: Optional[str] = None + + +class KnowledgeSnapshot(BaseModel): + """RFC link-proposal.json#/$defs/knowledge_snapshot (shared with ReviewItem).""" + + model_config = ConfigDict(extra="forbid") + text: str + source: SourceRef + locator: Locator + security_summary: Optional[str] = None + + +class UpdateDetection(BaseModel): + """RFC link-proposal.json#/$defs/update_detection.""" + + model_config = ConfigDict(extra="forbid") + is_update: bool + prior_chunk_id: Optional[str] = None + prior_document_ref: Optional[str] = None + adversarial_flags: Optional[List[str]] = None + + +# ---------- RFC envelopes ---------- + + +class KnowledgeItem(BaseModel): + """RFC knowledge-item.json — B's full output envelope to C. + + `status=accepted` requires `content`; `status=rejected` requires `rejection`. 
+ """ + + model_config = ConfigDict(extra="forbid") + schema_version: str + chunk_id: str + artifact_id: str + event_id: str + pipeline_run_id: str + filtered_at: str + status: KnowledgeStatus + source: SourceRef + locator: Locator + content: Optional[KnowledgeContent] = None + filter: Filter + rejection: Optional[Rejection] = None + + @model_validator(mode="after") + def _rfc_rules(self) -> "KnowledgeItem": + if not _SCHEMA_VERSION_RE.match(self.schema_version): + raise ValueError(r"schema_version must match ^0\.\d+\.\d+$") + if self.status == KnowledgeStatus.accepted and self.content is None: + raise ValueError("status=accepted requires content") + if self.status == KnowledgeStatus.rejected and self.rejection is None: + raise ValueError("status=rejected requires rejection") + return self + + +class LinkProposal(BaseModel): + """RFC link-proposal.json — C's auto-link output, status='linked'.""" + + model_config = ConfigDict(extra="forbid") + schema_version: str + chunk_id: str + artifact_id: str + pipeline_run_id: str + classified_at: str + status: Literal["linked"] = "linked" + knowledge: KnowledgeSnapshot + retrieval: RetrievalAudit + links: List[ProposedLink] = Field(min_length=1) + update_detection: UpdateDetection + + @model_validator(mode="after") + def _schema_version_pattern(self) -> "LinkProposal": + if not _SCHEMA_VERSION_RE.match(self.schema_version): + raise ValueError(r"schema_version must match ^0\.\d+\.\d+$") + return self + + +class ReviewItem(BaseModel): + """RFC review-item.json — C's human-review output, status='review_required'.""" + + model_config = ConfigDict(extra="forbid") + schema_version: str + review_id: str + chunk_id: str + artifact_id: str + pipeline_run_id: str + created_at: str + status: Literal["review_required"] = "review_required" + reason_code: ReasonCode + knowledge: KnowledgeSnapshot + retrieval: RetrievalAudit + suggested_links: Optional[List[ProposedLink]] = None + update_detection: UpdateDetection + + 
@model_validator(mode="after") + def _schema_version_pattern(self) -> "ReviewItem": + if not _SCHEMA_VERSION_RE.match(self.schema_version): + raise ValueError(r"schema_version must match ^0\.\d+\.\d+$") + return self + + +# ---------- Internal (NOT RFC) ---------- + + +class KnowledgeQueueItem(BaseModel): + """Read-side mirror of Module B's `knowledge_queue` Postgres row. + + Per master guide §1.2: C reads these rows and synthesizes the RFC + `KnowledgeItem` envelope from them. Not a wire contract; tolerates extra + fields so B can extend the row without breaking C. + """ + + id: str + source_repo: str + source_path: str + source_commit_sha: str + text: str + confidence: float = Field(ge=0, le=1) + llm_label: str + llm_reasoning: Optional[str] = None + created_at: str + consumed_at: Optional[str] = None + + +# ---------- Golden dataset (internal, harness only) ---------- + + +class Slice(str, Enum): + explicit = "explicit" + positive = "positive" + hard_negative = "hard_negative" + update = "update" + ambiguous = "ambiguous" + + +class Decision(str, Enum): + linked = "linked" + review = "review" + + +class SourceStandard(str, Enum): + asvs = "ASVS" + wstg = "WSTG" + nist_800_53 = "NIST_800_53" + pci_dss = "PCI_DSS" + owasp_cheatsheet = "OWASP_CHEATSHEET" + other = "OTHER" + + +class GoldenInput(BaseModel): + model_config = ConfigDict(extra="forbid") + text: str = Field(min_length=1) + title_hint: Optional[str] = None + explicit_cre_ref: Optional[str] = None + prior_text: Optional[str] = None + source_standard: Optional[SourceStandard] = None + + +class GoldenExpected(BaseModel): + model_config = ConfigDict(extra="forbid") + decision: Decision + cre_ids: Optional[List[str]] = None + reason_code: Optional[ReasonCode] = None + is_update: Optional[bool] = None + + +class GoldenProvenance(BaseModel): + model_config = ConfigDict(extra="forbid") + standard_version: Optional[str] = None + section_path: Optional[str] = None + ground_truth_source: str + + +class 
GoldenDatasetRow(BaseModel): + model_config = ConfigDict(extra="forbid") + id: str = Field(min_length=1) + schema_version: str + slice: Slice + input: GoldenInput + expected: GoldenExpected + provenance: GoldenProvenance + notes: Optional[str] = None + + @model_validator(mode="after") + def _conditional_requirements(self) -> "GoldenDatasetRow": + if self.slice == Slice.explicit and not self.input.explicit_cre_ref: + raise ValueError("slice=explicit requires input.explicit_cre_ref") + if self.slice == Slice.update: + if not self.input.prior_text: + raise ValueError("slice=update requires input.prior_text") + if self.expected.is_update is None: + raise ValueError("slice=update requires expected.is_update") + if self.expected.decision == Decision.review and not self.expected.reason_code: + raise ValueError("decision=review requires expected.reason_code") + if self.expected.decision == Decision.linked and not self.expected.cre_ids: + raise ValueError("decision=linked requires expected.cre_ids") + return self diff --git a/application/utils/librarian/scoring.py b/application/utils/librarian/scoring.py new file mode 100644 index 000000000..c2450bd6a --- /dev/null +++ b/application/utils/librarian/scoring.py @@ -0,0 +1,35 @@ +"""Golden-set scoring for the eval harness. + +Implements the multi-link correctness rule. The rule itself (Q-D) is provisional +pending mentor confirmation at the Friday call — keep it isolated here so a +change touches one function only. +""" + +from typing import List, Sequence + +# TODO(Q-D): provisional default — confirm with mentor before relying on results. +# A predicted set is correct iff Jaccard(expected, predicted) >= 0.5 AND the +# top-1 prediction is in the expected set. 
+JACCARD_THRESHOLD = 0.5 + + +def jaccard(a: Sequence[str], b: Sequence[str]) -> float: + sa, sb = set(a), set(b) + if not sa and not sb: + return 1.0 + union = sa | sb + if not union: + return 0.0 + return len(sa & sb) / len(union) + + +def score_case(expected_cre_ids: Sequence[str], predicted_cre_ids: List[str]) -> bool: + """True if the prediction counts as correct under the Q-D default rule. + + predicted_cre_ids is rank-ordered; index 0 is the top-1 prediction. + """ + if not predicted_cre_ids: + return not expected_cre_ids # correct only if nothing was expected + if jaccard(expected_cre_ids, predicted_cre_ids) < JACCARD_THRESHOLD: + return False + return predicted_cre_ids[0] in set(expected_cre_ids) diff --git a/requirements.txt b/requirements.txt index c33189613..5e65f56a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -63,7 +63,7 @@ pyasn1 pyasn1-modules pycodestyle pycparser -pydantic +pydantic>=2,<3 pyee pyflakes PyGithub diff --git a/scripts/build_golden_dataset.py b/scripts/build_golden_dataset.py new file mode 100644 index 000000000..e4cf24dc2 --- /dev/null +++ b/scripts/build_golden_dataset.py @@ -0,0 +1,463 @@ +#!/usr/bin/env python +"""Build the Module C golden dataset from standards_cache.sqlite. + +Derives ground-truth CRE labels by joining ``node`` -> ``cre_node_links`` -> ``cre`` +in OpenCRE's own cache. 
The 5 slices are populated as follows: + + positive : all 277 ASVS requirements (1:1 mapping) + + multi-link rows from OWASP Top 10 and CWE (2-4 CREs) + hard_negative : ASVS requirements whose text contains a negation phrase + ("do not", "does not", "shall not", "should not"), with + their real DB CRE mapping (cross-encoder must beat cosine + on these without losing the right CRE) + explicit : synthesized text that literally cites a real cre.external_id + update : synthesized before/after pairs of real ASVS requirements, + ground-truth CRE pulled from DB at build time + ambiguous : broad ASVS V1.x SDLC/governance requirements, decision=review + +Output is deterministic (every query has an explicit ORDER BY and stable +formatting), so ``--check`` can verify the committed JSON has not drifted from +the DB-derived form. +""" + +import argparse +import json +import sqlite3 +import sys +from pathlib import Path +from typing import Dict, List, Optional + +REPO_ROOT = Path(__file__).resolve().parent.parent +DEFAULT_DB = REPO_ROOT / "standards_cache.sqlite" +DEFAULT_OUT = REPO_ROOT / "application/tests/librarian/fixtures/golden_dataset.json" + +SCHEMA_VERSION = "0.1.0" + + +# ---- Curated rows that need a real CRE id resolved at build time ---------- +# Each entry pins a real ASVS section_id; the build script pulls that section's +# ground-truth CRE from the DB. Text is synthesised; ground truth stays real. + +CURATED_EXPLICIT = [ + { + "id": "gold:explicit:V2.1.1", + "asvs_section_id": "V2.1.1", + "text_template": ( + "Per CRE {cre}, verify that user-set passwords are at least 12 " + "characters in length after removing leading and trailing whitespace." + ), + "title_hint": "Password length policy", + }, + { + "id": "gold:explicit:V3.4.1", + "asvs_section_id": "V3.4.1", + "text_template": ( + "Refer to CRE {cre}. Verify that cookie-based session tokens have " + "the Secure attribute set." 
+ ), + "title_hint": "Session cookie security", + }, + { + "id": "gold:explicit:V2.4.1", + "asvs_section_id": "V2.4.1", + "text_template": ( + "This control corresponds to CRE {cre}: passwords shall be stored " + "using an approved key derivation function." + ), + "title_hint": "Credential storage", + }, + { + "id": "gold:explicit:V4.1.1", + "asvs_section_id": "V4.1.1", + "text_template": ( + "See CRE {cre} for the canonical guidance on enforcing access " + "control rules at a trusted service layer." + ), + "title_hint": "Access control enforcement", + }, + { + "id": "gold:explicit:V8.3.1", + "asvs_section_id": "V8.3.1", + "text_template": ( + "Per CRE {cre}, sensitive data shall be sent to the server in the " + "HTTP message body or headers, never in the URL query string." + ), + "title_hint": "Sensitive data in transit", + }, +] + +CURATED_UPDATE = [ + { + "id": "gold:update:V2.1.1", + "asvs_section_id": "V2.1.1", + "text": ( + "Verify that user-set passwords are at least 12 characters in " + "length after removing leading and trailing spaces." + ), + "prior_text": ( + "Verify that user-set passwords are at least 12 characters in length." + ), + }, + { + "id": "gold:update:V3.4.1", + "asvs_section_id": "V3.4.1", + "text": ( + "Verify that cookie-based session tokens have the Secure and " + "SameSite attributes set." + ), + "prior_text": ( + "Verify that cookie-based session tokens have the Secure attribute set." + ), + }, + { + "id": "gold:update:V2.2.1", + "asvs_section_id": "V2.2.1", + "text": ( + "Verify that anti-automation controls are effective at mitigating " + "breached credential testing, brute force, account lockout, and " + "credential stuffing attacks." + ), + "prior_text": ( + "Verify that anti-automation controls are effective at mitigating " + "breached credential testing, brute force, and account lockout attacks." 
+ ), + }, + { + "id": "gold:update:V4.1.1", + "asvs_section_id": "V4.1.1", + "text": ( + "Verify that the application enforces access control rules on a " + "trusted service layer, with mandatory denials on missing context." + ), + "prior_text": ( + "Verify that the application enforces access control rules on a " + "trusted service layer." + ), + }, + { + "id": "gold:update:V8.3.1", + "asvs_section_id": "V8.3.1", + "text": ( + "Verify that sensitive data is sent to the server in the HTTP " + "message body or headers, and that any URL-borne parameters do " + "not contain sensitive data." + ), + "prior_text": ( + "Verify that sensitive data is sent to the server in the HTTP " + "message body or headers." + ), + }, +] + +CURATED_AMBIGUOUS = [ + { + "id": "gold:ambiguous:sdlc", + "text": ( + "Verify the use of a secure software development lifecycle that " + "addresses security in all stages of development." + ), + "title_hint": "Secure SDLC", + "reason_code": "BELOW_THRESHOLD", + }, + { + "id": "gold:ambiguous:culture", + "text": ( + "Security is everyone's responsibility and should be considered " + "throughout the organization." + ), + "reason_code": "NO_CANDIDATES", + }, + { + "id": "gold:ambiguous:governance", + "text": ( + "Verify that the organization has a documented information " + "security policy approved by senior management." + ), + "title_hint": "Governance", + "reason_code": "BELOW_THRESHOLD", + }, + { + "id": "gold:ambiguous:training", + "text": ( + "Verify that developers receive security training appropriate to " + "their role and responsibilities." + ), + "reason_code": "BELOW_THRESHOLD", + }, + { + "id": "gold:ambiguous:risk", + "text": ( + "Risk assessment processes are followed at every major design " "decision." 
+ ), + "reason_code": "BELOW_THRESHOLD", + }, +] + + +def _fetch_asvs_cre(conn: sqlite3.Connection, section_id: str) -> Optional[str]: + row = conn.execute( + """ + SELECT c.external_id + FROM node n + JOIN cre_node_links l ON l.node = n.id + JOIN cre c ON c.id = l.cre + WHERE n.name LIKE '%ASVS%' AND n.section_id = ? + ORDER BY c.external_id + LIMIT 1 + """, + (section_id,), + ).fetchone() + return row[0] if row else None + + +def build_positive_asvs(conn: sqlite3.Connection) -> List[Dict]: + rows = conn.execute( + """ + SELECT n.section_id, n.section, c.external_id + FROM node n + JOIN cre_node_links l ON l.node = n.id + JOIN cre c ON c.id = l.cre + WHERE n.name LIKE '%ASVS%' + ORDER BY n.section_id, c.external_id + """ + ).fetchall() + out = [] + for section_id, text, ext_id in rows: + out.append( + { + "id": f"gold:asvs:{section_id}:positive", + "schema_version": SCHEMA_VERSION, + "slice": "positive", + "input": {"text": text, "source_standard": "ASVS"}, + "expected": {"decision": "linked", "cre_ids": [ext_id]}, + "provenance": { + "standard_version": "4.0", + "section_path": section_id, + "ground_truth_source": "OpenCRE DB mapping (cre_node_links)", + }, + } + ) + return out + + +def build_positive_multilink(conn: sqlite3.Connection) -> List[Dict]: + # OWASP Top 10 2021 + CWE rows with 2-4 CRE mappings and real text. 
+ rows = conn.execute( + """ + SELECT n.id, n.name, n.section_id, n.section, + GROUP_CONCAT(c.external_id, '|') + FROM node n + JOIN cre_node_links l ON l.node = n.id + JOIN cre c ON c.id = l.cre + WHERE n.section IS NOT NULL AND length(n.section) > 20 + AND (n.name LIKE '%Top 10 2021%' OR n.name LIKE '%CWE%') + GROUP BY n.id + HAVING COUNT(DISTINCT c.external_id) BETWEEN 2 AND 4 + ORDER BY n.name, n.section_id + LIMIT 15 + """ + ).fetchall() + out = [] + for node_id, name, section_id, text, cre_concat in rows: + cre_ids = sorted(set(cre_concat.split("|"))) + std = "OTHER" + if "Top 10" in name: + std = "OTHER" # closest enum; not strictly ASVS/WSTG/NIST + prefix = "top10" if "Top 10" in name else "cwe" + out.append( + { + "id": f"gold:{prefix}:{section_id}:positive_multi", + "schema_version": SCHEMA_VERSION, + "slice": "positive", + "input": {"text": text, "source_standard": std}, + "expected": {"decision": "linked", "cre_ids": cre_ids}, + "provenance": { + "section_path": section_id, + "ground_truth_source": ( + "OpenCRE DB mapping (multi-CRE node from " + name + ")" + ), + }, + } + ) + return out + + +def build_hard_negative(conn: sqlite3.Connection) -> List[Dict]: + rows = conn.execute( + """ + SELECT n.section_id, n.section, c.external_id + FROM node n + JOIN cre_node_links l ON l.node = n.id + JOIN cre c ON c.id = l.cre + WHERE n.name LIKE '%ASVS%' + AND (LOWER(n.section) LIKE '%do not%' + OR LOWER(n.section) LIKE '%does not%' + OR LOWER(n.section) LIKE '%shall not%' + OR LOWER(n.section) LIKE '%should not%') + ORDER BY n.section_id, c.external_id + LIMIT 12 + """ + ).fetchall() + out = [] + for section_id, text, ext_id in rows: + out.append( + { + "id": f"gold:asvs:{section_id}:hard_negative", + "schema_version": SCHEMA_VERSION, + "slice": "hard_negative", + "input": {"text": text, "source_standard": "ASVS"}, + "expected": {"decision": "linked", "cre_ids": [ext_id]}, + "provenance": { + "section_path": section_id, + "ground_truth_source": ( + "OpenCRE DB 
mapping; negation phrasing (cross-encoder " + "must beat cosine without losing the correct CRE)" + ), + }, + } + ) + return out + + +def build_explicit(conn: sqlite3.Connection) -> List[Dict]: + out = [] + for entry in CURATED_EXPLICIT: + cre = _fetch_asvs_cre(conn, entry["asvs_section_id"]) + if cre is None: + continue # silently skip if the section isn't mapped (shouldn't happen) + text = entry["text_template"].format(cre=cre) + out.append( + { + "id": entry["id"], + "schema_version": SCHEMA_VERSION, + "slice": "explicit", + "input": { + "text": text, + "title_hint": entry["title_hint"], + "explicit_cre_ref": cre, + "source_standard": "ASVS", + }, + "expected": {"decision": "linked", "cre_ids": [cre]}, + "provenance": { + "section_path": entry["asvs_section_id"], + "ground_truth_source": ( + "synthesised text citing the real cre.external_id " + "from the OpenCRE DB" + ), + }, + } + ) + return out + + +def build_update(conn: sqlite3.Connection) -> List[Dict]: + out = [] + for entry in CURATED_UPDATE: + cre = _fetch_asvs_cre(conn, entry["asvs_section_id"]) + if cre is None: + continue + out.append( + { + "id": entry["id"], + "schema_version": SCHEMA_VERSION, + "slice": "update", + "input": { + "text": entry["text"], + "prior_text": entry["prior_text"], + "source_standard": "ASVS", + }, + "expected": { + "decision": "linked", + "cre_ids": [cre], + "is_update": True, + }, + "provenance": { + "section_path": entry["asvs_section_id"], + "ground_truth_source": ( + "synthesised before/after wording of the real ASVS " + "requirement; ground-truth CRE from the OpenCRE DB" + ), + }, + } + ) + return out + + +def build_ambiguous() -> List[Dict]: + out = [] + for entry in CURATED_AMBIGUOUS: + row = { + "id": entry["id"], + "schema_version": SCHEMA_VERSION, + "slice": "ambiguous", + "input": {"text": entry["text"]}, + "expected": { + "decision": "review", + "reason_code": entry["reason_code"], + }, + "provenance": { + "ground_truth_source": ( + "manually synthesised broad 
statement that should route " + "to human review (no single clear CRE target)" + ), + }, + } + if "title_hint" in entry: + row["input"]["title_hint"] = entry["title_hint"] + out.append(row) + return out + + +def build(conn: sqlite3.Connection) -> List[Dict]: + rows: List[Dict] = [] + rows.extend(build_explicit(conn)) + rows.extend(build_positive_asvs(conn)) + rows.extend(build_positive_multilink(conn)) + rows.extend(build_hard_negative(conn)) + rows.extend(build_update(conn)) + rows.extend(build_ambiguous()) + return rows + + +def main(argv: List[str]) -> int: + parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + parser.add_argument("--db", default=str(DEFAULT_DB)) + parser.add_argument("--out", default=str(DEFAULT_OUT)) + parser.add_argument( + "--check", + action="store_true", + help="re-derive and verify --out matches; exit non-zero on drift", + ) + args = parser.parse_args(argv) + + if not Path(args.db).exists(): + print(f"db not found: {args.db}", file=sys.stderr) + return 2 + + conn = sqlite3.connect(args.db) + try: + rows = build(conn) + finally: + conn.close() + + text = json.dumps(rows, indent=2, ensure_ascii=False) + "\n" + + if args.check: + existing = Path(args.out).read_text(encoding="utf-8") + if existing != text: + print( + "DRIFT: golden dataset is out of sync with the DB derivation. " + "Re-run without --check.", + file=sys.stderr, + ) + return 1 + print(f"OK: golden dataset matches derivation ({len(rows)} rows)") + return 0 + + Path(args.out).write_text(text, encoding="utf-8") + print(f"wrote {len(rows)} rows to {args.out}") + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/scripts/evaluate_librarian.py b/scripts/evaluate_librarian.py new file mode 100644 index 000000000..51c088404 --- /dev/null +++ b/scripts/evaluate_librarian.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +"""Module C regression harness — Week 1 skeleton. 
+ +Loads + validates the golden dataset, applies the TRACT hub-firewall (on by +default), and prints per-slice counts. No linking yet: predictions are empty, so +this only proves the dataset, scorer, and firewall wire together. Later weeks +plug the C.0 -> C.4 pipeline in where ``predict()`` is stubbed below, and swap +the stub hub for the real CRE vector hub. +""" + +import argparse +import json +import os +import sys +from collections import Counter +from typing import List + +# Bootstrap project root onto sys.path so this runs as a standalone script. +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +from application.utils.librarian.config_loader import load_config +from application.utils.librarian.hub_firewall import HubRep, firewall, leaks +from application.utils.librarian.schemas import GoldenDatasetRow +from application.utils.librarian.scoring import score_case + + +def load_dataset(path: str) -> List[GoldenDatasetRow]: + with open(path, encoding="utf-8") as fh: + raw = json.load(fh) + return [GoldenDatasetRow.model_validate(row) for row in raw] + + +def build_stub_hub(rows: List[GoldenDatasetRow]) -> List[HubRep]: + # W1 stub: the golden standards are already linked into OpenCRE, so seed the + # hub from their own text. This is the leakage the firewall must strip. + # W3 replaces this with the real CRE vector hub. + return [HubRep(row.id, row.input.text) for row in rows] + + +def predict(row: GoldenDatasetRow, hub: List[HubRep]) -> List[str]: + # W1 stub: no retriever/ranker yet. Returns no predictions. 
+ return [] + + +def main(argv: List[str]) -> int: + cfg = load_config() + parser = argparse.ArgumentParser(description="Module C eval harness (W1 skeleton)") + parser.add_argument("--dataset", required=True, help="path to golden_dataset.json") + parser.add_argument("--slice", help="only evaluate this slice") + parser.add_argument("--limit", type=int, help="cap number of rows") + parser.add_argument("--threshold", type=float, default=cfg.link_threshold) + parser.add_argument("--top_k_retrieval", type=int, default=cfg.top_k_retrieval) + parser.add_argument("--top_k_rerank", type=int, default=cfg.top_k_rerank) + parser.add_argument( + "--dry_run", action="store_true", help="no writes (always true in W1)" + ) + parser.add_argument( + "--no_hub_firewall", + action="store_true", + help="disable the leakage firewall (firewall is ON by default)", + ) + args = parser.parse_args(argv) + + rows = load_dataset(args.dataset) + if args.slice: + rows = [r for r in rows if r.slice.value == args.slice] + if args.limit: + rows = rows[: args.limit] + + hub = build_stub_hub(rows) + firewall_on = not args.no_hub_firewall + + per_slice = Counter(r.slice.value for r in rows) + correct = 0 + stripped = 0 + for row in rows: + hub_view = firewall(row.input.text, hub) if firewall_on else hub + if firewall_on and leaks(row.input.text, hub): + stripped += 1 + if score_case(row.expected.cre_ids or [], predict(row, hub_view)): + correct += 1 + + print(f"loaded {len(rows)} golden rows from {args.dataset}") + for slice_name in sorted(per_slice): + print(f" {slice_name:<14} {per_slice[slice_name]}") + print( + f"hub-firewall: {'ON' if firewall_on else 'OFF'}; " + f"stripped {stripped} leaking hub entries" + ) + print(f"correct (W1 stub, no predictions): {correct}/{len(rows)}") + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) From e0970166091dbaebb8b9e0385a85b7bc28df4ff2 Mon Sep 17 00:00:00 2001 From: PRAteek-singHWY Date: Tue, 9 Jun 2026 19:22:43 +0530 Subject: [PATCH 2/2] 
=?UTF-8?q?week=5F1:=20address=20review=20=E2=80=94=20?= =?UTF-8?q?config=20validation,=20schema=20non-empty=20constraints,=20=20?= =?UTF-8?q?=20fail-fast=20build,=20edge=20cases?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/librarian/config_loader_test.py | 26 ++++++++++ .../fixtures/golden_dataset.schema.json | 23 ++++++-- application/utils/librarian/config_loader.py | 52 +++++++++++++++---- application/utils/librarian/scoring.py | 2 - scripts/build_golden_dataset.py | 13 ++++- scripts/evaluate_librarian.py | 2 +- 6 files changed, 101 insertions(+), 17 deletions(-) diff --git a/application/tests/librarian/config_loader_test.py b/application/tests/librarian/config_loader_test.py index 1d22d751b..d74deb1b4 100644 --- a/application/tests/librarian/config_loader_test.py +++ b/application/tests/librarian/config_loader_test.py @@ -52,6 +52,32 @@ def test_bad_int_env_raises(self): with self.assertRaises(ValueError): load_config() + def test_link_threshold_above_one_raises(self): + with mock.patch.dict( + os.environ, {"CRE_LIBRARIAN_LINK_THRESHOLD": "1.2"}, clear=True + ): + with self.assertRaises(ValueError): + load_config() + + def test_negative_top_k_retrieval_raises(self): + with mock.patch.dict( + os.environ, {"CRE_LIBRARIAN_TOP_K_RETRIEVAL": "-1"}, clear=True + ): + with self.assertRaises(ValueError): + load_config() + + def test_rerank_greater_than_retrieval_raises(self): + with mock.patch.dict( + os.environ, + { + "CRE_LIBRARIAN_TOP_K_RETRIEVAL": "3", + "CRE_LIBRARIAN_TOP_K_RERANK": "5", + }, + clear=True, + ): + with self.assertRaises(ValueError): + load_config() + if __name__ == "__main__": unittest.main() diff --git a/application/tests/librarian/fixtures/golden_dataset.schema.json b/application/tests/librarian/fixtures/golden_dataset.schema.json index 7edb4de2d..a48932460 100644 --- a/application/tests/librarian/fixtures/golden_dataset.schema.json +++ 
b/application/tests/librarian/fixtures/golden_dataset.schema.json @@ -62,13 +62,23 @@ "allOf": [ { "if": { "properties": { "slice": { "const": "explicit" } } }, - "then": { "properties": { "input": { "required": ["text", "explicit_cre_ref"] } } } + "then": { + "properties": { + "input": { + "required": ["text", "explicit_cre_ref"], + "properties": { "explicit_cre_ref": { "minLength": 1 } } + } + } + } }, { "if": { "properties": { "slice": { "const": "update" } } }, "then": { "properties": { - "input": { "required": ["text", "prior_text"] }, + "input": { + "required": ["text", "prior_text"], + "properties": { "prior_text": { "minLength": 1 } } + }, "expected": { "required": ["decision", "is_update"] } } } @@ -79,7 +89,14 @@ }, { "if": { "properties": { "expected": { "properties": { "decision": { "const": "linked" } } } } }, - "then": { "properties": { "expected": { "required": ["decision", "cre_ids"] } } } + "then": { + "properties": { + "expected": { + "required": ["decision", "cre_ids"], + "properties": { "cre_ids": { "minItems": 1 } } + } + } + } } ] } diff --git a/application/utils/librarian/config_loader.py b/application/utils/librarian/config_loader.py index a6e03232d..a78452e2e 100644 --- a/application/utils/librarian/config_loader.py +++ b/application/utils/librarian/config_loader.py @@ -20,14 +20,48 @@ class LibrarianConfig: def load_config() -> LibrarianConfig: + crossencoder_model = os.getenv( + "CRE_LIBRARIAN_CROSSENCODER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2" + ) + top_k_retrieval = int(os.getenv("CRE_LIBRARIAN_TOP_K_RETRIEVAL", "20")) + top_k_rerank = int(os.getenv("CRE_LIBRARIAN_TOP_K_RERANK", "5")) + link_threshold = float(os.getenv("CRE_LIBRARIAN_LINK_THRESHOLD", "0.8")) + batch_size = int(os.getenv("CRE_LIBRARIAN_BATCH_SIZE", "32")) + ece_target = float(os.getenv("CRE_LIBRARIAN_ECE_TARGET", "0.10")) + conformal_alpha = float(os.getenv("CRE_LIBRARIAN_CONFORMAL_ALPHA", "0.10")) + + if top_k_retrieval <= 0: + raise ValueError( + 
f"CRE_LIBRARIAN_TOP_K_RETRIEVAL must be > 0, got {top_k_retrieval}" + ) + if top_k_rerank <= 0: + raise ValueError(f"CRE_LIBRARIAN_TOP_K_RERANK must be > 0, got {top_k_rerank}") + if top_k_rerank > top_k_retrieval: + raise ValueError( + f"CRE_LIBRARIAN_TOP_K_RERANK ({top_k_rerank}) must be <= " + f"CRE_LIBRARIAN_TOP_K_RETRIEVAL ({top_k_retrieval})" + ) + if batch_size <= 0: + raise ValueError(f"CRE_LIBRARIAN_BATCH_SIZE must be > 0, got {batch_size}") + if not 0.0 <= link_threshold <= 1.0: + raise ValueError( + f"CRE_LIBRARIAN_LINK_THRESHOLD must be in [0.0, 1.0], got {link_threshold}" + ) + if not 0.0 <= ece_target <= 1.0: + raise ValueError( + f"CRE_LIBRARIAN_ECE_TARGET must be in [0.0, 1.0], got {ece_target}" + ) + if not 0.0 <= conformal_alpha <= 1.0: + raise ValueError( + f"CRE_LIBRARIAN_CONFORMAL_ALPHA must be in [0.0, 1.0], got {conformal_alpha}" + ) + return LibrarianConfig( - crossencoder_model=os.getenv( - "CRE_LIBRARIAN_CROSSENCODER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2" - ), - top_k_retrieval=int(os.getenv("CRE_LIBRARIAN_TOP_K_RETRIEVAL", "20")), - top_k_rerank=int(os.getenv("CRE_LIBRARIAN_TOP_K_RERANK", "5")), - link_threshold=float(os.getenv("CRE_LIBRARIAN_LINK_THRESHOLD", "0.8")), - batch_size=int(os.getenv("CRE_LIBRARIAN_BATCH_SIZE", "32")), - ece_target=float(os.getenv("CRE_LIBRARIAN_ECE_TARGET", "0.10")), - conformal_alpha=float(os.getenv("CRE_LIBRARIAN_CONFORMAL_ALPHA", "0.10")), + crossencoder_model=crossencoder_model, + top_k_retrieval=top_k_retrieval, + top_k_rerank=top_k_rerank, + link_threshold=link_threshold, + batch_size=batch_size, + ece_target=ece_target, + conformal_alpha=conformal_alpha, ) diff --git a/application/utils/librarian/scoring.py b/application/utils/librarian/scoring.py index c2450bd6a..882f492a6 100644 --- a/application/utils/librarian/scoring.py +++ b/application/utils/librarian/scoring.py @@ -18,8 +18,6 @@ def jaccard(a: Sequence[str], b: Sequence[str]) -> float: if not sa and not sb: return 1.0 union = sa | sb - 
if not union: - return 0.0 return len(sa & sb) / len(union) diff --git a/scripts/build_golden_dataset.py b/scripts/build_golden_dataset.py index e4cf24dc2..0905bd265 100644 --- a/scripts/build_golden_dataset.py +++ b/scripts/build_golden_dataset.py @@ -324,7 +324,10 @@ def build_explicit(conn: sqlite3.Connection) -> List[Dict]: for entry in CURATED_EXPLICIT: cre = _fetch_asvs_cre(conn, entry["asvs_section_id"]) if cre is None: - continue # silently skip if the section isn't mapped (shouldn't happen) + raise ValueError( + f"no CRE mapping found for explicit row {entry['id']} " + f"(ASVS section {entry['asvs_section_id']})" + ) text = entry["text_template"].format(cre=cre) out.append( { @@ -355,7 +358,10 @@ def build_update(conn: sqlite3.Connection) -> List[Dict]: for entry in CURATED_UPDATE: cre = _fetch_asvs_cre(conn, entry["asvs_section_id"]) if cre is None: - continue + raise ValueError( + f"no CRE mapping found for update row {entry['id']} " + f"(ASVS section {entry['asvs_section_id']})" + ) out.append( { "id": entry["id"], @@ -443,6 +449,9 @@ def main(argv: List[str]) -> int: text = json.dumps(rows, indent=2, ensure_ascii=False) + "\n" if args.check: + if not Path(args.out).exists(): + print(f"--check: output file not found: {args.out}", file=sys.stderr) + return 1 existing = Path(args.out).read_text(encoding="utf-8") if existing != text: print( diff --git a/scripts/evaluate_librarian.py b/scripts/evaluate_librarian.py index 51c088404..53a72263a 100644 --- a/scripts/evaluate_librarian.py +++ b/scripts/evaluate_librarian.py @@ -64,7 +64,7 @@ def main(argv: List[str]) -> int: rows = load_dataset(args.dataset) if args.slice: rows = [r for r in rows if r.slice.value == args.slice] - if args.limit: + if args.limit is not None: rows = rows[: args.limit] hub = build_stub_hub(rows)