Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
83 changes: 83 additions & 0 deletions application/tests/librarian/config_loader_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os
import unittest
from unittest import mock

from application.utils.librarian.config_loader import LibrarianConfig, load_config


class TestConfigLoaderDefaults(unittest.TestCase):
    """Checks the config that ``load_config`` returns when nothing is overridden."""

    def test_defaults_when_env_unset(self):
        # Start from a completely empty environment so that no variable set
        # on the host machine can leak into the loaded configuration.
        with mock.patch.dict(os.environ, {}, clear=True):
            cfg = load_config()
            self.assertEqual(
                cfg.crossencoder_model, "cross-encoder/ms-marco-MiniLM-L-6-v2"
            )
            self.assertEqual(cfg.top_k_retrieval, 20)
            self.assertEqual(cfg.top_k_rerank, 5)
            self.assertEqual(cfg.batch_size, 32)
            # Float-valued settings are compared approximately so the test
            # does not hinge on exact binary float representation.
            self.assertAlmostEqual(cfg.link_threshold, 0.8)
            self.assertAlmostEqual(cfg.ece_target, 0.10)
            self.assertAlmostEqual(cfg.conformal_alpha, 0.10)

    def test_config_is_frozen(self):
        # Load under a cleared environment: without this the test would read
        # whatever the host has exported, and an invalid value there would
        # make load_config() fail before the immutability check even runs.
        with mock.patch.dict(os.environ, {}, clear=True):
            cfg = load_config()
        before = cfg.link_threshold
        # The concrete exception type depends on how the config class enforces
        # immutability, which is not visible from this test module.
        with self.assertRaises(Exception):
            cfg.link_threshold = 0.5  # type: ignore[misc]
        # A rejected assignment must also leave the stored value untouched.
        self.assertEqual(cfg.link_threshold, before)


class TestConfigLoaderOverrides(unittest.TestCase):
    """Exercises ``load_config`` with ``CRE_LIBRARIAN_*`` variables set."""

    # One valid override per setting, as raw environment strings.
    OVERRIDES = {
        "CRE_LIBRARIAN_CROSSENCODER_MODEL": "cross-encoder/other",
        "CRE_LIBRARIAN_TOP_K_RETRIEVAL": "50",
        "CRE_LIBRARIAN_TOP_K_RERANK": "10",
        "CRE_LIBRARIAN_LINK_THRESHOLD": "0.7",
        "CRE_LIBRARIAN_BATCH_SIZE": "64",
        "CRE_LIBRARIAN_ECE_TARGET": "0.05",
        "CRE_LIBRARIAN_CONFORMAL_ALPHA": "0.20",
    }

    def _assert_rejected(self, env):
        # Replace the whole environment with ``env`` and require that
        # load_config() refuses it with a ValueError.
        isolated_env = mock.patch.dict(os.environ, env, clear=True)
        with isolated_env, self.assertRaises(ValueError):
            load_config()

    def test_env_overrides_apply(self):
        # (assertion, attribute, expected) triples, checked in this order.
        # Float-valued settings use the approximate comparison.
        expectations = [
            (self.assertEqual, "crossencoder_model", "cross-encoder/other"),
            (self.assertEqual, "top_k_retrieval", 50),
            (self.assertEqual, "top_k_rerank", 10),
            (self.assertAlmostEqual, "link_threshold", 0.7),
            (self.assertEqual, "batch_size", 64),
            (self.assertAlmostEqual, "ece_target", 0.05),
            (self.assertAlmostEqual, "conformal_alpha", 0.20),
        ]
        with mock.patch.dict(os.environ, self.OVERRIDES, clear=True):
            cfg = load_config()
            for check, attribute, wanted in expectations:
                check(getattr(cfg, attribute), wanted)

    def test_bad_int_env_raises(self):
        self._assert_rejected({"CRE_LIBRARIAN_TOP_K_RETRIEVAL": "not-an-int"})

    def test_link_threshold_above_one_raises(self):
        self._assert_rejected({"CRE_LIBRARIAN_LINK_THRESHOLD": "1.2"})

    def test_negative_top_k_retrieval_raises(self):
        self._assert_rejected({"CRE_LIBRARIAN_TOP_K_RETRIEVAL": "-1"})

    def test_rerank_greater_than_retrieval_raises(self):
        # Rerank depth (5) exceeds the retrieval depth (3).
        self._assert_rejected(
            {
                "CRE_LIBRARIAN_TOP_K_RETRIEVAL": "3",
                "CRE_LIBRARIAN_TOP_K_RERANK": "5",
            }
        )


# Lets this test module be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
123 changes: 123 additions & 0 deletions application/tests/librarian/dataset_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Sanity tests for the populated golden dataset.

These tests run in CI against the committed JSON. They do NOT require the DB —
the DB-driven derivation is covered by ``scripts/build_golden_dataset.py``'s
own ``--check`` mode, which the determinism test invokes when the DB is present.
"""

import json
import os
import subprocess
import sys
import unittest
from collections import Counter

import jsonschema
from pydantic import ValidationError

from application.utils.librarian.schemas import GoldenDatasetRow

# Directory containing this test module.
_HERE = os.path.dirname(__file__)
# Three directory levels up from this module's directory.
_REPO_ROOT = os.path.abspath(os.path.join(_HERE, "..", "..", ".."))

# Committed dataset and its JSON Schema, both under the local fixtures folder.
_DATASET, _JSON_SCHEMA = (
    os.path.join(_HERE, "fixtures", filename)
    for filename in ("golden_dataset.json", "golden_dataset.schema.json")
)

# Script run with ``--check`` by the determinism test, and the SQLite file
# whose presence decides whether that test runs or is skipped.
_BUILD_SCRIPT = os.path.join(_REPO_ROOT, "scripts", "build_golden_dataset.py")
_DB = os.path.join(_REPO_ROOT, "standards_cache.sqlite")


def _load(path):
    """Return the parsed contents of the UTF-8 encoded JSON file at *path*."""
    with open(path, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


class TestGoldenDatasetShape(unittest.TestCase):
    """Structural sanity checks over the committed golden dataset JSON."""

    @classmethod
    def setUpClass(cls):
        # Parse the dataset and its JSON Schema once; every test reads them.
        cls.rows = _load(_DATASET)
        cls.json_schema = _load(_JSON_SCHEMA)

    def test_has_at_least_the_asvs_core(self):
        # Master guide §9.3 + golden-dataset-plan §3: at least 277 ASVS-core rows.
        source_prefix = "OpenCRE DB mapping (cre_node_links)"
        asvs_core_total = 0
        for row in self.rows:
            if row["slice"] != "positive":
                continue
            if row["provenance"]["ground_truth_source"].startswith(source_prefix):
                asvs_core_total += 1
        self.assertGreaterEqual(asvs_core_total, 277)

    def test_all_five_slices_present(self):
        rows_per_slice = Counter(row["slice"] for row in self.rows)
        required_slices = {
            "explicit",
            "positive",
            "hard_negative",
            "update",
            "ambiguous",
        }
        self.assertEqual(set(rows_per_slice), required_slices)
        # Every slice has at least a few rows so the harness can stratify.
        for slice_name, row_total in rows_per_slice.items():
            self.assertGreaterEqual(
                row_total,
                5,
                msg=f"slice {slice_name} only has {row_total} rows",
            )

    def test_multilink_positive_rows_exist(self):
        # The Q-D scoring rule has nothing to exercise unless multi-link
        # ground truth is actually present somewhere.
        multi_link_total = 0
        for row in self.rows:
            if row["slice"] != "positive":
                continue
            # A missing or empty ``cre_ids`` counts as zero links.
            linked_ids = row.get("expected", {}).get("cre_ids") or []
            if len(linked_ids) > 1:
                multi_link_total += 1
        self.assertGreater(
            multi_link_total, 0, "no multi-link positive rows present"
        )

    def test_every_row_validates_against_pydantic_model(self):
        # Collect (row id, error text) for every row the model rejects so the
        # failure message can show the first offender.
        failures = []
        for row in self.rows:
            try:
                GoldenDatasetRow.model_validate(row)
            except ValidationError as exc:
                failures.append((row.get("id"), str(exc)))
        self.assertEqual(failures, [], msg=f"first failure: {failures[:1]}")

    def test_every_row_validates_against_json_schema(self):
        validator = jsonschema.Draft202012Validator(self.json_schema)
        reported = []
        for row in self.rows:
            violations = list(validator.iter_errors(row))
            if violations:
                # Keep only the violation whose text sorts first, per row.
                earliest = min(violations, key=str)
                reported.append((row.get("id"), [str(earliest)]))
            # Three failing rows are enough to diagnose; stop scanning.
            if len(reported) >= 3:
                break
        self.assertEqual(reported, [])

    def test_provenance_is_recorded_per_row(self):
        for row in self.rows:
            recorded_source = row["provenance"].get("ground_truth_source")
            self.assertTrue(
                recorded_source,
                msg=f"row {row.get('id')} missing ground_truth_source",
            )

    def test_ids_are_unique(self):
        all_ids = [row["id"] for row in self.rows]
        distinct_total = len(set(all_ids))
        self.assertEqual(len(all_ids), distinct_total)


class TestDatasetDeterminism(unittest.TestCase):
    """The committed JSON must re-derive identically from the DB."""

    def test_build_check_matches_committed_dataset(self):
        db_available = os.path.exists(_DB)
        if not db_available:
            self.skipTest("standards_cache.sqlite not present")

        # Run the build script's ``--check`` mode under the same interpreter
        # that is executing the tests, capturing its output as text.
        command = [sys.executable, _BUILD_SCRIPT, "--check"]
        completed = subprocess.run(command, capture_output=True, text=True)

        # Surface the script's own output if it reports a mismatch.
        diagnostics = f"stdout={completed.stdout}\nstderr={completed.stderr}"
        self.assertEqual(completed.returncode, 0, msg=diagnostics)


# Lets this test module be executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading
Loading