sadjadeb · sadjadeb · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,41 @@
+"""Shared pytest fixtures and helpers for the DetectZoo test-suite."""
+
+from __future__ import annotations
+
+import importlib
+
+import pytest
+
+import detectzoo  # noqa: F401  (ensures registries are populated)
+from detectzoo.core.base import BaseDetector, DetectionResult
+
+
+def require_modality(modality: str) -> None:
+    """Skip the calling test unless ``detectzoo.detectors.<modality>`` imports.
+
+    ``detectzoo/__init__.py`` is described as loading modality subpackages
+    best-effort, dropping one with a warning when an optional dependency
+    (``diffusers``, ``timm``, ...) is absent.  Re-importing the subpackage
+    here turns that silent gap into a ``pytest.skip`` carrying the original
+    error text, so partial installs report skips instead of failures.
+    """
+    package = f"detectzoo.detectors.{modality}"
+    try:
+        importlib.import_module(package)
+    except ImportError as err:  # pragma: no cover - depends on environment
+        pytest.skip(f"{modality} detectors unavailable ({err})")
+
+
+class DummyDetector(BaseDetector):
+    """Model-free detector: score is ``len(str(input)) / 100``, capped at 1.0."""
+
+    name = "dummy"
+    modality = "text"
+
+    def predict(self, input_data) -> DetectionResult:
+        return self._make_result(min(1.0, len(str(input_data)) / 100.0))
+
+
+@pytest.fixture
+def dummy_detector() -> DummyDetector:
+    return DummyDetector(threshold=0.5)  # fresh instance per test, threshold pinned at 0.5
diff --git a/tests/test_audio_detectors.py b/tests/test_audio_detectors.py
@@ -0,0 +1,55 @@
+"""Tests for audio-modality detectors.
+
+Audio detectors load pretrained checkpoints at construction time, so an
+actual prediction requires a network download and is marked
+``@pytest.mark.slow``.  The non-slow tests verify registration and
+interface invariants only, skipping when the audio subpackage cannot be
+imported.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from detectzoo.core.base import BaseDetector, DetectionResult
+from detectzoo.core.registry import _REGISTRY, list_detectors, load_detector
+
+from .conftest import require_modality
+
+
+class TestAudioRegistry:
+    def test_audio_detectors_registered(self):
+        """The audio listing is non-empty and includes the four expected names."""
+        require_modality("audio")
+        registered = set(list_detectors("audio"))
+        assert registered, "No audio detectors registered"
+        absent = {"aasist", "rawnet2", "res_tssdnet", "samo"} - registered
+        assert not absent, f"Missing expected audio detectors: {absent}"
+
+    def test_audio_detector_invariants(self):
+        """Every listed audio name maps to a BaseDetector subclass tagged ``audio``."""
+        require_modality("audio")
+        for detector_cls in (_REGISTRY[key] for key in list_detectors("audio")):
+            assert issubclass(detector_cls, BaseDetector)
+            assert detector_cls.modality == "audio"
+
+    def test_rawnet2_alias(self):
+        """The alias table maps ``rawnet2_audio`` to ``rawnet2``."""
+        require_modality("audio")
+        from detectzoo.core.registry import _ALIASES as alias_table
+        assert alias_table.get("rawnet2_audio") == "rawnet2"
+
+
+@pytest.mark.slow
+class TestAASISTDetector:
+    def test_predict_with_synthetic_audio(self):
+        """AASIST scores seeded noise within [0, 1] and reports ``score_spoof``."""
+        require_modality("audio")
+        detector = load_detector("aasist", device="cpu")
+        # A seeded generator keeps the synthetic waveform reproducible.
+        noise = np.random.default_rng(0).standard_normal(16000).astype(np.float32)
+        outcome = detector.predict(noise)
+        assert isinstance(outcome, DetectionResult)
+        assert 0.0 <= outcome.score <= 1.0
+        assert "score_spoof" in outcome.metadata
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
@@ -0,0 +1,96 @@
+"""Tests for the BenchmarkEvaluator (no model downloads).
+
+A trivial in-memory dataset and a keyword-based dummy detector are used so
+the evaluator's orchestration, metric aggregation, and persistence can be
+exercised without any heavy dependencies.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import List
+
+import pytest
+
+from detectzoo.benchmarks.evaluator import BenchmarkEvaluator
+from detectzoo.core.base import BaseDetector, DetectionResult
+from detectzoo.datasets.base import BaseDataset, DatasetItem
+
+
+class _MemoryDataset(BaseDataset):  # test double backed by a caller-supplied list
+    name = "memory"
+    modality = "text"
+
+    def __init__(self, items: List[DatasetItem], **kw):
+        super().__init__(**kw)  # any extra keyword arguments go straight to BaseDataset
+        self._mem = items  # stored by reference; no copy is made
+
+    def _load_all(self) -> List[DatasetItem]:
+        return self._mem  # the same list object that was passed to __init__
+
+
+class _KeywordDetector(BaseDetector):
+    """Score 0.9 when the lower-cased input contains the substring ``ai``, else 0.1."""
+
+    name = "keyword"
+    modality = "text"
+
+    def predict(self, input_data) -> DetectionResult:
+        return self._make_result(0.1 if "ai" not in str(input_data).lower() else 0.9)
+
+
+@pytest.fixture
+def dataset() -> _MemoryDataset:
+    """Four text items: two labelled 0 without 'ai', two labelled 1 containing it."""
+    plain_texts = ["a human wrote this", "another genuine note"]
+    keyword_texts = ["this is ai generated", "ai produced output"]
+    # Label-0 items come first, followed by the label-1 items.
+    items = [DatasetItem(data=text, label=0) for text in plain_texts]
+    items += [DatasetItem(data=text, label=1) for text in keyword_texts]
+    return _MemoryDataset(items)
+
+
+class TestBenchmarkEvaluator:
+    def test_evaluate_single(self, dataset):
+        """A perfectly separating detector yields accuracy and ROC-AUC of 1.0."""
+        report = BenchmarkEvaluator(dataset).evaluate_single(_KeywordDetector())
+        assert report["detector"] == "keyword"
+        assert report["n_samples"] == 4
+        assert report["accuracy"] == 1.0
+        assert report["roc_auc"] == 1.0
+
+    def test_save_scores(self, dataset):
+        """``save_scores=True`` adds a per-sample list carrying label and score."""
+        report = BenchmarkEvaluator(dataset).evaluate_single(_KeywordDetector(), save_scores=True)
+        assert "samples" in report
+        assert len(report["samples"]) == 4
+        assert {"label", "score"} <= set(report["samples"][0])
+
+    def test_run_multiple(self, dataset):
+        """``run`` returns a mapping keyed by detector name."""
+        by_name = BenchmarkEvaluator(dataset).run([_KeywordDetector()])
+        assert "keyword" in by_name
+        assert by_name["keyword"]["accuracy"] == 1.0
+
+    def test_run_and_save(self, dataset, tmp_path: Path):
+        """Results are written as JSON even though the ``nested`` directory is not pre-created."""
+        dest = tmp_path / "nested" / "results.json"
+        BenchmarkEvaluator(dataset).run_and_save([_KeywordDetector()], dest)
+        assert dest.is_file()
+        saved = json.loads(dest.read_text())
+        assert saved["keyword"]["n_samples"] == 4
+
+    def test_run_and_save_with_meta(self, dataset, tmp_path: Path):
+        """With ``meta`` given, the file holds ``meta`` plus metrics under ``results``."""
+        dest = tmp_path / "results.json"
+        BenchmarkEvaluator(dataset).run_and_save([_KeywordDetector()], dest, meta={"run": "test"})
+        saved = json.loads(dest.read_text())
+        assert saved["meta"] == {"run": "test"}
+        assert "keyword" in saved["results"]
+
+    def test_modality_inferred_from_dataset(self, dataset):
+        assert BenchmarkEvaluator(dataset).modality == "text"  # the fixture dataset declares "text"
+
+    def test_modality_override(self, dataset):
+        assert BenchmarkEvaluator(dataset, modality="audio").modality == "audio"  # explicit wins
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -0,0 +1,168 @@
+"""Tests for the core infrastructure (registry, base classes, results)."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from detectzoo.core.base import BaseDetector, DetectionResult
+from detectzoo.core.registry import (
+    _ALIASES,
+    _REGISTRY,
+    list_detectors,
+    load_detector,
+)
+
+VALID_MODALITIES = {"text", "image", "audio"}
+
+
+class TestDetectionResult:
+    def test_fields(self):
+        res = DetectionResult(score=0.75, label="ai", confidence=0.6)
+        assert res.score == 0.75
+        assert res.label == "ai"
+        assert res.confidence == 0.6
+        assert res.metadata == {}  # not passed, so this is the default
+
+    def test_default_confidence_and_metadata(self):
+        res = DetectionResult(score=0.5, label="human")  # rely on defaults for the rest
+        assert res.confidence == 0.0
+        assert res.metadata == {}
+
+    def test_metadata_is_independent_per_instance(self):
+        first = DetectionResult(score=0.1, label="human")
+        second = DetectionResult(score=0.9, label="ai")
+        first.metadata["k"] = 1  # would leak into `second` if the default dict were shared
+        assert second.metadata == {}
+
+    def test_repr(self):
+        shown = repr(DetectionResult(score=1.0, label="human", confidence=0.5))
+        assert "DetectionResult" in shown
+        assert "1.0000" in shown  # the 1.0 score is expected with four decimal places
+
+
+class TestRegistry:
+    def test_detectors_registered(self):
+        names = list_detectors()  # NOTE(review): is 24 reachable on partial installs?
+        assert len(names) >= 24, f"Expected >=24 detectors, got {len(names)}: {names}"
+
+    def test_registry_invariants(self):
+        """Every registered class must expose its registry name and a valid modality."""
+        for name, cls in _REGISTRY.items():
+            assert issubclass(cls, BaseDetector), f"{name} is not a BaseDetector"
+            assert cls.name == name, f"{name}: cls.name={cls.name!r} mismatches key"
+            assert cls.modality in VALID_MODALITIES, f"{name}: bad modality {cls.modality!r}"
+
+    def test_text_detectors_present(self):
+        # Text detectors have no heavy optional deps, so they always load.
+        text = set(list_detectors("text"))
+        # A representative, stable subset that should always exist.
+        expected = {
+            "log_likelihood", "log_rank", "rank", "entropy", "detectgpt",
+            "fast_detectgpt", "binoculars", "lrr", "npr", "dna_gpt",
+            "revise_detect", "imbd", "lastde", "lastde_pp", "radar",
+            "text_fluoroscopy", "coco", "roberta_base", "roberta_large",
+        }
+        floor = len(expected)  # count floor follows the subset, so the two cannot drift apart
+        assert len(text) >= floor, f"Expected >={floor} text detectors, got {sorted(text)}"
+        assert expected <= text, f"Missing expected text detectors: {expected - text}"
+
+    def test_load_unknown_raises(self):
+        with pytest.raises(ValueError, match="Unknown detector"):
+            load_detector("nonexistent_detector_xyz")
+
+    def test_alias_resolution(self):
+        # roberta aliases are pure-text and resolve without any download.
+        assert _ALIASES.get("roberta_openai_base") == "roberta_base"
+        assert _ALIASES.get("roberta_openai_large") == "roberta_large"
+        # Every alias must point at a real, registered detector.
+        for alias, target in _ALIASES.items():
+            assert target in _REGISTRY, f"Alias {alias!r} -> unknown target {target!r}"
+
+    def test_list_by_modality_filters(self):
+        for name in list_detectors("text"):  # the filtered listing must hold only text detectors
+            assert _REGISTRY[name].modality == "text"
+
+    def test_list_detectors_sorted(self):
+        names = list_detectors()
+        assert names == sorted(names)  # the listing is expected to come back already ordered
+
+
+class TestBaseDetector:
+    def _dummy(self, score: float, threshold: float = 0.5):
+        class _Dummy(BaseDetector):  # throwaway detector that always reports `score`
+            name = "dummy_core"
+            modality = "text"
+
+            def predict(self, input_data):
+                return self._make_result(score)
+
+        return _Dummy(threshold=threshold)
+
+    def test_make_result_above_threshold(self):
+        res = self._dummy(0.8).predict("hello")  # 0.8 against the helper's default 0.5
+        assert res.label == "ai"
+        assert res.score == 0.8
+
+    def test_make_result_at_threshold_is_ai(self):
+        # A score exactly on the threshold counts as "ai" (comparison is >=).
+        res = self._dummy(0.5, threshold=0.5).predict("x")
+        assert res.label == "ai"
+
+    def test_make_result_below_threshold(self):
+        res = self._dummy(0.2).predict("hello")
+        assert res.label == "human"
+
+    def test_confidence_in_unit_interval(self):
+        """A score away from the threshold yields a confidence in (0, 1]."""
+        res = self._dummy(0.8).predict("hello")
+        assert 0.0 < res.confidence <= 1.0
+
+    def test_make_result_passes_metadata(self):
+        """Extra keyword arguments to ``_make_result`` end up in ``metadata``."""
+        class _Dummy(BaseDetector):
+            name = "dummy_meta"
+            modality = "text"
+
+            def predict(self, input_data):
+                return self._make_result(0.9, extra="info", n=3)
+
+        assert _Dummy().predict("x").metadata == {"extra": "info", "n": 3}
+
+    def test_predict_batch(self):
+        class _LenDummy(BaseDetector):
+            name = "dummy_len"
+            modality = "text"
+
+            def predict(self, input_data):
+                return self._make_result(len(str(input_data)) / 100.0)
+
+        batch = _LenDummy().predict_batch(["a", "bb", "ccc"])  # three inputs in, three results out
+        assert len(batch) == 3
+        assert all(isinstance(item, DetectionResult) for item in batch)
+
+    def test_device_property_and_to(self):
+        det = self._dummy(0.5)
+        assert det.device == torch.device("cpu")  # NOTE(review): assumes the default device is CPU
+        det.to("cpu")
+        assert det.device == torch.device("cpu")
+
+    def test_unload_clears_modules(self):
+        class _ModelDummy(BaseDetector):
+            name = "dummy_model"
+            modality = "text"
+
+            def __init__(self, **kwargs):
+                super().__init__(**kwargs)
+                self.net = torch.nn.Linear(2, 2)
+
+            def predict(self, input_data):
+                return self._make_result(0.5)
+
+        det = _ModelDummy()
+        assert isinstance(det.net, torch.nn.Module)
+        det.unload()
+        assert det.net is None  # unload() is expected to replace the module attribute with None
+
+    def test_repr(self):
+        assert "dummy_core" in repr(self._dummy(0.5))  # repr must mention the detector's `name`