diff --git a/detectzoo/__init__.py b/detectzoo/__init__.py index 9f207a4..4236e19 100644 --- a/detectzoo/__init__.py +++ b/detectzoo/__init__.py @@ -1,49 +1,25 @@ """DetectZoo: A unified toolkit for detecting AI-generated content.""" -import importlib -import warnings - -from detectzoo.utils.hf_quiet import configure_hf_quiet - -configure_hf_quiet() - - -def _try_import(module: str) -> None: - """Import a modality subpackage, warning (not failing) if optional deps are missing. - - This keeps pure-audio or pure-text workflows usable even when optional - image/text/audio extras are not installed. - """ - try: - importlib.import_module(module) - except ImportError as exc: - warnings.warn( - f"detectzoo: skipped loading '{module}' ({exc}). " - "Install the corresponding optional extra to enable it " - "(e.g. `pip install detectzoo[audio]`).", - stacklevel=2, - ) - - -for _mod in ( - "detectzoo.datasets.audio", - "detectzoo.datasets.image", - "detectzoo.datasets.text", - "detectzoo.detectors.audio", - "detectzoo.detectors.image", - "detectzoo.detectors.text", -): - _try_import(_mod) - -from detectzoo.core.base import BaseDetector, DetectionResult # noqa: E402 -from detectzoo.core.registry import ( # noqa: E402 +# isort: off +import detectzoo.utils.hf_quiet # noqa: F401 +# isort: on + +# Eager-load modality subpackages so @register_detector / @register_dataset run. +import detectzoo.datasets.audio # noqa: F401 +import detectzoo.datasets.image # noqa: F401 +import detectzoo.datasets.text # noqa: F401 +import detectzoo.detectors.audio # noqa: F401 +import detectzoo.detectors.image # noqa: F401 +import detectzoo.detectors.text # noqa: F401 +from detectzoo.core.base import BaseDetector, DetectionResult +from detectzoo.core.registry import ( list_datasets, list_detectors, load_dataset, load_detector, ) -__version__ = "0.1.0" +__version__ = "0.1.4" __all__ = [ "BaseDetector", diff --git a/detectzoo/benchmarks/evaluator.py b/detectzoo/benchmarks/evaluator.py index 3f8c20a..d58bb0c 100644 --- a/detectzoo/benchmarks/evaluator.py +++ b/detectzoo/benchmarks/evaluator.py @@ -22,8 +22,15 @@ # anti-spoofing trio (EER / AUC / F1); image and text keep DetectZoo's # original column set unchanged. _DEFAULT_PRINT_COLUMNS = [ - "detector", "accuracy", "precision", "recall", "f1", - "tpr", "fpr", "roc_auc", "pr_auc", + "detector", + "accuracy", + "precision", + "recall", + "f1", + "tpr", + "fpr", + "roc_auc", + "pr_auc", ] _PRINT_VIEWS = { "audio": ["detector", "eer", "roc_auc", "f1"], @@ -98,10 +105,7 @@ def evaluate_single( metrics["n_samples"] = len(labels) if save_scores: - metrics["samples"] = [ - {"label": lbl, "score": scr} - for lbl, scr in zip(labels, scores) - ] + metrics["samples"] = [{"label": lbl, "score": scr} for lbl, scr in zip(labels, scores)] return metrics @@ -159,8 +163,12 @@ def run_and_print(self, detectors: Sequence[BaseDetector]) -> None: print(header) print("-" * len(header)) for metrics in all_results.values(): - row = " | ".join(f"{metrics.get(k, ''):>18}" if isinstance(metrics.get(k), str) - else f"{metrics.get(k, 0):>18.4f}" for k in header_keys) + row = " | ".join( + f"{metrics.get(k, ''):>18}" + if isinstance(metrics.get(k), str) + else f"{metrics.get(k, 0):>18.4f}" + for k in header_keys + ) print(row) def _save_payload( diff --git a/detectzoo/core/registry.py b/detectzoo/core/registry.py index cedec3f..f28cb4d 100644 --- a/detectzoo/core/registry.py +++ b/detectzoo/core/registry.py @@ -56,9 +56,7 @@ def load_detector(name: str, **kwargs: Any) -> BaseDetector: resolved = _ALIASES.get(name, name) if resolved not in _REGISTRY: available = ", ".join(sorted(set(_REGISTRY) | set(_ALIASES))) or "(none)" - raise ValueError( - f"Unknown detector '{name}'. Available detectors: {available}" - ) + raise ValueError(f"Unknown detector '{name}'. Available detectors: {available}") return _REGISTRY[resolved](**kwargs) @@ -66,9 +64,7 @@ def list_detectors(modality: str | None = None) -> list[str]: """Return names of all registered detectors, optionally filtered by modality.""" if modality is None: return sorted(_REGISTRY) - return sorted( - name for name, cls in _REGISTRY.items() if cls.modality == modality - ) + return sorted(name for name, cls in _REGISTRY.items() if cls.modality == modality) # ====================================================================== @@ -127,9 +123,7 @@ def load_dataset(name: str, **kwargs: Any) -> BaseDataset: resolved = _DATASET_ALIASES.get(name, name) if resolved not in _DATASET_REGISTRY: available = ", ".join(sorted(set(_DATASET_REGISTRY) | set(_DATASET_ALIASES))) or "(none)" - raise ValueError( - f"Unknown dataset '{name}'. Available datasets: {available}" - ) + raise ValueError(f"Unknown dataset '{name}'. Available datasets: {available}") return _DATASET_REGISTRY[resolved](**kwargs) @@ -137,6 +131,4 @@ def list_datasets(modality: str | None = None) -> list[str]: """Return names of all registered datasets, optionally filtered by modality.""" if modality is None: return sorted(_DATASET_REGISTRY) - return sorted( - name for name, cls in _DATASET_REGISTRY.items() if cls.modality == modality - ) + return sorted(name for name, cls in _DATASET_REGISTRY.items() if cls.modality == modality) diff --git a/detectzoo/datasets/__init__.py b/detectzoo/datasets/__init__.py index 141d849..ee0666e 100644 --- a/detectzoo/datasets/__init__.py +++ b/detectzoo/datasets/__init__.py @@ -52,4 +52,3 @@ "WritingPromptsDataset", "XSumDataset", ] - diff --git a/detectzoo/datasets/_download.py b/detectzoo/datasets/_download.py index fdcc346..2c30d78 100644 --- a/detectzoo/datasets/_download.py +++ b/detectzoo/datasets/_download.py @@ -100,9 +100,9 @@ def download_and_extract_tar( *, force: bool = False, ) -> Path: - """Download a tar archive (optionally gzip/bzip2/xz-compressed), extract it, and cache the result. + """Download a tar archive (gzip/bzip2/xz optional), extract it, and cache the result. - A ``.download_complete`` marker file prevents re-downloading on + A ``.download_complete`` marker file prevents re-downloading on subsequent calls. """ marker = dest_dir / ".download_complete" diff --git a/detectzoo/datasets/audio/asvspoof2019.py b/detectzoo/datasets/audio/asvspoof2019.py index 35767be..d4ca43c 100644 --- a/detectzoo/datasets/audio/asvspoof2019.py +++ b/detectzoo/datasets/audio/asvspoof2019.py @@ -89,10 +89,7 @@ def _looks_like_track_root(d: Path) -> bool: if (d / proto_leaf).is_dir(): return True # Tolerate extracts that contain only flac dirs (no protocols). - return any( - (d / leaf / "flac").is_dir() - for leaf in (train_leaf, dev_leaf, eval_leaf) - ) + return any((d / leaf / "flac").is_dir() for leaf in (train_leaf, dev_leaf, eval_leaf)) # User pointed straight at one of the partition leaves. if u.name in {train_leaf, dev_leaf, eval_leaf}: @@ -119,9 +116,7 @@ def _looks_like_track_root(d: Path) -> bool: def _protocol_file(track_root: Path, track: str, partition: str) -> Path: proto_dir = track_root / f"ASVspoof2019_{track}_cm_protocols" if not proto_dir.is_dir(): - raise FileNotFoundError( - f"ASVspoof 2019: missing protocols directory {proto_dir}" - ) + raise FileNotFoundError(f"ASVspoof 2019: missing protocols directory {proto_dir}") if partition == "train": names = (f"ASVspoof2019.{track}.cm.train.trn.txt",) elif partition == "dev": @@ -143,9 +138,7 @@ def _flac_dir(track_root: Path, track: str, partition: str) -> Path: sub = "train" if partition == "train" else "dev" if partition == "dev" else "eval" d = track_root / f"ASVspoof2019_{track}_{sub}" / "flac" if not d.is_dir(): - raise FileNotFoundError( - f"ASVspoof 2019: expected FLAC directory {d}" - ) + raise FileNotFoundError(f"ASVspoof 2019: expected FLAC directory {d}") return d diff --git a/detectzoo/datasets/audio/deepfake_eval_2024.py b/detectzoo/datasets/audio/deepfake_eval_2024.py index 54f1a2a..ee5a31d 100644 --- a/detectzoo/datasets/audio/deepfake_eval_2024.py +++ b/detectzoo/datasets/audio/deepfake_eval_2024.py @@ -127,9 +127,7 @@ def _load_from_metadata( meta_path = _find_metadata_csv(root) audio_dir = root / _AUDIO_SUBDIR if not audio_dir.is_dir(): - raise FileNotFoundError( - f"Deepfake-Eval-2024: expected audio directory {audio_dir}" - ) + raise FileNotFoundError(f"Deepfake-Eval-2024: expected audio directory {audio_dir}") items: List[DatasetItem] = [] missing: List[str] = [] @@ -161,8 +159,7 @@ def _load_from_metadata( if skip_missing: continue raise FileNotFoundError( - f"Deepfake-Eval-2024: audio missing for {filename!r} " - f"(looked under {audio_dir})" + f"Deepfake-Eval-2024: audio missing for {filename!r} (looked under {audio_dir})" ) meta: dict[str, Any] = { @@ -182,8 +179,7 @@ def _load_from_metadata( if not items: raise RuntimeError( - f"Deepfake-Eval-2024: no labelled audio loaded from {meta_path} " - f"(split={split!r})." + f"Deepfake-Eval-2024: no labelled audio loaded from {meta_path} (split={split!r})." ) if missing and skip_missing: from detectzoo.utils.logger import get_logger diff --git a/detectzoo/datasets/audio/for_dataset.py b/detectzoo/datasets/audio/for_dataset.py index da26627..428de1b 100644 --- a/detectzoo/datasets/audio/for_dataset.py +++ b/detectzoo/datasets/audio/for_dataset.py @@ -278,15 +278,13 @@ def _load_preprocessed( empty = [role for role, n in c.items() if n == 0] if empty: avail = { - sp: cs for sp, cs in per_split_counts.items() - if all(v > 0 for v in cs.values()) + sp: cs for sp, cs in per_split_counts.items() if all(v > 0 for v in cs.values()) } hint = ( f" Try `split={next(iter(avail))!r}` instead — that split " f"has both classes ({avail[next(iter(avail))]})." if avail - else " No other split has both classes either; the local " - "extraction is incomplete." + else " No other split has both classes either; the local extraction is incomplete." ) raise RuntimeError( f"FoR ({variant_key}, split={split!r}): the {empty!r} class " diff --git a/detectzoo/datasets/audio/in_the_wild.py b/detectzoo/datasets/audio/in_the_wild.py index 08224b4..ec58b2b 100644 --- a/detectzoo/datasets/audio/in_the_wild.py +++ b/detectzoo/datasets/audio/in_the_wild.py @@ -140,7 +140,9 @@ def _load_from_metadata_csv(root: Path, meta_path: Path) -> List[DatasetItem]: raise ValueError(f"In-The-Wild: empty metadata file {meta_path}") field_map = {f.strip().lower(): f for f in reader.fieldnames} file_col = field_map.get("file") or field_map.get("filename") or field_map.get("path") - label_col = field_map.get("label") or field_map.get("class") or field_map.get("ground_truth") + label_col = ( + field_map.get("label") or field_map.get("class") or field_map.get("ground_truth") + ) if not file_col or not label_col: raise ValueError( f"In-The-Wild: {meta_path} must contain file and label columns; " @@ -183,9 +185,7 @@ def _load_from_metadata_csv(root: Path, meta_path: Path) -> List[DatasetItem]: def _load_from_class_dirs(root: Path) -> List[DatasetItem]: pairs = _class_dirs(root) if not pairs: - raise FileNotFoundError( - f"In-The-Wild: no real/fake class folders under {root}" - ) + raise FileNotFoundError(f"In-The-Wild: no real/fake class folders under {root}") items: List[DatasetItem] = [] for dir_path, label, role in pairs: meta = {"modality": "audio", "class": role} diff --git a/detectzoo/datasets/base.py b/detectzoo/datasets/base.py index b52a86c..4238398 100644 --- a/detectzoo/datasets/base.py +++ b/detectzoo/datasets/base.py @@ -60,9 +60,7 @@ def load(self) -> List[DatasetItem]: return self._items @staticmethod - def _balance_and_truncate( - items: List[DatasetItem], max_samples: int - ) -> List[DatasetItem]: + def _balance_and_truncate(items: List[DatasetItem], max_samples: int) -> List[DatasetItem]: """Pick ``max_samples`` items balanced across labels 0 and 1. Takes ``max_samples // 2`` from each class. If one class is short, diff --git a/detectzoo/datasets/image/__init__.py b/detectzoo/datasets/image/__init__.py index 66be5b9..d21f4d6 100644 --- a/detectzoo/datasets/image/__init__.py +++ b/detectzoo/datasets/image/__init__.py @@ -1,12 +1,12 @@ """Image-modality datasets for AI-generated image detection.""" from detectzoo.datasets.image.aigcdetect import AIGCDetectDataset +from detectzoo.datasets.image.chameleon import ChameleonDataset from detectzoo.datasets.image.cnn_detection import CNNDetectionDataset from detectzoo.datasets.image.drct2m import DRCT2MDataset +from detectzoo.datasets.image.genimage import GenImageDataset from detectzoo.datasets.image.self_synthesis import SelfSynthesisDataset from detectzoo.datasets.image.univfd import UnivFDDataset -from detectzoo.datasets.image.genimage import GenImageDataset -from detectzoo.datasets.image.chameleon import ChameleonDataset __all__ = [ "AIGCDetectDataset", diff --git a/detectzoo/datasets/image/aigcdetect.py b/detectzoo/datasets/image/aigcdetect.py index 7b87a41..e2a6f36 100644 --- a/detectzoo/datasets/image/aigcdetect.py +++ b/detectzoo/datasets/image/aigcdetect.py @@ -8,7 +8,8 @@ Note: In the original PatchCraft / AIGCDetectBenchmark setup, the **training split** is based on the CNNSpot/CNNDetection training data (i.e., the ForenSynths-style ProGAN-based training - set), while AIGCDetect is primarily used as a large, unified test benchmark across many generators. + set), while AIGCDetect is primarily used as a large unified test benchmark across + many generators. GitHub: https://github.com/Ekko-zn/AIGCDetectBenchmark ModelScope: ``aemilia/AIGCDetectionBenchmark`` @@ -21,8 +22,8 @@ from pathlib import Path from typing import Any, List, Optional, Sequence, Tuple -from detectzoo.datasets.base import BaseDataset, DatasetItem from detectzoo.core.registry import register_dataset +from detectzoo.datasets.base import BaseDataset, DatasetItem _MODELSCOPE_AIGCDETECT_DATASET: str = "aemilia/AIGCDetectionBenchmark" @@ -67,11 +68,7 @@ def _partition_layout_ok(parent: Path, folder_name: str) -> bool: return False try: for sub in base.iterdir(): - if ( - sub.is_dir() - and (sub / "0_real").is_dir() - and (sub / "1_fake").is_dir() - ): + if sub.is_dir() and (sub / "0_real").is_dir() and (sub / "1_fake").is_dir(): return True except OSError: return False @@ -198,9 +195,7 @@ def ensure_aigcdetect_downloaded( if found is not None: return found - raise RuntimeError( - "Could not locate AIGCDetectBenchmark after ModelScope download." - ) + raise RuntimeError("Could not locate AIGCDetectBenchmark after ModelScope download.") def resolve_aigcdetect_partition(partition: str) -> Tuple[str, str]: @@ -219,7 +214,7 @@ class AIGCDetectDataset(BaseDataset): Parameters ---------- root : str or Path, optional - Directory intended to contain partition folders, or a parent to search. + Directory intended to contain partition folders, or a parent to search. When omitted, the default cache directory ``.detectzoo_data/aigcdetect/`` is used. partitions : sequence of str, optional Partition(s) to load. Each entry may be either a **column** name @@ -248,7 +243,6 @@ def __init__( self._resolved_root: Optional[Path] = None def _data_root(self) -> Path: - from detectzoo.datasets._download import get_cache_dir if self._resolved_root is not None: return self._resolved_root @@ -290,6 +284,12 @@ def _load_all(self) -> List[DatasetItem]: ): for path in sorted(d.rglob("*")): if path.is_file() and path.suffix.lower() in _IMAGE_EXTS: - items.append(DatasetItem(data=str(path), label=label, metadata={**base_meta, "source": source})) + items.append( + DatasetItem( + data=str(path), + label=label, + metadata={**base_meta, "source": source}, + ) + ) return items diff --git a/detectzoo/datasets/image/chameleon.py b/detectzoo/datasets/image/chameleon.py index f2fe3ea..00f76e2 100644 --- a/detectzoo/datasets/image/chameleon.py +++ b/detectzoo/datasets/image/chameleon.py @@ -19,7 +19,7 @@ from detectzoo.datasets.base import BaseDataset, DatasetItem _GDRIVE_FILE_ID = "1QLYJMhy0CbBVT01BLkkw7KPPL5BpmxnH" -_ZIP_NAME = "chameleon.zip" +_ZIP_NAME = "chameleon.zip" _IMAGE_EXTS = frozenset({".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tif", ".tiff"}) @@ -45,7 +45,7 @@ class ChameleonDataset(BaseDataset): Root cache directory (default ``.detectzoo_data``). """ - name: str = "chameleon" + name: str = "chameleon" modality: str = "image" def __init__( @@ -56,7 +56,7 @@ def __init__( **kwargs: Any, ) -> None: super().__init__(**kwargs) - self.root = Path(root) if root is not None else None + self.root = Path(root) if root is not None else None self.cache_dir = cache_dir def _ensure_download(self) -> Path: @@ -74,7 +74,7 @@ def _ensure_download(self) -> Path: return found import gdown - + zip_path = dest / _ZIP_NAME if not zip_path.is_file(): gdown.download( @@ -98,15 +98,18 @@ def _ensure_download(self) -> Path: def _load_all(self) -> List[DatasetItem]: test_root = self._ensure_download() - real_dir = test_root / "0_real" - fake_dir = test_root / "1_fake" - meta = {"source_dataset": "chameleon", "split": "test"} + real_dir = test_root / "0_real" + fake_dir = test_root / "1_fake" + meta = {"source_dataset": "chameleon", "split": "test"} items: List[DatasetItem] = [] for label, directory, source in ((0, real_dir, "real"), (1, fake_dir, "fake")): for path in sorted(directory.rglob("*")): if path.is_file() and path.suffix.lower() in _IMAGE_EXTS: - items.append(DatasetItem( - data=str(path), label=label, - metadata={**meta, "source": source}, - )) - return items \ No newline at end of file + items.append( + DatasetItem( + data=str(path), + label=label, + metadata={**meta, "source": source}, + ) + ) + return items diff --git a/detectzoo/datasets/image/cnn_detection.py b/detectzoo/datasets/image/cnn_detection.py index 01bef50..efcb96a 100644 --- a/detectzoo/datasets/image/cnn_detection.py +++ b/detectzoo/datasets/image/cnn_detection.py @@ -13,8 +13,8 @@ from pathlib import Path from typing import List, Optional, Sequence, Tuple -from detectzoo.datasets.base import BaseDataset, DatasetItem from detectzoo.core.registry import register_dataset +from detectzoo.datasets.base import BaseDataset, DatasetItem _SPLITS: Tuple[str, ...] = ("train", "val", "test") SPLIT_TRAIN, SPLIT_VAL, SPLIT_TEST = _SPLITS @@ -58,9 +58,26 @@ _FOLDER_TO_COLUMN: dict[str, str] = {f: c for c, f in CNN_DETECTION_TEST_PARTITIONS} _PROGAN_CLASS_FOLDERS: Tuple[str, ...] = ( - "airplane", "bird", "bicycle", "boat", "bottle", "bus", "car", "cat", "cow", "chair", - "diningtable", "dog", "person", "pottedplant", "motorbike", "tvmonitor", "train", "sheep", - "sofa", "horse", + "airplane", + "bird", + "bicycle", + "boat", + "bottle", + "bus", + "car", + "cat", + "cow", + "chair", + "diningtable", + "dog", + "person", + "pottedplant", + "motorbike", + "tvmonitor", + "train", + "sheep", + "sofa", + "horse", ) _IMAGE_EXTS = frozenset({".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tif", ".tiff"}) @@ -130,7 +147,11 @@ def _collect_test(data_root: Path, col: str, folder: str) -> List[DatasetItem]: for label, directory, source in ((0, real_dir, "real"), (1, fake_dir, "fake")): for path in sorted(directory.rglob("*")): if path.is_file() and path.suffix.lower() in _IMAGE_EXTS: - out.append(DatasetItem(data=str(path), label=label, metadata={**meta, "source": source})) + out.append( + DatasetItem( + data=str(path), label=label, metadata={**meta, "source": source} + ) + ) return out diff --git a/detectzoo/datasets/image/drct2m.py b/detectzoo/datasets/image/drct2m.py index f5928aa..5ee59c9 100644 --- a/detectzoo/datasets/image/drct2m.py +++ b/detectzoo/datasets/image/drct2m.py @@ -10,7 +10,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Iterable, List, Optional, Sequence, Tuple +from typing import Any, Iterable, List, Sequence, Tuple from detectzoo.core.registry import register_dataset from detectzoo.datasets.base import BaseDataset, DatasetItem @@ -43,7 +43,8 @@ class DRCT2MDataset(BaseDataset): Parameters ---------- split : str, optional - If provided, look under ``//`` first (common values: ``train``, ``val``, ``test``). + If provided, look under ``//`` first (e.g. ``train``, ``val``, + ``test``). If omitted, scan the full dataset root for any ``0_real`` / ``1_fake`` pairs. root : str or Path, optional Download/extract location. When omitted, uses ``.detectzoo_data/drct2m/``. @@ -98,7 +99,7 @@ def _load_all(self) -> List[DatasetItem]: for split_name, search_root in self._candidate_search_roots(base): if not search_root.exists(): continue - + for root in _find_split_roots(search_root): real_dir, fake_dir = root / "0_real", root / "1_fake" rel = str(root.relative_to(base)) if root.is_relative_to(base) else str(root) @@ -117,4 +118,3 @@ def _load_all(self) -> List[DatasetItem]: return items return items - diff --git a/detectzoo/datasets/image/genimage.py b/detectzoo/datasets/image/genimage.py index c1896d4..c3f1180 100644 --- a/detectzoo/datasets/image/genimage.py +++ b/detectzoo/datasets/image/genimage.py @@ -55,7 +55,7 @@ def _find_split_dirs(part_dir: Path, split: str) -> Optional[Tuple[Path, Path]]: ai = candidate / "ai" nature = candidate / "nature" if ai.is_dir() and nature.is_dir(): - return nature, ai # (real=nature, fake=ai) + return nature, ai # (real=nature, fake=ai) return None @@ -92,6 +92,7 @@ def _try_snapshot_download_hf(dest: Path, *, partition: str, force: bool) -> Non # Dataset # --------------------------------------------------------------------------- + @register_dataset("genimage", aliases=["gen_image", "genimage_dataset"]) class GenImageDataset(BaseDataset): """GenImage partition with ``/ai/`` (fake) and ``/nature/`` (real). @@ -146,7 +147,11 @@ def __init__( def _ensure_download(self, partition: str) -> Tuple[Path, Path]: from detectzoo.datasets._download import get_cache_dir - base = self.root.resolve() if self.root is not None else get_cache_dir("genimage", self.cache_dir) + base = ( + self.root.resolve() + if self.root is not None + else get_cache_dir("genimage", self.cache_dir) + ) part_dir = base / partition part_dir.mkdir(parents=True, exist_ok=True) @@ -180,6 +185,9 @@ def _load_all(self) -> List[DatasetItem]: for label, directory, source in ((0, real_dir, "real"), (1, fake_dir, "fake")): for path in sorted(directory.rglob("*")): if path.is_file() and path.suffix.lower() in _IMAGE_EXTS: - items.append(DatasetItem(data=str(path), label=label, - metadata={**meta, "source": source})) + items.append( + DatasetItem( + data=str(path), label=label, metadata={**meta, "source": source} + ) + ) return items diff --git a/detectzoo/datasets/image/self_synthesis.py b/detectzoo/datasets/image/self_synthesis.py index 58c0e69..b52108d 100644 --- a/detectzoo/datasets/image/self_synthesis.py +++ b/detectzoo/datasets/image/self_synthesis.py @@ -13,9 +13,9 @@ from pathlib import Path from typing import List, Optional, Sequence, Tuple -from detectzoo.datasets.base import BaseDataset, DatasetItem -from detectzoo.datasets._download import extract_tar_archive from detectzoo.core.registry import register_dataset +from detectzoo.datasets._download import extract_tar_archive +from detectzoo.datasets.base import BaseDataset, DatasetItem _DEFAULT_GENERATORS: Tuple[str, ...] = ( "AttGAN", @@ -93,13 +93,15 @@ def _ensure_download(self) -> None: return try: - import gdown - except Exception as e: + import gdown + except Exception as e: raise ModuleNotFoundError( "Missing dependency `gdown`. Install with `pip install gdown`." ) from e - gdown.download_folder(_GDRIVE_GANGEN_FOLDER, output=str(dest_parent), quiet=False, use_cookies=False) + gdown.download_folder( + _GDRIVE_GANGEN_FOLDER, output=str(dest_parent), quiet=False, use_cookies=False + ) if not self._finalize_gangen_layout(dest_parent, gens): raise RuntimeError( diff --git a/detectzoo/datasets/image/univfd.py b/detectzoo/datasets/image/univfd.py index c84e652..440607c 100644 --- a/detectzoo/datasets/image/univfd.py +++ b/detectzoo/datasets/image/univfd.py @@ -15,8 +15,8 @@ from typing import Any, List, Optional, Sequence, Tuple from detectzoo.core.registry import register_dataset -from detectzoo.datasets.base import BaseDataset, DatasetItem from detectzoo.datasets._download import extract_tar_archive, get_cache_dir +from detectzoo.datasets.base import BaseDataset, DatasetItem _IMAGE_EXTS = frozenset({".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tif", ".tiff"}) @@ -37,9 +37,11 @@ def _collect_images(directory: Path, label: int, partition: str) -> List[DatasetItem]: - meta = {"source": "real" if label == 0 else "fake", - "partition": partition, - "source_dataset": "univfd_diffusion"} + meta = { + "source": "real" if label == 0 else "fake", + "partition": partition, + "source_dataset": "univfd_diffusion", + } return [ DatasetItem(data=str(p), label=label, metadata=meta) for p in sorted(directory.rglob("*")) @@ -82,8 +84,12 @@ def __init__( if (real_dir is None) ^ (fake_dir is None): raise ValueError("real_dir and fake_dir must both be set, or both omitted.") - self._manual_real: Optional[Path] = Path(real_dir).expanduser().resolve() if real_dir else None - self._manual_fake: Optional[Path] = Path(fake_dir).expanduser().resolve() if fake_dir else None + self._manual_real: Optional[Path] = ( + Path(real_dir).expanduser().resolve() if real_dir else None + ) + self._manual_fake: Optional[Path] = ( + Path(fake_dir).expanduser().resolve() if fake_dir else None + ) if partitions is None or list(partitions) == ["all"]: self._keys = UNIVFD_DIFFUSION_PARTITIONS @@ -111,6 +117,7 @@ def _ensure_download(self, key: str) -> Tuple[Path, Path]: if not real_dir.is_dir(): import gdown + archive = self.root / f"{key}.tar.gz" if not archive.is_file(): gdown.download_folder( @@ -126,10 +133,9 @@ def _ensure_download(self, key: str) -> Tuple[Path, Path]: def _load_all(self) -> List[DatasetItem]: if self._manual_real is not None: meta_key = self.partitions[0] if len(self.partitions) == 1 else "manual" - return ( - _collect_images(self._manual_real, label=0, partition=meta_key) - + _collect_images(self._manual_fake, label=1, partition=meta_key) - ) + return _collect_images( + self._manual_real, label=0, partition=meta_key + ) + _collect_images(self._manual_fake, label=1, partition=meta_key) items: List[DatasetItem] = [] for key in self._keys: diff --git a/detectzoo/datasets/text/cheat.py b/detectzoo/datasets/text/cheat.py index 0e6fd8d..871cbcf 100644 --- a/detectzoo/datasets/text/cheat.py +++ b/detectzoo/datasets/text/cheat.py @@ -116,14 +116,16 @@ def _load_jsonl(self, fp: Path, category: str, label: int) -> List[DatasetItem]: with open(fp, encoding="utf-8") as fh: for line in fh: row: dict[str, Any] = json.loads(line) - items.append(DatasetItem( - data=row["abstract"], - label=label, - metadata={ - "category": category, - "title": row.get("title", ""), - }, - )) + items.append( + DatasetItem( + data=row["abstract"], + label=label, + metadata={ + "category": category, + "title": row.get("title", ""), + }, + ) + ) return items def _load_all(self) -> List[DatasetItem]: diff --git a/detectzoo/datasets/text/hc3.py b/detectzoo/datasets/text/hc3.py index d77572a..03e6b04 100644 --- a/detectzoo/datasets/text/hc3.py +++ b/detectzoo/datasets/text/hc3.py @@ -92,21 +92,27 @@ def _load_from_huggingface(self) -> List[DatasetItem]: items: List[DatasetItem] = [] for subset in self.subsets: - ds = load_dataset("json", data_files=f"hf://datasets/Hello-SimpleAI/HC3/{subset}.jsonl")[self.split] + ds = load_dataset( + "json", data_files=f"hf://datasets/Hello-SimpleAI/HC3/{subset}.jsonl" + )[self.split] for row in ds: question = row.get("question", "") for answer in row.get("human_answers", []): - items.append(DatasetItem( - data=answer, - label=0, - metadata={"source": "human", "question": question, "subset": subset}, - )) + items.append( + DatasetItem( + data=answer, + label=0, + metadata={"source": "human", "question": question, "subset": subset}, + ) + ) for answer in row.get("chatgpt_answers", []): - items.append(DatasetItem( - data=answer, - label=1, - metadata={"source": "chatgpt", "question": question, "subset": subset}, - )) + items.append( + DatasetItem( + data=answer, + label=1, + metadata={"source": "chatgpt", "question": question, "subset": subset}, + ) + ) return items def _load_from_local(self) -> List[DatasetItem]: @@ -121,17 +127,21 @@ def _load_from_local(self) -> List[DatasetItem]: row: dict[str, Any] = json.loads(line) question = row.get("question", "") for answer in row.get("human_answers", []): - items.append(DatasetItem( - data=answer, - label=0, - metadata={"source": "human", "question": question}, - )) + items.append( + DatasetItem( + data=answer, + label=0, + metadata={"source": "human", "question": question}, + ) + ) for answer in row.get("chatgpt_answers", []): - items.append(DatasetItem( - data=answer, - label=1, - metadata={"source": "chatgpt", "question": question}, - )) + items.append( + DatasetItem( + data=answer, + label=1, + metadata={"source": "chatgpt", "question": question}, + ) + ) return items def _load_all(self) -> List[DatasetItem]: diff --git a/detectzoo/datasets/text/hc3_plus.py b/detectzoo/datasets/text/hc3_plus.py index 0eb2956..ff65688 100644 --- a/detectzoo/datasets/text/hc3_plus.py +++ b/detectzoo/datasets/text/hc3_plus.py @@ -18,8 +18,7 @@ from detectzoo.datasets.base import BaseDataset, DatasetItem _GITHUB_RAW = ( - "https://raw.githubusercontent.com/suu990901/" - "chatgpt-comparison-detection-HC3-Plus/main/data/en" + "https://raw.githubusercontent.com/suu990901/chatgpt-comparison-detection-HC3-Plus/main/data/en" ) _FILES = ( @@ -124,11 +123,7 @@ def _ensure_downloaded(self) -> Path: def _files_to_load(self, data_dir: Path) -> list[tuple[Path, str]]: """Return ``(path, split_name)`` pairs to load.""" if self.splits: - return [ - (data_dir / self._SPLIT_MAP[s], s) - for s in self.splits - if s in self._SPLIT_MAP - ] + return [(data_dir / self._SPLIT_MAP[s], s) for s in self.splits if s in self._SPLIT_MAP] return [(data_dir / fname, sname) for sname, fname in self._SPLIT_MAP.items()] def _load_jsonl(self, fp: Path, split_name: str) -> List[DatasetItem]: @@ -136,11 +131,13 @@ def _load_jsonl(self, fp: Path, split_name: str) -> List[DatasetItem]: with open(fp, encoding="utf-8") as fh: for line in fh: row: dict[str, Any] = json.loads(line) - items.append(DatasetItem( - data=row["text"], - label=int(row["label"]), - metadata={"split": split_name}, - )) + items.append( + DatasetItem( + data=row["text"], + label=int(row["label"]), + metadata={"split": split_name}, + ) + ) return items def _load_all(self) -> List[DatasetItem]: diff --git a/detectzoo/datasets/text/l2r.py b/detectzoo/datasets/text/l2r.py index 9cc87eb..9bfff63 100644 --- a/detectzoo/datasets/text/l2r.py +++ b/detectzoo/datasets/text/l2r.py @@ -156,9 +156,7 @@ def _resolve( for name in requested: key = name.lower() if key not in lookup: - raise ValueError( - f"Unknown L2R {kind} '{name}'. Valid: {list(valid)}" - ) + raise ValueError(f"Unknown L2R {kind} '{name}'. Valid: {list(valid)}") resolved.append(lookup[key]) return resolved diff --git a/detectzoo/datasets/text/m4.py b/detectzoo/datasets/text/m4.py index cbb40a9..d2d01d6 100644 --- a/detectzoo/datasets/text/m4.py +++ b/detectzoo/datasets/text/m4.py @@ -216,31 +216,35 @@ def _load_jsonl( if self.include_human: human_text = _as_text_field(row.get("human_text", "")) if human_text: - items.append(DatasetItem( - data=human_text, - label=0, - metadata={ - "domain": domain, - "model": "human", - "source": source, - "source_id": source_id, - "prompt": prompt, - }, - )) + items.append( + DatasetItem( + data=human_text, + label=0, + metadata={ + "domain": domain, + "model": "human", + "source": source, + "source_id": source_id, + "prompt": prompt, + }, + ) + ) if self.include_machine: machine_text = _as_text_field(row.get("machine_text", "")) if machine_text: - items.append(DatasetItem( - data=machine_text, - label=1, - metadata={ - "domain": domain, - "model": row_model, - "source": source, - "source_id": source_id, - "prompt": prompt, - }, - )) + items.append( + DatasetItem( + data=machine_text, + label=1, + metadata={ + "domain": domain, + "model": row_model, + "source": source, + "source_id": source_id, + "prompt": prompt, + }, + ) + ) return items def _load_all(self) -> List[DatasetItem]: diff --git a/detectzoo/datasets/text/mage.py b/detectzoo/datasets/text/mage.py index ba591b6..7add996 100644 --- a/detectzoo/datasets/text/mage.py +++ b/detectzoo/datasets/text/mage.py @@ -123,11 +123,13 @@ def _load_from_huggingface(self) -> List[DatasetItem]: source = row.get("src", "unknown") if self.sources and str(source).lower() not in self.sources: continue - items.append(DatasetItem( - data=row[self.text_column], - label=self._flip_label(int(row[self.label_column])), - metadata={"source": source}, - )) + items.append( + DatasetItem( + data=row[self.text_column], + label=self._flip_label(int(row[self.label_column])), + metadata={"source": source}, + ) + ) return items def _load_from_local(self) -> List[DatasetItem]: @@ -143,11 +145,13 @@ def _load_from_local(self) -> List[DatasetItem]: source = row.get("source", row.get("model", "unknown")) if self.sources and source.lower() not in self.sources: continue - items.append(DatasetItem( - data=row[self.text_column], - label=self._flip_label(int(row[self.label_column])), - metadata={"source": source}, - )) + items.append( + DatasetItem( + data=row[self.text_column], + label=self._flip_label(int(row[self.label_column])), + metadata={"source": source}, + ) + ) elif fp.suffix in (".json", ".jsonl"): with open(fp, encoding="utf-8") as fh: if fp.suffix == ".json": @@ -158,11 +162,13 @@ def _load_from_local(self) -> List[DatasetItem]: source = row.get("source", row.get("model", "unknown")) if self.sources and str(source).lower() not in self.sources: continue - items.append(DatasetItem( - data=row.get(self.text_column, ""), - label=self._flip_label(int(row.get(self.label_column, 0))), - metadata={"source": source}, - )) + items.append( + DatasetItem( + data=row.get(self.text_column, ""), + label=self._flip_label(int(row.get(self.label_column, 0))), + metadata={"source": source}, + ) + ) return items def _load_all(self) -> List[DatasetItem]: diff --git a/detectzoo/datasets/text/open_llm_text.py b/detectzoo/datasets/text/open_llm_text.py index 0929125..89f2fd4 100644 --- a/detectzoo/datasets/text/open_llm_text.py +++ b/detectzoo/datasets/text/open_llm_text.py @@ -149,11 +149,13 @@ def _load_jsonl( if not line.strip(): continue row: dict[str, Any] = json.loads(line) - items.append(DatasetItem( - data=row.get("text", row.get("string", "")), - label=label, - metadata={"source": source, "split": split_name}, - )) + items.append( + DatasetItem( + data=row.get("text", row.get("string", "")), + label=label, + metadata={"source": source, "split": split_name}, + ) + ) return items def _load_all(self) -> List[DatasetItem]: diff --git a/detectzoo/datasets/text/raid.py b/detectzoo/datasets/text/raid.py index 3388009..0bb6a1c 100644 --- a/detectzoo/datasets/text/raid.py +++ b/detectzoo/datasets/text/raid.py @@ -141,18 +141,44 @@ class RAIDDataset(BaseDataset): ) MODELS = ( - "chatgpt", "gpt4", "gpt3", "gpt2", - "llama-chat", "mistral", "mistral-chat", - "mpt", "mpt-chat", "cohere", "cohere-chat", "human", + "chatgpt", + "gpt4", + "gpt3", + "gpt2", + "llama-chat", + "mistral", + "mistral-chat", + "mpt", + "mpt-chat", + "cohere", + "cohere-chat", + "human", ) DOMAINS = ( - "abstracts", "books", "code", "czech", "german", - "news", "poetry", "recipes", "reddit", "reviews", "wiki", + "abstracts", + "books", + "code", + "czech", + "german", + "news", + "poetry", + "recipes", + "reddit", + "reviews", + "wiki", ) ATTACKS = ( - "none", "homoglyph", "number", "article_deletion", - "insert_paragraphs", "perplexity_misspelling", "upper_lower", - "whitespace", "zero_width_space", "synonym", "paraphrase", + "none", + "homoglyph", + "number", + "article_deletion", + "insert_paragraphs", + "perplexity_misspelling", + "upper_lower", + "whitespace", + "zero_width_space", + "synonym", + "paraphrase", "alternative_spelling", ) SPLITS = ("train", "test", "test_new", "test_attack") diff --git a/detectzoo/datasets/text/turingbench.py b/detectzoo/datasets/text/turingbench.py index 66a32af..f47f614 100644 --- a/detectzoo/datasets/text/turingbench.py +++ b/detectzoo/datasets/text/turingbench.py @@ -19,9 +19,7 @@ from detectzoo.core.registry import register_dataset from detectzoo.datasets.base import BaseDataset, DatasetItem -_HF_ZIP_URL = ( - "https://huggingface.co/datasets/turingbench/TuringBench/resolve/main/TuringBench.zip" -) +_HF_ZIP_URL = "https://huggingface.co/datasets/turingbench/TuringBench/resolve/main/TuringBench.zip" # Binary Turing-Test (human vs. one generator) configurations. _TT_MODELS: tuple[str, ...] = ( @@ -146,13 +144,9 @@ def __init__( ) -> None: super().__init__(**kwargs) if config not in _CONFIGS: - raise ValueError( - f"Unknown TuringBench config '{config}'. Valid: {list(_CONFIGS)}" - ) + raise ValueError(f"Unknown TuringBench config '{config}'. Valid: {list(_CONFIGS)}") if split not in _SPLIT_FILES: - raise ValueError( - f"Unknown TuringBench split '{split}'. Valid: {list(_SPLIT_FILES)}" - ) + raise ValueError(f"Unknown TuringBench split '{split}'. Valid: {list(_SPLIT_FILES)}") self.path = Path(path) if path is not None else None self.config = config self.split = split @@ -179,9 +173,7 @@ def _resolve_csv(self, data_dir: Path) -> Path: for cand in candidates: if cand.exists(): return cand - raise FileNotFoundError( - f"Could not find {self.config}/{filename} under {data_dir}." - ) + raise FileNotFoundError(f"Could not find {self.config}/{filename} under {data_dir}.") @staticmethod def _map_label(raw_label: str) -> int: @@ -208,13 +200,15 @@ def _load_all(self) -> List[DatasetItem]: for row in reader: text = row.get("Generation") or row.get("generation") or "" raw_label = row.get("label", "") - items.append(DatasetItem( - data=text, - label=self._map_label(raw_label), - metadata={ - "config": self.config, - "split": self.split, - "raw_label": raw_label, - }, - )) + items.append( + DatasetItem( + data=text, + label=self._map_label(raw_label), + metadata={ + "config": self.config, + "split": self.split, + "raw_label": raw_label, + }, + ) + ) return items diff --git a/detectzoo/datasets/text/writing_prompts.py b/detectzoo/datasets/text/writing_prompts.py index 2794f45..7df8b0e 100644 --- a/detectzoo/datasets/text/writing_prompts.py +++ b/detectzoo/datasets/text/writing_prompts.py @@ -107,14 +107,16 @@ def _load_from_huggingface(self) -> List[DatasetItem]: ds = load_dataset("euclaise/writingprompts", split=self.split) items: List[DatasetItem] = [] for row in ds: - items.append(DatasetItem( - data=row.get("story", ""), - label=0, - metadata={ - "source": "human", - "prompt": row.get("prompt", ""), - }, - )) + items.append( + DatasetItem( + data=row.get("story", ""), + label=0, + metadata={ + "source": "human", + "prompt": row.get("prompt", ""), + }, + ) + ) if self.max_samples and len(items) >= self.max_samples: break return items @@ -129,14 +131,16 @@ def _load_from_local(self) -> List[DatasetItem]: with open(fp, encoding="utf-8") as fh: for line in fh: row: dict[str, Any] = json.loads(line) - items.append(DatasetItem( - data=row.get("story", row.get("text", "")), - label=int(row.get("label", 0)), - metadata={ - "source": row.get("source", "human"), - "prompt": row.get("prompt", ""), - }, - )) + items.append( + DatasetItem( + data=row.get("story", row.get("text", "")), + label=int(row.get("label", 0)), + metadata={ + "source": row.get("source", "human"), + "prompt": row.get("prompt", ""), + }, + ) + ) if self.max_samples and len(items) >= self.max_samples: return items return items diff --git a/detectzoo/datasets/text/xsum.py b/detectzoo/datasets/text/xsum.py index d414feb..41afd4a 100644 --- a/detectzoo/datasets/text/xsum.py +++ b/detectzoo/datasets/text/xsum.py @@ -109,16 +109,18 @@ def _load_from_huggingface(self) -> List[DatasetItem]: ds = load_dataset("EdinburghNLP/xsum", split=self.split) items: List[DatasetItem] = [] for row in ds: - items.append(DatasetItem( - data=row[self.text_field], - label=0, - metadata={ - "source": "human", - "id": row.get("id", ""), - "document": row.get("document", ""), - "summary": row.get("summary", ""), - }, - )) + items.append( + DatasetItem( + data=row[self.text_field], + label=0, + metadata={ + "source": "human", + "id": row.get("id", ""), + "document": row.get("document", ""), + "summary": row.get("summary", ""), + }, + ) + ) if self.max_samples and len(items) >= self.max_samples: break return items @@ -133,16 +135,18 @@ def _load_from_local(self) -> List[DatasetItem]: with open(fp, encoding="utf-8") as fh: for line in fh: row: dict[str, Any] = json.loads(line) - items.append(DatasetItem( - data=row.get(self.text_field, row.get("summary", "")), - label=int(row.get("label", 0)), - metadata={ - "source": row.get("source", "human"), - "id": row.get("id", ""), - "document": row.get("document", ""), - "summary": row.get("summary", ""), - }, - )) + items.append( + DatasetItem( + data=row.get(self.text_field, row.get("summary", "")), + label=int(row.get("label", 0)), + metadata={ + "source": row.get("source", "human"), + "id": row.get("id", ""), + "document": row.get("document", ""), + "summary": row.get("summary", ""), + }, + ) + ) if self.max_samples and len(items) >= self.max_samples: return items return items diff --git a/detectzoo/detectors/audio/_anti_deepfake_common.py b/detectzoo/detectors/audio/_anti_deepfake_common.py index 68a18d9..6408bac 100644 --- a/detectzoo/detectors/audio/_anti_deepfake_common.py +++ b/detectzoo/detectors/audio/_anti_deepfake_common.py @@ -81,9 +81,8 @@ # DetectZoo: ast_asvspoof, xlsr_sls, ...). # --------------------------------------------------------------------------- -def load_audio_to_numpy( - path: Union[str, Path], target_sr: int = SAMPLE_RATE -) -> np.ndarray: + +def load_audio_to_numpy(path: Union[str, Path], target_sr: int = SAMPLE_RATE) -> np.ndarray: """Load an audio file -> mono float32 numpy array at ``target_sr``.""" try: import torchaudio @@ -174,12 +173,9 @@ def _translate_fairseq_ssl_state_dict( """ new_sd: Dict[str, torch.Tensor] = {} - pos_conv_legacy = any( - "pos_conv_embed.conv.weight_g" in k for k in expected_keys - ) + pos_conv_legacy = any("pos_conv_embed.conv.weight_g" in k for k in expected_keys) pos_conv_param = any( - "pos_conv_embed.conv.parametrizations.weight.original" in k - for k in expected_keys + "pos_conv_embed.conv.parametrizations.weight.original" in k for k in expected_keys ) def _put(target: str, value: torch.Tensor) -> None: @@ -195,7 +191,7 @@ def _put(target: str, value: torch.Tensor) -> None: if not key.startswith(_SSL_PREFIX): continue - k = key[len(_SSL_PREFIX):] + k = key[len(_SSL_PREFIX) :] if any(k.startswith(p) for p in _DROP_PREFIXES) or k == "label_embs_concat": continue @@ -209,25 +205,25 @@ def _put(target: str, value: torch.Tensor) -> None: i = parts[2] inner = ".".join(parts[3:]) if inner.startswith("0."): - tail = inner[len("0."):] + tail = inner[len("0.") :] _put(f"feature_extractor.conv_layers.{i}.conv.{tail}", val) elif inner.startswith("2.1."): - tail = inner[len("2.1."):] + tail = inner[len("2.1.") :] _put(f"feature_extractor.conv_layers.{i}.layer_norm.{tail}", val) continue if k.startswith("post_extract_proj."): - tail = k[len("post_extract_proj."):] + tail = k[len("post_extract_proj.") :] _put(f"feature_projection.projection.{tail}", val) continue if k.startswith("layer_norm."): - tail = k[len("layer_norm."):] + tail = k[len("layer_norm.") :] _put(f"feature_projection.layer_norm.{tail}", val) continue if k.startswith("encoder.pos_conv.0."): - tail = k[len("encoder.pos_conv.0."):] + tail = k[len("encoder.pos_conv.0.") :] if tail == "weight_g": if pos_conv_legacy: _put("encoder.pos_conv_embed.conv.weight_g", val) @@ -257,19 +253,19 @@ def _put(target: str, value: torch.Tensor) -> None: i = parts[2] inner = ".".join(parts[3:]) if inner.startswith("self_attn_layer_norm."): - tail = inner[len("self_attn_layer_norm."):] + tail = inner[len("self_attn_layer_norm.") :] _put(f"encoder.layers.{i}.layer_norm.{tail}", val) elif inner.startswith("self_attn."): - tail = inner[len("self_attn."):] + tail = inner[len("self_attn.") :] _put(f"encoder.layers.{i}.attention.{tail}", val) elif inner.startswith("fc1."): - tail = inner[len("fc1."):] + tail = inner[len("fc1.") :] _put(f"encoder.layers.{i}.feed_forward.intermediate_dense.{tail}", val) elif inner.startswith("fc2."): - tail = inner[len("fc2."):] + tail = inner[len("fc2.") :] _put(f"encoder.layers.{i}.feed_forward.output_dense.{tail}", val) elif inner.startswith("final_layer_norm."): - tail = inner[len("final_layer_norm."):] + tail = inner[len("final_layer_norm.") :] _put(f"encoder.layers.{i}.final_layer_norm.{tail}", val) continue @@ -280,6 +276,7 @@ def _put(target: str, value: torch.Tensor) -> None: # Detector module: SSL frontend + global-avg-pool + 2-class linear classifier # --------------------------------------------------------------------------- + class AntiDeepfakeDetectorModule(nn.Module): """Pure-HuggingFace re-implementation of the AntiDeepfake DeepfakeDetector. @@ -327,10 +324,10 @@ def extract_features(self, wav: torch.Tensor) -> torch.Tensor: return out[0] def forward(self, wav: torch.Tensor) -> torch.Tensor: - emb = self.extract_features(wav) # [B, T', D] - emb = emb.transpose(1, 2) # [B, D, T'] + emb = self.extract_features(wav) # [B, T', D] + emb = emb.transpose(1, 2) # [B, D, T'] pooled = self.adap_pool1d(emb).squeeze(-1) # [B, D] - logits = self.proj_fc(pooled) # [B, 2] + logits = self.proj_fc(pooled) # [B, 2] return logits @@ -338,6 +335,7 @@ def forward(self, wav: torch.Tensor) -> torch.Tensor: # End-to-end loader: download safetensors, build HF model, load weights # --------------------------------------------------------------------------- + def build_anti_deepfake_detector( model_name: str, cache_dir: Path, @@ -399,9 +397,7 @@ def build_anti_deepfake_detector( ) config = AutoConfig.from_pretrained(model_name, cache_dir=str(cache_dir)) - if expected_hidden_size is not None and int(config.hidden_size) != int( - expected_hidden_size - ): + if expected_hidden_size is not None and int(config.hidden_size) != int(expected_hidden_size): _LOGGER.warning( "AntiDeepfake: %s has hidden_size=%d but the wrapper expected " "%d -- proceeding with the value from the HF config.", @@ -418,9 +414,7 @@ def build_anti_deepfake_detector( # state dict and override the config so the architecture we build # matches the weights we are about to load. has_conv_bias = any( - ".feature_extractor.conv_layers." in k - and k.endswith(".0.bias") - for k in fairseq_sd.keys() + ".feature_extractor.conv_layers." in k and k.endswith(".0.bias") for k in fairseq_sd.keys() ) if hasattr(config, "conv_bias") and bool(config.conv_bias) != has_conv_bias: _LOGGER.debug( @@ -445,9 +439,7 @@ def build_anti_deepfake_detector( ssl_model.eval() expected_keys = set(ssl_model.state_dict().keys()) - new_sd, head_kept = _translate_fairseq_ssl_state_dict( - fairseq_sd, expected_keys - ) + new_sd, head_kept = _translate_fairseq_ssl_state_dict(fairseq_sd, expected_keys) # Pre-load visibility: how many fairseq SSL tensors did we actually # find a destination for, vs how many we know we're dropping on @@ -455,19 +447,18 @@ def build_anti_deepfake_detector( # the silent-failure mode -- if non-zero they get logged below). n_fairseq_ssl = sum(1 for k in fairseq_sd if k.startswith(_SSL_PREFIX)) n_intentionally_dropped = sum( - 1 for k in fairseq_sd + 1 + for k in fairseq_sd if k.startswith(_SSL_PREFIX) and ( - any(k[len(_SSL_PREFIX):].startswith(p) for p in _DROP_PREFIXES) - or k[len(_SSL_PREFIX):] == "label_embs_concat" + any(k[len(_SSL_PREFIX) :].startswith(p) for p in _DROP_PREFIXES) + or k[len(_SSL_PREFIX) :] == "label_embs_concat" ) ) n_translated = len(new_sd) n_unmapped_fairseq = n_fairseq_ssl - n_intentionally_dropped - n_translated - sample_fairseq = sorted( - k for k in fairseq_sd if k.startswith(_SSL_PREFIX) - )[:5] + sample_fairseq = sorted(k for k in fairseq_sd if k.startswith(_SSL_PREFIX))[:5] sample_hf = sorted(expected_keys)[:5] _LOGGER.info( "AntiDeepfake/%s: state-dict translation -- " @@ -482,11 +473,13 @@ def build_anti_deepfake_detector( ) _LOGGER.debug( "AntiDeepfake/%s: fairseq sample (5): %s", - model_name, sample_fairseq, + model_name, + sample_fairseq, ) _LOGGER.debug( "AntiDeepfake/%s: HF expected sample (5): %s", - model_name, sample_hf, + model_name, + sample_hf, ) missing, unexpected = ssl_model.load_state_dict(new_sd, strict=False) @@ -520,21 +513,20 @@ def build_anti_deepfake_detector( _LOGGER.debug( "AntiDeepfake/%s: %d SSL tensor(s) left at random init " "(all benign HF-only buffers): %s", - model_name, len(missing), missing, + model_name, + len(missing), + missing, ) if unexpected: _LOGGER.warning( - "AntiDeepfake/%s: %d unexpected SSL tensor(s) ignored by HF " - "model: %s%s", + "AntiDeepfake/%s: %d unexpected SSL tensor(s) ignored by HF model: %s%s", model_name, len(unexpected), unexpected[:8], " ..." if len(unexpected) > 8 else "", ) - detector = AntiDeepfakeDetectorModule( - ssl_model=ssl_model, hidden_size=int(config.hidden_size) - ) + detector = AntiDeepfakeDetectorModule(ssl_model=ssl_model, hidden_size=int(config.hidden_size)) head_sd = { "proj_fc.weight": fairseq_sd["proj_fc.weight"], @@ -557,6 +549,7 @@ def build_anti_deepfake_detector( # Single-utterance forward (matches the inference script on every model card) # --------------------------------------------------------------------------- + @torch.no_grad() def run_inference( detector: AntiDeepfakeDetectorModule, @@ -569,14 +562,12 @@ def run_inference( ``score_human`` are softmax probabilities (so ``score_ai + score_human == 1``) and ``logits`` is the raw 2-element logit tensor (still on ``device``). """ - wav_tensor = torch.from_numpy(np.ascontiguousarray(wav)).to( - device=device, dtype=torch.float32 - ) + wav_tensor = torch.from_numpy(np.ascontiguousarray(wav)).to(device=device, dtype=torch.float32) wav_tensor = F.layer_norm(wav_tensor, wav_tensor.shape) wav_tensor = wav_tensor.unsqueeze(0) # [1, T] logits = detector(wav_tensor).view(-1) probs = torch.softmax(logits, dim=-1) - score_ai = float(probs[0].item()) # index 0 = fake + score_ai = float(probs[0].item()) # index 0 = fake score_human = float(probs[1].item()) # index 1 = real return score_ai, score_human, logits diff --git a/detectzoo/detectors/audio/aasist.py b/detectzoo/detectors/audio/aasist.py index 0babfd3..1b504aa 100644 --- a/detectzoo/detectors/audio/aasist.py +++ b/detectzoo/detectors/audio/aasist.py @@ -36,13 +36,13 @@ # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- -_CKPT_URL = "https://github.com/clovaai/aasist/raw/main/models/weights/AASIST.pth" +_CKPT_URL = "https://github.com/clovaai/aasist/raw/main/models/weights/AASIST.pth" _CKPT_URL_L = "https://github.com/clovaai/aasist/raw/main/models/weights/AASIST-L.pth" -_CKPT_NAME = "AASIST.pth" +_CKPT_NAME = "AASIST.pth" _CKPT_NAME_L = "AASIST-L.pth" _SAMPLE_RATE = 16_000 -_MAX_SAMPLES = 64_600 # ~4 s at 16 kHz (standard ASVspoof eval length) +_MAX_SAMPLES = 64_600 # ~4 s at 16 kHz (standard ASVspoof eval length) # Model configs (from clovaai/aasist/config/AASIST{,-L}.conf) _D_ARGS_BASE: dict = { @@ -194,21 +194,23 @@ def _derive_att_map_master(self, x: torch.Tensor, master: torch.Tensor) -> torch att_map = att_map / self.temp return F.softmax(att_map, dim=-2) - def _derive_att_map( - self, x: torch.Tensor, num_type1: int, num_type2: int - ) -> torch.Tensor: + def _derive_att_map(self, x: torch.Tensor, num_type1: int, num_type2: int) -> torch.Tensor: att_map = self._pairwise_mul_nodes(x) att_map = torch.tanh(self.att_proj(att_map)) att_board = torch.zeros_like(att_map[:, :, :, 0]).unsqueeze(-1) att_board[:, :num_type1, :num_type1, :] = torch.matmul( - att_map[:, :num_type1, :num_type1, :], self.att_weight11) + att_map[:, :num_type1, :num_type1, :], self.att_weight11 + ) att_board[:, num_type1:, num_type1:, :] = torch.matmul( - att_map[:, num_type1:, num_type1:, :], self.att_weight22) + att_map[:, num_type1:, num_type1:, :], self.att_weight22 + ) att_board[:, :num_type1, num_type1:, :] = torch.matmul( - att_map[:, :num_type1, num_type1:, :], self.att_weight12) + att_map[:, :num_type1, num_type1:, :], self.att_weight12 + ) att_board[:, num_type1:, :num_type1, :] = torch.matmul( - att_map[:, num_type1:, :num_type1, :], self.att_weight12) + att_map[:, num_type1:, :num_type1, :], self.att_weight12 + ) att_map = att_board / self.temp return F.softmax(att_map, dim=-2) @@ -221,9 +223,7 @@ def _project(self, x: torch.Tensor, att_map: torch.Tensor) -> torch.Tensor: def _project_master( self, x: torch.Tensor, master: torch.Tensor, att_map: torch.Tensor ) -> torch.Tensor: - x1 = self.proj_with_attM( - torch.matmul(att_map.squeeze(-1).unsqueeze(1), x) - ) + x1 = self.proj_with_attM(torch.matmul(att_map.squeeze(-1).unsqueeze(1), x)) x2 = self.proj_without_attM(master) return x1 + x2 @@ -313,9 +313,7 @@ def __init__( fmel = self._to_mel(f) filbandwidthsmel = np.linspace(np.min(fmel), np.max(fmel), self.out_channels + 1) self.mel = self._to_hz(filbandwidthsmel) - self.hsupp = torch.arange( - -(self.kernel_size - 1) / 2, (self.kernel_size - 1) / 2 + 1 - ) + self.hsupp = torch.arange(-(self.kernel_size - 1) / 2, (self.kernel_size - 1) / 2 + 1) band_pass = torch.zeros(self.out_channels, self.kernel_size) for i in range(len(self.mel) - 1): fmin, fmax = self.mel[i], self.mel[i + 1] @@ -337,12 +335,16 @@ def forward(self, x: torch.Tensor, mask: bool = False) -> torch.Tensor: if mask: A = int(np.random.uniform(0, 20)) A0 = random.randint(0, band_pass_filter.shape[0] - A) if A > 0 else 0 - band_pass_filter[A0:A0 + A, :] = 0 + band_pass_filter[A0 : A0 + A, :] = 0 filters = band_pass_filter.view(self.out_channels, 1, self.kernel_size) return F.conv1d( - x, filters, - stride=self.stride, padding=self.padding, dilation=self.dilation, - bias=None, groups=1, + x, + filters, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + bias=None, + groups=1, ) @@ -356,20 +358,29 @@ def __init__(self, nb_filts: list[int], first: bool = False) -> None: if not first: self.bn1 = nn.BatchNorm2d(num_features=nb_filts[0]) self.conv1 = nn.Conv2d( - in_channels=nb_filts[0], out_channels=nb_filts[1], - kernel_size=(2, 3), padding=(1, 1), stride=1, + in_channels=nb_filts[0], + out_channels=nb_filts[1], + kernel_size=(2, 3), + padding=(1, 1), + stride=1, ) self.selu = nn.SELU(inplace=True) self.bn2 = nn.BatchNorm2d(num_features=nb_filts[1]) self.conv2 = nn.Conv2d( - in_channels=nb_filts[1], out_channels=nb_filts[1], - kernel_size=(2, 3), padding=(0, 1), stride=1, + in_channels=nb_filts[1], + out_channels=nb_filts[1], + kernel_size=(2, 3), + padding=(0, 1), + stride=1, ) if nb_filts[0] != nb_filts[1]: self.downsample = True self.conv_downsample = nn.Conv2d( - in_channels=nb_filts[0], out_channels=nb_filts[1], - padding=(0, 1), kernel_size=(1, 3), stride=1, + in_channels=nb_filts[0], + out_channels=nb_filts[1], + padding=(0, 1), + kernel_size=(1, 3), + stride=1, ) else: self.downsample = False @@ -455,52 +466,42 @@ def __init__(self, d_args: dict) -> None: self.out_layer = nn.Linear(5 * gat_dims[1], 2) - def forward( - self, x: torch.Tensor, Freq_aug: bool = False - ) -> tuple[torch.Tensor, torch.Tensor]: + def forward(self, x: torch.Tensor, Freq_aug: bool = False) -> tuple[torch.Tensor, torch.Tensor]: """x : (B, T) raw waveform at 16 kHz. Returns (hidden[B,160], logits[B,2]).""" - x = x.unsqueeze(1) # [B, 1, T] - x = self.conv_time(x, mask=Freq_aug) # SincConv -> [B, filts0, T'] - x = x.unsqueeze(dim=1) # [B, 1, filts0, T'] - x = F.max_pool2d(torch.abs(x), (3, 3)) # [B, 1, 23, T''] + x = x.unsqueeze(1) # [B, 1, T] + x = self.conv_time(x, mask=Freq_aug) # SincConv -> [B, filts0, T'] + x = x.unsqueeze(dim=1) # [B, 1, filts0, T'] + x = F.max_pool2d(torch.abs(x), (3, 3)) # [B, 1, 23, T''] x = self.first_bn(x) x = self.selu(x) - e = self.encoder(x) # [B, C, F', T'''] + e = self.encoder(x) # [B, C, F', T'''] # ---- spectral / temporal branches ----------------------------------- - e_S, _ = torch.max(torch.abs(e), dim=3) # max over time + e_S, _ = torch.max(torch.abs(e), dim=3) # max over time e_S = e_S.transpose(1, 2) + self.pos_S gat_S = self.GAT_layer_S(e_S) out_S = self.pool_S(gat_S) - e_T, _ = torch.max(torch.abs(e), dim=2) # max over freq + e_T, _ = torch.max(torch.abs(e), dim=2) # max over freq e_T = e_T.transpose(1, 2) gat_T = self.GAT_layer_T(e_T) out_T = self.pool_T(gat_T) # ---- HS-GAL inference 1 -------------------------------------------- - out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11( - out_T, out_S, master=self.master1 - ) + out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(out_T, out_S, master=self.master1) out_S1 = self.pool_hS1(out_S1) out_T1 = self.pool_hT1(out_T1) - out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12( - out_T1, out_S1, master=master1 - ) + out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(out_T1, out_S1, master=master1) out_T1 = out_T1 + out_T_aug out_S1 = out_S1 + out_S_aug master1 = master1 + master_aug # ---- HS-GAL inference 2 -------------------------------------------- - out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21( - out_T, out_S, master=self.master2 - ) + out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(out_T, out_S, master=self.master2) out_S2 = self.pool_hS2(out_S2) out_T2 = self.pool_hT2(out_T2) - out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22( - out_T2, out_S2, master=master2 - ) + out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(out_T2, out_S2, master=master2) out_T2 = out_T2 + out_T_aug out_S2 = out_S2 + out_S_aug master2 = master2 + master_aug @@ -521,9 +522,7 @@ def forward( S_max, _ = torch.max(torch.abs(out_S), dim=1) S_avg = torch.mean(out_S, dim=1) - last_hidden = torch.cat( - [T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1 - ) + last_hidden = torch.cat([T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1) last_hidden = self.drop(last_hidden) output = self.out_layer(last_hidden) return last_hidden, output @@ -533,23 +532,27 @@ def forward( # Audio helpers # --------------------------------------------------------------------------- + def _load_audio(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> torch.Tensor: """Load audio file -> mono float32 [1, T] at target_sr.""" try: import torchaudio + wav, sr = torchaudio.load(str(path)) if sr != target_sr: wav = torchaudio.functional.resample(wav, sr, target_sr) except Exception: import soundfile as sf + data, sr = sf.read(str(path), always_2d=True) wav = torch.from_numpy(data.T.astype(np.float32)) if sr != target_sr: import torchaudio + wav = torchaudio.functional.resample(wav, sr, target_sr) if wav.shape[0] > 1: wav = wav.mean(dim=0, keepdim=True) - return wav # [1, T] + return wav # [1, T] def _pad_or_trim(wav: torch.Tensor, length: int) -> torch.Tensor: @@ -565,6 +568,7 @@ def _pad_or_trim(wav: torch.Tensor, length: int) -> torch.Tensor: # DetectZoo detector wrapper # --------------------------------------------------------------------------- + @register_detector("aasist", aliases=["aasist_audio"]) class AASISTDetector(BaseDetector): """AASIST audio deepfake detector (Jung et al., ICASSP 2022). @@ -611,7 +615,7 @@ def __init__( else: cache = get_cache_dir("aasist", cache_dir) name = _CKPT_NAME if variant == "base" else _CKPT_NAME_L - url = _CKPT_URL if variant == "base" else _CKPT_URL_L + url = _CKPT_URL if variant == "base" else _CKPT_URL_L self._weight_path = cache / name download_file(url, self._weight_path) @@ -652,7 +656,7 @@ def _normalize_input(self, input_data: Any) -> torch.Tensor: wav = wav.unsqueeze(0) else: wav = _load_audio(input_data, _SAMPLE_RATE) - return _pad_or_trim(wav, _MAX_SAMPLES) # [1, T] + return _pad_or_trim(wav, _MAX_SAMPLES) # [1, T] @torch.no_grad() def predict(self, input_data: Any) -> DetectionResult: @@ -668,9 +672,9 @@ def predict(self, input_data: Any) -> DetectionResult: DetectionResult score=P(ai), label='ai'/'human', confidence in [0,1]. """ - wav = self._normalize_input(input_data).to(self._device) # [1, T] - _, logits = self._model(wav) # ([1,160], [1,2]) - probs = torch.softmax(logits, dim=-1) # [1, 2] + wav = self._normalize_input(input_data).to(self._device) # [1, T] + _, logits = self._model(wav) # ([1,160], [1,2]) + probs = torch.softmax(logits, dim=-1) # [1, 2] # AASIST training convention: index 0 = spoof/ai, index 1 = bonafide/human # (see clovaai/aasist data_utils.py: `d_meta[key] = 1 if label == "bonafide" else 0`, diff --git a/detectzoo/detectors/audio/anti_deepfake_hubert/detector.py b/detectzoo/detectors/audio/anti_deepfake_hubert/detector.py index 277691c..b43f7a3 100644 --- a/detectzoo/detectors/audio/anti_deepfake_hubert/detector.py +++ b/detectzoo/detectors/audio/anti_deepfake_hubert/detector.py @@ -89,6 +89,7 @@ # Detector wrapper # --------------------------------------------------------------------------- + @register_detector( "anti_deepfake_hubert", aliases=["nii_hubert"], @@ -184,9 +185,7 @@ def predict(self, input_data: Any) -> DetectionResult: 16 kHz internally; arbitrary lengths are accepted. """ wav = self._normalize_input(input_data) - score_ai, score_human, logits = run_inference( - self._model, wav, self._device - ) + score_ai, score_human, logits = run_inference(self._model, wav, self._device) return self._make_result( score_ai, diff --git a/detectzoo/detectors/audio/anti_deepfake_wav2vec/detector.py b/detectzoo/detectors/audio/anti_deepfake_wav2vec/detector.py index 5fcf1f1..62b3b0e 100644 --- a/detectzoo/detectors/audio/anti_deepfake_wav2vec/detector.py +++ b/detectzoo/detectors/audio/anti_deepfake_wav2vec/detector.py @@ -87,6 +87,7 @@ # Detector wrapper # --------------------------------------------------------------------------- + @register_detector( "anti_deepfake_wav2vec", aliases=["anti_deepfake", "nii_wav2vec"], @@ -177,9 +178,7 @@ def predict(self, input_data: Any) -> DetectionResult: average pool collapses the time axis). """ wav = self._normalize_input(input_data) - score_ai, score_human, logits = run_inference( - self._model, wav, self._device - ) + score_ai, score_human, logits = run_inference(self._model, wav, self._device) return self._make_result( score_ai, diff --git a/detectzoo/detectors/audio/anti_deepfake_xlsr2b/detector.py b/detectzoo/detectors/audio/anti_deepfake_xlsr2b/detector.py index 14f8482..f39922e 100644 --- a/detectzoo/detectors/audio/anti_deepfake_xlsr2b/detector.py +++ b/detectzoo/detectors/audio/anti_deepfake_xlsr2b/detector.py @@ -99,6 +99,7 @@ # Detector wrapper # --------------------------------------------------------------------------- + @register_detector( "anti_deepfake_xlsr2b", aliases=["nii_xlsr2b", "xlsr_2b"], @@ -196,9 +197,7 @@ def predict(self, input_data: Any) -> DetectionResult: 16 kHz internally; arbitrary lengths are accepted. """ wav = self._normalize_input(input_data) - score_ai, score_human, logits = run_inference( - self._model, wav, self._device - ) + score_ai, score_human, logits = run_inference(self._model, wav, self._device) return self._make_result( score_ai, diff --git a/detectzoo/detectors/audio/ast_asvspoof/detector.py b/detectzoo/detectors/audio/ast_asvspoof/detector.py index c45b9b8..021454a 100644 --- a/detectzoo/detectors/audio/ast_asvspoof/detector.py +++ b/detectzoo/detectors/audio/ast_asvspoof/detector.py @@ -71,9 +71,8 @@ # detectors in DetectZoo (aasist, rawnet2, …). # --------------------------------------------------------------------------- -def _load_audio_to_numpy( - path: Union[str, Path], target_sr: int = _SAMPLE_RATE -) -> np.ndarray: + +def _load_audio_to_numpy(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> np.ndarray: """Load an audio file -> mono float32 numpy array at ``target_sr``.""" try: import torchaudio @@ -135,6 +134,7 @@ def _resolve_label_indices(id2label: Dict[int, str]) -> Tuple[int, int]: # Detector wrapper # --------------------------------------------------------------------------- + @register_detector( "ast_asvspoof", aliases=["ast", "ast-asvspoof", "ast_audio", "ast_synthetic_voice"], @@ -223,9 +223,7 @@ def __init__( if checkpoint_path is not None: source = Path(checkpoint_path).expanduser().resolve() if not source.exists(): - raise FileNotFoundError( - f"checkpoint_path does not exist: {source}" - ) + raise FileNotFoundError(f"checkpoint_path does not exist: {source}") _LOGGER.info("Loading AST-ASVspoof from local directory %s", source) source = str(source) else: @@ -264,9 +262,7 @@ def __init__( # Resolve which class index corresponds to "spoof"/"fake" (= AI). id2label = dict(self._model.config.id2label) auto_spoof, auto_bona = _resolve_label_indices(id2label) - self._spoof_idx = ( - auto_spoof if spoof_label_index is None else int(spoof_label_index) - ) + self._spoof_idx = auto_spoof if spoof_label_index is None else int(spoof_label_index) self._bonafide_idx = ( auto_bona if bonafide_label_index is None else int(bonafide_label_index) ) diff --git a/detectzoo/detectors/audio/rawgat_st.py b/detectzoo/detectors/audio/rawgat_st.py index 0a01a8f..8816bad 100644 --- a/detectzoo/detectors/audio/rawgat_st.py +++ b/detectzoo/detectors/audio/rawgat_st.py @@ -57,11 +57,11 @@ } _SAMPLE_RATE = 16_000 -_MAX_SAMPLES = 64_600 # ``nb_samp`` in the upstream config (~4 s) +_MAX_SAMPLES = 64_600 # ``nb_samp`` in the upstream config (~4 s) -_OUT_CHANNELS = 70 # sinc filters -_FIRST_CONV = 128 # sinc kernel size (becomes 129 after odd-fix) -_FILTS = [32, [32, 32], [32, 64], [64, 64]] +_OUT_CHANNELS = 70 # sinc filters +_FIRST_CONV = 128 # sinc kernel size (becomes 129 after odd-fix) +_FILTS = [32, [32, 32], [32, 64], [64, 64]] # --------------------------------------------------------------------------- @@ -111,7 +111,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: fmin = float(self.mel[i]) fmax = float(self.mel[i + 1]) h_high = (2 * fmax / sr) * torch.sinc(2 * fmax * hsupp / sr) - h_low = (2 * fmin / sr) * torch.sinc(2 * fmin * hsupp / sr) + h_low = (2 * fmin / sr) * torch.sinc(2 * fmin * hsupp / sr) filters[i] = hamming * (h_high - h_low) filters = filters.view(self.out_channels, 1, self.kernel_size) return F.conv1d(x, filters, stride=1, padding=0, bias=None) @@ -128,24 +128,36 @@ def __init__(self, nb_filts: list, first: bool = False) -> None: if not first: self.bn1 = nn.BatchNorm2d(nb_filts[0]) self.conv1 = nn.Conv2d( - nb_filts[0], nb_filts[1], - kernel_size=(2, 3), padding=(1, 1), stride=1, + nb_filts[0], + nb_filts[1], + kernel_size=(2, 3), + padding=(1, 1), + stride=1, ) self.conv_1 = nn.Conv2d( - 1, nb_filts[1], - kernel_size=(2, 3), padding=(1, 1), stride=1, + 1, + nb_filts[1], + kernel_size=(2, 3), + padding=(1, 1), + stride=1, ) self.bn2 = nn.BatchNorm2d(nb_filts[1]) self.conv2 = nn.Conv2d( - nb_filts[1], nb_filts[1], - kernel_size=(2, 3), padding=(0, 1), stride=1, + nb_filts[1], + nb_filts[1], + kernel_size=(2, 3), + padding=(0, 1), + stride=1, ) self.selu = nn.SELU(inplace=True) if nb_filts[0] != nb_filts[1]: self.downsample = True self.conv_downsample = nn.Conv2d( - nb_filts[0], nb_filts[1], - kernel_size=(1, 3), padding=(0, 1), stride=1, + nb_filts[0], + nb_filts[1], + kernel_size=(1, 3), + padding=(0, 1), + stride=1, ) else: self.downsample = False @@ -176,7 +188,7 @@ def __init__(self, in_dim: int, out_dim: int) -> None: self.att_proj = nn.Linear(in_dim, out_dim) self.att_weight = nn.Parameter(torch.empty(out_dim, 1)) nn.init.xavier_normal_(self.att_weight) - self.proj_with_att = nn.Linear(in_dim, out_dim) + self.proj_with_att = nn.Linear(in_dim, out_dim) self.proj_without_att = nn.Linear(in_dim, out_dim) self.bn = nn.BatchNorm1d(out_dim) self.input_drop = nn.Dropout(p=0.2) @@ -219,15 +231,15 @@ def forward(self, h: torch.Tensor) -> torch.Tensor: """Return ``(B, k, 1, D)`` — the extra singleton dim matches upstream ``Pool``'s indexing so ``transpose(1, 3)`` in the outer net works.""" z = self.drop(h) - scores = self.sigmoid(self.proj(z)) # (B, N, 1) + scores = self.sigmoid(self.proj(z)) # (B, N, 1) num_nodes = h.size(1) k = max(2, int(self.k * num_nodes)) - _, idx = torch.topk(scores, k, dim=1) # (B, k, 1) - weighted = h * scores # (B, N, D) + _, idx = torch.topk(scores, k, dim=1) # (B, k, 1) + weighted = h * scores # (B, N, D) picked = [] for i in range(h.size(0)): - picked.append(weighted[i, idx[i], :]) # (k, 1, D) - return torch.stack(picked, dim=0) # (B, k, 1, D) + picked.append(weighted[i, idx[i], :]) # (k, 1, D) + return torch.stack(picked, dim=0) # (B, k, 1, D) # --------------------------------------------------------------------------- @@ -260,6 +272,7 @@ def _mk_encoder() -> nn.Sequential: nn.Sequential(_ResidualBlock(_FILTS[3])), nn.Sequential(_ResidualBlock(_FILTS[3])), ) + self.encoder1 = _mk_encoder() self.encoder2 = _mk_encoder() @@ -283,47 +296,51 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: b, t = x.shape x = x.view(b, 1, t) x = self.conv_time(x) - x = x.unsqueeze(1) # (B, 1, 70, T') + x = x.unsqueeze(1) # (B, 1, 70, T') x = F.max_pool2d(torch.abs(x), (3, 3)) # (B, 1, 23, T'') x = self.selu(self.first_bn(x)) - e1 = self.encoder1(x) # (B, 64, 23, 29) - s_max, _ = torch.max(torch.abs(e1), dim=3) # (B, 64, 23) - g1 = self.GAT_layer1(s_max.transpose(1, 2)) # (B, 23, 32) + e1 = self.encoder1(x) # (B, 64, 23, 29) + s_max, _ = torch.max(torch.abs(e1), dim=3) # (B, 64, 23) + g1 = self.GAT_layer1(s_max.transpose(1, 2)) # (B, 23, 32) p1 = self.pool1(g1) o1 = self.proj1(p1.transpose(1, 3)) - o1 = o1.view(o1.size(0), o1.size(1), o1.size(3)) # (B, 32, 12) + o1 = o1.view(o1.size(0), o1.size(1), o1.size(3)) # (B, 32, 12) e2 = self.encoder2(x) - t_max, _ = torch.max(torch.abs(e2), dim=2) # (B, 64, 29) - g2 = self.GAT_layer2(t_max.transpose(1, 2)) # (B, 29, 32) + t_max, _ = torch.max(torch.abs(e2), dim=2) # (B, 64, 29) + g2 = self.GAT_layer2(t_max.transpose(1, 2)) # (B, 29, 32) p2 = self.pool2(g2) o2 = self.proj2(p2.transpose(1, 3)) - o2 = o2.view(o2.size(0), o2.size(1), o2.size(3)) # (B, 32, 12) + o2 = o2.view(o2.size(0), o2.size(1), o2.size(3)) # (B, 32, 12) fused = torch.mul(o1, o2) if self._fusion == "mul" else (o1 + o2) - g3 = self.GAT_layer3(fused.transpose(1, 2)) # (B, 12, 16) + g3 = self.GAT_layer3(fused.transpose(1, 2)) # (B, 12, 16) p3 = self.pool3(g3) - nodes = self.proj(p3).flatten(1) # (B, 7) - return self.proj_node(nodes) # (B, 2) + nodes = self.proj(p3).flatten(1) # (B, 7) + return self.proj_node(nodes) # (B, 2) # --------------------------------------------------------------------------- # Audio helpers (shared style with rawnet2.py) # --------------------------------------------------------------------------- + def _load_audio(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> torch.Tensor: try: import torchaudio + wav, sr = torchaudio.load(str(path)) if sr != target_sr: wav = torchaudio.functional.resample(wav, sr, target_sr) except Exception: import soundfile as sf + data, sr = sf.read(str(path), always_2d=True) wav = torch.from_numpy(data.T.astype(np.float32)) if sr != target_sr: import torchaudio + wav = torchaudio.functional.resample(wav, sr, target_sr) if wav.shape[0] > 1: wav = wav.mean(dim=0, keepdim=True) @@ -341,6 +358,7 @@ def _pad_or_trim(wav: torch.Tensor, length: int) -> torch.Tensor: # DetectZoo detector wrapper # --------------------------------------------------------------------------- + @register_detector("rawgat_st", aliases=["rawgat", "rawgatst"]) class RawGATSTDetector(BaseDetector): """RawGAT-ST audio deepfake detector (Tak et al., ASVspoof 2021 Workshop). @@ -403,9 +421,7 @@ def __init__( self._model.to(self._device).eval() def _load_weights(self) -> None: - state = torch.load( - self._weight_path, map_location="cpu", weights_only=False - ) + state = torch.load(self._weight_path, map_location="cpu", weights_only=False) if isinstance(state, dict): for key in ("model_state_dict", "state_dict", "model"): if key in state: diff --git a/detectzoo/detectors/audio/rawnet2.py b/detectzoo/detectors/audio/rawnet2.py index 47696c4..02bfced 100644 --- a/detectzoo/detectors/audio/rawnet2.py +++ b/detectzoo/detectors/audio/rawnet2.py @@ -35,19 +35,19 @@ # --------------------------------------------------------------------------- # Constants — dims verified from official pretrained checkpoint # --------------------------------------------------------------------------- -_CKPT_URL = "https://www.asvspoof.org/asvspoof2021/pre_trained_LA_RawNet2.zip" +_CKPT_URL = "https://www.asvspoof.org/asvspoof2021/pre_trained_LA_RawNet2.zip" _CKPT_NAME = "pre_trained_LA_RawNet2.pth" -_CKPT_ZIP = "pre_trained_LA_RawNet2.zip" +_CKPT_ZIP = "pre_trained_LA_RawNet2.zip" -_SAMPLE_RATE = 16_000 -_MAX_SAMPLES = 64_600 +_SAMPLE_RATE = 16_000 +_MAX_SAMPLES = 64_600 _NB_SINC_FILTERS = 20 _SINC_FILTER_LEN = 1024 -_NB_FILTS = [20, 20, 20, 128, 128, 128, 128] -_GRU_NODE = 1024 -_NB_FC_NODE = 1024 -_NB_CLASSES = 2 +_NB_FILTS = [20, 20, 20, 128, 128, 128, 128] +_GRU_NODE = 1024 +_NB_FC_NODE = 1024 +_NB_CLASSES = 2 # --------------------------------------------------------------------------- @@ -84,20 +84,18 @@ def __init__( kernel_size += 1 # force odd (symmetric filter) self.out_channels = out_channels - self.kernel_size = kernel_size - self.sample_rate = sample_rate - self.stride = stride - self.padding = padding - self.dilation = dilation + self.kernel_size = kernel_size + self.sample_rate = sample_rate + self.stride = stride + self.padding = padding + self.dilation = dilation n_fft = 512 - f = int(sample_rate / 2) * np.linspace(0, 1, int(n_fft / 2) + 1) - fmel = self._to_mel(f) + f = int(sample_rate / 2) * np.linspace(0, 1, int(n_fft / 2) + 1) + fmel = self._to_mel(f) band_edges_mel = np.linspace(fmel.min(), fmel.max(), out_channels + 1) self.mel = self._to_hz(band_edges_mel) - self.hsupp = torch.arange( - -(kernel_size - 1) / 2.0, (kernel_size - 1) / 2.0 + 1 - ) + self.hsupp = torch.arange(-(kernel_size - 1) / 2.0, (kernel_size - 1) / 2.0 + 1) self.band_pass = torch.zeros(out_channels, kernel_size) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -112,15 +110,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: ) hideal = hHigh - hLow self.band_pass[i, :] = torch.from_numpy( - np.hamming(self.kernel_size).astype(np.float32) - * hideal.astype(np.float32) + np.hamming(self.kernel_size).astype(np.float32) * hideal.astype(np.float32) ) - filters = self.band_pass.to(x.device).view( - self.out_channels, 1, self.kernel_size - ) + filters = self.band_pass.to(x.device).view(self.out_channels, 1, self.kernel_size) return F.conv1d( - x, filters, + x, + filters, stride=self.stride, padding=self.padding, dilation=self.dilation, @@ -144,11 +140,9 @@ def __init__(self, nb_filts: list, first: bool = False) -> None: if not first: self.bn1 = nn.BatchNorm1d(nb_filts[0]) self.lrelu = nn.LeakyReLU(negative_slope=0.3) - self.conv1 = nn.Conv1d(nb_filts[0], nb_filts[1], - kernel_size=3, padding=1, stride=1) - self.bn2 = nn.BatchNorm1d(nb_filts[1]) - self.conv2 = nn.Conv1d(nb_filts[1], nb_filts[1], - kernel_size=3, padding=1, stride=1) + self.conv1 = nn.Conv1d(nb_filts[0], nb_filts[1], kernel_size=3, padding=1, stride=1) + self.bn2 = nn.BatchNorm1d(nb_filts[1]) + self.conv2 = nn.Conv1d(nb_filts[1], nb_filts[1], kernel_size=3, padding=1, stride=1) if nb_filts[0] != nb_filts[1]: self.downsample = True self.conv_downsample = nn.Conv1d( @@ -165,7 +159,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: out = self.lrelu(out) else: out = x - out = self.conv1(x) # upstream quirk: uses x, not bn1(x) + out = self.conv1(x) # upstream quirk: uses x, not bn1(x) out = self.bn2(out) out = self.lrelu(out) out = self.conv2(out) @@ -192,8 +186,8 @@ def __init__(self) -> None: sample_rate=_SAMPLE_RATE, ) self.first_bn = nn.BatchNorm1d(_NB_SINC_FILTERS) - self.selu = nn.SELU(inplace=True) - self.sig = nn.Sigmoid() + self.selu = nn.SELU(inplace=True) + self.sig = nn.Sigmoid() # Residual blocks (channel schedule: 20→20→20→128→128→128→128) self.block0 = nn.Sequential(_ResBlock([_NB_FILTS[0], _NB_FILTS[1]], first=True)) @@ -217,7 +211,7 @@ def __init__(self) -> None: self.gru = nn.GRU( input_size=_NB_FILTS[-1], hidden_size=_GRU_NODE, - num_layers=3, # upstream: nb_gru_layer=3 + num_layers=3, # upstream: nb_gru_layer=3 batch_first=True, ) self.fc1_gru = nn.Linear(_GRU_NODE, _NB_FC_NODE) @@ -226,8 +220,7 @@ def __init__(self) -> None: @staticmethod def _make_attention_fc(in_features: int, out_features: int) -> nn.Sequential: - return nn.Sequential(nn.Linear(in_features=in_features, - out_features=out_features)) + return nn.Sequential(nn.Linear(in_features=in_features, out_features=out_features)) def forward(self, x: torch.Tensor) -> torch.Tensor: """x: [B, T] raw waveform at 16 kHz.""" @@ -277,7 +270,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = self.bn_before_gru(x) x = self.selu(x) - x = x.permute(0, 2, 1) # (B, F, T) → (B, T, F) + x = x.permute(0, 2, 1) # (B, F, T) → (B, T, F) self.gru.flatten_parameters() x, _ = self.gru(x) x = x[:, -1, :] @@ -290,18 +283,22 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Audio helpers # --------------------------------------------------------------------------- + def _load_audio(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> torch.Tensor: try: import torchaudio + wav, sr = torchaudio.load(str(path)) if sr != target_sr: wav = torchaudio.functional.resample(wav, sr, target_sr) except Exception: import soundfile as sf + data, sr = sf.read(str(path), always_2d=True) wav = torch.from_numpy(data.T.astype(np.float32)) if sr != target_sr: import torchaudio + wav = torchaudio.functional.resample(wav, sr, target_sr) if wav.shape[0] > 1: wav = wav.mean(dim=0, keepdim=True) @@ -319,6 +316,7 @@ def _pad_or_trim(wav: torch.Tensor, length: int) -> torch.Tensor: # DetectZoo detector wrapper # --------------------------------------------------------------------------- + @register_detector("rawnet2", aliases=["rawnet2_audio"]) class RawNet2Detector(BaseDetector): """RawNet2 audio deepfake detector (Tak et al., ICASSP 2021). @@ -380,9 +378,7 @@ def __init__( self._model.to(self._device).eval() def _load_weights(self) -> None: - state = torch.load( - self._weight_path, map_location="cpu", weights_only=False - ) + state = torch.load(self._weight_path, map_location="cpu", weights_only=False) if isinstance(state, dict): for key in ("model", "state_dict", "model_state_dict"): if key in state: @@ -435,9 +431,9 @@ def predict(self, input_data: Any) -> DetectionResult: DetectionResult score=P(ai), label='ai'/'human', confidence in [0,1]. """ - wav = self._normalize_input(input_data).unsqueeze(0).to(self._device) + wav = self._normalize_input(input_data).unsqueeze(0).to(self._device) logits = self._model(wav) - probs = torch.softmax(logits, dim=-1) + probs = torch.softmax(logits, dim=-1) score_ai = float(probs[0, 0]) diff --git a/detectzoo/detectors/audio/restssdnet.py b/detectzoo/detectors/audio/restssdnet.py index a0afff4..d5292c8 100644 --- a/detectzoo/detectors/audio/restssdnet.py +++ b/detectzoo/detectors/audio/restssdnet.py @@ -33,10 +33,7 @@ # --------------------------------------------------------------------------- # Constants — taken from official Res-TSSDNet checkpoint / upstream ``models.py`` # --------------------------------------------------------------------------- -_CKPT_FILE = ( - "Res_TSSDNet_time_frame_61_ASVspoof2019_LA_" - "Loss_0.0017_dEER_0.74%_eEER_1.64%.pth" -) +_CKPT_FILE = "Res_TSSDNet_time_frame_61_ASVspoof2019_LA_Loss_0.0017_dEER_0.74%_eEER_1.64%.pth" _CKPT_URL = ( "https://github.com/ghua-ac/end-to-end-synthetic-speech-detection/raw/" "main/pretrained/" + quote(_CKPT_FILE) @@ -59,7 +56,7 @@ class _RSM1D(nn.Module): def __init__(self, channels_in: int, channels_out: int) -> None: super().__init__() - self.conv1 = nn.Conv1d(channels_in, channels_out, kernel_size=3, padding=1, bias=False) + self.conv1 = nn.Conv1d(channels_in, channels_out, kernel_size=3, padding=1, bias=False) self.conv2 = nn.Conv1d(channels_out, channels_out, kernel_size=3, padding=1, bias=False) self.conv3 = nn.Conv1d(channels_out, channels_out, kernel_size=3, padding=1, bias=False) self.bn1 = nn.BatchNorm1d(channels_out) @@ -114,18 +111,22 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Audio helpers # --------------------------------------------------------------------------- + def _load_audio(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> torch.Tensor: try: import torchaudio + wav, sr = torchaudio.load(str(path)) if sr != target_sr: wav = torchaudio.functional.resample(wav, sr, target_sr) except Exception: import soundfile as sf + data, sr = sf.read(str(path), always_2d=True) wav = torch.from_numpy(data.T.astype(np.float32)) if sr != target_sr: import torchaudio + wav = torchaudio.functional.resample(wav, sr, target_sr) if wav.shape[0] > 1: wav = wav.mean(dim=0, keepdim=True) @@ -143,6 +144,7 @@ def _pad_or_trim(wav: torch.Tensor, length: int) -> torch.Tensor: # DetectZoo detector wrapper # --------------------------------------------------------------------------- + @register_detector("res_tssdnet", aliases=["restssdnet", "tssdnet"]) class ResTSSDNetDetector(BaseDetector): """Res-TSSDNet audio deepfake detector (Hua et al., IEEE SPL 2021). @@ -198,9 +200,7 @@ def __init__( self._model.to(self._device).eval() def _load_weights(self) -> None: - state = torch.load( - self._weight_path, map_location="cpu", weights_only=False - ) + state = torch.load(self._weight_path, map_location="cpu", weights_only=False) if isinstance(state, dict): for key in ("model_state_dict", "state_dict", "model"): if key in state: diff --git a/detectzoo/detectors/audio/samo.py b/detectzoo/detectors/audio/samo.py index d456685..7bd2a6c 100644 --- a/detectzoo/detectors/audio/samo.py +++ b/detectzoo/detectors/audio/samo.py @@ -204,18 +204,10 @@ def _derive_att_map(self, x: Tensor, n1: int, n2: int) -> Tensor: att_map = self._pairwise_mul_nodes(x) att_map = torch.tanh(self.att_proj(att_map)) att_board = torch.zeros_like(att_map[:, :, :, 0]).unsqueeze(-1) - att_board[:, :n1, :n1, :] = torch.matmul( - att_map[:, :n1, :n1, :], self.att_weight11 - ) - att_board[:, n1:, n1:, :] = torch.matmul( - att_map[:, n1:, n1:, :], self.att_weight22 - ) - att_board[:, :n1, n1:, :] = torch.matmul( - att_map[:, :n1, n1:, :], self.att_weight12 - ) - att_board[:, n1:, :n1, :] = torch.matmul( - att_map[:, n1:, :n1, :], self.att_weight12 - ) + att_board[:, :n1, :n1, :] = torch.matmul(att_map[:, :n1, :n1, :], self.att_weight11) + att_board[:, n1:, n1:, :] = torch.matmul(att_map[:, n1:, n1:, :], self.att_weight22) + att_board[:, :n1, n1:, :] = torch.matmul(att_map[:, :n1, n1:, :], self.att_weight12) + att_board[:, n1:, :n1, :] = torch.matmul(att_map[:, n1:, :n1, :], self.att_weight12) att_map = att_board / self.temp return F.softmax(att_map, dim=-2) @@ -224,12 +216,8 @@ def _project(self, x: Tensor, att_map: Tensor) -> Tensor: x2 = self.proj_without_att(x) return x1 + x2 - def _project_master( - self, x: Tensor, master: Tensor, att_map: Tensor - ) -> Tensor: - x1 = self.proj_with_attM( - torch.matmul(att_map.squeeze(-1).unsqueeze(1), x) - ) + def _project_master(self, x: Tensor, master: Tensor, att_map: Tensor) -> Tensor: + x1 = self.proj_with_attM(torch.matmul(att_map.squeeze(-1).unsqueeze(1), x)) x2 = self.proj_without_attM(master) return x1 + x2 @@ -296,9 +284,7 @@ def __init__( ) -> None: super().__init__() if in_channels != 1: - raise ValueError( - f"SincConv only supports one input channel (got {in_channels})" - ) + raise ValueError(f"SincConv only supports one input channel (got {in_channels})") if bias: raise ValueError("SincConv does not support bias.") if groups > 1: @@ -319,9 +305,7 @@ def __init__( bands_hz = self.to_hz(bands_mel) self.mel = bands_hz - self.hsupp = torch.arange( - -(self.kernel_size - 1) / 2, (self.kernel_size - 1) / 2 + 1 - ) + self.hsupp = torch.arange(-(self.kernel_size - 1) / 2, (self.kernel_size - 1) / 2 + 1) self.band_pass = torch.zeros(self.out_channels, self.kernel_size) for i in range(len(self.mel) - 1): fmin, fmax = self.mel[i], self.mel[i + 1] @@ -331,16 +315,14 @@ def __init__( h_low = (2 * fmin / self.sample_rate) * np.sinc( 2 * fmin * self.hsupp / self.sample_rate ) - self.band_pass[i, :] = Tensor(np.hamming(self.kernel_size)) * Tensor( - h_high - h_low - ) + self.band_pass[i, :] = Tensor(np.hamming(self.kernel_size)) * Tensor(h_high - h_low) def forward(self, x: Tensor, mask: bool = False) -> Tensor: bp = self.band_pass.clone().to(x.device) if mask: a = int(np.random.uniform(0, 20)) a0 = random.randint(0, bp.shape[0] - a) - bp[a0:a0 + a, :] = 0 + bp[a0 : a0 + a, :] = 0 self.filters = bp.view(self.out_channels, 1, self.kernel_size) return F.conv1d( x, @@ -493,26 +475,18 @@ def forward(self, x: Tensor, Freq_aug: bool = False) -> Tuple[Tensor, Tensor]: gat_T = self.GAT_layer_T(e_T) out_T = self.pool_T(gat_T) - out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11( - out_T, out_S, master=self.master1 - ) + out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(out_T, out_S, master=self.master1) out_S1 = self.pool_hS1(out_S1) out_T1 = self.pool_hT1(out_T1) - out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12( - out_T1, out_S1, master=master1 - ) + out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(out_T1, out_S1, master=master1) out_T1 = out_T1 + out_T_aug out_S1 = out_S1 + out_S_aug master1 = master1 + master_aug - out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21( - out_T, out_S, master=self.master2 - ) + out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(out_T, out_S, master=self.master2) out_S2 = self.pool_hS2(out_S2) out_T2 = self.pool_hT2(out_T2) - out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22( - out_T2, out_S2, master=master2 - ) + out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(out_T2, out_S2, master=master2) out_T2 = out_T2 + out_T_aug out_S2 = out_S2 + out_S_aug master2 = master2 + master_aug @@ -533,9 +507,7 @@ def forward(self, x: Tensor, Freq_aug: bool = False) -> Tuple[Tensor, Tensor]: s_max, _ = torch.max(torch.abs(out_S), dim=1) s_avg = torch.mean(out_S, dim=1) - last_hidden = torch.cat( - [t_max, t_avg, s_max, s_avg, master.squeeze(1)], dim=1 - ) + last_hidden = torch.cat([t_max, t_avg, s_max, s_avg, master.squeeze(1)], dim=1) last_hidden = self.drop(last_hidden) return last_hidden, self.out_layer(last_hidden) @@ -569,19 +541,23 @@ def _register_pickle_shim() -> None: # Audio helpers (mirror other audio detectors for consistency) # --------------------------------------------------------------------------- + def _load_audio(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> torch.Tensor: """Load audio → mono float32 [T] at ``target_sr``.""" try: import torchaudio + wav, sr = torchaudio.load(str(path)) if sr != target_sr: wav = torchaudio.functional.resample(wav, sr, target_sr) except Exception: import soundfile as sf + data, sr = sf.read(str(path), always_2d=True) wav = torch.from_numpy(data.T.astype(np.float32)) if sr != target_sr: import torchaudio + wav = torchaudio.functional.resample(wav, sr, target_sr) if wav.shape[0] > 1: wav = wav.mean(dim=0, keepdim=True) @@ -715,9 +691,7 @@ def _collect_asvspoof_enrollment( speaker_audio: dict[str, list[Path]] = {} for protocol in protocols: if not protocol.is_file(): - raise FileNotFoundError( - f"ASVspoof enrolment protocol not found: {protocol}" - ) + raise FileNotFoundError(f"ASVspoof enrolment protocol not found: {protocol}") with open(protocol, "r", encoding="utf-8") as fh: for raw_line in fh: line = raw_line.strip() @@ -737,9 +711,7 @@ def _collect_asvspoof_enrollment( speaker_audio.setdefault(spk_id, []).append(flac_path) if not speaker_audio: - raise RuntimeError( - f"No enrolment utterances found under {protocol_dir}" - ) + raise RuntimeError(f"No enrolment utterances found under {protocol_dir}") return speaker_audio def _load_checkpoint(self) -> nn.Module: @@ -747,13 +719,9 @@ def _load_checkpoint(self) -> nn.Module: _register_pickle_shim() try: - obj = torch.load( - self._weight_path, map_location="cpu", weights_only=True - ) + obj = torch.load(self._weight_path, map_location="cpu", weights_only=True) except Exception: - obj = torch.load( - self._weight_path, map_location="cpu", weights_only=False - ) + obj = torch.load(self._weight_path, map_location="cpu", weights_only=False) if isinstance(obj, nn.Module): return obj @@ -767,14 +735,10 @@ def _load_checkpoint(self) -> nn.Module: model = Model(_MODEL_CONFIG) missing, unexpected = model.load_state_dict(obj, strict=False) if missing: - raise RuntimeError( - f"SAMO checkpoint missing keys: {missing[:5]} …" - ) + raise RuntimeError(f"SAMO checkpoint missing keys: {missing[:5]} …") return model - raise RuntimeError( - f"Unexpected SAMO checkpoint type: {type(obj).__name__}" - ) + raise RuntimeError(f"Unexpected SAMO checkpoint type: {type(obj).__name__}") def _normalize_input(self, input_data: Any) -> torch.Tensor: """Accept path / numpy / tensor → mono waveform [T] at 16 kHz.""" diff --git a/detectzoo/detectors/audio/xlsr_sls/detector.py b/detectzoo/detectors/audio/xlsr_sls/detector.py index c563abb..5b4f59d 100644 --- a/detectzoo/detectors/audio/xlsr_sls/detector.py +++ b/detectzoo/detectors/audio/xlsr_sls/detector.py @@ -78,9 +78,8 @@ # detectors in DetectZoo (ast_asvspoof, aasist, ...). # --------------------------------------------------------------------------- -def _load_audio_to_numpy( - path: Union[str, Path], target_sr: int = _SAMPLE_RATE -) -> np.ndarray: + +def _load_audio_to_numpy(path: Union[str, Path], target_sr: int = _SAMPLE_RATE) -> np.ndarray: """Load an audio file -> mono float32 numpy array at ``target_sr``.""" try: import torchaudio @@ -140,6 +139,7 @@ def _resolve_label_indices(id2label: Dict[int, str]) -> Tuple[int, int]: # Detector wrapper # --------------------------------------------------------------------------- + @register_detector( "xlsr_sls", aliases=["xlsr-sls", "xlsr", "xls-r-sls", "xlsr_deepfake", "xlsr_audio"], @@ -227,9 +227,7 @@ def __init__( if checkpoint_path is not None: source = Path(checkpoint_path).expanduser().resolve() if not source.exists(): - raise FileNotFoundError( - f"checkpoint_path does not exist: {source}" - ) + raise FileNotFoundError(f"checkpoint_path does not exist: {source}") _LOGGER.info("Loading XLSR-SLS from local directory %s", source) source = str(source) else: @@ -267,9 +265,7 @@ def __init__( # Resolve which class index corresponds to "spoof"/"fake" (= AI). id2label = dict(self._model.config.id2label) auto_spoof, auto_bona = _resolve_label_indices(id2label) - self._spoof_idx = ( - auto_spoof if spoof_label_index is None else int(spoof_label_index) - ) + self._spoof_idx = auto_spoof if spoof_label_index is None else int(spoof_label_index) self._bonafide_idx = ( auto_bona if bonafide_label_index is None else int(bonafide_label_index) ) diff --git a/detectzoo/detectors/image/__init__.py b/detectzoo/detectors/image/__init__.py index e81dbbe..e95407c 100644 --- a/detectzoo/detectors/image/__init__.py +++ b/detectzoo/detectors/image/__init__.py @@ -2,20 +2,20 @@ from detectzoo.detectors.image.aeroblade import AerobladeDetector from detectzoo.detectors.image.aide import AIDEDetector +from detectzoo.detectors.image.c2p_clip import C2PCLIPDetector from detectzoo.detectors.image.cnnspot import CNNSpotDetector from detectzoo.detectors.image.cospy import CoSpyDetector, CoSpySDV14Detector from detectzoo.detectors.image.d3 import D3Detector +from detectzoo.detectors.image.drct import DRCTDetector from detectzoo.detectors.image.fatformer import FatFormerDetector +from detectzoo.detectors.image.freqnet import FreqNetDetector +from detectzoo.detectors.image.ladeda import LaDeDaDetector from detectzoo.detectors.image.lgrad import LGradDetector +from detectzoo.detectors.image.manifold_bias import ManifoldBiasDetector from detectzoo.detectors.image.npr_deepfake import NPRDeepfakeDetector from detectzoo.detectors.image.patchcraft import PatchCraftDetector from detectzoo.detectors.image.safe import SAFEDetector from detectzoo.detectors.image.univfd import UnivFDDetector -from detectzoo.detectors.image.c2p_clip import C2PCLIPDetector -from detectzoo.detectors.image.manifold_bias import ManifoldBiasDetector -from detectzoo.detectors.image.drct import DRCTDetector -from detectzoo.detectors.image.freqnet import FreqNetDetector -from detectzoo.detectors.image.ladeda import LaDeDaDetector __all__ = [ "AerobladeDetector", diff --git a/detectzoo/detectors/image/_lgrad_stylegan_discriminator.py b/detectzoo/detectors/image/_lgrad_stylegan_discriminator.py index 9f83a8b..1e496b4 100644 --- a/detectzoo/detectors/image/_lgrad_stylegan_discriminator.py +++ b/detectzoo/detectors/image/_lgrad_stylegan_discriminator.py @@ -10,13 +10,13 @@ import torch.nn as nn import torch.nn.functional as F -__all__ = ['StyleGANDiscriminator'] +__all__ = ["StyleGANDiscriminator"] _RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024] _INIT_RES = 4 -_FUSED_SCALE_ALLOWED = [True, False, 'auto'] +_FUSED_SCALE_ALLOWED = [True, False, "auto"] _AUTO_FUSED_SCALE_MIN_RES = 128 @@ -42,25 +42,30 @@ class StyleGANDiscriminator(nn.Module): (9) fmaps_max: Maximum number of feature maps in each layer. (default: 512) """ - def __init__(self, - resolution, - image_channels=3, - label_size=0, - fused_scale='auto', - use_wscale=True, - minibatch_std_group_size=4, - minibatch_std_channels=1, - fmaps_base=16 << 10, - fmaps_max=512): + def __init__( + self, + resolution, + image_channels=3, + label_size=0, + fused_scale="auto", + use_wscale=True, + minibatch_std_group_size=4, + minibatch_std_channels=1, + fmaps_base=16 << 10, + fmaps_max=512, + ): super().__init__() if resolution not in _RESOLUTIONS_ALLOWED: - raise ValueError(f'Invalid resolution: `{resolution}`!\n' - f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.') + raise ValueError( + f"Invalid resolution: `{resolution}`!\nResolutions allowed: {_RESOLUTIONS_ALLOWED}." + ) if fused_scale not in _FUSED_SCALE_ALLOWED: - raise ValueError(f'Invalid fused-scale option: `{fused_scale}`!\n' - f'Options allowed: {_FUSED_SCALE_ALLOWED}.') + raise ValueError( + f"Invalid fused-scale option: `{fused_scale}`!\n" + f"Options allowed: {_FUSED_SCALE_ALLOWED}." + ) self.init_res = _INIT_RES self.init_res_log2 = int(np.log2(self.init_res)) @@ -75,85 +80,107 @@ def __init__(self, self.fmaps_base = fmaps_base self.fmaps_max = fmaps_max - self.register_buffer('lod', torch.zeros(())) - self.pth_to_tf_var_mapping = {'lod': 'lod'} + self.register_buffer("lod", torch.zeros(())) + self.pth_to_tf_var_mapping = {"lod": "lod"} for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): - res = 2 ** res_log2 + res = 2**res_log2 block_idx = self.final_res_log2 - res_log2 # Input convolution layer for each resolution. self.add_module( - f'input{block_idx}', - ConvBlock(in_channels=self.image_channels, - out_channels=self.get_nf(res), - kernel_size=1, - padding=0, - use_wscale=self.use_wscale)) - self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = ( - f'FromRGB_lod{block_idx}/weight') - self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = ( - f'FromRGB_lod{block_idx}/bias') + f"input{block_idx}", + ConvBlock( + in_channels=self.image_channels, + out_channels=self.get_nf(res), + kernel_size=1, + padding=0, + use_wscale=self.use_wscale, + ), + ) + self.pth_to_tf_var_mapping[f"input{block_idx}.weight"] = ( + f"FromRGB_lod{block_idx}/weight" + ) + self.pth_to_tf_var_mapping[f"input{block_idx}.bias"] = f"FromRGB_lod{block_idx}/bias" # Convolution block for each resolution (except the last one). if res != self.init_res: - if self.fused_scale == 'auto': - fused_scale = (res >= _AUTO_FUSED_SCALE_MIN_RES) + if self.fused_scale == "auto": + fused_scale = res >= _AUTO_FUSED_SCALE_MIN_RES else: fused_scale = self.fused_scale self.add_module( - f'layer{2 * block_idx}', - ConvBlock(in_channels=self.get_nf(res), - out_channels=self.get_nf(res), - use_wscale=self.use_wscale)) - tf_layer0_name = 'Conv0' + f"layer{2 * block_idx}", + ConvBlock( + in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale, + ), + ) + tf_layer0_name = "Conv0" self.add_module( - f'layer{2 * block_idx + 1}', - ConvBlock(in_channels=self.get_nf(res), - out_channels=self.get_nf(res // 2), - downsample=True, - fused_scale=fused_scale, - use_wscale=self.use_wscale)) - tf_layer1_name = 'Conv1_down' + f"layer{2 * block_idx + 1}", + ConvBlock( + in_channels=self.get_nf(res), + out_channels=self.get_nf(res // 2), + downsample=True, + fused_scale=fused_scale, + use_wscale=self.use_wscale, + ), + ) + tf_layer1_name = "Conv1_down" # Convolution block for last resolution. else: self.add_module( - f'layer{2 * block_idx}', - ConvBlock(in_channels=self.get_nf(res), - out_channels=self.get_nf(res), - use_wscale=self.use_wscale, - minibatch_std_group_size=minibatch_std_group_size, - minibatch_std_channels=minibatch_std_channels)) - tf_layer0_name = 'Conv' + f"layer{2 * block_idx}", + ConvBlock( + in_channels=self.get_nf(res), + out_channels=self.get_nf(res), + use_wscale=self.use_wscale, + minibatch_std_group_size=minibatch_std_group_size, + minibatch_std_channels=minibatch_std_channels, + ), + ) + tf_layer0_name = "Conv" self.add_module( - f'layer{2 * block_idx + 1}', - DenseBlock(in_channels=self.get_nf(res) * res * res, - out_channels=self.get_nf(res // 2), - use_wscale=self.use_wscale)) - tf_layer1_name = 'Dense0' - - self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = ( - f'{res}x{res}/{tf_layer0_name}/weight') - self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = ( - f'{res}x{res}/{tf_layer0_name}/bias') - self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = ( - f'{res}x{res}/{tf_layer1_name}/weight') - self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = ( - f'{res}x{res}/{tf_layer1_name}/bias') + f"layer{2 * block_idx + 1}", + DenseBlock( + in_channels=self.get_nf(res) * res * res, + out_channels=self.get_nf(res // 2), + use_wscale=self.use_wscale, + ), + ) + tf_layer1_name = "Dense0" + + self.pth_to_tf_var_mapping[f"layer{2 * block_idx}.weight"] = ( + f"{res}x{res}/{tf_layer0_name}/weight" + ) + self.pth_to_tf_var_mapping[f"layer{2 * block_idx}.bias"] = ( + f"{res}x{res}/{tf_layer0_name}/bias" + ) + self.pth_to_tf_var_mapping[f"layer{2 * block_idx + 1}.weight"] = ( + f"{res}x{res}/{tf_layer1_name}/weight" + ) + self.pth_to_tf_var_mapping[f"layer{2 * block_idx + 1}.bias"] = ( + f"{res}x{res}/{tf_layer1_name}/bias" + ) # Final dense block. self.add_module( - f'layer{2 * block_idx + 2}', - DenseBlock(in_channels=self.get_nf(res // 2), - out_channels=max(self.label_size, 1), - use_wscale=self.use_wscale, - wscale_gain=1.0, - activation_type='linear')) - self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = ( - f'{res}x{res}/Dense1/weight') - self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = ( - f'{res}x{res}/Dense1/bias') + f"layer{2 * block_idx + 2}", + DenseBlock( + in_channels=self.get_nf(res // 2), + out_channels=max(self.label_size, 1), + use_wscale=self.use_wscale, + wscale_gain=1.0, + activation_type="linear", + ), + ) + self.pth_to_tf_var_mapping[f"layer{2 * block_idx + 2}.weight"] = ( + f"{res}x{res}/Dense1/weight" + ) + self.pth_to_tf_var_mapping[f"layer{2 * block_idx + 2}.bias"] = f"{res}x{res}/Dense1/bias" self.downsample = DownsamplingLayer() @@ -164,46 +191,53 @@ def get_nf(self, res): def forward(self, image, label=None, lod=None, **_unused_kwargs): expected_shape = (self.image_channels, self.resolution, self.resolution) if image.ndim != 4 or image.shape[1:] != expected_shape: - raise ValueError(f'The input tensor should be with shape ' - f'[batch_size, channel, height, width], where ' - f'`channel` equals to {self.image_channels}, ' - f'`height`, `width` equal to {self.resolution}!\n' - f'But `{image.shape}` is received!') + raise ValueError( + f"The input tensor should be with shape " + f"[batch_size, channel, height, width], where " + f"`channel` equals to {self.image_channels}, " + f"`height`, `width` equal to {self.resolution}!\n" + f"But `{image.shape}` is received!" + ) lod = self.lod.cpu().tolist() if lod is None else lod if lod + self.init_res_log2 > self.final_res_log2: - raise ValueError(f'Maximum level-of-detail (lod) is ' - f'{self.final_res_log2 - self.init_res_log2}, ' - f'but `{lod}` is received!') + raise ValueError( + f"Maximum level-of-detail (lod) is " + f"{self.final_res_log2 - self.init_res_log2}, " + f"but `{lod}` is received!" + ) if self.label_size: if label is None: - raise ValueError(f'Model requires an additional label ' - f'(with size {self.label_size}) as input, ' - f'but no label is received!') + raise ValueError( + f"Model requires an additional label " + f"(with size {self.label_size}) as input, " + f"but no label is received!" + ) batch_size = image.shape[0] if label.ndim != 2 or label.shape != (batch_size, self.label_size): - raise ValueError(f'Input label should be with shape ' - f'[batch_size, label_size], where ' - f'`batch_size` equals to that of ' - f'images ({image.shape[0]}) and ' - f'`label_size` equals to {self.label_size}!\n' - f'But `{label.shape}` is received!') + raise ValueError( + f"Input label should be with shape " + f"[batch_size, label_size], where " + f"`batch_size` equals to that of " + f"images ({image.shape[0]}) and " + f"`label_size` equals to {self.label_size}!\n" + f"But `{label.shape}` is received!" + ) for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1): block_idx = current_lod = self.final_res_log2 - res_log2 if current_lod <= lod < current_lod + 1: - x = self.__getattr__(f'input{block_idx}')(image) + x = self.__getattr__(f"input{block_idx}")(image) elif current_lod - 1 < lod < current_lod: alpha = lod - np.floor(lod) - x = (self.__getattr__(f'input{block_idx}')(image) * alpha + - x * (1 - alpha)) + x = self.__getattr__(f"input{block_idx}")(image) * alpha + x * (1 - alpha) if lod < current_lod + 1: - x = self.__getattr__(f'layer{2 * block_idx}')(x) - x = self.__getattr__(f'layer{2 * block_idx + 1}')(x) + x = self.__getattr__(f"layer{2 * block_idx}")(x) + x = self.__getattr__(f"layer{2 * block_idx + 1}")(x) if lod > current_lod: image = self.downsample(image) - x = self.__getattr__(f'layer{2 * block_idx + 2}')(x) + x = self.__getattr__(f"layer{2 * block_idx + 2}")(x) if self.label_size: x = torch.sum(x * label, dim=1, keepdim=True) @@ -225,14 +259,14 @@ def forward(self, x): return x ng = min(self.group_size, x.shape[0]) nc = self.new_channels - temp_c = x.shape[1] // nc # [NCHW] + temp_c = x.shape[1] // nc # [NCHW] y = x.view(ng, -1, nc, temp_c, x.shape[2], x.shape[3]) # [GMncHW] - y = y - torch.mean(y, dim=0, keepdim=True) # [GMncHW] - y = torch.mean(y ** 2, dim=0) # [MncHW] - y = torch.sqrt(y + self.epsilon) # [MncHW] - y = torch.mean(y, dim=[2, 3, 4], keepdim=True) # [Mn111] - y = torch.mean(y, dim=2) # [Mn11] - y = y.repeat(ng, 1, x.shape[2], x.shape[3]) # [NnHW] + y = y - torch.mean(y, dim=0, keepdim=True) # [GMncHW] + y = torch.mean(y**2, dim=0) # [MncHW] + y = torch.sqrt(y + self.epsilon) # [MncHW] + y = torch.mean(y, dim=[2, 3, 4], keepdim=True) # [Mn111] + y = torch.mean(y, dim=2) # [Mn11] + y = y.repeat(ng, 1, x.shape[2], x.shape[3]) # [NnHW] return torch.cat([x, y], dim=1) @@ -246,10 +280,7 @@ def __init__(self, scale_factor=2): def forward(self, x): if self.scale_factor <= 1: return x - return F.avg_pool2d(x, - kernel_size=self.scale_factor, - stride=self.scale_factor, - padding=0) + return F.avg_pool2d(x, kernel_size=self.scale_factor, stride=self.scale_factor, padding=0) class Blur(torch.autograd.Function): @@ -258,17 +289,12 @@ class Blur(torch.autograd.Function): @staticmethod def forward(ctx, x, kernel): ctx.save_for_backward(kernel) - y = F.conv2d(input=x, - weight=kernel, - bias=None, - stride=1, - padding=1, - groups=x.shape[1]) + y = F.conv2d(input=x, weight=kernel, bias=None, stride=1, padding=1, groups=x.shape[1]) return y @staticmethod def backward(ctx, dy): - kernel, = ctx.saved_tensors + (kernel,) = ctx.saved_tensors dx = BlurBackPropagation.apply(dy, kernel) return dx, None, None @@ -279,33 +305,24 @@ class BlurBackPropagation(torch.autograd.Function): @staticmethod def forward(ctx, dy, kernel): ctx.save_for_backward(kernel) - dx = F.conv2d(input=dy, - weight=kernel.flip((2, 3)), - bias=None, - stride=1, - padding=1, - groups=dy.shape[1]) + dx = F.conv2d( + input=dy, weight=kernel.flip((2, 3)), bias=None, stride=1, padding=1, groups=dy.shape[1] + ) return dx @staticmethod def backward(ctx, ddx): - kernel, = ctx.saved_tensors - ddy = F.conv2d(input=ddx, - weight=kernel, - bias=None, - stride=1, - padding=1, - groups=ddx.shape[1]) + (kernel,) = ctx.saved_tensors + ddy = F.conv2d( + input=ddx, weight=kernel, bias=None, stride=1, padding=1, groups=ddx.shape[1] + ) return ddy, None, None class BlurLayer(nn.Module): """Implements the blur layer.""" - def __init__(self, - channels, - kernel=(1, 2, 1), - normalize=True): + def __init__(self, channels, kernel=(1, 2, 1), normalize=True): super().__init__() kernel = np.array(kernel, dtype=np.float32).reshape(1, -1) kernel = kernel.T.dot(kernel) @@ -313,7 +330,7 @@ def __init__(self, kernel = kernel / np.sum(kernel) kernel = kernel[np.newaxis, np.newaxis] kernel = np.tile(kernel, [channels, 1, 1, 1]) - self.register_buffer('kernel', torch.from_numpy(kernel)) + self.register_buffer("kernel", torch.from_numpy(kernel)) def forward(self, x): return Blur.apply(x, self.kernel) @@ -322,28 +339,31 @@ def forward(self, x): class ConvBlock(nn.Module): """Implements the convolutional block.""" - def __init__(self, - in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - add_bias=True, - downsample=False, - fused_scale=False, - use_wscale=True, - wscale_gain=_WSCALE_GAIN, - lr_mul=1.0, - activation_type='lrelu', - minibatch_std_group_size=0, - minibatch_std_channels=1): - + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + add_bias=True, + downsample=False, + fused_scale=False, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type="lrelu", + minibatch_std_group_size=0, + minibatch_std_channels=1, + ): + super().__init__() if minibatch_std_group_size > 1: in_channels = in_channels + minibatch_std_channels - self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size, - new_channels=minibatch_std_channels) + self.mbstd = MiniBatchSTDLayer( + group_size=minibatch_std_group_size, new_channels=minibatch_std_channels + ) else: self.mbstd = nn.Identity() @@ -373,8 +393,7 @@ def __init__(self, self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) self.wscale = wscale * lr_mul else: - self.weight = nn.Parameter( - torch.randn(*weight_shape) * wscale / lr_mul) + self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale / lr_mul) self.wscale = lr_mul if add_bias: @@ -383,9 +402,9 @@ def __init__(self, else: self.bias = None - if activation_type == 'linear': + if activation_type == "linear": self.activate = nn.Identity() - elif activation_type == 'lrelu': + elif activation_type == "lrelu": self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) def forward(self, x): @@ -394,14 +413,14 @@ def forward(self, x): weight = self.weight * self.wscale bias = self.bias * self.bscale if self.bias is not None else None if self.use_stride: - weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0) - weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] + - weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) * 0.25 - x = F.conv2d(x, - weight=weight, - bias=bias, - stride=self.stride, - padding=self.padding) + weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), "constant", 0.0) + weight = ( + weight[:, :, 1:, 1:] + + weight[:, :, :-1, 1:] + + weight[:, :, 1:, :-1] + + weight[:, :, :-1, :-1] + ) * 0.25 + x = F.conv2d(x, weight=weight, bias=bias, stride=self.stride, padding=self.padding) x = self.downsample(x) x = self.activate(x) return x @@ -410,14 +429,16 @@ def forward(self, x): class DenseBlock(nn.Module): """Implements the dense block.""" - def __init__(self, - in_channels, - out_channels, - add_bias=True, - use_wscale=True, - wscale_gain=_WSCALE_GAIN, - lr_mul=1.0, - activation_type='lrelu'): + def __init__( + self, + in_channels, + out_channels, + add_bias=True, + use_wscale=True, + wscale_gain=_WSCALE_GAIN, + lr_mul=1.0, + activation_type="lrelu", + ): super().__init__() weight_shape = (out_channels, in_channels) @@ -426,8 +447,7 @@ def __init__(self, self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul) self.wscale = wscale * lr_mul else: - self.weight = nn.Parameter( - torch.randn(*weight_shape) * wscale / lr_mul) + self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale / lr_mul) self.wscale = lr_mul if add_bias: @@ -436,9 +456,9 @@ def __init__(self, else: self.bias = None - if activation_type == 'linear': + if activation_type == "linear": self.activate = nn.Identity() - elif activation_type == 'lrelu': + elif activation_type == "lrelu": self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True) def forward(self, x): diff --git a/detectzoo/detectors/image/aeroblade.py b/detectzoo/detectors/image/aeroblade.py index cb5aaa6..d8fa6ff 100644 --- a/detectzoo/detectors/image/aeroblade.py +++ b/detectzoo/detectors/image/aeroblade.py @@ -84,9 +84,13 @@ def __init__( **kwargs: Any, ) -> None: super().__init__(threshold=threshold, device=device, **kwargs) - self.repo_ids: List[str] = list(repo_ids) if repo_ids is not None else [ - "CompVis/stable-diffusion-v1-1", - ] + self.repo_ids: List[str] = ( + list(repo_ids) + if repo_ids is not None + else [ + "CompVis/stable-diffusion-v1-1", + ] + ) self.lpips_vgg_index = lpips_vgg_index self.use_fp16 = bool(use_fp16) and str(device).startswith("cuda") @@ -182,8 +186,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/image/aide.py b/detectzoo/detectors/image/aide.py index 334ec74..3777bb7 100644 --- a/detectzoo/detectors/image/aide.py +++ b/detectzoo/detectors/image/aide.py @@ -4,10 +4,10 @@ Yan et al., "A Sanity Check for AI-generated Image Detection", ICLR 2025. https://arxiv.org/abs/2406.19435 -The key idea: combines two complementary branches: (1) SRM high-pass filtered + -DCT-selected frequency patches processed by dual ResNet-50 trunks to capture artifact +The key idea: combines two complementary branches: (1) SRM high-pass filtered + +DCT-selected frequency patches processed by dual ResNet-50 trunks to capture artifact cues, and (2) a frozen OpenCLIP ConvNeXt-XXLarge trunk for robust semantic features. -These signals are fused so the model jointly reasons over low-level artifacts and +These signals are fused so the model jointly reasons over low-level artifacts and high-level semantics for detection. Upstream: https://github.com/shilinyan99/AIDE @@ -36,14 +36,19 @@ def _dct_mat(size: int) -> list[list[float]]: return [ - [(np.sqrt(1.0 / size) if i == 0 else np.sqrt(2.0 / size)) * np.cos((j + 0.5) * np.pi * i / size) - for j in range(size)] + [ + (np.sqrt(1.0 / size) if i == 0 else np.sqrt(2.0 / size)) + * np.cos((j + 0.5) * np.pi * i / size) + for j in range(size) + ] for i in range(size) ] def _gen_filter(start: float, end: float, size: int) -> list[list[float]]: - return [[0.0 if i + j > end or i + j < start else 1.0 for j in range(size)] for i in range(size)] + return [ + [0.0 if i + j > end or i + j < start else 1.0 for j in range(size)] for i in range(size) + ] class _Filter(nn.Module): @@ -52,7 +57,9 @@ def __init__(self, size: int, band_start: float, band_end: float, norm: bool = F self.register_buffer("base", torch.tensor(_gen_filter(band_start, band_end, size))) self.norm = norm if norm: - self.register_buffer("ft_num", torch.sum(torch.tensor(_gen_filter(band_start, band_end, size)))) + self.register_buffer( + "ft_num", torch.sum(torch.tensor(_gen_filter(band_start, band_end, size))) + ) def forward(self, x: torch.Tensor) -> torch.Tensor: return (x * self.base / self.ft_num) if self.norm else (x * self.base) @@ -61,7 +68,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class _DCTRecModule(nn.Module): """DCT-based frequency patch selector""" - def __init__(self, window_size: int = 32, stride: int = 16, output: int = 256, grade_N: int = 6) -> None: + def __init__( + self, window_size: int = 32, stride: int = 16, output: int = 256, grade_N: int = 6 + ) -> None: super().__init__() self.window_size = window_size self.grade_N = grade_N @@ -71,15 +80,26 @@ def __init__(self, window_size: int = 32, stride: int = 16, output: int = 256, g self.register_buffer("_DT", torch.tensor(_dct_mat(window_size), dtype=torch.float32).T) self.unfold = nn.Unfold(kernel_size=window_size, stride=stride) - self.fold0 = nn.Fold(output_size=(window_size, window_size), kernel_size=window_size, stride=window_size) + self.fold0 = nn.Fold( + output_size=(window_size, window_size), kernel_size=window_size, stride=window_size + ) self.level_filters = nn.ModuleList([_Filter(window_size, 0, window_size * 2)]) - self.grade_filters = nn.ModuleList([ - _Filter(window_size, window_size * 2.0 / grade_N * i, window_size * 2.0 / grade_N * (i + 1), norm=True) - for i in range(grade_N) - ]) + self.grade_filters = nn.ModuleList( + [ + _Filter( + window_size, + window_size * 2.0 / grade_N * i, + window_size * 2.0 / grade_N * (i + 1), + norm=True, + ) + for i in range(grade_N) + ] + ) @torch.no_grad() - def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + def forward( + self, x: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: C, W, H = x.shape ws = self.window_size xu = self.unfold(x.unsqueeze(0)).squeeze(0) @@ -88,7 +108,7 @@ def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Te xdct = self._D @ xu @ self._DT xp = self.level_filters[0](xdct) - y = (self._DT @ xp @ self._D) + y = self._DT @ xp @ self._D level_xu = y grade = torch.zeros(L, device=x.device) @@ -98,9 +118,16 @@ def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Te w *= k _, idx = torch.sort(grade) - N = self.N - pick = lambda i: self.fold0(level_xu[i:i+1].reshape(1, -1, 1)) - return pick(idx[0]), pick(torch.flip(idx, [0])[0]), pick(idx[min(1, len(idx)-1)]), pick(torch.flip(idx, [0])[min(1, len(idx)-1)]) + + def pick(i: torch.Tensor) -> torch.Tensor: + return self.fold0(level_xu[i : i + 1].reshape(1, -1, 1)) + + return ( + pick(idx[0]), + pick(torch.flip(idx, [0])[0]), + pick(idx[min(1, len(idx) - 1)]), + pick(torch.flip(idx, [0])[min(1, len(idx) - 1)]), + ) # --------------------------------------------------------------------------- @@ -131,9 +158,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # ResNet-50 for noise features (input 30-ch from SRM) # --------------------------------------------------------------------------- + def _conv3x3(inp: int, out: int, s: int = 1) -> nn.Conv2d: return nn.Conv2d(inp, out, 3, stride=s, padding=1, bias=False) + def _conv1x1(inp: int, out: int, s: int = 1) -> nn.Conv2d: return nn.Conv2d(inp, out, 1, stride=s, bias=False) @@ -175,7 +204,9 @@ def __init__(self) -> None: def _make_layer(self, planes: int, blocks: int, stride: int = 1) -> nn.Sequential: ds = None if stride != 1 or self.inplanes != planes * 4: - ds = nn.Sequential(_conv1x1(self.inplanes, planes * 4, stride), nn.BatchNorm2d(planes * 4)) + ds = nn.Sequential( + _conv1x1(self.inplanes, planes * 4, stride), nn.BatchNorm2d(planes * 4) + ) layers = [_Bottleneck(self.inplanes, planes, stride, ds)] self.inplanes = planes * 4 layers.extend(_Bottleneck(self.inplanes, planes) for _ in range(1, blocks)) @@ -208,7 +239,10 @@ def __init__(self) -> None: self.fc = _Mlp(2048 + 256, 1024, 2) import open_clip - model, _, _ = open_clip.create_model_and_transforms("convnext_xxlarge", pretrained="laion2b_s34b_b82k_augreg_soup") + + model, _, _ = open_clip.create_model_and_transforms( + "convnext_xxlarge", pretrained="laion2b_s34b_b82k_augreg_soup" + ) trunk = model.visual.trunk trunk.head.global_pool = nn.Identity() trunk.head.flatten = nn.Identity() @@ -230,8 +264,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: hp_mm1 = self.hpf(x_minmin1) hp_MM1 = self.hpf(x_maxmax1) - clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=tokens.device).view(3, 1, 1) - clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=tokens.device).view(3, 1, 1) + clip_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=tokens.device).view( + 3, 1, 1 + ) + clip_std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=tokens.device).view( + 3, 1, 1 + ) dinov2_mean = torch.tensor([0.485, 0.456, 0.406], device=tokens.device).view(3, 1, 1) dinov2_std = torch.tensor([0.229, 0.224, 0.225], device=tokens.device).view(3, 1, 1) @@ -240,8 +278,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: feat = self.openclip_convnext_xxl(convnext_in) x_0 = self.convnext_proj(self.avgpool(feat).flatten(1)) - x_1 = (self.model_min(hp_mm) + self.model_max(hp_MM) + - self.model_min(hp_mm1) + self.model_max(hp_MM1)) / 4.0 + x_1 = ( + self.model_min(hp_mm) + + self.model_max(hp_MM) + + self.model_min(hp_mm1) + + self.model_max(hp_MM1) + ) / 4.0 return self.fc(torch.cat([x_0, x_1], dim=1)) @@ -290,6 +332,7 @@ def __init__( if not self._ckpt.is_file(): import gdown + cache.mkdir(parents=True, exist_ok=True) gdown.download_folder( id=_GDRIVE_FOLDER, @@ -320,9 +363,8 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." - ) + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." + ) # ------------------------------------------------------------------ # Inference diff --git a/detectzoo/detectors/image/c2p_clip.py b/detectzoo/detectors/image/c2p_clip.py index bfef3fa..deb898c 100644 --- a/detectzoo/detectors/image/c2p_clip.py +++ b/detectzoo/detectors/image/c2p_clip.py @@ -36,7 +36,9 @@ class _C2PCLIP(nn.Module): """CLIP ViT-L/14 visual encoder + single-logit linear head.""" - def __init__(self, pretrained_name: str = "openai/clip-vit-large-patch14", num_classes: int = 1) -> None: + def __init__( + self, pretrained_name: str = "openai/clip-vit-large-patch14", num_classes: int = 1 + ) -> None: super().__init__() from transformers import CLIPModel as HFCLIPModel @@ -118,11 +120,13 @@ def __init__( self._model.load_state_dict(state, strict=True) self._model.to(self._device).eval() - self._transform = transforms.Compose([ - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=_CLIP_MEAN, std=_CLIP_STD), - ]) + self._transform = transforms.Compose( + [ + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=_CLIP_MEAN, std=_CLIP_STD), + ] + ) # ------------------------------------------------------------------ # Input handling @@ -135,8 +139,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/image/cnnspot.py b/detectzoo/detectors/image/cnnspot.py index 46bd6a5..4baa02e 100644 --- a/detectzoo/detectors/image/cnnspot.py +++ b/detectzoo/detectors/image/cnnspot.py @@ -92,8 +92,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/image/cospy.py b/detectzoo/detectors/image/cospy.py index ad37123..2b73dab 100644 --- a/detectzoo/detectors/image/cospy.py +++ b/detectzoo/detectors/image/cospy.py @@ -223,15 +223,19 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def _strip_prefix(state: dict[str, torch.Tensor], prefix: str) -> dict[str, torch.Tensor]: - return {k[len(prefix):]: v for k, v in state.items() if k.startswith(prefix)} + return {k[len(prefix) :]: v for k, v in state.items() if k.startswith(prefix)} def _load_weight_files(model: nn.Module, checkpoint_dir: Path) -> None: - sem = torch.load(checkpoint_dir / "semantic_weights.pth", map_location="cpu", weights_only=False) + sem = torch.load( + checkpoint_dir / "semantic_weights.pth", map_location="cpu", weights_only=False + ) model.sem_fc.weight.data = sem["fc.weight"] model.sem_fc.bias.data = sem["fc.bias"] - art = torch.load(checkpoint_dir / "artifact_weights.pth", map_location="cpu", weights_only=False) + art = torch.load( + checkpoint_dir / "artifact_weights.pth", map_location="cpu", weights_only=False + ) art_enc_state = _strip_prefix(art, "artifact_encoder.") art_fc_state = _strip_prefix(art, "fc.") if art_enc_state: @@ -323,11 +327,13 @@ def __init__( resize = 256 if variant == "progan" else 384 crop = 224 if variant == "progan" else 384 - self._transform = transforms.Compose([ - transforms.Resize(resize), - transforms.CenterCrop(crop), - transforms.ToTensor(), - ]) + self._transform = transforms.Compose( + [ + transforms.Resize(resize), + transforms.CenterCrop(crop), + transforms.ToTensor(), + ] + ) def _normalize_input(self, input_data: Any) -> Image.Image: if hasattr(input_data, "mode") and hasattr(input_data, "convert"): @@ -336,8 +342,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) @torch.no_grad() diff --git a/detectzoo/detectors/image/d3.py b/detectzoo/detectors/image/d3.py index b65e19b..2527bda 100644 --- a/detectzoo/detectors/image/d3.py +++ b/detectzoo/detectors/image/d3.py @@ -5,10 +5,10 @@ CVPR 2025. https://arxiv.org/abs/2404.04584 -The key idea: detects AI-generated images by modeling discrepancies at three levels: -pixel (data), feature distribution, and generation dynamics.It utilizes a dual-branch -approach where CLIP ViT-L/14 processes both the original image and a patch-shuffled -version. A learned transformer attention head aggregates penultimate-layer features +The key idea: detects AI-generated images by modeling discrepancies at three levels: +pixel (data), feature distribution, and generation dynamics.It utilizes a dual-branch +approach where CLIP ViT-L/14 processes both the original image and a patch-shuffled +version. A learned transformer attention head aggregates penultimate-layer features from both views, using the discrepancy between intact and distorted representations. Upstream: https://github.com/BigAandSmallq/D3 @@ -41,7 +41,7 @@ class _TransformerAttention(nn.Module): """Transformer attention head for aggregating penultimate-layer features.""" - + def __init__(self, input_dim: int, output_dim: int, last_dim: int = 1) -> None: super().__init__() self.query = nn.Linear(input_dim, input_dim) @@ -96,8 +96,8 @@ def hook(_module: nn.Module, _input: Any, output: torch.Tensor) -> None: if name == "ln_post": mod.register_forward_hook(hook) return - - # ------------------------------------------------------------------ + + # ------------------------------------------------------------------ # Distorting images # ------------------------------------------------------------------ @@ -105,8 +105,12 @@ def hook(_module: nn.Module, _input: Any, output: torch.Tensor) -> None: def _shuffle_patches(x: torch.Tensor, patch_size: int) -> torch.Tensor: patches = F.unfold(x, kernel_size=patch_size, stride=patch_size) shuffled = patches[:, :, torch.randperm(patches.size(-1))] - return F.fold(shuffled, output_size=(x.shape[2], x.shape[3]), - kernel_size=patch_size, stride=patch_size) + return F.fold( + shuffled, + output_size=(x.shape[2], x.shape[3]), + kernel_size=patch_size, + stride=patch_size, + ) def _encode_penultimate(self, x: torch.Tensor) -> torch.Tensor: self.clip(x) @@ -120,9 +124,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: features: list[torch.Tensor] = [] with torch.no_grad(): for _ in range(self.shuffle_times): - features.append(self._encode_penultimate( - self._shuffle_patches(x, self.patch_size[0]) - )) + features.append( + self._encode_penultimate(self._shuffle_patches(x, self.patch_size[0])) + ) for _ in range(self.original_times): features.append(self._encode_penultimate(x)) stacked = torch.stack(features, dim=-2) @@ -172,7 +176,9 @@ def __init__( download_file(_CKPT_URL, self._ckpt) self._model = _D3Model( - shuffle_times=shuffle_times, original_times=1, patch_size=patch_size, + shuffle_times=shuffle_times, + original_times=1, + patch_size=patch_size, ) head_state = torch.load(self._ckpt, map_location=self._device, weights_only=False) @@ -181,11 +187,13 @@ def __init__( self._model.attention_head.load_state_dict(head_state, strict=True) self._model.to(self._device).eval() - self._transform = transforms.Compose([ - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=_CLIP_MEAN, std=_CLIP_STD), - ]) + self._transform = transforms.Compose( + [ + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=_CLIP_MEAN, std=_CLIP_STD), + ] + ) # ------------------------------------------------------------------ # Input handling @@ -198,10 +206,9 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) - + # ------------------------------------------------------------------ # Inference # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/image/drct.py b/detectzoo/detectors/image/drct.py index b0a8b75..ebdae66 100644 --- a/detectzoo/detectors/image/drct.py +++ b/detectzoo/detectors/image/drct.py @@ -33,7 +33,9 @@ _IMAGENET = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) _DEFAULT_CKPT_NAME = "14_acc0.9996.pth" -_DEFAULT_CKPT_IN_ZIP = "pretrained/DRCT-2M/sdv14/convnext_base_in22k_224_drct_amp_crop/14_acc0.9996.pth" +_DEFAULT_CKPT_IN_ZIP = ( + "pretrained/DRCT-2M/sdv14/convnext_base_in22k_224_drct_amp_crop/14_acc0.9996.pth" +) _PRETRAINED_ZIP_URL = ( "https://modelscope.cn/datasets/BokingChen/DRCT-2M/resolve/master/pretrained.zip" ) @@ -47,6 +49,7 @@ class _DRCTContrastiveModel(nn.Module): def __init__(self, embedding_size: int = 1024) -> None: super().__init__() import timm + backbone = timm.create_model("convnext_base_in22k", pretrained=False) in_features = backbone.head.fc.in_features @@ -112,12 +115,14 @@ def __init__( self._model.load_state_dict(state, strict=True) self._model.to(self._device).eval() - self._transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(**_IMAGENET), - ]) + self._transform = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(**_IMAGENET), + ] + ) def _ensure_download(self, cache: Path) -> None: zip_path = cache / _PRETRAINED_ZIP_NAME @@ -151,4 +156,4 @@ def predict(self, input_data: Any) -> DetectionResult: return self._make_result( float(score), checkpoint=str(self._ckpt), - ) \ No newline at end of file + ) diff --git a/detectzoo/detectors/image/fatformer.py b/detectzoo/detectors/image/fatformer.py index 9bb41b0..8dd799a 100644 --- a/detectzoo/detectors/image/fatformer.py +++ b/detectzoo/detectors/image/fatformer.py @@ -18,7 +18,7 @@ import math from collections import OrderedDict from pathlib import Path -from typing import Any, Tuple, Union +from typing import Any import numpy as np import torch @@ -54,7 +54,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class _ForgeryAwareAdapter(nn.Module): """Spatial conv-bottleneck + DWT frequency-aware attention.""" - def __init__(self, d_model: int = 1024, bottleneck: int = 64, dropout: float = 0.1, head: int = 8) -> None: + def __init__( + self, d_model: int = 1024, bottleneck: int = 64, dropout: float = 0.1, head: int = 8 + ) -> None: super().__init__() self.d_model = d_model self.scale = 0.1 @@ -66,6 +68,7 @@ def __init__(self, d_model: int = 1024, bottleneck: int = 64, dropout: float = 0 self.freq_scale = nn.Parameter(torch.zeros(1)) from pytorch_wavelets import DWTForward, DWTInverse + self.dwt_transform = DWTForward(J=1, wave="haar") self.idwt_transform = DWTInverse(wave="haar") @@ -85,16 +88,22 @@ def __init__(self, d_model: int = 1024, bottleneck: int = 64, dropout: float = 0 self.norm3 = nn.LayerNorm(d_model) def _ffn(self, tgt: torch.Tensor) -> torch.Tensor: - return self.norm3(tgt + self.dropout4(self.linear2(self.dropout3(self.activation(self.linear1(tgt)))))) + return self.norm3( + tgt + self.dropout4(self.linear2(self.dropout3(self.activation(self.linear1(tgt))))) + ) def _freq(self, x: torch.Tensor) -> torch.Tensor: B, C = x.shape[:2] nq = x.shape[2] q = k = v = x.transpose(0, 1).flatten(1, 2) - x = x + self.dropout_intra(self.intra_band(q, k, v)[0].reshape(C, B, nq, self.d_model).transpose(0, 1)) + x = x + self.dropout_intra( + self.intra_band(q, k, v)[0].reshape(C, B, nq, self.d_model).transpose(0, 1) + ) x = self.norm_intra(x) q = k = v = x.flatten(0, 1).transpose(0, 1) - x = x + self.dropout_inter(self.inter_band(q, k, v)[0].transpose(0, 1).reshape(B, C, nq, self.d_model)) + x = x + self.dropout_inter( + self.inter_band(q, k, v)[0].transpose(0, 1).reshape(B, C, nq, self.d_model) + ) x = self.norm_inter(x) return self._ffn(x) @@ -111,34 +120,74 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: freq_out = torch.cat([torch.zeros_like(x[:1]), freq_out], dim=0) down = self.non_linear_func(self.first_conv_layer(x.permute(1, 2, 0))) - up = F.dropout(self.second_conv_layer(down), p=self.dropout, training=self.training).permute(2, 0, 1) * self.scale + up = ( + F.dropout(self.second_conv_layer(down), p=self.dropout, training=self.training).permute( + 2, 0, 1 + ) + * self.scale + ) return up + freq_out * self.freq_scale class _ResidualAttentionBlock(nn.Module): - def __init__(self, d_model: int, n_head: int, attn_mask: torch.Tensor | None = None, add_adapter: bool = False) -> None: + def __init__( + self, + d_model: int, + n_head: int, + attn_mask: torch.Tensor | None = None, + add_adapter: bool = False, + ) -> None: super().__init__() self.attn = nn.MultiheadAttention(d_model, n_head) self.ln_1 = _LayerNorm(d_model) - self.mlp = nn.Sequential(OrderedDict([("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", _QuickGELU()), ("c_proj", nn.Linear(d_model * 4, d_model))])) + self.mlp = nn.Sequential( + OrderedDict( + [ + ("c_fc", nn.Linear(d_model, d_model * 4)), + ("gelu", _QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model)), + ] + ) + ) self.ln_2 = _LayerNorm(d_model) self.attn_mask = attn_mask self.forgery_aware_adapter = _ForgeryAwareAdapter(d_model) if add_adapter else None def forward(self, x: torch.Tensor) -> torch.Tensor: - mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None - x = x + self.attn(self.ln_1(x), self.ln_1(x), self.ln_1(x), need_weights=False, attn_mask=mask)[0] + mask = ( + self.attn_mask.to(dtype=x.dtype, device=x.device) + if self.attn_mask is not None + else None + ) + x = ( + x + + self.attn( + self.ln_1(x), self.ln_1(x), self.ln_1(x), need_weights=False, attn_mask=mask + )[0] + ) adapt = self.forgery_aware_adapter(x) if self.forgery_aware_adapter is not None else 0 x = x + self.mlp(self.ln_2(x)) + adapt return x class _Transformer(nn.Module): - def __init__(self, width: int, layers: int, heads: int, attn_mask: torch.Tensor | None = None, add_adapter: list[bool] | None = None) -> None: + def __init__( + self, + width: int, + layers: int, + heads: int, + attn_mask: torch.Tensor | None = None, + add_adapter: list[bool] | None = None, + ) -> None: super().__init__() if add_adapter is None: add_adapter = [False] * layers - self.resblocks = nn.Sequential(*[_ResidualAttentionBlock(width, heads, attn_mask, add_adapter[i]) for i in range(layers)]) + self.resblocks = nn.Sequential( + *[ + _ResidualAttentionBlock(width, heads, attn_mask, add_adapter[i]) + for i in range(layers) + ] + ) def forward(self, x: torch.Tensor) -> tuple[dict, torch.Tensor]: out: dict[str, torch.Tensor] = {} @@ -149,12 +198,23 @@ def forward(self, x: torch.Tensor) -> tuple[dict, torch.Tensor]: class _VisionTransformer(nn.Module): - def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int, num_adapter: int = 3) -> None: + def __init__( + self, + input_resolution: int, + patch_size: int, + width: int, + layers: int, + heads: int, + output_dim: int, + num_adapter: int = 3, + ) -> None: super().__init__() self.conv1 = nn.Conv2d(3, width, patch_size, stride=patch_size, bias=False) - scale = width ** -0.5 + scale = width**-0.5 self.class_embedding = nn.Parameter(scale * torch.randn(width)) - self.positional_embedding = nn.Parameter(scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width)) + self.positional_embedding = nn.Parameter( + scale * torch.randn((input_resolution // patch_size) ** 2 + 1, width) + ) self.ln_pre = _LayerNorm(width) adapt_flags = [False] * layers @@ -168,7 +228,14 @@ def __init__(self, input_resolution: int, patch_size: int, width: int, layers: i def forward(self, x: torch.Tensor, return_full: bool = False) -> torch.Tensor: x = self.conv1(x).flatten(2).permute(0, 2, 1) - x = torch.cat([self.class_embedding + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) + x = torch.cat( + [ + self.class_embedding + + torch.zeros(x.shape[0], 1, x.shape[-1], dtype=x.dtype, device=x.device), + x, + ], + dim=1, + ) x = self.ln_pre(x + self.positional_embedding.to(x.dtype)) x = x.permute(1, 0, 2) _, x = self.transformer(x) @@ -180,15 +247,36 @@ def forward(self, x: torch.Tensor, return_full: bool = False) -> torch.Tensor: class _CLIP(nn.Module): - def __init__(self, embed_dim: int, image_resolution: int, vision_layers: int, vision_width: int, vision_patch_size: int, - context_length: int, vocab_size: int, transformer_width: int, transformer_heads: int, transformer_layers: int, - num_adapter: int = 3) -> None: + def __init__( + self, + embed_dim: int, + image_resolution: int, + vision_layers: int, + vision_width: int, + vision_patch_size: int, + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int, + num_adapter: int = 3, + ) -> None: super().__init__() self.context_length = context_length vision_heads = vision_width // 64 - self.visual = _VisionTransformer(image_resolution, vision_patch_size, vision_width, vision_layers, vision_heads, embed_dim, num_adapter) + self.visual = _VisionTransformer( + image_resolution, + vision_patch_size, + vision_width, + vision_layers, + vision_heads, + embed_dim, + num_adapter, + ) mask = torch.empty(context_length, context_length).fill_(float("-inf")).triu_(1) - self.transformer = _Transformer(transformer_width, transformer_layers, transformer_heads, attn_mask=mask) + self.transformer = _Transformer( + transformer_width, transformer_layers, transformer_heads, attn_mask=mask + ) self.vocab_size = vocab_size self.token_embedding = nn.Embedding(vocab_size, transformer_width) self.positional_embedding = nn.Parameter(torch.empty(context_length, transformer_width)) @@ -232,13 +320,14 @@ def __init__(self, clip: _CLIP, classnames: list[str], n_ctx: int = 8) -> None: self.ctx = nn.Parameter(ctx_vectors) import open_clip as _oc + prompt_prefix = " ".join(["X"] * n_ctx) prompts = [prompt_prefix + " " + name + "." for name in classnames] tokenized_prompts = torch.cat([_oc.tokenize(p) for p in prompts]) with torch.no_grad(): embedding = clip.token_embedding(tokenized_prompts).type(dtype) self.register_buffer("token_prefix", embedding[:, :1, :]) - self.register_buffer("token_suffix", embedding[:, 1 + n_ctx:, :]) + self.register_buffer("token_suffix", embedding[:, 1 + n_ctx :, :]) self.n_cls = len(classnames) self.n_ctx = n_ctx self.tokenized_prompts = tokenized_prompts @@ -255,7 +344,12 @@ def _ffn(self, t: torch.Tensor) -> torch.Tensor: def forward(self, im_features: torch.Tensor) -> torch.Tensor: tgt = self.ctx[:, None].repeat_interleave(im_features.shape[0], dim=1) - tgt = self.norm1(tgt + self.patch_basaed_enhancer(tgt, im_features.transpose(0, 1), im_features.transpose(0, 1))[0]) + tgt = self.norm1( + tgt + + self.patch_basaed_enhancer( + tgt, im_features.transpose(0, 1), im_features.transpose(0, 1) + )[0] + ) tgt = self._ffn(tgt).transpose(0, 1) prompts = [] for ctx_i in tgt: @@ -271,12 +365,21 @@ def __init__(self, num_adapter: int = 3, n_ctx: int = 8) -> None: super().__init__() # ViT-L/14 configuration self.clip_model = _CLIP( - embed_dim=768, image_resolution=224, vision_layers=24, vision_width=1024, - vision_patch_size=14, context_length=77, vocab_size=49408, - transformer_width=768, transformer_heads=12, transformer_layers=12, + embed_dim=768, + image_resolution=224, + vision_layers=24, + vision_width=1024, + vision_patch_size=14, + context_length=77, + vocab_size=49408, + transformer_width=768, + transformer_heads=12, + transformer_layers=12, num_adapter=num_adapter, ) - self.language_guided_alignment = _LanguageGuidedAlignment(self.clip_model, ["real", "fake"], n_ctx) + self.language_guided_alignment = _LanguageGuidedAlignment( + self.clip_model, ["real", "fake"], n_ctx + ) self.tokenized_prompts = self.language_guided_alignment.tokenized_prompts self.image_encoder = self.clip_model.visual self.text_encoder = _TextEncoder(self.clip_model) @@ -322,7 +425,9 @@ def forward(self, image: torch.Tensor) -> torch.Tensor: aug_feats = tgt.transpose(0, 1).mean(dim=1) aug_feats = aug_feats / aug_feats.norm(dim=-1, keepdim=True) tf_n = text_feats / text_feats.norm(dim=-1, keepdim=True) - aug_logits = torch.stack([logit_scale * af @ tfn.t() for af, tfn in zip(aug_feats, tf_n.transpose(0, 1))]) + aug_logits = torch.stack( + [logit_scale * af @ tfn.t() for af, tfn in zip(aug_feats, tf_n.transpose(0, 1))] + ) return logits + aug_logits @@ -369,6 +474,7 @@ def __init__( self._ckpt = cache / _DEFAULT_CKPT_NAME if not self._ckpt.is_file(): import gdown + gdown.download(id=_GDRIVE_FILE_ID, output=str(self._ckpt), quiet=False) self._model = _FatFormerModel(num_adapter=num_vit_adapter, n_ctx=num_context_embedding) @@ -377,12 +483,14 @@ def __init__( self._model.load_state_dict(state, strict=False) self._model.to(self._device).eval() - self._transform = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(**_IMAGENET), - ]) + self._transform = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(**_IMAGENET), + ] + ) # ------------------------------------------------------------------ # Input handling @@ -395,8 +503,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/image/freqnet.py b/detectzoo/detectors/image/freqnet.py index 3e7b1a0..c068cba 100644 --- a/detectzoo/detectors/image/freqnet.py +++ b/detectzoo/detectors/image/freqnet.py @@ -27,8 +27,7 @@ _IMAGENET = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) _CKPT_NAME = "4-classes-freqnet-v2.pth" _CKPT_URL = ( - "https://github.com/chuangchuangtan/FreqNet-DeepfakeDetection/" - "raw/main/4-classes-freqnet-v2.pth" + "https://github.com/chuangchuangtan/FreqNet-DeepfakeDetection/raw/main/4-classes-freqnet-v2.pth" ) @@ -78,7 +77,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class _FreqNet(nn.Module): - """Matches upstream ``networks/freqnet.py``. """ + """Matches upstream ``networks/freqnet.py``.""" def __init__( self, @@ -153,7 +152,12 @@ def _hfreq_wh(self, x: torch.Tensor, scale: int) -> torch.Tensor: x = torch.fft.fft2(x, norm="ortho") x = torch.fft.fftshift(x, dim=[-2, -1]) b, c, h, w = x.shape - x[:, :, h // 2 - h // scale : h // 2 + h // scale, w // 2 - w // scale : w // 2 + w // scale] = 0.0 + x[ + :, + :, + h // 2 - h // scale : h // 2 + h // scale, + w // 2 - w // scale : w // 2 + w // scale, + ] = 0.0 x = torch.fft.ifftshift(x, dim=[-2, -1]) x = torch.fft.ifft2(x, norm="ortho") x = torch.real(x) @@ -322,8 +326,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) @torch.no_grad() diff --git a/detectzoo/detectors/image/ladeda.py b/detectzoo/detectors/image/ladeda.py index 5075b4b..ba36fe0 100644 --- a/detectzoo/detectors/image/ladeda.py +++ b/detectzoo/detectors/image/ladeda.py @@ -30,9 +30,9 @@ from detectzoo.detectors.image.resnet50_binary import load_pytorch_checkpoint from detectzoo.utils.io import load_image -_IMAGENET = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) -_CKPT_NAME = "ForenSynth_LaDeDa.pth" -_GDRIVE_ID = "1KxNdnPRJJTuqxmzBPiGsg43tXzO8AN2d" +_IMAGENET = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +_CKPT_NAME = "ForenSynth_LaDeDa.pth" +_GDRIVE_ID = "1KxNdnPRJJTuqxmzBPiGsg43tXzO8AN2d" _PATCH_SIZE = 9 _LOAD_SIZE = 256 @@ -41,9 +41,11 @@ # ResNet-50 variant with 9×9 receptive field # --------------------------------------------------------------------------- + def _conv3x3(inp: int, out: int, stride: int = 1) -> nn.Conv2d: return nn.Conv2d(inp, out, 3, stride=stride, padding=0, bias=False) + def _conv1x1(inp: int, out: int, stride: int = 1) -> nn.Conv2d: return nn.Conv2d(inp, out, 1, stride=stride, bias=False) @@ -61,12 +63,14 @@ def __init__( ) -> None: super().__init__() self.conv1 = _conv1x1(inplanes, planes) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = _conv1x1(planes, planes, stride) if use_1x1 else _conv3x3(planes, planes, stride) - self.bn2 = nn.BatchNorm2d(planes) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = ( + _conv1x1(planes, planes, stride) if use_1x1 else _conv3x3(planes, planes, stride) + ) + self.bn2 = nn.BatchNorm2d(planes) self.conv3 = _conv1x1(planes, planes * 4) - self.bn3 = nn.BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) self.downsample = downsample def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -79,9 +83,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: if identity.shape[-2:] != out.shape[-2:]: diff_h = identity.size(-2) - out.size(-2) diff_w = identity.size(-1) - out.size(-1) - identity = identity[ - :, :, : identity.size(-2) - diff_h, : identity.size(-1) - diff_w - ] + identity = identity[:, :, : identity.size(-2) - diff_h, : identity.size(-1) - diff_w] return self.relu(out + identity) @@ -92,18 +94,26 @@ def __init__(self, num_classes: int = 1) -> None: super().__init__() self.inplanes = 64 - self.conv1 = _conv1x1(3, 64) - self.conv2 = _conv3x3(64, 64) - self.bn1 = nn.BatchNorm2d(64, momentum=0.001) - self.relu = nn.ReLU(inplace=True) + self.conv1 = _conv1x1(3, 64) + self.conv2 = _conv3x3(64, 64) + self.bn1 = nn.BatchNorm2d(64, momentum=0.001) + self.relu = nn.ReLU(inplace=True) - self.layer1 = self._make_layer(64, 3, stride=2, first_block_1x1=False, later_blocks_1x1=True) - self.layer2 = self._make_layer(128, 4, stride=2, first_block_1x1=False, later_blocks_1x1=True) - self.layer3 = self._make_layer(256, 6, stride=2, first_block_1x1=True, later_blocks_1x1=True) - self.layer4 = self._make_layer(512, 3, stride=1, first_block_1x1=True, later_blocks_1x1=True) + self.layer1 = self._make_layer( + 64, 3, stride=2, first_block_1x1=False, later_blocks_1x1=True + ) + self.layer2 = self._make_layer( + 128, 4, stride=2, first_block_1x1=False, later_blocks_1x1=True + ) + self.layer3 = self._make_layer( + 256, 6, stride=2, first_block_1x1=True, later_blocks_1x1=True + ) + self.layer4 = self._make_layer( + 512, 3, stride=1, first_block_1x1=True, later_blocks_1x1=True + ) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Linear(512 * _Bottleneck.expansion, num_classes) + self.fc = nn.Linear(512 * _Bottleneck.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): @@ -165,7 +175,7 @@ class LaDeDaDetector(BaseDetector): patch_size : int Local receptive-field size in pixels (default 9, matching the paper). load_size : int or None - Resize each image to ``load_size × load_size`` before inference. + Resize each image to ``load_size × load_size`` before inference. Pass ``None`` to keep the original image size. threshold : float Decision boundary (default 0.5). @@ -192,7 +202,7 @@ def __init__( self.patch_size = int(patch_size) self.load_size = None if load_size is None else int(load_size) - cache = get_cache_dir("ladeda", cache_dir) + cache = get_cache_dir("ladeda", cache_dir) self._ckpt = ( Path(checkpoint_path).expanduser().resolve() if checkpoint_path is not None @@ -203,7 +213,7 @@ def __init__( self._ensure_download(cache) self._model = _LaDeDaNet(num_classes=1) - raw = load_pytorch_checkpoint(self._ckpt, self._device) + raw = load_pytorch_checkpoint(self._ckpt, self._device) state = raw.get("model", raw) if isinstance(raw, dict) else raw state = {k.replace("module.", ""): v for k, v in state.items()} self._model.load_state_dict(state, strict=True) @@ -212,10 +222,12 @@ def __init__( steps: list[Any] = [] if self.load_size is not None: steps.append(transforms.Resize((self.load_size, self.load_size))) - steps.extend([ - transforms.ToTensor(), - transforms.Normalize(**_IMAGENET), - ]) + steps.extend( + [ + transforms.ToTensor(), + transforms.Normalize(**_IMAGENET), + ] + ) self._transform = transforms.Compose(steps) def _ensure_download(self, cache: Path) -> None: @@ -237,18 +249,18 @@ def _ensure_download(self, cache: Path) -> None: ) matches[0].rename(self._ckpt) - # -------------------------------------------------------------------------- ------------------------------------------- + # --------------------------------------------------------------------------- # Input handling - # -------------------------------------------------------------------------- ------------------------------------------- + # --------------------------------------------------------------------------- def _normalize_input(self, input_data: Any) -> Image.Image: if hasattr(input_data, "mode"): return input_data.convert("RGB") return load_image(Path(str(input_data))) - # -------------------------------------------------------------------------- ------------------------------------------- + # --------------------------------------------------------------------------- # Inference - # -------------------------------------------------------------------------- ------------------------------------------- + # --------------------------------------------------------------------------- @torch.no_grad() def predict(self, input_data: Any) -> DetectionResult: diff --git a/detectzoo/detectors/image/lgrad.py b/detectzoo/detectors/image/lgrad.py index 27a88da..ae2861e 100644 --- a/detectzoo/detectors/image/lgrad.py +++ b/detectzoo/detectors/image/lgrad.py @@ -7,7 +7,7 @@ instead of RGB, so detector features capture generator-agnostic high-frequency traces. DetectZoo pipeline: By default ``input_mode="rgb"`` runs the official **PyTorch img2grad** -path from the LGrad repo, then applies the ResNet-50 head. Use ``input_mode="gradient"`` +path from the LGrad repo, then applies the ResNet-50 head. Use ``input_mode="gradient"`` only if ``data`` are already saved gradient PNGs. Upstream: https://github.com/chuangchuangtan/LGrad @@ -66,6 +66,7 @@ def _raw_state_dict(ckpt: Any) -> dict[str, torch.Tensor]: def _download_from_gdrive(file_id: str, dest: Path) -> None: import gdown + dest.parent.mkdir(parents=True, exist_ok=True) gdown.download(id=file_id, output=str(dest), quiet=True) @@ -85,6 +86,7 @@ def _grad_chw_to_pil(grad: torch.Tensor) -> Image.Image: # LGradDetector # ------------------------------------------------------------------ + @register_detector("lgrad", aliases=["lgrad_cvpr2023", "learning_on_gradients"]) class LGradDetector(BaseDetector): """LGrad ResNet-50 head with optional built-in RGB→gradient (official PyTorch img2grad).""" @@ -167,8 +169,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) def _img2grad_pil(self, pil: Image.Image) -> Image.Image: diff --git a/detectzoo/detectors/image/manifold_bias.py b/detectzoo/detectors/image/manifold_bias.py index ee2ee0c..52622e3 100644 --- a/detectzoo/detectors/image/manifold_bias.py +++ b/detectzoo/detectors/image/manifold_bias.py @@ -5,9 +5,9 @@ of Generated Images", ICLR 2025. https://arxiv.org/abs/2504.15470 -The key idea: real and AI-generated images lie on slightly different data manifolds, and -generative models introduce subtle geometric biases. By measuring how well an image -aligns with the natural image manifold, the method detects fakes in a zero-shot, +The key idea: real and AI-generated images lie on slightly different data manifolds, and +generative models introduce subtle geometric biases. By measuring how well an image +aligns with the natural image manifold, the method detects fakes in a zero-shot, generator-agnostic way. Threshold calibration (required): @@ -33,7 +33,6 @@ from detectzoo.core.registry import register_detector from detectzoo.utils.io import load_image - _SD_REPO = "CompVis/stable-diffusion-v1-4" _CLIP_REPO = "openai/clip-vit-large-patch14" _CAPTION_MODEL = "Salesforce/blip-image-captioning-base" @@ -50,8 +49,8 @@ def _resize_and_crop(img_t: torch.Tensor, siz: int) -> torch.Tensor: start_x = (img_t.size(-1) - siz) // 2 start_y = (img_t.size(-2) - siz) // 2 if img_t.dim() == 3: - return img_t[:, start_y:start_y + siz, start_x:start_x + siz] - return img_t[:, :, start_y:start_y + siz, start_x:start_x + siz] + return img_t[:, start_y : start_y + siz, start_x : start_x + siz] + return img_t[:, :, start_y : start_y + siz, start_x : start_x + siz] def _normalize_batch(batch: torch.Tensor, epsilon: float = _DEFAULT_EPSILON) -> torch.Tensor: @@ -62,6 +61,7 @@ def _normalize_batch(batch: torch.Tensor, epsilon: float = _DEFAULT_EPSILON) -> def _pil_to_raw_tensor(img: Image.Image) -> torch.Tensor: import numpy as np + return torch.from_numpy(np.array(img.convert("RGB"))) @@ -107,6 +107,7 @@ def _decode_in_subbatches( # Detector # --------------------------------------------------------------------------- + @register_detector("manifold_bias", aliases=["mib", "manifold_induced_bias", "brokman2025"]) class ManifoldBiasDetector(BaseDetector): """Zero-shot AI-generated image detector based on diffusion manifold biases @@ -156,14 +157,16 @@ def __init__( **kwargs: Any, ) -> None: # Pass threshold=0.0 to BaseDetector as a placeholder, we override it below. - super().__init__(threshold=threshold if threshold is not None else 0.0, device=device, **kwargs) + super().__init__( + threshold=threshold if threshold is not None else 0.0, device=device, **kwargs + ) self._calibrated = threshold is not None if threshold is not None: self.threshold = threshold self.sd_repo = sd_repo self.clip_repo = clip_repo - self.caption_model_name = caption_model + self.caption_model_name = caption_model self.num_noise = int(num_noise) self.time_frac = float(time_frac) self.image_size = int(image_size) @@ -224,9 +227,9 @@ def calibrate( def _load_sd(self) -> None: from diffusers import DDPMScheduler, StableDiffusionPipeline - pipe = StableDiffusionPipeline.from_pretrained( - self.sd_repo, torch_dtype=self._dtype - ).to(self._device) + pipe = StableDiffusionPipeline.from_pretrained(self.sd_repo, torch_dtype=self._dtype).to( + self._device + ) self._unet = pipe.unet.eval() self._vae = pipe.vae.eval() self._tokenizer = pipe.tokenizer @@ -236,32 +239,36 @@ def _load_sd(self) -> None: def _load_clip(self) -> None: from transformers import AutoImageProcessor, CLIPModel + self._clip = CLIPModel.from_pretrained(self.clip_repo).to(self._device).eval() self._clip_processor = AutoImageProcessor.from_pretrained(self.clip_repo) def _load_captioner(self) -> None: from transformers import pipeline as hf_pipeline + self._captioner = hf_pipeline( - "image-to-text", model=self.caption_model_name, device=self._device, + "image-to-text", + model=self.caption_model_name, + device=self._device, ) @property def unet(self) -> nn.Module: if self._unet is None: self._load_sd() - return self._unet + return self._unet @property def vae(self) -> nn.Module: if self._vae is None: self._load_sd() - return self._vae + return self._vae @property def clip(self) -> nn.Module: if self._clip is None: self._load_clip() - return self._clip + return self._clip @property def clip_processor(self): @@ -291,7 +298,11 @@ def _get_prompt(self, pil_img: Image.Image, prompt: Optional[str]) -> str: if prompt is not None: return prompt result = self.captioner(pil_img, max_new_tokens=64) - text = result[0].get("generated_text", "") if isinstance(result, list) and result else str(result) + text = ( + result[0].get("generated_text", "") + if isinstance(result, list) and result + else str(result) + ) return text.strip() or "a photograph" # ------------------------------------------------------------------ @@ -309,7 +320,7 @@ def _clip_features(self, images_uint8_float: torch.Tensor) -> torch.Tensor: return feats.detach().cpu() def _compute_criterion(self, pil_img: Image.Image, prompt: str) -> dict[str, float]: - _ = self.unet # ensure SD is loaded + _ = self.unet # ensure SD is loaded K = self.num_noise siz = self.image_size @@ -335,8 +346,11 @@ def _compute_criterion(self, pil_img: Image.Image, prompt: str) -> dict[str, flo # 4. Text conditioning tokens = self._tokenizer( - [prompt] * K, padding="max_length", max_length=77, - truncation=True, return_tensors="pt", + [prompt] * K, + padding="max_length", + max_length=77, + truncation=True, + return_tensors="pt", ) with torch.no_grad(): text_emb = self._text_encoder(tokens.input_ids.to(self._device)).last_hidden_state @@ -350,7 +364,7 @@ def _compute_criterion(self, pil_img: Image.Image, prompt: str) -> dict[str, flo dec_noise, dec_sphere = _decode_in_subbatches( self._vae, noise_pred_scaled, sphere / self._vae.config.scaling_factor ) - dec_noise_pp = _postprocess_decoded(dec_noise, siz) + dec_noise_pp = _postprocess_decoded(dec_noise, siz) dec_sphere_pp = _postprocess_decoded(dec_sphere, siz) # 7. CLIP embeddings @@ -361,7 +375,7 @@ def _compute_criterion(self, pil_img: Image.Image, prompt: str) -> dict[str, flo clip_sphere = self._clip_features(dec_sphere_pp) # 8. 3 criterion terms - bias_vec = self._cos(clip_orig, clip_dnoise).numpy() + bias_vec = self._cos(clip_orig, clip_dnoise).numpy() kappa_vec = self._cos(clip_dnoise, clip_sphere).numpy() D_vec = torch.norm(clip_dnoise, p=2, dim=1).numpy() @@ -370,16 +384,16 @@ def _compute_criterion(self, pil_img: Image.Image, prompt: str) -> dict[str, flo D_mean = float(D_vec.mean()) # 9. Final criterion - sqrt_d = float(_CLIP_DIM ** 0.5) + sqrt_d = float(_CLIP_DIM**0.5) criterion = 1.0 + (sqrt_d * bias_mean - D_mean + kappa_mean) / (sqrt_d + 2.0) return { "criterion": criterion, - "bias_mean": bias_mean, + "bias_mean": bias_mean, "kappa_mean": kappa_mean, - "D_mean": D_mean, - "timestep": t_abs, - "num_noise": K, + "D_mean": D_mean, + "timestep": t_abs, + "num_noise": K, } # ------------------------------------------------------------------ @@ -408,4 +422,4 @@ def predict(self, input_data: Any, *, prompt: Optional[str] = None) -> Detection num_noise=result["num_noise"], sd_repo=self.sd_repo, threshold_calibrated=self._calibrated, - ) \ No newline at end of file + ) diff --git a/detectzoo/detectors/image/npr_deepfake.py b/detectzoo/detectors/image/npr_deepfake.py index e0d667a..c2b5f40 100644 --- a/detectzoo/detectors/image/npr_deepfake.py +++ b/detectzoo/detectors/image/npr_deepfake.py @@ -217,8 +217,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) @torch.no_grad() diff --git a/detectzoo/detectors/image/patchcraft.py b/detectzoo/detectors/image/patchcraft.py index d2bea35..20eda49 100644 --- a/detectzoo/detectors/image/patchcraft.py +++ b/detectzoo/detectors/image/patchcraft.py @@ -10,60 +10,63 @@ """ from __future__ import annotations - + from pathlib import Path from random import Random from typing import Any - + import torch import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as transforms from PIL import Image - + from detectzoo.core.base import BaseDetector, DetectionResult from detectzoo.core.registry import register_detector from detectzoo.datasets._download import get_cache_dir from detectzoo.detectors.image.resnet50_binary import load_pytorch_checkpoint from detectzoo.utils.io import load_image - + _DEFAULT_CKPT_NAME = "RPTC.pth" -_HF_REPO_ID = "slxhere/PatchCraft" -_UPSTREAM_WEIGHTS = "https://fdmas.github.io/AIGCDetect/" - - +_HF_REPO_ID = "slxhere/PatchCraft" +_UPSTREAM_WEIGHTS = "https://fdmas.github.io/AIGCDetect/" + + # --------------------------------------------------------------------------- # SRM high-pass filters # --------------------------------------------------------------------------- - + + def _srm_hpf_weights() -> torch.Tensor: from detectzoo.detectors.image.srm_filter_kernel import all_normalized_hpf_list + hpf_5x5 = [] for h in all_normalized_hpf_list: if h.shape[0] == 3: h = F.pad(torch.from_numpy(h).float(), (1, 1, 1, 1)).numpy() hpf_5x5.append(h) return torch.tensor(hpf_5x5, dtype=torch.float32).view(30, 1, 5, 5) - - + + class _HPF(nn.Module): def __init__(self) -> None: super().__init__() self.hpf = nn.Conv2d(1, 30, kernel_size=5, padding=2, bias=False) self.hpf.weight = nn.Parameter(_srm_hpf_weights(), requires_grad=False) - + def forward(self, x: torch.Tensor) -> torch.Tensor: return self.hpf(x) - - + + # --------------------------------------------------------------------------- # RPTC network # --------------------------------------------------------------------------- - + + def _conv_bn_relu(ch: int) -> nn.Sequential: return nn.Sequential(nn.Conv2d(ch, ch, 3, padding=1), nn.BatchNorm2d(ch), nn.ReLU()) - - + + class RPTCNet(nn.Module): def __init__(self) -> None: super().__init__() @@ -133,19 +136,20 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: out = self.group5(out) out = self.advpool(out).view(out.size(0), -1) return self.fc2(out) - - + + def _load_weights(model: nn.Module, ckpt: Path, device: torch.device) -> None: - raw = load_pytorch_checkpoint(ckpt, device) + raw = load_pytorch_checkpoint(ckpt, device) state = raw.get("model") or raw.get("netC") or raw if isinstance(raw, dict) else raw state = {k.replace("module.", "").removeprefix("model."): v for k, v in state.items()} model.load_state_dict(state, strict=True) - - + + # --------------------------------------------------------------------------- # RPTC patch preprocessing # --------------------------------------------------------------------------- - + + def _edge_density(img: torch.Tensor) -> float: return float( torch.abs(img[:, :-1] - img[:, 1:]).sum() @@ -153,53 +157,59 @@ def _edge_density(img: torch.Tensor) -> float: + torch.abs(img[:, :-1, :-1] - img[:, 1:, 1:]).sum() + torch.abs(img[:, :-1, 1:] - img[:, 1:, :-1]).sum() ) - - -def _processing_rptc(img: Image.Image, *, load_size: int, patch_num: int, seed: int) -> torch.Tensor: - num_block = 2 ** patch_num + + +def _processing_rptc( + img: Image.Image, *, load_size: int, patch_num: int, seed: int +) -> torch.Tensor: + num_block = 2**patch_num patch_size = load_size // num_block - + if min(img.size) < patch_size: img = transforms.Resize((patch_size, patch_size))(img) - - x = transforms.ToTensor()(img) + + x = transforms.ToTensor()(img) _, h, w = x.shape rng = Random(seed) - + crops = sorted( [ - (x[:, cy:cy + patch_size, cx:cx + patch_size], - _edge_density(x[:, cy:cy + patch_size, cx:cx + patch_size])) + ( + x[:, cy : cy + patch_size, cx : cx + patch_size], + _edge_density(x[:, cy : cy + patch_size, cx : cx + patch_size]), + ) for _ in range(num_block * num_block * 3) - for cx, cy in [( - rng.randrange(0, max(1, w - patch_size + 1)), - rng.randrange(0, max(1, h - patch_size + 1)), - )] + for cx, cy in [ + ( + rng.randrange(0, max(1, w - patch_size + 1)), + rng.randrange(0, max(1, h - patch_size + 1)), + ) + ] ], key=lambda t: t[1], ) - + def _fill(indices) -> torch.Tensor: t = torch.zeros(3, load_size, load_size, dtype=x.dtype) - for k, (ii, jj) in enumerate( - (i, j) for i in range(num_block) for j in range(num_block) - ): - t[:, ii * patch_size:(ii + 1) * patch_size, - jj * patch_size:(jj + 1) * patch_size] = crops[indices[k]][0] + for k, (ii, jj) in enumerate((i, j) for i in range(num_block) for j in range(num_block)): + t[ + :, ii * patch_size : (ii + 1) * patch_size, jj * patch_size : (jj + 1) * patch_size + ] = crops[indices[k]][0] return t - + n = num_block * num_block return torch.stack((_fill(list(range(n))), _fill(list(range(-1, -n - 1, -1))))) - - + + # --------------------------------------------------------------------------- # Detector # --------------------------------------------------------------------------- - + + @register_detector("patchcraft", aliases=["patch_craft", "patchcraft_detector"]) class PatchCraftDetector(BaseDetector): """PatchCraft RPTC detector (Zhong et al., arXiv 2311.12397). - + Parameters ---------- checkpoint_path : str or Path, optional @@ -217,9 +227,9 @@ class PatchCraftDetector(BaseDetector): cache_dir : str or Path, optional Override the default cache directory (``.detectzoo_data``). """ - + modality = "image" - + def __init__( self, *, @@ -235,24 +245,25 @@ def __init__( super().__init__(threshold=threshold, device=device, **kwargs) self.load_size = int(load_size) self.patch_num = int(patch_num) - self.seed = int(seed) - - cache = get_cache_dir("patchcraft", cache_dir) + self.seed = int(seed) + + cache = get_cache_dir("patchcraft", cache_dir) self._ckpt = ( Path(checkpoint_path).expanduser().resolve() if checkpoint_path is not None else cache / _DEFAULT_CKPT_NAME ) - + if not self._ckpt.is_file(): self._ensure_download(cache) - + self._model = RPTCNet() _load_weights(self._model, self._ckpt, self._device) self._model.to(self._device).eval() - + def _ensure_download(self, cache: Path) -> None: from huggingface_hub import hf_hub_download + self._ckpt = Path( hf_hub_download(repo_id=_HF_REPO_ID, filename=_DEFAULT_CKPT_NAME, cache_dir=str(cache)) ) @@ -260,24 +271,28 @@ def _ensure_download(self, cache: Path) -> None: # ------------------------------------------------------------------ # Input handling # ------------------------------------------------------------------ - + def _normalize_input(self, input_data: Any) -> Image.Image: if hasattr(input_data, "mode"): return input_data.convert("RGB") return load_image(Path(str(input_data))) - + # ------------------------------------------------------------------ # Inference # ------------------------------------------------------------------ @torch.no_grad() def predict(self, input_data: Any) -> DetectionResult: - x = _processing_rptc( - self._normalize_input(input_data), - load_size=self.load_size, - patch_num=self.patch_num, - seed=self.seed, - ).unsqueeze(0).to(self._device) + x = ( + _processing_rptc( + self._normalize_input(input_data), + load_size=self.load_size, + patch_num=self.patch_num, + seed=self.seed, + ) + .unsqueeze(0) + .to(self._device) + ) score = self._model(x).sigmoid().item() return self._make_result( float(score), @@ -285,4 +300,4 @@ def predict(self, input_data: Any) -> DetectionResult: load_size=self.load_size, patch_num=self.patch_num, seed=self.seed, - ) \ No newline at end of file + ) diff --git a/detectzoo/detectors/image/resnet50_binary.py b/detectzoo/detectors/image/resnet50_binary.py index d92bac3..e11e70c 100644 --- a/detectzoo/detectors/image/resnet50_binary.py +++ b/detectzoo/detectors/image/resnet50_binary.py @@ -53,7 +53,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class _ResNet(nn.Module): - def __init__(self, block: type[_Bottleneck], layers: list[int], *, num_classes: int = 1) -> None: + def __init__( + self, block: type[_Bottleneck], layers: list[int], *, num_classes: int = 1 + ) -> None: super().__init__() self.inplanes = 64 self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) diff --git a/detectzoo/detectors/image/safe.py b/detectzoo/detectors/image/safe.py index ffeea77..270ab83 100644 --- a/detectzoo/detectors/image/safe.py +++ b/detectzoo/detectors/image/safe.py @@ -42,7 +42,9 @@ def _conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: class _SAFEBottleneck(nn.Module): expansion = 4 - def __init__(self, inplanes: int, planes: int, stride: int = 1, downsample: nn.Module | None = None) -> None: + def __init__( + self, inplanes: int, planes: int, stride: int = 1, downsample: nn.Module | None = None + ) -> None: super().__init__() self.conv1 = _conv1x1(inplanes, planes) self.bn1 = nn.BatchNorm2d(planes) @@ -85,7 +87,9 @@ def __init__(self, num_classes: int = 2) -> None: nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - def _make_layer(self, block: type[_SAFEBottleneck], planes: int, blocks: int, stride: int = 1) -> nn.Sequential: + def _make_layer( + self, block: type[_SAFEBottleneck], planes: int, blocks: int, stride: int = 1 + ) -> nn.Sequential: downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( @@ -98,8 +102,11 @@ def _make_layer(self, block: type[_SAFEBottleneck], planes: int, blocks: int, st return nn.Sequential(*layers) @staticmethod - def _preprocess_dwt(x: torch.Tensor, mode: str = "symmetric", wave: str = "bior1.3") -> torch.Tensor: + def _preprocess_dwt( + x: torch.Tensor, mode: str = "symmetric", wave: str = "bior1.3" + ) -> torch.Tensor: from pytorch_wavelets import DWTForward + dwt = DWTForward(J=1, mode=mode, wave=wave).to(x.device) _, yh = dwt(x) hp = yh[0][:, :, 2, :, :] @@ -167,10 +174,12 @@ def __init__( _load_safe_checkpoint(self._model, self._ckpt, self._device) self._model.to(self._device).eval() - self._transform = transforms.Compose([ - transforms.CenterCrop(input_size), - transforms.ToTensor(), - ]) + self._transform = transforms.Compose( + [ + transforms.CenterCrop(input_size), + transforms.ToTensor(), + ] + ) # ------------------------------------------------------------------ # Input handling @@ -183,8 +192,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/image/srm_filter_kernel.py b/detectzoo/detectors/image/srm_filter_kernel.py index a54a387..2ce78ce 100644 --- a/detectzoo/detectors/image/srm_filter_kernel.py +++ b/detectzoo/detectors/image/srm_filter_kernel.py @@ -69,26 +69,56 @@ filter_edge_5x5 = [ np.array( - [[-1, 2, -2, 2, -1], [2, -6, 8, -6, 2], [-2, 8, -12, 8, -2], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], + [ + [-1, 2, -2, 2, -1], + [2, -6, 8, -6, 2], + [-2, 8, -12, 8, -2], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ], dtype=np.float32, ), np.array( - [[0, 0, -2, 2, -1], [0, 0, 8, -6, 2], [0, 0, -12, 8, -2], [0, 0, 8, -6, 2], [0, 0, -2, 2, -1]], + [ + [0, 0, -2, 2, -1], + [0, 0, 8, -6, 2], + [0, 0, -12, 8, -2], + [0, 0, 8, -6, 2], + [0, 0, -2, 2, -1], + ], dtype=np.float32, ), np.array( - [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [-2, 8, -12, 8, -2], [2, -6, 8, -6, 2], [-1, 2, -2, 2, -1]], + [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [-2, 8, -12, 8, -2], + [2, -6, 8, -6, 2], + [-1, 2, -2, 2, -1], + ], dtype=np.float32, ), np.array( - [[-1, 2, -2, 0, 0], [2, -6, 8, 0, 0], [-2, 8, -12, 0, 0], [2, -6, 8, 0, 0], [-1, 2, -2, 0, 0]], + [ + [-1, 2, -2, 0, 0], + [2, -6, 8, 0, 0], + [-2, 8, -12, 0, 0], + [2, -6, 8, 0, 0], + [-1, 2, -2, 0, 0], + ], dtype=np.float32, ), ] square_3x3 = np.array([[-1, 2, -1], [2, -4, 2], [-1, 2, -1]], dtype=np.float32) square_5x5 = np.array( - [[-1, 2, -2, 2, -1], [2, -6, 8, -6, 2], [-2, 8, -12, 8, -2], [2, -6, 8, -6, 2], [-1, 2, -2, 2, -1]], + [ + [-1, 2, -2, 2, -1], + [2, -6, 8, -6, 2], + [-2, 8, -12, 8, -2], + [2, -6, 8, -6, 2], + [-1, 2, -2, 2, -1], + ], dtype=np.float32, ) @@ -107,4 +137,3 @@ + normalized_filter_edge_5x5 + [normalized_square_3x3, normalized_square_5x5] ) - diff --git a/detectzoo/detectors/image/univfd.py b/detectzoo/detectors/image/univfd.py index 8a4f2b2..04b152c 100644 --- a/detectzoo/detectors/image/univfd.py +++ b/detectzoo/detectors/image/univfd.py @@ -5,7 +5,7 @@ Generative Models", CVPR 2023. https://arxiv.org/abs/2302.10174 -The key idea: Univ-FD uses pretrained CLIP image embeddings to represent images in a rich, +The key idea: Univ-FD uses pretrained CLIP image embeddings to represent images in a rich, semantic feature space. A simple linear classifier is trained on top to distinguish real vs fake. Upstream: https://github.com/WisconsinAIVision/UniversalFakeDetect @@ -69,7 +69,9 @@ def _build_clip_linear(device: torch.device) -> _CLIPLinearModel: ) from e clip_model, _, _ = open_clip.create_model_and_transforms( - "ViT-L-14", pretrained="openai", device=device, + "ViT-L-14", + pretrained="openai", + device=device, ) clip_model.eval() for param in clip_model.parameters(): @@ -144,8 +146,7 @@ def _normalize_input(self, input_data: Any) -> Image.Image: if path.is_file(): return load_image(path) raise TypeError( - "Expected a PIL Image or a path to an image file; got " - f"{type(input_data).__name__}." + f"Expected a PIL Image or a path to an image file; got {type(input_data).__name__}." ) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/text/ada_detect_gpt.py b/detectzoo/detectors/text/ada_detect_gpt.py index e37adda..436e36c 100644 --- a/detectzoo/detectors/text/ada_detect_gpt.py +++ b/detectzoo/detectors/text/ada_detect_gpt.py @@ -30,8 +30,9 @@ logger = get_logger(__name__) -def _bspline_basis(x: torch.Tensor, start: float, end: float, - n_bases: int, order: int) -> torch.Tensor: +def _bspline_basis( + x: torch.Tensor, start: float, end: float, n_bases: int, order: int +) -> torch.Tensor: """Evaluate B-spline basis functions of given order via Cox-de Boor. Returns tensor of shape ``(*x.shape, n_bases + order)`` containing @@ -51,8 +52,7 @@ def _bspline_basis(x: torch.Tensor, start: float, end: float, # Recursive evaluation up to the desired order for k in range(1, order + 1): - new_bases = torch.zeros(flat.shape[0], n_knots - 1 - k, - device=x.device, dtype=x.dtype) + new_bases = torch.zeros(flat.shape[0], n_knots - 1 - k, device=x.device, dtype=x.dtype) for i in range(n_knots - 1 - k): denom1 = knots[i + k] - knots[i] denom2 = knots[i + k + 1] - knots[i + 1] @@ -64,9 +64,15 @@ def _bspline_basis(x: torch.Tensor, start: float, end: float, return bases.reshape(*x.shape, -1) -def _apply_witness(log_probs: torch.Tensor, beta: torch.Tensor, - start: float, end: float, n_bases: int, - order: int, intercept: bool) -> torch.Tensor: +def _apply_witness( + log_probs: torch.Tensor, + beta: torch.Tensor, + start: float, + end: float, + n_bases: int, + order: int, + intercept: bool, +) -> torch.Tensor: """Apply the B-spline witness function ``w(z) = basis(z) @ beta``.""" basis = _bspline_basis(log_probs, start, end, n_bases, order) if intercept: @@ -76,8 +82,7 @@ def _apply_witness(log_probs: torch.Tensor, beta: torch.Tensor, # Pre-trained coefficients from the official repo (GPT-4o + Claude-3.5 + Gemini-2.5) -_DEFAULT_BETA = [0.0, -0.011333, -0.037667, -0.056667, - -0.281667, -0.592, 0.157833, 0.727333] +_DEFAULT_BETA = [0.0, -0.011333, -0.037667, -0.056667, -0.281667, -0.592, 0.157833, 0.727333] @register_detector("adadetectgpt") @@ -108,8 +113,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(model_name=model_name, threshold=threshold, - device=device, **kwargs) + super().__init__(model_name=model_name, threshold=threshold, device=device, **kwargs) self.beta_list = beta if beta is not None else list(_DEFAULT_BETA) self.n_bases = n_bases self.spline_order = spline_order @@ -131,8 +135,13 @@ def predict(self, input_data: Any) -> DetectionResult: # Apply witness function to log-probs w_lp = _apply_witness( - log_probs, beta, self.spline_start, self.spline_end, - self.n_bases, self.spline_order, intercept=True, + log_probs, + beta, + self.spline_start, + self.spline_end, + self.n_bases, + self.spline_order, + intercept=True, ) # Gather observed witness-transformed log-prob diff --git a/detectzoo/detectors/text/binoculars.py b/detectzoo/detectors/text/binoculars.py index 1feab90..ce40137 100644 --- a/detectzoo/detectors/text/binoculars.py +++ b/detectzoo/detectors/text/binoculars.py @@ -87,9 +87,9 @@ def _load_performer(self) -> None: self._performer_tokenizer = AutoTokenizer.from_pretrained(self.performer_model_name) if self._performer_tokenizer.pad_token is None: self._performer_tokenizer.pad_token = self._performer_tokenizer.eos_token - self._performer_model = AutoModelForCausalLM.from_pretrained( - self.performer_model_name - ).to(self._device) + self._performer_model = AutoModelForCausalLM.from_pretrained(self.performer_model_name).to( + self._device + ) self._performer_model.eval() # ------------------------------------------------------------------ @@ -105,7 +105,7 @@ def _get_both_logits(self, text: str): truncation=True, max_length=self.max_length, ).to(self._device) - observer_logits = self.model(**enc).logits # [1, T, V] + observer_logits = self.model(**enc).logits # [1, T, V] performer_logits = self.performer_model(**enc).logits # [1, T, V] return observer_logits, performer_logits, enc diff --git a/detectzoo/detectors/text/biscope.py b/detectzoo/detectors/text/biscope.py index 9043b7c..aed4b1e 100644 --- a/detectzoo/detectors/text/biscope.py +++ b/detectzoo/detectors/text/biscope.py @@ -77,8 +77,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(model_name=model_name, threshold=threshold, - device=device, **kwargs) + super().__init__(model_name=model_name, threshold=threshold, device=device, **kwargs) self.summary_model_name = summary_model self.sample_clip = sample_clip self.n_segments = n_segments @@ -94,7 +93,8 @@ def _load_summary_model(self) -> None: logger.info("Loading BiScope summary model '%s' …", self.summary_model_name) self._summary_tokenizer = AutoTokenizer.from_pretrained( - self.summary_model_name, padding_side="left", + self.summary_model_name, + padding_side="left", ) if self._summary_tokenizer.pad_token is None: self._summary_tokenizer.pad_token = self._summary_tokenizer.eos_token @@ -130,8 +130,10 @@ def _generate_summary(self, text: str) -> str: """Generate a short title/summary for the text.""" prompt = f"Write a title for this text: {text}\nJust output the title:" ids = self.summary_tokenizer( - prompt, return_tensors="pt", - max_length=self.sample_clip, truncation=True, + prompt, + return_tensors="pt", + max_length=self.sample_clip, + truncation=True, ).input_ids.to(self._device) ids = ids[:, 1:] # remove start token trigger_len = ids.shape[1] @@ -139,7 +141,8 @@ def _generate_summary(self, text: str) -> str: config.max_new_tokens = 64 attn = torch.ones_like(ids) out = self.summary_model.generate( - ids, attention_mask=attn, + ids, + attention_mask=attn, generation_config=config, pad_token_id=self.summary_tokenizer.pad_token_id, )[0] @@ -160,7 +163,8 @@ def _build_prompt(self, text: str) -> str: @torch.no_grad() def _compute_biscope_losses( - self, text: str, + self, + text: str, ) -> tuple[np.ndarray, np.ndarray]: """Compute per-token FCE and BCE loss arrays on the text region. @@ -170,12 +174,15 @@ def _compute_biscope_losses( prompt_text = self._build_prompt(text) prompt_ids = self.tokenizer( - prompt_text, return_tensors="pt", + prompt_text, + return_tensors="pt", ).input_ids.to(self._device) text_ids = self.tokenizer( - text, return_tensors="pt", - max_length=self.sample_clip, truncation=True, + text, + return_tensors="pt", + max_length=self.sample_clip, + truncation=True, ).input_ids.to(self._device) combined_ids = torch.cat([prompt_ids, text_ids], dim=1) # [1, P+T] @@ -186,14 +193,14 @@ def _compute_biscope_losses( return np.array([0.0]), np.array([0.0]) logits = self.model(input_ids=combined_ids).logits # [1, P+T, V] - targets = combined_ids[0, prompt_len:total_len] # [T] + targets = combined_ids[0, prompt_len:total_len] # [T] # FCE: logits at [prompt_len-1 .. total_len-2] predict targets [0..T-1] fce_logits = logits[0, prompt_len - 1 : total_len - 1, :] fce = F.cross_entropy(fce_logits, targets, reduction="none") # BCE: logits at [prompt_len .. total_len-1] predict targets [0..T-1] - bce_logits = logits[0, prompt_len : total_len, :] + bce_logits = logits[0, prompt_len:total_len, :] bce = F.cross_entropy(bce_logits, targets, reduction="none") return fce.cpu().numpy(), bce.cpu().numpy() @@ -212,12 +219,18 @@ def _extract_features(self, fce: np.ndarray, bce: np.ndarray) -> np.ndarray: if len(fce_suffix) == 0: features.extend([0.0] * 8) continue - features.extend([ - float(np.mean(fce_suffix)), float(np.max(fce_suffix)), - float(np.min(fce_suffix)), float(np.std(fce_suffix)), - float(np.mean(bce_suffix)), float(np.max(bce_suffix)), - float(np.min(bce_suffix)), float(np.std(bce_suffix)), - ]) + features.extend( + [ + float(np.mean(fce_suffix)), + float(np.max(fce_suffix)), + float(np.min(fce_suffix)), + float(np.std(fce_suffix)), + float(np.mean(bce_suffix)), + float(np.max(bce_suffix)), + float(np.min(bce_suffix)), + float(np.std(bce_suffix)), + ] + ) return np.array(features) def predict(self, input_data: Any) -> DetectionResult: diff --git a/detectzoo/detectors/text/detect_gpt.py b/detectzoo/detectors/text/detect_gpt.py index 407e712..f76e34c 100644 --- a/detectzoo/detectors/text/detect_gpt.py +++ b/detectzoo/detectors/text/detect_gpt.py @@ -95,9 +95,9 @@ def _load_perturbation_model(self) -> None: logger.info("Loading perturbation model '%s' …", self.perturbation_model_name) self._ptokenizer = AutoTokenizer.from_pretrained(self.perturbation_model_name) - self._pmodel = AutoModelForSeq2SeqLM.from_pretrained( - self.perturbation_model_name - ).to(self._device) + self._pmodel = AutoModelForSeq2SeqLM.from_pretrained(self.perturbation_model_name).to( + self._device + ) self._pmodel.eval() # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/text/detective.py b/detectzoo/detectors/text/detective.py index 8a915a8..2c6feb0 100644 --- a/detectzoo/detectors/text/detective.py +++ b/detectzoo/detectors/text/detective.py @@ -110,6 +110,7 @@ def _load_checkpoint(self) -> None: repo_id = _AVAILABLE_CHECKPOINTS[ckpt_path] try: from huggingface_hub import hf_hub_download + ckpt_path = hf_hub_download( repo_id=repo_id, filename=str(self.checkpoint), @@ -117,13 +118,16 @@ def _load_checkpoint(self) -> None: except Exception as exc: logger.warning( "Could not download checkpoint '%s' from %s: %s", - self.checkpoint, repo_id, exc, + self.checkpoint, + repo_id, + exc, ) return else: logger.warning( "Checkpoint '%s' not found locally and is not a known " - "official checkpoint. Skipping.", self.checkpoint, + "official checkpoint. Skipping.", + self.checkpoint, ) return @@ -135,13 +139,14 @@ def _load_checkpoint(self) -> None: enc_state: dict[str, Any] = {} for key, val in state_dict.items(): if key.startswith("model.model."): - enc_state[key[len("model.model."):]] = val + enc_state[key[len("model.model.") :]] = val elif key.startswith("model."): - enc_state[key[len("model."):]] = val + enc_state[key[len("model.") :]] = val if enc_state: missing, unexpected = self._enc_model.load_state_dict( # type: ignore[union-attr] - enc_state, strict=False, + enc_state, + strict=False, ) if missing: logger.debug("Missing keys when loading checkpoint: %s", missing[:5]) @@ -192,8 +197,7 @@ def predict(self, input_data: Any) -> DetectionResult: if weight is not None: ref_dir = F.normalize(weight.mean(dim=0, keepdim=True), dim=-1).squeeze(0) - score = float(F.cosine_similarity(emb.unsqueeze(0), - ref_dir.unsqueeze(0)).squeeze()) + score = float(F.cosine_similarity(emb.unsqueeze(0), ref_dir.unsqueeze(0)).squeeze()) else: score = float(emb.abs().mean()) diff --git a/detectzoo/detectors/text/dna_detectllm.py b/detectzoo/detectors/text/dna_detectllm.py index 86dd7ca..db7239a 100644 --- a/detectzoo/detectors/text/dna_detectllm.py +++ b/detectzoo/detectors/text/dna_detectllm.py @@ -78,15 +78,15 @@ def _load_model(self) -> None: if self._shared_tokenizer.pad_token is None: self._shared_tokenizer.pad_token = self._shared_tokenizer.eos_token - self._performer_model = AutoModelForCausalLM.from_pretrained( - self.performer_model_name - ).to(self._device) + self._performer_model = AutoModelForCausalLM.from_pretrained(self.performer_model_name).to( + self._device + ) self._performer_model.eval() logger.info("Loading DNA-DetectLLM observer '%s' …", self.observer_model_name) - self._observer_model = AutoModelForCausalLM.from_pretrained( - self.observer_model_name - ).to(self._device) + self._observer_model = AutoModelForCausalLM.from_pretrained(self.observer_model_name).to( + self._device + ) self._observer_model.eval() self._model = self._performer_model diff --git a/detectzoo/detectors/text/dna_gpt.py b/detectzoo/detectors/text/dna_gpt.py index 6c31e2e..9590a7d 100644 --- a/detectzoo/detectors/text/dna_gpt.py +++ b/detectzoo/detectors/text/dna_gpt.py @@ -123,9 +123,9 @@ def _load_regen_model(self) -> None: self._regen_tokenizer = AutoTokenizer.from_pretrained(self.regen_model_name) if self._regen_tokenizer.pad_token is None: self._regen_tokenizer.pad_token = self._regen_tokenizer.eos_token - self._regen_model = AutoModelForCausalLM.from_pretrained( - self.regen_model_name - ).to(self._device) + self._regen_model = AutoModelForCausalLM.from_pretrained(self.regen_model_name).to( + self._device + ) self._regen_model.eval() # ------------------------------------------------------------------ @@ -165,7 +165,9 @@ def _sample_batch(self, prompt: str) -> list[str]: return decoded logger.debug( "DNA-GPT regeneration attempt %d: shortest %d < %d words, retrying.", - attempt + 1, m, self.min_words, + attempt + 1, + m, + self.min_words, ) return decoded # last attempt, even if still too short diff --git a/detectzoo/detectors/text/fast_detect_gpt.py b/detectzoo/detectors/text/fast_detect_gpt.py index 3930eba..c19f1ff 100644 --- a/detectzoo/detectors/text/fast_detect_gpt.py +++ b/detectzoo/detectors/text/fast_detect_gpt.py @@ -87,9 +87,8 @@ def _load_reference_model(self) -> None: self._ref_tokenizer = AutoTokenizer.from_pretrained(self.reference_model_name) if self._ref_tokenizer.pad_token is None: self._ref_tokenizer.pad_token = self._ref_tokenizer.eos_token - self._ref_model = ( - AutoModelForCausalLM.from_pretrained(self.reference_model_name) - .to(self._device) + self._ref_model = AutoModelForCausalLM.from_pretrained(self.reference_model_name).to( + self._device ) self._ref_model.eval() @@ -124,13 +123,12 @@ def _sampling_discrepancy_analytic( probs_ref = F.softmax(logits_ref, dim=-1) log_likelihood = lprobs_score.gather(dim=-1, index=labels).squeeze(-1) # [1, T-1] - mean_ref = (probs_ref * lprobs_score).sum(dim=-1) # [1, T-1] + mean_ref = (probs_ref * lprobs_score).sum(dim=-1) # [1, T-1] var_ref = (probs_ref * lprobs_score.square()).sum(dim=-1) - mean_ref.square() - discrepancy = ( - (log_likelihood.sum(dim=-1) - mean_ref.sum(dim=-1)) - / var_ref.sum(dim=-1).clamp(min=1e-10).sqrt() - ) + discrepancy = (log_likelihood.sum(dim=-1) - mean_ref.sum(dim=-1)) / var_ref.sum( + dim=-1 + ).clamp(min=1e-10).sqrt() return float(discrepancy.mean()) # ------------------------------------------------------------------ diff --git a/detectzoo/detectors/text/gecscore.py b/detectzoo/detectors/text/gecscore.py index fb6b9f5..63606b6 100644 --- a/detectzoo/detectors/text/gecscore.py +++ b/detectzoo/detectors/text/gecscore.py @@ -62,9 +62,9 @@ def _load_model(self) -> None: logger.info("Loading GEC model '%s' …", self.gec_model_name) self._gec_tokenizer = AutoTokenizer.from_pretrained(self.gec_model_name) - self._gec_model = AutoModelForSeq2SeqLM.from_pretrained( - self.gec_model_name - ).to(self._device) + self._gec_model = AutoModelForSeq2SeqLM.from_pretrained(self.gec_model_name).to( + self._device + ) self._gec_model.eval() @property diff --git a/detectzoo/detectors/text/ghostbuster.py b/detectzoo/detectors/text/ghostbuster.py index f30d24d..5f08e08 100644 --- a/detectzoo/detectors/text/ghostbuster.py +++ b/detectzoo/detectors/text/ghostbuster.py @@ -34,6 +34,7 @@ def _unigram_probs(tokens: List[str]) -> np.ndarray: """Cheap unigram probability proxy based on token frequency.""" from collections import Counter + if not tokens: return np.array([1.0]) counts = Counter(tokens) @@ -84,9 +85,9 @@ def _load_large_model(self) -> None: self._large_tokenizer = AutoTokenizer.from_pretrained(self.large_model_name) if self._large_tokenizer.pad_token is None: self._large_tokenizer.pad_token = self._large_tokenizer.eos_token - self._large_model = AutoModelForCausalLM.from_pretrained( - self.large_model_name - ).to(self._device) + self._large_model = AutoModelForCausalLM.from_pretrained(self.large_model_name).to( + self._device + ) self._large_model.eval() @property @@ -108,7 +109,9 @@ def _token_probs(self, text: str, use_large: bool = False) -> np.ndarray: tok = self.large_tokenizer if use_large else self.tokenizer enc = tok( - text, return_tensors="pt", truncation=True, + text, + return_tensors="pt", + truncation=True, max_length=self.max_length, ).to(self._device) logits = model(**enc).logits @@ -158,7 +161,7 @@ def _build_features(self, text: str) -> np.ndarray: # Handcrafted features from the paper diff = l_ - s sorted_diff = np.sort(diff)[::-1] - features.append(np.mean(sorted_diff[:min(25, len(sorted_diff))])) + features.append(np.mean(sorted_diff[: min(25, len(sorted_diff))])) if len(sorted_diff) > 25: features.append(np.mean(sorted_diff[25:50])) else: @@ -168,7 +171,7 @@ def _build_features(self, text: str) -> np.ndarray: features.append(float(np.sum(l_ > 0.95))) # Mean of top-25 large probs sorted_l = np.sort(l_)[::-1] - features.append(np.mean(sorted_l[:min(25, len(sorted_l))])) + features.append(np.mean(sorted_l[: min(25, len(sorted_l))])) if len(sorted_l) > 25: features.append(np.mean(sorted_l[25:50])) else: diff --git a/detectzoo/detectors/text/glimpse.py b/detectzoo/detectors/text/glimpse.py index 6f957d5..39498b0 100644 --- a/detectzoo/detectors/text/glimpse.py +++ b/detectzoo/detectors/text/glimpse.py @@ -58,9 +58,8 @@ def _geometric_estimate( lam = lam.clamp(min=1e-10, max=1.0 - 1e-10) tail_len = rank_size - K - exponents = torch.arange(1, tail_len + 1, device=top_k_probs.device, - dtype=top_k_probs.dtype) - tail = p_K * (lam ** exponents) + exponents = torch.arange(1, tail_len + 1, device=top_k_probs.device, dtype=top_k_probs.dtype) + tail = p_K * (lam**exponents) full = torch.cat([top_k_probs, tail]) return full / full.sum().clamp(min=1e-30) @@ -91,8 +90,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(model_name=model_name, threshold=threshold, - device=device, **kwargs) + super().__init__(model_name=model_name, threshold=threshold, device=device, **kwargs) self.top_k = top_k self.rank_size = rank_size self.use_full_dist = use_full_dist @@ -109,9 +107,9 @@ def predict(self, input_data: Any) -> DetectionResult: log_probs_full = F.log_softmax(shift_logits, dim=-1) # Observed token log-probabilities - ll_observed = log_probs_full.gather( - 2, shift_labels.unsqueeze(-1) - ).squeeze(-1).squeeze(0) # [T] + ll_observed = ( + log_probs_full.gather(2, shift_labels.unsqueeze(-1)).squeeze(-1).squeeze(0) + ) # [T] if self.use_full_dist: probs_full = F.softmax(shift_logits, dim=-1).squeeze(0) @@ -126,7 +124,7 @@ def predict(self, input_data: Any) -> DetectionResult: for t in range(T): p_t = probs[t] sorted_probs, _ = p_t.sort(descending=True) - top_k_probs = sorted_probs[:self.top_k] + top_k_probs = sorted_probs[: self.top_k] est_dist = _geometric_estimate(top_k_probs, self.rank_size) log_est = torch.log(est_dist.clamp(min=1e-30)) diff --git a/detectzoo/detectors/text/ide.py b/detectzoo/detectors/text/ide.py index 4cc37dc..8d6caed 100644 --- a/detectzoo/detectors/text/ide.py +++ b/detectzoo/detectors/text/ide.py @@ -194,6 +194,7 @@ def enc_tokenizer(self): def _get_point_cloud(self, text: str) -> np.ndarray: """Encode text into a point cloud of token embeddings.""" import re + text = re.sub(r"\n", " ", text) text = re.sub(r" +", " ", text) @@ -237,8 +238,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(encoder_model=encoder_model, threshold=threshold, - device=device, **kwargs) + super().__init__(encoder_model=encoder_model, threshold=threshold, device=device, **kwargs) self.alpha = alpha self.n_reruns = n_reruns @@ -278,8 +278,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(encoder_model=encoder_model, threshold=threshold, - device=device, **kwargs) + super().__init__(encoder_model=encoder_model, threshold=threshold, device=device, **kwargs) self.n_neighbors = n_neighbors def predict(self, input_data: Any) -> DetectionResult: diff --git a/detectzoo/detectors/text/imbd.py b/detectzoo/detectors/text/imbd.py index 543bcb1..7595b9e 100644 --- a/detectzoo/detectors/text/imbd.py +++ b/detectzoo/detectors/text/imbd.py @@ -101,8 +101,9 @@ def _load_model(self) -> None: if self.use_peft: from peft import PeftModel - base_model = AutoModelForCausalLM.from_pretrained(self.base_model_name, - device_map=self._device) + base_model = AutoModelForCausalLM.from_pretrained( + self.base_model_name, device_map=self._device + ) self._model = PeftModel.from_pretrained( base_model, diff --git a/detectzoo/detectors/text/ipad.py b/detectzoo/detectors/text/ipad.py index 94b19ee..4097356 100644 --- a/detectzoo/detectors/text/ipad.py +++ b/detectzoo/detectors/text/ipad.py @@ -51,9 +51,7 @@ "generated by LLM with a similar prompt.\n" "Prompt: {prompt}\nText: {text}\nAnswer:" ) -_RC_TEMPLATE = ( - "Can LLM generate the input text {text} through the prompt {prompt}?" -) +_RC_TEMPLATE = "Can LLM generate the input text {text} through the prompt {prompt}?" @register_detector("ipad") @@ -88,8 +86,13 @@ def __init__( max_length: int = 512, **kwargs: Any, ) -> None: - super().__init__(model_name=model_name, threshold=threshold, - device=device, max_length=max_length, **kwargs) + super().__init__( + model_name=model_name, + threshold=threshold, + device=device, + max_length=max_length, + **kwargs, + ) self.use_peft = use_peft self.fusion_weight = fusion_weight self.max_new_tokens = max_new_tokens @@ -121,41 +124,47 @@ def _load_peft_models(self) -> None: from peft import PeftModel base = AutoModelForCausalLM.from_pretrained( - self.model_name, torch_dtype=torch.float16, + self.model_name, + torch_dtype=torch.float16, device_map=self._device, ) base.eval() logger.info("Loading IPAD Prompt Inverter adapter …") self._inverter_model = PeftModel.from_pretrained( - base, _INVERTER_ADAPTER, + base, + _INVERTER_ADAPTER, ) self._inverter_model.eval() logger.info("Loading IPAD PTCV adapter …") base_ptcv = AutoModelForCausalLM.from_pretrained( - self.model_name, torch_dtype=torch.float16, + self.model_name, + torch_dtype=torch.float16, device_map=self._device, ) self._ptcv_model = PeftModel.from_pretrained( - base_ptcv, _PTCV_ADAPTER, + base_ptcv, + _PTCV_ADAPTER, ) self._ptcv_model.eval() logger.info("Loading IPAD RC adapter …") base_rc = AutoModelForCausalLM.from_pretrained( - self.model_name, torch_dtype=torch.float16, + self.model_name, + torch_dtype=torch.float16, device_map=self._device, ) self._rc_model = PeftModel.from_pretrained( - base_rc, _RC_ADAPTER, + base_rc, + _RC_ADAPTER, ) self._rc_model.eval() except Exception as exc: logger.warning( - "Failed to load PEFT adapters (%s); " - "falling back to zero-shot proxy.", exc, + "Failed to load PEFT adapters (%s); falling back to zero-shot proxy.", + exc, ) self.use_peft = False self._load_fallback_model() @@ -184,7 +193,9 @@ def ipad_tokenizer(self): def _softmax_prob(self, model: torch.nn.Module, prompt: str) -> float: """Extract binary 'Yes'/'No' probability from a distinguisher.""" enc = self.ipad_tokenizer( - prompt, return_tensors="pt", truncation=True, + prompt, + return_tensors="pt", + truncation=True, max_length=self.max_length, ).to(self._device) @@ -195,9 +206,7 @@ def _softmax_prob(self, model: torch.nn.Module, prompt: str) -> float: no_id = self.ipad_tokenizer.encode("No", add_special_tokens=False) if yes_id and no_id: - pair_logits = torch.tensor( - [logits[0, no_id[0]].item(), logits[0, yes_id[0]].item()] - ) + pair_logits = torch.tensor([logits[0, no_id[0]].item(), logits[0, yes_id[0]].item()]) probs = torch.softmax(pair_logits, dim=0) return float(probs[1]) @@ -207,15 +216,18 @@ def _softmax_prob(self, model: torch.nn.Module, prompt: str) -> float: def _invert_prompt_peft(self, text: str) -> str: prompt = _INVERTER_TEMPLATE.format(text=text[:1024]) enc = self.ipad_tokenizer( - prompt, return_tensors="pt", truncation=True, + prompt, + return_tensors="pt", + truncation=True, max_length=self.max_length, ).to(self._device) out = self._inverter_model.generate( # type: ignore[union-attr] - **enc, max_new_tokens=self.max_new_tokens, + **enc, + max_new_tokens=self.max_new_tokens, do_sample=False, ) - generated = out[0][enc["input_ids"].shape[1]:] + generated = out[0][enc["input_ids"].shape[1] :] return self.ipad_tokenizer.decode(generated, skip_special_tokens=True).strip() def _predict_peft(self, text: str) -> DetectionResult: @@ -243,18 +255,22 @@ def _predict_peft(self, text: str) -> DetectionResult: @torch.no_grad() def _invert_prompt_simple(self, text: str) -> str: words = text.split() - return " ".join(words[:min(30, len(words))]) + return " ".join(words[: min(30, len(words))]) @torch.no_grad() def _conditional_logprob(self, prompt: str, text: str) -> float: combined = prompt + " " + text enc = self.ipad_tokenizer( - combined, return_tensors="pt", truncation=True, + combined, + return_tensors="pt", + truncation=True, max_length=self.max_length, ).to(self._device) prompt_enc = self.ipad_tokenizer( - prompt, return_tensors="pt", truncation=True, + prompt, + return_tensors="pt", + truncation=True, max_length=self.max_length, ) prompt_len = prompt_enc["input_ids"].shape[1] @@ -265,7 +281,7 @@ def _conditional_logprob(self, prompt: str, text: str) -> float: token_lp = log_probs.gather(2, ids.unsqueeze(-1)).squeeze(-1).squeeze(0) if prompt_len < len(token_lp): - text_lp = token_lp[prompt_len - 1:] + text_lp = token_lp[prompt_len - 1 :] return float(text_lp.mean()) return float(token_lp.mean()) diff --git a/detectzoo/detectors/text/irm.py b/detectzoo/detectors/text/irm.py index 4a1cab9..7953ba2 100644 --- a/detectzoo/detectors/text/irm.py +++ b/detectzoo/detectors/text/irm.py @@ -76,15 +76,15 @@ def _load_model(self) -> None: if self._shared_tokenizer.pad_token is None: self._shared_tokenizer.pad_token = self._shared_tokenizer.eos_token - self._instruct_model = AutoModelForCausalLM.from_pretrained( - self.instruct_model_name - ).to(self._device) + self._instruct_model = AutoModelForCausalLM.from_pretrained(self.instruct_model_name).to( + self._device + ) self._instruct_model.eval() logger.info("Loading IRM base model '%s' …", self.base_model_name) - self._base_model_obj = AutoModelForCausalLM.from_pretrained( - self.base_model_name - ).to(self._device) + self._base_model_obj = AutoModelForCausalLM.from_pretrained(self.base_model_name).to( + self._device + ) self._base_model_obj.eval() self._model = self._instruct_model @@ -122,12 +122,8 @@ def predict(self, input_data: Any) -> DetectionResult: instruct_lp = F.log_softmax(instruct_logits, dim=-1) base_lp = F.log_softmax(base_logits, dim=-1) - instruct_token_lp = instruct_lp.gather( - 2, shift_labels.unsqueeze(-1) - ).squeeze(-1) - base_token_lp = base_lp.gather( - 2, shift_labels.unsqueeze(-1) - ).squeeze(-1) + instruct_token_lp = instruct_lp.gather(2, shift_labels.unsqueeze(-1)).squeeze(-1) + base_token_lp = base_lp.gather(2, shift_labels.unsqueeze(-1)).squeeze(-1) # IRM score: sum of per-token log-prob differences score = float((instruct_token_lp - base_token_lp).sum()) diff --git a/detectzoo/detectors/text/lastde.py b/detectzoo/detectors/text/lastde.py index 9d33b0a..d968963 100644 --- a/detectzoo/detectors/text/lastde.py +++ b/detectzoo/detectors/text/lastde.py @@ -221,7 +221,7 @@ def _lastde_score(self, log_likelihood: torch.Tensor, n_bins: int) -> torch.Tens mean_lp = log_likelihood.mean(dim=1) # [1, S] mde = _compute_mde(log_likelihood, self.embed_size, n_bins, self.tau_prime) mde = mde.clamp(min=1e-10) - return (mean_lp.squeeze(0) / mde) # [S] + return mean_lp.squeeze(0) / mde # [S] @torch.no_grad() def predict(self, input_data: Any) -> DetectionResult: diff --git a/detectzoo/detectors/text/log_likelihood.py b/detectzoo/detectors/text/log_likelihood.py index b861d1b..ffa0fc7 100644 --- a/detectzoo/detectors/text/log_likelihood.py +++ b/detectzoo/detectors/text/log_likelihood.py @@ -6,7 +6,7 @@ Scores text by average token log-probability under a causal LM. Machine-generated text tends to have *higher* average log-prob (lower -perplexity) than human text, so we use the mean log-probability: +perplexity) than human text, so we use the mean log-probability: higher score → more likely AI. """ diff --git a/detectzoo/detectors/text/ood_detectors.py b/detectzoo/detectors/text/ood_detectors.py index e79e4da..14a36db 100644 --- a/detectzoo/detectors/text/ood_detectors.py +++ b/detectzoo/detectors/text/ood_detectors.py @@ -94,9 +94,14 @@ def _contrastive_loss( Positive pairs share the same label; negatives differ. """ - sim = F.cosine_similarity( - embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=-1, - ) / temperature + sim = ( + F.cosine_similarity( + embeddings.unsqueeze(1), + embeddings.unsqueeze(0), + dim=-1, + ) + / temperature + ) batch = embeddings.size(0) mask_pos = labels.unsqueeze(1) == labels.unsqueeze(0) mask_self = torch.eye(batch, dtype=torch.bool, device=embeddings.device) @@ -181,20 +186,24 @@ def _load_detective_weights(self, checkpoint: str) -> None: repo_id = _DETECTIVE_CHECKPOINTS[checkpoint] try: from huggingface_hub import hf_hub_download + ckpt_path = hf_hub_download( repo_id=repo_id, filename=checkpoint, ) except Exception as exc: logger.warning( - "Could not download DeTeCtive checkpoint '%s' " - "from %s: %s", checkpoint, repo_id, exc, + "Could not download DeTeCtive checkpoint '%s' from %s: %s", + checkpoint, + repo_id, + exc, ) return else: logger.warning( "DeTeCtive checkpoint '%s' not found locally and is not " - "a known official checkpoint. Skipping.", checkpoint, + "a known official checkpoint. Skipping.", + checkpoint, ) return @@ -204,9 +213,9 @@ def _load_detective_weights(self, checkpoint: str) -> None: enc_state: dict[str, Any] = {} for key, val in state_dict.items(): if key.startswith("model.model."): - enc_state[key[len("model.model."):]] = val + enc_state[key[len("model.model.") :]] = val elif key.startswith("model."): - enc_state[key[len("model."):]] = val + enc_state[key[len("model.") :]] = val if enc_state: missing, unexpected = self.enc_model.load_state_dict(enc_state, strict=False) @@ -289,8 +298,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(encoder_model=encoder_model, threshold=threshold, - device=device, **kwargs) + super().__init__(encoder_model=encoder_model, threshold=threshold, device=device, **kwargs) self._center: torch.Tensor | None = None if center is not None: self._center = torch.tensor(center, dtype=torch.float32) @@ -371,7 +379,7 @@ def fit( total_loss = 0.0 n_batches = 0 for start in range(0, len(all_texts), batch_size): - idx = perm[start:start + batch_size] + idx = perm[start : start + batch_size] batch_texts = [all_texts[i] for i in idx] batch_labels = torch.tensor([all_labels[i] for i in idx], device=self._device) @@ -396,22 +404,29 @@ def fit( total_loss += float(loss) n_batches += 1 - logger.info("Epoch %d/%d — loss %.4f", epoch + 1, epochs, - total_loss / max(n_batches, 1)) + logger.info( + "Epoch %d/%d — loss %.4f", epoch + 1, epochs, total_loss / max(n_batches, 1) + ) self.enc_model.eval() if save_path is not None: - torch.save({ - "encoder": self.enc_model.state_dict(), - "center": self._center, - }, save_path) + torch.save( + { + "encoder": self.enc_model.state_dict(), + "center": self._center, + }, + save_path, + ) logger.info("Saved D-SVDD checkpoint to '%s'", save_path) def _embed_no_grad_off(self, text: str) -> torch.Tensor: """Embed with gradient tracking (for training).""" enc = self.enc_tokenizer( - text, return_tensors="pt", truncation=True, - max_length=self.max_length, padding="max_length", + text, + return_tensors="pt", + truncation=True, + max_length=self.max_length, + padding="max_length", ).to(self._device) out = self.enc_model(**enc) mask = enc["attention_mask"].unsqueeze(-1).float() @@ -465,8 +480,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(encoder_model=encoder_model, threshold=threshold, - device=device, **kwargs) + super().__init__(encoder_model=encoder_model, threshold=threshold, device=device, **kwargs) self.gp_lambda = gp_lambda self.gp_power = gp_power self._classifiers: nn.ModuleList | None = None @@ -483,10 +497,12 @@ def _get_embed_dim(self) -> int: return getattr(cfg, "hidden_size", 768) def _init_classifiers(self, n: int, embed_dim: int) -> None: - self._classifiers = nn.ModuleList([ - _ClassificationHead(embed_dim, 1, activation="relu").to(self._device) - for _ in range(n) - ]) + self._classifiers = nn.ModuleList( + [ + _ClassificationHead(embed_dim, 1, activation="relu").to(self._device) + for _ in range(n) + ] + ) def _load_checkpoint(self, path: str) -> None: state = torch.load(path, map_location=self._device, weights_only=False) @@ -560,8 +576,9 @@ def fit( self.enc_model.eval() for i, model_name in enumerate(model_names): - logger.info("Training HRN classifier %d/%d for '%s' …", - i + 1, len(model_names), model_name) + logger.info( + "Training HRN classifier %d/%d for '%s' …", i + 1, len(model_names), model_name + ) clf = self._classifiers[i] optimizer = torch.optim.Adam(clf.parameters(), lr=lr, betas=(0.9, 0.98)) @@ -575,10 +592,9 @@ def fit( total_loss = 0.0 n_batches = 0 for start in range(0, len(all_texts), batch_size): - idx = perm[start:start + batch_size] + idx = perm[start : start + batch_size] batch_texts = [all_texts[j] for j in idx] - batch_labels = torch.tensor([all_labels[j] for j in idx], - device=self._device) + batch_labels = torch.tensor([all_labels[j] for j in idx], device=self._device) with torch.no_grad(): embs = self._embed_batch(batch_texts) @@ -606,28 +622,40 @@ def fit( total_loss += float(loss) n_batches += 1 - logger.info(" [%s] epoch %d/%d — loss %.4f", model_name, - epoch + 1, epochs, total_loss / max(n_batches, 1)) + logger.info( + " [%s] epoch %d/%d — loss %.4f", + model_name, + epoch + 1, + epochs, + total_loss / max(n_batches, 1), + ) if not freeze_encoder: self.enc_model.eval() if save_path is not None: - torch.save({ - "encoder": self.enc_model.state_dict(), - "classifiers": [c.state_dict() for c in self._classifiers], - }, save_path) + torch.save( + { + "encoder": self.enc_model.state_dict(), + "classifiers": [c.state_dict() for c in self._classifiers], + }, + save_path, + ) logger.info("Saved HRN checkpoint to '%s'", save_path) def _gradient_penalty( - self, clf: _ClassificationHead, real: torch.Tensor, + self, + clf: _ClassificationHead, + real: torch.Tensor, ) -> torch.Tensor: """WGAN-GP style gradient penalty with ``p=gp_power``.""" eps = torch.rand(real.size(0), 1, device=real.device) interp = (eps * real + (1 - eps) * real).detach().requires_grad_(True) out = clf(interp) grad = torch.autograd.grad( - outputs=out.sum(), inputs=interp, - create_graph=True, retain_graph=True, + outputs=out.sum(), + inputs=interp, + create_graph=True, + retain_graph=True, )[0] penalty = ((grad.norm(2, dim=1) - 1) ** self.gp_power).mean() return penalty @@ -683,8 +711,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(encoder_model=encoder_model, threshold=threshold, - device=device, **kwargs) + super().__init__(encoder_model=encoder_model, threshold=threshold, device=device, **kwargs) self.m_in = m_in self.m_out = m_out self._classifier: _ClassificationHead | None = None @@ -695,7 +722,9 @@ def __init__( elif n_classes > 0: embed_dim = self._get_embed_dim() self._classifier = _ClassificationHead( - embed_dim, n_classes, activation="tanh", + embed_dim, + n_classes, + activation="tanh", ).to(self._device) def _get_embed_dim(self) -> int: @@ -711,11 +740,12 @@ def _load_checkpoint(self, path: str) -> None: in_dim = sd["net.0.weight"].shape[1] out_dim = sd["net.4.weight"].shape[0] self._classifier = _ClassificationHead( - in_dim, out_dim, activation="tanh", + in_dim, + out_dim, + activation="tanh", ).to(self._device) self._classifier.load_state_dict(sd) - logger.info("Loaded Energy classifier (%d classes) from checkpoint", - out_dim) + logger.info("Loaded Energy classifier (%d classes) from checkpoint", out_dim) @torch.no_grad() def predict(self, input_data: Any) -> DetectionResult: @@ -769,7 +799,9 @@ def fit( embed_dim = self._get_embed_dim() self._classifier = _ClassificationHead( - embed_dim, n_classes, activation="tanh", + embed_dim, + n_classes, + activation="tanh", ).to(self._device) self.enc_model.train() @@ -796,12 +828,10 @@ def fit( total_loss = 0.0 n_batches = 0 for start in range(0, len(all_texts), batch_size): - idx = perm[start:start + batch_size] + idx = perm[start : start + batch_size] batch_texts = [all_texts[i] for i in idx] - batch_binary = torch.tensor([all_binary[i] for i in idx], - device=self._device) - batch_class = torch.tensor([all_class[i] for i in idx], - device=self._device) + batch_binary = torch.tensor([all_binary[i] for i in idx], device=self._device) + batch_class = torch.tensor([all_class[i] for i in idx], device=self._device) embs = torch.stack([self._embed_no_grad_off(t) for t in batch_texts]) @@ -820,19 +850,20 @@ def fit( energy_all = -torch.logsumexp(logits_all, dim=-1) loss_energy = torch.tensor(0.0, device=self._device) if machine_mask.any(): - loss_energy = loss_energy + F.relu( - energy_all[machine_mask] - self.m_in - ).pow(2).mean() + loss_energy = ( + loss_energy + F.relu(energy_all[machine_mask] - self.m_in).pow(2).mean() + ) if human_mask.any(): - loss_energy = loss_energy + F.relu( - self.m_out - energy_all[human_mask] - ).pow(2).mean() + loss_energy = ( + loss_energy + F.relu(self.m_out - energy_all[human_mask]).pow(2).mean() + ) # 3) Contrastive loss loss_contrastive = _contrastive_loss(embs, batch_binary) - loss = (alpha * loss_contrastive - + beta * (loss_classify + energy_weight * loss_energy)) + loss = alpha * loss_contrastive + beta * ( + loss_classify + energy_weight * loss_energy + ) optimizer.zero_grad() loss.backward() @@ -840,23 +871,30 @@ def fit( total_loss += float(loss) n_batches += 1 - logger.info("Epoch %d/%d — loss %.4f", epoch + 1, epochs, - total_loss / max(n_batches, 1)) + logger.info( + "Epoch %d/%d — loss %.4f", epoch + 1, epochs, total_loss / max(n_batches, 1) + ) self.enc_model.eval() self._classifier.eval() if save_path is not None: - torch.save({ - "encoder": self.enc_model.state_dict(), - "classifier": self._classifier.state_dict(), - }, save_path) + torch.save( + { + "encoder": self.enc_model.state_dict(), + "classifier": self._classifier.state_dict(), + }, + save_path, + ) logger.info("Saved Energy checkpoint to '%s'", save_path) def _embed_no_grad_off(self, text: str) -> torch.Tensor: """Embed with gradient tracking (for training).""" enc = self.enc_tokenizer( - text, return_tensors="pt", truncation=True, - max_length=self.max_length, padding="max_length", + text, + return_tensors="pt", + truncation=True, + max_length=self.max_length, + padding="max_length", ).to(self._device) out = self.enc_model(**enc) mask = enc["attention_mask"].unsqueeze(-1).float() diff --git a/detectzoo/detectors/text/radar.py b/detectzoo/detectors/text/radar.py index 5f204eb..8c93fe8 100644 --- a/detectzoo/detectors/text/radar.py +++ b/detectzoo/detectors/text/radar.py @@ -66,7 +66,8 @@ def _load_model(self) -> None: logger.info("Loading RADAR model '%s' …", self.model_name) self._cls_tokenizer = AutoTokenizer.from_pretrained(self.model_name) self._cls_model = AutoModelForSequenceClassification.from_pretrained( - self.model_name, num_labels=2, + self.model_name, + num_labels=2, ).to(self._device) self._cls_model.eval() diff --git a/detectzoo/detectors/text/raidar.py b/detectzoo/detectors/text/raidar.py index fc6c6aa..32d49ea 100644 --- a/detectzoo/detectors/text/raidar.py +++ b/detectzoo/detectors/text/raidar.py @@ -56,11 +56,12 @@ def _levenshtein_ratio(s1: str, s2: str) -> float: def _ngram_overlap(text1: str, text2: str, n: int) -> float: """Normalised n-gram overlap between two texts.""" + def _ngrams(text: str, n: int) -> dict[tuple[str, ...], int]: tokens = text.lower().split() ng: dict[tuple[str, ...], int] = {} for i in range(len(tokens) - n + 1): - key = tuple(tokens[i:i + n]) + key = tuple(tokens[i : i + n]) ng[key] = ng.get(key, 0) + 1 return ng @@ -115,9 +116,9 @@ def _load_model(self) -> None: logger.info("Loading Raidar rewrite model '%s' …", self.rewrite_model_name) self._rw_tokenizer = AutoTokenizer.from_pretrained(self.rewrite_model_name) - self._rw_model = AutoModelForSeq2SeqLM.from_pretrained( - self.rewrite_model_name - ).to(self._device) + self._rw_model = AutoModelForSeq2SeqLM.from_pretrained(self.rewrite_model_name).to( + self._device + ) self._rw_model.eval() @property @@ -135,12 +136,16 @@ def rw_tokenizer(self): @torch.no_grad() def _rewrite(self, text: str) -> str: enc = self.rw_tokenizer( - text, return_tensors="pt", truncation=True, + text, + return_tensors="pt", + truncation=True, max_length=self.max_length, ).to(self._device) out = self.rw_model.generate( - **enc, max_new_tokens=self.max_length, - num_beams=4, length_penalty=1.0, + **enc, + max_new_tokens=self.max_length, + num_beams=4, + length_penalty=1.0, ) return self.rw_tokenizer.decode(out[0], skip_special_tokens=True) diff --git a/detectzoo/detectors/text/remodetect.py b/detectzoo/detectors/text/remodetect.py index 2c8356d..b2af7c7 100644 --- a/detectzoo/detectors/text/remodetect.py +++ b/detectzoo/detectors/text/remodetect.py @@ -64,9 +64,8 @@ def _load_model(self) -> None: logger.info("Loading ReMoDetect reward model '%s' …", self.model_name) self._rm_tokenizer = AutoTokenizer.from_pretrained(self.model_name) - self._rm_model = ( - AutoModelForSequenceClassification.from_pretrained(self.model_name) - .to(self._device) + self._rm_model = AutoModelForSequenceClassification.from_pretrained(self.model_name).to( + self._device ) self._rm_model.eval() diff --git a/detectzoo/detectors/text/revise_detect.py b/detectzoo/detectors/text/revise_detect.py index b4bc548..878b8cd 100644 --- a/detectzoo/detectors/text/revise_detect.py +++ b/detectzoo/detectors/text/revise_detect.py @@ -76,9 +76,9 @@ def _load_model(self) -> None: logger.info("Loading revision model '%s' …", self.revision_model_name) self._seq2seq_tokenizer = AutoTokenizer.from_pretrained(self.revision_model_name) - self._seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained( - self.revision_model_name - ).to(self._device) + self._seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(self.revision_model_name).to( + self._device + ) self._seq2seq_model.eval() @property @@ -101,10 +101,16 @@ def seq2seq_tokenizer(self): def _revise(self, text: str) -> str: """Generate a revised version of *text* using the seq2seq model.""" enc = self.seq2seq_tokenizer( - text, return_tensors="pt", truncation=True, max_length=self.max_length, + text, + return_tensors="pt", + truncation=True, + max_length=self.max_length, ).to(self._device) out = self.seq2seq_model.generate( - **enc, max_new_tokens=self.max_length, num_beams=4, length_penalty=1.0, + **enc, + max_new_tokens=self.max_length, + num_beams=4, + length_penalty=1.0, ) return self.seq2seq_tokenizer.decode(out[0], skip_special_tokens=True) @@ -116,10 +122,16 @@ def _revise(self, text: str) -> str: def _bart_score(self, source: str, target: str) -> float: """Compute average token log-prob of *target* conditioned on *source*.""" src_enc = self.seq2seq_tokenizer( - source, return_tensors="pt", truncation=True, max_length=self.max_length, + source, + return_tensors="pt", + truncation=True, + max_length=self.max_length, ).to(self._device) tgt_enc = self.seq2seq_tokenizer( - target, return_tensors="pt", truncation=True, max_length=self.max_length, + target, + return_tensors="pt", + truncation=True, + max_length=self.max_length, ) labels = tgt_enc["input_ids"].to(self._device) diff --git a/detectzoo/detectors/text/text_fluoroscopy.py b/detectzoo/detectors/text/text_fluoroscopy.py index afa77d4..b41766c 100644 --- a/detectzoo/detectors/text/text_fluoroscopy.py +++ b/detectzoo/detectors/text/text_fluoroscopy.py @@ -68,9 +68,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # ------------------------------------------------------------------ -def _last_token_pool( - hidden_states: torch.Tensor, attention_mask: torch.Tensor -) -> torch.Tensor: +def _last_token_pool(hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor: """Pool the last non-padding token from *hidden_states*.""" left_padding = attention_mask[:, -1].sum() == attention_mask.shape[0] if left_padding: @@ -155,7 +153,8 @@ def _load_model(self) -> None: logger.info("Loading Text Fluoroscopy encoder '%s' …", self.model_name) self._tokenizer = AutoTokenizer.from_pretrained( - self.model_name, trust_remote_code=True, + self.model_name, + trust_remote_code=True, ) if self._tokenizer.pad_token is None: self._tokenizer.pad_token = self._tokenizer.eos_token @@ -170,7 +169,8 @@ def _load_model(self) -> None: load_kwargs["low_cpu_mem_usage"] = True self._model = AutoModelForCausalLM.from_pretrained( - self.model_name, **load_kwargs, + self.model_name, + **load_kwargs, ) if self._device.type != "cuda": self._model.to(self._device) @@ -198,7 +198,8 @@ def _ensure_classifier(self, input_dim: int) -> _BinaryClassifier: @torch.no_grad() def _forward_hidden( - self, text: str, + self, + text: str, ) -> tuple[tuple[torch.Tensor, ...], torch.Tensor]: """Tokenise *text*, run the encoder, and return (hidden_states, attention_mask). @@ -219,7 +220,8 @@ def _forward_hidden( @torch.no_grad() def _compute_kl_divergences( - self, hidden_states: tuple[torch.Tensor, ...], + self, + hidden_states: tuple[torch.Tensor, ...], ) -> list[float]: """KL(middle‖first) + KL(middle‖last) for each middle layer. @@ -232,10 +234,12 @@ def _compute_kl_divergences( vocab_head = _get_vocab_head(self.model) first_probs = F.softmax( - vocab_head(hidden_states[0]).squeeze(0).float(), dim=-1, + vocab_head(hidden_states[0]).squeeze(0).float(), + dim=-1, ) last_probs = F.softmax( - vocab_head(hidden_states[-1]).squeeze(0).float(), dim=-1, + vocab_head(hidden_states[-1]).squeeze(0).float(), + dim=-1, ) kls: list[float] = [] @@ -254,7 +258,8 @@ def _compute_kl_divergences( return kls def _select_layer( - self, hidden_states: tuple[torch.Tensor, ...], + self, + hidden_states: tuple[torch.Tensor, ...], ) -> tuple[int, list[float]]: """Return ``(layer_index, kl_values)``. @@ -270,7 +275,8 @@ def _select_layer( @torch.no_grad() def _extract_features( - self, text: str, + self, + text: str, ) -> tuple[torch.Tensor, dict[str, Any]]: """Extract the intrinsic-layer embedding and metadata for *text*.""" hidden_states, attention_mask = self._forward_hidden(text) diff --git a/detectzoo/detectors/text/tocsin.py b/detectzoo/detectors/text/tocsin.py index 494effc..31edd5e 100644 --- a/detectzoo/detectors/text/tocsin.py +++ b/detectzoo/detectors/text/tocsin.py @@ -65,8 +65,7 @@ def __init__( device: str = "cpu", **kwargs: Any, ) -> None: - super().__init__(model_name=model_name, threshold=threshold, - device=device, **kwargs) + super().__init__(model_name=model_name, threshold=threshold, device=device, **kwargs) self.bart_model_name = bart_model self.n_copies = n_copies self.deletion_rate = deletion_rate @@ -82,9 +81,9 @@ def _load_bart(self) -> None: logger.info("Loading BART model '%s' for BARTScore …", self.bart_model_name) self._bart_tokenizer = AutoTokenizer.from_pretrained(self.bart_model_name) - self._bart_model = AutoModelForSeq2SeqLM.from_pretrained( - self.bart_model_name - ).to(self._device) + self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(self.bart_model_name).to( + self._device + ) self._bart_model.eval() @property @@ -115,10 +114,16 @@ def _random_delete(self, text: str) -> str: def _bart_score(self, source: str, target: str) -> float: """BARTScore: avg token log-prob of *target* given *source*.""" src_enc = self.bart_tokenizer( - source, return_tensors="pt", truncation=True, max_length=1024, + source, + return_tensors="pt", + truncation=True, + max_length=1024, ).to(self._device) tgt_enc = self.bart_tokenizer( - target, return_tensors="pt", truncation=True, max_length=1024, + target, + return_tensors="pt", + truncation=True, + max_length=1024, ) labels = tgt_enc["input_ids"].to(self._device) diff --git a/detectzoo/utils/hf_quiet.py b/detectzoo/utils/hf_quiet.py index c074d25..c70a35b 100644 --- a/detectzoo/utils/hf_quiet.py +++ b/detectzoo/utils/hf_quiet.py @@ -49,3 +49,6 @@ def configure_hf_quiet() -> None: logging.getLogger(name).setLevel(logging.ERROR) _sync_huggingface_hub_verbosity() + + +configure_hf_quiet() diff --git a/examples/audio/_smoke_test_audio_replicate.py b/examples/audio/_smoke_test_audio_replicate.py index a614fdd..c4c4feb 100644 --- a/examples/audio/_smoke_test_audio_replicate.py +++ b/examples/audio/_smoke_test_audio_replicate.py @@ -1,4 +1,5 @@ """Smoke test reproducibility/audio/audio_replicate.py (run manually, not part of CI).""" + from __future__ import annotations import csv @@ -11,12 +12,11 @@ from pathlib import Path from unittest.mock import patch -REPO = Path(__file__).resolve().parents[2] -sys.path.insert(0, str(REPO)) - from detectzoo import load_dataset from detectzoo.core.base import BaseDetector, DetectionResult +REPO = Path(__file__).resolve().parents[2] + SCRIPT = REPO / "reproducibility" / "audio" / "audio_replicate.py" spec = importlib.util.spec_from_file_location("audio_replicate", SCRIPT) mod = importlib.util.module_from_spec(spec) @@ -132,7 +132,11 @@ def main() -> int: items = ds.load() assert_balanced(items, cap) n0 = sum(i.label == 0 for i in items) - print(f"[OK] load_dataset {name} max_samples={cap} -> {len(items)} ({n0}+{len(items)-n0})") + n1 = len(items) - n0 + print( + f"[OK] load_dataset {name} max_samples={cap} -> {len(items)} " + f"({n0}+{n1})" + ) except Exception as exc: errors.append(f"load_dataset {name} n={cap}: {exc}") @@ -192,7 +196,9 @@ class Args: ] if ds_name == "deepfake_eval_2024": argv.extend(["--split", "test"]) - with patch.object(mod, "load_detector", side_effect=lambda name, **kw: MockAudioDetector(**kw)): + with patch.object( + mod, "load_detector", side_effect=lambda name, **kw: MockAudioDetector(**kw) + ): with patch.object(sys, "argv", argv): mod.main() diff --git a/examples/audio/audio_replicate.py b/examples/audio/audio_replicate.py index c167f3b..322be9d 100644 --- a/examples/audio/audio_replicate.py +++ b/examples/audio/audio_replicate.py @@ -18,16 +18,11 @@ import argparse import random -import sys import traceback from datetime import datetime from pathlib import Path from typing import Any, List -REPO_ROOT = Path(__file__).resolve().parents[2] -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) - from detectzoo import load_dataset, load_detector from detectzoo.benchmarks import BenchmarkEvaluator @@ -220,9 +215,7 @@ def main() -> None: n_ai = sum(1 for it in items if it.label == 1) print(f" loaded {n} items (bonafide={n_h}, spoof={n_ai})") if n_h == 0 or n_ai == 0: - print( - " [WARN] Single-class eval set — EER / ROC-AUC will be undefined (NaN)." - ) + print(" [WARN] Single-class eval set — EER / ROC-AUC will be undefined (NaN).") print(f"\nLoading {len(args.detectors)} detector(s) on {args.device} …") detectors: List[Any] = [] diff --git a/examples/custom_detector.py b/examples/custom_detector.py index 9ef1ba6..8c0d9a0 100644 --- a/examples/custom_detector.py +++ b/examples/custom_detector.py @@ -59,7 +59,7 @@ def main() -> None: for label, text in samples: result = detector.predict(text) - print(f" [{label}] \"{text[:60]}…\"") + print(f' [{label}] "{text[:60]}…"') print( f" score={result.score:.2f} label={result.label} " f"avg_word_length={result.metadata['avg_word_length']:.2f}\n" diff --git a/examples/image/image_replicate.py b/examples/image/image_replicate.py index 7cf8043..b68cae5 100644 --- a/examples/image/image_replicate.py +++ b/examples/image/image_replicate.py @@ -8,13 +8,12 @@ """ import argparse -import sys from pathlib import Path -REPO_ROOT = Path(__file__).resolve().parents[2] -if str(REPO_ROOT) not in sys.path: - sys.path.insert(0, str(REPO_ROOT)) +import torch +from detectzoo import load_dataset, load_detector +from detectzoo.benchmarks.evaluator import BenchmarkEvaluator DATASETS_DICT = { "self_synthesis": { @@ -66,10 +65,6 @@ def main(): args = parse_args() dataset_info = DATASETS_DICT[args.dataset] - import torch - from detectzoo import load_dataset, load_detector - from detectzoo.benchmarks.evaluator import BenchmarkEvaluator - device = "cpu" if args.cpu else ("cuda" if torch.cuda.is_available() else "cpu") dataset_kwargs = dict(dataset_info["dataset_kwargs"]) diff --git a/examples/text/gecscore_replicate.py b/examples/text/gecscore_replicate.py index 387fc17..7b4789f 100644 --- a/examples/text/gecscore_replicate.py +++ b/examples/text/gecscore_replicate.py @@ -64,8 +64,7 @@ GECSCORE_OWNER_REPO = "NLP2CT/GECScore" GECSCORE_API_BASE = f"https://api.github.com/repos/{GECSCORE_OWNER_REPO}/contents/data/normal_data" GECSCORE_RAW_BASE = ( - f"https://raw.githubusercontent.com/{GECSCORE_OWNER_REPO}/" - "refs/heads/main/data/normal_data" + f"https://raw.githubusercontent.com/{GECSCORE_OWNER_REPO}/refs/heads/main/data/normal_data" ) NORMAL_DATA_SUFFIX = ".normal.test_data.json" @@ -111,17 +110,13 @@ def slug(self) -> str: def _http_get_json(url: str) -> Any: - req = urllib.request.Request( - url, headers={"User-Agent": "DetectZoo-GECScore-replicate"} - ) + req = urllib.request.Request(url, headers={"User-Agent": "DetectZoo-GECScore-replicate"}) with urllib.request.urlopen(req, timeout=60) as resp: return json.load(resp) def _http_get_bytes(url: str) -> bytes: - req = urllib.request.Request( - url, headers={"User-Agent": "DetectZoo-GECScore-replicate"} - ) + req = urllib.request.Request(url, headers={"User-Agent": "DetectZoo-GECScore-replicate"}) with urllib.request.urlopen(req, timeout=120) as resp: return resp.read() @@ -199,9 +194,7 @@ def _label_to_binary(label: Any) -> int: return 0 if s in ("llm", "machine", "ai", "fake", "1"): return 1 - raise ValueError( - f"Unsupported label: {label!r} (expected 'human' or 'llm')" - ) + raise ValueError(f"Unsupported label: {label!r} (expected 'human' or 'llm')") class GECScoreJsonDataset(BaseDataset): @@ -258,17 +251,62 @@ def _safe_slug(s: str) -> str: def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument("--sources", nargs="+", default=None, help="Only these corpora (e.g. xsum writing). Default: all files on GitHub.") - p.add_argument("--models", nargs="+", default=None, help="Only these generator names as in the filename (e.g. GPT-4o gpt3.5). Default: all files on GitHub.") - p.add_argument("--data-url", type=str, default=None, help="If set, evaluate this single URL only and skip API discovery. Cache path defaults to data/gecscore/normal_data/ derived from the URL name.") - p.add_argument("--cache-path", type=Path, default=None, help="With --data-url, where to save the file (default: under data/gecscore/normal_data/).") - p.add_argument("--max-samples", type=int, default=None, help="Cap samples per file for quick debug runs.") + p = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + p.add_argument( + "--sources", + nargs="+", + default=None, + help="Only these corpora (e.g. xsum writing). Default: all files on GitHub.", + ) + p.add_argument( + "--models", + nargs="+", + default=None, + help=( + "Only these generator names as in the filename (e.g. GPT-4o gpt3.5). " + "Default: all files on GitHub." + ), + ) + p.add_argument( + "--data-url", + type=str, + default=None, + help=( + "If set, evaluate this single URL only and skip API discovery. " + "Cache path defaults to data/gecscore/normal_data/ from the URL name." + ), + ) + p.add_argument( + "--cache-path", + type=Path, + default=None, + help="With --data-url, where to save the file (default: under data/gecscore/normal_data/).", + ) + p.add_argument( + "--max-samples", type=int, default=None, help="Cap samples per file for quick debug runs." + ) p.add_argument("--device", type=str, default="cuda", help="Device for detectors.") - p.add_argument("--detectors", nargs="+", default=DEFAULT_DETECTOR_NAMES, help="Detector registry names.") - p.add_argument("--output-dir", type=Path, default=Path("experiments"), help="Directory for per-file benchmark JSONs.") - p.add_argument("--save-scores", action="store_true", help="Store per-sample scores in each output JSON (like imbd).") - p.add_argument("--list-only", action="store_true", help="List discovered files and exit (no download or evaluation).") + p.add_argument( + "--detectors", nargs="+", default=DEFAULT_DETECTOR_NAMES, help="Detector registry names." + ) + p.add_argument( + "--output-dir", + type=Path, + default=Path("experiments"), + help="Directory for per-file benchmark JSONs.", + ) + p.add_argument( + "--save-scores", + action="store_true", + help="Store per-sample scores in each output JSON (like imbd).", + ) + p.add_argument( + "--list-only", + action="store_true", + help="List discovered files and exit (no download or evaluation).", + ) return p.parse_args() @@ -377,7 +415,9 @@ def main() -> None: out_path = args.output_dir / f"gecscore__{out_slug}__{ts}.json" evaluator = BenchmarkEvaluator(dataset) try: - evaluator.run_and_save(detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True) + evaluator.run_and_save( + detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True + ) print(f" results -> {out_path}") except Exception: print(f" [ERROR] evaluation failed for {f.slug}") diff --git a/examples/text/imbd_replicate.py b/examples/text/imbd_replicate.py index 33c2b99..591005b 100644 --- a/examples/text/imbd_replicate.py +++ b/examples/text/imbd_replicate.py @@ -175,17 +175,21 @@ def _load_all(self) -> List[DatasetItem]: items: List[DatasetItem] = [] for idx, (human, ai) in enumerate(zip(originals, rewrittens)): if isinstance(human, str) and human.strip(): - items.append(DatasetItem( - data=human, - label=0, - metadata={"source": "human", "index": idx, "file": str(self.path)}, - )) + items.append( + DatasetItem( + data=human, + label=0, + metadata={"source": "human", "index": idx, "file": str(self.path)}, + ) + ) if isinstance(ai, str) and ai.strip(): - items.append(DatasetItem( - data=ai, - label=1, - metadata={"source": "ai", "index": idx, "file": str(self.path)}, - )) + items.append( + DatasetItem( + data=ai, + label=1, + metadata={"source": "ai", "index": idx, "file": str(self.path)}, + ) + ) return items @@ -195,15 +199,49 @@ def _load_all(self) -> List[DatasetItem]: def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument("--tasks", nargs="+", default=TASKS, choices=TASKS, help="ImBD task folders to evaluate (default: all).") - p.add_argument("--models", nargs="+", default=MODELS, help="Generator model folders to evaluate (default: all).") - p.add_argument("--sources", nargs="+", default=None, help="Restrict to these source corpora (e.g. xsum writing pubmed squad).") - p.add_argument("--detectors", nargs="+", default=DEFAULT_DETECTOR_NAMES, help="Detector registry names to run.") - p.add_argument("--device", type=str, default="cuda", help="Device for detectors (default: cuda).") - p.add_argument("--max-samples", type=int, default=None, help="Cap samples per file for quick debug runs.") - p.add_argument("--output-dir", type=Path, default=Path("experiments"), help="Directory for per-file benchmark JSONs.") - p.add_argument("--save-scores", action="store_true", help="Store per-sample scores in each output JSON.") + p = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + p.add_argument( + "--tasks", + nargs="+", + default=TASKS, + choices=TASKS, + help="ImBD task folders to evaluate (default: all).", + ) + p.add_argument( + "--models", + nargs="+", + default=MODELS, + help="Generator model folders to evaluate (default: all).", + ) + p.add_argument( + "--sources", + nargs="+", + default=None, + help="Restrict to these source corpora (e.g. xsum writing pubmed squad).", + ) + p.add_argument( + "--detectors", + nargs="+", + default=DEFAULT_DETECTOR_NAMES, + help="Detector registry names to run.", + ) + p.add_argument( + "--device", type=str, default="cuda", help="Device for detectors (default: cuda)." + ) + p.add_argument( + "--max-samples", type=int, default=None, help="Cap samples per file for quick debug runs." + ) + p.add_argument( + "--output-dir", + type=Path, + default=Path("experiments"), + help="Directory for per-file benchmark JSONs.", + ) + p.add_argument( + "--save-scores", action="store_true", help="Store per-sample scores in each output JSON." + ) return p.parse_args() @@ -274,7 +312,9 @@ def main() -> None: out_path = args.output_dir / f"imbd__{file.slug}__{ts}.json" evaluator = BenchmarkEvaluator(dataset) try: - evaluator.run_and_save(detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True) + evaluator.run_and_save( + detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True + ) print(f" results -> {out_path}") except Exception: print(f" [ERROR] evaluation failed for {file.slug}") diff --git a/examples/text/ood_replicate.py b/examples/text/ood_replicate.py index 11c0d94..bc52c43 100644 --- a/examples/text/ood_replicate.py +++ b/examples/text/ood_replicate.py @@ -50,16 +50,49 @@ def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument("--max-samples", type=int, default=10000, help="Max RAID rows to load (default: 10000).") + p = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + p.add_argument( + "--max-samples", type=int, default=10000, help="Max RAID rows to load (default: 10000)." + ) p.add_argument("--split", type=str, default="test", help="RAID split (default: test).") - p.add_argument("--device", type=str, default="cuda", help="Device for detectors (default: cuda).") - p.add_argument("--hf-repo", type=str, default="Shengkun/Raid_split", help="HuggingFace dataset id for RAID split.") - p.add_argument("--attacks", nargs="+", default=None, help="Pass through to RAIDDataset (e.g. `none` for non-adversarial only).") - p.add_argument("--models", nargs="+", default=None, help="Pass through to RAIDDataset: restrict to these generators (e.g. `human` `chatgpt`).") - p.add_argument("--detectors", nargs="+", default=DEFAULT_DETECTOR_NAMES, help="Detector registry names to run (default: OOD-paper set).") - p.add_argument("--output-dir", type=Path, default=Path("experiments"), help="Directory for result JSON files.") - p.add_argument("--save-scores", action="store_true", help="Store per-sample scores in the output JSON.") + p.add_argument( + "--device", type=str, default="cuda", help="Device for detectors (default: cuda)." + ) + p.add_argument( + "--hf-repo", + type=str, + default="Shengkun/Raid_split", + help="HuggingFace dataset id for RAID split.", + ) + p.add_argument( + "--attacks", + nargs="+", + default=None, + help="Pass through to RAIDDataset (e.g. `none` for non-adversarial only).", + ) + p.add_argument( + "--models", + nargs="+", + default=None, + help="Pass through to RAIDDataset: restrict to these generators (e.g. `human` `chatgpt`).", + ) + p.add_argument( + "--detectors", + nargs="+", + default=DEFAULT_DETECTOR_NAMES, + help="Detector registry names to run (default: OOD-paper set).", + ) + p.add_argument( + "--output-dir", + type=Path, + default=Path("experiments"), + help="Directory for result JSON files.", + ) + p.add_argument( + "--save-scores", action="store_true", help="Store per-sample scores in the output JSON." + ) return p.parse_args() @@ -80,8 +113,7 @@ def main() -> None: ds_kwargs["models"] = args.models print( - f"Loading RAID: split={args.split}, max_samples={args.max_samples}, " - f"hf_repo={args.hf_repo}" + f"Loading RAID: split={args.split}, max_samples={args.max_samples}, hf_repo={args.hf_repo}" ) try: dataset = load_dataset("raid", **ds_kwargs) @@ -132,7 +164,9 @@ def main() -> None: evaluator = BenchmarkEvaluator(dataset) try: - evaluator.run_and_save(detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True) + evaluator.run_and_save( + detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True + ) print(f" results -> {out_path}") except Exception: print(" [ERROR] evaluation failed") diff --git a/examples/text/text_fluoroscopy_replicate.py b/examples/text/text_fluoroscopy_replicate.py index 2e6ce4d..21313c4 100644 --- a/examples/text/text_fluoroscopy_replicate.py +++ b/examples/text/text_fluoroscopy_replicate.py @@ -158,14 +158,37 @@ def _load_all(self) -> List[DatasetItem]: def parse_args() -> argparse.Namespace: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument("--files", nargs="+", required=True, metavar="NAME", help="Exact processed_data/*.json basename(s) from the Text-Fluoroscopy repo.") - p.add_argument("--force-download", action="store_true", help="Re-download even if cache exists.") - p.add_argument("--max-samples", type=int, default=None, help="Cap items per file (default: all).") + p = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + p.add_argument( + "--files", + nargs="+", + required=True, + metavar="NAME", + help="Exact processed_data/*.json basename(s) from the Text-Fluoroscopy repo.", + ) + p.add_argument( + "--force-download", action="store_true", help="Re-download even if cache exists." + ) + p.add_argument( + "--max-samples", type=int, default=None, help="Cap items per file (default: all)." + ) p.add_argument("--device", type=str, default="cuda", help="Device for detectors.") - p.add_argument("--detectors", nargs="+", default=DEFAULT_DETECTOR_NAMES, help="Detector registry names.") - p.add_argument("--output-dir", type=Path, default=Path("experiments"), help="Directory for result JSON files.") - p.add_argument("--save-scores", action="store_true", help="Store per-sample labels and scores in the output JSON.") + p.add_argument( + "--detectors", nargs="+", default=DEFAULT_DETECTOR_NAMES, help="Detector registry names." + ) + p.add_argument( + "--output-dir", + type=Path, + default=Path("experiments"), + help="Directory for result JSON files.", + ) + p.add_argument( + "--save-scores", + action="store_true", + help="Store per-sample labels and scores in the output JSON.", + ) return p.parse_args() @@ -232,7 +255,9 @@ def main() -> None: out_path = args.output_dir / f"text_fluoroscopy__{stem}__{ts}.json" evaluator = BenchmarkEvaluator(dataset) try: - evaluator.run_and_save(detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True) + evaluator.run_and_save( + detectors, out_path, save_scores=args.save_scores, meta=meta, incremental=True + ) print(f" results -> {out_path}") except Exception: print(f" [ERROR] evaluation failed for {filename}") diff --git a/tests/test_core.py b/tests/test_core.py index 8a261b1..4eff5d2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -59,10 +59,25 @@ def test_text_detectors_present(self): assert len(text) >= 18, f"Expected >=18 text detectors, got {sorted(text)}" # A representative, stable subset that should always exist. expected = { - "log_likelihood", "log_rank", "rank", "entropy", "detectgpt", - "fast_detectgpt", "binoculars", "lrr", "npr", "dna_gpt", - "revise_detect", "imbd", "lastde", "lastde_pp", "radar", - "text_fluoroscopy", "coco", "roberta_base", "roberta_large", + "log_likelihood", + "log_rank", + "rank", + "entropy", + "detectgpt", + "fast_detectgpt", + "binoculars", + "lrr", + "npr", + "dna_gpt", + "revise_detect", + "imbd", + "lastde", + "lastde_pp", + "radar", + "text_fluoroscopy", + "coco", + "roberta_base", + "roberta_large", } missing = expected - text assert not missing, f"Missing expected text detectors: {missing}" diff --git a/tests/test_utils.py b/tests/test_utils.py index 682653f..2e6469d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -65,8 +65,16 @@ def test_all_wrong(self): def test_threshold_dependent_keys_present(self): m = compute_metrics([0, 1], [0.2, 0.8], threshold=0.5) for key in ( - "accuracy", "precision", "recall", "f1", "tpr", "fpr", - "roc_auc", "pr_auc", "avg_precision", "eer", + "accuracy", + "precision", + "recall", + "f1", + "tpr", + "fpr", + "roc_auc", + "pr_auc", + "avg_precision", + "eer", ): assert key in m