sadjadeb · sadjadeb · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/.gitignore b/.gitignore
@@ -71,10 +71,3 @@ scripts/smoke_*
 scripts/diag_*
 scripts/scratch_*
 scripts/local/
-
-
-# Temporarily excluded dataset modules
-tests/
-maura/
-examples/
-print_repro_results/
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+prune tests
diff --git a/METHODS_AND_MODELS.md b/METHODS_AND_MODELS.md
@@ -188,3 +188,5 @@ Datasets integrate with `load_dataset(name, ...)` (see registry via `list_datase
 | FoR | `FoRDataset` | Fake-or-Real speech corpus covering a range of TTS and voice conversion systems. Registry: `for`. | Official website |
 | In-the-Wild | `InTheWildDataset` | Celebrity / politician audio from online video (Müller et al., 2022); ~31.8k clips. Registry: `in_the_wild`. | Hugging Face (`mueller91/In-The-Wild`) |
 | Deepfake-Eval-2024 | `DeepfakeEval2024Dataset` | Social-media / TrueMedia.org deepfakes (2024); audio split ~40k clips. Registry: `deepfake_eval_2024`. | Hugging Face (`nuriachandra/Deepfake-Eval-2024`, gated) |
+
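+**Note:** The Deepfake-Eval-2024 dataset (`nuriachandra/Deepfake-Eval-2024`) is gated on Hugging Face. Request access on its dataset page and authenticate with an approved account (e.g. `huggingface-cli login`) before loading `DeepfakeEval2024Dataset`.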
diff --git a/README.md b/README.md
@@ -28,14 +28,14 @@ cd detectzoo
 pip install -e .
 ```
 
-Optional extras:
+Optional extra for contributors (`pytest`, `pytest-cov`, `ruff`):
 
 ```bash
-pip install -e ".[image,audio]"      # everything for image + audio detectors
-pip install detectzoo[datasets]     # when you need ModelScope / gdown-based downloads
-pip install -e ".[dev]"             # contributors
+pip install -e ".[dev]"
 ```
 
+The base install already includes the dependencies for text, image, and audio detectors, as well as the dataset download helpers (`modelscope`, `gdown`).
+
 ---
 
 ## Quick Start
@@ -227,16 +227,42 @@ The detector is then immediately available via `load_detector("my_detector")`. S
 
 ## Examples
 
-The `examples/` directory contains self-contained scripts you can run immediately:
+The `examples/` directory contains runnable scripts grouped by modality. Most replication scripts download public benchmark data, run detectors with `BenchmarkEvaluator`, and write metrics under `experiments/`.
+
+### Getting started
 
 | Script | Description |
 |--------|-------------|
-| `text_detection.py` | Compare text detectors (log-likelihood, log-rank, entropy, fast-detectgpt) on sample human and AI passages. |
+| [custom_detector.py](examples/custom_detector.py) | Create, register, and use a toy custom text detector (`word_length`). |
+
+### Replication scripts
 
-Run any example from the project root:
+| Script | Description |
+|--------|-------------|
+| [text/ood_replicate.py](examples/text/ood_replicate.py) | Replicate OOD paper baselines on the labeled RAID test split (default 1000 samples). |
+| [text/gecscore_replicate.py](examples/text/gecscore_replicate.py) | Replicate GECScore baselines on released `normal_data` JSON files (per source × generator model). |
+| [text/imbd_replicate.py](examples/text/imbd_replicate.py) | Replicate ImBD baselines on released rewrite/paraphrase JSON (human `original` vs AI `rewritten`). |
+| [text/text_fluoroscopy_replicate.py](examples/text/text_fluoroscopy_replicate.py) | Replicate Text-Fluoroscopy baselines on processed JSON files from the authors' repo. |
+| [image/image_replicate.py](examples/image/image_replicate.py) | Run image detectors on built-in datasets (`self_synthesis`, `aigcdetect`, `cnn_detection`, `genimage`, `univfd_diffusion`) and save benchmark JSON. |
+| [audio/audio_replicate.py](examples/audio/audio_replicate.py) | Run audio detectors on built-in benchmarks (`asvspoof2019`, `for`, `in_the_wild`, `deepfake_eval_2024`) with balanced sampling. |
+
+Run from the project root:
 
 ```bash
-python examples/text_detection.py --device cuda
+# Quick start — no downloads
+python examples/custom_detector.py
+
+# Text replication (OOD on RAID)
+python examples/text/ood_replicate.py --device cuda --max-samples 100
+
+# Image replication
+python examples/image/image_replicate.py \
+    --dataset self_synthesis \
+    --partitions AttGAN BEGAN \
+    --detectors cnnspot patchcraft univfd
+
+# Audio replication
+python examples/audio/audio_replicate.py --dataset in_the_wild --detectors rawnet2 aasist
 ```
 
 ---

diff --git a/examples/image/image_replicate.py b/examples/image/image_replicate.py
@@ -51,7 +51,7 @@ def parse_args():
     parser.add_argument("--detectors", nargs="+", required=True)
     parser.add_argument("--save-scores", action="store_true")
     parser.add_argument("--cpu", action="store_true")
-    parser.add_argument("--output-dir", type=Path, default=Path("results"))
+    parser.add_argument("--output-dir", type=Path, default=Path("experiments"))
     return parser.parse_args()
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "detectzoo"
-version = "0.1.0"
+version = "0.1.4"
 description = "DetectZoo: A Unified Toolkit for AI-Generated Content Detection Across Text, Audio, and Image Modalities"
 readme = "README.md"
 requires-python = ">=3.9"
-license = {text = "Apache-2.0"}
+license = "Apache-2.0"
 keywords = ["ai-detection", "deepfake", "llm", "generative-ai", "machine-learning"]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -36,29 +36,22 @@ dependencies = [
     "torch>=2.11.0",
     "tqdm>=4.67.3",
     "transformers>=4.57.6",
-]
-
-[project.optional-dependencies]
-datasets = [
-    "modelscope>=1.9",
-    "gdown>=4.0",
-]
-image = [
     "torchvision>=0.26.0",
     "Pillow>=12.1.1",
     "open-clip-torch>=2.20",
     "diffusers>=0.21",
     "lpips>=0.1.4",
     "pytorch-wavelets>=1.3",
     "timm>=0.9.0",
-]
-audio = [
+    "modelscope>=1.9",
+    "gdown>=4.0",
     "torchaudio>=2.11.0",
     "librosa>=0.11.0",
     "soundfile>=0.13.1",
 ]
+
+[project.optional-dependencies]
 dev = [
-    "detectzoo[datasets,image,audio]",
     "pytest>=7.0",
     "pytest-cov>=4.0",
     "ruff>=0.15.5",
@@ -71,6 +64,7 @@ Issues = "https://anonymous.4open.science/r/DetectZoo-1BEC/issues"
 
 [tool.setuptools.packages.find]
 include = ["detectzoo*"]
+exclude = ["tests*"]
 
 [tool.ruff]
 line-length = 100