From e56deadf790d4790d544d387c6fa6abb0991ccfb Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:16:51 -0400 Subject: [PATCH 1/8] Add note about DeepfakeEval2024Dataset to doc file --- METHODS_AND_MODELS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/METHODS_AND_MODELS.md b/METHODS_AND_MODELS.md index cbfc3e6..40dc3fd 100644 --- a/METHODS_AND_MODELS.md +++ b/METHODS_AND_MODELS.md @@ -188,3 +188,5 @@ Datasets integrate with `load_dataset(name, ...)` (see registry via `list_datase | FoR | `FoRDataset` | Fake-or-Real speech corpus covering a range of TTS and voice conversion systems. Registry: `for`. | Official website | | In-the-Wild | `InTheWildDataset` | Celebrity / politician audio from online video (Müller et al., 2022); ~31.8k clips. Registry: `in_the_wild`. | Hugging Face (`mueller91/In-The-Wild`) | | Deepfake-Eval-2024 | `DeepfakeEval2024Dataset` | Social-media / TrueMedia.org deepfakes (2024); audio split ~40k clips. Registry: `deepfake_eval_2024`. | Hugging Face (`nuriachandra/Deepfake-Eval-2024`, gated) | + +* Note: the `DeepfakeEval2024Dataset` is gated on Hugging Face. You must first request access to the dataset from Hugging Face. 
\ No newline at end of file From 15520e99727b473fedb833963ee7bd3e540a35c5 Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:34:08 -0400 Subject: [PATCH 2/8] Add MANIFEST to prevent including tests/ in build files --- MANIFEST.in | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1eeef06 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +prune tests diff --git a/pyproject.toml b/pyproject.toml index e378d33..8954a30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ Issues = "https://anonymous.4open.science/r/DetectZoo-1BEC/issues" [tool.setuptools.packages.find] include = ["detectzoo*"] +exclude = ["tests*"] [tool.ruff] line-length = 100 From 4ac05426879cb47d710d15548509b5a7f1a36830 Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:35:07 -0400 Subject: [PATCH 3/8] Update output path in image example to be matched with other modules --- examples/image/image_replicate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/image/image_replicate.py b/examples/image/image_replicate.py index b68cae5..da00d5f 100644 --- a/examples/image/image_replicate.py +++ b/examples/image/image_replicate.py @@ -51,7 +51,7 @@ def parse_args(): parser.add_argument("--detectors", nargs="+", required=True) parser.add_argument("--save-scores", action="store_true") parser.add_argument("--cpu", action="store_true") - parser.add_argument("--output-dir", type=Path, default=Path("results")) + parser.add_argument("--output-dir", type=Path, default=Path("experiments")) return parser.parse_args() From 52e513ab7fa05fd9c39d7bc9b7b0c0db9260611a Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:37:46 -0400 Subject: [PATCH 4/8] Update .gitignore to drop temp paths --- .gitignore | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.gitignore b/.gitignore index 668b868..8eba00c 100644 --- 
a/.gitignore +++ b/.gitignore @@ -71,10 +71,3 @@ scripts/smoke_* scripts/diag_* scripts/scratch_* scripts/local/ - - -# Temporarily excluded dataset modules -tests/ -maura/ -examples/ -print_repro_results/ From 28e8b511e35385fd2efa4657030e106ee971f188 Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:42:54 -0400 Subject: [PATCH 5/8] Make all audio and image packages main dependencies --- README.md | 8 ++++---- pyproject.toml | 15 ++++----------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 1ede591..c37d093 100644 --- a/README.md +++ b/README.md @@ -28,14 +28,14 @@ cd detectzoo pip install -e . ``` -Optional extras: +Optional extra for contributors (`pytest`, `pytest-cov`, `ruff`): ```bash -pip install -e ".[image,audio]" # everything for image + audio detectors -pip install detectzoo[datasets] # when you need ModelScope / gdown-based downloads -pip install -e ".[dev]" # contributors +pip install -e ".[dev]" ``` +The base install already includes dependencies for text, image, and audio detectors, as well as the ModelScope / gdown dataset download helpers. 
+ --- ## Quick Start diff --git a/pyproject.toml b/pyproject.toml index 8954a30..0a00c78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,14 +36,6 @@ dependencies = [ "torch>=2.11.0", "tqdm>=4.67.3", "transformers>=4.57.6", -] - -[project.optional-dependencies] -datasets = [ - "modelscope>=1.9", - "gdown>=4.0", -] -image = [ "torchvision>=0.26.0", "Pillow>=12.1.1", "open-clip-torch>=2.20", @@ -51,14 +43,15 @@ image = [ "lpips>=0.1.4", "pytorch-wavelets>=1.3", "timm>=0.9.0", -] -audio = [ + "modelscope>=1.9", + "gdown>=4.0", "torchaudio>=2.11.0", "librosa>=0.11.0", "soundfile>=0.13.1", ] + +[project.optional-dependencies] dev = [ - "detectzoo[datasets,image,audio]", "pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.15.5", From e37ca51e43c0b39be5d6a8ea625cf953c2b3f98a Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:47:09 -0400 Subject: [PATCH 6/8] Complete example section in README --- README.md | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c37d093..ae11cbc 100644 --- a/README.md +++ b/README.md @@ -227,16 +227,42 @@ The detector is then immediately available via `load_detector("my_detector")`. S ## Examples -The `examples/` directory contains self-contained scripts you can run immediately: +The `examples/` directory contains runnable scripts grouped by modality. Most replication scripts download public benchmark data, run detectors with `BenchmarkEvaluator`, and write metrics under `experiments/`. + +### Getting started + +| Script | Description | +|--------|-------------| +| [custom_detector.py](examples/custom_detector.py) | Create, register, and use a toy custom text detector (`word_length`). | + +### Replication scripts | Script | Description | |--------|-------------| -| `text_detection.py` | Compare text detectors (log-likelihood, log-rank, entropy, fast-detectgpt) on sample human and AI passages. 
| +| [text/ood_replicate.py](examples/text/ood_replicate.py) | Replicate OOD paper baselines on the labeled RAID test split (default 1000 samples). | +| [text/gecscore_replicate.py](examples/text/gecscore_replicate.py) | Replicate GECScore baselines on released `normal_data` JSON files (per source × generator model). | +| [text/imbd_replicate.py](examples/text/imbd_replicate.py) | Replicate ImBD baselines on released rewrite/paraphrase JSON (human `original` vs AI `rewritten`). | +| [text/text_fluoroscopy_replicate.py](examples/text/text_fluoroscopy_replicate.py) | Replicate Text-Fluoroscopy baselines on processed JSON files from the authors' repo. | +| [image/image_replicate.py](examples/image/image_replicate.py) | Run image detectors on built-in datasets (`self_synthesis`, `aigcdetect`, `cnn_detection`, `genimage`, `univfd_diffusion`) and save benchmark JSON. | +| [audio/audio_replicate.py](examples/audio/audio_replicate.py) | Run audio detectors on built-in benchmarks (`asvspoof2019`, `for`, `in_the_wild`, `deepfake_eval_2024`) with balanced sampling. 
| -Run any example from the project root: +Run from the project root: ```bash -python examples/text_detection.py --device cuda +# Quick start — no downloads +python examples/custom_detector.py + +# Text replication (OOD on RAID) +python examples/text/ood_replicate.py --device cuda --max-samples 100 + +# Image replication +python examples/image/image_replicate.py \ + --dataset self_synthesis \ + --partitions AttGAN BEGAN \ + --detectors cnnspot patchcraft univfd + +# Audio replication +python examples/audio/audio_replicate.py --dataset in_the_wild --detectors rawnet2 aasist ``` --- From c0836dff9f607bfb662a9ba2e3f661dc4161e837 Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:47:47 -0400 Subject: [PATCH 7/8] Update license line in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0a00c78..5d3b15e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ version = "0.1.0" description = "DetectZoo: A Unified Toolkit for AI-Generated Content Detection Across Text, Audio, and Image Modalities" readme = "README.md" requires-python = ">=3.9" -license = {text = "Apache-2.0"} +license = "Apache-2.0" keywords = ["ai-detection", "deepfake", "llm", "generative-ai", "machine-learning"] classifiers = [ "Development Status :: 3 - Alpha", From a9bec1e88cbf96ae796ee05c0b29307717a1c420 Mon Sep 17 00:00:00 2001 From: Sajad Ebrahimi Date: Tue, 2 Jun 2026 20:48:35 -0400 Subject: [PATCH 8/8] Bump version to 0.1.4 - Ready to publish --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5d3b15e..e641306 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "detectzoo" -version = "0.1.0" +version = "0.1.4" description = "DetectZoo: A Unified Toolkit for AI-Generated Content Detection Across Text, Audio, and Image Modalities" readme = 
"README.md" requires-python = ">=3.9"