fluendo · ahresse · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/README.md b/README.md
@@ -26,6 +26,7 @@ suites.
     - [Run](#run)
     - [Download](#download)
+    - [Local Mirror](#local-mirror)
     - [Reference](#reference)
   - [Report](#report)
   - [FAQ](#faq)
     - [Where does the name come from?](#where-does-the-name-come-from)
@@ -642,7 +643,7 @@ optional arguments:
 ```bash
 ./fluster.py download --help
 
-usage: fluster.py download [-h] [-j JOBS] [-k] [-r RETRIES] [-c CODEC] [testsuites ...]
+usage: fluster.py download [-h] [-j JOBS] [-k] [-r RETRIES] [-m MIRROR] [-c CODEC] [testsuites ...]
 
 positional arguments:
   testsuites            list of testsuites to download
@@ -654,6 +655,8 @@ optional arguments:
   -k, --keep            keep original downloaded file after extracting. Only applicable to compressed files such as .zip, .tar.gz, etc
   -r RETRIES, --retries RETRIES
                         number of retries, before failing
+  -m MIRROR, --mirror MIRROR
+                        base URL of a local mirror to download resources from (falls back to original source on failure)
   -c CODEC, --codec CODEC
                         download test suites for specific codecs only (comma-separated)
 ```
@@ -663,6 +666,82 @@ optional arguments:
 - When using both `-c/--codec` and specific test suites, the behavior is **union-based**:
   - All test suites matching the codec filter are downloaded
   - Additionally, all specified test suites are downloaded, regardless of codec
+### Local Mirror
+
+When running fluster on multiple machines or in a CI environment, downloading test vectors from the internet for each run can be slow. Fluster supports a **local mirror** to serve resources from a server on your LAN instead.
+
+#### How it works
+
+The `--mirror` option takes a base URL pointing to a mirror server. For each download, fluster rewrites the source URL to point to the mirror and tries that first. The mirror is tried once, without retries; if it is unreachable or returns an error, fluster automatically falls back to the original internet source, where the usual retry logic applies.
+
+For example, given a source URL:
+```
+https://storage.googleapis.com/aom-test-data/av1-1-b10-00-quantizer-00.ivf
+```
+and a mirror base URL:
+```
+http://mirror.local:8080/fluster/
+```
+fluster will first attempt to download from:
+```
+http://mirror.local:8080/fluster/storage.googleapis.com/aom-test-data/av1-1-b10-00-quantizer-00.ivf
+```
+
+#### Usage
+
+```bash
+./fluster.py download --mirror http://mirror.local:8080/fluster/
+```
+
+The `--mirror` option works with all other download options:
+```bash
+./fluster.py download -c H.264,H.265 --mirror http://mirror.local:8080/fluster/
+./fluster.py download AV1-TEST-VECTORS -j 8 --mirror http://mirror.local:8080/fluster/
+```
+
+#### Setting up a mirror
+
+Use the `scripts/mirror_sync.py` script to populate a directory with all test vector resources:
+
+```bash
+python3 scripts/mirror_sync.py -o /path/to/mirror -j 8
+```
+
+This will scan all test suite JSON files and download every source URL into a directory tree that mirrors the original URL structure. Already-downloaded files are skipped on subsequent runs.
+
+Then serve the directory with any HTTP server:
+
+```bash
+# Python (quick testing)
+cd /path/to/mirror && python3 -m http.server 8080
+
+# nginx (production)
+# Point nginx root to /path/to/mirror
+```
+
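+Then pass the URL at which that directory is served as the `--mirror` argument (add a path prefix such as `/fluster/` only if your server exposes the directory under one):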
+```bash
+./fluster.py download --mirror http://mirror.local:8080/
+```
+
+#### mirror_sync.py options
+
+```bash
+python3 scripts/mirror_sync.py --help
+
+usage: mirror_sync.py [-h] [-o OUTPUT] [-t TEST_SUITES_DIR] [-j JOBS] [-r RETRIES]
+
+options:
+  -h, --help            show this help message and exit
+  -o OUTPUT, --output OUTPUT
+                        output directory for the mirror tree (default: ./mirror)
+  -t TEST_SUITES_DIR, --test-suites-dir TEST_SUITES_DIR
+                        directory containing test suite JSON files
+  -j JOBS, --jobs JOBS  number of parallel downloads (default: 4)
+  -r RETRIES, --retries RETRIES
+                        number of retries per download (default: 2)
+```
+
 ### Reference
 
 ```bash

diff --git a/fluster/fluster.py b/fluster/fluster.py
@@ -953,7 +953,13 @@ def _generate_global_summary(results: Dict[str, List[Tuple[Decoder, TestSuite]]]
             print(output)
 
     def download_test_suites(
-        self, test_suites: List[str], jobs: int, keep_file: bool, retries: int, codec_string: Optional[str] = None
+        self,
+        test_suites: List[str],
+        jobs: int,
+        keep_file: bool,
+        retries: int,
+        codec_string: Optional[str] = None,
+        mirror: Optional[str] = None,
     ) -> None:
         """Download a group of test suites"""
         self._load_test_suites()
@@ -999,4 +1005,5 @@ def download_test_suites(
                 verify=True,
                 keep_file=keep_file,
                 retries=retries,
+                mirror=mirror,
             )
diff --git a/fluster/main.py b/fluster/main.py
@@ -352,6 +352,13 @@ def _add_download_cmd(self, subparsers: Any) -> None:
             type=int,
             default=2,
         )
+        subparser.add_argument(
+            "-m",
+            "--mirror",
+            help="base URL of a local mirror to download resources from (falls back to original source on failure)",
+            type=str,
+            default=None,
+        )
         subparser.add_argument(
             "-c",
             "--codec",
@@ -419,4 +426,5 @@ def _download_cmd(args: Any, fluster: Fluster) -> None:
             keep_file=args.keep,
             retries=args.retries,
             codec_string=args.codec,
+            mirror=args.mirror,
         )
diff --git a/fluster/test_suite.py b/fluster/test_suite.py
@@ -47,13 +47,15 @@ def __init__(
         keep_file: bool,
         test_suite_name: str,
         retries: int,
+        mirror: Optional[str] = None,
     ):
         self.out_dir = out_dir
         self.verify = verify
         self.extract_all = extract_all
         self.keep_file = keep_file
         self.test_suite_name = test_suite_name
         self.retries = retries
+        self.mirror = mirror
 
     # This is added to avoid having to create an extra ancestor class
     def set_test_vector(self, test_vector: TestVector) -> None:
@@ -74,8 +76,9 @@ def __init__(
         test_suite_name: str,
         test_vectors: Dict[str, TestVector],
         retries: int,
+        mirror: Optional[str] = None,
     ):
-        super().__init__(out_dir, verify, extract_all, keep_file, test_suite_name, retries)
+        super().__init__(out_dir, verify, extract_all, keep_file, test_suite_name, retries, mirror)
         self.test_vectors = test_vectors
 
 
@@ -230,7 +233,7 @@ def _download_single_test_vector(ctx: DownloadWork) -> None:
             return
 
         print(f"\tDownloading test vector {ctx.test_vector.name} from {ctx.test_vector.source}")
-        utils.download(ctx.test_vector.source, dest_dir, ctx.retries**ctx.retries)
+        utils.download(ctx.test_vector.source, dest_dir, ctx.retries**ctx.retries, mirror=ctx.mirror)
 
         if ctx.test_vector.source_checksum != "__skip__":
             checksum = utils.file_checksum(dest_path)
@@ -264,7 +267,7 @@ def _download_single_archive(ctx: DownloadWorkSingleArchive) -> None:
             os.remove(dest_path)
 
         print(f"\tDownloading source file from {first_tv.source}")
-        utils.download(first_tv.source, dest_dir, ctx.retries**ctx.retries)
+        utils.download(first_tv.source, dest_dir, ctx.retries**ctx.retries, mirror=ctx.mirror)
 
         # Check that source file was downloaded correctly
         if first_tv.source_checksum != "__skip__":
@@ -301,6 +304,7 @@ def download(
         extract_all: bool = False,
         keep_file: bool = False,
         retries: int = 2,
+        mirror: Optional[str] = None,
     ) -> None:
         """Download the test suite"""
         os.makedirs(out_dir, exist_ok=True)
@@ -314,14 +318,14 @@ def download(
             # Download test suite of multiple test vectors from a single archive
             print(f"Downloading test suite {self.name} using 1 job (single archive)")
             dwork_single = DownloadWorkSingleArchive(
-                out_dir, verify, extract_all, keep_file, self.name, self.test_vectors, retries
+                out_dir, verify, extract_all, keep_file, self.name, self.test_vectors, retries, mirror
             )
             self._download_single_archive(dwork_single)
         elif len(unique_sources) == 1 and len(self.test_vectors) == 1:
             # Download test suite of single test vector
             print(f"Downloading test suite {self.name} using 1 job (single file)")
             single_tv = next(iter(self.test_vectors.values()))
-            dwork = DownloadWork(out_dir, verify, extract_all, keep_file, self.name, retries)
+            dwork = DownloadWork(out_dir, verify, extract_all, keep_file, self.name, retries, mirror)
             dwork.set_test_vector(single_tv)
             self._download_single_test_vector(dwork)
         else:
@@ -338,7 +342,7 @@ def _callback_error(err: Any) -> None:
 
                 downloads = []
                 for tv in self.test_vectors.values():
-                    dwork = DownloadWork(out_dir, verify, extract_all, keep_file, self.name, retries)
+                    dwork = DownloadWork(out_dir, verify, extract_all, keep_file, self.name, retries, mirror)
                     dwork.set_test_vector(tv)
                     downloads.append(
                         pool.apply_async(

diff --git a/fluster/utils.py b/fluster/utils.py
@@ -43,6 +43,24 @@
 
 download_lock = Lock()
 
+# Exceptions treated as a failed download attempt. download() catches this
+# tuple both for the single mirror attempt and for each retry against the
+# original URL.
+MIRROR_NETWORK_ERRORS = (
+    urllib.error.URLError,
+    urllib.error.HTTPError,
+    OSError,
+    IOError,
+    ConnectionError,
+    TimeoutError,
+    http.client.IncompleteRead,
+)
+
+
+def rewrite_url(source_url: str, mirror_base: str) -> str:
+    """Return the URL on the mirror that corresponds to source_url.
+
+    Only the network location (netloc), path and query string of source_url
+    are kept; they are appended to mirror_base, separated from it by exactly
+    one "/". For example, "https://host/dir/file.ivf" with the mirror base
+    "http://mirror.local:8080/fluster/" becomes
+    "http://mirror.local:8080/fluster/host/dir/file.ivf".
+    """
+    parsed = urllib.parse.urlparse(source_url)
+    # The scheme is dropped: the source host becomes the first path segment.
+    path = parsed.netloc + parsed.path
+    if parsed.query:
+        path += "?" + parsed.query
+    # Strip slashes on both sides so the join never yields "//" or no "/".
+    return mirror_base.rstrip("/") + "/" + path.lstrip("/")
+
 
 def create_enhanced_opener() -> urllib.request.OpenerDirector:
     """Creates an enhanced URL opener with custom headers and cookie support."""
@@ -159,26 +177,33 @@ def download(
     max_retries: int = 5,
     timeout: int = 300,
     chunk_size: int = 2048 * 2048,  # 4MB
+    mirror: Optional[str] = None,
 ) -> None:
     """Downloads a file to a directory with a mutex lock
-    to avoid conflicts and retries with exponential backoff."""
+    to avoid conflicts and retries with exponential backoff.
+    If mirror is provided, tries the mirror URL first and falls back to the original URL."""
     os.makedirs(dest_dir, exist_ok=True)
     filename = os.path.basename(url)
     dest_path = os.path.join(dest_dir, filename)
+
+    if mirror:
+        mirror_url = rewrite_url(url, mirror)
+        try:
+            with download_lock:
+                _download_simple(mirror_url, dest_path, filename, timeout, chunk_size)
+            return
+        except MIRROR_NETWORK_ERRORS as e:
+            if os.path.exists(dest_path):
+                os.remove(dest_path)
+            print(f"\tWARNING: Mirror download failed for {mirror_url}: {e}")
+            print(f"\tFalling back to original source: {url}")
+
     for attempt in range(max_retries):
         try:
             with download_lock:
                 _download_simple(url, dest_path, filename, timeout, chunk_size)
             break
-        except (
-            urllib.error.URLError,
-            urllib.error.HTTPError,
-            OSError,
-            IOError,
-            ConnectionError,
-            TimeoutError,
-            http.client.IncompleteRead,
-        ) as e:
+        except MIRROR_NETWORK_ERRORS as e:
             if os.path.exists(dest_path):
                 os.remove(dest_path)