From e57d5751ecd39276acb9986c2992afa28de48bf6 Mon Sep 17 00:00:00 2001
From: Glen Beane <356266+gbeane@users.noreply.github.com>
Date: Tue, 16 Jun 2026 15:51:31 -0400
Subject: [PATCH 1/2] Add filename pattern cross-validation grouping with group
 preview

---
 packages/jabs-core/src/jabs/core/constants.py |   2 +
 .../jabs-core/src/jabs/core/enums/__init__.py |   9 +-
 .../src/jabs/core/enums/cv_grouping.py        |  53 ++++
 packages/jabs-core/tests/test_cv_grouping.py  |  75 +++++
 src/jabs/classifier/classifier.py             |  31 +-
 src/jabs/classifier/cross_validation.py       |  10 +-
 src/jabs/classifier/multi_class_classifier.py |  31 +-
 src/jabs/classifier/training_report.py        |   6 +
 src/jabs/project/export_training.py           |  47 +--
 src/jabs/project/project.py                   |  91 +++++-
 src/jabs/project/settings_manager.py          |  14 +-
 src/jabs/scripts/cli/cross_validation.py      |   1 +
 src/jabs/ui/main_window/central_widget.py     |   4 +
 .../cross_validation_settings_group.py        | 277 ++++++++++++++++--
 .../ui/settings_dialog/settings_dialog.py     |  25 +-
 src/jabs/ui/settings_dialog/settings_group.py |  12 +
 src/jabs/ui/training_strategy.py              |   5 +
 src/jabs/ui/training_thread.py                |   1 +
 tests/classifier/test_classifier.py           |  51 ++++
 .../classifier/test_multi_class_classifier.py |  48 +++
 tests/project/test_cv_grouping.py             | 106 +++++++
 tests/project/test_settings_manager.py        |  19 ++
 tests/ui/_fakes.py                            |   1 +
 tests/ui/test_settings_dialog.py              | 150 +++++++++-
 24 files changed, 1006 insertions(+), 63 deletions(-)
 create mode 100644 packages/jabs-core/tests/test_cv_grouping.py
 create mode 100644 tests/project/test_cv_grouping.py

diff --git a/packages/jabs-core/src/jabs/core/constants.py b/packages/jabs-core/src/jabs/core/constants.py
index 5ea79323..0b6a9bd1 100644
--- a/packages/jabs-core/src/jabs/core/constants.py
+++ b/packages/jabs-core/src/jabs/core/constants.py
@@ -13,6 +13,8 @@
 
 # settings keys for project settings stored in the project.json file
 CV_GROUPING_KEY = "cv_grouping"
+# regex used when CV_GROUPING_KEY is the "Filename Pattern" strategy
+CV_GROUPING_REGEX_KEY = "cv_grouping_regex"
 CLASSIFIER_MODE_KEY = "classifier_mode"
 CACHE_FORMAT_KEY = "cache_format"
 
diff --git a/packages/jabs-core/src/jabs/core/enums/__init__.py b/packages/jabs-core/src/jabs/core/enums/__init__.py
index 18c8db90..5a2c7f5d 100644
--- a/packages/jabs-core/src/jabs/core/enums/__init__.py
+++ b/packages/jabs-core/src/jabs/core/enums/__init__.py
@@ -3,7 +3,12 @@
 from .cache_format import CacheFormat
 from .classifier_mode import DEFAULT_CLASSIFIER_MODE, ClassifierMode
 from .classifier_types import ClassifierType
-from .cv_grouping import DEFAULT_CV_GROUPING_STRATEGY, CrossValidationGroupingStrategy
+from .cv_grouping import (
+    DEFAULT_CV_GROUPING_STRATEGY,
+    CrossValidationGroupingStrategy,
+    compile_grouping_regex,
+    filename_group_key,
+)
 from .inference import ConfidenceMetric, Method, SamplingStrategy
 from .prediction_type import PredictionType
 from .storage_format import StorageFormat
@@ -22,4 +27,6 @@
     "ProjectDistanceUnit",
     "SamplingStrategy",
     "StorageFormat",
+    "compile_grouping_regex",
+    "filename_group_key",
 ]
diff --git a/packages/jabs-core/src/jabs/core/enums/cv_grouping.py b/packages/jabs-core/src/jabs/core/enums/cv_grouping.py
index c3ce9cb0..3c592840 100644
--- a/packages/jabs-core/src/jabs/core/enums/cv_grouping.py
+++ b/packages/jabs-core/src/jabs/core/enums/cv_grouping.py
@@ -1,3 +1,6 @@
+"""Cross-validation grouping strategy enum and filename-pattern helpers."""
+
+import re
 from enum import Enum
 
 
@@ -10,6 +13,56 @@ class CrossValidationGroupingStrategy(str, Enum):
 
     INDIVIDUAL = "Individual Animal"
     VIDEO = "Video"
+    FILENAME_PATTERN = "Filename Pattern"
 
 
 DEFAULT_CV_GROUPING_STRATEGY = CrossValidationGroupingStrategy.INDIVIDUAL
+
+
+def compile_grouping_regex(regex: str) -> re.Pattern[str]:
+    """Compile a filename-pattern cross-validation grouping regular expression.
+
+    Args:
+        regex: Regular expression used to extract a grouping key from a video
+            filename. Any falsy value (an empty string or ``None``) is rejected.
+
+    Returns:
+        The compiled pattern, built with :func:`re.compile` and no flags.
+
+    Raises:
+        ValueError: If ``regex`` is falsy or fails to compile (``re.error`` is chained).
+    """
+    if not regex:
+        raise ValueError("Filename pattern grouping requires a non-empty regular expression")
+    try:
+        return re.compile(regex)
+    except re.error as e:
+        raise ValueError(f"Invalid filename grouping pattern: {e}") from e
+
+
+def filename_group_key(video_name: str, pattern: re.Pattern[str]) -> str:
+    """Extract a cross-validation grouping key from a video filename.
+
+    The pattern is applied with :meth:`re.Pattern.search`, so it matches anywhere
+    in ``video_name``. If the pattern defines at least one capturing group and the
+    first one participated in the match, its text is the key, even if that text is
+    empty (a pattern capturing the digits in ``cage_1234.mp4`` yields ``"1234"``);
+    later groups are never consulted. Otherwise the full matched text is the key
+    (a pattern matching the whole ``cage_1234`` token yields ``"cage_1234"``).
+    Videos that do not match the pattern are keyed by the filename itself.
+
+    Args:
+        video_name: Video filename to extract a grouping key from.
+        pattern: Compiled regular expression (see :func:`compile_grouping_regex`).
+
+    Returns:
+        The grouping key string. All videos that yield the same key are placed in
+        the same cross-validation group.
+    """
+    match = pattern.search(video_name)
+    if match is None:
+        # No match: the video becomes its own group (keyed by its unique filename).
+        return video_name
+    if pattern.groups >= 1 and match.group(1) is not None:
+        return match.group(1)
+    return match.group(0)
diff --git a/packages/jabs-core/tests/test_cv_grouping.py b/packages/jabs-core/tests/test_cv_grouping.py
new file mode 100644
index 00000000..1c2982b9
--- /dev/null
+++ b/packages/jabs-core/tests/test_cv_grouping.py
@@ -0,0 +1,75 @@
+"""Tests for cross-validation grouping enum and filename-pattern helpers."""
+
+import pytest
+
+from jabs.core.enums import (
+    CrossValidationGroupingStrategy,
+    compile_grouping_regex,
+    filename_group_key,
+)
+
+
+def test_filename_pattern_member_exists() -> None:
+    """FILENAME_PATTERN has the expected display value and round-trips by value lookup."""
+    assert CrossValidationGroupingStrategy.FILENAME_PATTERN.value == "Filename Pattern"
+    assert CrossValidationGroupingStrategy("Filename Pattern") is (
+        CrossValidationGroupingStrategy.FILENAME_PATTERN
+    )
+
+
+def test_compile_grouping_regex_valid() -> None:
+    """A valid pattern compiles to a usable regex."""
+    pattern = compile_grouping_regex(r"cage_(\d+)")
+    assert pattern.search("cage_0042.mp4") is not None
+
+
+@pytest.mark.parametrize("regex", ["", None], ids=["empty", "none"])
+def test_compile_grouping_regex_empty_raises(regex) -> None:
+    """An empty string and ``None`` are both rejected with the "non-empty" ValueError."""
+    with pytest.raises(ValueError, match="non-empty"):
+        compile_grouping_regex(regex)
+
+
+def test_compile_grouping_regex_invalid_raises() -> None:
+    """A syntactically invalid pattern raises ValueError, not re.error."""
+    with pytest.raises(ValueError, match="Invalid filename grouping pattern"):
+        compile_grouping_regex("cage_(")
+
+
+def test_filename_group_key_uses_capture_group() -> None:
+    """When the pattern has a capture group, the captured text is the key."""
+    pattern = compile_grouping_regex(r"cage_(\d+)")
+    assert filename_group_key("cage_0042_2026-06-16.mp4", pattern) == "0042"
+
+
+def test_filename_group_key_uses_full_match_without_capture_group() -> None:
+    """Without a capture group, the whole matched substring is the key."""
+    pattern = compile_grouping_regex(r"cage_\d+")
+    assert filename_group_key("cage_0042_2026-06-16.mp4", pattern) == "cage_0042"
+
+
+def test_filename_group_key_searches_anywhere() -> None:
+    """The pattern matches anywhere in the filename (re.search semantics)."""
+    pattern = compile_grouping_regex(r"cage_(\d+)")
+    assert filename_group_key("2026-06-16_cage_0007_cam1.avi", pattern) == "0007"
+
+
+def test_filename_group_key_unmatched_returns_filename() -> None:
+    """A filename that does not match becomes its own group (keyed by the name)."""
+    pattern = compile_grouping_regex(r"cage_(\d+)")
+    assert filename_group_key("mouse_video.mp4", pattern) == "mouse_video.mp4"
+
+
+def test_filename_group_key_same_cage_different_files_share_key() -> None:
+    """Different files from the same cage produce the same grouping key."""
+    pattern = compile_grouping_regex(r"cage_(\d+)")
+    key_a = filename_group_key("cage_0042_day1.mp4", pattern)
+    key_b = filename_group_key("cage_0042_day2.avi", pattern)
+    assert key_a == key_b == "0042"
+
+
+def test_filename_group_key_optional_capture_group_falls_back_to_full_match() -> None:
+    """An optional capture group that does not participate falls back to the full match."""
+    pattern = compile_grouping_regex(r"cage(_extra)?_\d+")
+    # The optional group does not match here, so the full match is used.
+    assert filename_group_key("cage_0042.mp4", pattern) == "cage_0042"
diff --git a/src/jabs/classifier/classifier.py b/src/jabs/classifier/classifier.py
index 0162fb18..3003d99a 100644
--- a/src/jabs/classifier/classifier.py
+++ b/src/jabs/classifier/classifier.py
@@ -12,6 +12,8 @@
     DEFAULT_CV_GROUPING_STRATEGY,
     ClassifierType,
     CrossValidationGroupingStrategy,
+    compile_grouping_regex,
+    filename_group_key,
 )
 from jabs.core.utils import hash_file
 from jabs.project import Project, load_training_data
@@ -318,6 +320,7 @@ def confusion_matrix(truth: np.ndarray, predictions: np.ndarray) -> np.ndarray:
     def count_label_threshold(
         all_counts: dict,
         cv_grouping_strategy: CrossValidationGroupingStrategy = DEFAULT_CV_GROUPING_STRATEGY,
+        cv_grouping_regex: str | None = None,
     ) -> int:
         """Count groups that meet the label-threshold criteria.
 
@@ -326,6 +329,9 @@ def count_label_threshold(
                 Structure is a dict[video_name][identity] of fragmented and
                 unfragmented frame/bout count tuples.
             cv_grouping_strategy: Cross-validation grouping strategy.
+            cv_grouping_regex: Regex used to extract a grouping key from each video
+                filename when ``cv_grouping_strategy`` is ``FILENAME_PATTERN``. An
+                empty or invalid regex yields a count of 0 (no trainable groups).
 
         Returns:
             Number of groups that meet the labeling threshold criteria.
@@ -356,6 +362,24 @@ def count_label_threshold(
                     and not_behavior_sum >= Classifier.LABEL_THRESHOLD
                 ):
                     group_count += 1
+        elif cv_grouping_strategy == CrossValidationGroupingStrategy.FILENAME_PATTERN:
+            try:
+                pattern = compile_grouping_regex(cv_grouping_regex or "")
+            except ValueError:
+                return 0
+            group_sums: dict[str, list[int]] = {}
+            for video in all_counts:
+                label = filename_group_key(video, pattern)
+                sums = group_sums.setdefault(label, [0, 0])
+                for identity_count in all_counts[video].values():
+                    sums[0] += identity_count["fragmented_frame_counts"][0]
+                    sums[1] += identity_count["fragmented_frame_counts"][1]
+            for behavior_sum, not_behavior_sum in group_sums.values():
+                if (
+                    behavior_sum >= Classifier.LABEL_THRESHOLD
+                    and not_behavior_sum >= Classifier.LABEL_THRESHOLD
+                ):
+                    group_count += 1
         else:
             raise ValueError(f"Unknown cv_grouping_strategy: {cv_grouping_strategy}")
         return group_count
@@ -365,6 +389,7 @@ def label_threshold_met(
         all_counts: dict,
         min_groups: int,
         cv_grouping_strategy: CrossValidationGroupingStrategy = DEFAULT_CV_GROUPING_STRATEGY,
+        cv_grouping_regex: str | None = None,
     ) -> bool:
         """Determine whether the labeling threshold is met.
 
@@ -372,11 +397,15 @@ def label_threshold_met(
             all_counts: Labeled frame and bout counts for the entire project.
             min_groups: Minimum number of groups required.
             cv_grouping_strategy: Cross-validation grouping strategy.
+            cv_grouping_regex: Regex used for ``FILENAME_PATTERN`` grouping (see
+                :meth:`count_label_threshold`).
 
         Returns:
             True if there are enough groups meeting the threshold.
         """
         group_count = Classifier.count_label_threshold(
-            all_counts, cv_grouping_strategy=cv_grouping_strategy
+            all_counts,
+            cv_grouping_strategy=cv_grouping_strategy,
+            cv_grouping_regex=cv_grouping_regex,
         )
         return 1 < group_count >= min_groups
diff --git a/src/jabs/classifier/cross_validation.py b/src/jabs/classifier/cross_validation.py
index d015334a..6ea2caeb 100644
--- a/src/jabs/classifier/cross_validation.py
+++ b/src/jabs/classifier/cross_validation.py
@@ -125,7 +125,15 @@ def _train_multiclass_fold(
 
 
 def _test_label_from_group(test_info: dict) -> str:
-    """Render a CV test-group label for the report (video name + optional identity)."""
+    """Render a CV test-group label for the report.
+
+    Returns ``test_info["label"]`` when that key is present and not ``None``
+    (filename-pattern groups carry the regex-extracted key, e.g. ``"cage_1234"``);
+    otherwise ``"<video> [<identity>]"``, or the bare video name when identity is None.
+    """
+    label = test_info.get("label")
+    if label is not None:
+        return label
     if test_info["identity"] is not None:
         return f"{test_info['video']} [{test_info['identity']}]"
     return test_info["video"]
diff --git a/src/jabs/classifier/multi_class_classifier.py b/src/jabs/classifier/multi_class_classifier.py
index b1e2783b..cb28153d 100644
--- a/src/jabs/classifier/multi_class_classifier.py
+++ b/src/jabs/classifier/multi_class_classifier.py
@@ -17,6 +17,8 @@
     DEFAULT_CV_GROUPING_STRATEGY,
     ClassifierType,
     CrossValidationGroupingStrategy,
+    compile_grouping_regex,
+    filename_group_key,
 )
 from jabs.core.utils import hash_file
 from jabs.project import load_multiclass_training_data
@@ -423,6 +425,7 @@ def count_label_threshold(
         counts_by_behavior: dict[str, dict],
         behavior_names: list[str],
         cv_grouping_strategy: CrossValidationGroupingStrategy = DEFAULT_CV_GROUPING_STRATEGY,
+        cv_grouping_regex: str | None = None,
     ) -> int:
         """Count multi-class LOGO groups that satisfy the relaxed acceptance rule.
 
@@ -439,6 +442,9 @@ def count_label_threshold(
                 ``counts_by_behavior``. Typically includes
                 ``MULTICLASS_NONE_BEHAVIOR``.
             cv_grouping_strategy: Cross-validation grouping strategy.
+            cv_grouping_regex: Regex used to extract a grouping key from each video
+                filename when ``cv_grouping_strategy`` is ``FILENAME_PATTERN``. An
+                empty or invalid regex yields a count of 0 (no trainable groups).
 
         Returns:
             Number of groups that can serve as a valid multi-class LOGO test split.
@@ -450,13 +456,30 @@ def count_label_threshold(
         if not behavior_names:
             return 0
 
+        # FILENAME_PATTERN aggregates like VIDEO grouping, but keys each group by
+        # the regex-extracted filename key instead of the video name (so several
+        # videos can share one group). An empty/invalid regex means no groups.
+        pattern = None
+        if cv_grouping_strategy == CrossValidationGroupingStrategy.FILENAME_PATTERN:
+            try:
+                pattern = compile_grouping_regex(cv_grouping_regex or "")
+            except ValueError:
+                return 0
+
         threshold = MultiClassClassifier.LABEL_THRESHOLD
         group_class_counts: dict[tuple[str, int] | str, dict[str, int]] = {}
         for behavior_name in behavior_names:
             behavior_counts = counts_by_behavior.get(behavior_name, {})
             for video_name, video_counts in behavior_counts.items():
-                if cv_grouping_strategy == CrossValidationGroupingStrategy.VIDEO:
-                    key: tuple[str, int] | str = video_name
+                if cv_grouping_strategy in (
+                    CrossValidationGroupingStrategy.VIDEO,
+                    CrossValidationGroupingStrategy.FILENAME_PATTERN,
+                ):
+                    key: tuple[str, int] | str = (
+                        filename_group_key(video_name, pattern)
+                        if pattern is not None
+                        else video_name
+                    )
                     group_entry = group_class_counts.setdefault(key, {})
                     group_entry[behavior_name] = group_entry.get(behavior_name, 0) + sum(
                         identity_counts["fragmented_frame_counts"][0]
@@ -498,6 +521,7 @@ def label_threshold_met(
         behavior_names: list[str],
         min_groups: int,
         cv_grouping_strategy: CrossValidationGroupingStrategy = DEFAULT_CV_GROUPING_STRATEGY,
+        cv_grouping_regex: str | None = None,
     ) -> bool:
         """Determine whether multi-class labels support ``min_groups`` LOGO splits.
 
@@ -511,6 +535,8 @@ class names are supplied.
                 at 1, since multi-class training requires at least one valid
                 split.
             cv_grouping_strategy: Cross-validation grouping strategy.
+            cv_grouping_regex: Regex used for ``FILENAME_PATTERN`` grouping (see
+                :meth:`count_label_threshold`).
 
         Returns:
             True if the count of valid splits meets ``max(1, min_groups)``.
@@ -521,5 +547,6 @@ class names are supplied.
             counts_by_behavior=counts_by_behavior,
             behavior_names=behavior_names,
             cv_grouping_strategy=cv_grouping_strategy,
+            cv_grouping_regex=cv_grouping_regex,
         )
         return valid_splits >= max(1, min_groups)
diff --git a/src/jabs/classifier/training_report.py b/src/jabs/classifier/training_report.py
index b14dd60f..948ce8ba 100644
--- a/src/jabs/classifier/training_report.py
+++ b/src/jabs/classifier/training_report.py
@@ -105,6 +105,8 @@ class TrainingReportData:
         training_time_ms: Total training time in milliseconds.
         timestamp: Datetime when training was completed.
         cv_grouping_strategy: Strategy used for cross-validation grouping.
+        cv_grouping_regex: Filename-pattern regex used for grouping. Only set when
+            the grouping strategy is "Filename Pattern".
     """
 
     behavior_name: str
@@ -124,6 +126,7 @@ class TrainingReportData:
     bouts_not_behavior: int = 0
     class_frame_counts: dict[str, int] | None = None
     class_bout_counts: dict[str, int] | None = None
+    cv_grouping_regex: str | None = None
 
 
 def _escape_markdown(text: str) -> str:
@@ -287,6 +290,8 @@ def generate_markdown_report(data: TrainingReportData) -> str:
 
         lines.append("### Iteration Details")
         lines.append(f"CV Grouping Strategy: {data.cv_grouping_strategy.value}")
+        if data.cv_grouping_regex:
+            lines.append(f"CV Grouping Pattern: `{data.cv_grouping_regex}`")
         lines.append("")
         lines.append(_format_iteration_table(data.cv_results))
         lines.append("")
@@ -410,6 +415,7 @@ def generate_json_report(data: TrainingReportData) -> dict:
         "training_time_ms": int(data.training_time_ms),
         "timestamp": timestamp_str,
         "cv_grouping_strategy": data.cv_grouping_strategy.value,
+        "cv_grouping_regex": data.cv_grouping_regex,
         "frames_behavior": int(data.frames_behavior),
         "frames_not_behavior": int(data.frames_not_behavior),
         "bouts_behavior": int(data.bouts_behavior),
diff --git a/src/jabs/project/export_training.py b/src/jabs/project/export_training.py
index 4ecaa032..577950f7 100644
--- a/src/jabs/project/export_training.py
+++ b/src/jabs/project/export_training.py
@@ -20,6 +20,34 @@
     from jabs.project import Project
 
 
+def _write_group_mapping(
+    out_h5: h5py.File, group_mapping: dict[int, dict], string_type: np.dtype
+) -> None:
+    """Write the cross-validation group mapping into the exported HDF5 file.
+
+    Each group gets two length-1 datasets under ``group_mapping/<group>/``:
+    ``identity`` (int64; ``-1`` when the group's identity is ``None``, as with
+    ``VIDEO`` and ``FILENAME_PATTERN`` grouping) and ``video_name``. When a group
+    carries a non-``None`` ``label`` (``FILENAME_PATTERN``), ``video_name`` stores
+    that label (e.g. ``"cage_1234"``), since one group can span multiple videos.
+
+    Args:
+        out_h5: Open HDF5 file to write into.
+        group_mapping: Mapping of group id to its source descriptor.
+        string_type: h5py variable-length string dtype for the video_name dataset.
+    """
+    for group, info in group_mapping.items():
+        identity_dset = out_h5.create_dataset(
+            f"group_mapping/{group}/identity", (1,), dtype=np.int64
+        )
+        identity = info["identity"]
+        identity_dset[:] = identity if identity is not None else -1
+        video_name_dset = out_h5.create_dataset(
+            f"group_mapping/{group}/video_name", (1,), dtype=string_type
+        )
+        video_name_dset[:] = info["label"] if info.get("label") is not None else info["video"]
+
+
 def export_training_data(
     project: "Project",
     behavior: str,
@@ -76,15 +104,7 @@ def export_training_data(
         out_h5.create_dataset("label", data=features["labels"])
 
         # store the video/identity to group mapping in the h5 file
-        # identity is None when VIDEO grouping strategy is used; store -1 as a sentinel
-        for group in group_mapping:
-            dset = out_h5.create_dataset(f"group_mapping/{group}/identity", (1,), dtype=np.int64)
-            identity = group_mapping[group]["identity"]
-            dset[:] = identity if identity is not None else -1
-            dset = out_h5.create_dataset(
-                f"group_mapping/{group}/video_name", (1,), dtype=string_type
-            )
-            dset[:] = group_mapping[group]["video"]
+        _write_group_mapping(out_h5, group_mapping, string_type)
 
     # return output path, so if it was generated automatically the caller
     # will know
@@ -159,14 +179,7 @@ def export_training_data_multiclass(
 
         out_h5.create_dataset("group", data=features["groups"])
 
-        for group in group_mapping:
-            dset = out_h5.create_dataset(f"group_mapping/{group}/identity", (1,), dtype=np.int64)
-            identity = group_mapping[group]["identity"]
-            dset[:] = identity if identity is not None else -1
-            dset = out_h5.create_dataset(
-                f"group_mapping/{group}/video_name", (1,), dtype=string_type
-            )
-            dset[:] = group_mapping[group]["video"]
+        _write_group_mapping(out_h5, group_mapping, string_type)
 
     return out_file
 
diff --git a/src/jabs/project/project.py b/src/jabs/project/project.py
index a6746842..f25faad5 100644
--- a/src/jabs/project/project.py
+++ b/src/jabs/project/project.py
@@ -23,6 +23,8 @@
     ClassifierMode,
     CrossValidationGroupingStrategy,
     ProjectDistanceUnit,
+    compile_grouping_regex,
+    filename_group_key,
 )
 from jabs.pose_estimation import (
     PoseEstimation,
@@ -879,6 +881,7 @@ def _assign_cv_group_ids(
         all_group_keys: list[tuple[str, int]],
         videos: list[str],
         grouping_strategy: CrossValidationGroupingStrategy,
+        regex: str | None = None,
     ) -> tuple[dict[tuple[str, int], int], dict[int, dict]]:
         """Assign deterministic cross-validation group ids.
 
@@ -886,12 +889,25 @@ def _assign_cv_group_ids(
             all_group_keys: ``(video, identity)`` tuples in row order.
             videos: Canonical list of project videos; ids are assigned in this order.
             grouping_strategy: ``INDIVIDUAL`` groups one (video, identity) pair per
-                gid; ``VIDEO`` groups all identities of a video together.
+                gid; ``VIDEO`` groups all identities of a video together;
+                ``FILENAME_PATTERN`` groups all videos (and their identities) whose
+                filename yields the same key under ``regex``.
+            regex: Regular expression used to extract a grouping key from each
+                video filename. Required for ``FILENAME_PATTERN`` grouping; ignored
+                otherwise.
 
         Returns:
             Tuple of ``(key_to_gid, group_mapping)`` where ``key_to_gid`` maps each
             ``(video, identity)`` pair to its group id and ``group_mapping`` maps
-            each group id back to ``{"video": ..., "identity": ...}``.
+            each group id back to its source. ``INDIVIDUAL``/``VIDEO`` entries are
+            ``{"video": ..., "identity": ...}``; ``FILENAME_PATTERN`` entries are
+            ``{"video": None, "identity": None, "label": <key>, "videos": [...]}``
+            where ``videos`` lists the labeled videos in the group.
+
+        Raises:
+            ValueError: If ``grouping_strategy`` is ``FILENAME_PATTERN`` and
+                ``regex`` is empty or not a valid regular expression, or if the
+                strategy is unknown.
         """
         key_to_gid: dict[tuple[str, int], int] = {}
         group_mapping: dict[int, dict] = {}
@@ -918,6 +934,29 @@ def _assign_cv_group_ids(
                 for video_name, ident in all_group_keys:
                     if video_name == v:
                         key_to_gid[(v, ident)] = video_to_gid[v]
+        elif grouping_strategy == CrossValidationGroupingStrategy.FILENAME_PATTERN:
+            pattern = compile_grouping_regex(regex or "")
+            label_to_gid: dict[str, int] = {}
+            # Group ids are created lazily in row order (which follows canonical
+            # video order). Videos whose filename does not match the pattern fall
+            # back to their own group, since filename_group_key returns the
+            # (unique) filename as the key.
+            for video_name, ident in all_group_keys:
+                label = filename_group_key(video_name, pattern)
+                if label not in label_to_gid:
+                    label_to_gid[label] = gid
+                    group_mapping[gid] = {
+                        "video": None,
+                        "identity": None,
+                        "label": label,
+                        "videos": [],
+                    }
+                    gid += 1
+                group_gid = label_to_gid[label]
+                key_to_gid[(video_name, ident)] = group_gid
+                videos_in_group = group_mapping[group_gid]["videos"]
+                if video_name not in videos_in_group:
+                    videos_in_group.append(video_name)
         else:
             raise ValueError(f"Unknown grouping strategy: {grouping_strategy}")
         return key_to_gid, group_mapping
@@ -936,21 +975,32 @@ def _build_groups_array(
         return np.concatenate(groups_list) if groups_list else np.array([], dtype=np.int32)
 
     def _excluded_group_ids(self, group_mapping: dict[int, dict]) -> set[int]:
-        """Return CV group ids whose source video is excluded from training.
+        """Return CV group ids whose source video(s) are excluded from training.
 
         Args:
-            group_mapping: Mapping of group id to ``{"video": ..., "identity": ...}``.
+            group_mapping: Mapping of group id to its source. ``INDIVIDUAL``/``VIDEO``
+                groups carry a single ``"video"``; ``FILENAME_PATTERN`` groups carry
+                a ``"videos"`` list (one group can span several videos).
 
         Returns:
-            Set of group ids belonging to videos marked excluded from training.
-            These groups are still eligible as the held-out test group in
-            leave-one-group-out cross-validation but are never used for training.
+            Set of group ids whose constituent videos are all marked excluded from
+            training. These groups are still eligible as the held-out test group in
+            leave-one-group-out cross-validation but are never used for training. A
+            group with no videos is never excluded. NOTE(review): a partially-excluded
+            filename-pattern group is left out of this set as a whole, so this method
+            does not flag its excluded videos' rows; confirm callers handle that case.
         """
-        return {
-            gid
-            for gid, info in group_mapping.items()
-            if self._settings_manager.is_video_excluded(info["video"])
-        }
+        excluded: set[int] = set()
+        for gid, info in group_mapping.items():
+            group_videos = info.get("videos")
+            if group_videos is None:
+                video = info.get("video")
+                group_videos = [video] if video is not None else []
+            if group_videos and all(
+                self._settings_manager.is_video_excluded(v) for v in group_videos
+            ):
+                excluded.add(gid)
+        return excluded
 
     def get_labeled_features(
         self,
@@ -958,6 +1008,7 @@ def get_labeled_features(
         progress_callable: Callable[[], None] | None = None,
         should_terminate_callable: Callable[[], None] | None = None,
         grouping_strategy: CrossValidationGroupingStrategy | None = None,
+        grouping_regex: str | None = None,
     ) -> tuple[dict, dict]:
         """Get labeled features for training (parallel per-video).
 
@@ -977,6 +1028,8 @@ def get_labeled_features(
                 and as results complete; it should raise a `ThreadTerminatedError` if the user
                 has requested early termination.
             grouping_strategy: Optional override for cross-validation grouping strategy. If None, uses project settings.
+            grouping_regex: Optional override for the filename-pattern grouping regex
+                (only used when the strategy is ``FILENAME_PATTERN``). If None, uses project settings.
 
         Returns:
             tuple[dict, dict]: A tuple of (features, group_mapping).
@@ -989,13 +1042,16 @@ def get_labeled_features(
 
                 The values in the first dict are suitable for `Classifier.leave_one_group_out()`.
 
-                The second dict maps group ids to their source:
+                The second dict maps group ids to their source (the exact shape
+                depends on the grouping strategy; see ``_assign_cv_group_ids``):
                     { <group id>: {'video': <video filename>, 'identity': <identity>}, ... }
         """
         behavior_settings = self._settings_manager.get_behavior(behavior)
         videos = list(self._video_manager.videos)
         if grouping_strategy is None:
             grouping_strategy = self.settings_manager.cv_grouping_strategy
+        if grouping_regex is None:
+            grouping_regex = self.settings_manager.cv_grouping_regex
 
         if not videos:
             return {
@@ -1046,7 +1102,7 @@ def get_labeled_features(
             }, {}
 
         key_to_gid, group_mapping = self._assign_cv_group_ids(
-            all_group_keys, videos, grouping_strategy
+            all_group_keys, videos, grouping_strategy, grouping_regex
         )
         groups = self._build_groups_array(all_group_keys, all_per_frame, key_to_gid)
         excluded_groups = self._excluded_group_ids(group_mapping)
@@ -1075,6 +1131,7 @@ def get_multiclass_labeled_features(
         should_terminate_callable: Callable[[], None] | None = None,
         grouping_strategy: CrossValidationGroupingStrategy | None = None,
         behavior_settings: dict[str, object] | None = None,
+        grouping_regex: str | None = None,
     ) -> tuple[dict, dict]:
         """Get multiclass-labeled features for training (parallel per-video).
 
@@ -1090,6 +1147,8 @@ def get_multiclass_labeled_features(
                 If None, uses project settings.
             behavior_settings: Feature-extraction settings (must include ``window_size``).
                 If None, falls back to ``get_project_defaults()``.
+            grouping_regex: Optional override for the filename-pattern grouping regex
+                (only used when the strategy is ``FILENAME_PATTERN``). If None, uses project settings.
 
         Returns:
             tuple[dict, dict]: A tuple of ``(features, group_mapping)``.
@@ -1106,6 +1165,8 @@ def get_multiclass_labeled_features(
         videos = list(self._video_manager.videos)
         if grouping_strategy is None:
             grouping_strategy = self.settings_manager.cv_grouping_strategy
+        if grouping_regex is None:
+            grouping_regex = self.settings_manager.cv_grouping_regex
 
         if not videos:
             return {
@@ -1179,7 +1240,7 @@ def get_multiclass_labeled_features(
             }, {}
 
         key_to_gid, group_mapping = self._assign_cv_group_ids(
-            all_group_keys, videos, grouping_strategy
+            all_group_keys, videos, grouping_strategy, grouping_regex
         )
         groups = self._build_groups_array(all_group_keys, all_per_frame, key_to_gid)
         excluded_groups = self._excluded_group_ids(group_mapping)
diff --git a/src/jabs/project/settings_manager.py b/src/jabs/project/settings_manager.py
index 410acf85..2b636bad 100644
--- a/src/jabs/project/settings_manager.py
+++ b/src/jabs/project/settings_manager.py
@@ -3,7 +3,7 @@
 import typing
 
 import jabs.feature_extraction as feature_extraction
-from jabs.core.constants import CLASSIFIER_MODE_KEY, CV_GROUPING_KEY
+from jabs.core.constants import CLASSIFIER_MODE_KEY, CV_GROUPING_KEY, CV_GROUPING_REGEX_KEY
 from jabs.core.enums.classifier_mode import DEFAULT_CLASSIFIER_MODE, ClassifierMode
 from jabs.core.enums.cv_grouping import (
     DEFAULT_CV_GROUPING_STRATEGY,
@@ -129,6 +129,18 @@ def cv_grouping_strategy(self) -> CrossValidationGroupingStrategy:
             )
             return DEFAULT_CV_GROUPING_STRATEGY
 
+    @property
+    def cv_grouping_regex(self) -> str:
+        """Get the filename-pattern regex used for cross-validation grouping.
+
+        Only meaningful when the grouping strategy is ``FILENAME_PATTERN``.
+
+        Returns:
+            The configured regex, or "" when unset or not stored as a string.
+        """
+        regex = self._project_info.get("settings", {}).get(CV_GROUPING_REGEX_KEY, "")
+        return regex if isinstance(regex, str) else ""
+
     def video_metadata(self, video: str) -> dict:
         """Get metadata for a specific video.
 
diff --git a/src/jabs/scripts/cli/cross_validation.py b/src/jabs/scripts/cli/cross_validation.py
index a15a7aa7..52175114 100644
--- a/src/jabs/scripts/cli/cross_validation.py
+++ b/src/jabs/scripts/cli/cross_validation.py
@@ -194,6 +194,7 @@ def progress_callback():
         timestamp=report_timestamp,
         window_size=behavior_settings["window_size"],
         cv_grouping_strategy=project.settings_manager.cv_grouping_strategy,
+        cv_grouping_regex=project.settings_manager.cv_grouping_regex,
     )
 
     # Save markdown report
diff --git a/src/jabs/ui/main_window/central_widget.py b/src/jabs/ui/main_window/central_widget.py
index 36680879..d0f5168a 100644
--- a/src/jabs/ui/main_window/central_widget.py
+++ b/src/jabs/ui/main_window/central_widget.py
@@ -963,12 +963,14 @@ def _train_button_clicked(self) -> None:
                     counts_by_behavior=counts_by_behavior,
                     behavior_names=behavior_names,
                     cv_grouping_strategy=self._project.settings_manager.cv_grouping_strategy,
+                    cv_grouping_regex=self._project.settings_manager.cv_grouping_regex,
                 )
             else:
                 project_counts = self._project.counts(self._controls.current_behavior)
                 total_steps += self._classifier.count_label_threshold(
                     project_counts,
                     cv_grouping_strategy=self._project.settings_manager.cv_grouping_strategy,
+                    cv_grouping_regex=self._project.settings_manager.cv_grouping_regex,
                 )
         else:
             total_steps += self._controls.kfold_value
@@ -1447,12 +1449,14 @@ def set_train_button_enabled_state(self) -> None:
                 behavior_names=behavior_names,
                 min_groups=min_groups,
                 cv_grouping_strategy=self._project.settings_manager.cv_grouping_strategy,
+                cv_grouping_regex=self._project.settings_manager.cv_grouping_regex,
             )
         else:
             threshold_met = Classifier.label_threshold_met(
                 self._included_counts(self._counts),
                 self._controls.kfold_value,
                 self._project.settings_manager.cv_grouping_strategy,
+                cv_grouping_regex=self._project.settings_manager.cv_grouping_regex,
             )
 
         if threshold_met:
diff --git a/src/jabs/ui/settings_dialog/cross_validation_settings_group.py b/src/jabs/ui/settings_dialog/cross_validation_settings_group.py
index c10890a9..c7d80011 100644
--- a/src/jabs/ui/settings_dialog/cross_validation_settings_group.py
+++ b/src/jabs/ui/settings_dialog/cross_validation_settings_group.py
@@ -1,13 +1,29 @@
 """Cross-validation settings group for configuring model training and validation."""
 
-from PySide6.QtCore import Qt
-from PySide6.QtWidgets import QComboBox, QLabel, QSizePolicy
+import html
+import re
 
-from jabs.core.constants import CV_GROUPING_KEY
-from jabs.core.enums import DEFAULT_CV_GROUPING_STRATEGY, CrossValidationGroupingStrategy
+from PySide6.QtCore import Qt, QTimer
+from PySide6.QtWidgets import QComboBox, QLabel, QLineEdit, QSizePolicy
 
+from jabs.core.constants import CV_GROUPING_KEY, CV_GROUPING_REGEX_KEY
+from jabs.core.enums import (
+    DEFAULT_CV_GROUPING_STRATEGY,
+    CrossValidationGroupingStrategy,
+    filename_group_key,
+)
+
+from .collapsible_section import CollapsibleSection
 from .settings_group import SettingsGroup
 
+# Debounce delay (ms) before recomputing the filename-pattern preview while typing.
+_PREVIEW_DEBOUNCE_MS = 200
+
+
+def _count_phrase(count: int, noun: str) -> str:
+    """Format ``count`` with ``noun``, appending a plain ``s`` unless count is 1."""
+    return f"{count} {noun}{'' if count == 1 else 's'}"
+
 
 class CrossValidationSettingsGroup(SettingsGroup):
     """
@@ -16,8 +32,21 @@ class CrossValidationSettingsGroup(SettingsGroup):
     This group controls how data is split during model training and validation.
     """
 
-    def __init__(self, parent=None):
-        """Initialize the cross-validation settings group."""
+    def __init__(
+        self,
+        videos: list[tuple[str, bool]] | None = None,
+        parent=None,
+    ) -> None:
+        """Build the cross-validation settings group.
+
+        Args:
+            videos: One ``(video_filename, is_excluded)`` tuple per project video;
+                these feed the filename-pattern grouping preview. If not given,
+                the preview stays empty while the other controls work normally.
+            parent: Parent widget.
+        """
+        # Assigned ahead of super().__init__ so _create_controls() can read it.
+        self._video_entries: list[tuple[str, bool]] = list(videos or [])
         super().__init__("Cross-Validation", parent)
 
     def _create_controls(self) -> None:
@@ -31,9 +60,176 @@ def _create_controls(self) -> None:
             self._cv_grouping_combo.findData(DEFAULT_CV_GROUPING_STRATEGY)
         )
         self._cv_grouping_combo.setSizeAdjustPolicy(QComboBox.SizeAdjustPolicy.AdjustToContents)
-
         self.add_control_row("CV Grouping:", self._cv_grouping_combo)
 
+        # Regex input, only relevant (and visible) for the "Filename Pattern" strategy.
+        self._cv_grouping_regex_input = QLineEdit()
+        self._cv_grouping_regex_input.setPlaceholderText(r"e.g. cage_(\d+)")
+        self._cv_grouping_regex_input.setMinimumWidth(220)
+        regex_row = self.add_control_row("Filename Pattern:", self._cv_grouping_regex_input)
+        regex_label_item = self._grid_layout.itemAtPosition(regex_row, 0)
+        self._regex_label = regex_label_item.widget() if regex_label_item is not None else None
+
+        # Inline validation message shown below the input when the regex is invalid.
+        self._regex_error_label = QLabel()
+        self._regex_error_label.setWordWrap(True)
+        self._regex_error_label.setStyleSheet("color: #c0392b;")
+        self.add_widget_row(self._regex_error_label)
+
+        # Live preview of how project videos partition into groups under the pattern.
+        self._preview_summary_label = QLabel()
+        self._preview_summary_label.setWordWrap(True)
+        self._preview_summary_label.setStyleSheet("color: #555;")
+        self.add_widget_row(self._preview_summary_label)
+
+        self._preview_detail = QLabel()
+        self._preview_detail.setTextFormat(Qt.TextFormat.RichText)
+        self._preview_detail.setWordWrap(True)
+        self._preview_detail.setSizePolicy(
+            QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred
+        )
+        self._preview_section = CollapsibleSection("Preview groups", self._preview_detail, self)
+        self._preview_section.sizeChanged.connect(self._relayout_preview)
+        self.add_widget_row(self._preview_section)
+
+        # Debounce preview recomputation while the user is typing a pattern.
+        self._preview_timer = QTimer(self)
+        self._preview_timer.setSingleShot(True)
+        self._preview_timer.setInterval(_PREVIEW_DEBOUNCE_MS)
+        self._preview_timer.timeout.connect(self._refresh_preview)
+
+        self._cv_grouping_combo.currentIndexChanged.connect(self._update_regex_row_visibility)
+        self._cv_grouping_regex_input.textChanged.connect(self._update_regex_validation)
+        self._cv_grouping_regex_input.textChanged.connect(self._preview_timer.start)
+
+        self._update_regex_row_visibility()
+
+    def _is_filename_pattern_selected(self) -> bool:
+        """Report whether the combo box currently selects "Filename Pattern"."""
+        # The combo's item data holds the strategy enum, so compare it directly.
+        pattern_strategy = CrossValidationGroupingStrategy.FILENAME_PATTERN
+        selected_strategy = self._cv_grouping_combo.currentData()
+        return selected_strategy == pattern_strategy
+
+    def _update_regex_row_visibility(self) -> None:
+        """Toggle the regex row to match whether filename-pattern grouping is active."""
+        show_row = self._is_filename_pattern_selected()
+        self._cv_grouping_regex_input.setVisible(show_row)
+        if self._regex_label is not None:
+            self._regex_label.setVisible(show_row)
+        self._update_regex_validation()
+        self._refresh_preview()
+        # Tell the layout the geometry changed so the group resizes with the rows.
+        self.updateGeometry()
+
+    def _update_regex_validation(self) -> None:
+        """Show, update, or hide the inline error for the regex input."""
+        active = self._is_filename_pattern_selected()
+        pattern_text = self._cv_grouping_regex_input.text().strip()
+        error_text = ""
+        if active and not pattern_text:
+            error_text = "Enter a regular expression to group videos by filename."
+        elif active:
+            try:
+                re.compile(pattern_text)
+            except re.error as exc:
+                error_text = f"Invalid regular expression: {exc}"
+        self._regex_error_label.setText(error_text)
+        self._regex_error_label.setVisible(bool(error_text))
+
+    def _compiled_preview_pattern(self) -> re.Pattern[str] | None:
+        """Compile the regex the preview should use, if there is a usable one.
+
+        None is returned when another strategy is selected, or when the entered
+        regex is blank or fails to compile (the inline error reports those cases).
+        """
+        pattern_text = self._cv_grouping_regex_input.text().strip()
+        if not (self._is_filename_pattern_selected() and pattern_text):
+            return None
+        try:
+            compiled = re.compile(pattern_text)
+        except re.error:
+            # An uncompilable pattern has nothing to preview.
+            return None
+        return compiled
+
+    def _refresh_preview(self) -> None:
+        """Rebuild the summary and detail widgets that preview the pattern's groups."""
+        # A direct call replaces any debounced refresh that is still pending.
+        self._preview_timer.stop()
+        compiled = self._compiled_preview_pattern()
+        if compiled is None:
+            self._preview_section.setVisible(False)
+            self._preview_summary_label.setVisible(False)
+            return
+
+        total_videos = len(self._video_entries)
+        if total_videos == 0:
+            self._preview_summary_label.setText("No videos in the project to preview.")
+            self._preview_summary_label.setVisible(True)
+            self._preview_section.setVisible(False)
+            return
+
+        grouped: dict[str, list[tuple[str, bool]]] = {}
+        ungrouped: list[tuple[str, bool]] = []
+        for video_name, is_excluded in self._video_entries:
+            entry = (video_name, is_excluded)
+            if compiled.search(video_name) is not None:
+                grouped.setdefault(filename_group_key(video_name, compiled), []).append(entry)
+            else:
+                ungrouped.append(entry)
+
+        # Unmatched videos are one group apiece, so each adds to the group total.
+        total_groups = len(grouped) + len(ungrouped)
+        videos_text = _count_phrase(total_videos, "video")
+        summary_text = f"{videos_text} → {_count_phrase(total_groups, 'group')}"
+        if ungrouped:
+            summary_text += f" ({_count_phrase(len(ungrouped), 'unmatched video')})"
+        self._preview_summary_label.setText(summary_text)
+        self._preview_summary_label.setVisible(True)
+        self._preview_detail.setText(self._render_preview_detail(grouped, ungrouped))
+        self._preview_section.setVisible(True)
+
+    @staticmethod
+    def _render_video(name: str, excluded: bool) -> str:
+        """Return the HTML-escaped filename, tagged "(excluded)" for excluded videos."""
+        escaped_name = html.escape(name)
+        if not excluded:
+            return escaped_name
+        return f'{escaped_name} <span style="color:#888;">(excluded)</span>'
+
+    def _render_preview_detail(
+        self,
+        matched: dict[str, list[tuple[str, bool]]],
+        unmatched: list[tuple[str, bool]],
+    ) -> str:
+        """Compose the rich-text listing of each group key with its member videos."""
+        rows: list[str] = []
+        for key, entries in sorted(matched.items()):
+            names = ", ".join(self._render_video(name, excl) for name, excl in entries)
+            rows.append(f"<b>{html.escape(key)}</b> &rarr; {names}")
+        if unmatched:
+            names = ", ".join(self._render_video(name, excl) for name, excl in unmatched)
+            rows.append(f"<i>unmatched (each its own group)</i>: {names}")
+        return "<br>".join(rows)
+
+    def _relayout_preview(self) -> None:
+        """Resize this group and its parent page after the preview expands or collapses."""
+        for widget in (self._preview_detail, self._preview_section, self):
+            widget.adjustSize()
+        page = self.parentWidget()
+        if page is None:
+            return
+        page.adjustSize()
+        page_layout = page.layout()
+        if page_layout is not None:
+            page_layout.activate()
+        page.adjustSize()
+        dialog = self._find_parent_dialog()
+        if dialog is not None and hasattr(dialog, "_sync_page_width"):
+            # Deferred with a zero-delay timer so the scrollbar settles before syncing.
+            QTimer.singleShot(0, dialog._sync_page_width)
+
     def _create_documentation(self) -> QLabel:
         """Create help documentation for cross-validation settings."""
         help_label = QLabel(self)
@@ -42,25 +238,38 @@ def _create_documentation(self) -> QLabel:
         help_label.setText(
             """
             <h3>What is Cross-Validation Grouping?</h3>
-            <p>Cross-validation grouping determines how training data is split when 
+            <p>Cross-validation grouping determines how training data is split when
             evaluating model performance using leave-one-group-out cross-validation.</p>
-            
+
             <ul>
-              <li><b>Individual Animal:</b> Each group represents a single animal identity 
-              within a single video. During cross-validation, all labeled data for one 
-              animal from one video is held out for validation while the remaining animals' 
+              <li><b>Individual Animal:</b> Each group represents a single animal identity
+              within a single video. During cross-validation, all labeled data for one
+              animal from one video is held out for validation while the remaining animals'
               data is used for training.</li>
-              
-              <li><b>Video:</b> Each group represents a single video recording. During 
-              cross-validation, all labeled data from one video is held out for validation 
+
+              <li><b>Video:</b> Each group represents a single video recording. During
+              cross-validation, all labeled data from one video is held out for validation
               while data from other videos is used for training.</li>
+
+              <li><b>Filename Pattern:</b> Each group is defined by a regular expression
+              applied to the video filename. All videos whose filenames produce the same
+              key are placed in the same group, letting you group videos by an identifier
+              embedded in their names (for example, a cage ID). If the pattern contains a
+              capture group, the captured text is used as the key; otherwise the entire
+              match is used. Videos that do not match the pattern are each placed in their
+              own group.<br>
+              <b>Example - group by cage ID:</b> if your videos are named like
+              <tt>cage_0042_2026-06-16.mp4</tt>, the pattern <tt>cage_(\\d+)</tt> extracts
+              the cage number (<tt>0042</tt>), so every video recorded from the same cage
+              forms a single cross-validation group.</li>
             </ul>
-            
-            <p><b>Note:</b> For cross-validation to work properly, you need labeled data 
-            from multiple groups (multiple animals or multiple videos, depending on the 
-            grouping method selected). For rare behaviors, it may be easier to meet the 
-            minimum label requirements per group at the video level rather than at the 
-            individual animal level within a single video.</p>
+
+            <p><b>Note:</b> For cross-validation to work properly, you need labeled data
+            from multiple groups (multiple animals, videos, or filename-pattern groups,
+            depending on the grouping method selected). For rare behaviors, it may be
+            easier to meet the minimum label requirements per group at the video or
+            filename-pattern level rather than at the individual animal level within a
+            single video.</p>
             """
         )
         help_label.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.Preferred)
@@ -73,9 +282,11 @@ def get_values(self) -> dict:
         Returns:
             Dictionary with setting names and their current values.
         """
-        # Return the enum, not just the string
+        # Return the enum, not just the string. The regex is always returned (even
+        # for non-pattern strategies) so a previously entered pattern is preserved.
         return {
             CV_GROUPING_KEY: self._cv_grouping_combo.currentData(),
+            CV_GROUPING_REGEX_KEY: self._cv_grouping_regex_input.text().strip(),
         }
 
     def set_values(self, values: dict) -> None:
@@ -102,3 +313,25 @@ def set_values(self, values: dict) -> None:
             self._cv_grouping_combo.setCurrentIndex(
                 self._cv_grouping_combo.findData(CrossValidationGroupingStrategy.INDIVIDUAL)
             )
+
+        regex = values.get(CV_GROUPING_REGEX_KEY, "")
+        self._cv_grouping_regex_input.setText(regex if isinstance(regex, str) else "")
+        self._update_regex_row_visibility()
+
+    def validate(self) -> str | None:
+        """Check the regex input before saving when "Filename Pattern" is selected.
+
+        Returns:
+            None when another strategy is selected or the regex compiles; otherwise
+            a message stating that the regex is missing or is not valid.
+        """
+        if self._is_filename_pattern_selected():
+            pattern_text = self._cv_grouping_regex_input.text().strip()
+            if not pattern_text:
+                return "Filename Pattern cross-validation grouping requires a regular expression."
+            try:
+                re.compile(pattern_text)
+            except re.error as exc:
+                return f"The filename grouping pattern is not a valid regular expression: {exc}"
+        # Either another strategy is active or the pattern compiled cleanly.
+        return None
diff --git a/src/jabs/ui/settings_dialog/settings_dialog.py b/src/jabs/ui/settings_dialog/settings_dialog.py
index 0894a8e8..bdb14ea1 100644
--- a/src/jabs/ui/settings_dialog/settings_dialog.py
+++ b/src/jabs/ui/settings_dialog/settings_dialog.py
@@ -214,6 +214,20 @@ def _load_settings(self) -> None:
         for group in self._settings_groups:
             group.set_values(current_settings)
 
+    def _validate_all_groups(self) -> bool:
+        """Run each settings group's validation, stopping at the first failure.
+
+        Returns:
+            False as soon as a group reports an error, after showing that error in
+            a warning dialog (the caller should not save); True otherwise.
+        """
+        errors = (group.validate() for group in self._settings_groups)
+        first_error = next((error for error in errors if error), None)
+        if first_error is None:
+            return True
+        MessageDialog.warning(self, message=first_error)
+        return False
+
     def _on_save(self) -> None:
         """Save settings from all groups to project and close dialog.
 
@@ -223,6 +237,8 @@ def _on_save(self) -> None:
         """
         if self._settings_manager is None:
             raise RuntimeError("Settings manager is not set for this dialog.")
+        if not self._validate_all_groups():
+            return
         # Collect settings from all groups
         all_settings = {}
         for group in self._settings_groups:
@@ -269,7 +285,11 @@ def _create_settings_groups(self, parent: QWidget) -> None:
         """
         mode_group = ClassifierModeSettingsGroup(parent)
         self._settings_groups.append(mode_group)
-        cv_group = CrossValidationSettingsGroup(parent)
+        cv_videos = [
+            (video, self._settings_manager.is_video_excluded(video))
+            for video in self._project.video_manager.videos
+        ]
+        cv_group = CrossValidationSettingsGroup(videos=cv_videos, parent=parent)
         self._settings_groups.append(cv_group)
         cache_format_group = CacheFormatSettingsGroup(parent)
         self._settings_groups.append(cache_format_group)
@@ -282,6 +302,9 @@ def _on_save(self) -> None:
         - Multi-class -> Binary: warns that None labels will be inactive but preserved.
         - No mode change: saves immediately.
         """
+        if not self._validate_all_groups():
+            return
+
         all_new_settings: dict = {}
         for group in self._settings_groups:
             all_new_settings.update(group.get_values())
diff --git a/src/jabs/ui/settings_dialog/settings_group.py b/src/jabs/ui/settings_dialog/settings_group.py
index 274b547c..63459274 100644
--- a/src/jabs/ui/settings_dialog/settings_group.py
+++ b/src/jabs/ui/settings_dialog/settings_group.py
@@ -274,3 +274,15 @@ def set_values(self, values: dict) -> None:
             values: Dictionary mapping setting names to values.
         """
         pass
+
+    def validate(self) -> str | None:
+        """Check this group's control values before they are saved.
+
+        This base implementation accepts everything. Subclasses can override it
+        to block saving on bad input (for example, a malformed regular expression).
+
+        Returns:
+            ``None`` when the current values are valid, otherwise a human-readable
+            message describing what is wrong.
+        """
+        return None
diff --git a/src/jabs/ui/training_strategy.py b/src/jabs/ui/training_strategy.py
index b0a002f0..de092003 100644
--- a/src/jabs/ui/training_strategy.py
+++ b/src/jabs/ui/training_strategy.py
@@ -98,6 +98,7 @@ def build_report_data(
         cv_grouping_strategy: CrossValidationGroupingStrategy,
         distance_unit: str,
         settings: dict,
+        cv_grouping_regex: str | None = None,
     ) -> TrainingReportData:
         """Assemble the ``TrainingReportData`` for the trained model."""
         raise NotImplementedError
@@ -174,6 +175,7 @@ def build_report_data(
         cv_grouping_strategy: CrossValidationGroupingStrategy,
         distance_unit: str,
         settings: dict,
+        cv_grouping_regex: str | None = None,
     ) -> TrainingReportData:
         """Build the binary-mode training report with frame and bout counts.
 
@@ -202,6 +204,7 @@ def build_report_data(
             timestamp=timestamp,
             window_size=settings["window_size"],
             cv_grouping_strategy=cv_grouping_strategy,
+            cv_grouping_regex=cv_grouping_regex,
         )
 
     def cv_secondary_metric(self, cv_results: list[CrossValidationResult]) -> float | None:
@@ -286,6 +289,7 @@ def build_report_data(
         cv_grouping_strategy: CrossValidationGroupingStrategy,
         distance_unit: str,
         settings: dict,
+        cv_grouping_regex: str | None = None,
     ) -> TrainingReportData:
         """Build the multi-class training report with per-class frame and bout counts.
 
@@ -330,6 +334,7 @@ def build_report_data(
             timestamp=timestamp,
             window_size=settings.get("window_size", 0),
             cv_grouping_strategy=cv_grouping_strategy,
+            cv_grouping_regex=cv_grouping_regex,
             class_frame_counts=class_frame_counts,
             class_bout_counts=class_bout_counts,
         )
diff --git a/src/jabs/ui/training_thread.py b/src/jabs/ui/training_thread.py
index 7f40e32a..8ad7d452 100644
--- a/src/jabs/ui/training_thread.py
+++ b/src/jabs/ui/training_thread.py
@@ -166,6 +166,7 @@ def id_processed() -> None:
                 elapsed_ms=elapsed_ms,
                 timestamp=datetime.now(),
                 cv_grouping_strategy=self._project.settings_manager.cv_grouping_strategy,
+                cv_grouping_regex=self._project.settings_manager.cv_grouping_regex,
                 distance_unit=unit,
                 settings=settings,
             )
diff --git a/tests/classifier/test_classifier.py b/tests/classifier/test_classifier.py
index 4922d9de..f5a733bd 100644
--- a/tests/classifier/test_classifier.py
+++ b/tests/classifier/test_classifier.py
@@ -686,6 +686,37 @@ def test_count_label_threshold(self):
         # Only first identity meets threshold (>= 20)
         assert count == 1
 
+    def test_count_label_threshold_filename_pattern(self):
+        """Videos whose filenames share a regex key are counted as one pooled group."""
+        frames_by_video = {
+            "cage_1_day1.avi": (10, 10),
+            "cage_1_day2.avi": (15, 15),
+            "cage_2_day1.avi": (25, 25),
+            "cage_3_day1.avi": (5, 5),
+        }
+        all_counts = {
+            video: {0: {"fragmented_frame_counts": frames}}
+            for video, frames in frames_by_video.items()
+        }
+        strategy = CrossValidationGroupingStrategy.FILENAME_PATTERN
+        count = Classifier.count_label_threshold(
+            all_counts, cv_grouping_strategy=strategy, cv_grouping_regex=r"cage_(\d+)"
+        )
+        assert count == 2  # cages 1 (pooled 25/25) and 2 (25/25) meet it; cage 3 (5/5) not
+
+    def test_count_label_threshold_filename_pattern_invalid_regex_returns_zero(self):
+        """Neither an empty nor an uncompilable regex produces any trainable group."""
+        all_counts = {"cage_1.avi": {0: {"fragmented_frame_counts": (25, 25)}}}
+        strategy = CrossValidationGroupingStrategy.FILENAME_PATTERN
+        # "" is an empty pattern; "cage_(" has an unclosed group and cannot compile.
+        for bad_regex in ("", "cage_("):
+            group_count = Classifier.count_label_threshold(
+                all_counts,
+                cv_grouping_strategy=strategy,
+                cv_grouping_regex=bad_regex,
+            )
+            assert group_count == 0
+
     def test_label_threshold_met(self):
         """Test checking if label threshold is met."""
         all_counts = {
@@ -754,6 +785,26 @@ def test_label_threshold_met(self):
             min_groups=3,
             cv_grouping_strategy=CrossValidationGroupingStrategy.VIDEO,
         )
+
+    def test_label_threshold_met_filename_pattern(self):
+        """Filename-pattern grouping enables training when enough cages qualify."""
+        counts = {
+            "cage_1_day1.avi": {0: {"fragmented_frame_counts": (25, 25)}},
+            "cage_2_day1.avi": {0: {"fragmented_frame_counts": (25, 25)}},
+        }
+        # Two cages meet the threshold -> two CV groups available.
+        assert Classifier.label_threshold_met(
+            counts,
+            min_groups=2,
+            cv_grouping_strategy=CrossValidationGroupingStrategy.FILENAME_PATTERN,
+            cv_grouping_regex=r"cage_(\d+)",
+        )
+        assert not Classifier.label_threshold_met(
+            counts,
+            min_groups=3,
+            cv_grouping_strategy=CrossValidationGroupingStrategy.FILENAME_PATTERN,
+            cv_grouping_regex=r"cage_(\d+)",
+        )
         # VIDEO: One video below threshold, one above
         multi_video_counts_below = {
             "video1.avi": {
diff --git a/tests/classifier/test_multi_class_classifier.py b/tests/classifier/test_multi_class_classifier.py
index eb9d629e..86a42d31 100644
--- a/tests/classifier/test_multi_class_classifier.py
+++ b/tests/classifier/test_multi_class_classifier.py
@@ -714,6 +714,54 @@ def test_count_label_threshold_video_grouping(self) -> None:
 
         assert valid == 1
 
+    def test_count_label_threshold_filename_pattern(self) -> None:
+        """Filename-pattern grouping merges videos sharing a key before validity checks."""
+        counts_by_behavior = {
+            "None": {
+                "cage_1_day1.avi": {0: {"fragmented_frame_counts": (20, 0)}},
+                "cage_1_day2.avi": {0: {"fragmented_frame_counts": (0, 0)}},
+                "cage_2_day1.avi": {0: {"fragmented_frame_counts": (20, 0)}},
+            },
+            "Walk": {
+                "cage_1_day1.avi": {0: {"fragmented_frame_counts": (20, 0)}},
+                "cage_1_day2.avi": {0: {"fragmented_frame_counts": (0, 0)}},
+                "cage_2_day1.avi": {0: {"fragmented_frame_counts": (20, 0)}},
+            },
+            "Run": {
+                "cage_1_day1.avi": {0: {"fragmented_frame_counts": (10, 0)}},
+                "cage_1_day2.avi": {0: {"fragmented_frame_counts": (10, 0)}},
+                "cage_2_day1.avi": {0: {"fragmented_frame_counts": (20, 0)}},
+            },
+        }
+
+        valid = MultiClassClassifier.count_label_threshold(
+            counts_by_behavior=counts_by_behavior,
+            behavior_names=["None", "Walk", "Run"],
+            cv_grouping_strategy=CrossValidationGroupingStrategy.FILENAME_PATTERN,
+            cv_grouping_regex=r"cage_(\d+)",
+        )
+
+        # cage 1 (merged across two videos): None=20, Walk=20, Run=20.
+        # cage 2: None=20, Walk=20, Run=20. Each cage is a valid test split.
+        assert valid == 2
+
+    def test_count_label_threshold_filename_pattern_invalid_regex_returns_zero(self) -> None:
+        """An empty or invalid filename-pattern regex yields no valid splits."""
+        counts_by_behavior = {
+            "None": {"cage_1.avi": {0: {"fragmented_frame_counts": (20, 0)}}},
+            "Walk": {"cage_1.avi": {0: {"fragmented_frame_counts": (20, 0)}}},
+        }
+        for bad_regex in ("", "cage_("):
+            assert (
+                MultiClassClassifier.count_label_threshold(
+                    counts_by_behavior=counts_by_behavior,
+                    behavior_names=["None", "Walk"],
+                    cv_grouping_strategy=CrossValidationGroupingStrategy.FILENAME_PATTERN,
+                    cv_grouping_regex=bad_regex,
+                )
+                == 0
+            )
+
     def test_count_label_threshold_empty_behavior_names_returns_zero(self) -> None:
         """No behaviors → no valid splits."""
         assert (
diff --git a/tests/project/test_cv_grouping.py b/tests/project/test_cv_grouping.py
new file mode 100644
index 00000000..65067907
--- /dev/null
+++ b/tests/project/test_cv_grouping.py
@@ -0,0 +1,106 @@
+"""Tests for Project._assign_cv_group_ids cross-validation group assignment."""
+
+import pytest
+
+from jabs.core.enums import CrossValidationGroupingStrategy
+from jabs.project.project import Project
+
+
+def test_assign_cv_group_ids_filename_pattern_groups_videos_by_key() -> None:
+    """Videos whose filenames share a regex key are merged into one CV group."""
+    videos = ["cage_1_a.mp4", "cage_1_b.mp4", "cage_2_a.mp4", "loose.mp4"]
+    all_group_keys = [
+        ("cage_1_a.mp4", 0),
+        ("cage_1_a.mp4", 1),
+        ("cage_1_b.mp4", 0),
+        ("cage_2_a.mp4", 0),
+        ("loose.mp4", 0),
+    ]
+
+    key_to_gid, group_mapping = Project._assign_cv_group_ids(
+        all_group_keys,
+        videos,
+        CrossValidationGroupingStrategy.FILENAME_PATTERN,
+        regex=r"cage_(\d+)",
+    )
+
+    # All identities of cage 1 (across both videos) share one group id.
+    cage1_gid = key_to_gid[("cage_1_a.mp4", 0)]
+    assert key_to_gid[("cage_1_a.mp4", 1)] == cage1_gid
+    assert key_to_gid[("cage_1_b.mp4", 0)] == cage1_gid
+
+    # Cage 2 and the unmatched video are each their own group.
+    cage2_gid = key_to_gid[("cage_2_a.mp4", 0)]
+    loose_gid = key_to_gid[("loose.mp4", 0)]
+    assert len({cage1_gid, cage2_gid, loose_gid}) == 3
+
+    assert group_mapping[cage1_gid]["label"] == "1"
+    assert group_mapping[cage1_gid]["videos"] == ["cage_1_a.mp4", "cage_1_b.mp4"]
+    assert group_mapping[cage1_gid]["video"] is None
+    assert group_mapping[cage1_gid]["identity"] is None
+
+    # An unmatched filename forms its own group keyed by the filename itself.
+    assert group_mapping[loose_gid]["label"] == "loose.mp4"
+    assert group_mapping[loose_gid]["videos"] == ["loose.mp4"]
+
+
+def test_assign_cv_group_ids_filename_pattern_ids_are_contiguous() -> None:
+    """Group ids are assigned contiguously from zero in row order."""
+    videos = ["cage_2_x.mp4", "cage_1_y.mp4"]
+    all_group_keys = [("cage_2_x.mp4", 0), ("cage_1_y.mp4", 0)]
+
+    key_to_gid, group_mapping = Project._assign_cv_group_ids(
+        all_group_keys,
+        videos,
+        CrossValidationGroupingStrategy.FILENAME_PATTERN,
+        regex=r"cage_(\d+)",
+    )
+
+    assert sorted(group_mapping) == [0, 1]
+    # First key encountered ("cage_2_x") gets gid 0.
+    assert key_to_gid[("cage_2_x.mp4", 0)] == 0
+    assert group_mapping[0]["label"] == "2"
+
+
+def test_assign_cv_group_ids_filename_pattern_requires_regex() -> None:
+    """Filename-pattern grouping with an empty regex raises ValueError."""
+    with pytest.raises(ValueError, match="non-empty"):
+        Project._assign_cv_group_ids(
+            [("cage_1_a.mp4", 0)],
+            ["cage_1_a.mp4"],
+            CrossValidationGroupingStrategy.FILENAME_PATTERN,
+            regex="",
+        )
+
+
+def test_assign_cv_group_ids_video_grouping_unchanged() -> None:
+    """Regression: VIDEO grouping still assigns one group per video."""
+    videos = ["video_a.mp4", "video_b.mp4"]
+    all_group_keys = [("video_a.mp4", 0), ("video_a.mp4", 1), ("video_b.mp4", 0)]
+
+    key_to_gid, group_mapping = Project._assign_cv_group_ids(
+        all_group_keys, videos, CrossValidationGroupingStrategy.VIDEO
+    )
+
+    assert key_to_gid[("video_a.mp4", 0)] == key_to_gid[("video_a.mp4", 1)]
+    assert key_to_gid[("video_a.mp4", 0)] != key_to_gid[("video_b.mp4", 0)]
+    assert group_mapping[key_to_gid[("video_a.mp4", 0)]] == {
+        "video": "video_a.mp4",
+        "identity": None,
+    }
+
+
+def test_assign_cv_group_ids_individual_grouping_unchanged() -> None:
+    """Regression: INDIVIDUAL grouping assigns one group per (video, identity)."""
+    videos = ["video_a.mp4"]
+    all_group_keys = [("video_a.mp4", 0), ("video_a.mp4", 1)]
+
+    key_to_gid, group_mapping = Project._assign_cv_group_ids(
+        all_group_keys, videos, CrossValidationGroupingStrategy.INDIVIDUAL
+    )
+
+    assert key_to_gid[("video_a.mp4", 0)] != key_to_gid[("video_a.mp4", 1)]
+    assert group_mapping[key_to_gid[("video_a.mp4", 1)]] == {
+        "video": "video_a.mp4",
+        "identity": 1,
+    }
diff --git a/tests/project/test_settings_manager.py b/tests/project/test_settings_manager.py
index dc724863..bc39542b 100644
--- a/tests/project/test_settings_manager.py
+++ b/tests/project/test_settings_manager.py
@@ -233,3 +233,22 @@ def test_set_video_excluded_toggle_back_to_included(mock_project):
     settings_manager.set_video_excluded("video1.avi", False)
 
     assert settings_manager.is_video_excluded("video1.avi") is False
+
+
+def test_cv_grouping_regex_defaults_to_empty(mock_project):
+    """cv_grouping_regex returns an empty string when not configured."""
+    with mock_project.project_paths.project_file.open("w") as f:
+        json.dump({}, f)
+
+    settings_manager = SettingsManager(mock_project.project_paths)
+    assert settings_manager.cv_grouping_regex == ""
+
+
+def test_cv_grouping_regex_reads_configured_value(mock_project):
+    """cv_grouping_regex returns the value stored under project settings."""
+    settings = {"settings": {"cv_grouping_regex": r"cage_(\d+)"}}
+    with mock_project.project_paths.project_file.open("w") as f:
+        json.dump(settings, f)
+
+    settings_manager = SettingsManager(mock_project.project_paths)
+    assert settings_manager.cv_grouping_regex == r"cage_(\d+)"
diff --git a/tests/ui/_fakes.py b/tests/ui/_fakes.py
index 620188d9..95059381 100644
--- a/tests/ui/_fakes.py
+++ b/tests/ui/_fakes.py
@@ -156,6 +156,7 @@ def __init__(
         self.settings_manager = SimpleNamespace(
             classifier_mode=mode,
             cv_grouping_strategy=CrossValidationGroupingStrategy.INDIVIDUAL,
+            cv_grouping_regex="",
             get_behavior=lambda _behavior: dict(self._DEFAULT_BEHAVIOR_SETTINGS),
             is_video_excluded=lambda _video: False,
         )
diff --git a/tests/ui/test_settings_dialog.py b/tests/ui/test_settings_dialog.py
index 381885fa..e413d5a2 100644
--- a/tests/ui/test_settings_dialog.py
+++ b/tests/ui/test_settings_dialog.py
@@ -3,8 +3,8 @@
 
 import pytest
 
-from jabs.core.constants import CLASSIFIER_MODE_KEY
-from jabs.core.enums import ClassifierMode
+from jabs.core.constants import CLASSIFIER_MODE_KEY, CV_GROUPING_KEY, CV_GROUPING_REGEX_KEY
+from jabs.core.enums import ClassifierMode, CrossValidationGroupingStrategy
 
 try:
     from PySide6.QtWidgets import QApplication
@@ -12,6 +12,9 @@
     from jabs.ui.settings_dialog.classifier_mode_settings_group import (
         ClassifierModeSettingsGroup,
     )
+    from jabs.ui.settings_dialog.cross_validation_settings_group import (
+        CrossValidationSettingsGroup,
+    )
     from jabs.ui.settings_dialog.settings_dialog import _OverlapCheckThread
 
     SKIP_UI_TESTS = False
@@ -81,3 +84,146 @@ def test_classifier_mode_group_roundtrips_enum_not_label() -> None:
 
     group.set_values({CLASSIFIER_MODE_KEY: ClassifierMode.BINARY.value})
     assert group.get_values() == {CLASSIFIER_MODE_KEY: ClassifierMode.BINARY}
+
+
+def test_cv_grouping_group_roundtrips_strategy_and_regex() -> None:
+    """Set/get round-trips the grouping strategy enum and the filename regex."""
+    group = CrossValidationSettingsGroup()
+
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.FILENAME_PATTERN.value,
+            CV_GROUPING_REGEX_KEY: r"cage_(\d+)",
+        }
+    )
+    values = group.get_values()
+    assert values[CV_GROUPING_KEY] == CrossValidationGroupingStrategy.FILENAME_PATTERN
+    assert values[CV_GROUPING_REGEX_KEY] == r"cage_(\d+)"
+
+
+def test_cv_grouping_group_validate_blocks_empty_pattern() -> None:
+    """The Filename Pattern strategy with no regex fails validation."""
+    group = CrossValidationSettingsGroup()
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.FILENAME_PATTERN.value,
+            CV_GROUPING_REGEX_KEY: "",
+        }
+    )
+    assert group.validate() is not None
+
+
+def test_cv_grouping_group_validate_blocks_invalid_pattern() -> None:
+    """The Filename Pattern strategy with an invalid regex fails validation."""
+    group = CrossValidationSettingsGroup()
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.FILENAME_PATTERN.value,
+            CV_GROUPING_REGEX_KEY: "cage_(",
+        }
+    )
+    assert group.validate() is not None
+
+
+def test_cv_grouping_group_validate_passes_for_valid_pattern() -> None:
+    """A valid Filename Pattern regex passes validation."""
+    group = CrossValidationSettingsGroup()
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.FILENAME_PATTERN.value,
+            CV_GROUPING_REGEX_KEY: r"cage_(\d+)",
+        }
+    )
+    assert group.validate() is None
+
+
+def test_cv_grouping_group_validate_ignores_regex_for_other_strategies() -> None:
+    """Non-pattern strategies do not validate the regex field, even if it is invalid."""
+    group = CrossValidationSettingsGroup()
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.VIDEO.value,
+            CV_GROUPING_REGEX_KEY: "cage_(",  # invalid, but ignored for VIDEO grouping
+        }
+    )
+    assert group.validate() is None
+
+
+_PREVIEW_VIDEOS = [
+    ("cage_0042_day1.mp4", False),
+    ("cage_0042_day2.mp4", True),  # excluded from training
+    ("cage_0043_day1.mp4", False),
+    ("calibration.mp4", False),  # does not match cage_(\d+)
+]
+
+
+def _select_filename_pattern(group, regex: str) -> None:
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.FILENAME_PATTERN.value,
+            CV_GROUPING_REGEX_KEY: regex,
+        }
+    )
+
+
+def test_cv_grouping_preview_summarizes_groups_and_unmatched() -> None:
+    """The preview summary counts videos, groups, and unmatched files."""
+    group = CrossValidationSettingsGroup(videos=_PREVIEW_VIDEOS)
+    _select_filename_pattern(group, r"cage_(\d+)")
+
+    summary = group._preview_summary_label.text()
+    # 4 videos -> cage 0042, cage 0043, plus the unmatched calibration video = 3 groups.
+    assert "4 videos" in summary
+    assert "3 groups" in summary
+    assert "1 unmatched video" in summary
+    assert not group._preview_summary_label.isHidden()
+    assert not group._preview_section.isHidden()
+
+
+def test_cv_grouping_preview_lists_groups_and_marks_excluded() -> None:
+    """The breakdown lists group keys, members, and marks excluded videos."""
+    group = CrossValidationSettingsGroup(videos=_PREVIEW_VIDEOS)
+    _select_filename_pattern(group, r"cage_(\d+)")
+
+    detail = group._preview_detail.text()
+    assert "0042" in detail
+    assert "0043" in detail
+    assert "calibration.mp4" in detail
+    assert "unmatched" in detail
+    # The excluded video is annotated; non-excluded videos are not.
+    assert "(excluded)" in detail
+    assert detail.count("(excluded)") == 1
+
+
+@pytest.mark.parametrize("regex", ["", "cage_("], ids=["empty", "invalid"])
+def test_cv_grouping_preview_hidden_for_empty_or_invalid_regex(regex) -> None:
+    """No preview is shown when the regex is empty or does not compile."""
+    group = CrossValidationSettingsGroup(videos=_PREVIEW_VIDEOS)
+    _select_filename_pattern(group, regex)
+
+    assert group._preview_summary_label.isHidden()
+    assert group._preview_section.isHidden()
+
+
+def test_cv_grouping_preview_hidden_for_non_pattern_strategy() -> None:
+    """No preview is shown for a non-pattern strategy (exercised here with Video)."""
+    group = CrossValidationSettingsGroup(videos=_PREVIEW_VIDEOS)
+    group.set_values(
+        {
+            CV_GROUPING_KEY: CrossValidationGroupingStrategy.VIDEO.value,
+            CV_GROUPING_REGEX_KEY: r"cage_(\d+)",
+        }
+    )
+
+    assert group._preview_summary_label.isHidden()
+    assert group._preview_section.isHidden()
+
+
+def test_cv_grouping_preview_handles_no_videos() -> None:
+    """With no project videos, the preview reports that there is nothing to show."""
+    group = CrossValidationSettingsGroup(videos=[])
+    _select_filename_pattern(group, r"cage_(\d+)")
+
+    assert "No videos" in group._preview_summary_label.text()
+    assert not group._preview_summary_label.isHidden()
+    assert group._preview_section.isHidden()

From 03150f5fa7f7590dd60ac1d335df98311a4e32df Mon Sep 17 00:00:00 2001
From: Glen Beane <356266+gbeane@users.noreply.github.com>
Date: Tue, 16 Jun 2026 16:06:22 -0400
Subject: [PATCH 2/2] Document filename pattern CV grouping in user guide

---
 docs/user-guide/gui.md                    | 12 +++++++++++-
 src/jabs/resources/docs/user_guide/gui.md | 12 +++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/docs/user-guide/gui.md b/docs/user-guide/gui.md
index 19058412..54d46694 100644
--- a/docs/user-guide/gui.md
+++ b/docs/user-guide/gui.md
@@ -161,10 +161,20 @@ Project settings are saved within the project directory and apply only to the cu
 
 | Setting                        | Description                                                      |
 |-------------------------------|------------------------------------------------------------------|
-| Cross Validation Grouping      | Determines how cross-validation groups are defined. Options are "Individual Animal" (default) or "Video". |
+| Cross Validation Grouping      | Determines how cross-validation groups are defined. Options are "Individual Animal" (default), "Video", or "Filename Pattern". See [Cross-Validation Grouping](#cross-validation-grouping) below. |
 
 As new settings are added, they will appear in this dialog with inline documentation.
 
+### Cross-Validation Grouping
+
+The **Cross Validation Grouping** setting controls how labeled data is partitioned into groups for leave-one-group-out cross-validation:
+
+- **Individual Animal** (default): each group is a single animal identity within a single video.
+- **Video**: each group is a single video; all identities within a video are held out together.
+- **Filename Pattern**: groups are defined by a regular expression applied to each video's filename. All videos whose filenames produce the same key are placed in the same group, which is useful for grouping videos by an identifier embedded in their names (for example, a cage ID). If the pattern contains a capture group, the captured text is used as the key; otherwise the entire match is used. Videos that do not match the pattern are each placed in their own group.
+
+When you select **Filename Pattern**, a text field appears for the regular expression. For example, if your videos are named like `cage_0042_2026-06-16.mp4`, the pattern `cage_(\d+)` extracts the cage number (`0042`) so that every video recorded from the same cage forms a single cross-validation group. A live preview below the field shows how your project's videos partition into groups under the current pattern (videos excluded from training are marked), so you can confirm the pattern before saving.
+
 
 ## Overlays
 
diff --git a/src/jabs/resources/docs/user_guide/gui.md b/src/jabs/resources/docs/user_guide/gui.md
index aec7c611..ca61b770 100644
--- a/src/jabs/resources/docs/user_guide/gui.md
+++ b/src/jabs/resources/docs/user_guide/gui.md
@@ -161,10 +161,20 @@ Project settings are saved within the project directory and apply only to the cu
 
 | Setting                        | Description                                                      |
 |-------------------------------|------------------------------------------------------------------|
-| Cross Validation Grouping      | Determines how cross-validation groups are defined. Options are "Individual Animal" (default) or "Video". |
+| Cross Validation Grouping      | Determines how cross-validation groups are defined. Options are "Individual Animal" (default), "Video", or "Filename Pattern". See [Cross-Validation Grouping](#cross-validation-grouping) below. |
 
 As new settings are added, they will appear in this dialog with inline documentation.
 
+### Cross-Validation Grouping
+
+The **Cross Validation Grouping** setting controls how labeled data is partitioned into groups for leave-one-group-out cross-validation:
+
+- **Individual Animal** (default): each group is a single animal identity within a single video.
+- **Video**: each group is a single video; all identities within a video are held out together.
+- **Filename Pattern**: groups are defined by a regular expression applied to each video's filename. All videos whose filenames produce the same key are placed in the same group, which is useful for grouping videos by an identifier embedded in their names (for example, a cage ID). If the pattern contains a capture group, the captured text is used as the key; otherwise the entire match is used. Videos that do not match the pattern are each placed in their own group.
+
+When you select **Filename Pattern**, a text field appears for the regular expression. For example, if your videos are named like `cage_0042_2026-06-16.mp4`, the pattern `cage_(\d+)` extracts the cage number (`0042`) so that every video recorded from the same cage forms a single cross-validation group. A live preview below the field shows how your project's videos partition into groups under the current pattern (videos excluded from training are marked), so you can confirm the pattern before saving.
+
 
 ## Overlays