MaterializeInc · jasonhernandez · May 29, 2026 · May 29, 2026
diff --git a/misc/python/materialize/cli/mz_workload_anonymize.py b/misc/python/materialize/cli/mz_workload_anonymize.py
@@ -263,6 +263,16 @@ def main() -> int:
         help="After anonymizing, scan the output for surviving original identifiers and "
         "non-anonymized string literals, and refuse to write if any are found.",
     )
+    parser.add_argument(
+        "--require-parser",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help="Require the mz-sql-anonymize parser for query literal redaction "
+        "(the default). With --no-require-parser, fall back to a weaker regex "
+        "that only redacts single-quoted strings (missing numbers, dollar-quoted "
+        "strings, and comments) when the parser binary is unavailable or a "
+        "statement does not parse.",
+    )
 
     parser.add_argument(
         "file",
@@ -272,6 +282,20 @@ def main() -> int:
 
     args = parser.parse_args()
 
+    # Resolve the output target up front so an invalid invocation fails before
+    # any work (and before the parser-availability check below).
+    if args.output:
+        output = args.output
+    elif args.in_place:
+        output = args.file
+    else:
+        print(
+            "error: specify an output with -o/--output (use '-' for stdout) "
+            "or pass --in-place to overwrite the input file",
+            file=sys.stderr,
+        )
+        return 1
+
     with open(args.file) as f:
         workload = yaml.load(f, Loader=yaml.CSafeLoader)
 
@@ -555,22 +579,47 @@ def replace_literals(d: dict[str, Any], entry: str) -> None:
     # Redact literals in query SQL with Materialize's own parser, in one batch.
     # The parser handles every literal form the dialect supports (numbers, hex
     # strings, intervals, dollar-quoted and escape strings) where the regex only
-    # caught single-quoted strings. Fall back to the regex per-statement when
-    # the helper binary is unavailable or cannot parse a given statement.
+    # caught single-quoted strings.
+    #
+    # --require-parser gates only the wholesale case: if the parser binary is
+    # unavailable, the tool errors rather than silently redacting every query
+    # with the weaker regex. Individual statements that do not parse fall back
+    # to the regex with a warning either way (this is a property of the captured
+    # SQL, not of whether the parser is present), and the verify pass still
+    # scans the result.
     if query_literal_targets:
         sqls = [q["sql"] for q in query_literal_targets]
         redacted = redact_literals_via_parser(sqls)
         if redacted is None:
+            if args.require_parser:
+                print(
+                    "error: mz-sql-anonymize helper not found, so query literals "
+                    "cannot be redacted with the parser. Build it with:\n"
+                    "    cargo build --release -p mz-sql-anonymize\n"
+                    "or pass --no-require-parser to fall back to a weaker regex "
+                    "that only redacts single-quoted strings (missing numbers, "
+                    "dollar-quoted strings, and comments).",
+                    file=sys.stderr,
+                )
+                return 1
             print(
                 "warning: mz-sql-anonymize helper not found; using regex literal "
                 "redaction for queries, which misses numbers, dollar-quoted "
-                "strings, and comments. Build it for exact redaction:\n"
-                "    cargo build --release -p mz-sql-anonymize",
+                "strings, and comments (--no-require-parser).",
                 file=sys.stderr,
             )
             for q in query_literal_targets:
                 q["sql"] = anonymize_literals_in_sql(q["sql"])
         else:
+            unparsed = [i for i, red in enumerate(redacted) if red is None]
+            if unparsed:
+                print(
+                    f"warning: mz-sql-anonymize could not parse {len(unparsed)} of "
+                    f"{len(redacted)} captured queries; falling back to the regex "
+                    "for those (it only redacts single-quoted strings). The verify "
+                    "pass still scans them.",
+                    file=sys.stderr,
+                )
             for q, red in zip(query_literal_targets, redacted):
                 q["sql"] = (
                     red if red is not None else anonymize_literals_in_sql(q["sql"])
@@ -588,18 +637,6 @@ def replace_literals(d: dict[str, Any], entry: str) -> None:
                 print(f"  {problem}", file=sys.stderr)
             return 1
 
-    if args.output:
-        output = args.output
-    elif args.in_place:
-        output = args.file
-    else:
-        print(
-            "error: specify an output with -o/--output (use '-' for stdout) "
-            "or pass --in-place to overwrite the input file",
-            file=sys.stderr,
-        )
-        return 1
-
     if output == "-":
         yaml.dump(new, sys.stdout, Dumper=yaml.CSafeDumper)
     else:

diff --git a/misc/python/materialize/cli/mz_workload_anonymize_test.py b/misc/python/materialize/cli/mz_workload_anonymize_test.py
@@ -93,8 +93,14 @@ def run_tool(
     workload: dict[str, Any],
     *extra_args: str,
     in_place: bool = False,
+    require_parser: bool = False,
 ) -> tuple[int, dict[str, Any] | None, str]:
-    """Run main() against a workload, returning (exit_code, output, dumped_text)."""
+    """Run main() against a workload, returning (exit_code, output, dumped_text).
+
+    Defaults to --no-require-parser so tests are deterministic whether or not
+    the mz-sql-anonymize binary is built; tests that exercise the parser
+    requirement pass require_parser=True.
+    """
     inp = tmp_path / "workload.yml"
     inp.write_text(yaml.safe_dump(workload))
     argv = ["mz-workload-anonymize", str(inp)]
@@ -104,6 +110,8 @@ def run_tool(
     else:
         out = tmp_path / "out.yml"
         argv += ["-o", str(out)]
+    if not require_parser:
+        argv.append("--no-require-parser")
     argv += list(extra_args)
 
     with mock.patch.object(sys, "argv", argv):
@@ -260,6 +268,19 @@ def test_regex_fallback_warns(
     assert "mz-sql-anonymize helper not found" in capsys.readouterr().err
 
 
+def test_require_parser_errors_without_binary(
+    tmp_path: Any, force_regex: None, capsys: pytest.CaptureFixture[str]
+) -> None:
+    # By default the parser is required: with no binary the tool must refuse to
+    # run rather than silently fall back to the weaker regex.
+    rc, out, _text = run_tool(tmp_path, base_workload(), require_parser=True)
+    assert rc == 1
+    assert out is None
+    err = capsys.readouterr().err
+    assert "mz-sql-anonymize" in err
+    assert "--no-require-parser" in err
+
+
 @pytest.mark.skipif(
     mz_workload_anonymize._locate_redactor() is None,
     reason="mz-sql-anonymize binary not built; run `cargo build --release -p mz-sql-anonymize`",

diff --git a/test/workload-replay/README.md b/test/workload-replay/README.md
@@ -80,14 +80,18 @@ Anonymizes identifiers and literals in workload captures for sharing without exp
 *Literals (`--literals`, enabled by default):*
 - Query SQL is redacted with Materialize's own parser (`mz-sql-anonymize`),
   replacing all literals — strings, numbers, hex strings, intervals — with
-  `'<REDACTED>'`. If the helper binary is not built, the tool falls back to a
-  regex that only catches single-quoted strings (and prints a warning).
+  `'<REDACTED>'`. **The parser binary is required by default**: if it is not
+  built, the tool errors instead of silently redacting every query with the
+  weaker regex. Pass `--no-require-parser` to allow that regex fallback (it
+  only catches single-quoted strings, missing numbers, dollar-quoted strings,
+  and comments). Individual statements that do not parse fall back to the regex
+  with a warning regardless, and the verify pass still scans them.
 - `create_sql` strings (including connection hosts/users, sink topics, source
   options, and column defaults) → `'literal_1'`, `'literal_2'`, ... via regex.
   The parser is not used here because `to_ast_string_redacted()` intentionally
   does not redact DDL option strings.
 
-For exact, parser-based query redaction, build the helper once:
+Build the helper once (required for the default `--require-parser` mode):
 ```bash
 cargo build --release -p mz-sql-anonymize
 ```
@@ -105,6 +109,7 @@ bin/mz-workload-anonymize <file> [OPTIONS]
 | `--identifiers` / `--no-identifiers` | Anonymize object names | enabled |
 | `--literals` / `--no-literals` | Anonymize literals | enabled |
 | `--verify` / `--no-verify` | Re-scan output for leaks and refuse to write if any are found | enabled |
+| `--require-parser` / `--no-require-parser` | Require the parser for query literals; error rather than fall back to the weaker regex | enabled |
 
 **Examples:**
 ```bash