From 786a6083eb25ab5e8db3d674fc04de01e6c7abec Mon Sep 17 00:00:00 2001
From: Thomas Chauchefoin <thomas.chauchefoin@trailofbits.com>
Date: Tue, 26 May 2026 00:58:42 +0200
Subject: [PATCH 1/2] Split shorten_code() formatting from dedup tracking
 (GHSA-cffv-grgg-g429)

AnalysisContext.shorten_code() mutated a shared reported_shortened_code
set as a side effect of formatting, even though most callers only wanted
the formatted string. It could then shadow findings from future analysis
runs.

For instance, UnsafeImportsML processed every import and would
leave MLAllowlist with an already-reported state for all of them.
Any import outside ML_ALLOWLIST would not be flagged by the second run.

Split shorten_code() into a pure formatter and an explicit mark_reported()
helper.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 fickling/analysis.py  | 42 ++++++++++++++++++++++++------------------
 fickling/ml.py        |  4 +---
 test/test_bypasses.py | 22 ++++++++++++++++++++++
 3 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/fickling/analysis.py b/fickling/analysis.py
index d24afc8..a7ab43c 100644
--- a/fickling/analysis.py
+++ b/fickling/analysis.py
@@ -78,19 +78,25 @@ def analyze(self, analysis: Analysis) -> list[AnalysisResult]:
     def results(self) -> AnalysisResults:
         return AnalysisResults(pickled=self.pickled, results=self.previous_results)
 
-    def shorten_code(self, ast_node) -> tuple[str, bool]:
+    def shorten_code(self, ast_node) -> str:
+        """Return a short, human-readable form of an AST node for use in
+        analysis messages. Pure formatter — does not touch dedup state.
+        """
         code = unparse(ast_node).strip()
         if len(code) > 32:
             cutoff = code.find("(")
-            if code[cutoff] == "(":
-                shortened_code = f"{code[: code.find('(')].strip()}(...)"
-            else:
-                shortened_code = code
-        else:
-            shortened_code = code
-        was_already_reported = shortened_code in self.reported_shortened_code
-        self.reported_shortened_code.add(shortened_code)
-        return shortened_code, was_already_reported
+            if cutoff >= 0:
+                return f"{code[:cutoff].strip()}(...)"
+        return code
+
+    def mark_reported(self, shortened: str) -> bool:
+        """Mark a shortened code fragment as reported. Returns True if
+        this was the first mark, False if a prior call already marked it.
+        """
+        if shortened in self.reported_shortened_code:
+            return False
+        self.reported_shortened_code.add(shortened)
+        return True
 
 
 class Analyzer(metaclass=AnalyzerMeta):
@@ -255,8 +261,8 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
 class NonStandardImports(Analysis):
     def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
         for node in context.pickled.non_standard_imports():
-            shortened, already_reported = context.shorten_code(node)
-            if not already_reported:
+            shortened = context.shorten_code(node)
+            if context.mark_reported(shortened):
                 yield AnalysisResult(
                     Severity.LIKELY_UNSAFE,
                     f"`{shortened}` imports a Python module that is not a part of "
@@ -324,7 +330,7 @@ class UnsafeImportsML(Analysis):
 
     def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
         for node in context.pickled.properties.imports:
-            shortened, _ = context.shorten_code(node)
+            shortened = context.shorten_code(node)
             all_modules = [
                 node.module.rsplit(".", i)[0] for i in range(0, node.module.count(".") + 1)
             ]
@@ -377,7 +383,7 @@ class BadCalls(Analysis):
 
     def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
         for node in context.pickled.properties.calls:
-            shortened, _already_reported = context.shorten_code(node)
+            shortened = context.shorten_code(node)
             if any(shortened.startswith(f"{c}(") for c in self.BAD_CALLS):
                 yield AnalysisResult(
                     Severity.OVERTLY_MALICIOUS,
@@ -397,7 +403,7 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
                 # if the call is to a constructor of an object imported from the Python
                 # standard library, it's probably okay
                 continue
-            shortened, already_reported = context.shorten_code(node)
+            shortened = context.shorten_code(node)
             if (
                 shortened.startswith("eval(")
                 or shortened.startswith("exec(")
@@ -413,7 +419,7 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
                     "OvertlyBadEval",
                     trigger=shortened,
                 )
-            elif not already_reported:
+            elif context.mark_reported(shortened):
                 yield AnalysisResult(
                     Severity.LIKELY_UNSAFE,
                     f"Call to `{shortened}` can execute arbitrary code and is inherently unsafe",
@@ -429,7 +435,7 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
                 n.name in SAFE_BUILTINS for n in node.names
             ):
                 continue
-            shortened, _ = context.shorten_code(node)
+            shortened = context.shorten_code(node)
             yield AnalysisResult(
                 Severity.LIKELY_OVERTLY_MALICIOUS,
                 f"`{shortened}` is suspicious and indicative of an overtly malicious pickle file",
@@ -447,7 +453,7 @@ def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
             # Malformed pickle or resource exhaustion - dedicated analyses will report this
             return
         for varname, asmt in unused.items():
-            shortened, _ = context.shorten_code(asmt.value)
+            shortened = context.shorten_code(asmt.value)
             yield AnalysisResult(
                 Severity.SUSPICIOUS,
                 f"Variable `{varname}` is assigned value `{shortened}` but unused afterward; "
diff --git a/fickling/ml.py b/fickling/ml.py
index 12b415e..37c88c9 100644
--- a/fickling/ml.py
+++ b/fickling/ml.py
@@ -295,9 +295,7 @@ def __init__(self):
 
     def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
         for node in context.pickled.properties.imports:
-            shortened, already_reported = context.shorten_code(node)
-            if already_reported:
-                continue
+            shortened = context.shorten_code(node)
 
             if isinstance(node, ast.ImportFrom):
                 # from module import x
diff --git a/test/test_bypasses.py b/test/test_bypasses.py
index 745e793..950a3cd 100644
--- a/test/test_bypasses.py
+++ b/test/test_bypasses.py
@@ -683,6 +683,28 @@ def test_missing_osx_support(self):
             "from _osx_support import _find_build_tool",
         )
 
+    # https://github.com/trailofbits/fickling/security/advisories/GHSA-cffv-grgg-g429
+    def test_ml_allowlist_not_shadowed_by_unsafe_imports_ml(self):
+        """MLAllowlist must flag imports outside ML_ALLOWLIST even when another
+        analysis (UnsafeImportsML) has already iterated the same import.
+        """
+        pickled = Pickled(
+            [
+                op.Proto.create(4),
+                op.ShortBinUnicode("ast"),
+                op.ShortBinUnicode("parse"),
+                op.StackGlobal(),
+                op.Stop(),
+            ]
+        )
+        res = check_safety(pickled)
+        self.assertGreater(res.severity, Severity.LIKELY_SAFE)
+        detailed = res.detailed_results().get("AnalysisResult", {})
+        self.assertIsNotNone(
+            detailed.get("MLAllowlist"),
+            "MLAllowlist did not produce a finding for `from ast import parse`",
+        )
+
 
 class TestUnsafeModuleCoverage(TestCase):
     """Verify every entry in UNSAFE_MODULES and UNSAFE_IMPORTS triggers detection."""

From f10380c2d7eaa9bf22350195348c622b8f7ce81e Mon Sep 17 00:00:00 2001
From: Thomas Chauchefoin <thomas.chauchefoin@trailofbits.com>
Date: Wed, 27 May 2026 16:35:27 +0200
Subject: [PATCH 2/2] Make MLAllowlist opt-in

Analysis.__init_subclass__ put every subclass into Analysis.ALL, and
fickling/__init__.py transitively imported fickling.ml via hook.py, wich
registered MLAllowlist.

The shorten_code() shadowing bug (GHSA-cffv-grgg-g429) made it dead code
until the previous commit, and now it conflicts with other default
analysis passes.

This commit makes MLAllowlist opt-in; use `check_safety(analyze=...)`
if you need it.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 fickling/analysis.py  |  6 ++++--
 fickling/ml.py        |  2 +-
 test/test_analysis.py | 19 +++++++++++++++++++
 test/test_bypasses.py |  6 ++++--
 4 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 test/test_analysis.py

diff --git a/fickling/analysis.py b/fickling/analysis.py
index a7ab43c..9e213e2 100644
--- a/fickling/analysis.py
+++ b/fickling/analysis.py
@@ -170,8 +170,10 @@ def __str__(self):
 class Analysis(ABC):
     ALL: list[Analysis] = []
 
-    def __init_subclass__(cls, **kwargs):
-        Analysis.ALL.append(cls())
+    def __init_subclass__(cls, *, register: bool = True, **kwargs):
+        super().__init_subclass__(**kwargs)
+        if register:
+            Analysis.ALL.append(cls())
 
     @abstractmethod
     def analyze(self, context: AnalysisContext) -> Iterator[AnalysisResult]:
diff --git a/fickling/ml.py b/fickling/ml.py
index 37c88c9..fda18b8 100644
--- a/fickling/ml.py
+++ b/fickling/ml.py
@@ -288,7 +288,7 @@
 }
 
 
-class MLAllowlist(Analysis):
+class MLAllowlist(Analysis, register=False):
     def __init__(self):
         super().__init__()
         self.allowlist = ML_ALLOWLIST
diff --git a/test/test_analysis.py b/test/test_analysis.py
new file mode 100644
index 0000000..3b5ab2c
--- /dev/null
+++ b/test/test_analysis.py
@@ -0,0 +1,19 @@
+from unittest import TestCase
+
+import fickling.fickle as op
+from fickling.analysis import Severity, check_safety
+from fickling.fickle import Pickled
+
+
+class TestAnalysis(TestCase):
+    def test_benign_pickle(self):
+        pickled = Pickled(
+            [
+                op.Proto.create(4),
+                op.ShortBinUnicode("collections"),
+                op.ShortBinUnicode("deque"),
+                op.StackGlobal(),
+                op.Stop(),
+            ]
+        )
+        self.assertEqual(check_safety(pickled).severity, Severity.LIKELY_SAFE)
diff --git a/test/test_bypasses.py b/test/test_bypasses.py
index 950a3cd..e24e2d4 100644
--- a/test/test_bypasses.py
+++ b/test/test_bypasses.py
@@ -2,8 +2,9 @@
 from unittest import TestCase
 
 import fickling.fickle as op
-from fickling.analysis import Severity, UnsafeImportsML, check_safety
+from fickling.analysis import Analyzer, Severity, UnsafeImportsML, check_safety
 from fickling.fickle import Pickled
+from fickling.ml import MLAllowlist
 
 
 class TestBypasses(TestCase):
@@ -697,7 +698,8 @@ def test_ml_allowlist_not_shadowed_by_unsafe_imports_ml(self):
                 op.Stop(),
             ]
         )
-        res = check_safety(pickled)
+        analyzer = Analyzer([UnsafeImportsML(), MLAllowlist()])
+        res = check_safety(pickled, analyzer=analyzer)
         self.assertGreater(res.severity, Severity.LIKELY_SAFE)
         detailed = res.detailed_results().get("AnalysisResult", {})
         self.assertIsNotNone(