Skip to content

Commit b1c7e1d

Browse files
committed
parser: emit REFERENCES edges for Python callback arguments (#363)
The dispatcher in `_extract_value_references` only matched the `arguments` AST node type, which is the JS/TS shape. Python's tree-sitter grammar uses `argument_list` for the same construct, so function references in callback positions — `executor.submit(handler)`, `filter(predicate, xs)`, `map(transform, xs)`, `df.apply(fn)`, etc. — silently produced no REFERENCES edges. Downstream effect: `find_dead_code` already checks `has_references` (refactor.py), but for Python the edge was never created, so callback functions were systematically flagged as dead — exactly the high false positive rate users report in #363 (executor pools, pandas .apply, higher-order functions, returned closures). Fix is one line of intent: introduce `_ARGUMENTS_TYPES = {"arguments", "argument_list"}` and dispatch on set membership instead of string equality. The existing `_ref_from_arguments` walker already handles identifier children correctly and `_emit_reference_if_known` still gates emission on the name being locally defined or imported, so this does not introduce noise from arbitrary identifiers. Scoped to the dispatcher only: JS/TS shorthand-property, pair-value, and array-element handling are unchanged.
1 parent 0919071 commit b1c7e1d

3 files changed

Lines changed: 89 additions & 1 deletion

File tree

code_review_graph/parser.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3113,6 +3113,11 @@ def _resolve_jsx_component_target(
31133113
# AST node types for array/list containers.
31143114
_ARRAY_TYPES = frozenset({"array", "list"})
31153115

3116+
# AST node types for call argument containers. JS/TS uses ``arguments``;
3117+
# Python uses ``argument_list``. Both share the same identifier-child shape
3118+
# for bare-identifier callbacks like ``executor.submit(my_handler)``.
3119+
_ARGUMENTS_TYPES = frozenset({"arguments", "argument_list"})
3120+
31163121
# Names that are almost certainly not function references (constants,
31173122
# common primitives). All-uppercase identifiers and very short names
31183123
# are excluded by a length/casing heuristic in the method itself.
@@ -3185,7 +3190,7 @@ def _extract_value_references(
31853190
return
31863191

31873192
# --- Callback arguments (identifier args inside call_expression) ---
3188-
if node_type == "arguments":
3193+
if node_type in self._ARGUMENTS_TYPES:
31893194
self._ref_from_arguments(
31903195
child, source, language, file_path, caller, edges, imap, dnames,
31913196
)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""Fixture for issue #363: function references in callback positions.
2+
3+
Each `*_callback` function is passed as a bare-identifier argument to
4+
another call. They are never invoked with parens, so without REFERENCES
5+
edge tracking they would be flagged as dead code.
6+
"""
7+
from concurrent.futures import ThreadPoolExecutor
8+
9+
10+
def executor_callback():
11+
return "submitted"
12+
13+
14+
def filter_callback(item):
15+
return item > 0
16+
17+
18+
def map_callback(item):
19+
return item * 2
20+
21+
22+
def trigger_executor():
23+
with ThreadPoolExecutor() as executor:
24+
future = executor.submit(executor_callback)
25+
return future
26+
27+
28+
def trigger_filter():
29+
items = [1, -2, 3, -4]
30+
return list(filter(filter_callback, items))
31+
32+
33+
def trigger_map():
34+
items = [1, 2, 3]
35+
return list(map(map_callback, items))

tests/test_parser.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,54 @@ def test_vitest_tested_by_edges(self):
648648
f"All edges: {[(e.kind, e.source, e.target) for e in edges]}"
649649
)
650650

651+
# --- Python callback REFERENCES (#363) ---
652+
# Functions passed as bare-identifier arguments (executor.submit(fn),
653+
# filter(fn, xs), map(fn, xs), df.apply(fn), ...) should produce
654+
# REFERENCES edges so dead-code detection does not flag them as unused.
655+
# Pre-fix: only the JS/TS `arguments` node type triggered the
656+
# _ref_from_arguments dispatcher; Python's `argument_list` was ignored.
657+
658+
def test_python_callback_references_emitted(self):
659+
"""A function passed as a bare identifier to another call should
660+
produce a REFERENCES edge from the calling function to it."""
661+
nodes, edges = self.parser.parse_file(FIXTURES / "sample_callback_refs.py")
662+
refs = [e for e in edges if e.kind == "REFERENCES"]
663+
ref_target_names = {e.target.rsplit("::", 1)[-1] for e in refs}
664+
for callback in ("executor_callback", "filter_callback", "map_callback"):
665+
assert callback in ref_target_names, (
666+
f"Expected REFERENCES edge to {callback}, got targets: "
667+
f"{ref_target_names}"
668+
)
669+
670+
def test_python_callback_references_not_treated_as_dead(self):
671+
"""End-to-end: with REFERENCES edges in place, find_dead_code
672+
should not flag callback functions as dead."""
673+
from code_review_graph.graph import GraphStore
674+
from code_review_graph.refactor import find_dead_code
675+
676+
with tempfile.TemporaryDirectory() as tmp_dir:
677+
db_path = Path(tmp_dir) / "graph.db"
678+
store = GraphStore(db_path)
679+
try:
680+
nodes, edges = self.parser.parse_file(
681+
FIXTURES / "sample_callback_refs.py"
682+
)
683+
store.store_file_nodes_edges(
684+
str(FIXTURES / "sample_callback_refs.py"),
685+
nodes, edges, "",
686+
)
687+
dead = find_dead_code(store)
688+
dead_names = {d["name"] for d in dead}
689+
for callback in (
690+
"executor_callback", "filter_callback", "map_callback",
691+
):
692+
assert callback not in dead_names, (
693+
f"{callback} was flagged as dead but is used as a "
694+
f"callback. Dead names: {dead_names}"
695+
)
696+
finally:
697+
store.close()
698+
651699
def test_non_test_file_describe_not_special(self):
652700
"""describe() in a non-test file should NOT create Test nodes."""
653701
import tempfile

0 commit comments

Comments
 (0)