From 6e6072b2638a1717b0947804431d9f359b729eab Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 16:11:45 +0000 Subject: [PATCH 1/7] fix: replace N+1 loop in findRelatedEndpoints() with single batch Cypher query Added GraphStore.findEndpointNeighborsBatch() that fetches all endpoint neighbors for a list of node IDs in one MATCH ... WHERE n.id IN $nodeIds query, eliminating up to 50 separate findNeighbors() calls per invocation. QueryService.findRelatedEndpoints() now separates the direct-endpoint pass from the neighbor pass, using the new batch method for the latter. Deduplication and connected_via semantics are preserved. Added 3 unit tests covering: batch usage (verifying findNeighbors is never called), direct endpoint matches, and deduplication. Co-Authored-By: Paperclip --- .../randomcodespace/iq/graph/GraphStore.java | 25 +++++++++ .../iq/query/QueryService.java | 17 +++--- .../iq/query/QueryServiceTest.java | 53 +++++++++++++++++++ 3 files changed, 89 insertions(+), 6 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java index 3e579d5c..d5058108 100644 --- a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java +++ b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java @@ -273,6 +273,31 @@ public List findIncomingNeighbors(String nodeId) { Map.of("nodeId", nodeId)); } + /** + * Batch-find all ENDPOINT/WEBSOCKET_ENDPOINT neighbors for a list of node IDs in one query. + * Returns a map of sourceNodeId -> list of endpoint neighbor nodes. 
+ */ + public Map> findEndpointNeighborsBatch(List nodeIds) { + Map> result = new java.util.LinkedHashMap<>(); + if (nodeIds.isEmpty()) return result; + try (Transaction tx = graphDb.beginTx()) { + var queryResult = tx.execute( + "MATCH (n:CodeNode)-[]-(m:CodeNode) " + + "WHERE n.id IN $nodeIds AND m.kind IN ['ENDPOINT', 'WEBSOCKET_ENDPOINT'] " + + "RETURN n.id AS sourceId, m", + Map.of("nodeIds", nodeIds)); + while (queryResult.hasNext()) { + var row = queryResult.next(); + String sourceId = (String) row.get("sourceId"); + Object val = row.get("m"); + if (val instanceof org.neo4j.graphdb.Node neo4jNode) { + result.computeIfAbsent(sourceId, k -> new ArrayList<>()).add(nodeFromNeo4j(neo4jNode)); + } + } + } + return result; + } + public long count() { try (Transaction tx = graphDb.beginTx()) { var result = tx.execute("MATCH (n:CodeNode) RETURN count(n) AS cnt"); diff --git a/src/main/java/io/github/randomcodespace/iq/query/QueryService.java b/src/main/java/io/github/randomcodespace/iq/query/QueryService.java index 26555192..7800e104 100644 --- a/src/main/java/io/github/randomcodespace/iq/query/QueryService.java +++ b/src/main/java/io/github/randomcodespace/iq/query/QueryService.java @@ -337,19 +337,24 @@ public Map findRelatedEndpoints(String identifier) { Set seenIds = new java.util.LinkedHashSet<>(); List> endpoints = new ArrayList<>(); + // First pass: collect matches that are themselves endpoints for (CodeNode match : matches) { if (match.getKind() == NodeKind.ENDPOINT || match.getKind() == NodeKind.WEBSOCKET_ENDPOINT) { if (seenIds.add(match.getId())) { endpoints.add(nodeToMap(match)); } } - // Check neighbors for connected endpoints - List neighbors = graphStore.findNeighbors(match.getId()); - for (CodeNode neighbor : neighbors) { - if ((neighbor.getKind() == NodeKind.ENDPOINT || neighbor.getKind() == NodeKind.WEBSOCKET_ENDPOINT) - && seenIds.add(neighbor.getId())) { + } + + // Single batched query for all endpoint neighbors (replaces N+1 loop) + List matchIds 
= matches.stream().map(CodeNode::getId).toList(); + Map> endpointNeighbors = graphStore.findEndpointNeighborsBatch(matchIds); + for (Map.Entry> entry : endpointNeighbors.entrySet()) { + String sourceId = entry.getKey(); + for (CodeNode neighbor : entry.getValue()) { + if (seenIds.add(neighbor.getId())) { Map epMap = nodeToMap(neighbor); - epMap.put("connected_via", match.getId()); + epMap.put("connected_via", sourceId); endpoints.add(epMap); } } diff --git a/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java b/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java index da4750a6..29740ad1 100644 --- a/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java +++ b/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java @@ -502,6 +502,59 @@ void findDeadCodeShouldReturnEmptyWhenAllNodesHaveSemanticEdges() { assertTrue(deadCode.isEmpty()); } + // --- findRelatedEndpoints --- + + @Test + void findRelatedEndpointsShouldUsesBatchQueryInsteadOfNPlusOne() { + var classNode = makeNode("cls:UserService", NodeKind.CLASS, "UserService"); + var endpointNode = makeNode("ep:getUsers", NodeKind.ENDPOINT, "getUsers"); + when(graphStore.search("UserService", 50)).thenReturn(List.of(classNode)); + when(graphStore.findEndpointNeighborsBatch(List.of("cls:UserService"))) + .thenReturn(Map.of("cls:UserService", List.of(endpointNode))); + + Map result = service.findRelatedEndpoints("UserService"); + + assertEquals("UserService", result.get("identifier")); + assertEquals(1, result.get("count")); + assertEquals(1, result.get("searched_nodes")); + @SuppressWarnings("unchecked") + List> endpoints = (List>) result.get("endpoints"); + assertEquals("ep:getUsers", endpoints.getFirst().get("id")); + assertEquals("cls:UserService", endpoints.getFirst().get("connected_via")); + // Verify no per-node findNeighbors calls were made + verify(graphStore, never()).findNeighbors(anyString()); + } + + @Test + void 
findRelatedEndpointsShouldIncludeDirectEndpointMatches() { + var endpointNode = makeNode("ep:getUsers", NodeKind.ENDPOINT, "getUsers"); + when(graphStore.search("getUsers", 50)).thenReturn(List.of(endpointNode)); + when(graphStore.findEndpointNeighborsBatch(List.of("ep:getUsers"))).thenReturn(Map.of()); + + Map result = service.findRelatedEndpoints("getUsers"); + + assertEquals(1, result.get("count")); + @SuppressWarnings("unchecked") + List> endpoints = (List>) result.get("endpoints"); + assertEquals("ep:getUsers", endpoints.getFirst().get("id")); + // Direct endpoint matches have no connected_via + assertNull(endpoints.getFirst().get("connected_via")); + } + + @Test + void findRelatedEndpointsShouldDeduplicateEndpoints() { + var endpointNode = makeNode("ep:getUsers", NodeKind.ENDPOINT, "getUsers"); + // Same endpoint appears as both a direct match and a neighbor + when(graphStore.search("ep", 50)).thenReturn(List.of(endpointNode)); + when(graphStore.findEndpointNeighborsBatch(List.of("ep:getUsers"))) + .thenReturn(Map.of("ep:getUsers", List.of(endpointNode))); + + Map result = service.findRelatedEndpoints("ep"); + + // Should only appear once + assertEquals(1, result.get("count")); + } + // --- nodeToMap --- @Test From ed2c5876a19f5cd0698d86134e3fb5e47931174f Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 16:19:34 +0000 Subject: [PATCH 2/7] perf: replace toLower() full scans with pre-lowered indexed properties for search Store label_lower and fqn_lower on every node during bulkSave() so that case-insensitive search can hit a B-tree index instead of doing a full graph scan with toLower() on both sides of the CONTAINS predicate. 
- nodeToProps(): adds label_lower/fqn_lower to the Neo4j property map - bulkSave(): creates indexes on label_lower and fqn_lower - EnrichCommand: creates label_lower/fqn_lower indexes alongside kind/layer/module/filePath - GraphStore.search(text, limit): lowercase input, query against pre-lowered props - GraphRepository.search(): same query update (SDN path) - nodeFromNeo4j(): label_lower/fqn_lower implicitly excluded (no prop_ prefix) All 1459 tests pass. Co-Authored-By: Paperclip --- .../randomcodespace/iq/cli/EnrichCommand.java | 2 ++ .../randomcodespace/iq/graph/GraphRepository.java | 2 +- .../randomcodespace/iq/graph/GraphStore.java | 14 ++++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java index 44b4a83a..d2923579 100644 --- a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java +++ b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java @@ -307,6 +307,8 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.layer)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.module)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.filePath)"); + tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.label_lower)"); + tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.fqn_lower)"); tx.commit(); } CliOutput.info(" Created Neo4j indexes"); diff --git a/src/main/java/io/github/randomcodespace/iq/graph/GraphRepository.java b/src/main/java/io/github/randomcodespace/iq/graph/GraphRepository.java index de843aa0..0528bdcf 100644 --- a/src/main/java/io/github/randomcodespace/iq/graph/GraphRepository.java +++ b/src/main/java/io/github/randomcodespace/iq/graph/GraphRepository.java @@ -23,7 +23,7 @@ public interface GraphRepository extends Neo4jRepository { 
@Query("MATCH (n:CodeNode) WHERE n.filePath = $filePath RETURN n") List findByFilePath(String filePath); - @Query("MATCH (n:CodeNode) WHERE toLower(n.label) CONTAINS toLower($text) OR toLower(n.fqn) CONTAINS toLower($text) RETURN n LIMIT $limit") + @Query("MATCH (n:CodeNode) WHERE n.label_lower CONTAINS $text OR n.fqn_lower CONTAINS $text RETURN n LIMIT $limit") List search(String text, int limit); @Query("MATCH (n:CodeNode) WHERE n.label CONTAINS $text OR n.fqn CONTAINS $text RETURN n") diff --git a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java index d5058108..564d4c8e 100644 --- a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java +++ b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java @@ -84,9 +84,11 @@ public void bulkSave(List nodes) { } } while (deleted > 0); - // 2. Create index on id property for fast MATCH during edge creation + // 2. Create indexes: id for MATCH, label_lower/fqn_lower for fast case-insensitive search try (Transaction tx = graphDb.beginTx()) { tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.id)"); + tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.label_lower)"); + tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.fqn_lower)"); tx.commit(); } @@ -182,6 +184,9 @@ private Map nodeToProps(CodeNode node) { if (node.getAnnotations() != null && !node.getAnnotations().isEmpty()) { props.put("annotations", String.join(",", node.getAnnotations())); } + // Pre-lowered properties for index-backed case-insensitive search + props.put("label_lower", node.getLabel() != null ? 
node.getLabel().toLowerCase() : ""); + if (node.getFqn() != null) props.put("fqn_lower", node.getFqn().toLowerCase()); if (node.getProperties() != null) { for (var entry : node.getProperties().entrySet()) { if (entry.getValue() != null) { @@ -249,10 +254,11 @@ public List search(String text) { } public List search(String text, int limit) { + String lowerText = text.toLowerCase(); return queryNodes( - "MATCH (n:CodeNode) WHERE toLower(n.label) CONTAINS toLower($text) " - + "OR toLower(n.fqn) CONTAINS toLower($text) RETURN n LIMIT $limit", - Map.of("text", text, "limit", limit)); + "MATCH (n:CodeNode) WHERE n.label_lower CONTAINS $text " + + "OR n.fqn_lower CONTAINS $text RETURN n LIMIT $limit", + Map.of("text", lowerText, "limit", limit)); } public List findNeighbors(String nodeId) { From ac97db007762c3d692087b8250a6c0b23fe19f20 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 16:25:13 +0000 Subject: [PATCH 3/7] feat: add GuardLinker to create PROTECTS edges between GUARD/MIDDLEWARE and ENDPOINT nodes Implements RAN-61. GuardLinker uses file-path proximity (same file = match) to infer that guards and middleware in a file protect endpoints in that file. This surfaces security architecture in the graph for Spring @PreAuthorize, @Secured, DjangoAuth, FastAPIAuth, NestJSGuards, and generic middleware nodes. 
- 9 unit tests: positive match, middleware, class-level, cross-file negative, no-guards, no-endpoints, duplicate avoidance, null filePath, determinism - 1468 total tests pass, 0 failures Co-Authored-By: Paperclip --- .../iq/analyzer/linker/GuardLinker.java | 92 +++++++++++ .../iq/analyzer/linker/GuardLinkerTest.java | 145 ++++++++++++++++++ 2 files changed, 237 insertions(+) create mode 100644 src/main/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinker.java create mode 100644 src/test/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinkerTest.java diff --git a/src/main/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinker.java b/src/main/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinker.java new file mode 100644 index 00000000..4b9d4924 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinker.java @@ -0,0 +1,92 @@ +package io.github.randomcodespace.iq.analyzer.linker; + +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +/** + * Links GUARD and MIDDLEWARE nodes to ENDPOINT nodes via PROTECTS edges. + *
<p>
+ * Uses file-path proximity as the matching heuristic: a guard or middleware + * in the same file as an endpoint is assumed to protect that endpoint. + * This correctly handles class-level Spring Security annotations + * (@PreAuthorize, @Secured on a class) which appear in the same file as + * the endpoint methods they protect. + */ +@Component +public class GuardLinker implements Linker { + + private static final Logger log = LoggerFactory.getLogger(GuardLinker.class); + + @Override + public LinkResult link(List nodes, List edges) { + // Group guards/middlewares and endpoints by filePath + Map> guardsByFile = new TreeMap<>(); + Map> endpointsByFile = new TreeMap<>(); + + for (CodeNode node : nodes) { + String fp = node.getFilePath(); + if (fp == null || fp.isBlank()) continue; + + if (node.getKind() == NodeKind.GUARD || node.getKind() == NodeKind.MIDDLEWARE) { + guardsByFile.computeIfAbsent(fp, k -> new ArrayList<>()).add(node); + } else if (node.getKind() == NodeKind.ENDPOINT) { + endpointsByFile.computeIfAbsent(fp, k -> new ArrayList<>()).add(node); + } + } + + if (guardsByFile.isEmpty() || endpointsByFile.isEmpty()) { + return LinkResult.empty(); + } + + // Collect existing PROTECTS edges to avoid duplicates + Set existingProtects = new HashSet<>(); + for (CodeEdge edge : edges) { + if (edge.getKind() == EdgeKind.PROTECTS && edge.getTarget() != null) { + existingProtects.add(edge.getSourceId() + "->" + edge.getTarget().getId()); + } + } + + List newEdges = new ArrayList<>(); + + // Same-file matching: each guard protects all endpoints in the same file + for (String filePath : new TreeSet<>(guardsByFile.keySet())) { + List fileEndpoints = endpointsByFile.get(filePath); + if (fileEndpoints == null || fileEndpoints.isEmpty()) continue; + + List fileGuards = guardsByFile.get(filePath); + for (CodeNode guard : fileGuards) { + for (CodeNode endpoint : fileEndpoints) { + String key = guard.getId() + "->" + endpoint.getId(); + if (!existingProtects.contains(key)) { + 
var edge = new CodeEdge(); + edge.setId("guard-link:" + guard.getId() + "->" + endpoint.getId()); + edge.setKind(EdgeKind.PROTECTS); + edge.setSourceId(guard.getId()); + edge.setTarget(endpoint); + edge.setProperties(Map.of("inferred", true)); + newEdges.add(edge); + existingProtects.add(key); + } + } + } + } + + if (!newEdges.isEmpty()) { + log.debug("GuardLinker created {} PROTECTS edges", newEdges.size()); + } + return LinkResult.ofEdges(newEdges); + } +} diff --git a/src/test/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinkerTest.java b/src/test/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinkerTest.java new file mode 100644 index 00000000..0919919e --- /dev/null +++ b/src/test/java/io/github/randomcodespace/iq/analyzer/linker/GuardLinkerTest.java @@ -0,0 +1,145 @@ +package io.github.randomcodespace.iq.analyzer.linker; + +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +class GuardLinkerTest { + + private final GuardLinker linker = new GuardLinker(); + + private CodeNode guardNode(String id, String filePath) { + var node = new CodeNode(id, NodeKind.GUARD, id); + node.setFilePath(filePath); + return node; + } + + private CodeNode middlewareNode(String id, String filePath) { + var node = new CodeNode(id, NodeKind.MIDDLEWARE, id); + node.setFilePath(filePath); + return node; + } + + private CodeNode endpointNode(String id, String filePath) { + var node = new CodeNode(id, NodeKind.ENDPOINT, id); + node.setFilePath(filePath); + return node; + } + + @Test + void linksGuardToEndpointInSameFile() { + var guard = guardNode("auth:UserController.java:Secured:10", "UserController.java"); + var endpoint = endpointNode("ep:UserController.java:GET:/users:20", 
"UserController.java"); + + LinkResult result = linker.link(List.of(guard, endpoint), List.of()); + + assertEquals(1, result.edges().size()); + CodeEdge edge = result.edges().getFirst(); + assertEquals(EdgeKind.PROTECTS, edge.getKind()); + assertEquals(guard.getId(), edge.getSourceId()); + assertEquals(endpoint.getId(), edge.getTarget().getId()); + assertEquals(true, edge.getProperties().get("inferred")); + } + + @Test + void linksMiddlewareToEndpointInSameFile() { + var middleware = middlewareNode("mw:routes.ts:authMiddleware:5", "routes.ts"); + var endpoint = endpointNode("ep:routes.ts:GET:/profile:15", "routes.ts"); + + LinkResult result = linker.link(List.of(middleware, endpoint), List.of()); + + assertEquals(1, result.edges().size()); + assertEquals(EdgeKind.PROTECTS, result.edges().getFirst().getKind()); + } + + @Test + void classLevelGuardProtectsAllEndpointsInSameFile() { + var guard = guardNode("auth:OrderController.java:PreAuthorize:3", "OrderController.java"); + var ep1 = endpointNode("ep:OrderController.java:GET:/orders:25", "OrderController.java"); + var ep2 = endpointNode("ep:OrderController.java:POST:/orders:35", "OrderController.java"); + var ep3 = endpointNode("ep:OrderController.java:DELETE:/orders/{id}:45", "OrderController.java"); + + LinkResult result = linker.link(List.of(guard, ep1, ep2, ep3), List.of()); + + assertEquals(3, result.edges().size()); + assertTrue(result.edges().stream().allMatch(e -> e.getKind() == EdgeKind.PROTECTS)); + assertTrue(result.edges().stream().allMatch(e -> e.getSourceId().equals(guard.getId()))); + } + + @Test + void guardInDifferentFileDoesNotProtectEndpoint() { + var guard = guardNode("auth:SecurityConfig.java:EnableWebSecurity:1", "SecurityConfig.java"); + var endpoint = endpointNode("ep:UserController.java:GET:/users:10", "UserController.java"); + + LinkResult result = linker.link(List.of(guard, endpoint), List.of()); + + assertTrue(result.edges().isEmpty()); + } + + @Test + void noGuardsReturnsEmpty() { + var 
endpoint = endpointNode("ep:UserController.java:GET:/users:10", "UserController.java"); + + LinkResult result = linker.link(List.of(endpoint), List.of()); + + assertTrue(result.edges().isEmpty()); + } + + @Test + void noEndpointsReturnsEmpty() { + var guard = guardNode("auth:UserController.java:Secured:5", "UserController.java"); + + LinkResult result = linker.link(List.of(guard), List.of()); + + assertTrue(result.edges().isEmpty()); + } + + @Test + void avoidsDuplicateEdges() { + var guard = guardNode("auth:UserController.java:Secured:5", "UserController.java"); + var endpoint = endpointNode("ep:UserController.java:GET:/users:15", "UserController.java"); + + var existing = new CodeEdge(); + existing.setId("existing"); + existing.setKind(EdgeKind.PROTECTS); + existing.setSourceId(guard.getId()); + existing.setTarget(endpoint); + + LinkResult result = linker.link(List.of(guard, endpoint), List.of(existing)); + + assertTrue(result.edges().isEmpty()); + } + + @Test + void nodesWithNullFilePathAreIgnored() { + var guard = new CodeNode("auth:guard:1", NodeKind.GUARD, "guard"); + // filePath is null by default + var endpoint = endpointNode("ep:file.java:GET:/users:10", "file.java"); + + LinkResult result = linker.link(List.of(guard, endpoint), List.of()); + + assertTrue(result.edges().isEmpty()); + } + + @Test + void determinismRunTwiceProducesSameResult() { + var guard1 = guardNode("auth:Ctrl.java:Secured:5", "Ctrl.java"); + var guard2 = guardNode("auth:Ctrl.java:PreAuthorize:8", "Ctrl.java"); + var ep1 = endpointNode("ep:Ctrl.java:GET:/a:20", "Ctrl.java"); + var ep2 = endpointNode("ep:Ctrl.java:POST:/b:30", "Ctrl.java"); + + LinkResult r1 = linker.link(List.of(guard1, guard2, ep1, ep2), List.of()); + LinkResult r2 = linker.link(List.of(guard1, guard2, ep1, ep2), List.of()); + + assertEquals(r1.edges().size(), r2.edges().size()); + for (int i = 0; i < r1.edges().size(); i++) { + assertEquals(r1.edges().get(i).getId(), r2.edges().get(i).getId()); + } + } +} From 
c5f98524bd3c7b40e6cc25d2d7acdd2ece8c8db9 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 17:12:06 +0000 Subject: [PATCH 4/7] fix: exclude message-driven and security components from dead code detection - Add GUARD, MIDDLEWARE, TOPIC, QUEUE, EVENT, MESSAGE_QUEUE to ENTRY_POINT_KINDS so they are never flagged as dead code (they are entry points / cross-cutting concerns) - Remove invalid 'uses' edge kind from SEMANTIC_EDGE_KINDS (not a valid EdgeKind) - Add 'protects' to SEMANTIC_EDGE_KINDS so PROTECTS edges from GuardLinker count as semantic usage when determining reachability - Add two new tests: verifying new entry point kinds are excluded, and verifying 'protects' is included / 'uses' is excluded from semantic edge kinds Co-Authored-By: Paperclip --- .../iq/query/QueryService.java | 13 ++++-- .../iq/query/QueryServiceTest.java | 42 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/query/QueryService.java b/src/main/java/io/github/randomcodespace/iq/query/QueryService.java index 7800e104..954a21bd 100644 --- a/src/main/java/io/github/randomcodespace/iq/query/QueryService.java +++ b/src/main/java/io/github/randomcodespace/iq/query/QueryService.java @@ -384,9 +384,10 @@ public Map getTopology() { * they are always present from parent modules/config files. 
*/ private static final List SEMANTIC_EDGE_KINDS = List.of( - "calls", "imports", "depends_on", "uses", "extends", "implements", + "calls", "imports", "depends_on", "extends", "implements", "injects", "queries", "maps_to", "consumes", "listens", - "invokes_rmi", "overrides", "connects_to", "triggers", "renders"); + "invokes_rmi", "overrides", "connects_to", "triggers", "renders", + "protects"); /** * Node kinds that are entry points — they are intended to have no callers @@ -398,7 +399,13 @@ public Map getTopology() { NodeKind.MIGRATION.getValue(), NodeKind.CONFIG_FILE.getValue(), NodeKind.CONFIG_KEY.getValue(), - NodeKind.CONFIG_DEFINITION.getValue()); + NodeKind.CONFIG_DEFINITION.getValue(), + NodeKind.GUARD.getValue(), + NodeKind.MIDDLEWARE.getValue(), + NodeKind.TOPIC.getValue(), + NodeKind.QUEUE.getValue(), + NodeKind.EVENT.getValue(), + NodeKind.MESSAGE_QUEUE.getValue()); @Cacheable(value = "dead-code", key = "#kind + ':' + #limit") public Map findDeadCode(String kind, int limit) { diff --git a/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java b/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java index 29740ad1..51a2a91b 100644 --- a/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java +++ b/src/test/java/io/github/randomcodespace/iq/query/QueryServiceTest.java @@ -477,6 +477,48 @@ void findDeadCodeShouldExcludeEntryPointKinds() { assertTrue(excludeKinds.contains("websocket_endpoint"), "Should exclude websocket endpoints"); assertTrue(excludeKinds.contains("migration"), "Should exclude migrations"); assertTrue(excludeKinds.contains("config_file"), "Should exclude config files"); + assertTrue(excludeKinds.contains("guard"), "Should exclude guards"); + assertTrue(excludeKinds.contains("middleware"), "Should exclude middleware"); + assertTrue(excludeKinds.contains("topic"), "Should exclude topics"); + assertTrue(excludeKinds.contains("queue"), "Should exclude queues"); + 
assertTrue(excludeKinds.contains("event"), "Should exclude events"); + assertTrue(excludeKinds.contains("message_queue"), "Should exclude message queues"); + } + + @Test + void findDeadCodeShouldNotFlagMessageDrivenComponents() { + var guard = makeNode("guard:AuthGuard", NodeKind.GUARD, "AuthGuard"); + var middleware = makeNode("mid:LoggingMiddleware", NodeKind.MIDDLEWARE, "LoggingMiddleware"); + var topic = makeNode("topic:UserEvents", NodeKind.TOPIC, "UserEvents"); + var queue = makeNode("queue:EmailQueue", NodeKind.QUEUE, "EmailQueue"); + var event = makeNode("event:OrderPlaced", NodeKind.EVENT, "OrderPlaced"); + var messageQueue = makeNode("mq:NotificationQueue", NodeKind.MESSAGE_QUEUE, "NotificationQueue"); + + // These are excluded via ENTRY_POINT_KINDS so graphStore won't return them + when(graphStore.findNodesWithoutIncomingSemantic(anyList(), anyList(), anyList(), eq(0), eq(100))) + .thenReturn(List.of()); + + Map result = service.findDeadCode(null, 100); + + @SuppressWarnings("unchecked") + List> deadCode = (List>) result.get("dead_code"); + assertTrue(deadCode.isEmpty(), "Message-driven and security components should not be flagged as dead code"); + } + + @Test + void findDeadCodeShouldIncludeProtectsInSemanticEdgeKinds() { + when(graphStore.findNodesWithoutIncomingSemantic(anyList(), anyList(), anyList(), eq(0), eq(50))) + .thenReturn(List.of()); + + service.findDeadCode(null, 50); + + @SuppressWarnings("unchecked") + var captor = org.mockito.ArgumentCaptor.forClass(List.class); + verify(graphStore).findNodesWithoutIncomingSemantic(anyList(), captor.capture(), anyList(), eq(0), eq(50)); + @SuppressWarnings("unchecked") + List semanticKinds = captor.getValue(); + assertTrue(semanticKinds.contains("protects"), "Should include 'protects' as semantic edge"); + assertFalse(semanticKinds.contains("uses"), "Should NOT include 'uses' — not a valid EdgeKind"); } @Test From bfe2c42d298ae1881a47a7470055264a92043f79 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: 
Wed, 1 Apr 2026 17:17:58 +0000 Subject: [PATCH 5/7] fix: add NestJS discriminator guards and fix EXPOSES edge target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - NestJSControllerDetector: bail out early if no @nestjs/ import to prevent false positives on Angular controllers and generic TypeScript - NestJSGuardsDetector: bail out early if no @nestjs/ import to prevent false positives on any TypeScript with canActivate() - NestJSControllerDetector: add edge.setTarget(node) on EXPOSES edges — previously missing, causing all class→endpoint edges to be silently dropped by GraphBuilder Co-Authored-By: Paperclip --- .../typescript/NestJSControllerDetector.java | 7 +++- .../typescript/NestJSGuardsDetector.java | 6 ++- .../NestJSControllerDetectorTest.java | 42 +++++++++++++++++-- .../typescript/NestJSGuardsDetectorTest.java | 21 +++++++++- 4 files changed, 69 insertions(+), 7 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetector.java index 50844cf8..593b0372 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetector.java @@ -38,6 +38,8 @@ public class NestJSControllerDetector extends AbstractAntlrDetector { private static final Pattern FETCH_RE = Pattern.compile( "\\bfetch\\s*\\(\\s*['\"`]"); + private static final Pattern NESTJS_IMPORT = Pattern.compile("from\\s+['\"]@nestjs/"); + private static final Pattern CONTROLLER_PATTERN = Pattern.compile( "@Controller\\(\\s*['\"`]?([^'\"`\\)\\s]*)['\"`]?\\s*\\)(?:\\s*@\\w+\\([^)]*\\))*\\s*\\n\\s*(?:export\\s+)?class\\s+(\\w+)" ); @@ -78,9 +80,11 @@ public DetectorResult detect(DetectorContext ctx) { @Override protected DetectorResult detectWithRegex(DetectorContext ctx) { + String text = 
ctx.content(); + if (!NESTJS_IMPORT.matcher(text).find()) return DetectorResult.empty(); + List nodes = new ArrayList<>(); List edges = new ArrayList<>(); - String text = ctx.content(); String filePath = ctx.filePath(); String moduleName = ctx.moduleName(); @@ -161,6 +165,7 @@ protected DetectorResult detectWithRegex(DetectorContext ctx) { edge.setId(classId + "->exposes->" + nodeId); edge.setKind(EdgeKind.EXPOSES); edge.setSourceId(classId); + edge.setTarget(node); edges.add(edge); } } diff --git a/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetector.java index 420bd8f7..177509c4 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetector.java @@ -28,6 +28,8 @@ @Component public class NestJSGuardsDetector extends AbstractAntlrDetector { + private static final Pattern NESTJS_IMPORT = Pattern.compile("from\\s+['\"]@nestjs/"); + private static final Pattern USE_GUARDS_PATTERN = Pattern.compile( "@UseGuards\\(\\s*([^)]+)\\)" ); @@ -67,8 +69,10 @@ public DetectorResult detect(DetectorContext ctx) { @Override protected DetectorResult detectWithRegex(DetectorContext ctx) { - List nodes = new ArrayList<>(); String text = ctx.content(); + if (!NESTJS_IMPORT.matcher(text).find()) return DetectorResult.empty(); + + List nodes = new ArrayList<>(); String filePath = ctx.filePath(); String moduleName = ctx.moduleName(); diff --git a/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetectorTest.java b/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetectorTest.java index 56a2a5a3..aa1e9fe8 100644 --- a/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetectorTest.java +++ 
b/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSControllerDetectorTest.java @@ -3,6 +3,7 @@ import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; import io.github.randomcodespace.iq.detector.DetectorTestUtils; +import io.github.randomcodespace.iq.model.EdgeKind; import io.github.randomcodespace.iq.model.NodeKind; import org.junit.jupiter.api.Test; @@ -15,6 +16,7 @@ class NestJSControllerDetectorTest { @Test void detectsNestJSController() { String code = """ + import { Controller, Get, Post } from '@nestjs/common'; @Controller('users') export class UsersController { @Get() @@ -35,8 +37,42 @@ export class UsersController { // Endpoints assertTrue(result.nodes().stream().anyMatch(n -> n.getKind() == NodeKind.ENDPOINT && "GET /users".equals(n.getLabel()))); - // EXPOSES edges + // EXPOSES edges with valid targets assertEquals(3, result.edges().size()); + assertTrue(result.edges().stream().allMatch(e -> + e.getKind() == EdgeKind.EXPOSES && e.getTarget() != null)); + } + + @Test + void noMatchWithoutNestJSImport() { + // Generic TypeScript with @Controller-like patterns but no @nestjs import + String code = """ + @Controller('users') + export class UsersController { + @Get() + findAll() {} + } + """; + DetectorContext ctx = DetectorTestUtils.contextFor("src/users.controller.ts", "typescript", code); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty()); + assertTrue(result.edges().isEmpty()); + } + + @Test + void noMatchOnAngularComponent() { + // Angular also uses @Component decorator, should not match NestJS + String code = """ + import { Component } from '@angular/core'; + @Component({ selector: 'app-root', templateUrl: './app.component.html' }) + export class AppComponent { + @Get('/users') + getUsers() {} + } + """; + DetectorContext ctx = DetectorTestUtils.contextFor("src/app.component.ts", "typescript", code); + DetectorResult result = 
detector.detect(ctx); + assertTrue(result.nodes().isEmpty()); } @Test @@ -49,8 +85,8 @@ void noMatchOnNonNestJSCode() { @Test void deterministic() { - String code = "@Controller('test')\nexport class TestController {\n @Get()\n find() {}\n}"; - DetectorContext ctx = DetectorTestUtils.contextFor("typescript", code); + String code = "import { Controller, Get } from '@nestjs/common';\n@Controller('test')\nexport class TestController {\n @Get()\n find() {}\n}"; + DetectorContext ctx = DetectorTestUtils.contextFor("src/test.controller.ts", "typescript", code); DetectorTestUtils.assertDeterministic(detector, ctx); } } diff --git a/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetectorTest.java b/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetectorTest.java index 54f8c040..952ac548 100644 --- a/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetectorTest.java +++ b/src/test/java/io/github/randomcodespace/iq/detector/typescript/NestJSGuardsDetectorTest.java @@ -15,6 +15,8 @@ class NestJSGuardsDetectorTest { @Test void detectsGuardsAndRoles() { String code = """ + import { Injectable, CanActivate } from '@nestjs/common'; + import { AuthGuard } from '@nestjs/passport'; @UseGuards(JwtAuthGuard, RolesGuard) @Roles('admin', 'user') canActivate(context) { @@ -34,6 +36,21 @@ void detectsGuardsAndRoles() { n.getLabel().contains("Roles(admin, user)"))); } + @Test + void noMatchWithoutNestJSImport() { + // Generic TypeScript with canActivate() but no @nestjs import + String code = """ + class RouteGuard { + canActivate(context) { + return this.authService.isAuthenticated(); + } + } + """; + DetectorContext ctx = DetectorTestUtils.contextFor("src/route.guard.ts", "typescript", code); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty()); + } + @Test void noMatchOnNonGuardCode() { String code = "class SomeService {}"; @@ -44,8 +61,8 @@ void noMatchOnNonGuardCode() { 
@Test void deterministic() { - String code = "@UseGuards(AuthGuard)\n@Roles('admin')"; - DetectorContext ctx = DetectorTestUtils.contextFor("typescript", code); + String code = "import { Injectable } from '@nestjs/common';\n@UseGuards(AuthGuard)\n@Roles('admin')"; + DetectorContext ctx = DetectorTestUtils.contextFor("src/auth.guard.ts", "typescript", code); DetectorTestUtils.assertDeterministic(detector, ctx); } } From e8a8040fca40ab765a32266929dad91fdfedde57 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 17:23:36 +0000 Subject: [PATCH 6/7] fix: extend TopicLinker to handle SENDS_TO/RECEIVES_FROM/PUBLISHES/LISTENS edges Tibco EMS, Azure Service Bus/Event Hub, and Spring Events emit different edge kinds than Kafka/RabbitMQ. TopicLinker previously only matched PRODUCES/CONSUMES, silently dropping cross-service CALLS edges for all three messaging patterns. - Add SENDS_TO and RECEIVES_FROM (Tibco/Azure) as producer/consumer edges - Add PUBLISHES and LISTENS (Spring Events) as producer/consumer edges - Add EVENT and MESSAGE_QUEUE node kinds to topic matching (alongside TOPIC/QUEUE) - Add 4 new test cases: SENDS_TO/RECEIVES_FROM, PUBLISHES/LISTENS, MESSAGE_QUEUE, determinism Co-Authored-By: Paperclip --- .../iq/analyzer/linker/TopicLinker.java | 19 +-- .../iq/analyzer/linker/TopicLinkerTest.java | 117 ++++++++++++++++++ 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java b/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java index ebabd316..534b7e46 100644 --- a/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java +++ b/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java @@ -10,7 +10,6 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -18,10 +17,11 @@ import java.util.TreeSet; /** - * Links 
Kafka/RabbitMQ producers to consumers via shared topic names. + * Links messaging producers to consumers via shared topic/queue/event names. *

- * Scans for TOPIC/QUEUE nodes and matches PRODUCES edges with CONSUMES - * edges on the same topic label to create direct producer-to-consumer + * Scans for TOPIC/QUEUE/EVENT/MESSAGE_QUEUE nodes and matches producer edges + * (PRODUCES, SENDS_TO, PUBLISHES) with consumer edges (CONSUMES, RECEIVES_FROM, + * LISTENS) on the same topic label to create direct producer-to-consumer * CALLS edges. */ @Component @@ -31,10 +31,11 @@ public class TopicLinker implements Linker { @Override public LinkResult link(List nodes, List edges) { - // Collect topic/queue nodes by label + // Collect topic/queue/event/message_queue nodes by label Map> topicIdsByLabel = new TreeMap<>(); for (CodeNode node : nodes) { - if (node.getKind() == NodeKind.TOPIC || node.getKind() == NodeKind.QUEUE) { + if (node.getKind() == NodeKind.TOPIC || node.getKind() == NodeKind.QUEUE + || node.getKind() == NodeKind.EVENT || node.getKind() == NodeKind.MESSAGE_QUEUE) { topicIdsByLabel .computeIfAbsent(node.getLabel(), k -> new ArrayList<>()) .add(node.getId()); @@ -51,11 +52,13 @@ public LinkResult link(List nodes, List edges) { Map> consumersByTopic = new TreeMap<>(); for (CodeEdge edge : edges) { - if (edge.getKind() == EdgeKind.PRODUCES && edge.getTarget() != null) { + if (edge.getTarget() == null) continue; + EdgeKind kind = edge.getKind(); + if (kind == EdgeKind.PRODUCES || kind == EdgeKind.SENDS_TO || kind == EdgeKind.PUBLISHES) { producersByTopic .computeIfAbsent(edge.getTarget().getId(), k -> new ArrayList<>()) .add(edge.getSourceId()); - } else if (edge.getKind() == EdgeKind.CONSUMES && edge.getTarget() != null) { + } else if (kind == EdgeKind.CONSUMES || kind == EdgeKind.RECEIVES_FROM || kind == EdgeKind.LISTENS) { consumersByTopic .computeIfAbsent(edge.getTarget().getId(), k -> new ArrayList<>()) .add(edge.getSourceId()); diff --git a/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java b/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java 
index e56c4cf1..f35deb3f 100644 --- a/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java +++ b/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java @@ -103,4 +103,121 @@ void handlesQueueNodes() { assertEquals(1, result.edges().size()); } + + @Test + void linksSendsToReceivesFromEdgesViaTopic() { + // Tibco EMS / Azure Service Bus pattern + var topic = new CodeNode("topic:orders", NodeKind.TOPIC, "orders"); + var producer = new CodeNode("svc:TibcoSender", NodeKind.CLASS, "TibcoSender"); + var consumer = new CodeNode("svc:TibcoReceiver", NodeKind.CLASS, "TibcoReceiver"); + + var sendsToEdge = new CodeEdge(); + sendsToEdge.setId("e1"); + sendsToEdge.setKind(EdgeKind.SENDS_TO); + sendsToEdge.setSourceId("svc:TibcoSender"); + sendsToEdge.setTarget(topic); + + var receivesFromEdge = new CodeEdge(); + receivesFromEdge.setId("e2"); + receivesFromEdge.setKind(EdgeKind.RECEIVES_FROM); + receivesFromEdge.setSourceId("svc:TibcoReceiver"); + receivesFromEdge.setTarget(topic); + + LinkResult result = linker.link( + List.of(topic, producer, consumer), + List.of(sendsToEdge, receivesFromEdge) + ); + + assertEquals(1, result.edges().size()); + CodeEdge callsEdge = result.edges().getFirst(); + assertEquals(EdgeKind.CALLS, callsEdge.getKind()); + assertEquals("svc:TibcoSender", callsEdge.getSourceId()); + assertEquals("svc:TibcoReceiver", callsEdge.getTarget().getId()); + assertEquals(true, callsEdge.getProperties().get("inferred")); + } + + @Test + void linksPublishesListensEdgesViaEventNode() { + // Spring Events pattern using EVENT node kind + var event = new CodeNode("event:UserCreated", NodeKind.EVENT, "UserCreated"); + var publisher = new CodeNode("svc:UserService", NodeKind.CLASS, "UserService"); + var listener = new CodeNode("svc:EmailService", NodeKind.CLASS, "EmailService"); + + var publishesEdge = new CodeEdge(); + publishesEdge.setId("e1"); + publishesEdge.setKind(EdgeKind.PUBLISHES); + 
publishesEdge.setSourceId("svc:UserService"); + publishesEdge.setTarget(event); + + var listensEdge = new CodeEdge(); + listensEdge.setId("e2"); + listensEdge.setKind(EdgeKind.LISTENS); + listensEdge.setSourceId("svc:EmailService"); + listensEdge.setTarget(event); + + LinkResult result = linker.link( + List.of(event, publisher, listener), + List.of(publishesEdge, listensEdge) + ); + + assertEquals(1, result.edges().size()); + CodeEdge callsEdge = result.edges().getFirst(); + assertEquals(EdgeKind.CALLS, callsEdge.getKind()); + assertEquals("svc:UserService", callsEdge.getSourceId()); + assertEquals("svc:EmailService", callsEdge.getTarget().getId()); + assertEquals("UserCreated", callsEdge.getProperties().get("topic")); + } + + @Test + void handlesMessageQueueNodeKind() { + var mq = new CodeNode("mq:notifications", NodeKind.MESSAGE_QUEUE, "notifications"); + var sender = new CodeNode("svc:NotifySender", NodeKind.CLASS, "NotifySender"); + var receiver = new CodeNode("svc:NotifyWorker", NodeKind.CLASS, "NotifyWorker"); + + var sendsEdge = new CodeEdge(); + sendsEdge.setId("e1"); + sendsEdge.setKind(EdgeKind.SENDS_TO); + sendsEdge.setSourceId("svc:NotifySender"); + sendsEdge.setTarget(mq); + + var receivesEdge = new CodeEdge(); + receivesEdge.setId("e2"); + receivesEdge.setKind(EdgeKind.RECEIVES_FROM); + receivesEdge.setSourceId("svc:NotifyWorker"); + receivesEdge.setTarget(mq); + + LinkResult result = linker.link( + List.of(mq, sender, receiver), + List.of(sendsEdge, receivesEdge) + ); + + assertEquals(1, result.edges().size()); + } + + @Test + void determinismTest() { + var topic = new CodeNode("topic:payments", NodeKind.TOPIC, "payments"); + var prod1 = new CodeNode("svc:P1", NodeKind.CLASS, "P1"); + var prod2 = new CodeNode("svc:P2", NodeKind.CLASS, "P2"); + var cons = new CodeNode("svc:C1", NodeKind.CLASS, "C1"); + + var e1 = new CodeEdge(); + e1.setId("e1"); e1.setKind(EdgeKind.PUBLISHES); e1.setSourceId("svc:P1"); e1.setTarget(topic); + var e2 = new CodeEdge(); 
+ e2.setId("e2"); e2.setKind(EdgeKind.SENDS_TO); e2.setSourceId("svc:P2"); e2.setTarget(topic); + var e3 = new CodeEdge(); + e3.setId("e3"); e3.setKind(EdgeKind.LISTENS); e3.setSourceId("svc:C1"); e3.setTarget(topic); + + List nodeList = new ArrayList<>(List.of(topic, prod1, prod2, cons)); + List edgeList = new ArrayList<>(List.of(e1, e2, e3)); + + LinkResult result1 = linker.link(nodeList, edgeList); + LinkResult result2 = linker.link(nodeList, edgeList); + + assertEquals(result1.edges().size(), result2.edges().size()); + for (int i = 0; i < result1.edges().size(); i++) { + assertEquals(result1.edges().get(i).getId(), result2.edges().get(i).getId()); + assertEquals(result1.edges().get(i).getSourceId(), result2.edges().get(i).getSourceId()); + } + } } From 4c3310a08c44c303361c52a432087d065b51b9ec Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 17:24:01 +0000 Subject: [PATCH 7/7] perf: replace B-tree CONTAINS scan with fulltext index for search() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B-tree indexes on label_lower/fqn_lower cannot serve CONTAINS queries in Neo4j — every search caused a full graph scan. Replace with a fulltext index using the keyword analyzer so wildcard (*text*) queries are backed by an index. 
- Add FULLTEXT INDEX search_index on (n.label_lower, n.fqn_lower) in both GraphStore.bulkSave() and EnrichCommand secondary-index block - Use keyword analyzer to preserve whole-property tokens (avoids Lucene tokenisation splitting FQNs on dots) - Replace search() CONTAINS queries with db.index.fulltext.queryNodes() + *text* wildcard wrapping - Escape Lucene special characters before wrapping in toLuceneQuery() - Add CALL db.awaitIndexes(300) after secondary index creation in EnrichCommand so the first search request hits the index Fixes RAN-66 Co-Authored-By: Paperclip --- .../randomcodespace/iq/cli/EnrichCommand.java | 8 ++++ .../randomcodespace/iq/graph/GraphStore.java | 41 ++++++++++++++++--- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java index d2923579..501bed68 100644 --- a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java +++ b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java @@ -309,6 +309,14 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.filePath)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.label_lower)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.fqn_lower)"); + tx.execute("CREATE FULLTEXT INDEX search_index IF NOT EXISTS " + + "FOR (n:CodeNode) ON EACH [n.label_lower, n.fqn_lower] " + + "OPTIONS {indexConfig: {`fulltext.analyzer`: 'keyword'}}"); + tx.commit(); + } + // Wait for all indexes (including fulltext) to finish building + try (Transaction tx = db.beginTx()) { + tx.execute("CALL db.awaitIndexes(300)"); tx.commit(); } CliOutput.info(" Created Neo4j indexes"); diff --git a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java index 564d4c8e..cc4c962f 
100644 --- a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java +++ b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java @@ -89,6 +89,9 @@ public void bulkSave(List nodes) { tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.id)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.label_lower)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.fqn_lower)"); + tx.execute("CREATE FULLTEXT INDEX search_index IF NOT EXISTS " + + "FOR (n:CodeNode) ON EACH [n.label_lower, n.fqn_lower] " + + "OPTIONS {indexConfig: {`fulltext.analyzer`: 'keyword'}}"); tx.commit(); } @@ -249,16 +252,42 @@ public List findByFilePath(String filePath) { public List search(String text) { return queryNodes( - "MATCH (n:CodeNode) WHERE n.label CONTAINS $text OR n.fqn CONTAINS $text RETURN n", - Map.of("text", text)); + "CALL db.index.fulltext.queryNodes('search_index', $text) " + + "YIELD node RETURN node AS n", + Map.of("text", toLuceneQuery(text))); } public List search(String text, int limit) { - String lowerText = text.toLowerCase(); return queryNodes( - "MATCH (n:CodeNode) WHERE n.label_lower CONTAINS $text " - + "OR n.fqn_lower CONTAINS $text RETURN n LIMIT $limit", - Map.of("text", lowerText, "limit", limit)); + "CALL db.index.fulltext.queryNodes('search_index', $text) " + + "YIELD node RETURN node AS n LIMIT $limit", + Map.of("text", toLuceneQuery(text), "limit", limit)); + } + + /** + * Wraps a search term in Lucene wildcard syntax for substring matching against + * the fulltext index (which stores lowercased property values via keyword analyzer). + * Escapes Lucene special characters before wrapping. 
+ */ + private static String toLuceneQuery(String text) { + String lower = text.toLowerCase(); + String escaped = lower + .replace("\\", "\\\\") + .replace("+", "\\+") + .replace("-", "\\-") + .replace("!", "\\!") + .replace("(", "\\(") + .replace(")", "\\)") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("^", "\\^") + .replace("\"", "\\\"") + .replace("~", "\\~") + .replace(":", "\\:") + .replace("/", "\\/"); + return "*" + escaped + "*"; } public List findNeighbors(String nodeId) {