From e8a8040fca40ab765a32266929dad91fdfedde57 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 17:23:36 +0000 Subject: [PATCH 1/2] fix: extend TopicLinker to handle SENDS_TO/RECEIVES_FROM/PUBLISHES/LISTENS edges Tibco EMS, Azure Service Bus/Event Hub, and Spring Events emit different edge kinds than Kafka/RabbitMQ. TopicLinker previously only matched PRODUCES/CONSUMES, silently dropping cross-service CALLS edges for all three messaging patterns. - Add SENDS_TO and RECEIVES_FROM (Tibco/Azure) as producer/consumer edges - Add PUBLISHES and LISTENS (Spring Events) as producer/consumer edges - Add EVENT and MESSAGE_QUEUE node kinds to topic matching (alongside TOPIC/QUEUE) - Add 4 new test cases: SENDS_TO/RECEIVES_FROM, PUBLISHES/LISTENS, MESSAGE_QUEUE, determinism Co-Authored-By: Paperclip --- .../iq/analyzer/linker/TopicLinker.java | 19 +-- .../iq/analyzer/linker/TopicLinkerTest.java | 117 ++++++++++++++++++ 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java b/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java index ebabd316..534b7e46 100644 --- a/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java +++ b/src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java @@ -10,7 +10,6 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -18,10 +17,11 @@ import java.util.TreeSet; /** - * Links Kafka/RabbitMQ producers to consumers via shared topic names. + * Links messaging producers to consumers via shared topic/queue/event names. *

- * Scans for TOPIC/QUEUE nodes and matches PRODUCES edges with CONSUMES - * edges on the same topic label to create direct producer-to-consumer + * Scans for TOPIC/QUEUE/EVENT/MESSAGE_QUEUE nodes and matches producer edges + * (PRODUCES, SENDS_TO, PUBLISHES) with consumer edges (CONSUMES, RECEIVES_FROM, + * LISTENS) on the same topic label to create direct producer-to-consumer * CALLS edges. */ @Component @@ -31,10 +31,11 @@ public class TopicLinker implements Linker { @Override public LinkResult link(List nodes, List edges) { - // Collect topic/queue nodes by label + // Collect topic/queue/event/message_queue nodes by label Map> topicIdsByLabel = new TreeMap<>(); for (CodeNode node : nodes) { - if (node.getKind() == NodeKind.TOPIC || node.getKind() == NodeKind.QUEUE) { + if (node.getKind() == NodeKind.TOPIC || node.getKind() == NodeKind.QUEUE + || node.getKind() == NodeKind.EVENT || node.getKind() == NodeKind.MESSAGE_QUEUE) { topicIdsByLabel .computeIfAbsent(node.getLabel(), k -> new ArrayList<>()) .add(node.getId()); @@ -51,11 +52,13 @@ public LinkResult link(List nodes, List edges) { Map> consumersByTopic = new TreeMap<>(); for (CodeEdge edge : edges) { - if (edge.getKind() == EdgeKind.PRODUCES && edge.getTarget() != null) { + if (edge.getTarget() == null) continue; + EdgeKind kind = edge.getKind(); + if (kind == EdgeKind.PRODUCES || kind == EdgeKind.SENDS_TO || kind == EdgeKind.PUBLISHES) { producersByTopic .computeIfAbsent(edge.getTarget().getId(), k -> new ArrayList<>()) .add(edge.getSourceId()); - } else if (edge.getKind() == EdgeKind.CONSUMES && edge.getTarget() != null) { + } else if (kind == EdgeKind.CONSUMES || kind == EdgeKind.RECEIVES_FROM || kind == EdgeKind.LISTENS) { consumersByTopic .computeIfAbsent(edge.getTarget().getId(), k -> new ArrayList<>()) .add(edge.getSourceId()); diff --git a/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java b/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java index e56c4cf1..f35deb3f 100644 --- a/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java +++ b/src/test/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinkerTest.java @@ -103,4 +103,121 @@ void handlesQueueNodes() { assertEquals(1, result.edges().size()); } + + @Test + void linksSendsToReceivesFromEdgesViaTopic() { + // Tibco EMS / Azure Service Bus pattern + var topic = new CodeNode("topic:orders", NodeKind.TOPIC, "orders"); + var producer = new CodeNode("svc:TibcoSender", NodeKind.CLASS, "TibcoSender"); + var consumer = new CodeNode("svc:TibcoReceiver", NodeKind.CLASS, "TibcoReceiver"); + + var sendsToEdge = new CodeEdge(); + sendsToEdge.setId("e1"); + sendsToEdge.setKind(EdgeKind.SENDS_TO); + sendsToEdge.setSourceId("svc:TibcoSender"); + sendsToEdge.setTarget(topic); + + var receivesFromEdge = new CodeEdge(); + receivesFromEdge.setId("e2"); + receivesFromEdge.setKind(EdgeKind.RECEIVES_FROM); + receivesFromEdge.setSourceId("svc:TibcoReceiver"); + receivesFromEdge.setTarget(topic); + + LinkResult result = linker.link( + List.of(topic, producer, consumer), + List.of(sendsToEdge, receivesFromEdge) + ); + + assertEquals(1, result.edges().size()); + CodeEdge callsEdge = result.edges().getFirst(); + assertEquals(EdgeKind.CALLS, callsEdge.getKind()); + assertEquals("svc:TibcoSender", callsEdge.getSourceId()); + assertEquals("svc:TibcoReceiver", callsEdge.getTarget().getId()); + assertEquals(true, callsEdge.getProperties().get("inferred")); + } + + @Test + void linksPublishesListensEdgesViaEventNode() { + // Spring Events pattern using EVENT node kind + var event = new CodeNode("event:UserCreated", NodeKind.EVENT, "UserCreated"); + var publisher = new CodeNode("svc:UserService", NodeKind.CLASS, "UserService"); + var listener = new CodeNode("svc:EmailService", NodeKind.CLASS, "EmailService"); + + var publishesEdge = new CodeEdge(); + publishesEdge.setId("e1"); + publishesEdge.setKind(EdgeKind.PUBLISHES); + publishesEdge.setSourceId("svc:UserService"); + publishesEdge.setTarget(event); + + var listensEdge = new CodeEdge(); + listensEdge.setId("e2"); + listensEdge.setKind(EdgeKind.LISTENS); + listensEdge.setSourceId("svc:EmailService"); + listensEdge.setTarget(event); + + LinkResult result = linker.link( + List.of(event, publisher, listener), + List.of(publishesEdge, listensEdge) + ); + + assertEquals(1, result.edges().size()); + CodeEdge callsEdge = result.edges().getFirst(); + assertEquals(EdgeKind.CALLS, callsEdge.getKind()); + assertEquals("svc:UserService", callsEdge.getSourceId()); + assertEquals("svc:EmailService", callsEdge.getTarget().getId()); + assertEquals("UserCreated", callsEdge.getProperties().get("topic")); + } + + @Test + void handlesMessageQueueNodeKind() { + var mq = new CodeNode("mq:notifications", NodeKind.MESSAGE_QUEUE, "notifications"); + var sender = new CodeNode("svc:NotifySender", NodeKind.CLASS, "NotifySender"); + var receiver = new CodeNode("svc:NotifyWorker", NodeKind.CLASS, "NotifyWorker"); + + var sendsEdge = new CodeEdge(); + sendsEdge.setId("e1"); + sendsEdge.setKind(EdgeKind.SENDS_TO); + sendsEdge.setSourceId("svc:NotifySender"); + sendsEdge.setTarget(mq); + + var receivesEdge = new CodeEdge(); + receivesEdge.setId("e2"); + receivesEdge.setKind(EdgeKind.RECEIVES_FROM); + receivesEdge.setSourceId("svc:NotifyWorker"); + receivesEdge.setTarget(mq); + + LinkResult result = linker.link( + List.of(mq, sender, receiver), + List.of(sendsEdge, receivesEdge) + ); + + assertEquals(1, result.edges().size()); + } + + @Test + void determinismTest() { + var topic = new CodeNode("topic:payments", NodeKind.TOPIC, "payments"); + var prod1 = new CodeNode("svc:P1", NodeKind.CLASS, "P1"); + var prod2 = new CodeNode("svc:P2", NodeKind.CLASS, "P2"); + var cons = new CodeNode("svc:C1", NodeKind.CLASS, "C1"); + + var e1 = new CodeEdge(); + e1.setId("e1"); e1.setKind(EdgeKind.PUBLISHES); e1.setSourceId("svc:P1"); e1.setTarget(topic); + var e2 = new CodeEdge(); + e2.setId("e2"); e2.setKind(EdgeKind.SENDS_TO); e2.setSourceId("svc:P2"); e2.setTarget(topic); + var e3 = new CodeEdge(); + e3.setId("e3"); e3.setKind(EdgeKind.LISTENS); e3.setSourceId("svc:C1"); e3.setTarget(topic); + + List nodeList = new ArrayList<>(List.of(topic, prod1, prod2, cons)); + List edgeList = new ArrayList<>(List.of(e1, e2, e3)); + + LinkResult result1 = linker.link(nodeList, edgeList); + LinkResult result2 = linker.link(nodeList, edgeList); + + assertEquals(result1.edges().size(), result2.edges().size()); + for (int i = 0; i < result1.edges().size(); i++) { + assertEquals(result1.edges().get(i).getId(), result2.edges().get(i).getId()); + assertEquals(result1.edges().get(i).getSourceId(), result2.edges().get(i).getSourceId()); + } + } } From 4c3310a08c44c303361c52a432087d065b51b9ec Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Wed, 1 Apr 2026 17:24:01 +0000 Subject: [PATCH 2/2] perf: replace B-tree CONTAINS scan with fulltext index for search() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit B-tree indexes on label_lower/fqn_lower cannot serve CONTAINS queries in Neo4j — every search caused a full graph scan. Replace with a fulltext index using the keyword analyzer so wildcard (*text*) queries are backed by an index. - Add FULLTEXT INDEX search_index on (n.label_lower, n.fqn_lower) in both GraphStore.bulkSave() and EnrichCommand secondary-index block - Use keyword analyzer to preserve whole-property tokens (avoids Lucene tokenisation splitting FQNs on dots) - Replace search() CONTAINS queries with db.index.fulltext.queryNodes() + *text* wildcard wrapping - Escape Lucene special characters before wrapping in toLuceneQuery() - Add CALL db.awaitIndexes(300) after secondary index creation in EnrichCommand so the first search request hits the index Fixes RAN-66 Co-Authored-By: Paperclip --- .../randomcodespace/iq/cli/EnrichCommand.java | 8 ++++ .../randomcodespace/iq/graph/GraphStore.java | 41 ++++++++++++++++--- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java index d2923579..501bed68 100644 --- a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java +++ b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java @@ -309,6 +309,14 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.filePath)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.label_lower)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.fqn_lower)"); + tx.execute("CREATE FULLTEXT INDEX search_index IF NOT EXISTS " + + "FOR (n:CodeNode) ON EACH [n.label_lower, n.fqn_lower] " + + "OPTIONS {indexConfig: {`fulltext.analyzer`: 'keyword'}}"); + tx.commit(); + } + // Wait for all indexes (including fulltext) to finish building + try (Transaction tx = db.beginTx()) { + tx.execute("CALL db.awaitIndexes(300)"); tx.commit(); } CliOutput.info(" Created Neo4j indexes"); diff --git a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java index 564d4c8e..cc4c962f 100644 --- a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java +++ b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java @@ -89,6 +89,9 @@ public void bulkSave(List nodes) { tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.id)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.label_lower)"); tx.execute("CREATE INDEX IF NOT EXISTS FOR (n:CodeNode) ON (n.fqn_lower)"); + tx.execute("CREATE FULLTEXT INDEX search_index IF NOT EXISTS " + + "FOR (n:CodeNode) ON EACH [n.label_lower, n.fqn_lower] " + + "OPTIONS {indexConfig: {`fulltext.analyzer`: 'keyword'}}"); tx.commit(); } @@ -249,16 +252,42 @@ public List findByFilePath(String filePath) { public List search(String text) { return queryNodes( - "MATCH (n:CodeNode) WHERE n.label CONTAINS $text OR n.fqn CONTAINS $text RETURN n", - Map.of("text", text)); + "CALL db.index.fulltext.queryNodes('search_index', $text) " + + "YIELD node RETURN node AS n", + Map.of("text", toLuceneQuery(text))); } public List search(String text, int limit) { - String lowerText = text.toLowerCase(); return queryNodes( - "MATCH (n:CodeNode) WHERE n.label_lower CONTAINS $text " - + "OR n.fqn_lower CONTAINS $text RETURN n LIMIT $limit", - Map.of("text", lowerText, "limit", limit)); + "CALL db.index.fulltext.queryNodes('search_index', $text) " + + "YIELD node RETURN node AS n LIMIT $limit", + Map.of("text", toLuceneQuery(text), "limit", limit)); + } + + /** + * Wraps a search term in Lucene wildcard syntax for substring matching against + * the fulltext index (which stores lowercased property values via keyword analyzer). + * Escapes Lucene special characters before wrapping. + */ + private static String toLuceneQuery(String text) { + String lower = text.toLowerCase(); + String escaped = lower + .replace("\\", "\\\\") + .replace("+", "\\+") + .replace("-", "\\-") + .replace("!", "\\!") + .replace("(", "\\(") + .replace(")", "\\)") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("^", "\\^") + .replace("\"", "\\\"") + .replace("~", "\\~") + .replace(":", "\\:") + .replace("/", "\\/"); + return "*" + escaped + "*"; } public List findNeighbors(String nodeId) {