diff --git a/src/main/java/io/github/randomcodespace/iq/analyzer/Analyzer.java b/src/main/java/io/github/randomcodespace/iq/analyzer/Analyzer.java index 3903e3fa..9094cd40 100644 --- a/src/main/java/io/github/randomcodespace/iq/analyzer/Analyzer.java +++ b/src/main/java/io/github/randomcodespace/iq/analyzer/Analyzer.java @@ -236,7 +236,7 @@ private AnalysisResult runWithCache(Path root, Integer parallelism, AnalysisCach // 2. Analyze files in parallel with virtual threads report.accept("Analyzing " + totalFiles + " files..."); DetectorResult[] resultSlots = new DetectorResult[files.size()]; - int[] cacheHits = {0}; + var cacheHitsCounter = new java.util.concurrent.atomic.AtomicInteger(0); final DetectorRegistry detectorRegistry = effectiveRegistry; var executorService = parallelism != null && parallelism > 0 @@ -258,9 +258,7 @@ private AnalysisResult runWithCache(Path root, Integer parallelism, AnalysisCach var cached = cacheRef.loadCachedResults(hash); if (cached != null) { resultSlots[idx] = DetectorResult.of(cached.nodes(), cached.edges()); - synchronized (cacheHits) { - cacheHits[0]++; - } + cacheHitsCounter.incrementAndGet(); return null; } } @@ -296,8 +294,8 @@ private AnalysisResult runWithCache(Path root, Integer parallelism, AnalysisCach } } - if (cache != null && cacheHits[0] > 0) { - report.accept("Cache hits: " + cacheHits[0] + " / " + totalFiles + " files"); + if (cache != null && cacheHitsCounter.get() > 0) { + report.accept("Cache hits: " + cacheHitsCounter.get() + " / " + totalFiles + " files"); } // 3. 
Build graph (batched) diff --git a/src/main/java/io/github/randomcodespace/iq/api/TopologyController.java b/src/main/java/io/github/randomcodespace/iq/api/TopologyController.java index 4803a6f1..707f1b95 100644 --- a/src/main/java/io/github/randomcodespace/iq/api/TopologyController.java +++ b/src/main/java/io/github/randomcodespace/iq/api/TopologyController.java @@ -91,7 +91,7 @@ private synchronized void ensureDataLoaded() { /** * Invalidate the in-memory cache (e.g. after re-analysis). */ - public void invalidateCache() { + public synchronized void invalidateCache() { cachedNodes = null; cachedEdges = null; neo4jHasData = null; diff --git a/src/main/java/io/github/randomcodespace/iq/cache/AnalysisCache.java b/src/main/java/io/github/randomcodespace/iq/cache/AnalysisCache.java index 46d0edcf..b953a661 100644 --- a/src/main/java/io/github/randomcodespace/iq/cache/AnalysisCache.java +++ b/src/main/java/io/github/randomcodespace/iq/cache/AnalysisCache.java @@ -240,7 +240,8 @@ public synchronized CachedResult loadCachedResults(String contentHash) { stmt.setString(1, contentHash); try (ResultSet rs = stmt.executeQuery()) { while (rs.next()) { - nodes.add(deserializeNode(rs.getString(1))); + CodeNode node = deserializeNode(rs.getString(1)); + if (node != null) nodes.add(node); } } } @@ -250,7 +251,8 @@ public synchronized CachedResult loadCachedResults(String contentHash) { stmt.setString(1, contentHash); try (ResultSet rs = stmt.executeQuery()) { while (rs.next()) { - edges.add(deserializeEdge(rs.getString(1))); + CodeEdge edge = deserializeEdge(rs.getString(1)); + if (edge != null) edges.add(edge); } } } @@ -471,9 +473,14 @@ private String serializeNode(CodeNode node) { private CodeNode deserializeNode(String json) { try { Map data = MAPPER.readValue(json, new TypeReference<>() {}); + String kindStr = (String) data.get("kind"); + if (kindStr == null) { + log.debug("Skipping node with null kind: {}", json); + return null; + } CodeNode node = new CodeNode(); 
node.setId((String) data.get("id")); - node.setKind(NodeKind.fromValue((String) data.get("kind"))); + node.setKind(NodeKind.fromValue(kindStr)); node.setLabel((String) data.get("label")); node.setFqn((String) data.get("fqn")); node.setModule((String) data.get("module")); @@ -492,7 +499,7 @@ private CodeNode deserializeNode(String json) { return node; } catch (Exception e) { log.debug("Failed to deserialize node: {}", json, e); - return new CodeNode("unknown", NodeKind.CLASS, "unknown"); + return null; } } @@ -519,6 +526,10 @@ private CodeEdge deserializeEdge(String json) { Map data = MAPPER.readValue(json, new TypeReference<>() {}); String id = (String) data.get("id"); String kindStr = (String) data.get("kind"); + if (kindStr == null) { + log.debug("Skipping edge with null kind: {}", json); + return null; + } String sourceId = (String) data.get("source_id"); String targetId = (String) data.get("target_id"); @@ -537,7 +548,7 @@ private CodeEdge deserializeEdge(String json) { return edge; } catch (Exception e) { log.debug("Failed to deserialize edge: {}", json, e); - return new CodeEdge("unknown", EdgeKind.CALLS, "unknown", null); + return null; } } @@ -622,7 +633,8 @@ INNER JOIN (SELECT id, MAX(row_id) AS max_id FROM nodes GROUP BY id) m """)) { try (ResultSet rs = stmt.executeQuery()) { while (rs.next()) { - nodes.add(deserializeNode(rs.getString(1))); + CodeNode node = deserializeNode(rs.getString(1)); + if (node != null) nodes.add(node); } } } catch (SQLException e) { @@ -641,7 +653,8 @@ public synchronized List loadAllEdges() { try (var stmt = conn.prepareStatement("SELECT data FROM edges")) { try (ResultSet rs = stmt.executeQuery()) { while (rs.next()) { - edges.add(deserializeEdge(rs.getString(1))); + CodeEdge edge = deserializeEdge(rs.getString(1)); + if (edge != null) edges.add(edge); } } } catch (SQLException e) { diff --git a/src/main/java/io/github/randomcodespace/iq/config/GraphBootstrapper.java 
b/src/main/java/io/github/randomcodespace/iq/config/GraphBootstrapper.java index 25840ef2..2082607b 100644 --- a/src/main/java/io/github/randomcodespace/iq/config/GraphBootstrapper.java +++ b/src/main/java/io/github/randomcodespace/iq/config/GraphBootstrapper.java @@ -99,8 +99,9 @@ public void bootstrapNeo4jFromCache() { sourceNode.getEdges().add(edge); } - // Save all nodes (with their attached edges) to Neo4j - graphStore.saveAll(nodes); + // Save all nodes (with their attached edges) to Neo4j using bulk Cypher + // (not SDN saveAll which recursively hydrates @Relationship edges → OOM) + graphStore.bulkSave(nodes); log.info("Bootstrapped Neo4j with {} nodes and {} edges from H2 cache", nodes.size(), edges.size()); diff --git a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java index 51e720c4..98d3e7e8 100644 --- a/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java +++ b/src/main/java/io/github/randomcodespace/iq/graph/GraphStore.java @@ -418,7 +418,6 @@ public List> countByFileExtension() { try (Transaction tx = graphDb.beginTx()) { var result = tx.execute( "MATCH (n:CodeNode) WHERE n.filePath IS NOT NULL AND n.filePath CONTAINS '.' 
" - + "WITH reverse(split(n.filePath, '.')[-1]) AS ext, n " + "WITH split(n.filePath, '.')[-1] AS ext " + "RETURN ext, count(*) AS cnt ORDER BY cnt DESC"); while (result.hasNext()) { diff --git a/src/main/java/io/github/randomcodespace/iq/mcp/McpTools.java b/src/main/java/io/github/randomcodespace/iq/mcp/McpTools.java index 3601b159..9bfb4388 100644 --- a/src/main/java/io/github/randomcodespace/iq/mcp/McpTools.java +++ b/src/main/java/io/github/randomcodespace/iq/mcp/McpTools.java @@ -257,11 +257,15 @@ public String generateFlow( public String runCypher( @McpToolParam(description = "Cypher query string") String query) { // Block any mutation keywords anywhere in the query (defense-in-depth) - Set BLOCKED = Set.of("CREATE", "DELETE", "DETACH", "SET ", "REMOVE", "MERGE", "DROP", "FOREACH", "LOAD CSV"); String upper = query.trim().toUpperCase(); - for (String blocked : BLOCKED) { - if (upper.contains(blocked)) { - return toJson(Map.of("error", "Read-only queries only. Mutation keyword found: " + blocked.trim())); + List BLOCKED_PATTERNS = List.of( + "\\bCREATE\\b", "\\bDELETE\\b", "\\bDETACH\\b", "\\bSET\\b", + "\\bREMOVE\\b", "\\bMERGE\\b", "\\bDROP\\b", "\\bFOREACH\\b", + "\\bLOAD\\s+CSV\\b", "\\bCALL\\b"); + for (String pattern : BLOCKED_PATTERNS) { + if (java.util.regex.Pattern.compile(pattern).matcher(upper).find()) { + String keyword = pattern.replace("\\b", "").replace("\\s+", " "); + return toJson(Map.of("error", "Read-only queries only. 
Mutation keyword found: " + keyword));
             }
         }
         try {
diff --git a/src/test/java/io/github/randomcodespace/iq/cache/AnalysisCacheBatchReplaceTest.java b/src/test/java/io/github/randomcodespace/iq/cache/AnalysisCacheBatchReplaceTest.java
new file mode 100644
index 00000000..ffa743ea
--- /dev/null
+++ b/src/test/java/io/github/randomcodespace/iq/cache/AnalysisCacheBatchReplaceTest.java
@@ -0,0 +1,246 @@
+package io.github.randomcodespace.iq.cache;
+
+import io.github.randomcodespace.iq.model.CodeEdge;
+import io.github.randomcodespace.iq.model.CodeNode;
+import io.github.randomcodespace.iq.model.EdgeKind;
+import io.github.randomcodespace.iq.model.NodeKind;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Tests for {@link AnalysisCache#replaceAll} and {@link AnalysisCache#storeBatchResults}
+ * which are critical indexing pipeline methods with previously zero coverage.
+ */
+class AnalysisCacheBatchReplaceTest {
+
+    private AnalysisCache cache;
+
+    @BeforeEach
+    void setUp(@TempDir Path tempDir) {
+        cache = new AnalysisCache(tempDir.resolve("test-cache.db"));
+    }
+
+    @AfterEach
+    void tearDown() {
+        if (cache != null) {
+            cache.close();
+        }
+    }
+
+    // --- storeBatchResults tests ---
+
+    @Test
+    void storeBatchResultsDelegatesCorrectly() {
+        CodeNode node = new CodeNode("batch:n1", NodeKind.CLASS, "BatchClass");
+        node.setFilePath("src/Batch.java");
+        CodeEdge edge = new CodeEdge("batch:e1", EdgeKind.CALLS, "batch:n1",
+                new CodeNode("batch:n2", NodeKind.METHOD, "batchMethod"));
+
+        cache.storeBatchResults("batch-001", "src/Batch.java", "java",
+                List.of(node), List.of(edge));
+
+        assertTrue(cache.isCached("batch-001"));
+        var result = cache.loadCachedResults("batch-001");
+        assertNotNull(result);
+        assertEquals(1, result.nodes().size());
+        assertEquals(1, result.edges().size());
+        assertEquals("batch:n1", result.nodes().getFirst().getId());
+    }
+
+    @Test
+    void storeBatchResultsWithEmptyLists() {
+        cache.storeBatchResults("empty-batch", "src/empty.java", "java",
+                List.of(), List.of());
+
+        assertTrue(cache.isCached("empty-batch"));
+        // Empty nodes/edges return null from loadCachedResults
+        assertNull(cache.loadCachedResults("empty-batch"));
+    }
+
+    @Test
+    void storeBatchResultsPreservesNodeProperties() {
+        CodeNode node = new CodeNode("batch:props", NodeKind.ENDPOINT, "GET /api/test");
+        node.setFilePath("src/Controller.java");
+        node.setLayer("backend");
+        node.setModule("api");
+        node.setFqn("com.example.Controller.getTest");
+        node.setLineStart(10);
+        node.setLineEnd(20);
+        node.setAnnotations(List.of("@GetMapping", "@ResponseBody"));
+        node.setProperties(Map.of("method", "GET", "path", "/api/test", "framework", "spring_boot"));
+
+        cache.storeBatchResults("batch-props", "src/Controller.java", "java",
+                List.of(node), List.of());
+
+        var result = cache.loadCachedResults("batch-props");
+        assertNotNull(result);
+        CodeNode loaded = result.nodes().getFirst();
+        assertEquals("batch:props", loaded.getId());
+        assertEquals(NodeKind.ENDPOINT, loaded.getKind());
+        assertEquals("GET /api/test", loaded.getLabel());
+        assertEquals("src/Controller.java", loaded.getFilePath());
+        assertEquals("backend", loaded.getLayer());
+        assertEquals("api", loaded.getModule());
+        assertEquals("com.example.Controller.getTest", loaded.getFqn());
+        assertEquals(10, loaded.getLineStart());
+        assertEquals(20, loaded.getLineEnd());
+        assertEquals(List.of("@GetMapping", "@ResponseBody"), loaded.getAnnotations());
+        assertEquals("GET", loaded.getProperties().get("method"));
+        assertEquals("spring_boot", loaded.getProperties().get("framework"));
+    }
+
+    // --- replaceAll tests ---
+
+    @Test
+    void replaceAllClearsPreviousDataAndStoresNew() {
+        // Store initial data
+        CodeNode original = new CodeNode("orig:n1", NodeKind.CLASS, "Original");
+        cache.storeResults("hash1", "src/orig.java", "java", List.of(original), List.of());
+        assertEquals(1, cache.getNodeCount());
+
+        // Replace with enriched data
+        CodeNode enriched1 = new CodeNode("enr:n1", NodeKind.CLASS, "EnrichedClass");
+        enriched1.setLayer("backend");
+        CodeNode enriched2 = new CodeNode("enr:n2", NodeKind.SERVICE, "MyService");
+        enriched2.setLayer("backend");
+        CodeEdge enrichedEdge = new CodeEdge("enr:e1", EdgeKind.CONTAINS, "enr:n2", enriched1);
+
+        cache.replaceAll(List.of(enriched1, enriched2), List.of(enrichedEdge));
+
+        // Original data should be gone
+        assertFalse(cache.isCached("hash1"));
+
+        // New enriched data should be present under __enriched__ hash
+        assertTrue(cache.isCached("__enriched__"));
+        assertEquals(2, cache.getNodeCount());
+        assertEquals(1, cache.getEdgeCount());
+    }
+
+    @Test
+    void replaceAllWithEmptyListsClearsEverything() {
+        // Store initial data
+        CodeNode node = new CodeNode("n1", NodeKind.CLASS, "C1");
+        cache.storeResults("hash1", "f1.java", "java", List.of(node), List.of());
+        assertEquals(1, cache.getNodeCount());
+
+        cache.replaceAll(List.of(), List.of());
+
+        assertEquals(0, cache.getNodeCount());
+        assertEquals(0, cache.getEdgeCount());
+    }
+
+    @Test
+    void replaceAllPreservesAnalysisRunMetadata() {
+        cache.recordRun("commit-sha-1", 100);
+        cache.recordRun("commit-sha-2", 200);
+
+        CodeNode node = new CodeNode("n1", NodeKind.CLASS, "C1");
+        cache.storeResults("hash1", "f1.java", "java", List.of(node), List.of());
+
+        cache.replaceAll(List.of(node), List.of());
+
+        // Analysis run metadata should survive the replace
+        assertEquals("commit-sha-2", cache.getLastCommit());
+    }
+
+    @Test
+    void replaceAllWithLargeDataset() {
+        List<CodeNode> nodes = new ArrayList<>();
+        List<CodeEdge> edges = new ArrayList<>();
+        for (int i = 0; i < 500; i++) {
+            CodeNode n = new CodeNode("node:" + i, NodeKind.CLASS, "Class" + i);
+            n.setFilePath("src/Class" + i + ".java");
+            n.setLayer("backend");
+            nodes.add(n);
+        }
+        for (int i = 0; i < 499; i++) {
+            edges.add(new CodeEdge("edge:" + i, EdgeKind.CALLS, "node:" + i,
+                    new CodeNode("node:" + (i + 1), NodeKind.CLASS, "Class" + (i + 1))));
+        }
+
+        cache.replaceAll(nodes, edges);
+
+        assertEquals(500, cache.getNodeCount());
+        assertEquals(499, cache.getEdgeCount());
+    }
+
+    @Test
+    void replaceAllDataCanBeLoadedBack() {
+        CodeNode node = new CodeNode("enr:n1", NodeKind.ENDPOINT, "GET /health");
+        node.setLayer("backend");
+        node.setProperties(Map.of("method", "GET"));
+        CodeEdge edge = new CodeEdge("enr:e1", EdgeKind.DEFINES, "enr:n1",
+                new CodeNode("enr:n2", NodeKind.METHOD, "health"));
+
+        cache.replaceAll(List.of(node), List.of(edge));
+
+        List<CodeNode> allNodes = cache.loadAllNodes();
+        List<CodeEdge> allEdges = cache.loadAllEdges();
+        assertEquals(1, allNodes.size());
+        assertEquals(1, allEdges.size());
+        assertEquals("enr:n1", allNodes.getFirst().getId());
+        assertEquals("backend", allNodes.getFirst().getLayer());
+    }
+
+    // --- Concurrent access test ---
+
+    @Test
+    void concurrentStoreAndReadDoesNotCorrupt() throws InterruptedException {
+        // Store some initial data
+        for (int i = 0; i < 10; i++) {
+            CodeNode n = new CodeNode("n:" + i, NodeKind.CLASS, "C" + i);
+            cache.storeResults("hash:" + i, "f" + i + ".java", "java",
+                    List.of(n), List.of());
+        }
+
+        // Exceptions thrown on worker threads never reach JUnit on their own,
+        // so capture the first one and fail the test from the main thread.
+        AtomicReference<Throwable> failure = new AtomicReference<>();
+
+        // Run concurrent reads and writes
+        Thread writer = new Thread(() -> {
+            try {
+                for (int i = 10; i < 20; i++) {
+                    CodeNode n = new CodeNode("n:" + i, NodeKind.CLASS, "C" + i);
+                    cache.storeResults("hash:" + i, "f" + i + ".java", "java",
+                            List.of(n), List.of());
+                }
+            } catch (Throwable t) {
+                failure.compareAndSet(null, t);
+            }
+        });
+
+        Thread reader = new Thread(() -> {
+            try {
+                for (int i = 0; i < 10; i++) {
+                    cache.loadCachedResults("hash:" + i);
+                    cache.getNodeCount();
+                }
+            } catch (Throwable t) {
+                failure.compareAndSet(null, t);
+            }
+        });
+
+        writer.start();
+        reader.start();
+        writer.join(5000);
+        reader.join(5000);
+
+        assertFalse(writer.isAlive(), "writer did not finish within 5s");
+        assertFalse(reader.isAlive(), "reader did not finish within 5s");
+        assertNull(failure.get(), "worker thread failed");
+
+        // All 10 seeded nodes plus the 10 written concurrently must be present
+        assertEquals(20, cache.getNodeCount());
+    }
+}
diff --git a/src/test/java/io/github/randomcodespace/iq/config/GraphBootstrapperTest.java b/src/test/java/io/github/randomcodespace/iq/config/GraphBootstrapperTest.java
new file mode 100644
index 00000000..0386acff
--- /dev/null
+++ b/src/test/java/io/github/randomcodespace/iq/config/GraphBootstrapperTest.java
@@ -0,0 +1,155 @@
+package io.github.randomcodespace.iq.config;
+
+import io.github.randomcodespace.iq.cache.AnalysisCache;
+import io.github.randomcodespace.iq.graph.GraphStore;
+import io.github.randomcodespace.iq.model.CodeEdge;
+import io.github.randomcodespace.iq.model.CodeNode;
+import io.github.randomcodespace.iq.model.EdgeKind;
+import io.github.randomcodespace.iq.model.NodeKind;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.ArgumentCaptor;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.Mockito.*;
+
+@SuppressWarnings("unchecked")
+class GraphBootstrapperTest {
+
+    private GraphStore graphStore;
+    private CodeIqConfig config;
+    private 
GraphBootstrapper bootstrapper;
+
+    @BeforeEach
+    void setUp() {
+        graphStore = mock(GraphStore.class);
+        config = new CodeIqConfig();
+        bootstrapper = new GraphBootstrapper(graphStore, config);
+    }
+
+    @Test
+    void skipsBootstrapWhenNeo4jAlreadyHasData(@TempDir Path tempDir) {
+        config.setRootPath(tempDir.toString());
+        when(graphStore.count()).thenReturn(100L);
+
+        bootstrapper.bootstrapNeo4jFromCache();
+
+        verify(graphStore, never()).bulkSave(anyList());
+    }
+
+    @Test
+    void skipsBootstrapWhenNoCacheFileExists(@TempDir Path tempDir) {
+        config.setRootPath(tempDir.toString());
+        when(graphStore.count()).thenReturn(0L);
+
+        bootstrapper.bootstrapNeo4jFromCache();
+
+        verify(graphStore, never()).bulkSave(anyList());
+    }
+
+    @Test
+    void bootstrapsFromH2WhenNeo4jIsEmpty(@TempDir Path tempDir) throws IOException {
+        config.setRootPath(tempDir.toString());
+        when(graphStore.count()).thenReturn(0L);
+
+        // Create a real H2 cache with test data
+        Path cacheDir = tempDir.resolve(config.getCacheDir());
+        Files.createDirectories(cacheDir);
+        Path dbPath = cacheDir.resolve("analysis-cache.db");
+
+        try (AnalysisCache cache = new AnalysisCache(dbPath)) {
+            CodeNode node1 = new CodeNode("n1", NodeKind.CLASS, "MyClass");
+            node1.setFilePath("src/MyClass.java");
+            CodeNode node2 = new CodeNode("n2", NodeKind.METHOD, "myMethod");
+            node2.setFilePath("src/MyClass.java");
+            CodeEdge edge = new CodeEdge("e1", EdgeKind.CALLS, "n1",
+                    new CodeNode("n2", NodeKind.METHOD, "myMethod"));
+            cache.storeResults("hash1", "src/MyClass.java", "java",
+                    List.of(node1, node2), List.of(edge));
+        }
+
+        bootstrapper.bootstrapNeo4jFromCache();
+
+        @SuppressWarnings({"unchecked", "rawtypes"})
+        ArgumentCaptor<List<CodeNode>> captor = (ArgumentCaptor) ArgumentCaptor.forClass(List.class);
+        verify(graphStore).bulkSave(captor.capture());
+        List<CodeNode> saved = new ArrayList<>(captor.getValue());
+        assertEquals(2, saved.size());
+    }
+
+    @Test
+    void skipsBootstrapWhenH2CacheIsEmpty(@TempDir Path tempDir) throws IOException {
+        config.setRootPath(tempDir.toString());
+        when(graphStore.count()).thenReturn(0L);
+
+        // Create an empty H2 cache
+        Path cacheDir = tempDir.resolve(config.getCacheDir());
+        Files.createDirectories(cacheDir);
+        Path dbPath = cacheDir.resolve("analysis-cache.db");
+
+        try (AnalysisCache cache = new AnalysisCache(dbPath)) {
+            // Empty cache - no data stored
+        }
+
+        bootstrapper.bootstrapNeo4jFromCache();
+
+        verify(graphStore, never()).bulkSave(anyList());
+    }
+
+    @Test
+    void attachesEdgesToSourceNodesBeforeSaving(@TempDir Path tempDir) throws IOException {
+        config.setRootPath(tempDir.toString());
+        when(graphStore.count()).thenReturn(0L);
+
+        Path cacheDir = tempDir.resolve(config.getCacheDir());
+        Files.createDirectories(cacheDir);
+        Path dbPath = cacheDir.resolve("analysis-cache.db");
+
+        try (AnalysisCache cache = new AnalysisCache(dbPath)) {
+            CodeNode source = new CodeNode("src:node", NodeKind.CLASS, "Source");
+            CodeNode target = new CodeNode("tgt:node", NodeKind.METHOD, "Target");
+            CodeEdge edge = new CodeEdge("e1", EdgeKind.CALLS, "src:node", target);
+            cache.storeResults("hash1", "src/file.java", "java",
+                    List.of(source, target), List.of(edge));
+        }
+
+        bootstrapper.bootstrapNeo4jFromCache();
+
+        @SuppressWarnings({"unchecked", "rawtypes"})
+        ArgumentCaptor<List<CodeNode>> captor2 = (ArgumentCaptor) ArgumentCaptor.forClass(List.class);
+        verify(graphStore).bulkSave(captor2.capture());
+        List<CodeNode> saved = new ArrayList<>(captor2.getValue());
+
+        // Find the source node and verify it has the edge attached
+        boolean foundEdge = false;
+        for (CodeNode n : saved) {
+            if ("src:node".equals(n.getId())) {
+                foundEdge = !n.getEdges().isEmpty();
+            }
+        }
+        assertTrue(foundEdge, "Source node should have edges attached");
+    }
+
+    @Test
+    void handlesExceptionGracefully(@TempDir Path tempDir) throws IOException {
+        config.setRootPath(tempDir.toString());
+        when(graphStore.count()).thenReturn(0L);
+
+        // Create a corrupt "cache" file
+        Path cacheDir = tempDir.resolve(config.getCacheDir());
+        Files.createDirectories(cacheDir);
+        Files.writeString(cacheDir.resolve("analysis-cache.mv.db"), "corrupt data");
+
+        // Should not throw
+        bootstrapper.bootstrapNeo4jFromCache();
+
+        verify(graphStore, never()).bulkSave(anyList());
+    }
+}