diff --git a/.gitignore b/.gitignore index ad3520db..25299747 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,4 @@ helm/ # Neo4j embedded data neo4j-data/ graph.db/ +.worktrees/ diff --git a/CLAUDE.md b/CLAUDE.md index 9fb43226..07232903 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -44,7 +44,7 @@ Remote server (or local): ``` index: FileDiscovery → Parsers → Detectors (virtual threads) → GraphBuilder → H2 cache -enrich: H2 → Linkers → LayerClassifier → ServiceDetector → Neo4j (UNWIND bulk-load) +enrich: H2 → Linkers → LayerClassifier → LexicalEnricher → LanguageEnricher → ServiceDetector → Neo4j (UNWIND bulk-load) serve: Neo4j → GraphStore → QueryService → REST API / MCP / Web UI ``` @@ -96,8 +96,17 @@ io.github.randomcodespace.iq |-- grammar/ # ANTLR parser factory + generated parsers |-- graph/ # GraphStore (Neo4j facade), GraphRepository (SDN, writes only) |-- health/ # GraphHealthIndicator (Spring Actuator) - |-- mcp/ # McpTools (31 @McpTool methods, read-only) + |-- mcp/ # McpTools (34 @McpTool methods, read-only) |-- model/ # CodeNode, CodeEdge, NodeKind (32), EdgeKind (27) + |-- intelligence/ # Intelligence enrichment (Phase 2-5) + | |-- lexical/ # LexicalEnricher, LexicalQueryService, DocCommentExtractor, SnippetStore + | |-- extractor/ # LanguageEnricher, LanguageExtractor, LanguageExtractionResult + | | |-- java/ # JavaLanguageExtractor + | | |-- typescript/ # TypeScriptLanguageExtractor + | | |-- python/ # PythonLanguageExtractor + | | |-- go/ # GoLanguageExtractor + | |-- evidence/ # EvidencePack, EvidencePackAssembler + | |-- query/ # QueryPlanner, QueryRoute, QueryPlan |-- query/ # QueryService, StatsService (categorized), TopologyService |-- web/ # Static resource serving (React SPA) ``` @@ -162,7 +171,7 @@ code-iq serve /path/to/repo # needs enrich if using index ## Server Endpoints (all read-only) -### REST API (`/api`) -- 34 endpoints +### REST API (`/api`) -- 37 endpoints **GraphController** (`/api`): - `GET /api/stats` -- Rich categorized statistics (graph, languages, frameworks, infra, connections, auth, architecture) @@ -200,8 +209,13 @@ code-iq serve /path/to/repo # needs enrich if using index - `GET /api/flow/{view}/{nodeId}/children` -- Node children in flow - `GET /api/flow/{view}/{nodeId}/parent` -- Node parent in flow -### MCP Tools (31, via `@McpTool` annotation) -`get_stats`, `get_detailed_stats`, `query_nodes`, `query_edges`, `get_node_neighbors`, `get_ego_graph`, `find_cycles`, `find_shortest_path`, `find_consumers`, `find_producers`, `find_callers`, `find_dependencies`, `find_dependents`, `find_dead_code`, `generate_flow`, `run_cypher`, `find_component_by_file`, `trace_impact`, `find_related_endpoints`, `search_graph`, `read_file`, `get_topology`, `service_detail`, `service_dependencies`, `service_dependents`, `blast_radius`, `find_path`, `find_bottlenecks`, `find_circular_deps`, `find_dead_services`, `find_node` +**IntelligenceController** (`/api/intelligence`): +- `GET /api/intelligence/evidence` -- Evidence pack for a node +- `GET /api/intelligence/manifest` -- Artifact manifest +- `GET /api/intelligence/capabilities` -- Capability matrix + +### MCP Tools (34, via `@McpTool` annotation) +`get_stats`, `get_detailed_stats`, `query_nodes`, `query_edges`, `get_node_neighbors`, `get_ego_graph`, `find_cycles`, `find_shortest_path`, `find_consumers`, `find_producers`, `find_callers`, `find_dependencies`, `find_dependents`, `find_dead_code`, `generate_flow`, `run_cypher`, `find_component_by_file`, `trace_impact`, `find_related_endpoints`, `search_graph`, `read_file`, `get_topology`, `service_detail`, `service_dependencies`, `service_dependents`, `blast_radius`, `find_path`, `find_bottlenecks`, `find_circular_deps`, `find_dead_services`, `find_node`, `get_evidence_pack`, `get_artifact_metadata`, `get_capabilities` ## Adding a New Detector @@ -309,13 +323,18 @@ mvn dependency-check:check | `config/JacksonConfig.java` | Jackson config (FAIL_ON_UNKNOWN_PROPERTIES disabled for MCP compat) | | `cache/AnalysisCache.java` | H2 incremental cache | | `api/GraphController.java` | REST API endpoints (read-only) | -| `mcp/McpTools.java` | 31 MCP tool definitions (`@McpTool`, read-only) | +| `mcp/McpTools.java` | 34 MCP tool definitions (`@McpTool`, read-only) | | `query/QueryService.java` | Graph query operations with Spring caching | | `query/StatsService.java` | Rich categorized statistics (7 categories) | | `query/TopologyService.java` | Service topology queries | | `cli/IndexCommand.java` | Memory-efficient batched indexing to H2 | | `cli/EnrichCommand.java` | H2 → Neo4j with linkers, layers, services | | `cli/ServeCommand.java` | Read-only server startup | +| `intelligence/extractor/LanguageEnricher.java` | Language-specific enrichment orchestrator (Phase 5) | +| `intelligence/extractor/LanguageExtractor.java` | Language extractor interface | +| `intelligence/evidence/EvidencePackAssembler.java` | Evidence pack generation | +| `intelligence/query/QueryPlanner.java` | Intelligent query routing | +| `intelligence/lexical/LexicalEnricher.java` | Doc comment + snippet enrichment | ## Code Conventions diff --git a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java index ab431b33..ec722281 100644 --- a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java +++ b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java @@ -137,8 +137,8 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins GraphBuilder.FlushResult flushed = builder.flush(); List recoveredEdges = builder.flushDeferred(); - List enrichedNodes = builder.getNodes(); - List enrichedEdges = builder.getEdges(); + List enrichedNodes = new ArrayList<>(builder.getNodes()); + List enrichedEdges = new ArrayList<>(builder.getEdges()); int linkerNodeDelta = enrichedNodes.size() - allNodes.size(); int linkerEdgeDelta = enrichedEdges.size() - allEdges.size(); @@ -169,8 +169,8 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins // Add service nodes and edges to the builder builder.addNodes(serviceResult.serviceNodes()); builder.addEdges(serviceResult.serviceEdges()); - enrichedNodes = builder.getNodes(); - enrichedEdges = builder.getEdges(); + enrichedNodes = new ArrayList<>(builder.getNodes()); + enrichedEdges = new ArrayList<>(builder.getEdges()); CliOutput.info(" Detected " + serviceResult.serviceNodes().size() + " service(s)"); } @@ -374,11 +374,4 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins } } - /** - * Sanitize relationship type for Neo4j Cypher. - * Neo4j relationship types must be alphanumeric + underscore. - */ - private static String sanitizeRelType(String kind) { - return kind.replaceAll("[^A-Za-z0-9_]", "_").toUpperCase(); - } } diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java index 69665386..57a4966d 100644 --- a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java @@ -56,10 +56,11 @@ public class GoLanguageExtractor implements LanguageExtractor { Pattern.compile("^import\\s+(?:\\w+\\s+)?\"([^\"]+)\"", Pattern.MULTILINE); /** - * Method signature in an interface: {@code MethodName(params) ReturnType}. + * Go receiver method: {@code func (varName StructName) MethodName(}. + * Captures: group(1) = struct name (with optional pointer *), group(2) = method name. */ - private static final Pattern INTERFACE_METHOD = - Pattern.compile("^\\s+(\\w+)\\s*\\(", Pattern.MULTILINE); + private static final Pattern RECEIVER_METHOD = + Pattern.compile("func\\s+\\(\\w+\\s+(\\*?\\w+)\\)\\s+(\\w+)\\s*\\(", Pattern.MULTILINE); @Override public String getLanguage() { @@ -84,6 +85,7 @@ private List extractImportEdges(DetectorContext ctx, CodeNode node, if (ctx.content() == null || registry.isEmpty()) return List.of(); List edges = new ArrayList<>(); + Set seen = new LinkedHashSet<>(); List importPaths = collectImportPaths(ctx.content()); for (String importPath : importPaths) { @@ -98,10 +100,12 @@ private List extractImportEdges(DetectorContext ctx, CodeNode node, } if (target != null && !target.getId().equals(node.getId())) { String edgeId = "imports:%s:%s".formatted(node.getId(), target.getId()); - CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target); - edge.getProperties().put("confidence", "PARTIAL"); - edge.getProperties().put("extractorName", "go_language_extractor"); - edges.add(edge); + if (seen.add(edgeId)) { + CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target); + edge.getProperties().put("confidence", "PARTIAL"); + edge.getProperties().put("extractorName", "go_language_extractor"); + edges.add(edge); + } } } @@ -129,8 +133,13 @@ private List collectImportPaths(String content) { /** * Structural interface satisfaction: if this node is a CLASS/COMPONENT (struct), - * find INTERFACE nodes whose method names all appear in the struct's source file. - * Records satisfied interface names as a type hint. + * find INTERFACE nodes in the registry that this struct likely satisfies. + * + *

Strategy: extract all method names defined via Go receiver syntax + * ({@code func (v StructName) MethodName(...)}) then check whether any interface + * label matches using Go naming convention — e.g. {@code Closer} is satisfied by + * method {@code Close}, {@code Reader} by {@code Read}, {@code Stringer} by + * {@code String}. */ private Map extractInterfaceHints(DetectorContext ctx, CodeNode node, Map registry) { @@ -139,14 +148,24 @@ private Map extractInterfaceHints(DetectorContext ctx, CodeNode return Map.of(); } + // Collect all method names defined on this struct via Go receiver syntax + Set receiverMethods = new LinkedHashSet<>(); + Matcher rm = RECEIVER_METHOD.matcher(ctx.content()); + while (rm.find()) { + String structName = rm.group(1).replace("*", ""); + if (structName.equals(node.getLabel())) { + receiverMethods.add(rm.group(2)); + } + } + if (receiverMethods.isEmpty()) return Map.of(); + + // Best-effort: interface "satisfied" if struct has a receiver method whose name + // starts with the interface label (Go convention: Closer→Close, Reader→Read, Stringer→String) List satisfied = new ArrayList<>(); for (CodeNode candidate : registry.values()) { if (candidate.getKind() != NodeKind.INTERFACE) continue; - if (candidate.getFilePath() == null) continue; - // We can only do best-effort matching without the interface file content here. - // Check by label match (struct label appears as receiver type). - if (node.getLabel() != null && candidate.getLabel() != null - && ctx.content().contains(node.getLabel() + ") " + candidate.getLabel())) { + if (candidate.getLabel() == null) continue; + if (receiverMethods.stream().anyMatch(m -> candidate.getLabel().startsWith(m))) { satisfied.add(candidate.getLabel()); } } @@ -159,16 +178,21 @@ private Map extractInterfaceHints(DetectorContext ctx, CodeNode } /** - * Look up a node by label, returning null if zero or more than one node matches. + * Look up a node by label, returning null if zero or more than one distinct node matches. * Prevents false-positive IMPORTS edges for short package names like {@code db}, * {@code log}, {@code config} that may match multiple nodes in the registry. + * Deduplicates by node ID so that the same node stored under multiple keys is not + * counted as ambiguous. */ private CodeNode lookupUnambiguous(String label, Map registry) { CodeNode match = null; for (CodeNode candidate : registry.values()) { if (label.equals(candidate.getLabel())) { - if (match != null) return null; // ambiguous - match = candidate; + if (match == null) { + match = candidate; + } else if (!match.getId().equals(candidate.getId())) { + return null; // genuinely ambiguous — two distinct nodes share the label + } } } return match; diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java index df9b9217..e884ff6b 100644 --- a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java @@ -39,7 +39,6 @@ public class JavaLanguageExtractor implements LanguageExtractor { private static final Logger log = LoggerFactory.getLogger(JavaLanguageExtractor.class); - private static final ThreadLocal PARSER = ThreadLocal.withInitial(JavaParser::new); @Override public String getLanguage() { @@ -127,12 +126,10 @@ private void extractTypeHierarchyHints(CompilationUnit cu, CodeNode classNode, private Optional parse(DetectorContext ctx) { try { if (ctx.content() == null || ctx.content().isEmpty()) return Optional.empty(); - return PARSER.get().parse(ctx.content()).getResult(); + return new JavaParser().parse(ctx.content()).getResult(); } catch (Exception | AssertionError e) { log.debug("JavaParser failed for {}: {}", ctx.filePath(), e.getMessage()); return Optional.empty(); - } finally { - PARSER.remove(); } } diff --git a/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java b/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java index fe7e92d6..7f746c25 100644 --- a/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java +++ b/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java @@ -6,6 +6,7 @@ import io.github.randomcodespace.iq.config.CodeIqConfig; import io.github.randomcodespace.iq.intelligence.extractor.LanguageEnricher; import io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher; +import io.github.randomcodespace.iq.intelligence.extractor.java.JavaLanguageExtractor; import io.github.randomcodespace.iq.model.CodeEdge; import io.github.randomcodespace.iq.model.CodeNode; import io.github.randomcodespace.iq.model.EdgeKind; @@ -56,10 +57,7 @@ void enrichFailsWhenNoIndexExists(@TempDir Path tempDir) { var cmdLine = new picocli.CommandLine(cmd); int exitCode = cmdLine.execute(tempDir.toString()); - // Should fail because no H2 index exists (or succeed creating empty DB) - // The command tries to load from H2 which may be empty - // At minimum it should not crash - assertTrue(exitCode == 0 || exitCode == 1); + assertEquals(1, exitCode, "enrich with no H2 index should fail with exit code 1"); } @Test @@ -129,4 +127,33 @@ void enrichClassifiesLayers(@TempDir Path tempDir) throws Exception { assertEquals(0, exitCode); } + + @Test + void enrichedEdgesAreMutableForLanguageEnricher(@TempDir Path tempDir) throws Exception { + // Create a minimal H2 index so enrich has data to process + Path cacheDir = tempDir.resolve(".code-intelligence"); + Files.createDirectories(cacheDir); + Path cachePath = cacheDir.resolve("analysis-cache.db"); + + try (var cache = new AnalysisCache(cachePath)) { + var node = new CodeNode("test:Foo.java:class:Foo", NodeKind.CLASS, "Foo"); + node.setFilePath("Foo.java"); + cache.storeResults("abc123", "Foo.java", "java", List.of(node), List.of()); + } + + var config = new CodeIqConfig(); + var layerClassifier = new LayerClassifier(); + List linkers = List.of(); + + // Use a real LanguageEnricher with extractors to trigger edges.addAll() + var enricher = new LanguageEnricher(List.of(new JavaLanguageExtractor())); + var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher(), enricher); + var cmdLine = new picocli.CommandLine(cmd); + + // This should NOT throw UnsupportedOperationException + int exitCode = cmdLine.execute(tempDir.toString()); + // May fail for other reasons (no source files to read), but must not crash on immutable list + assertTrue(exitCode == 0 || exitCode == 1, + "EnrichCommand crashed — likely UnsupportedOperationException on immutable edges list"); + } } diff --git a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java index d9520d6b..887dab57 100644 --- a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java +++ b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java @@ -182,7 +182,9 @@ void extract_satisfiesInterfaces_hintIsDeterministicallySorted() { registry.put(ifaceReader.getLabel(), ifaceReader); registry.put(ifaceCloser.getLabel(), ifaceCloser); - String content = "Worker) Reader\nWorker) Closer\n"; + // Real Go receiver syntax: func (w Worker) Read(...) and func (w Worker) Close() + String content = "func (w Worker) Read(p []byte) (int, error) { return 0, nil }\n" + + "func (w Worker) Close() error { return nil }\n"; DetectorContext ctx = new DetectorContext("s.go", "go", content, registry, null); LanguageExtractionResult r1 = extractor.extract(ctx, struct); @@ -193,6 +195,71 @@ void extract_satisfiesInterfaces_hintIsDeterministicallySorted() { assertThat(r1.typeHints()).isEqualTo(r2.typeHints()); } + @Test + void extract_realGoReceiverSyntax_detectsInterfaceSatisfaction() { + var extractor = new GoLanguageExtractor(); + String content = """ + package main + + type Worker struct {} + + func (w Worker) Read(p []byte) (int, error) { + return 0, nil + } + + func (w Worker) Close() error { + return nil + } + """; + + CodeNode structNode = node("go:main.go:class:Worker", NodeKind.CLASS, "Worker"); + structNode.setFilePath("main.go"); + CodeNode readerIface = node("go:io/reader.go:interface:Reader", NodeKind.INTERFACE, "Reader"); + readerIface.setFilePath("io/reader.go"); + CodeNode closerIface = node("go:io/closer.go:interface:Closer", NodeKind.INTERFACE, "Closer"); + closerIface.setFilePath("io/closer.go"); + + Map registry = new java.util.LinkedHashMap<>(); + registry.put(structNode.getId(), structNode); + registry.put(readerIface.getId(), readerIface); + registry.put(closerIface.getId(), closerIface); + + var ctx = new DetectorContext("main.go", "go", content, registry, null); + var result = extractor.extract(ctx, structNode); + + assertThat(result.typeHints()).containsKey("satisfies_interfaces"); + assertThat(result.typeHints().get("satisfies_interfaces")).contains("Closer"); + assertThat(result.typeHints().get("satisfies_interfaces")).contains("Reader"); + } + + @Test + void extract_samePkgMatchedByNameAndPath_noDuplicateEdges() { + var extractor = new GoLanguageExtractor(); + String content = """ + package main + import "github.com/myorg/utils" + import utils "github.com/myorg/utils" + """; + + CodeNode source = node("go:main.go:package:main", NodeKind.MODULE, "main"); + source.setFilePath("main.go"); + CodeNode target = node("go:utils/utils.go:package:utils", NodeKind.MODULE, "utils"); + target.setFilePath("utils/utils.go"); + + Map registry = new java.util.LinkedHashMap<>(); + registry.put(target.getId(), target); + registry.put(target.getLabel(), target); + registry.put("github.com/myorg/utils", target); + + var ctx = new DetectorContext("main.go", "go", content, registry, null); + var result = extractor.extract(ctx, source); + + long importEdges = result.symbolReferences().stream() + .filter(e -> e.getKind() == EdgeKind.IMPORTS) + .count(); + assertThat(importEdges).isEqualTo(1); + } + private static CodeNode node(String id, NodeKind kind, String label) { CodeNode n = new CodeNode(id, kind, label); n.setFqn(id);