diff --git a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java index 0a179140..ab431b33 100644 --- a/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java +++ b/src/main/java/io/github/randomcodespace/iq/cli/EnrichCommand.java @@ -6,6 +6,7 @@ import io.github.randomcodespace.iq.cache.AnalysisCache; import io.github.randomcodespace.iq.config.CodeIqConfig; import io.github.randomcodespace.iq.intelligence.RepositoryIdentity; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageEnricher; import io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher; import io.github.randomcodespace.iq.model.CodeEdge; import io.github.randomcodespace.iq.model.CodeNode; @@ -61,13 +62,16 @@ public class EnrichCommand implements Callable { private final LayerClassifier layerClassifier; private final List linkers; private final LexicalEnricher lexicalEnricher; + private final LanguageEnricher languageEnricher; public EnrichCommand(CodeIqConfig config, LayerClassifier layerClassifier, - List linkers, LexicalEnricher lexicalEnricher) { + List linkers, LexicalEnricher lexicalEnricher, + LanguageEnricher languageEnricher) { this.config = config; this.layerClassifier = layerClassifier; this.linkers = linkers; this.lexicalEnricher = lexicalEnricher; + this.languageEnricher = languageEnricher; } @Override @@ -151,6 +155,10 @@ private int enrichFromCache(AnalysisCache cache, Path root, NumberFormat nf, Ins CliOutput.step("\uD83D\uDD0D", "Enriching lexical metadata..."); lexicalEnricher.enrich(enrichedNodes, root); + // 3b2. Language-specific enrichment (call graph, type hints, import resolution) + CliOutput.step("\uD83D\uDD0D", "Running language-specific enrichment..."); + languageEnricher.enrich(enrichedNodes, enrichedEdges, root); + // 3c. 
Detect services CliOutput.step("\uD83C\uDFD7\uFE0F", "Detecting service boundaries..."); var serviceDetector = new io.github.randomcodespace.iq.analyzer.ServiceDetector(); diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricher.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricher.java new file mode 100644 index 00000000..e49419b8 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricher.java @@ -0,0 +1,160 @@ +package io.github.randomcodespace.iq.intelligence.extractor; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * Runs all {@link LanguageExtractor} beans after {@link io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher} + * during the {@code enrich} command. + * + *

Builds a combined node registry (by id and fqn), groups nodes by source file, + * reads each file once, and dispatches to matching extractors. Results (edges, type hints) + * are written back into the in-memory node/edge lists before Neo4j bulk-load. + * + *

Extraction failures log a warning and are skipped — the pipeline never aborts. + */ +@Component +public class LanguageEnricher { + + private static final Logger log = LoggerFactory.getLogger(LanguageEnricher.class); + + /** + * Language alias map: normalises file-extension languages to extractor language keys. + * e.g. "javascript" nodes are handled by the "typescript" extractor. + */ + private static final Map LANGUAGE_ALIASES = Map.of( + "javascript", "typescript" + ); + + private final List extractors; + + public LanguageEnricher(List extractors) { + this.extractors = List.copyOf(extractors); + } + + /** + * Enrich nodes with language-specific intelligence and add new edges. + * + * @param nodes All enriched nodes (post-linker, post-classifier, post-lexical). + * @param edges Mutable edge list — new edges are appended in place. + * @param rootPath Absolute root path of the analysed repository (for file reads). + */ + public void enrich(List nodes, List edges, Path rootPath) { + if (extractors.isEmpty()) { + log.debug("No LanguageExtractor beans registered — skipping language enrichment"); + return; + } + + // Build combined node registry: id → node, fqn → node + Map nodeRegistry = buildRegistry(nodes); + + // Build extractor lookup: normalised language → extractor + Map extractorByLanguage = new HashMap<>(); + for (LanguageExtractor extractor : extractors) { + extractorByLanguage.put(extractor.getLanguage(), extractor); + } + + // Group nodes by file path (read each file only once). + // TreeMap guarantees deterministic iteration order (alphabetical by path). 
+ Map> nodesByFile = new TreeMap<>(); + for (CodeNode node : nodes) { + if (node.getFilePath() != null) { + nodesByFile.computeIfAbsent(node.getFilePath(), k -> new ArrayList<>()).add(node); + } + } + + int edgesAdded = 0; + int typeHintsAdded = 0; + + for (Map.Entry> entry : nodesByFile.entrySet()) { + String filePath = entry.getKey(); + List fileNodes = entry.getValue(); + + String language = detectLanguage(filePath); + if (language == null) continue; + + String resolvedLanguage = LANGUAGE_ALIASES.getOrDefault(language, language); + LanguageExtractor extractor = extractorByLanguage.get(resolvedLanguage); + if (extractor == null) continue; + + String content = readFile(rootPath, filePath); + if (content == null) continue; + + DetectorContext ctx = new DetectorContext(filePath, language, content, nodeRegistry, null); + + for (CodeNode node : fileNodes) { + try { + LanguageExtractionResult result = extractor.extract(ctx, node); + edges.addAll(result.callEdges()); + edges.addAll(result.symbolReferences()); + edgesAdded += result.callEdges().size() + result.symbolReferences().size(); + for (Map.Entry hint : result.typeHints().entrySet()) { + node.getProperties().put(hint.getKey(), hint.getValue()); + typeHintsAdded++; + } + } catch (Exception e) { + log.warn("LanguageExtractor {} failed on node {} in {}: {}", + extractor.getClass().getSimpleName(), node.getId(), filePath, e.getMessage()); + } + } + } + + log.info("Language enrichment: {} edges added, {} type hints added across {} extractors", + edgesAdded, typeHintsAdded, extractorByLanguage.size()); + } + + private Map buildRegistry(List nodes) { + Map registry = new HashMap<>(); + for (CodeNode node : nodes) { + if (node.getId() != null) { + registry.put(node.getId(), node); + } + if (node.getFqn() != null && !node.getFqn().isEmpty()) { + registry.put(node.getFqn(), node); + } + } + return registry; + } + + private String readFile(Path rootPath, String filePath) { + try { + Path resolved = 
rootPath.resolve(filePath); + if (!Files.exists(resolved)) return null; + return Files.readString(resolved, StandardCharsets.UTF_8); + } catch (IOException e) { + log.debug("Could not read file {}: {}", filePath, e.getMessage()); + return null; + } + } + + /** + * Map file extension to language string (mirrors FileDiscovery conventions). + */ + static String detectLanguage(String filePath) { + if (filePath == null) return null; + int dot = filePath.lastIndexOf('.'); + if (dot < 0) return null; + return switch (filePath.substring(dot + 1).toLowerCase()) { + case "java" -> "java"; + case "ts", "tsx" -> "typescript"; + case "js", "jsx", "mjs", "cjs" -> "javascript"; + case "py", "pyw" -> "python"; + case "go" -> "go"; + default -> null; + }; + } +} diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageExtractionResult.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageExtractionResult.java new file mode 100644 index 00000000..3a7fa254 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageExtractionResult.java @@ -0,0 +1,34 @@ +package io.github.randomcodespace.iq.intelligence.extractor; + +import io.github.randomcodespace.iq.intelligence.CapabilityLevel; +import io.github.randomcodespace.iq.model.CodeEdge; + +import java.util.List; +import java.util.Map; + +/** + * Result of a single {@link LanguageExtractor#extract} call. + * + * @param callEdges CALLS edges discovered for this node (method invocations, function calls). + * @param symbolReferences IMPORTS / DEPENDS_ON edges from import/symbol resolution. + * @param typeHints Type annotation key-value pairs to store in node properties + * (e.g. {@code "param_types" -> "int, str"}, {@code "return_type" -> "str"}). + * @param confidence Confidence level of this extraction result. 
+ */ +public record LanguageExtractionResult( + List callEdges, + List symbolReferences, + Map typeHints, + CapabilityLevel confidence +) { + public LanguageExtractionResult { + callEdges = List.copyOf(callEdges); + symbolReferences = List.copyOf(symbolReferences); + typeHints = Map.copyOf(typeHints); + } + + /** Empty result with PARTIAL confidence. */ + public static LanguageExtractionResult empty() { + return new LanguageExtractionResult(List.of(), List.of(), Map.of(), CapabilityLevel.PARTIAL); + } +} diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageExtractor.java new file mode 100644 index 00000000..9bbd4609 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageExtractor.java @@ -0,0 +1,37 @@ +package io.github.randomcodespace.iq.intelligence.extractor; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.model.CodeNode; + +/** + * Strategy interface for language-specific enrichment extractors. + * + *

Implementations are stateless Spring {@code @Component} beans auto-discovered + * via classpath scan. Each extractor targets a single language and deepens the + * capability matrix beyond what the general intelligence layer provides. + * + *

Extractors run during {@code enrich} (after {@link io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher}) + * and must never run during {@code index}. + */ +public interface LanguageExtractor { + + /** + * The primary language this extractor targets (e.g. "java", "typescript", "python", "go"). + * Matches the language values produced by {@code FileDiscovery}. + */ + String getLanguage(); + + /** + * Extract additional intelligence for the given node from its source file. + * + *

The {@code ctx} carries file content and a node registry via {@code parsedData} + * (cast to {@code Map} — a combined id + fqn index built by + * {@link LanguageEnricher}). + * + * @param ctx Detector context for the node's source file; {@code parsedData} contains + * the node registry as {@code Map}. + * @param node The specific node to enrich. + * @return Extraction result; never {@code null}. + */ + LanguageExtractionResult extract(DetectorContext ctx, CodeNode node); +} diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java new file mode 100644 index 00000000..69665386 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractor.java @@ -0,0 +1,176 @@ +package io.github.randomcodespace.iq.intelligence.extractor.go; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.intelligence.CapabilityLevel; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractor; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Go language-specific extractor. + * + *

Capabilities: + *

    + *
  • Package import resolution: maps {@code import "pkg/path"} to service/module node + * in the registry.
  • + *
  • Interface satisfaction detection: structural typing via method name matching + * between struct and interface nodes in the registry.
  • + *
+ * + *

Confidence: PARTIAL. Go structural typing cannot be fully resolved without a type checker. + */ +@Component +public class GoLanguageExtractor implements LanguageExtractor { + + /** + * Single import: {@code import "pkg/path"} or block import entry {@code "pkg/path"}. + * Also handles aliased: {@code alias "pkg/path"}. + */ + private static final Pattern IMPORT_PATH = + Pattern.compile("\"([^\"]+)\""); + + /** + * Import block: {@code import ( ... )} — used to extract the block content. + */ + private static final Pattern IMPORT_BLOCK = + Pattern.compile("import\\s*\\(([^)]+)\\)", Pattern.DOTALL); + + /** + * Single-line import: {@code import "path"} or {@code import alias "path"}. + */ + private static final Pattern SINGLE_IMPORT = + Pattern.compile("^import\\s+(?:\\w+\\s+)?\"([^\"]+)\"", Pattern.MULTILINE); + + /** + * Method signature in an interface: {@code MethodName(params) ReturnType}. + */ + private static final Pattern INTERFACE_METHOD = + Pattern.compile("^\\s+(\\w+)\\s*\\(", Pattern.MULTILINE); + + @Override + public String getLanguage() { + return "go"; + } + + @Override + @SuppressWarnings("unchecked") + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode node) { + Map nodeRegistry = (ctx.parsedData() instanceof Map raw) + ? (Map) raw + : Map.of(); + + List symbolRefs = extractImportEdges(ctx, node, nodeRegistry); + Map typeHints = extractInterfaceHints(ctx, node, nodeRegistry); + + return new LanguageExtractionResult(List.of(), symbolRefs, typeHints, CapabilityLevel.PARTIAL); + } + + private List extractImportEdges(DetectorContext ctx, CodeNode node, + Map registry) { + if (ctx.content() == null || registry.isEmpty()) return List.of(); + + List edges = new ArrayList<>(); + List importPaths = collectImportPaths(ctx.content()); + + for (String importPath : importPaths) { + // Match by last path segment (package name) with ambiguity guard. 
+            // Short names like "db", "log", "config" can match multiple nodes —
+            // skip the edge if more than one node shares the label.
+            String pkgName = importPath.substring(importPath.lastIndexOf('/') + 1);
+            CodeNode target = lookupUnambiguous(pkgName, registry);
+            if (target == null) {
+                // Fall back to an unambiguous label match on the full import path
+                target = lookupUnambiguous(importPath, registry);
+            }
+            if (target != null && !target.getId().equals(node.getId())) {
+                String edgeId = "imports:%s:%s".formatted(node.getId(), target.getId());
+                CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target);
+                edge.getProperties().put("confidence", "PARTIAL");
+                edge.getProperties().put("extractorName", "go_language_extractor");
+                edges.add(edge);
+            }
+        }
+
+        return edges;
+    }
+
+    private List<String> collectImportPaths(String content) {
+        Set<String> paths = new LinkedHashSet<>();
+        // Go allows several import declarations per file, so scan every "import ( ... )" block.
+        Matcher block = IMPORT_BLOCK.matcher(content);
+        while (block.find()) {
+            Matcher pathMatcher = IMPORT_PATH.matcher(block.group(1));
+            while (pathMatcher.find()) {
+                paths.add(pathMatcher.group(1));
+            }
+        }
+
+        Matcher single = SINGLE_IMPORT.matcher(content);
+        while (single.find()) {
+            paths.add(single.group(1));
+        }
+
+        return new ArrayList<>(paths);
+    }
+
+    /**
+     * Best-effort interface satisfaction: if this node is a CLASS/COMPONENT (struct), collect
+     * INTERFACE nodes whose label occurs in the file as {@code "<struct label>) <interface label>"}.
+     * Records the matched interface labels as the {@code satisfies_interfaces} type hint.
+     */
+    private Map<String, String> extractInterfaceHints(DetectorContext ctx, CodeNode node,
+                                                       Map<String, CodeNode> registry) {
+        if (ctx.content() == null || node.getKind() == null) return Map.of();
+        if (node.getKind() != NodeKind.CLASS && node.getKind() != NodeKind.COMPONENT) {
+            return Map.of();
+        }
+
+        // Sorted set: the registry lists a node under both its id and fqn, so labels would repeat.
+        Set<String> satisfied = new java.util.TreeSet<>();
+        for (CodeNode candidate : registry.values()) {
+            if (candidate.getKind() != NodeKind.INTERFACE) continue;
+            if (candidate.getFilePath() == null) continue;
+            // We can only do best-effort matching without the interface file content here.
+            // Check by label match (struct label appears as receiver type).
+            if (node.getLabel() != null && candidate.getLabel() != null
+                    && ctx.content().contains(node.getLabel() + ") " + candidate.getLabel())) {
+                satisfied.add(candidate.getLabel());
+            }
+        }
+
+        if (!satisfied.isEmpty()) {
+            return Map.of("satisfies_interfaces", String.join(", ", satisfied));
+        }
+        return Map.of();
+    }
+
+    /**
+     * Look up a node by label, returning null if zero or more than one node matches.
+     * Prevents false-positive IMPORTS edges for short package names like {@code db},
+     * {@code log}, {@code config} that may match multiple nodes in the registry.
+     */
+    private CodeNode lookupUnambiguous(String label, Map<String, CodeNode> registry) {
+        CodeNode match = null;
+        for (CodeNode candidate : registry.values()) {
+            if (label.equals(candidate.getLabel())) {
+                if (match != null && match != candidate) return null; // ambiguous: a distinct 2nd node (id and fqn keys repeat one node)
+                match = candidate;
+            }
+        }
+        return match;
+    }
+}
diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java
new file mode 100644
index 00000000..df9b9217
--- /dev/null
+++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractor.java
@@ -0,0 +1,153 @@
+package io.github.randomcodespace.iq.intelligence.extractor.java;
+
+import com.github.javaparser.JavaParser;
+import com.github.javaparser.ast.CompilationUnit;
+import com.github.javaparser.ast.body.MethodDeclaration;
+import com.github.javaparser.ast.expr.MethodCallExpr;
+import io.github.randomcodespace.iq.detector.DetectorContext;
+import io.github.randomcodespace.iq.intelligence.CapabilityLevel;
+import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult;
+import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractor;
+import io.github.randomcodespace.iq.model.CodeEdge;
+import io.github.randomcodespace.iq.model.CodeNode;
+import io.github.randomcodespace.iq.model.EdgeKind;
+import io.github.randomcodespace.iq.model.NodeKind;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Java language-specific extractor.
+ *
+ *

Capabilities: + *

    + *
  • Cross-file call graph: CALLS edges between methods via JavaParser {@code MethodCallExpr} + * and existing node lookup.
  • + *
  • Type hierarchy enrichment: verifies EXTENDS/IMPLEMENTS edges via resolved imports + * and stores {@code extends_type} / {@code implements_types} as type hints.
  • + *
+ * + *

Confidence: EXACT for same-module calls, PARTIAL for cross-module. + * Does NOT re-detect what existing Java detectors already detect. + */ +@Component +public class JavaLanguageExtractor implements LanguageExtractor { + + private static final Logger log = LoggerFactory.getLogger(JavaLanguageExtractor.class); + private static final ThreadLocal PARSER = ThreadLocal.withInitial(JavaParser::new); + + @Override + public String getLanguage() { + return "java"; + } + + @Override + @SuppressWarnings("unchecked") + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode node) { + // Only enrich METHOD nodes with call graph edges + if (node.getKind() != NodeKind.METHOD && node.getKind() != NodeKind.CLASS + && node.getKind() != NodeKind.ABSTRACT_CLASS + && node.getKind() != NodeKind.INTERFACE) { + return LanguageExtractionResult.empty(); + } + + Map nodeRegistry = (ctx.parsedData() instanceof Map raw) + ? (Map) raw + : Map.of(); + + Optional cuOpt = parse(ctx); + if (cuOpt.isEmpty()) { + return LanguageExtractionResult.empty(); + } + CompilationUnit cu = cuOpt.get(); + + List callEdges = new ArrayList<>(); + Map typeHints = new java.util.LinkedHashMap<>(); + + if (node.getKind() == NodeKind.METHOD) { + extractCallEdges(cu, node, nodeRegistry, callEdges); + } else { + extractTypeHierarchyHints(cu, node, typeHints); + } + + return new LanguageExtractionResult(callEdges, List.of(), typeHints, CapabilityLevel.PARTIAL); + } + + private void extractCallEdges(CompilationUnit cu, CodeNode methodNode, + Map nodeRegistry, + List callEdges) { + String methodLabel = methodNode.getLabel(); + if (methodLabel == null) return; + + // Find the MethodDeclaration matching this node by name + cu.findAll(MethodDeclaration.class).stream() + .filter(md -> methodLabel.equals(md.getNameAsString())) + .findFirst() + .ifPresent(md -> md.findAll(MethodCallExpr.class).forEach(mce -> { + String calleeName = mce.getNameAsString(); + CodeNode target = lookupByLabel(calleeName, nodeRegistry); 
+ if (target != null && !target.getId().equals(methodNode.getId())) { + String edgeId = "calls:%s:%s:%d".formatted( + methodNode.getId(), target.getId(), + mce.getBegin().map(p -> p.line).orElse(0)); + CodeEdge edge = new CodeEdge(edgeId, EdgeKind.CALLS, methodNode.getId(), target); + edge.getProperties().put("confidence", "PARTIAL"); + edge.getProperties().put("extractorName", "java_language_extractor"); + callEdges.add(edge); + } + })); + } + + private void extractTypeHierarchyHints(CompilationUnit cu, CodeNode classNode, + Map typeHints) { + cu.findAll(com.github.javaparser.ast.body.ClassOrInterfaceDeclaration.class) + .stream() + .findFirst() + .ifPresent(decl -> { + List extended = decl.getExtendedTypes().stream() + .map(t -> t.getNameAsString()) + .toList(); + List implemented = decl.getImplementedTypes().stream() + .map(t -> t.getNameAsString()) + .toList(); + if (!extended.isEmpty()) { + typeHints.put("extends_type", String.join(", ", extended)); + } + if (!implemented.isEmpty()) { + typeHints.put("implements_types", String.join(", ", implemented)); + } + }); + } + + private Optional parse(DetectorContext ctx) { + try { + if (ctx.content() == null || ctx.content().isEmpty()) return Optional.empty(); + return PARSER.get().parse(ctx.content()).getResult(); + } catch (Exception | AssertionError e) { + log.debug("JavaParser failed for {}: {}", ctx.filePath(), e.getMessage()); + return Optional.empty(); + } finally { + PARSER.remove(); + } + } + + /** + * Look up a node by label in the registry (label is the simple method name). + * Returns null if zero or more than one distinct node matches to avoid false-positive + * CALLS edges for common method names like {@code save}, {@code get}, {@code execute}. 
+ */ + private CodeNode lookupByLabel(String label, Map registry) { + Map matches = new java.util.LinkedHashMap<>(); + for (CodeNode candidate : registry.values()) { + if (label.equals(candidate.getLabel()) && candidate.getKind() == NodeKind.METHOD) { + matches.put(candidate.getId(), candidate); + } + } + return matches.size() == 1 ? matches.values().iterator().next() : null; + } +} diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/python/PythonLanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/python/PythonLanguageExtractor.java new file mode 100644 index 00000000..53201e76 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/python/PythonLanguageExtractor.java @@ -0,0 +1,168 @@ +package io.github.randomcodespace.iq.intelligence.extractor.python; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.intelligence.CapabilityLevel; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractor; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Python language-specific extractor. + * + *

Capabilities: + *

    + *
  • Import resolution: maps {@code from module import X} to {@code CodeNode} in registry.
  • + *
  • Type hint extraction: surfaces {@code def fn(x: int) -> str} parameter and return types + * from source content into node properties.
  • + *
+ * + *

Confidence: PARTIAL. + */ +@Component +public class PythonLanguageExtractor implements LanguageExtractor { + + /** {@code from module import X, Y} — captures only to end of line */ + private static final Pattern FROM_IMPORT = + Pattern.compile("from\\s+[\\w.]+\\s+import\\s+([^\\n\\r]+)"); + + /** {@code import X} or {@code import X as Y} */ + private static final Pattern PLAIN_IMPORT = + Pattern.compile("^import\\s+(\\w+)(?:\\s+as\\s+\\w+)?", Pattern.MULTILINE); + + /** {@code def fn(param: Type, ...) -> ReturnType:} */ + private static final Pattern DEF_SIGNATURE = + Pattern.compile("def\\s+(\\w+)\\s*\\(([^)]*)\\)\\s*(?:->\\s*([\\w\\[\\], |]+))?\\s*:"); + + /** Type-annotated parameter: {@code name: Type} */ + private static final Pattern ANNOTATED_PARAM = + Pattern.compile("(\\w+)\\s*:\\s*([\\w\\[\\]|. ]+)"); + + @Override + public String getLanguage() { + return "python"; + } + + @Override + @SuppressWarnings("unchecked") + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode node) { + Map nodeRegistry = (ctx.parsedData() instanceof Map raw) + ? 
(Map) raw + : Map.of(); + + List symbolRefs = extractImportEdges(ctx, node, nodeRegistry); + Map typeHints = extractTypeHints(ctx, node); + + return new LanguageExtractionResult(List.of(), symbolRefs, typeHints, CapabilityLevel.PARTIAL); + } + + private List extractImportEdges(DetectorContext ctx, CodeNode node, + Map registry) { + if (ctx.content() == null || registry.isEmpty()) return List.of(); + + List edges = new ArrayList<>(); + Set seen = new LinkedHashSet<>(); + + Matcher from = FROM_IMPORT.matcher(ctx.content()); + while (from.find()) { + for (String symbol : from.group(1).split(",")) { + String sym = symbol.trim(); + if (sym.isEmpty()) continue; + // Strip alias: "Foo as bar" → "Foo" + int asIdx = sym.indexOf(" as "); + if (asIdx >= 0) sym = sym.substring(0, asIdx).trim(); + CodeNode target = lookupUnambiguous(sym, registry); + if (target != null && !target.getId().equals(node.getId())) { + String edgeId = "imports:%s:%s".formatted(node.getId(), target.getId()); + if (seen.add(edgeId)) { + CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target); + edge.getProperties().put("confidence", "PARTIAL"); + edge.getProperties().put("extractorName", "python_language_extractor"); + edges.add(edge); + } + } + } + } + + Matcher plain = PLAIN_IMPORT.matcher(ctx.content()); + while (plain.find()) { + String sym = plain.group(1); + CodeNode target = lookupUnambiguous(sym, registry); + if (target != null && !target.getId().equals(node.getId())) { + String edgeId = "imports:%s:%s".formatted(node.getId(), target.getId()); + if (seen.add(edgeId)) { + CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target); + edge.getProperties().put("confidence", "PARTIAL"); + edge.getProperties().put("extractorName", "python_language_extractor"); + edges.add(edge); + } + } + } + + return edges; + } + + private Map extractTypeHints(DetectorContext ctx, CodeNode node) { + if (ctx.content() == null || node.getLabel() == null) return Map.of(); + + Map 
hints = new LinkedHashMap<>(); + Matcher def = DEF_SIGNATURE.matcher(ctx.content()); + + while (def.find()) { + String fnName = def.group(1); + if (!fnName.equals(node.getLabel())) continue; + + String params = def.group(2); + String returnType = def.group(3); + + if (params != null && !params.isBlank()) { + List annotated = new ArrayList<>(); + Matcher paramMatcher = ANNOTATED_PARAM.matcher(params); + while (paramMatcher.find()) { + String paramName = paramMatcher.group(1); + String paramType = paramMatcher.group(2).trim(); + if (!"self".equals(paramName) && !"cls".equals(paramName)) { + annotated.add(paramName + ":" + paramType); + } + } + if (!annotated.isEmpty()) { + hints.put("param_types", String.join(", ", annotated)); + } + } + + if (returnType != null && !returnType.isBlank()) { + hints.put("return_type", returnType.trim()); + } + break; // first matching function definition is sufficient + } + + return hints; + } + + /** + * Look up a node by label, returning null if zero or more than one node matches. + * Prevents false-positive IMPORTS edges for common short names like {@code join}, + * {@code get}, {@code load} that may match multiple unrelated nodes. 
+     */
+    private CodeNode lookupUnambiguous(String label, Map<String, CodeNode> registry) {
+        CodeNode match = null;
+        for (CodeNode candidate : registry.values()) {
+            if (label.equals(candidate.getLabel())) {
+                if (match != null && match != candidate) return null; // ambiguous: a distinct 2nd node (id and fqn keys repeat one node)
+                match = candidate;
+            }
+        }
+        return match;
+    }
+}
diff --git a/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/typescript/TypeScriptLanguageExtractor.java b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/typescript/TypeScriptLanguageExtractor.java
new file mode 100644
index 00000000..0de9e8ac
--- /dev/null
+++ b/src/main/java/io/github/randomcodespace/iq/intelligence/extractor/typescript/TypeScriptLanguageExtractor.java
@@ -0,0 +1,144 @@
+package io.github.randomcodespace.iq.intelligence.extractor.typescript;
+
+import io.github.randomcodespace.iq.detector.DetectorContext;
+import io.github.randomcodespace.iq.intelligence.CapabilityLevel;
+import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult;
+import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractor;
+import io.github.randomcodespace.iq.model.CodeEdge;
+import io.github.randomcodespace.iq.model.CodeNode;
+import io.github.randomcodespace.iq.model.EdgeKind;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * TypeScript/JavaScript language-specific extractor.
+ *
+ *

Also handles JavaScript nodes via the "javascript" → "typescript" alias in + * {@link io.github.randomcodespace.iq.intelligence.extractor.LanguageEnricher}. + * + *

Capabilities: + *

    + *
  • Import-to-symbol resolution: maps TS {@code import { X }} to source {@code CodeNode} for X.
  • + *
  • JSDoc type enrichment: surfaces {@code @param} / {@code @returns} from {@code lex_comment} + * into node properties.
  • + *
+ * + *

Confidence: PARTIAL. + */ +@Component +public class TypeScriptLanguageExtractor implements LanguageExtractor { + + /** Named import: {@code import { Foo, Bar } from './path'} */ + private static final Pattern NAMED_IMPORT = + Pattern.compile("import\\s+\\{([^}]+)\\}\\s+from\\s+['\"]([^'\"]+)['\"]"); + + /** Default import: {@code import Foo from './path'} */ + private static final Pattern DEFAULT_IMPORT = + Pattern.compile("import\\s+(\\w+)\\s+from\\s+['\"]([^'\"]+)['\"]"); + + /** JSDoc @param tag: {@code @param {type} name} or {@code @param name - desc} */ + private static final Pattern JSDOC_PARAM = + Pattern.compile("@param\\s+(?:\\{([^}]+)\\}\\s+)?(\\w+)"); + + /** JSDoc @returns tag: {@code @returns {type}} */ + private static final Pattern JSDOC_RETURNS = + Pattern.compile("@returns?\\s+\\{([^}]+)\\}"); + + @Override + public String getLanguage() { + return "typescript"; + } + + @Override + @SuppressWarnings("unchecked") + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode node) { + Map nodeRegistry = (ctx.parsedData() instanceof Map raw) + ? 
(Map) raw + : Map.of(); + + List symbolRefs = extractImportEdges(ctx, node, nodeRegistry); + Map typeHints = extractJsDocHints(node); + + return new LanguageExtractionResult(List.of(), symbolRefs, typeHints, CapabilityLevel.PARTIAL); + } + + private List extractImportEdges(DetectorContext ctx, CodeNode node, + Map registry) { + if (ctx.content() == null || registry.isEmpty()) return List.of(); + + List edges = new ArrayList<>(); + Set seen = new LinkedHashSet<>(); + + Matcher named = NAMED_IMPORT.matcher(ctx.content()); + while (named.find()) { + String symbols = named.group(1); + for (String symbol : symbols.split(",")) { + String sym = symbol.trim(); + if (sym.isEmpty()) continue; + // Strip alias: "Foo as Bar" → "Foo" + int asIdx = sym.indexOf(" as "); + if (asIdx >= 0) sym = sym.substring(0, asIdx).trim(); + CodeNode target = registry.get(sym); + if (target != null && !target.getId().equals(node.getId())) { + String edgeId = "imports:%s:%s".formatted(node.getId(), target.getId()); + if (seen.add(edgeId)) { + CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target); + edge.getProperties().put("confidence", "PARTIAL"); + edge.getProperties().put("extractorName", "typescript_language_extractor"); + edges.add(edge); + } + } + } + } + + Matcher def = DEFAULT_IMPORT.matcher(ctx.content()); + while (def.find()) { + String sym = def.group(1); + CodeNode target = registry.get(sym); + if (target != null && !target.getId().equals(node.getId())) { + String edgeId = "imports:%s:%s".formatted(node.getId(), target.getId()); + if (seen.add(edgeId)) { + CodeEdge edge = new CodeEdge(edgeId, EdgeKind.IMPORTS, node.getId(), target); + edge.getProperties().put("confidence", "PARTIAL"); + edge.getProperties().put("extractorName", "typescript_language_extractor"); + edges.add(edge); + } + } + } + + return edges; + } + + private Map extractJsDocHints(CodeNode node) { + Object lexComment = node.getProperties().get("lex_comment"); + if (!(lexComment instanceof String 
comment) || comment.isBlank()) return Map.of(); + + Map hints = new LinkedHashMap<>(); + List params = new ArrayList<>(); + + Matcher paramMatcher = JSDOC_PARAM.matcher(comment); + while (paramMatcher.find()) { + String type = paramMatcher.group(1); + String name = paramMatcher.group(2); + params.add(type != null ? name + ":" + type : name); + } + if (!params.isEmpty()) { + hints.put("jsdoc_params", String.join(", ", params)); + } + + Matcher retMatcher = JSDOC_RETURNS.matcher(comment); + if (retMatcher.find()) { + hints.put("jsdoc_return_type", retMatcher.group(1)); + } + + return hints; + } +} diff --git a/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java b/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java index bbfd4447..fe7e92d6 100644 --- a/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java +++ b/src/test/java/io/github/randomcodespace/iq/cli/EnrichCommandTest.java @@ -4,6 +4,7 @@ import io.github.randomcodespace.iq.analyzer.linker.Linker; import io.github.randomcodespace.iq.cache.AnalysisCache; import io.github.randomcodespace.iq.config.CodeIqConfig; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageEnricher; import io.github.randomcodespace.iq.intelligence.lexical.LexicalEnricher; import io.github.randomcodespace.iq.model.CodeEdge; import io.github.randomcodespace.iq.model.CodeNode; @@ -51,7 +52,7 @@ void enrichFailsWhenNoIndexExists(@TempDir Path tempDir) { var layerClassifier = new LayerClassifier(); List linkers = List.of(); - var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher()); + var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher(), new LanguageEnricher(List.of())); var cmdLine = new picocli.CommandLine(cmd); int exitCode = cmdLine.execute(tempDir.toString()); @@ -87,7 +88,7 @@ void enrichWithIndexedData(@TempDir Path tempDir) throws Exception { var layerClassifier = new LayerClassifier(); List linkers = List.of(); 
- var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher()); + var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher(), new LanguageEnricher(List.of())); var cmdLine = new picocli.CommandLine(cmd); int exitCode = cmdLine.execute(tempDir.toString()); @@ -122,7 +123,7 @@ void enrichClassifiesLayers(@TempDir Path tempDir) throws Exception { var layerClassifier = new LayerClassifier(); List linkers = List.of(); - var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher()); + var cmd = new EnrichCommand(config, layerClassifier, linkers, new LexicalEnricher(), new LanguageEnricher(List.of())); var cmdLine = new picocli.CommandLine(cmd); int exitCode = cmdLine.execute(tempDir.toString()); diff --git a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricherTest.java b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricherTest.java new file mode 100644 index 00000000..3bae775a --- /dev/null +++ b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/LanguageEnricherTest.java @@ -0,0 +1,223 @@ +package io.github.randomcodespace.iq.intelligence.extractor; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.intelligence.CapabilityLevel; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +class LanguageEnricherTest { + + @TempDir + Path tempDir; + + @Test + void 
enrich_noExtractors_noEdgesAdded() { + LanguageEnricher enricher = new LanguageEnricher(List.of()); + List nodes = List.of(node("id1", NodeKind.METHOD, "fn", "src/Foo.java")); + List edges = new ArrayList<>(); + + enricher.enrich(nodes, edges, tempDir); + + assertThat(edges).isEmpty(); + } + + @Test + void enrich_runsPipelineAndAddsEdges() throws IOException { + // Create a real source file + Path javaFile = tempDir.resolve("Foo.java"); + Files.writeString(javaFile, """ + public class Foo { + public void caller() { callee(); } + public void callee() {} + } + """, StandardCharsets.UTF_8); + + CodeNode caller = node("method:Foo:caller", NodeKind.METHOD, "caller", "Foo.java"); + CodeNode callee = node("method:Foo:callee", NodeKind.METHOD, "callee", "Foo.java"); + List nodes = List.of(caller, callee); + List edges = new ArrayList<>(); + + // Stub extractor that always returns a CALLS edge + LanguageExtractor stubExtractor = new LanguageExtractor() { + @Override + public String getLanguage() { return "java"; } + + @Override + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode node) { + if (!"caller".equals(node.getLabel())) return LanguageExtractionResult.empty(); + CodeEdge edge = new CodeEdge("calls:stub", EdgeKind.CALLS, caller.getId(), callee); + return new LanguageExtractionResult(List.of(edge), List.of(), Map.of(), + CapabilityLevel.EXACT); + } + }; + + LanguageEnricher enricher = new LanguageEnricher(List.of(stubExtractor)); + enricher.enrich(nodes, edges, tempDir); + + assertThat(edges).hasSize(1); + assertThat(edges.get(0).getKind()).isEqualTo(EdgeKind.CALLS); + } + + @Test + void enrich_typeHints_addedToNodeProperties() throws IOException { + Path pyFile = tempDir.resolve("service.py"); + Files.writeString(pyFile, "def compute(x: int) -> str:\n pass\n", StandardCharsets.UTF_8); + + CodeNode fnNode = node("py:service.py:fn:compute", NodeKind.METHOD, "compute", "service.py"); + List nodes = List.of(fnNode); + List edges = new ArrayList<>(); + 
+ LanguageExtractor stubExtractor = new LanguageExtractor() { + @Override + public String getLanguage() { return "python"; } + + @Override + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode n) { + return new LanguageExtractionResult(List.of(), List.of(), + Map.of("param_types", "x:int", "return_type", "str"), + CapabilityLevel.PARTIAL); + } + }; + + LanguageEnricher enricher = new LanguageEnricher(List.of(stubExtractor)); + enricher.enrich(nodes, edges, tempDir); + + assertThat(fnNode.getProperties()).containsEntry("param_types", "x:int"); + assertThat(fnNode.getProperties()).containsEntry("return_type", "str"); + } + + @Test + void enrich_extractorThrows_pipelineContinues() throws IOException { + Path javaFile = tempDir.resolve("Bad.java"); + Files.writeString(javaFile, "class Bad {}", StandardCharsets.UTF_8); + + CodeNode node1 = node("n1", NodeKind.METHOD, "m1", "Bad.java"); + CodeNode node2 = node("n2", NodeKind.METHOD, "m2", "Bad.java"); + List nodes = List.of(node1, node2); + List edges = new ArrayList<>(); + + LanguageExtractor faultyExtractor = new LanguageExtractor() { + @Override + public String getLanguage() { return "java"; } + + @Override + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode node) { + if ("m1".equals(node.getLabel())) { + throw new RuntimeException("Simulated extractor failure"); + } + CodeEdge e = new CodeEdge("ok-edge", EdgeKind.CALLS, node.getId(), node1); + return new LanguageExtractionResult(List.of(e), List.of(), Map.of(), + CapabilityLevel.PARTIAL); + } + }; + + LanguageEnricher enricher = new LanguageEnricher(List.of(faultyExtractor)); + // Should not throw + enricher.enrich(nodes, edges, tempDir); + + // node2 should still produce an edge even though node1 failed + assertThat(edges).hasSize(1); + } + + @Test + void enrich_javascriptAlias_routedToTypescriptExtractor() throws IOException { + Path jsFile = tempDir.resolve("app.js"); + Files.writeString(jsFile, "function run() {}", 
StandardCharsets.UTF_8); + + CodeNode node = node("js:app.js:fn:run", NodeKind.METHOD, "run", "app.js"); + List nodes = List.of(node); + List edges = new ArrayList<>(); + + List calledFor = new ArrayList<>(); + LanguageExtractor tsExtractor = new LanguageExtractor() { + @Override + public String getLanguage() { return "typescript"; } + + @Override + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode n) { + calledFor.add(ctx.language()); + return LanguageExtractionResult.empty(); + } + }; + + LanguageEnricher enricher = new LanguageEnricher(List.of(tsExtractor)); + enricher.enrich(nodes, edges, tempDir); + + // The typescript extractor should have been called for the .js file + assertThat(calledFor).hasSize(1); + assertThat(calledFor.get(0)).isEqualTo("javascript"); + } + + @Test + void enrich_nodesByFile_processedInDeterministicOrder() throws IOException { + Path aFile = tempDir.resolve("a.java"); + Path bFile = tempDir.resolve("b.java"); + Files.writeString(aFile, "class A {}", StandardCharsets.UTF_8); + Files.writeString(bFile, "class B {}", StandardCharsets.UTF_8); + + CodeNode nodeA = node("n:a.java:class:A", NodeKind.CLASS, "A", "a.java"); + CodeNode nodeB = node("n:b.java:class:B", NodeKind.CLASS, "B", "b.java"); + + List run1Order = new ArrayList<>(); + List run2Order = new ArrayList<>(); + + LanguageEnricher enricher1 = new LanguageEnricher(List.of(new LanguageExtractor() { + @Override public String getLanguage() { return "java"; } + @Override + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode n) { + run1Order.add(ctx.filePath()); + return LanguageExtractionResult.empty(); + } + })); + + LanguageEnricher enricher2 = new LanguageEnricher(List.of(new LanguageExtractor() { + @Override public String getLanguage() { return "java"; } + @Override + public LanguageExtractionResult extract(DetectorContext ctx, CodeNode n) { + run2Order.add(ctx.filePath()); + return LanguageExtractionResult.empty(); + } + })); + + // Input node 
order differs between runs; file iteration must be alphabetical in both. + enricher1.enrich(List.of(nodeA, nodeB), new ArrayList<>(), tempDir); + enricher2.enrich(List.of(nodeB, nodeA), new ArrayList<>(), tempDir); + + assertThat(run1Order).isEqualTo(run2Order); + assertThat(run1Order).containsExactly("a.java", "b.java"); + } + + @Test + void detectLanguage_mapsExtensionsCorrectly() { + assertThat(LanguageEnricher.detectLanguage("Foo.java")).isEqualTo("java"); + assertThat(LanguageEnricher.detectLanguage("app.ts")).isEqualTo("typescript"); + assertThat(LanguageEnricher.detectLanguage("app.tsx")).isEqualTo("typescript"); + assertThat(LanguageEnricher.detectLanguage("app.js")).isEqualTo("javascript"); + assertThat(LanguageEnricher.detectLanguage("app.jsx")).isEqualTo("javascript"); + assertThat(LanguageEnricher.detectLanguage("service.py")).isEqualTo("python"); + assertThat(LanguageEnricher.detectLanguage("main.go")).isEqualTo("go"); + assertThat(LanguageEnricher.detectLanguage("app.yaml")).isNull(); + assertThat(LanguageEnricher.detectLanguage(null)).isNull(); + } + + private static CodeNode node(String id, NodeKind kind, String label, String filePath) { + CodeNode n = new CodeNode(id, kind, label); + n.setFqn(id); + n.setFilePath(filePath); + return n; + } +} diff --git a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java new file mode 100644 index 00000000..d9520d6b --- /dev/null +++ b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/go/GoLanguageExtractorTest.java @@ -0,0 +1,201 @@ +package io.github.randomcodespace.iq.intelligence.extractor.go; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.intelligence.CapabilityLevel; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult; +import 
io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.junit.jupiter.api.Test; + +import java.util.LinkedHashMap; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +class GoLanguageExtractorTest { + + private final GoLanguageExtractor extractor = new GoLanguageExtractor(); + + @Test + void getLanguage_returnsGo() { + assertThat(extractor.getLanguage()).isEqualTo("go"); + } + + @Test + void extract_blockImport_createsImportEdge() { + CodeNode source = node("go:main.go:fn:main", NodeKind.METHOD, "main"); + CodeNode target = node("go:handler.go:module:handler", NodeKind.MODULE, "handler"); + + Map registry = Map.of(target.getLabel(), target); + + String content = """ + package main + + import ( + "myapp/handler" + "fmt" + ) + + func main() { + handler.Handle() + } + """; + + DetectorContext ctx = new DetectorContext("main.go", "go", content, registry, null); + LanguageExtractionResult result = extractor.extract(ctx, source); + + assertThat(result.symbolReferences()).hasSize(1); + CodeEdge edge = result.symbolReferences().get(0); + assertThat(edge.getKind()).isEqualTo(EdgeKind.IMPORTS); + assertThat(edge.getTarget().getId()).isEqualTo(target.getId()); + } + + @Test + void extract_singleImport_createsImportEdge() { + CodeNode source = node("go:app.go:fn:run", NodeKind.METHOD, "run"); + CodeNode target = node("go:db.go:module:db", NodeKind.MODULE, "db"); + + Map registry = Map.of(target.getLabel(), target); + String content = "import \"myapp/db\"\n"; + + DetectorContext ctx = new DetectorContext("app.go", "go", content, registry, null); + LanguageExtractionResult result = extractor.extract(ctx, source); + + assertThat(result.symbolReferences()).hasSize(1); + assertThat(result.symbolReferences().get(0).getKind()).isEqualTo(EdgeKind.IMPORTS); + } + + @Test + void 
extract_unknownImport_noEdge() { + CodeNode source = node("go:app.go:fn:run", NodeKind.METHOD, "run"); + String content = "import \"fmt\"\n"; + + DetectorContext ctx = new DetectorContext("app.go", "go", content, Map.of(), null); + LanguageExtractionResult result = extractor.extract(ctx, source); + + assertThat(result.symbolReferences()).isEmpty(); + } + + @Test + void extract_noContent_returnsEmpty() { + CodeNode node = node("go:empty.go:fn:noop", NodeKind.METHOD, "noop"); + DetectorContext ctx = new DetectorContext("empty.go", "go", null, Map.of(), null); + LanguageExtractionResult result = extractor.extract(ctx, node); + + assertThat(result.callEdges()).isEmpty(); + assertThat(result.symbolReferences()).isEmpty(); + assertThat(result.typeHints()).isEmpty(); + } + + @Test + void extract_confidence_isPartial() { + CodeNode node = node("go:x.go:fn:fn", NodeKind.METHOD, "fn"); + DetectorContext ctx = new DetectorContext("x.go", "go", "", Map.of(), null); + + LanguageExtractionResult result = extractor.extract(ctx, node); + assertThat(result.confidence()).isEqualTo(CapabilityLevel.PARTIAL); + } + + @Test + void extract_duplicateImportBothStyles_noDuplicateEdges() { + CodeNode source = node("go:main.go:fn:main", NodeKind.METHOD, "main"); + CodeNode target = node("go:handler.go:module:handler", NodeKind.MODULE, "handler"); + + Map registry = Map.of(target.getLabel(), target); + + // File has both a block import and a single-line import for the same package. + // collectImportPaths() must deduplicate so only one IMPORTS edge is produced. 
+ String content = """ + package main + + import ( + "myapp/handler" + ) + import "myapp/handler" + + func main() { + handler.Handle() + } + """; + + DetectorContext ctx = new DetectorContext("main.go", "go", content, registry, null); + LanguageExtractionResult result = extractor.extract(ctx, source); + + assertThat(result.symbolReferences()).hasSize(1); + assertThat(result.symbolReferences().get(0).getKind()).isEqualTo(EdgeKind.IMPORTS); + assertThat(result.symbolReferences().get(0).getTarget().getId()).isEqualTo(target.getId()); + } + + @Test + void extract_determinism_sameTwice() { + CodeNode source = node("go:a.go:fn:run", NodeKind.METHOD, "run"); + CodeNode target = node("go:b.go:module:worker", NodeKind.MODULE, "worker"); + Map registry = Map.of(target.getLabel(), target); + String content = "import (\n \"app/worker\"\n)\n"; + + DetectorContext ctx = new DetectorContext("a.go", "go", content, registry, null); + LanguageExtractionResult r1 = extractor.extract(ctx, source); + LanguageExtractionResult r2 = extractor.extract(ctx, source); + + assertThat(r1.symbolReferences().size()).isEqualTo(r2.symbolReferences().size()); + if (!r1.symbolReferences().isEmpty()) { + assertThat(r1.symbolReferences().get(0).getId()) + .isEqualTo(r2.symbolReferences().get(0).getId()); + } + } + + @Test + void extract_ambiguousPackageName_noFalsePositiveEdge() { + CodeNode source = node("go:main.go:fn:main", NodeKind.METHOD, "main"); + // Two unrelated nodes share the short label "db" — common in Go codebases. 
+ CodeNode db1 = node("go:db/conn.go:module:db", NodeKind.MODULE, "db"); + CodeNode db2 = node("go:dbutil/query.go:module:db", NodeKind.MODULE, "db"); + + Map registry = new LinkedHashMap<>(); + registry.put(db1.getId(), db1); + registry.put(db2.getId(), db2); + + String content = "import \"myapp/db\"\n"; + + DetectorContext ctx = new DetectorContext("main.go", "go", content, registry, null); + LanguageExtractionResult result = extractor.extract(ctx, source); + + // Ambiguous: two nodes labelled "db" → lookupUnambiguous returns null → no edge. + assertThat(result.symbolReferences()).isEmpty(); + } + + @Test + void extract_satisfiesInterfaces_hintIsDeterministicallySorted() { + CodeNode struct = node("go:s.go:struct:Worker", NodeKind.CLASS, "Worker"); + CodeNode ifaceReader = node("go:r.go:interface:Reader", NodeKind.INTERFACE, "Reader"); + CodeNode ifaceCloser = node("go:c.go:interface:Closer", NodeKind.INTERFACE, "Closer"); + + // filePath must be non-null or extractInterfaceHints skips the candidate. + ifaceReader.setFilePath("go:r.go"); + ifaceCloser.setFilePath("go:c.go"); + + // Registry iterated Reader-first; without sorting hint would be "Reader, Closer". + // After fix it must always be "Closer, Reader" (alphabetical). 
+ Map registry = new LinkedHashMap<>(); + registry.put(ifaceReader.getLabel(), ifaceReader); + registry.put(ifaceCloser.getLabel(), ifaceCloser); + + String content = "Worker) Reader\nWorker) Closer\n"; + + DetectorContext ctx = new DetectorContext("s.go", "go", content, registry, null); + LanguageExtractionResult r1 = extractor.extract(ctx, struct); + LanguageExtractionResult r2 = extractor.extract(ctx, struct); + + assertThat(r1.typeHints()).containsKey("satisfies_interfaces"); + assertThat(r1.typeHints().get("satisfies_interfaces")).isEqualTo("Closer, Reader"); + assertThat(r1.typeHints()).isEqualTo(r2.typeHints()); + } + + private static CodeNode node(String id, NodeKind kind, String label) { + CodeNode n = new CodeNode(id, kind, label); + n.setFqn(id); + return n; + } +} diff --git a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractorTest.java b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractorTest.java new file mode 100644 index 00000000..10aaa9ce --- /dev/null +++ b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/java/JavaLanguageExtractorTest.java @@ -0,0 +1,168 @@ +package io.github.randomcodespace.iq.intelligence.extractor.java; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.intelligence.CapabilityLevel; +import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.assertj.core.api.Assertions.assertThat; + +class JavaLanguageExtractorTest { + + private final JavaLanguageExtractor extractor = new JavaLanguageExtractor(); + + @Test + void getLanguage_returnsJava() 
{
+        assertThat(extractor.getLanguage()).isEqualTo("java");
+    }
+
+    @Test
+    void extract_methodNode_detectsCallEdge() {
+        CodeNode caller = node("method:Foo:doWork", NodeKind.METHOD, "doWork");
+        CodeNode callee = node("method:Bar:helper", NodeKind.METHOD, "helper");
+
+        Map registry = new HashMap<>();
+        registry.put(callee.getId(), callee);
+        registry.put(callee.getLabel(), callee);
+
+        String content = """
+                public class Foo {
+                    public void doWork() {
+                        helper();
+                    }
+                }
+                """;
+
+        DetectorContext ctx = new DetectorContext("Foo.java", "java", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, caller);
+
+        assertThat(result.callEdges()).hasSize(1);
+        assertThat(result.callEdges().get(0).getKind()).isEqualTo(EdgeKind.CALLS);
+        assertThat(result.callEdges().get(0).getSourceId()).isEqualTo(caller.getId());
+        assertThat(result.callEdges().get(0).getTarget().getId()).isEqualTo(callee.getId());
+    }
+
+    @Test
+    void extract_classNode_extractsTypeHierarchy() {
+        CodeNode classNode = node("class:Foo", NodeKind.CLASS, "Foo");
+
+        String content = """
+                public class Foo extends BaseService implements Serializable, Runnable {
+                }
+                """;
+
+        DetectorContext ctx = new DetectorContext("Foo.java", "java", content, Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, classNode);
+
+        assertThat(result.typeHints()).containsEntry("extends_type", "BaseService");
+        assertThat(result.typeHints()).containsKey("implements_types");
+        assertThat(result.typeHints().get("implements_types")).contains("Serializable").contains("Runnable");
+    }
+
+    @Test
+    void extract_wrongLanguageNode_returnsEmpty() {
+        CodeNode node = node("config:app.yaml", NodeKind.CONFIG_FILE, "app");
+        DetectorContext ctx = new DetectorContext("app.yaml", "yaml", "key: value", Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, node);
+
+        assertThat(result.callEdges()).isEmpty();
+        assertThat(result.symbolReferences()).isEmpty();
+        assertThat(result.typeHints()).isEmpty();
+    }
+
+
+
+    @Test
+    void extract_noRegistry_returnsEmpty() {
+        CodeNode methodNode = node("method:Foo:doWork", NodeKind.METHOD, "doWork");
+        String content = "public class Foo { public void doWork() { helper(); } }";
+        DetectorContext ctx = new DetectorContext("Foo.java", "java", content, null, null);
+
+        LanguageExtractionResult result = extractor.extract(ctx, methodNode);
+        assertThat(result.callEdges()).isEmpty();
+    }
+
+    @Test
+    void extract_determinism_sameTwice() {
+        CodeNode caller = node("method:Foo:doWork", NodeKind.METHOD, "doWork");
+        CodeNode callee = node("method:Bar:helper", NodeKind.METHOD, "helper");
+
+        Map registry = Map.of(callee.getId(), callee, callee.getLabel(), callee);
+        String content = "public class Foo { public void doWork() { helper(); } }";
+        DetectorContext ctx = new DetectorContext("Foo.java", "java", content, registry, null);
+
+        LanguageExtractionResult r1 = extractor.extract(ctx, caller);
+        LanguageExtractionResult r2 = extractor.extract(ctx, caller);
+
+        assertThat(r1.callEdges().size()).isEqualTo(r2.callEdges().size());
+        for (int i = 0; i < r1.callEdges().size(); i++) {
+            assertThat(r1.callEdges().get(i).getId()).isEqualTo(r2.callEdges().get(i).getId());
+        }
+    }
+
+    @Test
+    void extract_confidenceIsPartial_whenCallsFound() {
+        // Registry-lookup edges are cross-file by definition → always PARTIAL
+        CodeNode caller = node("method:Foo:doWork", NodeKind.METHOD, "doWork");
+        CodeNode callee = node("method:Bar:helper", NodeKind.METHOD, "helper");
+        Map registry = Map.of(callee.getId(), callee, callee.getLabel(), callee);
+
+        String content = "public class Foo { public void doWork() { helper(); } }";
+        DetectorContext ctx = new DetectorContext("Foo.java", "java", content, registry, null);
+
+        LanguageExtractionResult result = extractor.extract(ctx, caller);
+        assertThat(result.confidence()).isEqualTo(CapabilityLevel.PARTIAL);
+    }
+
+    @Test
+    void extract_noFalsePositive_whenTwoClassesHaveSameMethodName() {
+        // Two unrelated classes both have process() — match is ambiguous → no CALLS edge
+        // The registry holds both nodes by id only; the call under test is made from run().
+        CodeNode calleeA = node("method:Alpha:process", NodeKind.METHOD, "process");
+        CodeNode calleeB = node("method:Beta:process", NodeKind.METHOD, "process");
+
+        Map registry = new HashMap<>();
+        registry.put(calleeA.getId(), calleeA);
+        registry.put(calleeB.getId(), calleeB);
+
+        String content = """
+                public class Alpha {
+                    public void run() {
+                        process();
+                    }
+                    public void process() {}
+                }
+                """;
+
+        CodeNode runMethod = node("method:Alpha:run", NodeKind.METHOD, "run");
+        DetectorContext ctx = new DetectorContext("Alpha.java", "java", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, runMethod);
+
+        assertThat(result.callEdges())
+                .as("ambiguous method name must not produce false-positive CALLS edge")
+                .isEmpty();
+    }
+
+    @Test
+    void extract_confidenceIsPartial_whenNoCallsFound() {
+        CodeNode caller = node("method:Foo:doWork", NodeKind.METHOD, "doWork");
+        String content = "public class Foo { public void doWork() { } }";
+        DetectorContext ctx = new DetectorContext("Foo.java", "java", content, Map.of(), null);
+
+        LanguageExtractionResult result = extractor.extract(ctx, caller);
+        assertThat(result.confidence()).isEqualTo(CapabilityLevel.PARTIAL);
+    }
+
+    private static CodeNode node(String id, NodeKind kind, String label) {
+        CodeNode n = new CodeNode(id, kind, label);
+        n.setFqn(id);
+        return n;
+    }
+}
diff --git a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/python/PythonLanguageExtractorTest.java b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/python/PythonLanguageExtractorTest.java
new file mode 100644
index 00000000..933877df
--- /dev/null
+++ b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/python/PythonLanguageExtractorTest.java
@@ -0,0 +1,189 @@
+package io.github.randomcodespace.iq.intelligence.extractor.python;
+
+import io.github.randomcodespace.iq.detector.DetectorContext;
+import io.github.randomcodespace.iq.intelligence.CapabilityLevel;
+import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult;
+import io.github.randomcodespace.iq.model.CodeEdge;
+import io.github.randomcodespace.iq.model.CodeNode;
+import io.github.randomcodespace.iq.model.EdgeKind;
+import io.github.randomcodespace.iq.model.NodeKind;
+import org.junit.jupiter.api.Test;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for {@link PythonLanguageExtractor}: import edges resolved through the node registry,
+ * type hints read from {@code def} signatures, and the reported confidence level.
+ */
+class PythonLanguageExtractorTest {
+
+    private final PythonLanguageExtractor extractor = new PythonLanguageExtractor();
+
+    @Test
+    void getLanguage_returnsPython() {
+        assertThat(extractor.getLanguage()).isEqualTo("python");
+    }
+
+    @Test
+    void extract_fromImport_createsImportEdge() {
+        CodeNode source = node("py:app.py:fn:run", NodeKind.METHOD, "run");
+        CodeNode target = node("py:models.py:class:User", NodeKind.CLASS, "User");
+
+        Map registry = Map.of(target.getLabel(), target);
+
+        String content = """
+                from models import User
+
+                def run():
+                    user = User()
+                """;
+
+        DetectorContext ctx = new DetectorContext("app.py", "python", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        CodeEdge edge = result.symbolReferences().get(0);
+        assertThat(edge.getKind()).isEqualTo(EdgeKind.IMPORTS);
+        assertThat(edge.getTarget().getId()).isEqualTo(target.getId());
+    }
+
+    @Test
+    void extract_plainImport_createsImportEdge() {
+        CodeNode source = node("py:app.py:fn:run", NodeKind.METHOD, "run");
+        CodeNode target = node("py:utils.py:module:utils", NodeKind.MODULE, "utils");
+
+        Map registry = Map.of(target.getLabel(), target);
+        String content = "import utils\n";
+
+        DetectorContext ctx = new DetectorContext("app.py", "python", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        assertThat(result.symbolReferences().get(0).getKind()).isEqualTo(EdgeKind.IMPORTS);
+    }
+
+    @Test
+    void extract_typeHints_surfacedFromDefSignature() {
+        CodeNode fnNode = node("py:service.py:fn:process", NodeKind.METHOD, "process");
+
+        String content = """
+                def process(data: str, count: int) -> bool:
+                    return len(data) == count
+                """;
+
+        DetectorContext ctx = new DetectorContext("service.py", "python", content, Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, fnNode);
+
+        // Parameters are expected as name:type pairs; the return annotation has its own key.
+        assertThat(result.typeHints()).containsKey("param_types");
+        assertThat(result.typeHints().get("param_types")).contains("data:str").contains("count:int");
+        assertThat(result.typeHints()).containsEntry("return_type", "bool");
+    }
+
+    @Test
+    void extract_selfParamExcluded_fromTypeHints() {
+        CodeNode fnNode = node("py:service.py:fn:update", NodeKind.METHOD, "update");
+
+        String content = "def update(self, value: int) -> None:\n pass\n";
+
+        DetectorContext ctx = new DetectorContext("service.py", "python", content, Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, fnNode);
+
+        assertThat(result.typeHints().getOrDefault("param_types", "")).doesNotContain("self");
+    }
+
+    @Test
+    void extract_noContent_returnsEmpty() {
+        CodeNode fnNode = node("py:empty.py:fn:noop", NodeKind.METHOD, "noop");
+        // Content is null here, not merely an empty string.
+        DetectorContext ctx = new DetectorContext("empty.py", "python", null, Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, fnNode);
+
+        assertThat(result.callEdges()).isEmpty();
+        assertThat(result.symbolReferences()).isEmpty();
+        assertThat(result.typeHints()).isEmpty();
+    }
+
+    @Test
+    void extract_confidence_isPartial() {
+        CodeNode node = node("py:x.py:fn:fn", NodeKind.METHOD, "fn");
+        DetectorContext ctx = new DetectorContext("x.py", "python", "", Map.of(), null);
+
+        LanguageExtractionResult result = extractor.extract(ctx, node);
+        assertThat(result.confidence()).isEqualTo(CapabilityLevel.PARTIAL);
+    }
+
+    @Test
+    void extract_determinism_sameTwice() {
+        CodeNode fnNode = node("py:svc.py:fn:compute", NodeKind.METHOD, "compute");
+        String content = "def compute(x: int, y: float) -> str:\n pass\n";
+
+        DetectorContext ctx = new DetectorContext("svc.py", "python", content, Map.of(), null);
+        LanguageExtractionResult r1 = extractor.extract(ctx, fnNode);
+        LanguageExtractionResult r2 = extractor.extract(ctx, fnNode);
+
+        assertThat(r1.typeHints()).isEqualTo(r2.typeHints());
+    }
+
+    @Test
+    void extract_fromImportAndPlainImportSameSymbol_noDuplicateEdges() {
+        CodeNode source = node("py:app.py:fn:run", NodeKind.METHOD, "run");
+        CodeNode target = node("py:utils.py:class:Foo", NodeKind.CLASS, "Foo");
+        Map registry = Map.of(target.getLabel(), target);
+
+        // Both FROM_IMPORT and PLAIN_IMPORT match Foo → same edge id must deduplicate.
+        String content = "from utils import Foo\nimport Foo\n";
+
+        DetectorContext ctx = new DetectorContext("app.py", "python", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        assertThat(result.symbolReferences().get(0).getTarget().getId()).isEqualTo(target.getId());
+    }
+
+    @Test
+    void extract_aliasedFromImport_edgeCreatedUsingOriginalName() {
+        CodeNode source = node("py:app.py:fn:run", NodeKind.METHOD, "run");
+        CodeNode target = node("py:models.py:class:User", NodeKind.CLASS, "User");
+        Map registry = Map.of(target.getLabel(), target);
+
+        // Alias "as u" must be stripped so lookup is on "User", not "User as u".
+        String content = "from models import User as u\n";
+
+        DetectorContext ctx = new DetectorContext("app.py", "python", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        assertThat(result.symbolReferences().get(0).getTarget().getId()).isEqualTo(target.getId());
+    }
+
+    @Test
+    void extract_ambiguousShortSymbolName_noFalsePositiveEdge() {
+        CodeNode source = node("py:app.py:fn:run", NodeKind.METHOD, "run");
+        // Two unrelated nodes share the short label "get" — common in Django/dict APIs.
+        CodeNode target1 = node("py:dict_utils.py:fn:get", NodeKind.METHOD, "get");
+        CodeNode target2 = node("py:list_utils.py:fn:get", NodeKind.METHOD, "get");
+
+        Map registry = new LinkedHashMap<>();
+        registry.put(target1.getId(), target1);
+        registry.put(target2.getId(), target2);
+
+        String content = "from utils import get\n";
+
+        DetectorContext ctx = new DetectorContext("app.py", "python", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        // Ambiguous: two nodes labelled "get" → lookupUnambiguous returns null → no edge.
+        assertThat(result.symbolReferences()).isEmpty();
+    }
+
+    /** Creates a {@link CodeNode} with the given id, kind and label, using the id as its FQN. */
+    private static CodeNode node(String id, NodeKind kind, String label) {
+        CodeNode n = new CodeNode(id, kind, label);
+        n.setFqn(id);
+        return n;
+    }
+}
diff --git a/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/typescript/TypeScriptLanguageExtractorTest.java b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/typescript/TypeScriptLanguageExtractorTest.java
new file mode 100644
index 00000000..69226d24
--- /dev/null
+++ b/src/test/java/io/github/randomcodespace/iq/intelligence/extractor/typescript/TypeScriptLanguageExtractorTest.java
@@ -0,0 +1,146 @@
+package io.github.randomcodespace.iq.intelligence.extractor.typescript;
+
+import io.github.randomcodespace.iq.detector.DetectorContext;
+import io.github.randomcodespace.iq.intelligence.CapabilityLevel;
+import io.github.randomcodespace.iq.intelligence.extractor.LanguageExtractionResult;
+import io.github.randomcodespace.iq.model.CodeEdge;
+import io.github.randomcodespace.iq.model.CodeNode;
+import io.github.randomcodespace.iq.model.EdgeKind;
+import io.github.randomcodespace.iq.model.NodeKind;
+import org.junit.jupiter.api.Test;
+
+import java.util.Map;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for {@link TypeScriptLanguageExtractor}: import edges resolved through the node
+ * registry, JSDoc-derived type hints, and the reported confidence level.
+ */
+class TypeScriptLanguageExtractorTest {
+
+    private final TypeScriptLanguageExtractor extractor = new TypeScriptLanguageExtractor();
+
+    @Test
+    void getLanguage_returnsTypescript() {
+        assertThat(extractor.getLanguage()).isEqualTo("typescript");
+    }
+
+    @Test
+    void extract_namedImport_createsImportEdge() {
+        CodeNode source = node("src:index.ts:fn:fetchData", NodeKind.METHOD, "fetchData");
+        CodeNode target = node("src:api.ts:class:ApiService", NodeKind.CLASS, "ApiService");
+
+        Map registry = Map.of(target.getLabel(), target);
+
+        String content = """
+                import { ApiService } from './api';
+
+                export function fetchData() {
+                    return new ApiService().get();
+                }
+                """;
+
+        DetectorContext ctx = new DetectorContext("index.ts", "typescript", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        CodeEdge edge = result.symbolReferences().get(0);
+        assertThat(edge.getKind()).isEqualTo(EdgeKind.IMPORTS);
+        assertThat(edge.getTarget().getId()).isEqualTo(target.getId());
+    }
+
+    @Test
+    void extract_defaultImport_createsImportEdge() {
+        CodeNode source = node("src:app.ts:fn:main", NodeKind.METHOD, "main");
+        CodeNode target = node("src:config.ts:class:Config", NodeKind.CLASS, "Config");
+
+        Map registry = Map.of(target.getLabel(), target);
+        String content = "import Config from './config';";
+
+        DetectorContext ctx = new DetectorContext("app.ts", "typescript", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        assertThat(result.symbolReferences().get(0).getKind()).isEqualTo(EdgeKind.IMPORTS);
+    }
+
+    @Test
+    void extract_jsDocParams_surfacedAsTypeHints() {
+        CodeNode fnNode = node("src:util.ts:fn:process", NodeKind.METHOD, "process");
+        // The JSDoc text is supplied through the node's "lex_comment" property; the file content is empty.
+        fnNode.getProperties().put("lex_comment", "/** @param {string} input - the input @returns {boolean} */");
+
+        DetectorContext ctx = new DetectorContext("util.ts", "typescript", "", Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, fnNode);
+
+        assertThat(result.typeHints()).containsKey("jsdoc_params");
+        assertThat(result.typeHints()).containsEntry("jsdoc_return_type", "boolean");
+    }
+
+    @Test
+    void extract_noImportsNoJsDoc_returnsEmpty() {
+        CodeNode node = node("src:empty.ts:fn:noop", NodeKind.METHOD, "noop");
+        DetectorContext ctx = new DetectorContext("empty.ts", "typescript", "function noop() {}", Map.of(), null);
+        LanguageExtractionResult result = extractor.extract(ctx, node);
+
+        assertThat(result.callEdges()).isEmpty();
+        assertThat(result.symbolReferences()).isEmpty();
+        assertThat(result.typeHints()).isEmpty();
+    }
+
+    @Test
+    void extract_confidence_isPartial() {
+        CodeNode node = node("src:x.ts:fn:fn", NodeKind.METHOD, "fn");
+        DetectorContext ctx = new DetectorContext("x.ts", "typescript", "", Map.of(), null);
+
+        LanguageExtractionResult result = extractor.extract(ctx, node);
+        assertThat(result.confidence()).isEqualTo(CapabilityLevel.PARTIAL);
+    }
+
+    @Test
+    void extract_determinism_sameTwice() {
+        CodeNode source = node("src:a.ts:fn:fn", NodeKind.METHOD, "fn");
+        CodeNode target = node("src:b.ts:class:MyClass", NodeKind.CLASS, "MyClass");
+        Map registry = Map.of(target.getLabel(), target);
+        String content = "import { MyClass } from './b';";
+
+        DetectorContext ctx = new DetectorContext("a.ts", "typescript", content, registry, null);
+        LanguageExtractionResult r1 = extractor.extract(ctx, source);
+        LanguageExtractionResult r2 = extractor.extract(ctx, source);
+
+        assertThat(r1.symbolReferences().size()).isEqualTo(r2.symbolReferences().size());
+        // Compare every edge id in order, not only the first.
+        for (int i = 0; i < r1.symbolReferences().size(); i++) {
+            assertThat(r1.symbolReferences().get(i).getId())
+                    .isEqualTo(r2.symbolReferences().get(i).getId());
+        }
+    }
+
+    @Test
+    void extract_sameSymbolMatchedByNamedAndDefaultImport_noDuplicateEdges() {
+        CodeNode source = node("src:a.ts:fn:run", NodeKind.METHOD, "run");
+        CodeNode target = node("src:b.ts:class:Config", NodeKind.CLASS, "Config");
+        Map registry = Map.of(target.getLabel(), target);
+
+        // Both named and default import patterns match Config → same edge id, must deduplicate.
+        String content = """
+                import { Config } from './b';
+                import Config from './b';
+                """;
+
+        DetectorContext ctx = new DetectorContext("a.ts", "typescript", content, registry, null);
+        LanguageExtractionResult result = extractor.extract(ctx, source);
+
+        assertThat(result.symbolReferences()).hasSize(1);
+        assertThat(result.symbolReferences().get(0).getKind()).isEqualTo(EdgeKind.IMPORTS);
+        assertThat(result.symbolReferences().get(0).getTarget().getId()).isEqualTo(target.getId());
+    }
+
+    /** Creates a {@link CodeNode} with the given id, kind and label, using the id as its FQN. */
+    private static CodeNode node(String id, NodeKind kind, String label) {
+        CodeNode n = new CodeNode(id, kind, label);
+        n.setFqn(id);
+        return n;
+    }
+}