From 0b45b191a765876bdaeeb364004c30b3c2bc4b8b Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Sat, 4 Apr 2026 13:15:07 +0000 Subject: [PATCH 1/2] refactor: extract AbstractPythonAntlrDetector + fix MD5 security hotspot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract AbstractPythonAntlrDetector base class (parse, getSupportedLanguages, getBaseClassesText, extractClassBody, NEXT_CLASS_RE) shared across all 10 Python ANTLR detectors — eliminates ~325 lines of duplicated code - Extract AbstractPythonDbDetector for Django/SQLAlchemy ORM detectors (ensureDbNode, addDbEdge) — further reduces duplication - Replace MD5 with SHA-256 in FileHasher (fixes 2 SonarCloud security hotspots) - Update FileHasherTest + CacheCoverageTest for SHA-256 output length (64 vs 32) Co-Authored-By: Claude Sonnet 4.6 --- .../randomcodespace/iq/cache/FileHasher.java | 20 ++--- .../python/AbstractPythonAntlrDetector.java | 65 +++++++++++++++ .../python/AbstractPythonDbDetector.java | 71 ++++++++++++++++ .../detector/python/CeleryTaskDetector.java | 18 +--- .../detector/python/DjangoAuthDetector.java | 19 +---- .../detector/python/DjangoModelDetector.java | 82 +------------------ .../detector/python/DjangoViewDetector.java | 31 +------ .../detector/python/FastAPIAuthDetector.java | 19 +---- .../detector/python/FastAPIRouteDetector.java | 18 +--- .../detector/python/FlaskRouteDetector.java | 19 +---- .../python/PydanticModelDetector.java | 35 +------- .../python/PythonStructuresDetector.java | 29 +------ .../python/SQLAlchemyModelDetector.java | 80 +----------------- .../iq/cache/CacheCoverageTest.java | 6 +- .../iq/cache/FileHasherTest.java | 4 +- 15 files changed, 162 insertions(+), 354 deletions(-) create mode 100644 src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonAntlrDetector.java create mode 100644 src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonDbDetector.java diff --git a/src/main/java/io/github/randomcodespace/iq/cache/FileHasher.java b/src/main/java/io/github/randomcodespace/iq/cache/FileHasher.java index 08f7b9a2..06d87ffe 100644 --- a/src/main/java/io/github/randomcodespace/iq/cache/FileHasher.java +++ b/src/main/java/io/github/randomcodespace/iq/cache/FileHasher.java @@ -9,9 +9,7 @@ import java.util.HexFormat; /** - * Computes MD5 hash of file content for change detection. - * MD5 is used because it is fast and sufficient for content-change - * detection (not for cryptographic purposes). + * Computes SHA-256 hash of file content for change detection. */ public final class FileHasher { @@ -19,15 +17,15 @@ private FileHasher() { } /** - * Compute the MD5 hex digest of a file's content. + * Compute the SHA-256 hex digest of a file's content. * * @param file path to the file - * @return lowercase hex MD5 hash string + * @return lowercase hex SHA-256 hash string * @throws IOException if the file cannot be read */ public static String hash(Path file) throws IOException { try { - MessageDigest md = MessageDigest.getInstance("MD5"); + MessageDigest md = MessageDigest.getInstance("SHA-256"); byte[] buf = new byte[8192]; try (InputStream is = Files.newInputStream(file)) { int n; @@ -37,23 +35,23 @@ public static String hash(Path file) throws IOException { } return HexFormat.of().formatHex(md.digest()); } catch (NoSuchAlgorithmException e) { - throw new RuntimeException("MD5 not available", e); + throw new RuntimeException("SHA-256 not available", e); } } /** - * Compute the MD5 hex digest of a string's content (UTF-8 bytes). + * Compute the SHA-256 hex digest of a string's content (UTF-8 bytes). * * @param content the string to hash - * @return lowercase hex MD5 hash string + * @return lowercase hex SHA-256 hash string */ public static String hashString(String content) { try { - MessageDigest md = MessageDigest.getInstance("MD5"); + MessageDigest md = MessageDigest.getInstance("SHA-256"); md.update(content.getBytes(java.nio.charset.StandardCharsets.UTF_8)); return HexFormat.of().formatHex(md.digest()); } catch (NoSuchAlgorithmException e) { - throw new RuntimeException("MD5 not available", e); + throw new RuntimeException("SHA-256 not available", e); } } } diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonAntlrDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonAntlrDetector.java new file mode 100644 index 00000000..703d4cce --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonAntlrDetector.java @@ -0,0 +1,65 @@ +package io.github.randomcodespace.iq.detector.python; + +import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.grammar.AntlrParserFactory; +import io.github.randomcodespace.iq.grammar.python.Python3Parser; +import org.antlr.v4.runtime.tree.ParseTree; + +import java.util.Set; +import java.util.regex.Pattern; + +/** + * Abstract base for Python ANTLR-based detectors. + * Provides shared {@link #parse(DetectorContext)} with large-file regex fallback, + * language support declaration, and Python-specific AST helpers used across + * multiple Python detectors. + */ +public abstract class AbstractPythonAntlrDetector extends AbstractAntlrDetector { + + /** Matches the start of the next class definition — used to bound class bodies in regex fallback. */ + protected static final Pattern NEXT_CLASS_RE = Pattern.compile("\\nclass\\s+\\w+"); + + @Override + public Set getSupportedLanguages() { + return Set.of("python"); + } + + @Override + protected ParseTree parse(DetectorContext ctx) { + // Skip ANTLR for very large files (>500KB) — regex fallback is faster + if (ctx.content().length() > 500_000) { + return null; + } + return AntlrParserFactory.parse("python", ctx.content()); + } + + /** + * Build a comma-separated string of base class names from an ANTLR class definition context. + * + * @param classCtx the parsed class definition + * @return base class text, or null if no base classes + */ + protected static String getBaseClassesText(Python3Parser.ClassdefContext classCtx) { + if (classCtx.arglist() == null) return null; + StringBuilder sb = new StringBuilder(); + for (var arg : classCtx.arglist().argument()) { + if (sb.length() > 0) sb.append(", "); + sb.append(arg.getText()); + } + return sb.toString(); + } + + /** + * Extract the source text of an entire class body using ANTLR token positions. + * + * @param text full source content + * @param classCtx the parsed class definition + * @return substring covering the full class body + */ + protected static String extractClassBody(String text, Python3Parser.ClassdefContext classCtx) { + int start = classCtx.getStart().getStartIndex(); + int stop = classCtx.getStop() != null ? classCtx.getStop().getStopIndex() + 1 : text.length(); + return text.substring(Math.min(start, text.length()), Math.min(stop, text.length())); + } +} diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonDbDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonDbDetector.java new file mode 100644 index 00000000..e99e6555 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/AbstractPythonDbDetector.java @@ -0,0 +1,71 @@ +package io.github.randomcodespace.iq.detector.python; + +import io.github.randomcodespace.iq.analyzer.InfraEndpoint; +import io.github.randomcodespace.iq.analyzer.InfrastructureRegistry; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; + +import java.util.List; + +/** + * Abstract base for Python ORM detectors that emit DATABASE_CONNECTION nodes and CONNECTS_TO edges. + * Extends {@link AbstractPythonAntlrDetector} with shared database node/edge helpers + * used by Django and SQLAlchemy detectors. + */ +public abstract class AbstractPythonDbDetector extends AbstractPythonAntlrDetector { + + /** + * Ensure a DATABASE_CONNECTION node exists in the result, creating it if needed. + * Uses the first database from the InfrastructureRegistry if available, + * otherwise creates a generic "database:unknown" node. + * + * @param registry infrastructure registry (may be null) + * @param nodes the nodes list to add the DB node to if missing + * @return the database node ID + */ + protected static String ensureDbNode(InfrastructureRegistry registry, List nodes) { + String dbNodeId; + if (registry != null && !registry.getDatabases().isEmpty()) { + InfraEndpoint db = registry.getDatabases().values().iterator().next(); + dbNodeId = "infra:" + db.id(); + if (nodes.stream().noneMatch(n -> dbNodeId.equals(n.getId()))) { + CodeNode dbNode = new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, + db.name() + " (" + db.type() + ")"); + dbNode.getProperties().put("type", db.type()); + if (db.connectionUrl() != null) dbNode.getProperties().put("url", db.connectionUrl()); + nodes.add(dbNode); + } + } else { + dbNodeId = "database:unknown"; + if (nodes.stream().noneMatch(n -> dbNodeId.equals(n.getId()))) { + nodes.add(new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, "Database")); + } + } + return dbNodeId; + } + + /** + * Add a CONNECTS_TO edge from the given source node to the database node. + * + * @param sourceId the source node ID + * @param registry infrastructure registry (may be null) + * @param nodes the nodes list (used to find/create the DB node) + * @param edges the edges list to add the edge to + */ + protected static void addDbEdge(String sourceId, InfrastructureRegistry registry, + List nodes, List edges) { + String dbNodeId = ensureDbNode(registry, nodes); + CodeNode targetRef = nodes.stream() + .filter(n -> dbNodeId.equals(n.getId())) + .findFirst() + .orElseGet(() -> new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, "Database")); + CodeEdge edge = new CodeEdge(); + edge.setId(sourceId + "->connects_to->" + dbNodeId); + edge.setKind(EdgeKind.CONNECTS_TO); + edge.setSourceId(sourceId); + edge.setTarget(targetRef); + edges.add(edge); + } +} diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/CeleryTaskDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/CeleryTaskDetector.java index 45250d9b..f6e7aca0 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/CeleryTaskDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/CeleryTaskDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeEdge; @@ -33,7 +31,7 @@ properties = {"broker", "task_name"} ) @Component -public class CeleryTaskDetector extends AbstractAntlrDetector { +public class CeleryTaskDetector extends AbstractPythonAntlrDetector { // --- Regex patterns --- private static final Pattern TASK_DECORATOR = Pattern.compile( @@ -56,20 +54,6 @@ public String getName() { return "python.celery_tasks"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoAuthDetector.java index 96512e26..d421ab0b 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoAuthDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeNode; @@ -15,7 +13,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -31,7 +28,7 @@ properties = {"auth_type", "permissions"} ) @Component -public class DjangoAuthDetector extends AbstractAntlrDetector { +public class DjangoAuthDetector extends AbstractPythonAntlrDetector { // --- Regex fallback patterns --- private static final Pattern LOGIN_REQUIRED_RE = Pattern.compile("@login_required\\b"); @@ -56,20 +53,6 @@ public String getName() { return "django_auth"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoModelDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoModelDetector.java index 80a9ffe1..e6480d55 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoModelDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoModelDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeEdge; @@ -19,7 +17,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -36,7 +33,7 @@ properties = {"framework", "table_name"} ) @Component -public class DjangoModelDetector extends AbstractAntlrDetector { +public class DjangoModelDetector extends AbstractPythonDbDetector { // --- Regex patterns (used in both AST body extraction and regex fallback) --- private static final Pattern DJANGO_MODEL_RE = Pattern.compile( @@ -64,7 +61,6 @@ public class DjangoModelDetector extends AbstractAntlrDetector { private static final Pattern MANAGER_ASSIGNMENT_RE = Pattern.compile( "(\\w+)\\s*=\\s*(\\w+)\\s*\\(\\s*\\)", Pattern.MULTILINE ); - private static final Pattern NEXT_CLASS_RE = Pattern.compile("\\nclass\\s+\\w+"); private static final Pattern META_CLASS_RE = Pattern.compile("class\\s+Meta\\s*:"); private static final Pattern META_END_RE = Pattern.compile("\\n\\s{4}\\S"); @@ -73,20 +69,6 @@ public String getName() { return "python.django_models"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); @@ -185,7 +167,7 @@ public void enterClassdef(Python3Parser.ClassdefContext classCtx) { node.getProperties().put("ordering", ordering); } nodes.add(node); - DjangoModelDetector.addDbEdge(nodeId, ctx.registry(), nodes, edges); + addDbEdge(nodeId, ctx.registry(), nodes, edges); // FK / OneToOne edges Matcher fkMatcher = FK_RE.matcher(classBody); @@ -366,64 +348,4 @@ protected DetectorResult detectWithRegex(DetectorContext ctx) { return DetectorResult.of(nodes, edges); } - private static String getBaseClassesText(Python3Parser.ClassdefContext classCtx) { - if (classCtx.arglist() == null) return null; - StringBuilder sb = new StringBuilder(); - for (var arg : classCtx.arglist().argument()) { - if (sb.length() > 0) sb.append(", "); - sb.append(arg.getText()); - } - return sb.toString(); - } - - /** - * Extract the text of a class body from the source using the AST context positions. - */ - private static String extractClassBody(String text, Python3Parser.ClassdefContext classCtx) { - int start = classCtx.getStart().getStartIndex(); - int stop = classCtx.getStop() != null ? classCtx.getStop().getStopIndex() + 1 : text.length(); - return text.substring(Math.min(start, text.length()), Math.min(stop, text.length())); - } - - // ==================== InfrastructureRegistry helpers ==================== - - static String ensureDbNode( - io.github.randomcodespace.iq.analyzer.InfrastructureRegistry registry, - List nodes) { - String dbNodeId; - if (registry != null && !registry.getDatabases().isEmpty()) { - io.github.randomcodespace.iq.analyzer.InfraEndpoint db = - registry.getDatabases().values().iterator().next(); - dbNodeId = "infra:" + db.id(); - if (nodes.stream().noneMatch(n -> dbNodeId.equals(n.getId()))) { - CodeNode dbNode = new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, - db.name() + " (" + db.type() + ")"); - dbNode.getProperties().put("type", db.type()); - if (db.connectionUrl() != null) dbNode.getProperties().put("url", db.connectionUrl()); - nodes.add(dbNode); - } - } else { - dbNodeId = "database:unknown"; - if (nodes.stream().noneMatch(n -> dbNodeId.equals(n.getId()))) { - nodes.add(new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, "Database")); - } - } - return dbNodeId; - } - - static void addDbEdge(String sourceId, - io.github.randomcodespace.iq.analyzer.InfrastructureRegistry registry, - List nodes, List edges) { - String dbNodeId = ensureDbNode(registry, nodes); - CodeNode targetRef = nodes.stream() - .filter(n -> dbNodeId.equals(n.getId())) - .findFirst() - .orElseGet(() -> new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, "Database")); - CodeEdge edge = new CodeEdge(); - edge.setId(sourceId + "->connects_to->" + dbNodeId); - edge.setKind(EdgeKind.CONNECTS_TO); - edge.setSourceId(sourceId); - edge.setTarget(targetRef); - edges.add(edge); - } } diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoViewDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoViewDetector.java index fcebc312..b706ea10 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoViewDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/DjangoViewDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeNode; @@ -14,7 +12,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -30,7 +27,7 @@ properties = {"framework", "protocol"} ) @Component -public class DjangoViewDetector extends AbstractAntlrDetector { +public class DjangoViewDetector extends AbstractPythonAntlrDetector { private static final Pattern URL_PATTERN = Pattern.compile( "(?:path|re_path|url)\\(\\s*['\"]([^'\"]+)['\"]\\s*,\\s*(\\w[\\w.]*)" @@ -44,20 +41,6 @@ public String getName() { return "python.django_views"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); @@ -181,16 +164,4 @@ protected DetectorResult detectWithRegex(DetectorContext ctx) { return DetectorResult.of(nodes, List.of()); } - /** - * Extract base classes text from a classdef context's arglist. - */ - private static String getBaseClassesText(Python3Parser.ClassdefContext classCtx) { - if (classCtx.arglist() == null) return null; - StringBuilder sb = new StringBuilder(); - for (var arg : classCtx.arglist().argument()) { - if (sb.length() > 0) sb.append(", "); - sb.append(arg.getText()); - } - return sb.toString(); - } } diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIAuthDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIAuthDetector.java index 7f1bce3d..7585a249 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIAuthDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIAuthDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeNode; @@ -14,7 +12,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -30,7 +27,7 @@ properties = {"auth_type"} ) @Component -public class FastAPIAuthDetector extends AbstractAntlrDetector { +public class FastAPIAuthDetector extends AbstractPythonAntlrDetector { // --- Regex fallback patterns --- private static final Pattern DEPENDS_AUTH_RE = Pattern.compile( @@ -54,20 +51,6 @@ public String getName() { return "fastapi_auth"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIRouteDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIRouteDetector.java index 2ba933c1..b47c2702 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIRouteDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/FastAPIRouteDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeNode; @@ -32,7 +30,7 @@ properties = {"framework", "http_method", "protocol"} ) @Component -public class FastAPIRouteDetector extends AbstractAntlrDetector { +public class FastAPIRouteDetector extends AbstractPythonAntlrDetector { private static final Set HTTP_METHODS = Set.of( "get", "post", "put", "delete", "patch", "options", "head" @@ -55,20 +53,6 @@ public String getName() { return "python.fastapi_routes"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/FlaskRouteDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/FlaskRouteDetector.java index a6c99694..d3ac12b7 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/FlaskRouteDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/FlaskRouteDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeEdge; @@ -16,7 +14,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -33,7 +30,7 @@ properties = {"framework", "http_method", "protocol"} ) @Component -public class FlaskRouteDetector extends AbstractAntlrDetector { +public class FlaskRouteDetector extends AbstractPythonAntlrDetector { // --- Regex fallback patterns --- private static final Pattern ROUTE_PATTERN = Pattern.compile( @@ -48,20 +45,6 @@ public String getName() { return "python.flask_routes"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/PydanticModelDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/PydanticModelDetector.java index a71cafa8..7e99f173 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/PydanticModelDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/PydanticModelDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeEdge; @@ -19,7 +17,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -36,7 +33,7 @@ properties = {"base_class", "framework"} ) @Component -public class PydanticModelDetector extends AbstractAntlrDetector { +public class PydanticModelDetector extends AbstractPythonAntlrDetector { // --- Regex patterns --- private static final Pattern PYDANTIC_CLASS_RE = Pattern.compile( @@ -54,7 +51,6 @@ public class PydanticModelDetector extends AbstractAntlrDetector { private static final Pattern CONFIG_ATTR_RE = Pattern.compile( "^\\s{8}(\\w+)\\s*=\\s*(.+)", Pattern.MULTILINE ); - private static final Pattern NEXT_CLASS_RE = Pattern.compile("\\nclass\\s+\\w+"); private static final Pattern CONFIG_END_RE = Pattern.compile("\\n\\S"); @Override @@ -62,20 +58,6 @@ public String getName() { return "python.pydantic_models"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); @@ -274,19 +256,4 @@ protected DetectorResult detectWithRegex(DetectorContext ctx) { return DetectorResult.of(nodes, edges); } - private static String getBaseClassesText(Python3Parser.ClassdefContext classCtx) { - if (classCtx.arglist() == null) return null; - StringBuilder sb = new StringBuilder(); - for (var arg : classCtx.arglist().argument()) { - if (sb.length() > 0) sb.append(", "); - sb.append(arg.getText()); - } - return sb.toString(); - } - - private static String extractClassBody(String text, Python3Parser.ClassdefContext classCtx) { - int start = classCtx.getStart().getStartIndex(); - int stop = classCtx.getStop() != null ? classCtx.getStop().getStopIndex() + 1 : text.length(); - return text.substring(Math.min(start, text.length()), Math.min(stop, text.length())); - } } diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/PythonStructuresDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/PythonStructuresDetector.java index 8750be7f..84b349e6 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/PythonStructuresDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/PythonStructuresDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeEdge; @@ -19,7 +17,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -35,7 +32,7 @@ edgeKinds = {EdgeKind.DEFINES, EdgeKind.EXTENDS, EdgeKind.IMPORTS} ) @Component -public class PythonStructuresDetector extends AbstractAntlrDetector { +public class PythonStructuresDetector extends AbstractPythonAntlrDetector { // --- Regex patterns (for fallback) --- private static final Pattern CLASS_RE = Pattern.compile( @@ -60,20 +57,6 @@ public String getName() { return "python_structures"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); @@ -493,16 +476,6 @@ private List findDecoratorsForLine(Map> decoratorM return reversed; } - private static String getBaseClassesText(Python3Parser.ClassdefContext classCtx) { - if (classCtx.arglist() == null) return null; - StringBuilder sb = new StringBuilder(); - for (var arg : classCtx.arglist().argument()) { - if (sb.length() > 0) sb.append(", "); - sb.append(arg.getText()); - } - return sb.toString(); - } - /** * Get the indentation of a function definition from the source text. * For async functions, uses the async keyword position (parent start). diff --git a/src/main/java/io/github/randomcodespace/iq/detector/python/SQLAlchemyModelDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/python/SQLAlchemyModelDetector.java index d8bc2eab..77e62509 100644 --- a/src/main/java/io/github/randomcodespace/iq/detector/python/SQLAlchemyModelDetector.java +++ b/src/main/java/io/github/randomcodespace/iq/detector/python/SQLAlchemyModelDetector.java @@ -1,9 +1,7 @@ package io.github.randomcodespace.iq.detector.python; -import io.github.randomcodespace.iq.detector.AbstractAntlrDetector; import io.github.randomcodespace.iq.detector.DetectorContext; import io.github.randomcodespace.iq.detector.DetectorResult; -import io.github.randomcodespace.iq.grammar.AntlrParserFactory; import io.github.randomcodespace.iq.grammar.python.Python3Parser; import io.github.randomcodespace.iq.grammar.python.Python3ParserBaseListener; import io.github.randomcodespace.iq.model.CodeEdge; @@ -16,7 +14,6 @@ import java.util.ArrayList; import java.util.List; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import io.github.randomcodespace.iq.detector.DetectorInfo; @@ -33,7 +30,7 @@ properties = {"columns", "framework", "table_name"} ) @Component -public class SQLAlchemyModelDetector extends AbstractAntlrDetector { +public class SQLAlchemyModelDetector extends AbstractPythonDbDetector { // --- Regex patterns --- private static final Pattern MODEL_PATTERN = Pattern.compile( @@ -48,27 +45,11 @@ public class SQLAlchemyModelDetector extends AbstractAntlrDetector { private static final Pattern RELATIONSHIP_PATTERN = Pattern.compile( "(\\w+)\\s*(?::\\s*Mapped\\[.*?\\])?\\s*=\\s*relationship\\(\\s*['\"]((\\w+))['\"]" ); - private static final Pattern NEXT_CLASS_RE = Pattern.compile("\\nclass\\s+\\w+"); - @Override public String getName() { return "python.sqlalchemy_models"; } - @Override - public Set getSupportedLanguages() { - return Set.of("python"); - } - - @Override - protected ParseTree parse(DetectorContext ctx) { - // Skip ANTLR for very large files (>500KB) — regex fallback is faster - if (ctx.content().length() > 500_000) { - return null; // triggers regex fallback - } - return AntlrParserFactory.parse("python", ctx.content()); - } - @Override protected DetectorResult detectWithAst(ParseTree tree, DetectorContext ctx) { List nodes = new ArrayList<>(); @@ -113,7 +94,7 @@ public void enterClassdef(Python3Parser.ClassdefContext classCtx) { node.getProperties().put("columns", columns); node.getProperties().put("framework", "sqlalchemy"); nodes.add(node); - SQLAlchemyModelDetector.addDbEdge(nodeId, ctx.registry(), nodes, edges); + addDbEdge(nodeId, ctx.registry(), nodes, edges); // Relationships Matcher relMatcher = RELATIONSHIP_PATTERN.matcher(classBody); @@ -198,61 +179,4 @@ protected DetectorResult detectWithRegex(DetectorContext ctx) { return DetectorResult.of(nodes, edges); } - private static String getBaseClassesText(Python3Parser.ClassdefContext classCtx) { - if (classCtx.arglist() == null) return null; - StringBuilder sb = new StringBuilder(); - for (var arg : classCtx.arglist().argument()) { - if (sb.length() > 0) sb.append(", "); - sb.append(arg.getText()); - } - return sb.toString(); - } - - private static String extractClassBody(String text, Python3Parser.ClassdefContext classCtx) { - int start = classCtx.getStart().getStartIndex(); - int stop = classCtx.getStop() != null ? classCtx.getStop().getStopIndex() + 1 : text.length(); - return text.substring(Math.min(start, text.length()), Math.min(stop, text.length())); - } - - // ==================== InfrastructureRegistry helpers ==================== - - static String ensureDbNode( - io.github.randomcodespace.iq.analyzer.InfrastructureRegistry registry, - List nodes) { - String dbNodeId; - if (registry != null && !registry.getDatabases().isEmpty()) { - io.github.randomcodespace.iq.analyzer.InfraEndpoint db = - registry.getDatabases().values().iterator().next(); - dbNodeId = "infra:" + db.id(); - if (nodes.stream().noneMatch(n -> dbNodeId.equals(n.getId()))) { - CodeNode dbNode = new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, - db.name() + " (" + db.type() + ")"); - dbNode.getProperties().put("type", db.type()); - if (db.connectionUrl() != null) dbNode.getProperties().put("url", db.connectionUrl()); - nodes.add(dbNode); - } - } else { - dbNodeId = "database:unknown"; - if (nodes.stream().noneMatch(n -> dbNodeId.equals(n.getId()))) { - nodes.add(new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, "Database")); - } - } - return dbNodeId; - } - - static void addDbEdge(String sourceId, - io.github.randomcodespace.iq.analyzer.InfrastructureRegistry registry, - List nodes, List edges) { - String dbNodeId = ensureDbNode(registry, nodes); - CodeNode targetRef = nodes.stream() - .filter(n -> dbNodeId.equals(n.getId())) - .findFirst() - .orElseGet(() -> new CodeNode(dbNodeId, NodeKind.DATABASE_CONNECTION, "Database")); - CodeEdge edge = new CodeEdge(); - edge.setId(sourceId + "->connects_to->" + dbNodeId); - edge.setKind(EdgeKind.CONNECTS_TO); - edge.setSourceId(sourceId); - edge.setTarget(targetRef); - edges.add(edge); - } } diff --git a/src/test/java/io/github/randomcodespace/iq/cache/CacheCoverageTest.java b/src/test/java/io/github/randomcodespace/iq/cache/CacheCoverageTest.java index a34839da..a71698e1 100644 --- a/src/test/java/io/github/randomcodespace/iq/cache/CacheCoverageTest.java +++ b/src/test/java/io/github/randomcodespace/iq/cache/CacheCoverageTest.java @@ -39,7 +39,7 @@ void hashEmptyFile(@TempDir Path tempDir) throws IOException { String hash = FileHasher.hash(empty); assertNotNull(hash); - assertEquals(32, hash.length()); + assertEquals(64, hash.length()); assertTrue(hash.matches("[0-9a-f]+")); } @@ -47,7 +47,7 @@ void hashEmptyFile(@TempDir Path tempDir) throws IOException { void hashEmptyString() { String hash = FileHasher.hashString(""); assertNotNull(hash); - assertEquals(32, hash.length()); + assertEquals(64, hash.length()); } @Test @@ -68,7 +68,7 @@ void hashLargeContent() { // 1 MB string String large = "x".repeat(1_000_000); String hash = FileHasher.hashString(large); - assertEquals(32, hash.length()); + assertEquals(64, hash.length()); } @Test diff --git a/src/test/java/io/github/randomcodespace/iq/cache/FileHasherTest.java b/src/test/java/io/github/randomcodespace/iq/cache/FileHasherTest.java index 742bc3e2..1523f1a6 100644 --- a/src/test/java/io/github/randomcodespace/iq/cache/FileHasherTest.java +++ b/src/test/java/io/github/randomcodespace/iq/cache/FileHasherTest.java @@ -21,7 +21,7 @@ void hashProducesDeterministicResult(@TempDir Path tempDir) throws IOException { String hash2 = FileHasher.hash(file); assertEquals(hash1, hash2, "Same file should produce same hash"); - assertEquals(32, hash1.length(), "MD5 hash should be 32 hex chars"); + assertEquals(64, hash1.length(), "SHA-256 hash should be 64 hex chars"); } @Test @@ -40,7 +40,7 @@ void hashStringProducesDeterministicResult() { String hash2 = FileHasher.hashString("test content"); assertEquals(hash1, hash2); - assertEquals(32, hash1.length()); + assertEquals(64, hash1.length()); } @Test From dd7586b24a1d79d7615ff0c25b5e639945ffe466 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Sat, 4 Apr 2026 14:34:26 +0000 Subject: [PATCH 2/2] checkpoint: pre-yolo 20260404-143426 --- .claude/settings.local.json | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 369fab44..ef864017 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -1,4 +1,9 @@ { + "spinnerTipsEnabled": false, "prefersReducedMotion": true, - "spinnerTipsEnabled": false + "permissions": { + "allow": [ + "Bash(rtk gain:*)" + ] + } }