diff --git a/src/main/java/io/github/randomcodespace/iq/analyzer/LayerClassifier.java b/src/main/java/io/github/randomcodespace/iq/analyzer/LayerClassifier.java index 3f6a4c39..32c1d732 100644 --- a/src/main/java/io/github/randomcodespace/iq/analyzer/LayerClassifier.java +++ b/src/main/java/io/github/randomcodespace/iq/analyzer/LayerClassifier.java @@ -36,7 +36,8 @@ public class LayerClassifier { ); private static final Set INFRA_NODE_KINDS = Set.of( - NodeKind.INFRA_RESOURCE, NodeKind.AZURE_RESOURCE, NodeKind.AZURE_FUNCTION + NodeKind.INFRA_RESOURCE, NodeKind.AZURE_RESOURCE, NodeKind.AZURE_FUNCTION, + NodeKind.SQL_ENTITY ); private static final Set INFRA_LANGUAGES = Set.of( diff --git a/src/main/java/io/github/randomcodespace/iq/detector/sql/SqlMigrationDetector.java b/src/main/java/io/github/randomcodespace/iq/detector/sql/SqlMigrationDetector.java new file mode 100644 index 00000000..1c69f4f1 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/detector/sql/SqlMigrationDetector.java @@ -0,0 +1,542 @@ +package io.github.randomcodespace.iq.detector.sql; + +import io.github.randomcodespace.iq.detector.AbstractRegexDetector; +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.detector.DetectorInfo; +import io.github.randomcodespace.iq.detector.DetectorResult; +import io.github.randomcodespace.iq.model.CodeEdge; +import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Detects schema-level entities (tables, views, 
schemas) from raw SQL DDL and + * framework-specific migration files (Flyway, Liquibase XML/YAML, Alembic, Rails, + * Prisma), emitting {@link NodeKind#SQL_ENTITY} nodes, {@link NodeKind#MIGRATION} + * nodes, and {@link EdgeKind#REFERENCES_TABLE} / {@link EdgeKind#MIGRATES} edges. + *

+ * Path discriminators are mandatory -- a plain {@code .py}, {@code .rb}, + * {@code .xml}, or {@code .yml} file will NOT be treated as a migration unless + * its path matches a framework pattern (Alembic additionally requires a content marker). + *

+ * Stateless -- all parsing state is method-local. + */ +@DetectorInfo( + name = "sql_migration", + category = "database", + description = "Extracts schema entities from raw SQL and migration files " + + "(Flyway, Liquibase, Alembic, Rails, Prisma)", + languages = {"sql", "python", "ruby", "xml", "yaml"}, + nodeKinds = {NodeKind.SQL_ENTITY, NodeKind.MIGRATION}, + edgeKinds = {EdgeKind.MIGRATES, EdgeKind.REFERENCES_TABLE}, + properties = {"schema", "table", "sql_object_type", "format", "version", + "indexes", "columns_added"} +) +@Component +public class SqlMigrationDetector extends AbstractRegexDetector { + + private static final Logger log = LoggerFactory.getLogger(SqlMigrationDetector.class); + + // -- Node id / property keys (extracted constants; used 3+ times below) -- + private static final String NS_SQL = "sql"; + private static final String NS_MIGRATION = "migration"; + private static final String PROP_SQL_OBJECT_TYPE = "sql_object_type"; + private static final String PROP_SCHEMA = "schema"; + private static final String PROP_TABLE = "table"; + private static final String PROP_FORMAT = "format"; + private static final String PROP_VERSION = "version"; + private static final String PROP_INDEXES = "indexes"; + private static final String PROP_COLUMNS_ADDED = "columns_added"; + private static final String PROP_APPLIED_TO = "applied_to"; + private static final String OBJECT_TABLE = "table"; + private static final String OBJECT_VIEW = "view"; + private static final String OBJECT_SCHEMA = "schema"; + + private static final String FMT_RAW = "raw"; + private static final String FMT_FLYWAY = "flyway"; + private static final String FMT_LIQUIBASE = "liquibase"; + private static final String FMT_ALEMBIC = "alembic"; + private static final String FMT_RAILS = "rails"; + private static final String FMT_PRISMA = "prisma"; + + // -- Path discriminators (normalized to forward slashes before matching) -- + private static final Pattern FLYWAY_PATH = Pattern.compile( + 
"(?:^|/)V\\d+(?:_\\d+)*+__.+\\.sql$", Pattern.CASE_INSENSITIVE); + private static final Pattern RAILS_PATH = Pattern.compile( + "(?:^|/)db/migrate/\\d{14}_.+\\.rb$"); + private static final Pattern ALEMBIC_PATH = Pattern.compile( + "(?:^|/)versions/.+\\.py$"); + private static final Pattern PRISMA_PATH = Pattern.compile( + "(?:^|/)migrations/.+/migration\\.sql$"); + private static final Pattern LIQUIBASE_PATH = Pattern.compile( + "(?:^|/)(?:changelog|db\\.changelog[^/]*)\\.(?:xml|ya?ml)$", + Pattern.CASE_INSENSITIVE); + + // -- Raw SQL DDL patterns -- + private static final Pattern SQL_CREATE_TABLE = Pattern.compile( + "CREATE\\s++TABLE\\s++(?:IF\\s++NOT\\s++EXISTS\\s++)?+(?:(\\w++)\\.)?(\\w++)", + Pattern.CASE_INSENSITIVE); + private static final Pattern SQL_CREATE_VIEW = Pattern.compile( + "CREATE\\s++(?:OR\\s++REPLACE\\s++)?+VIEW\\s++(?:IF\\s++NOT\\s++EXISTS\\s++)?+" + + "(?:(\\w++)\\.)?(\\w++)", + Pattern.CASE_INSENSITIVE); + private static final Pattern SQL_CREATE_SCHEMA = Pattern.compile( + "CREATE\\s++SCHEMA\\s++(?:IF\\s++NOT\\s++EXISTS\\s++)?+(\\w++)", + Pattern.CASE_INSENSITIVE); + private static final Pattern SQL_ALTER_TABLE_ADD = Pattern.compile( + "ALTER\\s++TABLE\\s++(?:(\\w++)\\.)?(\\w++)\\s++ADD\\s++(?:COLUMN\\s++)?+(\\w++)\\s++(\\w++)", + Pattern.CASE_INSENSITIVE); + private static final Pattern SQL_DROP_TABLE = Pattern.compile( + "DROP\\s++TABLE\\b", Pattern.CASE_INSENSITIVE); + private static final Pattern SQL_CREATE_INDEX = Pattern.compile( + "CREATE\\s++(?:UNIQUE\\s++)?+INDEX\\s++(?:IF\\s++NOT\\s++EXISTS\\s++)?+(\\w++)" + + "\\s++ON\\s++(?:(\\w++)\\.)?(\\w++)", + Pattern.CASE_INSENSITIVE); + private static final Pattern SQL_FK = Pattern.compile( + "FOREIGN\\s++KEY\\s*+\\([^)]*+\\)\\s++REFERENCES\\s++(?:(\\w++)\\.)?(\\w++)", + Pattern.CASE_INSENSITIVE); + + // -- Alembic op.* patterns -- + private static final Pattern ALEMBIC_MARKER = Pattern.compile( + "\\bfrom\\s++alembic\\b|\\bop\\.create_table\\b|\\bop\\.add_column\\b"); + private static 
final Pattern ALEMBIC_CREATE_TABLE = Pattern.compile( + "op\\.create_table\\(\\s*+['\"](\\w++)['\"]"); + private static final Pattern ALEMBIC_ADD_COLUMN = Pattern.compile( + "op\\.add_column\\(\\s*+['\"](\\w++)['\"]\\s*+,\\s*+sa\\.Column\\(\\s*+['\"](\\w++)['\"]"); + private static final Pattern ALEMBIC_CREATE_INDEX = Pattern.compile( + "op\\.create_index\\(\\s*+['\"](\\w++)['\"]\\s*+,\\s*+['\"](\\w++)['\"]"); + private static final Pattern ALEMBIC_CREATE_FK = Pattern.compile( + "op\\.create_foreign_key\\(\\s*+['\"][^'\"]*+['\"]\\s*+,\\s*+['\"](\\w++)['\"]\\s*+," + + "\\s*+['\"](\\w++)['\"]"); + + // -- Rails migration patterns -- + private static final Pattern RAILS_CREATE_TABLE = Pattern.compile( + "create_table\\s++:(\\w++)"); + private static final Pattern RAILS_ADD_COLUMN = Pattern.compile( + "add_column\\s++:(\\w++)\\s*+,\\s*+:(\\w++)"); + private static final Pattern RAILS_ADD_FK = Pattern.compile( + "add_foreign_key\\s++:(\\w++)\\s*+,\\s*+:(\\w++)"); + + // -- Liquibase XML tag patterns (simple regex over the raw text) -- + private static final Pattern LQ_CREATE_TABLE_XML = Pattern.compile( + "]*?\\btableName\\s*+=\\s*+\"(\\w++)\"[^>]*?" 
+ + "(?:\\bschemaName\\s*+=\\s*+\"(\\w++)\")?"); + private static final Pattern LQ_ADD_COLUMN_XML = Pattern.compile( + "]*?\\btableName\\s*+=\\s*+\"(\\w++)\""); + private static final Pattern LQ_ADD_FK_XML = Pattern.compile( + "]*?\\bbaseTableName\\s*+=\\s*+\"(\\w++)\"" + + "[^>]*?\\breferencedTableName\\s*+=\\s*+\"(\\w++)\""); + + // -- Liquibase YAML patterns (regex-based; avoids pulling in SnakeYAML here) -- + private static final Pattern LQ_CREATE_TABLE_YAML = Pattern.compile( + "createTable\\s*+:[^\\n]*+\\n(?:\\s++[^\\n]*+\\n)*?\\s++tableName\\s*+:\\s*+([\\w\"']++)"); + private static final Pattern LQ_ADD_FK_YAML = Pattern.compile( + "addForeignKeyConstraint\\s*+:[^\\n]*+\\n" + + "(?:\\s++[^\\n]*+\\n)*?\\s++baseTableName\\s*+:\\s*+([\\w\"']++)[^\\n]*+\\n" + + "(?:\\s++[^\\n]*+\\n)*?\\s++referencedTableName\\s*+:\\s*+([\\w\"']++)"); + + // -- Flyway version parsing -- + private static final Pattern FLYWAY_VERSION = Pattern.compile( + "^V(\\d++(?:_\\d++)*+)__", Pattern.CASE_INSENSITIVE); + private static final Pattern RAILS_VERSION = Pattern.compile( + "^(\\d{14})_"); + + @Override + public String getName() { + return "sql_migration"; + } + + @Override + public Set getSupportedLanguages() { + return Set.of("sql", "python", "ruby", "xml", "yaml"); + } + + @Override + public DetectorResult detect(DetectorContext ctx) { + String content = ctx.content(); + String filePath = ctx.filePath(); + if (content == null || content.isEmpty() || filePath == null) { + return DetectorResult.empty(); + } + String normalized = filePath.replace('\\', '/'); + String lang = ctx.language(); + String lowerName = extractFileName(normalized).toLowerCase(Locale.ROOT); + + String format = classifyFormat(normalized, lowerName, lang, content); + if (format == null) { + return DetectorResult.empty(); + } + + ParseState state = new ParseState(ctx, normalized); + switch (format) { + case FMT_FLYWAY -> { + state.migrationFormat = FMT_FLYWAY; + state.migrationVersion = 
parseFlywayVersion(lowerName); + parseRawSql(content, state); + } + case FMT_PRISMA -> { + state.migrationFormat = FMT_PRISMA; + // Version is the parent directory name. + state.migrationVersion = parsePrismaVersion(normalized); + parseRawSql(content, state); + } + case FMT_ALEMBIC -> { + state.migrationFormat = FMT_ALEMBIC; + parseAlembic(content, state); + } + case FMT_RAILS -> { + state.migrationFormat = FMT_RAILS; + state.migrationVersion = parseRailsVersion(lowerName); + parseRails(content, state); + } + case FMT_LIQUIBASE -> { + state.migrationFormat = FMT_LIQUIBASE; + if (lowerName.endsWith(".xml")) { + parseLiquibaseXml(content, state); + } else { + parseLiquibaseYaml(content, state); + } + } + case FMT_RAW -> parseRawSql(content, state); + default -> { /* unreachable */ } + } + + return state.toResult(); + } + + // -- Format classification (discriminator guards) -- + + private String classifyFormat(String path, String lowerName, String lang, String content) { + if (PRISMA_PATH.matcher(path).find()) return FMT_PRISMA; + if (FLYWAY_PATH.matcher(path).find()) return FMT_FLYWAY; + if (RAILS_PATH.matcher(path).find()) return FMT_RAILS; + if (LIQUIBASE_PATH.matcher(path).find()) return FMT_LIQUIBASE; + if (ALEMBIC_PATH.matcher(path).find() && ALEMBIC_MARKER.matcher(content).find()) { + return FMT_ALEMBIC; + } + // Raw .sql fallback -- only for actual SQL files (extension or declared language). 
+ if (lowerName.endsWith(".sql") || "sql".equals(lang)) { + return FMT_RAW; + } + return null; + } + + // -- Raw SQL parsing (shared by Flyway, Prisma, and bare .sql) -- + + private void parseRawSql(String content, ParseState state) { + for (IndexedLine line : iterLines(content)) { + parseSqlLine(line.text(), line.lineNumber(), state); + } + } + + private void parseSqlLine(String line, int lineNum, ParseState state) { + Matcher m = SQL_CREATE_TABLE.matcher(line); + if (m.find()) { + state.addOrGetSqlEntity(m.group(1), m.group(2), OBJECT_TABLE, lineNum); + return; + } + m = SQL_CREATE_VIEW.matcher(line); + if (m.find()) { + state.addOrGetSqlEntity(m.group(1), m.group(2), OBJECT_VIEW, lineNum); + return; + } + m = SQL_CREATE_SCHEMA.matcher(line); + if (m.find()) { + state.addOrGetSqlEntity(null, m.group(1), OBJECT_SCHEMA, lineNum); + return; + } + m = SQL_ALTER_TABLE_ADD.matcher(line); + if (m.find()) { + SqlEntityRef ref = state.addOrGetSqlEntity(m.group(1), m.group(2), OBJECT_TABLE, lineNum); + state.appendListProp(ref.id, PROP_COLUMNS_ADDED, m.group(3)); + return; + } + if (SQL_DROP_TABLE.matcher(line).find()) { + log.debug("Skipping DROP TABLE in {} at line {}", state.filePath, lineNum); + return; + } + m = SQL_CREATE_INDEX.matcher(line); + if (m.find()) { + String idxName = m.group(1); + SqlEntityRef ref = state.addOrGetSqlEntity(m.group(2), m.group(3), OBJECT_TABLE, lineNum); + state.appendListProp(ref.id, PROP_INDEXES, idxName); + return; + } + m = SQL_FK.matcher(line); + if (m.find() && state.lastTableId != null) { + // Capture source BEFORE resolving target -- addOrGetSqlEntity for a TABLE + // mutates lastTableId on the state object. 
+ String sourceId = state.lastTableId; + SqlEntityRef target = state.addOrGetSqlEntity(m.group(1), m.group(2), OBJECT_TABLE, lineNum); + state.addReferencesEdge(sourceId, target.id); + state.lastTableId = sourceId; // restore owning-table context for subsequent FKs + } + } + + // -- Alembic (Python) parsing -- + + private void parseAlembic(String content, ParseState state) { + for (IndexedLine line : iterLines(content)) { + String text = line.text(); + Matcher m = ALEMBIC_CREATE_TABLE.matcher(text); + if (m.find()) { + state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, line.lineNumber()); + continue; + } + m = ALEMBIC_ADD_COLUMN.matcher(text); + if (m.find()) { + SqlEntityRef ref = state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, line.lineNumber()); + state.appendListProp(ref.id, PROP_COLUMNS_ADDED, m.group(2)); + continue; + } + m = ALEMBIC_CREATE_INDEX.matcher(text); + if (m.find()) { + SqlEntityRef ref = state.addOrGetSqlEntity(null, m.group(2), OBJECT_TABLE, line.lineNumber()); + state.appendListProp(ref.id, PROP_INDEXES, m.group(1)); + continue; + } + m = ALEMBIC_CREATE_FK.matcher(text); + if (m.find()) { + String sourceId = state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, line.lineNumber()).id; + String targetId = state.addOrGetSqlEntity(null, m.group(2), OBJECT_TABLE, line.lineNumber()).id; + state.addReferencesEdge(sourceId, targetId); + } + } + } + + // -- Rails parsing -- + + private void parseRails(String content, ParseState state) { + for (IndexedLine line : iterLines(content)) { + String text = line.text(); + Matcher m = RAILS_CREATE_TABLE.matcher(text); + if (m.find()) { + state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, line.lineNumber()); + continue; + } + m = RAILS_ADD_COLUMN.matcher(text); + if (m.find()) { + SqlEntityRef ref = state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, line.lineNumber()); + state.appendListProp(ref.id, PROP_COLUMNS_ADDED, m.group(2)); + continue; + } + m = RAILS_ADD_FK.matcher(text); + if 
(m.find()) { + SqlEntityRef source = state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, line.lineNumber()); + SqlEntityRef target = state.addOrGetSqlEntity(null, m.group(2), OBJECT_TABLE, line.lineNumber()); + state.addReferencesEdge(source.id, target.id); + } + } + } + + // -- Liquibase XML parsing (regex-based; no DOM parse to keep things simple) -- + + private void parseLiquibaseXml(String content, ParseState state) { + Matcher m = LQ_CREATE_TABLE_XML.matcher(content); + while (m.find()) { + int lineNum = findLineNumber(content, m.start()); + state.addOrGetSqlEntity(m.group(2), m.group(1), OBJECT_TABLE, lineNum); + } + m = LQ_ADD_COLUMN_XML.matcher(content); + while (m.find()) { + int lineNum = findLineNumber(content, m.start()); + state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, lineNum); + } + m = LQ_ADD_FK_XML.matcher(content); + while (m.find()) { + int lineNum = findLineNumber(content, m.start()); + SqlEntityRef source = state.addOrGetSqlEntity(null, m.group(1), OBJECT_TABLE, lineNum); + SqlEntityRef target = state.addOrGetSqlEntity(null, m.group(2), OBJECT_TABLE, lineNum); + state.addReferencesEdge(source.id, target.id); + } + } + + // -- Liquibase YAML parsing -- + + private void parseLiquibaseYaml(String content, ParseState state) { + Matcher m = LQ_CREATE_TABLE_YAML.matcher(content); + while (m.find()) { + int lineNum = findLineNumber(content, m.start()); + state.addOrGetSqlEntity(null, stripQuotes(m.group(1)), OBJECT_TABLE, lineNum); + } + m = LQ_ADD_FK_YAML.matcher(content); + while (m.find()) { + int lineNum = findLineNumber(content, m.start()); + SqlEntityRef source = state.addOrGetSqlEntity(null, stripQuotes(m.group(1)), OBJECT_TABLE, lineNum); + SqlEntityRef target = state.addOrGetSqlEntity(null, stripQuotes(m.group(2)), OBJECT_TABLE, lineNum); + state.addReferencesEdge(source.id, target.id); + } + } + + // -- Helpers -- + + private static String extractFileName(String normalizedPath) { + int slash = normalizedPath.lastIndexOf('/'); + 
return slash >= 0 ? normalizedPath.substring(slash + 1) : normalizedPath; + } + + private static String stripQuotes(String s) { + if (s == null || s.length() < 2) return s; + char first = s.charAt(0); + char last = s.charAt(s.length() - 1); + if ((first == '"' || first == '\'') && first == last) { + return s.substring(1, s.length() - 1); + } + return s; + } + + private static String parseFlywayVersion(String fileName) { + Matcher m = FLYWAY_VERSION.matcher(fileName); + return m.find() ? m.group(1).replace('_', '.') : null; + } + + private static String parseRailsVersion(String fileName) { + Matcher m = RAILS_VERSION.matcher(fileName); + return m.find() ? m.group(1) : null; + } + + private static String parsePrismaVersion(String path) { + // .../migrations//migration.sql + int end = path.lastIndexOf("/migration.sql"); + if (end <= 0) return null; + int start = path.lastIndexOf('/', end - 1); + return start >= 0 ? path.substring(start + 1, end) : path.substring(0, end); + } + + /** + * Per-invocation state. NOT a field on the detector -- the detector itself + * is stateless; this record-like holder is constructed fresh per {@code detect()} call. + */ + private static final class ParseState { + final DetectorContext ctx; + final String filePath; + // Deterministic order: insertion order. We sort on emit. + final Map sqlEntities = new LinkedHashMap<>(); + final Map refEdges = new LinkedHashMap<>(); + // Most recently touched table id (for raw SQL FKs that land on the next line). + String lastTableId; + // Migration metadata (null for non-migration raw SQL). + String migrationFormat; + String migrationVersion; + + ParseState(DetectorContext ctx, String filePath) { + this.ctx = ctx; + this.filePath = filePath; + } + + SqlEntityRef addOrGetSqlEntity(String schema, String name, String objectType, int lineNum) { + String normSchema = schema != null ? 
schema : "";
+            String id = NS_SQL + ":" + normSchema + ":" + name;
+            CodeNode node = sqlEntities.get(id);
+            if (node == null) {
+                node = new CodeNode(id, NodeKind.SQL_ENTITY, name);
+                node.setFqn(normSchema.isEmpty() ? name : normSchema + "." + name);
+                node.setModule(ctx.moduleName());
+                node.setFilePath(filePath);
+                node.setLineStart(lineNum);
+                Map props = new TreeMap<>();
+                props.put(PROP_SQL_OBJECT_TYPE, objectType);
+                if (schema != null) {
+                    props.put(PROP_SCHEMA, schema);
+                }
+                props.put(PROP_TABLE, name);
+                node.setProperties(props);
+                sqlEntities.put(id, node);
+            }
+            if (OBJECT_TABLE.equals(objectType)) {
+                lastTableId = id;
+            }
+            return new SqlEntityRef(id, node);
+        }
+
+        /** Adds {@code value} to the comma-separated list stored under {@code key}, skipping exact duplicates. */
+        void appendListProp(String nodeId, String key, String value) {
+            CodeNode node = sqlEntities.get(nodeId);
+            if (node == null) return;
+            var props = node.getProperties();
+            Object existing = props.get(key);
+            if (!(existing instanceof String current) || current.isEmpty()) {
+                props.put(key, value); // first entry (an absent, empty, or non-string value is replaced)
+                return;
+            }
+            // De-duplicate on whole entries. A substring test would wrongly treat
+            // "id" as already present once "user_id" had been recorded.
+            boolean present = List.of(current.split(",")).contains(value);
+            if (!present) {
+                props.put(key, current + "," + value);
+            }
+        }
+
+        void addReferencesEdge(String sourceId, String targetId) {
+            if (sourceId == null || targetId == null || sourceId.equals(targetId)) {
+                return;
+            }
+            String edgeId = sourceId + "->" + targetId + ":references_table";
+            if (refEdges.containsKey(edgeId)) return;
+            CodeEdge edge = new CodeEdge();
+            edge.setId(edgeId);
+            edge.setKind(EdgeKind.REFERENCES_TABLE);
+            edge.setSourceId(sourceId);
+            edge.setTarget(new CodeNode(targetId, null, null));
+            refEdges.put(edgeId, edge);
+        }
+
+        DetectorResult toResult() {
+            List nodes = new ArrayList<>(sqlEntities.values());
+            List edges = new ArrayList<>(refEdges.values());
+
+            // Build the MIGRATION node (if applicable) and link to every SQL_ENTITY
+            // we created/altered -- but ONLY if we actually found schema entities.
+ if (migrationFormat != null && !sqlEntities.isEmpty()) { + String migId = NS_MIGRATION + ":" + filePath; + CodeNode migNode = new CodeNode(migId, NodeKind.MIGRATION, filePath); + migNode.setFqn(filePath); + migNode.setModule(ctx.moduleName()); + migNode.setFilePath(filePath); + migNode.setLineStart(1); + + Map migProps = new TreeMap<>(); + migProps.put(PROP_FORMAT, migrationFormat); + if (migrationVersion != null) { + migProps.put(PROP_VERSION, migrationVersion); + } + List appliedTo = new ArrayList<>(sqlEntities.keySet()); + appliedTo.sort(Comparator.naturalOrder()); + migProps.put(PROP_APPLIED_TO, String.join(",", appliedTo)); + migNode.setProperties(migProps); + + nodes.add(migNode); + + for (String sqlId : appliedTo) { + CodeEdge migratesEdge = new CodeEdge(); + migratesEdge.setId(migId + "->" + sqlId + ":migrates"); + migratesEdge.setKind(EdgeKind.MIGRATES); + migratesEdge.setSourceId(migId); + migratesEdge.setTarget(new CodeNode(sqlId, null, null)); + edges.add(migratesEdge); + } + } + + // Determinism: sort by id before emitting. + nodes.sort(Comparator.comparing(CodeNode::getId)); + edges.sort(Comparator.comparing(CodeEdge::getId)); + return DetectorResult.of(nodes, edges); + } + } + + private record SqlEntityRef(String id, CodeNode node) {} +} diff --git a/src/main/java/io/github/randomcodespace/iq/detector/sql/package-info.java b/src/main/java/io/github/randomcodespace/iq/detector/sql/package-info.java new file mode 100644 index 00000000..fadf06e2 --- /dev/null +++ b/src/main/java/io/github/randomcodespace/iq/detector/sql/package-info.java @@ -0,0 +1,8 @@ +/** + * SQL and migration detectors. + *

+ * Extracts schema-level entities (tables, views, schemas) from raw SQL DDL and + * framework-specific migration files (Flyway, Liquibase, Alembic, Rails, Prisma), + * and links migrations to the SQL entities they create or alter. + */ +package io.github.randomcodespace.iq.detector.sql; diff --git a/src/main/java/io/github/randomcodespace/iq/model/EdgeKind.java b/src/main/java/io/github/randomcodespace/iq/model/EdgeKind.java index 843ec193..52177c79 100644 --- a/src/main/java/io/github/randomcodespace/iq/model/EdgeKind.java +++ b/src/main/java/io/github/randomcodespace/iq/model/EdgeKind.java @@ -32,7 +32,8 @@ public enum EdgeKind { SENDS_TO("sends_to"), RECEIVES_FROM("receives_from"), PROTECTS("protects"), - RENDERS("renders"); + RENDERS("renders"), + REFERENCES_TABLE("references_table"); private final String value; diff --git a/src/main/java/io/github/randomcodespace/iq/model/NodeKind.java b/src/main/java/io/github/randomcodespace/iq/model/NodeKind.java index 76b97abc..f1760bb1 100644 --- a/src/main/java/io/github/randomcodespace/iq/model/NodeKind.java +++ b/src/main/java/io/github/randomcodespace/iq/model/NodeKind.java @@ -38,7 +38,8 @@ public enum NodeKind { MIDDLEWARE("middleware"), HOOK("hook"), SERVICE("service"), - EXTERNAL("external"); + EXTERNAL("external"), + SQL_ENTITY("sql_entity"); private final String value; diff --git a/src/test/java/io/github/randomcodespace/iq/detector/sql/SqlMigrationDetectorTest.java b/src/test/java/io/github/randomcodespace/iq/detector/sql/SqlMigrationDetectorTest.java new file mode 100644 index 00000000..7fdb0ad1 --- /dev/null +++ b/src/test/java/io/github/randomcodespace/iq/detector/sql/SqlMigrationDetectorTest.java @@ -0,0 +1,369 @@ +package io.github.randomcodespace.iq.detector.sql; + +import io.github.randomcodespace.iq.detector.DetectorContext; +import io.github.randomcodespace.iq.detector.DetectorResult; +import io.github.randomcodespace.iq.detector.DetectorTestUtils; +import io.github.randomcodespace.iq.model.CodeEdge; 
+import io.github.randomcodespace.iq.model.CodeNode; +import io.github.randomcodespace.iq.model.EdgeKind; +import io.github.randomcodespace.iq.model.NodeKind; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class SqlMigrationDetectorTest { + + private final SqlMigrationDetector detector = new SqlMigrationDetector(); + + // -- Positive: raw SQL -- + + @Test + void rawSqlCreateTableEmitsEntity() { + String sql = """ + CREATE TABLE users ( + id INT PRIMARY KEY, + name VARCHAR(100) + ); + CREATE VIEW active_users AS SELECT * FROM users; + CREATE SCHEMA analytics; + """; + DetectorContext ctx = new DetectorContext("schema.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + + List sqlEntities = sqlEntitiesOf(result); + assertEquals(3, sqlEntities.size(), "expected 3 SQL_ENTITY nodes"); + assertTrue(hasEntity(sqlEntities, "users", "table")); + assertTrue(hasEntity(sqlEntities, "active_users", "view")); + assertTrue(hasEntity(sqlEntities, "analytics", "schema")); + } + + @Test + void foreignKeyEmitsReferencesTableEdge() { + String sql = """ + CREATE TABLE users (id INT PRIMARY KEY); + CREATE TABLE orders ( + id INT PRIMARY KEY, + user_id INT, + FOREIGN KEY (user_id) REFERENCES users(id) + ); + """; + DetectorContext ctx = new DetectorContext("schema.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + + boolean hasFk = result.edges().stream() + .anyMatch(e -> e.getKind() == EdgeKind.REFERENCES_TABLE + && e.getSourceId().endsWith(":orders") + && e.getTarget().getId().endsWith(":users")); + assertTrue(hasFk, "expected REFERENCES_TABLE edge orders -> users"); + } + + @Test + void dropTableIsSkipped() { + String sql = "DROP TABLE obsolete_thing;"; + DetectorContext ctx = new 
DetectorContext("cleanup.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty(), "DROP TABLE should not emit nodes"); + } + + @Test + void createIndexEnrichesOwningTable() { + String sql = """ + CREATE TABLE users (id INT); + CREATE INDEX idx_users_id ON users(id); + """; + DetectorContext ctx = new DetectorContext("ix.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + CodeNode users = firstEntity(result, "users"); + assertNotNull(users); + assertEquals("idx_users_id", users.getProperties().get("indexes")); + } + + @Test + void alterTableAddColumnEnrichesEntity() { + String sql = """ + CREATE TABLE users (id INT); + ALTER TABLE users ADD COLUMN email VARCHAR(255); + """; + DetectorContext ctx = new DetectorContext("alter.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + CodeNode users = firstEntity(result, "users"); + assertNotNull(users); + assertEquals("email", users.getProperties().get("columns_added")); + } + + // -- Positive: Flyway -- + + @Test + void flywayFilenameParsedAsVersionedMigration() { + String sql = "CREATE TABLE customers (id INT PRIMARY KEY);"; + DetectorContext ctx = new DetectorContext( + "src/main/resources/db/migration/V1_2__add_customers.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + + CodeNode migration = firstMigration(result); + assertNotNull(migration, "expected MIGRATION node for Flyway file"); + assertEquals("flyway", migration.getProperties().get("format")); + assertEquals("1.2", migration.getProperties().get("version")); + + long migrates = result.edges().stream() + .filter(e -> e.getKind() == EdgeKind.MIGRATES).count(); + assertEquals(1, migrates); + } + + // -- Positive: Alembic -- + + @Test + void alembicOpCreateTableDetected() { + String py = """ + from alembic import op + import sqlalchemy as sa + + def upgrade(): + op.create_table('accounts', sa.Column('id', sa.Integer())) + op.add_column('accounts', sa.Column('email', 
sa.String())) + op.create_foreign_key('fk_o_a', 'orders', 'accounts', ['account_id'], ['id']) + """; + DetectorContext ctx = new DetectorContext( + "alembic/versions/abc123_create_accounts.py", "python", py); + DetectorResult result = detector.detect(ctx); + + assertTrue(hasEntity(sqlEntitiesOf(result), "accounts", "table")); + CodeNode accounts = firstEntity(result, "accounts"); + assertNotNull(accounts); + assertEquals("email", accounts.getProperties().get("columns_added")); + assertTrue(result.edges().stream() + .anyMatch(e -> e.getKind() == EdgeKind.REFERENCES_TABLE)); + CodeNode mig = firstMigration(result); + assertNotNull(mig); + assertEquals("alembic", mig.getProperties().get("format")); + } + + // -- Positive: Liquibase XML -- + + @Test + void liquibaseXmlChangeSetDetected() { + String xml = """ + + + + + + + + + + """; + DetectorContext ctx = new DetectorContext( + "src/main/resources/db/changelog.xml", "xml", xml); + DetectorResult result = detector.detect(ctx); + + assertTrue(hasEntity(sqlEntitiesOf(result), "products", "table")); + assertTrue(result.edges().stream() + .anyMatch(e -> e.getKind() == EdgeKind.REFERENCES_TABLE + && e.getSourceId().endsWith(":line_items") + && e.getTarget().getId().endsWith(":products"))); + assertEquals("liquibase", firstMigration(result).getProperties().get("format")); + } + + // -- Positive: Liquibase YAML -- + + @Test + void liquibaseYamlChangeSetDetected() { + String yaml = """ + databaseChangeLog: + - changeSet: + id: 1 + author: bob + changes: + - createTable: + tableName: invoices + columns: + - column: + name: id + type: int + - addForeignKeyConstraint: + baseTableName: invoice_items + referencedTableName: invoices + baseColumnNames: invoice_id + referencedColumnNames: id + """; + DetectorContext ctx = new DetectorContext( + "src/main/resources/db/db.changelog-master.yml", "yaml", yaml); + DetectorResult result = detector.detect(ctx); + + assertTrue(hasEntity(sqlEntitiesOf(result), "invoices", "table"), + "invoices 
table must be detected from Liquibase YAML"); + assertTrue(result.edges().stream() + .anyMatch(e -> e.getKind() == EdgeKind.REFERENCES_TABLE + && e.getSourceId().endsWith(":invoice_items") + && e.getTarget().getId().endsWith(":invoices"))); + } + + // -- Positive: Rails -- + + @Test + void railsCreateTableDetected() { + String rb = """ + class CreateOrders < ActiveRecord::Migration[7.0] + def change + create_table :orders do |t| + t.integer :customer_id + t.timestamps + end + add_column :orders, :note, :text + add_foreign_key :orders, :customers + end + end + """; + DetectorContext ctx = new DetectorContext( + "db/migrate/20240115120000_create_orders.rb", "ruby", rb); + DetectorResult result = detector.detect(ctx); + + assertTrue(hasEntity(sqlEntitiesOf(result), "orders", "table")); + CodeNode orders = firstEntity(result, "orders"); + assertEquals("note", orders.getProperties().get("columns_added")); + assertTrue(result.edges().stream() + .anyMatch(e -> e.getKind() == EdgeKind.REFERENCES_TABLE + && e.getSourceId().endsWith(":orders") + && e.getTarget().getId().endsWith(":customers"))); + + CodeNode mig = firstMigration(result); + assertEquals("rails", mig.getProperties().get("format")); + assertEquals("20240115120000", mig.getProperties().get("version")); + } + + // -- Positive: Prisma -- + + @Test + void prismaMigrationSqlDetected() { + String sql = "CREATE TABLE Post (id INT PRIMARY KEY);"; + DetectorContext ctx = new DetectorContext( + "prisma/migrations/20240101120000_init/migration.sql", "sql", sql); + DetectorResult result = detector.detect(ctx); + + assertTrue(hasEntity(sqlEntitiesOf(result), "Post", "table")); + CodeNode mig = firstMigration(result); + assertEquals("prisma", mig.getProperties().get("format")); + assertEquals("20240101120000_init", mig.getProperties().get("version")); + } + + // -- Negative -- + + @Test + void plainPythonFileIgnored() { + String py = """ + def hello(): + print("op.create_table is just a comment here") + """; + 
DetectorContext ctx = new DetectorContext("app/utils.py", "python", py); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty(), + "non-alembic .py outside versions/ must not produce nodes"); + } + + @Test + void plainYamlIgnored() { + String yaml = """ + name: build + on: + push: + branches: [main] + jobs: + test: + runs-on: ubuntu-latest + """; + DetectorContext ctx = new DetectorContext(".github/workflows/ci.yml", "yaml", yaml); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty(), + "arbitrary YAML must not produce sql_migration nodes"); + } + + @Test + void emptyContentReturnsEmptyResult() { + DetectorContext ctx = new DetectorContext("empty.sql", "sql", ""); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty()); + assertTrue(result.edges().isEmpty()); + } + + @Test + void alembicPathWithoutMarkerIsIgnored() { + // Even under versions/, without the alembic marker we must NOT fire. + String py = """ + # random script that happens to live under versions/ + def helper(): + return 1 + """; + DetectorContext ctx = new DetectorContext( + "alembic/versions/abc.py", "python", py); + DetectorResult result = detector.detect(ctx); + assertTrue(result.nodes().isEmpty()); + } + + // -- Determinism -- + + @Test + void determinismIdenticalOutputAcrossRuns() { + String sql = """ + CREATE TABLE a (id INT); + CREATE TABLE b (id INT, FOREIGN KEY (id) REFERENCES a(id)); + CREATE TABLE c (id INT, FOREIGN KEY (id) REFERENCES a(id)); + CREATE INDEX ix1 ON a(id); + CREATE INDEX ix2 ON a(id); + """; + DetectorContext ctx = new DetectorContext( + "db/migration/V1__multi.sql", "sql", sql); + DetectorTestUtils.assertDeterministic(detector, ctx); + + // Stronger check: byte-equal ID order across runs. 
+ DetectorResult r1 = detector.detect(ctx); + DetectorResult r2 = detector.detect(ctx); + assertEquals( + r1.nodes().stream().map(CodeNode::getId).toList(), + r2.nodes().stream().map(CodeNode::getId).toList(), + "node id order must be byte-equal across runs"); + assertEquals( + r1.edges().stream().map(CodeEdge::getId).toList(), + r2.edges().stream().map(CodeEdge::getId).toList(), + "edge id order must be byte-equal across runs"); + } + + // -- Helpers -- + + private static List sqlEntitiesOf(DetectorResult r) { + return r.nodes().stream().filter(n -> n.getKind() == NodeKind.SQL_ENTITY).toList(); + } + + private static boolean hasEntity(List nodes, String name, String type) { + return nodes.stream().anyMatch(n -> + name.equals(n.getLabel()) + && type.equals(n.getProperties().get("sql_object_type"))); + } + + private static CodeNode firstEntity(DetectorResult r, String name) { + return r.nodes().stream() + .filter(n -> n.getKind() == NodeKind.SQL_ENTITY && name.equals(n.getLabel())) + .findFirst().orElse(null); + } + + private static CodeNode firstMigration(DetectorResult r) { + return r.nodes().stream() + .filter(n -> n.getKind() == NodeKind.MIGRATION) + .findFirst().orElse(null); + } + + @SuppressWarnings("unused") + private static void requireFalse(boolean cond, String msg) { + assertFalse(cond, msg); + } +} diff --git a/src/test/java/io/github/randomcodespace/iq/model/EdgeKindTest.java b/src/test/java/io/github/randomcodespace/iq/model/EdgeKindTest.java index 8d928511..b623bd54 100644 --- a/src/test/java/io/github/randomcodespace/iq/model/EdgeKindTest.java +++ b/src/test/java/io/github/randomcodespace/iq/model/EdgeKindTest.java @@ -7,8 +7,9 @@ class EdgeKindTest { @Test - void shouldHave27Values() { - assertEquals(27, EdgeKind.values().length, "EdgeKind must have exactly 27 types"); + void shouldHave28Values() { + assertEquals(28, EdgeKind.values().length, + "EdgeKind must have exactly 28 types (includes REFERENCES_TABLE)"); } @Test diff --git 
a/src/test/java/io/github/randomcodespace/iq/model/ModelCoverageTest.java b/src/test/java/io/github/randomcodespace/iq/model/ModelCoverageTest.java index 991e0a9d..6a8ba1d3 100644 --- a/src/test/java/io/github/randomcodespace/iq/model/ModelCoverageTest.java +++ b/src/test/java/io/github/randomcodespace/iq/model/ModelCoverageTest.java @@ -74,8 +74,8 @@ void specificKindValues() { } @Test - void has32Values() { - assertEquals(33, NodeKind.values().length); + void has34Values() { + assertEquals(34, NodeKind.values().length); } @Test @@ -164,8 +164,8 @@ void specificEdgeKindValues() { } @Test - void has27Values() { - assertEquals(27, EdgeKind.values().length); + void has28Values() { + assertEquals(28, EdgeKind.values().length); } } @@ -451,6 +451,7 @@ void nodeKindContainsAllExpectedKinds() { assertNotNull(NodeKind.MIDDLEWARE); assertNotNull(NodeKind.HOOK); assertNotNull(NodeKind.SERVICE); + assertNotNull(NodeKind.SQL_ENTITY); } @Test @@ -482,6 +483,7 @@ void edgeKindContainsAllExpectedKinds() { assertNotNull(EdgeKind.RECEIVES_FROM); assertNotNull(EdgeKind.PROTECTS); assertNotNull(EdgeKind.RENDERS); + assertNotNull(EdgeKind.REFERENCES_TABLE); } } diff --git a/src/test/java/io/github/randomcodespace/iq/model/NodeKindTest.java b/src/test/java/io/github/randomcodespace/iq/model/NodeKindTest.java index f8debef7..86edf64a 100644 --- a/src/test/java/io/github/randomcodespace/iq/model/NodeKindTest.java +++ b/src/test/java/io/github/randomcodespace/iq/model/NodeKindTest.java @@ -7,8 +7,9 @@ class NodeKindTest { @Test - void shouldHave31Values() { - assertEquals(33, NodeKind.values().length, "NodeKind must have exactly 33 types (includes EXTERNAL)"); + void shouldHave34Values() { + assertEquals(34, NodeKind.values().length, + "NodeKind must have exactly 34 types (includes EXTERNAL, SQL_ENTITY)"); } @Test