diff --git a/internal/storage/factory.go b/internal/storage/factory.go index 733e142..633fc65 100644 --- a/internal/storage/factory.go +++ b/internal/storage/factory.go @@ -69,6 +69,25 @@ func NewDatabase(driver, dsn string) (*gorm.DB, error) { db, err := gorm.Open(dialector, &gorm.Config{ Logger: logger.Default.LogMode(logger.Error), + // RAN-49: never emit FK constraints during AutoMigrate. + // + // (1) Async ingestion: spans/logs can arrive before their parent trace, + // so a real FK on (spans|logs).trace_id → traces.trace_id would reject + // valid out-of-order writes — which is why the MySQL path explicitly + // drops fk_traces_spans / fk_traces_logs after AutoMigrate. + // + // (2) RAN-21 made trace identity tenant-scoped: traces no longer carries + // a single-column unique on trace_id (only the composite + // (tenant_id, trace_id)). On Postgres, CREATE TABLE spans (... FK + // trace_id REFERENCES traces(trace_id)) then fails at DDL time with + // SQLSTATE 42830 ("there is no unique constraint matching given keys + // for referenced table"), aborting boot on a fresh DB. SQLite hides + // this because it does not validate FK targets at CREATE TABLE time. + // + // The model-level `constraint:false` GORM tag is not honored by + // gorm v2's relationship parser, so the only reliable way to suppress + // FK creation across all drivers is this config flag. + DisableForeignKeyConstraintWhenMigrating: true, }) if err != nil { // Never surface the DSN in error wraps — pgx occasionally embeds connection @@ -168,7 +187,11 @@ func getEnvPoolDuration(key string, fallback time.Duration) time.Duration { func AutoMigrateModels(db *gorm.DB, driver string) error { driver = strings.ToLower(driver) - // Disable FK checks during migration for MySQL + // Disable FK checks during migration for MySQL. + // New databases will not get FKs created (DisableForeignKeyConstraintWhenMigrating + // in NewDatabase), but legacy MySQL DBs may still carry fk_traces_spans / + // fk_traces_logs from before RAN-49 — toggling FK_CHECKS=0 keeps the + // post-migrate DROP statements below safe regardless of legacy state. if driver == "mysql" { db.Exec("SET FOREIGN_KEY_CHECKS = 0") log.Println("🔓 Disabled foreign key checks for migration") @@ -187,12 +210,15 @@ func AutoMigrateModels(db *gorm.DB, driver string) error { log.Printf("⚠️ legacy trace_id unique index drop failed: %v", err) } - // Drop foreign keys that AutoMigrate may have created (MySQL) + // Legacy MySQL cleanup: drop FKs that pre-RAN-49 migrations created. Fresh + // MySQL DBs after RAN-49 won't have these (FK creation is now disabled at + // the gorm.Config layer), but pre-existing deployments still need this + // drop to keep async ingestion non-blocking. if driver == "mysql" { db.Exec("ALTER TABLE spans DROP FOREIGN KEY fk_traces_spans") db.Exec("ALTER TABLE logs DROP FOREIGN KEY fk_traces_logs") db.Exec("SET FOREIGN_KEY_CHECKS = 1") - log.Println("🔓 Dropped FK constraints for async ingestion compatibility") + log.Println("🔓 Dropped legacy FK constraints (no-op on fresh DBs)") } // Postgres: enable pg_trgm and create a GIN index on logs.body for fuzzy ILIKE search. diff --git a/internal/storage/pg_integration_test.go b/internal/storage/pg_integration_test.go index a1a2bbd..46e04b3 100644 --- a/internal/storage/pg_integration_test.go +++ b/internal/storage/pg_integration_test.go @@ -348,6 +348,79 @@ func TestPG_PurgeTracesBatched_OrphanSpanSweep_NOT_IN(t *testing.T) { } } +// TestPG_AutoMigrate_FromEmpty_NoSpansTracesFK is the RAN-49 regression +// guard. It boots a fresh Postgres 16 container, runs AutoMigrateModels +// directly against the empty schema, and asserts: +// +// 1. The migrator completes (no SQLSTATE 42830 from the legacy +// spans.trace_id → traces.trace_id FK that pre-RAN-49 GORM emitted). +// 2. No FK constraint exists on spans or logs that references traces — +// async ingestion can land child rows before their parent trace, and +// RAN-21 made trace identity composite, so a single-column FK on +// trace_id would either reject valid writes or fail at DDL time. +// +// SQLite cannot exercise this path: it does not validate FK targets at +// CREATE TABLE time, which is exactly why RAN-21 silently regressed past +// CI. Keep this test on the integration build tag so the regression +// class can not sneak back in. +func TestPG_AutoMigrate_FromEmpty_NoSpansTracesFK(t *testing.T) { + ctx := context.Background() + + pgContainer, err := postgres.Run(ctx, "postgres:16-alpine", + postgres.WithDatabase("otel_test"), + postgres.WithUsername("otel"), + postgres.WithPassword("otel"), + postgres.BasicWaitStrategies(), + ) + if err != nil { + t.Skipf("docker unavailable, skipping pg integration tests: %v", err) + } + defer func() { _ = pgContainer.Terminate(ctx) }() + + dsn, err := pgContainer.ConnectionString(ctx, "sslmode=disable") + if err != nil { + t.Fatalf("ConnectionString: %v", err) + } + + db, err := NewDatabase("postgres", dsn) + if err != nil { + t.Fatalf("NewDatabase(postgres): %v", err) + } + defer func() { + if sqlDB, dbErr := db.DB(); dbErr == nil { + _ = sqlDB.Close() + } + }() + + if err := AutoMigrateModels(db, "postgres"); err != nil { + t.Fatalf("AutoMigrateModels(postgres) on empty DB failed (RAN-49 regression): %v", err) + } + + // Assert no FK from spans/logs references traces. + rows, err := db.Raw(` + SELECT tc.table_name, tc.constraint_name, ccu.table_name AS foreign_table + FROM information_schema.table_constraints tc + JOIN information_schema.constraint_column_usage ccu + ON tc.constraint_name = ccu.constraint_name + AND tc.table_schema = ccu.table_schema + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_schema = 'public' + AND tc.table_name IN ('spans','logs') + AND ccu.table_name = 'traces'`).Rows() + if err != nil { + t.Fatalf("information_schema FK lookup: %v", err) + } + defer rows.Close() + + for rows.Next() { + var table, name, foreignTable string + if scanErr := rows.Scan(&table, &name, &foreignTable); scanErr != nil { + t.Fatalf("FK row scan: %v", scanErr) + } + t.Errorf("unexpected FK %s on %s referencing %s — async ingestion expects no FK back to traces (RAN-49)", name, table, foreignTable) + } +} + // TestPG_AutoMigrate_BlobTypesBecomeBytea inspects information_schema to // assert that CompressedText fields are materialized as bytea columns on // Postgres — proves the GormDBDataType dialect mapping survives migration.