Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion internal/storage/factory.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package storage

import (
"context"
"fmt"
"log"
"os"
Expand Down Expand Up @@ -204,6 +205,13 @@ type MigrateOptions struct {
// PartitionLookaheadDays is the number of future daily partitions to
// pre-create at boot. Defaults to 3 when zero.
PartitionLookaheadDays int
// Timeout, when > 0, bounds the AutoMigrate call. Without it,
// db.AutoMigrate inherits no deadline and an ALTER TABLE waiting on a
// Postgres relation lock can hang startup indefinitely. The timeout is
// applied via db.WithContext to the AutoMigrate call only — pre/post
// hooks (FTS5 triggers, legacy index drops) are not bounded since they
// don't take long-held locks. Zero preserves legacy unbounded behaviour.
Timeout time.Duration
}

// AutoMigrateModelsWithOptions is the option-driven variant of
Expand Down Expand Up @@ -251,7 +259,17 @@ func AutoMigrateModelsWithOptions(db *gorm.DB, driver string, opts MigrateOption
if !logsPartitioned {
migrateModels = append(migrateModels, &Log{})
}
if err := db.AutoMigrate(migrateModels...); err != nil {
// Apply a deadline to the AutoMigrate call when configured so a Postgres
// relation-lock wait cannot hang startup indefinitely. WithContext returns
// a session-scoped *gorm.DB; the parent db is unaffected for the post-
// migration helpers below.
migrator := db
if opts.Timeout > 0 {
ctx, cancel := context.WithTimeout(context.Background(), opts.Timeout)
defer cancel()
migrator = db.WithContext(ctx)
}
if err := migrator.AutoMigrate(migrateModels...); err != nil {
return fmt.Errorf("failed to migrate database: %w", err)
}

Expand Down
73 changes: 73 additions & 0 deletions internal/storage/migrate_timeout_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package storage

import (
"testing"
"time"
)

func TestMigrateTimeoutFromEnv(t *testing.T) {
cases := []struct {
name string
env string
want time.Duration
}{
{"default when unset", "", 60 * time.Second},
{"explicit 30s", "30", 30 * time.Second},
{"explicit 0 = opt out", "0", 0},
{"negative falls back", "-5", 60 * time.Second},
{"malformed falls back", "abc", 60 * time.Second},
{"caps at 1h", "7200", time.Hour},
{"surrounding whitespace", " 120 ", 120 * time.Second},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if tc.env == "" {
t.Setenv("DB_MIGRATE_TIMEOUT_SECS", "")
} else {
t.Setenv("DB_MIGRATE_TIMEOUT_SECS", tc.env)
}
if got := migrateTimeoutFromEnv(); got != tc.want {
t.Errorf("env=%q -> got %v, want %v", tc.env, got, tc.want)
}
})
}
}

// TestAutoMigrateModels_RespectsTimeout exercises the timeout-bounded path
// against an in-memory SQLite DB. SQLite migrations are too fast to actually
// trip the deadline, so this test asserts: (a) Timeout > 0 doesn't break
// the happy path, (b) Timeout=0 still works (legacy unbounded behaviour).
//
// A real Postgres lock-contention test would need a live Postgres + advisory
// lock and lives in pg_integration_test.go behind an external-DB build tag.
func TestAutoMigrateModels_RespectsTimeout(t *testing.T) {
t.Run("WithTimeout", func(t *testing.T) {
db, err := NewDatabase("sqlite", ":memory:")
if err != nil {
t.Fatalf("NewDatabase: %v", err)
}
defer func() { sqlDB, _ := db.DB(); _ = sqlDB.Close() }()
opts := MigrateOptions{Timeout: 30 * time.Second}
if err := AutoMigrateModelsWithOptions(db, "sqlite", opts); err != nil {
t.Fatalf("AutoMigrate with timeout: %v", err)
}
if !db.Migrator().HasTable("traces") {
t.Fatalf("expected traces table to be created")
}
})

t.Run("ZeroTimeoutLegacyPath", func(t *testing.T) {
db, err := NewDatabase("sqlite", ":memory:")
if err != nil {
t.Fatalf("NewDatabase: %v", err)
}
defer func() { sqlDB, _ := db.DB(); _ = sqlDB.Close() }()
opts := MigrateOptions{Timeout: 0}
if err := AutoMigrateModelsWithOptions(db, "sqlite", opts); err != nil {
t.Fatalf("AutoMigrate without timeout: %v", err)
}
if !db.Migrator().HasTable("traces") {
t.Fatalf("expected traces table to be created")
}
})
}
34 changes: 34 additions & 0 deletions internal/storage/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,39 @@ func partitionLookaheadFromEnv() int {
return n
}

// migrateTimeoutFromEnv reads DB_MIGRATE_TIMEOUT_SECS, defaulting to 60s
// when unset, malformed, or non-positive. The migration timeout bounds
// db.AutoMigrate so a Postgres relation-lock wait cannot hang startup
// indefinitely. Set to 0 explicitly to opt out (legacy unbounded behaviour).
//
// Cap at 1 hour — anyone needing a longer migration window should run
// schema changes out-of-band with versioned migrations and DB_AUTOMIGRATE=false.
func migrateTimeoutFromEnv() time.Duration {
const (
defaultTimeout = 60 * time.Second
maxTimeout = time.Hour
)
v := strings.TrimSpace(os.Getenv("DB_MIGRATE_TIMEOUT_SECS"))
if v == "" {
return defaultTimeout
}
n, err := strconv.Atoi(v)
if err != nil {
return defaultTimeout
}
if n == 0 {
return 0 // explicit opt-out — preserves legacy behaviour
}
if n < 0 {
return defaultTimeout
}
d := time.Duration(n) * time.Second
if d > maxTimeout {
return maxTimeout
}
return d
}

// likeOpFor returns the case-insensitive LIKE operator for the given driver.
// Postgres LIKE is case-sensitive; SQLite/MySQL LIKE is case-insensitive by default.
// Callers should embed the returned token directly into SQL fragments.
Expand Down Expand Up @@ -109,6 +142,7 @@ func NewRepository(metrics *telemetry.Metrics) (*Repository, error) {
opts := MigrateOptions{
PostgresPartitioning: strings.ToLower(strings.TrimSpace(os.Getenv("DB_POSTGRES_PARTITIONING"))),
PartitionLookaheadDays: partitionLookaheadFromEnv(),
Timeout: migrateTimeoutFromEnv(),
}
if err := AutoMigrateModelsWithOptions(db, driver, opts); err != nil {
return nil, err
Expand Down
Loading