diff --git a/.github/workflows/package-smoke.yml b/.github/workflows/package-smoke.yml
index 09b5a803..a01e0a0d 100644
--- a/.github/workflows/package-smoke.yml
+++ b/.github/workflows/package-smoke.yml
@@ -115,3 +115,29 @@ jobs:
# Binary runs (no DB needed for these).
/usr/bin/openwatch --version
/usr/bin/openwatch check-config
+
+ # Full package-UPGRADE path: build an old (release 1) + new (release 2) RPM,
+ # then in a rockylinux:9 container install the old one, stand up Postgres,
+ # roll the schema back one migration, and `rpm -U` the new one — asserting
+ # the %post scriptlet migrates the DB to head, takes a pre-upgrade backup,
+ # and stop/starts the service. This is the half the per-distro `smoke` job
+ # (a fresh install) does not cover. The driver script builds both RPMs and
+ # runs the container; it uses --network host (so the container's dnf reaches
+ # mirrors) and a throwaway Postgres on port 55432.
+ upgrade:
+ name: Package upgrade (rpm -U auto-migrate)
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-go@v5
+ with:
+ go-version: '1.26.4'
+ - uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+ cache: 'npm'
+ cache-dependency-path: frontend/package-lock.json
+ - name: Install rpmbuild
+ run: sudo apt-get update && sudo apt-get install -y rpm
+ - name: Build old+new RPMs and run the container upgrade test
+ run: bash packaging/tests/run-upgrade-container-test.sh
diff --git a/.gitignore b/.gitignore
index 60eca9f7..e020773f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -439,6 +439,9 @@ frontend_test.log
*test*.md
*test*.js
*test*.cjs
+# Exception: committed test harness scripts under packaging/tests/ (e.g. the
+# container upgrade test) are real tracked files, not transient scratch.
+!packaging/tests/*test*.sh
*test*.mjs
*test*.ts
*test*.tsx
diff --git a/cmd/openwatch/main.go b/cmd/openwatch/main.go
index e7201c86..5ab69ed9 100644
--- a/cmd/openwatch/main.go
+++ b/cmd/openwatch/main.go
@@ -34,6 +34,7 @@ import (
"github.com/Hanalyx/openwatch/internal/db"
"github.com/Hanalyx/openwatch/internal/db/migrations"
+ "github.com/Hanalyx/openwatch/internal/dbbackup"
"github.com/Hanalyx/openwatch/internal/eventbus"
"github.com/Hanalyx/openwatch/internal/exception"
"github.com/Hanalyx/openwatch/internal/group"
@@ -646,15 +647,36 @@ func parseLogLevel(s string) slog.Level {
}
}
-// cmdMigrate connects to the configured database, runs goose Up for every
-// pending migration, and prints the resulting version.
-func cmdMigrate(cfg *config.Config, _ []string, stdout, stderr *os.File) int {
+// cmdMigrate connects to the configured database and runs goose Up for
+// every pending migration.
+//
+// Flags:
+//
+// --status report the current version + whether migrations are
+// pending, WITHOUT applying anything (used by the
+// package upgrade scriptlet and by operators).
+// --backup-dir
pg_dump to as a restore point BEFORE applying.
+// Skipped when the DB has no schema yet (fresh install,
+// nothing to back up). If the backup fails the command
+// fails WITHOUT migrating — we never migrate without the
+// restore point we promised.
+func cmdMigrate(cfg *config.Config, args []string, stdout, stderr *os.File) int {
+ fs := flag.NewFlagSet("migrate", flag.ContinueOnError)
+ fs.SetOutput(stderr)
+ backupDir := fs.String("backup-dir", "", "pg_dump to this directory before applying (skipped when the DB has no schema yet)")
+ statusOnly := fs.Bool("status", false, "report current version + pending count without applying")
+ if err := fs.Parse(args); err != nil {
+ return 1
+ }
+
if err := cfg.Validate(); err != nil {
fmt.Fprintf(stderr, "openwatch migrate: invalid config:\n%v\n", err)
return 1
}
- ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+ // Generous timeout: a pg_dump of a large DB before migrating can take
+ // minutes; this is an operator/scriptlet command, not a hot path.
+ ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()
pool, err := db.NewPool(ctx, cfg.Database.DSN, cfg.Database.MaxConnections)
@@ -664,23 +686,47 @@ func cmdMigrate(cfg *config.Config, _ []string, stdout, stderr *os.File) int {
}
defer pool.Close()
+ curr, files, err := migrations.Status(ctx, pool)
+ if err != nil {
+ fmt.Fprintf(stderr, "openwatch migrate: status: %v\n", err)
+ return 1
+ }
+ total := len(files)
+
+ if *statusOnly {
+ fmt.Fprintf(stdout, "current version: %d\n", curr)
+ if int(curr) >= total {
+ fmt.Fprintln(stdout, "up to date — no migrations pending")
+ } else {
+ fmt.Fprintf(stdout, "PENDING: %d migration(s) not yet applied — run `openwatch migrate`\n", total-int(curr))
+ }
+ return 0
+ }
+
+ // Restore point before applying — only when a schema already exists; a
+ // fresh DB (curr == 0) has nothing to dump. Fail closed on backup error.
+ if *backupDir != "" && curr > 0 {
+ stamp := time.Now().UTC().Format("20060102T150405Z")
+ path, berr := dbbackup.Run(ctx, cfg.Database.DSN, *backupDir, version.Version, stamp)
+ if berr != nil {
+ fmt.Fprintf(stderr, "openwatch migrate: backup failed, refusing to migrate: %v\n", berr)
+ return 1
+ }
+ fmt.Fprintf(stdout, "backed up to %s\n", path)
+ }
+
fmt.Fprintf(stdout, "applying migrations against %s ...\n", config.RedactDSN(cfg.Database.DSN))
if err := migrations.Apply(ctx, pool); err != nil {
fmt.Fprintf(stderr, "openwatch migrate: %v\n", err)
return 1
}
- version, files, err := migrations.Status(ctx, pool)
+ newVer, _, err := migrations.Status(ctx, pool)
if err != nil {
fmt.Fprintf(stderr, "openwatch migrate: status: %v\n", err)
return 1
}
- fmt.Fprintf(stdout, " current version: %d\n", version)
- fmt.Fprintf(stdout, " migration files: %d\n", len(files))
- for _, name := range files {
- fmt.Fprintf(stdout, " - %s\n", name)
- }
- fmt.Fprintln(stdout, "migrations applied")
+ fmt.Fprintf(stdout, "migrations applied — version %d -> %d\n", curr, newVer)
return 0
}
@@ -792,6 +838,8 @@ subcommands:
serve run the HTTPS API server (default)
worker run the scan-job claimer/dispatcher loop
migrate apply pending goose migrations
+ --status report version + pending count, don't apply
+ --backup-dir pg_dump a restore point before applying
create-admin create the first admin user (requires --username --email --password)
check-config validate and print resolved config
diff --git a/docs/runbooks/UPGRADING.md b/docs/runbooks/UPGRADING.md
new file mode 100644
index 00000000..41cdbcbc
--- /dev/null
+++ b/docs/runbooks/UPGRADING.md
@@ -0,0 +1,89 @@
+# Upgrading OpenWatch
+
+OpenWatch upgrades are **one command**. The package post-install scriptlet
+applies any pending database migrations automatically — taking a backup
+restore point first — and restarts the service.
+
+```bash
+# RHEL / CentOS / Rocky / Alma / Fedora
+sudo dnf update -y 'openwatch*' 'kensa-rules*'
+
+# Debian / Ubuntu
+sudo apt update && sudo apt install --only-upgrade openwatch kensa-rules
+```
+
+That's it. On a single-instance install this is all an operator needs to do.
+
+## What happens automatically (on upgrade)
+
+The scriptlet runs **only on upgrade**, never on a fresh install, and does:
+
+1. **Checks the database is reachable.** If it isn't, migrations are skipped
+ with a warning (the upgrade doesn't fail) — run `openwatch migrate`
+ manually once the DB is back, then `systemctl restart openwatch`.
+2. **Stops the service** — so the new binary never runs against an old schema
+ and vice versa.
+3. **Backs up the database** with `pg_dump` to `/var/lib/openwatch/backups/`
+ (this is your restore point; the password is passed to `pg_dump` via the
+ environment, never on the command line).
+4. **Applies pending migrations.** Each migration runs in a transaction, so a
+ failure rolls back atomically — your data is never left half-migrated.
+5. **On success → starts the service** on the new version.
+ **On failure → leaves the service stopped**, prints the restore path, and
+ exits non-zero so `dnf`/`apt` flag that the upgrade needs attention.
+
+### Pre-flight (optional)
+
+See what would change before upgrading:
+
+```bash
+sudo openwatch migrate --status
+# -> "up to date — no migrations pending" OR "PENDING: N migration(s) ..."
+```
+
+## If a migration fails
+
+The service is left **stopped** and your data is intact (the failed migration
+rolled back). Recover with:
+
+```bash
+# 1. read the error in the dnf/apt output or: journalctl -u openwatch
+# 2. fix the cause, then re-apply:
+sudo openwatch migrate
+sudo systemctl start openwatch
+# on Debian, also clear the half-configured state:
+sudo dpkg --configure -a
+```
+
+### Restoring from the pre-upgrade backup (last resort)
+
+```bash
+ls -t /var/lib/openwatch/backups/ # newest dump first
+sudo systemctl stop openwatch
+# DSN is in /etc/openwatch/secrets.env (OPENWATCH_DATABASE_DSN)
+psql "$OPENWATCH_DATABASE_DSN" < /var/lib/openwatch/backups/openwatch-pre-upgrade-<...>.sql
+```
+
+## Backups: location, retention, opt-out
+
+- Dumps live in `/var/lib/openwatch/backups/`.
+- A systemd timer (`openwatch-backup-cleanup.timer`, runs daily) prunes dumps
+ older than `BACKUP_RETENTION_DAYS` (default 30) but **always keeps the most
+ recent one**, so you never lose your last restore point.
+- Tune behavior in `/etc/openwatch/upgrade.conf`:
+ - `AUTO_BACKUP=yes|no` — set `no` only if you run your own verified
+ pre-upgrade backups.
+ - `BACKUP_DIR`, `BACKUP_RETENTION_DAYS`.
+
+## Scope and limits
+
+- **App schema migrations: automatic and safe** (this document).
+- **Minor PostgreSQL / dependency updates**: handled by `dnf`/`apt` itself via
+ package dependencies — nothing extra to do.
+- **PostgreSQL MAJOR-version upgrade** (e.g. 15 → 16): **NOT** performed by the
+ OpenWatch scriptlet. That is a data-directory migration (`pg_upgrade` or
+ dump/restore) that needs both server versions and must be operator-supervised
+ — doing it silently from a package upgrade risks the whole database. Plan it
+ separately, with its own backup.
+- **Brief downtime** during the migrate step is expected (the appliance model).
+ Multi-instance / zero-downtime upgrades are out of scope.
diff --git a/internal/dbbackup/dbbackup.go b/internal/dbbackup/dbbackup.go
new file mode 100644
index 00000000..eb3544a4
--- /dev/null
+++ b/internal/dbbackup/dbbackup.go
@@ -0,0 +1,107 @@
+// Package dbbackup creates a plain-SQL pg_dump of the OpenWatch database,
+// used as the pre-upgrade restore point before migrations run.
+//
+// The cardinal rule: connection parameters (especially the password) go to
+// pg_dump via PG* environment variables, NEVER on the command line — so the
+// password never appears in the process argv (visible in `ps`). This mirrors
+// how the rest of the codebase keeps credentials out of argv.
+package dbbackup
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "strconv"
+
+ "github.com/jackc/pgx/v5/pgxpool"
+)
+
+// dumpArgs are the pg_dump flags shared by Command and Run. --no-owner /
+// --no-privileges keep the dump restorable regardless of the target role
+// layout (the restore path is `psql < file`); -f writes to the file.
+func dumpArgs(outPath string) []string {
+ return []string{"--no-owner", "--no-privileges", "-f", outPath}
+}
+
+// dumpEnv translates dsn into the PG* environment pg_dump reads, so no
+// connection parameter (least of all the password) ends up in argv.
+func dumpEnv(dsn string) ([]string, error) {
+ cfg, err := pgxpool.ParseConfig(dsn)
+ if err != nil {
+ return nil, fmt.Errorf("dbbackup: parse dsn: %w", err)
+ }
+ cc := cfg.ConnConfig
+ env := append(os.Environ(),
+ "PGHOST="+cc.Host,
+ "PGPORT="+strconv.Itoa(int(cc.Port)),
+ "PGUSER="+cc.User,
+ "PGPASSWORD="+cc.Password,
+ "PGDATABASE="+cc.Database,
+ )
+ // pgxpool leaves TLSConfig nil for sslmode=disable; map that back so
+ // pg_dump doesn't attempt TLS against a non-TLS local Postgres. When TLS
+ // IS configured we leave PGSSLMODE unset (pg_dump defaults to prefer).
+ if cc.TLSConfig == nil {
+ env = append(env, "PGSSLMODE=disable")
+ }
+ return env, nil
+}
+
+// DumpFileName returns the backup filename for a dump taken at stamp (an
+// already-formatted timestamp, e.g. "20260616T010203Z") for version. Pure,
+// so the caller owns the clock and tests are deterministic.
+func DumpFileName(version, stamp string) string {
+ v := version
+ if v == "" {
+ v = "unknown"
+ }
+ safe := make([]rune, 0, len(v))
+ for _, r := range v {
+ switch {
+ case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '.', r == '-', r == '_':
+ safe = append(safe, r)
+ default:
+ safe = append(safe, '_')
+ }
+ }
+ return fmt.Sprintf("openwatch-pre-upgrade-%s-%s.sql", string(safe), stamp)
+}
+
+// Command builds the pg_dump command writing a plain-SQL dump to outPath.
+// Connection parameters live in cmd.Env (PG*), NONE in cmd.Args — the tests
+// pin that the password never reaches argv.
+func Command(dsn, outPath string) (*exec.Cmd, error) {
+ env, err := dumpEnv(dsn)
+ if err != nil {
+ return nil, err
+ }
+ // #nosec G204 -- the command is the constant "pg_dump"; the args are
+ // package-controlled flags + an output path (never user input), and the
+ // connection parameters (incl. the password) travel via env, not argv.
+ cmd := exec.Command("pg_dump", dumpArgs(outPath)...)
+ cmd.Env = env
+ return cmd, nil
+}
+
+// Run writes a dump to dir (created if absent) and returns the file path.
+// The caller supplies version + stamp for the filename.
+func Run(ctx context.Context, dsn, dir, version, stamp string) (string, error) {
+ if err := os.MkdirAll(dir, 0o750); err != nil {
+ return "", fmt.Errorf("dbbackup: mkdir %s: %w", dir, err)
+ }
+ out := filepath.Join(dir, DumpFileName(version, stamp))
+ env, err := dumpEnv(dsn)
+ if err != nil {
+ return "", err
+ }
+ // #nosec G204 -- see Command: constant "pg_dump", package-controlled
+ // args, secrets via env not argv.
+ cmd := exec.CommandContext(ctx, "pg_dump", dumpArgs(out)...)
+ cmd.Env = env
+ if combined, err := cmd.CombinedOutput(); err != nil {
+ return "", fmt.Errorf("dbbackup: pg_dump failed: %w: %s", err, string(combined))
+ }
+ return out, nil
+}
diff --git a/internal/dbbackup/dbbackup_test.go b/internal/dbbackup/dbbackup_test.go
new file mode 100644
index 00000000..2f46a733
--- /dev/null
+++ b/internal/dbbackup/dbbackup_test.go
@@ -0,0 +1,71 @@
+// @spec release-upgrade
+//
+// AC traceability (this file):
+//
+// AC-01 TestCommand_PasswordNeverInArgv
+// AC-02 TestDumpFileName_DeterministicAndSanitized
+
+package dbbackup
+
+import (
+ "strings"
+ "testing"
+)
+
+// @ac AC-01
+// AC-01: the pg_dump command carries connection params (incl. the password)
+// in the environment only — NEVER in argv, where `ps` would expose it.
+func TestCommand_PasswordNeverInArgv(t *testing.T) {
+ t.Run("release-upgrade/AC-01", func(t *testing.T) {
+ const secret = "s3cr3t-p@ss"
+ dsn := "postgres://owuser:" + secret + "@db.example:6543/owdb?sslmode=disable"
+ cmd, err := Command(dsn, "/var/lib/openwatch/backups/x.sql")
+ if err != nil {
+ t.Fatalf("Command: %v", err)
+ }
+ // Password must not appear anywhere in argv.
+ argv := strings.Join(cmd.Args, " ")
+ if strings.Contains(argv, secret) {
+ t.Errorf("password leaked into argv: %q", argv)
+ }
+ // Connection params must be in the env instead.
+ joinEnv := strings.Join(cmd.Env, "\n")
+ for _, want := range []string{
+ "PGPASSWORD=" + secret,
+ "PGHOST=db.example",
+ "PGPORT=6543",
+ "PGUSER=owuser",
+ "PGDATABASE=owdb",
+ "PGSSLMODE=disable", // sslmode=disable -> nil TLS -> mapped back
+ } {
+ if !strings.Contains(joinEnv, want) {
+ t.Errorf("env missing %q", want)
+ }
+ }
+ // pg_dump must write to the file, not stdout.
+ if !strings.Contains(argv, "-f /var/lib/openwatch/backups/x.sql") {
+ t.Errorf("pg_dump not directed at the output file: %q", argv)
+ }
+ })
+}
+
+// @ac AC-02
+// AC-02: backup filenames are deterministic given (version, stamp) and
+// sanitize unsafe characters in the version token.
+func TestDumpFileName_DeterministicAndSanitized(t *testing.T) {
+ t.Run("release-upgrade/AC-02", func(t *testing.T) {
+ got := DumpFileName("0.2.0-rc.8", "20260616T010203Z")
+ want := "openwatch-pre-upgrade-0.2.0-rc.8-20260616T010203Z.sql"
+ if got != want {
+ t.Errorf("DumpFileName = %q, want %q", got, want)
+ }
+ // Unsafe characters (slashes, spaces) are replaced.
+ if name := DumpFileName("v1/2 3", "S"); strings.ContainsAny(name, "/ ") {
+ t.Errorf("unsafe chars survived: %q", name)
+ }
+ // Empty version falls back to a non-empty token.
+ if name := DumpFileName("", "S"); !strings.Contains(name, "unknown") {
+ t.Errorf("empty version not defaulted: %q", name)
+ }
+ })
+}
diff --git a/packaging/common/cleanup-backups.sh b/packaging/common/cleanup-backups.sh
new file mode 100755
index 00000000..1e99c8bd
--- /dev/null
+++ b/packaging/common/cleanup-backups.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Prune old OpenWatch pre-upgrade database dumps. Run by the
+# openwatch-backup-cleanup.timer. Keeps the most recent dump ALWAYS (so the
+# last restore point is never lost) and deletes the rest older than
+# BACKUP_RETENTION_DAYS.
+set -uo pipefail
+
+CONF=/etc/openwatch/upgrade.conf
+BACKUP_DIR=/var/lib/openwatch/backups
+BACKUP_RETENTION_DAYS=30
+# shellcheck source=/dev/null
+[ -f "$CONF" ] && . "$CONF"
+
+[ -d "$BACKUP_DIR" ] || exit 0
+
+# Newest first, so index 0 is the one we always keep. Dump filenames are
+# package-controlled (openwatch-pre-upgrade--.sql, no spaces or
+# newlines), so the ls-based sort is safe here.
+# shellcheck disable=SC2012
+mapfile -t dumps < <(ls -1t "$BACKUP_DIR"/openwatch-pre-upgrade-*.sql 2>/dev/null || true)
+[ "${#dumps[@]}" -gt 1 ] || exit 0
+
+idx=0
+for f in "${dumps[@]}"; do
+ if [ "$idx" -eq 0 ]; then
+ idx=1
+ continue # always keep the most recent dump
+ fi
+ if [ -n "$(find "$f" -maxdepth 0 -mtime "+${BACKUP_RETENTION_DAYS}" 2>/dev/null)" ]; then
+ rm -f "$f" && echo "openwatch backup-cleanup: pruned $f"
+ fi
+done
diff --git a/packaging/common/openwatch-backup-cleanup.service b/packaging/common/openwatch-backup-cleanup.service
new file mode 100644
index 00000000..4bd61971
--- /dev/null
+++ b/packaging/common/openwatch-backup-cleanup.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Prune old OpenWatch pre-upgrade database backups
+Documentation=file:///usr/share/doc/openwatch/UPGRADING.md
+
+[Service]
+Type=oneshot
+ExecStart=/usr/lib/openwatch/cleanup-backups.sh
+# Backups are root-owned (written by the upgrade scriptlet); prune as root.
+User=root
+# Light hardening — the job only needs to read config and delete dumps.
+ProtectSystem=strict
+ReadWritePaths=/var/lib/openwatch/backups
+ProtectHome=true
+PrivateTmp=true
+NoNewPrivileges=true
diff --git a/packaging/common/openwatch-backup-cleanup.timer b/packaging/common/openwatch-backup-cleanup.timer
new file mode 100644
index 00000000..56caf4dd
--- /dev/null
+++ b/packaging/common/openwatch-backup-cleanup.timer
@@ -0,0 +1,11 @@
+[Unit]
+Description=Daily prune of OpenWatch pre-upgrade database backups
+
+[Timer]
+OnCalendar=daily
+# Run on next boot if the machine was off at the scheduled time.
+Persistent=true
+RandomizedDelaySec=1h
+
+[Install]
+WantedBy=timers.target
diff --git a/packaging/common/openwatch-upgrade.sh b/packaging/common/openwatch-upgrade.sh
new file mode 100755
index 00000000..233eaea9
--- /dev/null
+++ b/packaging/common/openwatch-upgrade.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+# Apply pending OpenWatch DB migrations during a package UPGRADE, with an
+# auto-backup restore point and a fail-safe service state. Invoked by the
+# RPM %post / DEB postinst ONLY on upgrade (never on a fresh install — a
+# fresh install has no database yet).
+#
+# Sequence (single-instance appliance model):
+# 1. confirm the DB is reachable + already initialized (else skip, warn)
+# 2. stop the service -> never run the old binary against a new schema
+# 3. `openwatch migrate --backup-dir` -> pg_dump restore point, then apply
+# (each migration is transactional; a failure rolls back atomically)
+# 4. success -> start the service
+# failure -> leave it STOPPED, print the restore path, exit non-zero so
+# the package manager surfaces that the upgrade needs attention
+#
+# NOT `set -e`: we handle each failure explicitly to control the fail mode.
+set -uo pipefail
+
+# Config/secrets paths default to the production locations; the env
+# overrides exist ONLY so the e2e test can point them at a scratch fixture.
+CONF="${OPENWATCH_UPGRADE_CONF:-/etc/openwatch/upgrade.conf}"
+AUTO_BACKUP=yes
+BACKUP_DIR=/var/lib/openwatch/backups
+# shellcheck source=/dev/null
+[ -f "$CONF" ] && . "$CONF"
+
+# The migrate command reads the same config the service does; load the DB
+# secret the systemd unit normally injects so a root scriptlet can connect.
+SECRETS="${OPENWATCH_SECRETS_ENV:-/etc/openwatch/secrets.env}"
+if [ -f "$SECRETS" ]; then
+ set -a
+ # shellcheck source=/dev/null
+ . "$SECRETS"
+ set +a
+fi
+
+log() { echo "openwatch upgrade: $*"; }
+err() { echo "openwatch upgrade: $*" >&2; }
+
+if ! command -v openwatch >/dev/null 2>&1; then
+ err "openwatch binary not found; skipping auto-migrate."
+ exit 0
+fi
+
+# DB reachable + initialized? `migrate --status` exits 0 when it can connect.
+# If not (DB down / not yet provisioned), do NOT fail the package — warn and
+# let the operator migrate manually once the DB is up.
+if ! openwatch migrate --status >/dev/null 2>&1; then
+ err "database not reachable — migrations were NOT applied."
+ err "once the database is up, run: openwatch migrate && systemctl restart openwatch"
+ exit 0
+fi
+
+log "stopping service for migration"
+systemctl stop openwatch.service >/dev/null 2>&1 || :
+
+args=()
+if [ "${AUTO_BACKUP:-yes}" = "yes" ]; then
+ args+=(--backup-dir "$BACKUP_DIR")
+fi
+
+if openwatch migrate "${args[@]}"; then
+ log "migration succeeded; starting service"
+ systemctl start openwatch.service >/dev/null 2>&1 || :
+ exit 0
+fi
+
+# Fail-safe: migration failed (and rolled back — DDL is transactional). Leave
+# the service stopped rather than run the new binary against an un-migrated
+# schema, and tell the operator exactly how to recover.
+err "MIGRATION FAILED. The openwatch service has been left STOPPED to avoid"
+err "running against an un-migrated schema. Your data is intact (each"
+err "migration runs in a transaction and rolled back)."
+if [ "${AUTO_BACKUP:-yes}" = "yes" ]; then
+ err "A pre-migration backup is in ${BACKUP_DIR}/ . To restore if needed:"
+ err " psql \"\$OPENWATCH_DATABASE_DSN\" < ${BACKUP_DIR}/openwatch-pre-upgrade-*.sql"
+fi
+err "After resolving the cause: openwatch migrate && systemctl start openwatch"
+exit 1
diff --git a/packaging/common/upgrade.conf b/packaging/common/upgrade.conf
new file mode 100644
index 00000000..391e5d83
--- /dev/null
+++ b/packaging/common/upgrade.conf
@@ -0,0 +1,14 @@
+# OpenWatch upgrade behavior. Sourced (KEY=value, no spaces) by the package
+# upgrade scriptlet and the backup-cleanup timer.
+
+# Auto-backup the database (pg_dump) before applying migrations on upgrade.
+# This is the restore point if a migration fails. Set to "no" ONLY if you run
+# your own verified pre-upgrade backups.
+AUTO_BACKUP=yes
+
+# Where pre-upgrade dumps are written.
+BACKUP_DIR=/var/lib/openwatch/backups
+
+# How many days of dumps to keep. The most recent dump is ALWAYS kept,
+# regardless of age, so you never lose your last restore point.
+BACKUP_RETENTION_DAYS=30
diff --git a/packaging/deb/build-deb.sh b/packaging/deb/build-deb.sh
index a747ad88..c2ee14e9 100755
--- a/packaging/deb/build-deb.sh
+++ b/packaging/deb/build-deb.sh
@@ -57,13 +57,21 @@ mkdir -p "$STAGE/etc/openwatch/keys"
chmod 0750 "$STAGE/etc/openwatch/keys"
mkdir -p "$STAGE/etc/systemd/system"
mkdir -p "$STAGE/var/lib/openwatch"
+# Pre-upgrade DB dumps land here (written by the upgrade scriptlet).
+mkdir -p "$STAGE/var/lib/openwatch/backups"
+chmod 0750 "$STAGE/var/lib/openwatch/backups"
mkdir -p "$STAGE/var/log/openwatch"
# Binary + config + unit.
install -m 0755 "$DIST_DIR/openwatch" "$STAGE/usr/bin/openwatch"
install -m 0640 "$APP_DIR/packaging/common/openwatch.toml" "$STAGE/etc/openwatch/openwatch.toml"
+install -m 0640 "$APP_DIR/packaging/common/upgrade.conf" "$STAGE/etc/openwatch/upgrade.conf"
install -m 0644 "$APP_DIR/packaging/common/openwatch.service" "$STAGE/etc/systemd/system/openwatch.service"
+install -m 0644 "$APP_DIR/packaging/common/openwatch-backup-cleanup.service" "$STAGE/etc/systemd/system/openwatch-backup-cleanup.service"
+install -m 0644 "$APP_DIR/packaging/common/openwatch-backup-cleanup.timer" "$STAGE/etc/systemd/system/openwatch-backup-cleanup.timer"
install -m 0755 "$APP_DIR/packaging/common/provision-identity-keys.sh" "$STAGE/usr/lib/openwatch/provision-identity-keys.sh"
+install -m 0755 "$APP_DIR/packaging/common/openwatch-upgrade.sh" "$STAGE/usr/lib/openwatch/openwatch-upgrade.sh"
+install -m 0755 "$APP_DIR/packaging/common/cleanup-backups.sh" "$STAGE/usr/lib/openwatch/cleanup-backups.sh"
# Demo TLS cert (chmod inside the script).
bash "$APP_DIR/packaging/common/gen-demo-cert.sh" "$STAGE/etc/openwatch/tls" >/dev/null
diff --git a/packaging/deb/conffiles b/packaging/deb/conffiles
index a659cd5d..99b5dea4 100644
--- a/packaging/deb/conffiles
+++ b/packaging/deb/conffiles
@@ -1 +1,2 @@
/etc/openwatch/openwatch.toml
+/etc/openwatch/upgrade.conf
diff --git a/packaging/deb/postinst b/packaging/deb/postinst
index 320fcaa6..033f7066 100755
--- a/packaging/deb/postinst
+++ b/packaging/deb/postinst
@@ -8,11 +8,21 @@ set -e
if [ "$1" = "configure" ]; then
if command -v systemctl >/dev/null 2>&1; then
systemctl daemon-reload || :
+ # Enable the daily pre-upgrade-backup cleanup timer (install + upgrade).
+ systemctl enable --now openwatch-backup-cleanup.timer >/dev/null 2>&1 || :
fi
# Generate-if-absent, so reconfigure/upgrade never clobbers live keys.
# preinst already created the openwatch user/group. Not guarded: a real
# failure should abort configure rather than leave an unbootable service.
/usr/lib/openwatch/provision-identity-keys.sh
+
+ # UPGRADE: dpkg passes the previously-configured version as $2 (empty on a
+ # fresh install). Apply pending DB migrations with an auto-backup restore
+ # point + fail-safe service state. `set -e` propagates a migration failure
+ # (the helper exits non-zero) so dpkg leaves the package needing attention.
+ if [ -n "${2:-}" ]; then
+ /usr/lib/openwatch/openwatch-upgrade.sh
+ fi
fi
exit 0
diff --git a/packaging/deb/prerm b/packaging/deb/prerm
index 9bffc40b..6462a3eb 100755
--- a/packaging/deb/prerm
+++ b/packaging/deb/prerm
@@ -7,6 +7,7 @@ if [ "$1" = "remove" ] || [ "$1" = "purge" ]; then
if command -v systemctl >/dev/null 2>&1; then
systemctl stop openwatch.service >/dev/null 2>&1 || :
systemctl disable openwatch.service >/dev/null 2>&1 || :
+ systemctl disable --now openwatch-backup-cleanup.timer >/dev/null 2>&1 || :
fi
fi
diff --git a/packaging/rpm/build-rpm.sh b/packaging/rpm/build-rpm.sh
index 947b9b31..5f34025c 100755
--- a/packaging/rpm/build-rpm.sh
+++ b/packaging/rpm/build-rpm.sh
@@ -62,6 +62,13 @@ cp "$DIST_DIR/openwatch" "$SRC_DIR/openwatch"
cp "$APP_DIR/packaging/common/openwatch.toml" "$SRC_DIR/openwatch.toml"
cp "$APP_DIR/packaging/common/openwatch.service" "$SRC_DIR/openwatch.service"
cp "$APP_DIR/packaging/common/provision-identity-keys.sh" "$SRC_DIR/provision-identity-keys.sh"
+# Upgrade automation: migrate-on-upgrade helper, backup cleanup, systemd
+# timer + units, and the operator-tunable upgrade.conf.
+cp "$APP_DIR/packaging/common/openwatch-upgrade.sh" "$SRC_DIR/openwatch-upgrade.sh"
+cp "$APP_DIR/packaging/common/cleanup-backups.sh" "$SRC_DIR/cleanup-backups.sh"
+cp "$APP_DIR/packaging/common/upgrade.conf" "$SRC_DIR/upgrade.conf"
+cp "$APP_DIR/packaging/common/openwatch-backup-cleanup.service" "$SRC_DIR/openwatch-backup-cleanup.service"
+cp "$APP_DIR/packaging/common/openwatch-backup-cleanup.timer" "$SRC_DIR/openwatch-backup-cleanup.timer"
# Demo TLS cert.
bash "$APP_DIR/packaging/common/gen-demo-cert.sh" "$SRC_DIR" >/dev/null
diff --git a/packaging/rpm/openwatch.spec b/packaging/rpm/openwatch.spec
index e5ed75ae..71f7c342 100644
--- a/packaging/rpm/openwatch.spec
+++ b/packaging/rpm/openwatch.spec
@@ -62,16 +62,23 @@ install -d -m 0750 %{buildroot}/etc/openwatch/tls
# (they must be unique per install); %post generates them into here.
install -d -m 0750 %{buildroot}/etc/openwatch/keys
install -m 0640 openwatch.toml %{buildroot}/etc/openwatch/openwatch.toml
+install -m 0640 upgrade.conf %{buildroot}/etc/openwatch/upgrade.conf
install -m 0644 cert.pem %{buildroot}/etc/openwatch/tls/cert.pem
install -m 0600 key.pem %{buildroot}/etc/openwatch/tls/key.pem
install -d -m 0755 %{buildroot}/usr/lib/openwatch
install -m 0755 provision-identity-keys.sh %{buildroot}/usr/lib/openwatch/provision-identity-keys.sh
+install -m 0755 openwatch-upgrade.sh %{buildroot}/usr/lib/openwatch/openwatch-upgrade.sh
+install -m 0755 cleanup-backups.sh %{buildroot}/usr/lib/openwatch/cleanup-backups.sh
install -d -m 0755 %{buildroot}/etc/systemd/system
install -m 0644 openwatch.service %{buildroot}/etc/systemd/system/openwatch.service
+install -m 0644 openwatch-backup-cleanup.service %{buildroot}/etc/systemd/system/openwatch-backup-cleanup.service
+install -m 0644 openwatch-backup-cleanup.timer %{buildroot}/etc/systemd/system/openwatch-backup-cleanup.timer
install -d -m 0750 %{buildroot}/var/lib/openwatch
+# Pre-upgrade DB dumps land here (written by the upgrade scriptlet).
+install -d -m 0750 %{buildroot}/var/lib/openwatch/backups
install -d -m 0750 %{buildroot}/var/log/openwatch
%pre
@@ -92,11 +99,24 @@ systemctl daemon-reload || :
# as a scriptlet warning, not silently leave an unbootable service.
/usr/lib/openwatch/provision-identity-keys.sh
+# Enable the daily pre-upgrade-backup cleanup timer (install + upgrade).
+systemctl enable --now openwatch-backup-cleanup.timer >/dev/null 2>&1 || :
+
+# On UPGRADE ($1 -ge 2), apply pending DB migrations with an auto-backup
+# restore point and a fail-safe service state (stop -> backup+migrate ->
+# start, or leave stopped on failure). Skipped on a FRESH install ($1 == 1):
+# there is no database yet — the operator runs the documented first-run
+# `openwatch migrate` after provisioning Postgres.
+if [ "$1" -ge 2 ]; then
+ /usr/lib/openwatch/openwatch-upgrade.sh
+fi
+
%preun
# AC-09: stop and disable cleanly so removal doesn't leave a running orphan.
if [ $1 -eq 0 ]; then
systemctl stop openwatch.service >/dev/null 2>&1 || :
systemctl disable openwatch.service >/dev/null 2>&1 || :
+ systemctl disable --now openwatch-backup-cleanup.timer >/dev/null 2>&1 || :
fi
%postun
@@ -108,14 +128,20 @@ systemctl daemon-reload || :
%dir %attr(0750, root, openwatch) /etc/openwatch
%dir %attr(0750, root, openwatch) /etc/openwatch/tls
%config(noreplace) %attr(0640, root, openwatch) /etc/openwatch/openwatch.toml
+%config(noreplace) %attr(0640, root, openwatch) /etc/openwatch/upgrade.conf
%attr(0644, root, openwatch) /etc/openwatch/tls/cert.pem
%attr(0600, openwatch, openwatch) /etc/openwatch/tls/key.pem
%attr(0644, root, root) /etc/systemd/system/openwatch.service
+%attr(0644, root, root) /etc/systemd/system/openwatch-backup-cleanup.service
+%attr(0644, root, root) /etc/systemd/system/openwatch-backup-cleanup.timer
# Identity-key directory ships empty (0750); %post generates the per-install
# keys into it. The key files are intentionally NOT packaged.
%dir %attr(0750, root, openwatch) /etc/openwatch/keys
%attr(0755, root, root) /usr/lib/openwatch/provision-identity-keys.sh
+%attr(0755, root, root) /usr/lib/openwatch/openwatch-upgrade.sh
+%attr(0755, root, root) /usr/lib/openwatch/cleanup-backups.sh
%dir %attr(0750, openwatch, openwatch) /var/lib/openwatch
+%dir %attr(0750, root, openwatch) /var/lib/openwatch/backups
%dir %attr(0750, openwatch, openwatch) /var/log/openwatch
%changelog
diff --git a/packaging/tests/run-upgrade-container-test.sh b/packaging/tests/run-upgrade-container-test.sh
new file mode 100755
index 00000000..e4f1fade
--- /dev/null
+++ b/packaging/tests/run-upgrade-container-test.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Host-side driver for the full package-upgrade test. Builds an OLD
+# (release 1) and NEW (release 2) openwatch RPM from the current tree, then
+# runs packaging/tests/upgrade-container-test.sh inside a rockylinux:9
+# container to prove that `rpm -U` (the real package upgrade) migrates the
+# DB to head, takes a pre-upgrade backup, and stop/starts the service.
+#
+# Requires docker plus the host build toolchain (go, make, rpmbuild).
+#
+# Network: uses --network host so dnf inside the container can reach the
+# Rocky mirrors; the container's throwaway Postgres binds the unused port
+# 55432 to avoid colliding with any Postgres already on the host.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+APP_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+cd "$APP_DIR"
+
+command -v docker >/dev/null || { echo "docker is required" >&2; exit 1; }
+
+RPMS="$(mktemp -d)"
+trap 'rm -rf "$RPMS"' EXIT
+mkdir -p "$RPMS/old" "$RPMS/new"
+
+echo ">> building OLD (release 1) and NEW (release 2) RPMs"
+make rpm >/dev/null
+cp dist/openwatch-*-1.*.rpm "$RPMS/old/"
+RPM_RELEASE=2 bash packaging/rpm/build-rpm.sh >/dev/null
+cp dist/openwatch-*-2.*.rpm "$RPMS/new/"
+
+echo ">> running the upgrade in a rockylinux:9 container"
+exec docker run --rm --network host \
+ -v "$RPMS:/rpms:ro" \
+ -v "$APP_DIR/packaging/tests/upgrade-container-test.sh:/test.sh:ro" \
+ rockylinux:9 bash /test.sh
diff --git a/packaging/tests/upgrade-container-test.sh b/packaging/tests/upgrade-container-test.sh
new file mode 100755
index 00000000..8715290c
--- /dev/null
+++ b/packaging/tests/upgrade-container-test.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# Full package-manager upgrade test. Runs INSIDE a rockylinux:9 container
+# with the OLD and NEW openwatch RPMs mounted at /rpms/old and /rpms/new.
+#
+# It proves the real upgrade path: install the OLD package, stand up a
+# database, roll it back one migration to simulate a prior version, then
+# `rpm -U` the NEW package and confirm the package's %post scriptlet (with
+# $1=2) migrated the DB to head and took a pre-upgrade backup — with a
+# systemctl shim standing in for systemd (no init in the container).
+#
+# Invoke from the host via packaging/tests/run-upgrade-container-test.sh.
+set -euo pipefail
+
+echo "### prerequisites"
+dnf install -y -q postgresql-server postgresql openssl findutils >/dev/null
+
+echo "### start postgres (no systemd)"
+PGDATA=/var/lib/pgsql/data
+mkdir -p /run/postgresql && chown postgres:postgres /run/postgresql
+su postgres -c "initdb -D $PGDATA" >/dev/null
+su postgres -c "pg_ctl -D $PGDATA -o '-c listen_addresses=127.0.0.1 -p 55432' -l /tmp/pg.log -w start" \
+ || { echo '--- pg log ---'; cat /tmp/pg.log; exit 1; }
+su postgres -c "psql -p 55432 -q -c \"CREATE USER ow PASSWORD 'ow' SUPERUSER;\""
+su postgres -c "createdb -p 55432 -O ow owdb"
+DSN="postgres://ow:ow@127.0.0.1:55432/owdb?sslmode=disable"
+
+echo "### systemctl shim (records the helper's stop/start)"
+# Overwrite the real systemctl: there is no systemd init in the container, and
+# RPM scriptlets run with a restricted PATH (/sbin:/bin:/usr/sbin:/usr/bin)
+# that excludes /usr/local/bin, so the shim must live in /usr/bin to be seen.
+printf '#!/bin/sh\necho "$@" >> /tmp/systemctl.log\nexit 0\n' > /usr/bin/systemctl
+chmod +x /usr/bin/systemctl
+: > /tmp/systemctl.log
+
+echo "### install OLD package (release 1)"
+rpm -i --nodeps /rpms/old/openwatch-*.rpm
+echo "OPENWATCH_DATABASE_DSN=$DSN" > /etc/openwatch/secrets.env
+
+echo "### bring DB to head, then roll back migration 0035 to simulate the prior version"
+set -a; . /etc/openwatch/secrets.env; set +a
+openwatch migrate >/dev/null
+PGPASSWORD=ow psql -h 127.0.0.1 -p 55432 -U ow -d owdb -q \
+ -c "DROP TABLE IF EXISTS host_connection_profile CASCADE; DELETE FROM goose_db_version WHERE version_id=35;"
+before=$(PGPASSWORD=ow psql -h 127.0.0.1 -p 55432 -U ow -d owdb -tAc 'SELECT max(version_id) FROM goose_db_version')
+hcp_before=$(PGPASSWORD=ow psql -h 127.0.0.1 -p 55432 -U ow -d owdb -tAc "SELECT to_regclass('host_connection_profile')")
+echo "BEFORE upgrade: version=$before host_connection_profile=${hcp_before:-}"
+: > /tmp/systemctl.log
+
+echo "### UPGRADE: rpm -U the NEW package (release 2) — triggers %post with \$1=2"
+rpm -U --nodeps /rpms/new/openwatch-*.rpm
+
+echo "### results"
+after=$(PGPASSWORD=ow psql -h 127.0.0.1 -p 55432 -U ow -d owdb -tAc 'SELECT max(version_id) FROM goose_db_version')
+hcp_after=$(PGPASSWORD=ow psql -h 127.0.0.1 -p 55432 -U ow -d owdb -tAc "SELECT to_regclass('host_connection_profile')")
+echo "AFTER upgrade: version=$after host_connection_profile=${hcp_after:-}"
+echo "systemctl during upgrade: $(tr '\n' ',' < /tmp/systemctl.log)"
+echo "backups: $(ls /var/lib/openwatch/backups/ 2>/dev/null || echo none)"
+
+echo "### assertions"
+fail=0
+[ "$before" = "34" ] || { echo "FAIL: pre-upgrade version != 34"; fail=1; }
+[ "$after" = "35" ] || { echo "FAIL: post-upgrade version != 35 (migration did not apply)"; fail=1; }
+[ "$hcp_after" = "host_connection_profile" ] || { echo "FAIL: 0035 table not created by the upgrade"; fail=1; }
+ls /var/lib/openwatch/backups/openwatch-pre-upgrade-*.sql >/dev/null 2>&1 || { echo "FAIL: no pre-upgrade backup"; fail=1; }
+grep -q "stop openwatch.service" /tmp/systemctl.log || { echo "FAIL: service not stopped"; fail=1; }
+grep -q "start openwatch.service" /tmp/systemctl.log || { echo "FAIL: service not restarted"; fail=1; }
+if [ "$fail" -eq 0 ]; then
+ echo "RESULT: PASS - the package upgrade migrated 34 -> 35 with a backup and a stop/start"
+else
+ echo "RESULT: FAIL"
+ exit 1
+fi
diff --git a/packaging/tests/upgrade_test.go b/packaging/tests/upgrade_test.go
new file mode 100644
index 00000000..9bbc054f
--- /dev/null
+++ b/packaging/tests/upgrade_test.go
@@ -0,0 +1,140 @@
+// @spec release-upgrade
+//
+// AC traceability (this file):
+//
+// AC-03 TestUpgrade_MigrateFlagsAndFailClosed
+// AC-04 TestUpgrade_ScriptletGuardedOnUpgradeOnly
+// AC-05 TestUpgrade_HelperSequenceAndFailSafe
+// AC-06 TestUpgrade_CleanupTimerShippedAndKeepsNewest
+// AC-07 TestUpgrade_PayloadShipsUpgradeFiles
+
+package packaging_test
+
+import (
+ "strings"
+ "testing"
+)
+
+// @ac AC-03
+// AC-03: `openwatch migrate` gains --status and --backup-dir, the backup
+// runs BEFORE Apply, and a backup failure returns without migrating.
+func TestUpgrade_MigrateFlagsAndFailClosed(t *testing.T) {
+ t.Run("release-upgrade/AC-03", func(t *testing.T) {
+ src := readAppFile(t, "cmd/openwatch/main.go")
+ for _, needle := range []string{`"backup-dir"`, `"status"`, "refusing to migrate"} {
+ if !strings.Contains(src, needle) {
+ t.Errorf("cmdMigrate missing %q", needle)
+ }
+ }
+ // The pre-migration backup must precede migrations.Apply.
+ backupAt := strings.Index(src, "dbbackup.Run(")
+ applyAt := strings.Index(src, "migrations.Apply(ctx, pool)")
+ if backupAt < 0 || applyAt < 0 {
+ t.Fatalf("could not locate backup (%d) / apply (%d) in cmdMigrate", backupAt, applyAt)
+ }
+ if backupAt > applyAt {
+ t.Error("backup must run BEFORE migrations.Apply (fail-closed restore point)")
+ }
+ })
+}
+
+// @ac AC-04
+// AC-04: the upgrade helper runs on UPGRADE only — RPM gates on $1>=2, DEB
+// on a non-empty old-version $2 — never on a fresh install.
+func TestUpgrade_ScriptletGuardedOnUpgradeOnly(t *testing.T) {
+ t.Run("release-upgrade/AC-04", func(t *testing.T) {
+ spec := readAppFile(t, "packaging/rpm/openwatch.spec")
+ if !strings.Contains(spec, `[ "$1" -ge 2 ]`) || !strings.Contains(spec, "openwatch-upgrade.sh") {
+ t.Error(`RPM post-install must guard openwatch-upgrade.sh on [ "$1" -ge 2 ] (upgrade only)`)
+ }
+ post := readAppFile(t, "packaging/deb/postinst")
+ if !strings.Contains(post, `[ -n "${2:-}" ]`) || !strings.Contains(post, "openwatch-upgrade.sh") {
+ t.Error("DEB postinst must guard openwatch-upgrade.sh on a non-empty old-version $2 (upgrade only)")
+ }
+ })
+}
+
+// @ac AC-05
+// AC-05: the helper stops the service, migrates with a backup, starts on
+// success, and on failure leaves the service stopped + exits non-zero.
+func TestUpgrade_HelperSequenceAndFailSafe(t *testing.T) {
+ t.Run("release-upgrade/AC-05", func(t *testing.T) {
+ h := readAppFile(t, "packaging/common/openwatch-upgrade.sh")
+ for _, needle := range []string{
+ "systemctl stop openwatch.service",
+ "openwatch migrate",
+ "--backup-dir",
+ "systemctl start openwatch.service",
+ "STOPPED", // the fail-safe message
+ "exit 1", // non-zero on migration failure
+ } {
+ if !strings.Contains(h, needle) {
+ t.Errorf("openwatch-upgrade.sh missing %q", needle)
+ }
+ }
+ // Start must be gated on migrate success, not unconditional: the
+ // success path returns before the failure block.
+ if strings.Index(h, "systemctl start openwatch.service") > strings.Index(h, "MIGRATION FAILED") {
+ t.Error("service start must be on the success path, before the failure block")
+ }
+ })
+}
+
+// @ac AC-06
+// AC-06: the cleanup timer + service ship and are enabled; the prune keeps
+// the most recent dump.
+func TestUpgrade_CleanupTimerShippedAndKeepsNewest(t *testing.T) {
+ t.Run("release-upgrade/AC-06", func(t *testing.T) {
+ // Enabled in both scriptlets.
+ if !strings.Contains(readAppFile(t, "packaging/rpm/openwatch.spec"), "enable --now openwatch-backup-cleanup.timer") {
+ t.Error("RPM post-install must enable openwatch-backup-cleanup.timer")
+ }
+ if !strings.Contains(readAppFile(t, "packaging/deb/postinst"), "enable --now openwatch-backup-cleanup.timer") {
+ t.Error("DEB postinst must enable openwatch-backup-cleanup.timer")
+ }
+ // The prune always keeps the newest dump (index 0 -> continue).
+ clean := readAppFile(t, "packaging/common/cleanup-backups.sh")
+ if !strings.Contains(clean, "always keep") && !strings.Contains(clean, "keep the most recent") {
+ t.Error("cleanup-backups.sh must keep the most recent dump")
+ }
+ if !strings.Contains(clean, "BACKUP_RETENTION_DAYS") {
+ t.Error("cleanup-backups.sh must prune by BACKUP_RETENTION_DAYS")
+ }
+ })
+}
+
+// @ac AC-07
+// AC-07: the package payloads carry the upgrade helper, cleanup script, the
+// two systemd units, the empty backups dir, and upgrade.conf as a config.
+func TestUpgrade_PayloadShipsUpgradeFiles(t *testing.T) {
+ t.Run("release-upgrade/AC-07", func(t *testing.T) {
+ want := []string{
+ "/usr/lib/openwatch/openwatch-upgrade.sh",
+ "/usr/lib/openwatch/cleanup-backups.sh",
+ "/etc/systemd/system/openwatch-backup-cleanup.service",
+ "/etc/systemd/system/openwatch-backup-cleanup.timer",
+ "/etc/openwatch/upgrade.conf",
+ "/var/lib/openwatch/backups",
+ }
+
+ rpm := rpmPath(t)
+ rpmFiles := rpmQuery(t, rpm, "[%{FILENAMES}\n]")
+ for _, f := range want {
+ if !strings.Contains(rpmFiles, f) {
+ t.Errorf("RPM payload missing %s", f)
+ }
+ }
+ // upgrade.conf must be a config file (noreplace), like openwatch.toml.
+ if cfgs := rpmQuery(t, rpm, "[%{FILEFLAGS:fflags} %{FILENAMES}\n]"); !strings.Contains(cfgs, "/etc/openwatch/upgrade.conf") {
+ t.Errorf("upgrade.conf not in file list: %s", cfgs)
+ }
+
+ deb := debPath(t)
+ debFiles := debContents(t, deb)
+ for _, f := range want {
+ if !strings.Contains(debFiles, f) {
+ t.Errorf("DEB payload missing %s", f)
+ }
+ }
+ })
+}
diff --git a/specs/release/upgrade.spec.yaml b/specs/release/upgrade.spec.yaml
new file mode 100644
index 00000000..25821a29
--- /dev/null
+++ b/specs/release/upgrade.spec.yaml
@@ -0,0 +1,93 @@
+spec:
+ id: release-upgrade
+ title: One-command package upgrade with auto-migrate + backup
+ version: "1.0.0"
+ status: approved
+ tier: 2
+
+ context:
+ system: openwatch-go
+ feature: Automatic, safe DB migration on package upgrade
+ description: >
+ The operator upgrades OpenWatch with a single `dnf update openwatch*`
+ (or apt) command; the package post-install scriptlet applies any
+ pending database migrations automatically, taking a pg_dump restore
+ point first, and restarts the service — or, if a migration fails,
+ leaves the service stopped with a clear recovery path. The app schema
+ is in scope; the PostgreSQL ENGINE major-version upgrade is NOT (that
+ is an operator-supervised pg_upgrade, never triggered from a scriptlet).
+ related_specs:
+ - release-package-build
+ - system-config
+
+ objective:
+ summary: >
+ Make a production upgrade one command, safe by construction: never
+ migrate without a restore point, never run a binary against a
+ mismatched schema, and never lose data — for the single-instance
+ appliance model.
+ scope:
+ includes:
+ - openwatch migrate --status (report pending without applying)
+ - openwatch migrate --backup-dir (pg_dump restore point before applying; fail-closed)
+ - pg_dump password delivered via PG* env, never in argv
+ - RPM %post / DEB postinst run the upgrade helper ONLY on upgrade (not fresh install)
+ - Upgrade helper sequence stop -> backup+migrate -> start, fail-safe on error
+ - Backup-cleanup systemd timer + service, keep-most-recent prune
+ - Operator-tunable /etc/openwatch/upgrade.conf (AUTO_BACKUP, retention)
+ excludes:
+ - PostgreSQL engine major-version upgrade (operator-supervised pg_upgrade)
+ - Multi-instance / HA migration coordination (advisory lock + rolling restart)
+ - Zero-downtime upgrades (the appliance model accepts brief downtime during migrate)
+
+ constraints:
+ - id: C-01
+ description: The pre-migration pg_dump MUST receive connection parameters (especially the password) via PG* environment variables, never on the command line / argv.
+ type: security
+ enforcement: error
+ - id: C-02
+ description: When a backup is requested it MUST complete successfully before any migration is applied; a failed backup MUST abort without migrating (fail-closed). Backup is skipped only when the DB has no schema yet (fresh DB).
+ type: technical
+ enforcement: error
+ - id: C-03
+ description: The package scriptlet MUST run the auto-migrate helper ONLY on upgrade (RPM $1 >= 2; DEB configure with a non-empty old-version $2), never on a fresh install (no database exists yet).
+ type: technical
+ enforcement: error
+ - id: C-04
+ description: On migration failure the upgrade helper MUST leave the service STOPPED (never run the new binary against an un-migrated schema) and exit non-zero so the package manager surfaces the problem, printing the restore path.
+ type: technical
+ enforcement: error
+ - id: C-05
+ description: The backup-cleanup MUST always keep the most recent dump regardless of age, pruning only older dumps past the retention window.
+ type: technical
+ enforcement: error
+
+ acceptance_criteria:
+ - id: AC-01
+ description: dbbackup.Command places the password (and all connection params) in the command Env, never in Args — verified for a DSN carrying a password.
+ priority: critical
+ references_constraints: [C-01]
+ - id: AC-02
+ description: dbbackup.DumpFileName is deterministic given (version, stamp) and sanitizes unsafe characters; empty version defaults to a non-empty token.
+ priority: high
+ references_constraints: [C-01]
+ - id: AC-03
+ description: cmdMigrate supports --status (report current version + pending count, no apply) and --backup-dir (pg_dump before Apply, skipped when version==0, fail-closed on backup error) — the backup precedes migrations.Apply and a backup error returns before Apply.
+ priority: critical
+ references_constraints: [C-02]
+ - id: AC-04
+ description: The RPM %post guards the upgrade helper on `$1 -ge 2` and the DEB postinst guards it on a non-empty old-version `$2`, so it runs on upgrade only, never on fresh install.
+ priority: critical
+ references_constraints: [C-03]
+ - id: AC-05
+ description: packaging/common/openwatch-upgrade.sh stops the service, runs `openwatch migrate --backup-dir`, starts on success, and on failure leaves the service stopped and exits non-zero with the restore path (source-inspection).
+ priority: critical
+ references_constraints: [C-04]
+ - id: AC-06
+ description: Both packages ship the openwatch-backup-cleanup .service + .timer and enable the timer in %post/postinst; cleanup-backups.sh keeps the most recent dump and prunes older ones past BACKUP_RETENTION_DAYS.
+ priority: high
+ references_constraints: [C-05]
+ - id: AC-07
+ description: The package payloads contain /usr/lib/openwatch/openwatch-upgrade.sh, /usr/lib/openwatch/cleanup-backups.sh, the two systemd units, and the empty /var/lib/openwatch/backups directory; /etc/openwatch/upgrade.conf ships as a noreplace config file.
+ priority: high
+ references_constraints: [C-03]
diff --git a/specter.yaml b/specter.yaml
index 07cf6a99..f635b9e5 100644
--- a/specter.yaml
+++ b/specter.yaml
@@ -26,6 +26,7 @@ domains:
- system-job-queue
- system-policy
- release-package-build
+ - release-upgrade
- release-fips-build
- release-stage-0-signoff
- release-ci-gates