From 13c6e864809aed04352e534f5b38fa20469acada Mon Sep 17 00:00:00 2001 From: Remylus Losius Date: Mon, 15 Jun 2026 20:07:56 -0400 Subject: [PATCH 1/2] feat(ssh): per-host auth/sudo learning + scan sudo -S; default-on fallback OpenWatch now supports the full SSH matrix on the compliance scan and remembers, per host, which combination works so repeat connections avoid wasted or failed attempts. Functional: - The compliance scan can finally escalate via a sudo password. The kensa transport determines the host's sudo mode once per connection with a `true` sentinel, then runs every command in that mode: `sudo -n sh -c` (NOPASSWD) or `sudo -S -p '' sh -c` with the credential password on stdin (never argv). Previously it hardcoded `sudo -n`, so password-sudo hosts were inventoried but never scanned. - The sudo -S fallback now DEFAULTS ON (systemconfig.DefaultSecurity), retained as a kill-switch, so the full matrix works on a fresh install. Learning (system-connection-profile, migration 0035): - New host_connection_profile table + internal/connprofile store record the last-good SSH auth method and sudo mode per host. A hint, never a lock: paths still fall back and rewrite the record when the host changes, so a stale hint self-heals. - The shared dial layer gains DialOptions.PreferAuth (lead with the known-good method, avoiding a doomed publickey attempt that trips fail2ban / MaxAuthTries) and ObservedAuth (report which method authenticated). The scan reads + records the profile through it. Scope: this lands the substrate, the dial mechanism, and the scan (the path with the real functional gap and the highest command volume). Extending the same memo to the discovery / intelligence / liveness paths -- which already support the matrix but re-probe each cycle -- is a documented follow-up; their SSHTransport.Dial and the sshprivilege dialer need hostID threaded through first. Specs: new system-connection-profile (AC-01..06); kensa-executor C-15 / AC-19 and ssh-connectivity C-09 updated for the scan sudo -S + default-on. --- cmd/openwatch/main.go | 2 + cmd/openwatch/worker.go | 2 + internal/connprofile/connprofile.go | 129 +++++++++++++ internal/connprofile/connprofile_db_test.go | 140 ++++++++++++++ .../0035_host_connection_profile.sql | 41 ++++ internal/kensa/connprofile_wrap_test.go | 48 +++++ internal/kensa/scanfunc.go | 12 +- internal/kensa/transport.go | 178 +++++++++++++++--- internal/kensa/transport_test.go | 38 ++-- internal/ssh/authorder.go | 94 +++++++++ internal/ssh/authorder_test.go | 59 ++++++ internal/ssh/dial.go | 46 ++--- internal/systemconfig/security_test.go | 21 +++ internal/systemconfig/types.go | 16 +- specs/system/connection-profile.spec.yaml | 88 +++++++++ specs/system/kensa-executor.spec.yaml | 4 +- specs/system/ssh-connectivity.spec.yaml | 2 +- specter.yaml | 1 + 18 files changed, 854 insertions(+), 67 deletions(-) create mode 100644 internal/connprofile/connprofile.go create mode 100644 internal/connprofile/connprofile_db_test.go create mode 100644 internal/db/migrations/0035_host_connection_profile.sql create mode 100644 internal/kensa/connprofile_wrap_test.go create mode 100644 internal/ssh/authorder.go create mode 100644 internal/ssh/authorder_test.go create mode 100644 internal/systemconfig/security_test.go create mode 100644 specs/system/connection-profile.spec.yaml diff --git a/cmd/openwatch/main.go b/cmd/openwatch/main.go index e5650d95..77017c74 100644 --- a/cmd/openwatch/main.go +++ b/cmd/openwatch/main.go @@ -27,6 +27,7 @@ import ( "github.com/Hanalyx/openwatch/internal/alerts" "github.com/Hanalyx/openwatch/internal/audit" "github.com/Hanalyx/openwatch/internal/config" + "github.com/Hanalyx/openwatch/internal/connprofile" "github.com/Hanalyx/openwatch/internal/correlation" "github.com/Hanalyx/openwatch/internal/credential" "github.com/google/uuid" @@ -506,6 +507,7 @@ func cmdServe(cfg *config.Config, _ []string, stdout, stderr *os.File) int { vars, err := cfgStore.LoadScanVars(ctx) return vars, err }, + Profiles: connprofile.NewStore(pool), }); scanErr != nil { slog.WarnContext(bootCtx, "kensa scan wiring unavailable — on-demand scans will fail until the kensa-rules package is installed (or OPENWATCH_KENSA_RULES_DIR set)", slog.String("error", scanErr.Error())) diff --git a/cmd/openwatch/worker.go b/cmd/openwatch/worker.go index a538b19b..ad7ce13e 100644 --- a/cmd/openwatch/worker.go +++ b/cmd/openwatch/worker.go @@ -21,6 +21,7 @@ import ( "github.com/Hanalyx/openwatch/internal/audit" "github.com/Hanalyx/openwatch/internal/config" + "github.com/Hanalyx/openwatch/internal/connprofile" "github.com/Hanalyx/openwatch/internal/correlation" "github.com/Hanalyx/openwatch/internal/credential" "github.com/Hanalyx/openwatch/internal/db" @@ -183,6 +184,7 @@ func cmdWorker(cfg *config.Config, args []string, stdout, stderr *os.File) int { vars, err := varStore.LoadScanVars(ctx) return vars, err }, + Profiles: connprofile.NewStore(pool), }) if err != nil { slog.ErrorContext(bootCtx, "kensa scan wiring failed — is the kensa-rules package installed (or OPENWATCH_KENSA_RULES_DIR set)?", diff --git a/internal/connprofile/connprofile.go b/internal/connprofile/connprofile.go new file mode 100644 index 00000000..0c5cd323 --- /dev/null +++ b/internal/connprofile/connprofile.go @@ -0,0 +1,129 @@ +// Package connprofile is the per-host "last known good" SSH connection +// memory shared by every path that talks to a managed host (the liveness +// privilege probe, OS discovery, OS intelligence collection, and the +// compliance scan). +// +// The problem it solves: without memory, each connection re-discovers how +// to reach the host. It offers the public key to a host that only takes a +// password (a failed publickey attempt that counts against MaxAuthTries +// and can trip fail2ban), and it runs `sudo -n` on a host known to need a +// sudo password (a wasted round-trip before the `sudo -S` retry). This +// package records what actually worked so callers lead with the +// known-good choice. +// +// It is a HINT, never a lock. Callers still attempt the other methods if +// the recorded one fails and overwrite the record when the working choice +// changes, so a stale hint self-heals on the next connection. Treat a +// missing/unknown value as "no preference — try the normal order." +package connprofile + +import ( + "context" + "errors" + "fmt" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" +) + +// SSHAuthMethod is the SSH auth method that last authenticated. +type SSHAuthMethod string + +const ( + AuthUnknown SSHAuthMethod = "" + AuthKey SSHAuthMethod = "key" + AuthPassword SSHAuthMethod = "password" +) + +// SudoMode is the privilege-escalation mode that last reached root. +type SudoMode string + +const ( + SudoUnknown SudoMode = "" + SudoRoot SudoMode = "root" // login user is already root; no sudo + SudoNopasswd SudoMode = "nopasswd" // `sudo -n` works (NOPASSWD sudoers) + SudoPassword SudoMode = "password" // `sudo -S` with the credential password works +) + +// Profile is a host's recorded connection preferences. Zero-valued fields +// (AuthUnknown / SudoUnknown) mean "not yet observed — no preference." +type Profile struct { + SSHAuthMethod SSHAuthMethod + SudoMode SudoMode +} + +// Store reads and writes host connection profiles. +type Store struct { + pool *pgxpool.Pool +} + +// NewStore returns a Store backed by the given pool. +func NewStore(pool *pgxpool.Pool) *Store { + return &Store{pool: pool} +} + +// Get returns the recorded profile for a host. A host with no row yet +// returns a zero Profile (both dimensions unknown) and a nil error — an +// absent hint is not an error, it just means "no preference." +func (s *Store) Get(ctx context.Context, hostID uuid.UUID) (Profile, error) { + var auth, sudo *string + err := s.pool.QueryRow(ctx, + `SELECT ssh_auth_method, sudo_mode FROM host_connection_profile WHERE host_id = $1`, + hostID, + ).Scan(&auth, &sudo) + if errors.Is(err, pgx.ErrNoRows) { + return Profile{}, nil + } + if err != nil { + return Profile{}, fmt.Errorf("connprofile: get %s: %w", hostID, err) + } + p := Profile{} + if auth != nil { + p.SSHAuthMethod = SSHAuthMethod(*auth) + } + if sudo != nil { + p.SudoMode = SudoMode(*sudo) + } + return p, nil +} + +// RecordSSHAuth persists the SSH auth method that just worked, leaving the +// sudo_mode column untouched. A no-op for AuthUnknown so callers can call +// it unconditionally. Recording failures are non-fatal to the caller's +// real work (a connection succeeded) — the caller logs and continues. +func (s *Store) RecordSSHAuth(ctx context.Context, hostID uuid.UUID, m SSHAuthMethod) error { + if m == AuthUnknown { + return nil + } + return s.upsert(ctx, hostID, ptr(string(m)), nil) +} + +// RecordSudoMode persists the privilege mode that just reached root, +// leaving the ssh_auth_method column untouched. A no-op for SudoUnknown. +func (s *Store) RecordSudoMode(ctx context.Context, hostID uuid.UUID, m SudoMode) error { + if m == SudoUnknown { + return nil + } + return s.upsert(ctx, hostID, nil, ptr(string(m))) +} + +// upsert writes the provided columns, preserving any column passed as nil +// via COALESCE so a single-dimension record never clobbers the other. +func (s *Store) upsert(ctx context.Context, hostID uuid.UUID, auth, sudo *string) error { + _, err := s.pool.Exec(ctx, ` + INSERT INTO host_connection_profile (host_id, ssh_auth_method, sudo_mode, updated_at) + VALUES ($1, $2, $3, now()) + ON CONFLICT (host_id) DO UPDATE SET + ssh_auth_method = COALESCE(EXCLUDED.ssh_auth_method, host_connection_profile.ssh_auth_method), + sudo_mode = COALESCE(EXCLUDED.sudo_mode, host_connection_profile.sudo_mode), + updated_at = now()`, + hostID, auth, sudo, + ) + if err != nil { + return fmt.Errorf("connprofile: upsert %s: %w", hostID, err) + } + return nil +} + +func ptr(s string) *string { return &s } diff --git a/internal/connprofile/connprofile_db_test.go b/internal/connprofile/connprofile_db_test.go new file mode 100644 index 00000000..e1fd2263 --- /dev/null +++ b/internal/connprofile/connprofile_db_test.go @@ -0,0 +1,140 @@ +// @spec system-connection-profile +// +// Connection-profile store integration tests. Skipped without +// OPENWATCH_TEST_DSN. + +package connprofile + +import ( + "context" + "os" + "testing" + "time" + + "github.com/Hanalyx/openwatch/internal/db" + "github.com/Hanalyx/openwatch/internal/db/migrations" + "github.com/google/uuid" + "github.com/jackc/pgx/v5/pgxpool" +) + +func testDSN(t *testing.T) string { + t.Helper() + dsn := os.Getenv("OPENWATCH_TEST_DSN") + if dsn == "" { + t.Skip("set OPENWATCH_TEST_DSN to run connprofile store integration tests") + } + return dsn +} + +func seedHost(t *testing.T, pool *pgxpool.Pool) uuid.UUID { + t.Helper() + ctx := context.Background() + userID, _ := uuid.NewV7() + if _, err := pool.Exec(ctx, + `INSERT INTO users (id, username, email, password_hash) VALUES ($1, $2, $3, $4)`, + userID, "cp-creator-"+userID.String(), "cp-"+userID.String()+"@example.com", "x", + ); err != nil { + t.Fatalf("seed user: %v", err) + } + hostID, _ := uuid.NewV7() + if _, err := pool.Exec(ctx, + `INSERT INTO hosts (id, hostname, ip_address, created_by) VALUES ($1, $2, $3::inet, $4)`, + hostID, "cp-host-"+hostID.String(), "192.0.2.10", userID, + ); err != nil { + t.Fatalf("seed host: %v", err) + } + return hostID +} + +func freshStore(t *testing.T) (*Store, *pgxpool.Pool) { + t.Helper() + dsn := testDSN(t) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + t.Cleanup(cancel) + pool, err := db.NewPool(ctx, dsn, 5) + if err != nil { + t.Fatalf("NewPool: %v", err) + } + t.Cleanup(pool.Close) + if err := migrations.Apply(ctx, pool); err != nil { + t.Fatalf("migrations.Apply: %v", err) + } + return NewStore(pool), pool +} + +// @ac AC-01 +// AC-01: Get on a host with no recorded profile returns a zero Profile +// (both dimensions unknown) and no error. +func TestGet_AbsentIsZeroNoError(t *testing.T) { + t.Run("system-connection-profile/AC-01", func(t *testing.T) { + store, pool := freshStore(t) + hostID := seedHost(t, pool) + got, err := store.Get(context.Background(), hostID) + if err != nil { + t.Fatalf("Get: %v", err) + } + if got.SSHAuthMethod != AuthUnknown || got.SudoMode != SudoUnknown { + t.Errorf("absent profile = %+v, want zero", got) + } + }) +} + +// @ac AC-02 +// AC-02: recording one dimension never clobbers the other (COALESCE +// upsert), and re-recording overwrites that dimension. +func TestRecord_PartialUpsertPreservesOtherDimension(t *testing.T) { + t.Run("system-connection-profile/AC-02", func(t *testing.T) { + store, pool := freshStore(t) + ctx := context.Background() + hostID := seedHost(t, pool) + + // Record SSH auth only; sudo stays unknown. + if err := store.RecordSSHAuth(ctx, hostID, AuthKey); err != nil { + t.Fatalf("RecordSSHAuth: %v", err) + } + got, _ := store.Get(ctx, hostID) + if got.SSHAuthMethod != AuthKey || got.SudoMode != SudoUnknown { + t.Fatalf("after RecordSSHAuth = %+v, want {key, unknown}", got) + } + + // Record sudo only; SSH auth must be preserved. + if err := store.RecordSudoMode(ctx, hostID, SudoPassword); err != nil { + t.Fatalf("RecordSudoMode: %v", err) + } + got, _ = store.Get(ctx, hostID) + if got.SSHAuthMethod != AuthKey || got.SudoMode != SudoPassword { + t.Fatalf("after RecordSudoMode = %+v, want {key, password}", got) + } + + // Overwrite a dimension (host reconfigured key->password). + if err := store.RecordSSHAuth(ctx, hostID, AuthPassword); err != nil { + t.Fatalf("RecordSSHAuth overwrite: %v", err) + } + got, _ = store.Get(ctx, hostID) + if got.SSHAuthMethod != AuthPassword || got.SudoMode != SudoPassword { + t.Fatalf("after overwrite = %+v, want {password, password}", got) + } + }) +} + +// @ac AC-03 +// AC-03: recording an unknown value is a no-op (callers may call +// unconditionally) and never creates a row. +func TestRecord_UnknownIsNoOp(t *testing.T) { + t.Run("system-connection-profile/AC-03", func(t *testing.T) { + store, pool := freshStore(t) + ctx := context.Background() + hostID := seedHost(t, pool) + if err := store.RecordSSHAuth(ctx, hostID, AuthUnknown); err != nil { + t.Fatalf("RecordSSHAuth(unknown): %v", err) + } + if err := store.RecordSudoMode(ctx, hostID, SudoUnknown); err != nil { + t.Fatalf("RecordSudoMode(unknown): %v", err) + } + var n int + _ = pool.QueryRow(ctx, `SELECT count(*) FROM host_connection_profile WHERE host_id = $1`, hostID).Scan(&n) + if n != 0 { + t.Errorf("unknown records created %d rows, want 0", n) + } + }) +} diff --git a/internal/db/migrations/0035_host_connection_profile.sql b/internal/db/migrations/0035_host_connection_profile.sql new file mode 100644 index 00000000..177a7f9b --- /dev/null +++ b/internal/db/migrations/0035_host_connection_profile.sql @@ -0,0 +1,41 @@ +-- 0035_host_connection_profile.sql +-- +-- Per-host "last known good" SSH connection profile. OpenWatch talks to a +-- managed host from four code paths (liveness privilege probe, OS +-- discovery, OS intelligence collection, and the compliance scan). Each +-- has to decide, every time it connects, (a) which SSH auth method to +-- offer first and (b) how to escalate to root. Without memory, every +-- connection re-discovers this: it offers the public key even to a host +-- that only accepts the password (a failed publickey attempt that counts +-- against MaxAuthTries and can trip fail2ban), and it runs `sudo -n` even +-- on a host known to need a sudo password (a wasted round-trip before the +-- `sudo -S` retry). +-- +-- This table records what actually worked last time so each path can lead +-- with the known-good choice. It is a hint, never a lock: callers still +-- fall back to the other methods if the recorded one fails (keys rotate, +-- sudoers change) and rewrite the row when the working choice changes, so +-- a stale hint self-heals on the next connection. +-- +-- One row per host, UPSERTed by whichever path last connected. Columns are +-- nullable: a value is absent until that dimension has been observed once. + +-- +goose Up +CREATE TABLE host_connection_profile ( + host_id UUID PRIMARY KEY REFERENCES hosts(id) ON DELETE CASCADE, + + -- The SSH auth method that last authenticated successfully. Drives the + -- order auth methods are offered to crypto/ssh on the next dial. + ssh_auth_method TEXT CHECK (ssh_auth_method IN ('key', 'password')), + + -- The privilege mode that last reached root successfully: + -- 'root' — the login user is already root; no sudo wrapping. + -- 'nopasswd' — `sudo -n` succeeded (NOPASSWD sudoers). + -- 'password' — `sudo -S` with the credential password succeeded. + sudo_mode TEXT CHECK (sudo_mode IN ('root', 'nopasswd', 'password')), + + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- +goose Down +DROP TABLE IF EXISTS host_connection_profile; diff --git a/internal/kensa/connprofile_wrap_test.go b/internal/kensa/connprofile_wrap_test.go new file mode 100644 index 00000000..3f5f761d --- /dev/null +++ b/internal/kensa/connprofile_wrap_test.go @@ -0,0 +1,48 @@ +// @spec system-connection-profile +// +// AC traceability (this file): +// +// AC-05 TestWrap_BySudoMode + +package kensa + +import ( + "strings" + "testing" + + "github.com/Hanalyx/openwatch/internal/connprofile" +) + +// @ac AC-05 +// AC-05: the scan transport wraps a command per the connection's learned +// sudo mode — no sudo verbatim, NOPASSWD via `sudo -n sh -c`, and password +// via `sudo -S -p ” sh -c` with the credential password on stdin (never +// in the command line / argv). +func TestWrap_BySudoMode(t *testing.T) { + t.Run("system-connection-profile/AC-05", func(t *testing.T) { + // no sudo -> verbatim, no stdin. + if line, stdin := (&sshTransport{sudo: false}).wrap("id"); line != "id" || stdin != nil { + t.Errorf("no-sudo wrap = %q (stdin=%v)", line, stdin) + } + // NOPASSWD -> sudo -n, no stdin. + if line, stdin := (&sshTransport{sudo: true, mode: connprofile.SudoNopasswd}).wrap("id"); line != `sudo -n sh -c 'id'` || stdin != nil { + t.Errorf("nopasswd wrap = %q (stdin=%v)", line, stdin) + } + // password -> sudo -S -p '' with the password on stdin, never argv. + pw := &sshTransport{sudo: true, mode: connprofile.SudoPassword, password: "p@ss"} + line, stdin := pw.wrap("id") + if line != `sudo -S -p '' sh -c 'id'` { + t.Errorf("password wrap = %q", line) + } + if string(stdin) != "p@ss\n" { + t.Errorf("stdin = %q, want password+newline", stdin) + } + if strings.Contains(line, "p@ss") { + t.Error("password must not appear in the command line") + } + // unknown mode degrades to sudo -n (historical behaviour). + if line, _ := (&sshTransport{sudo: true, mode: connprofile.SudoUnknown}).wrap("id"); line != `sudo -n sh -c 'id'` { + t.Errorf("unknown-mode wrap = %q, want sudo -n", line) + } + }) +} diff --git a/internal/kensa/scanfunc.go b/internal/kensa/scanfunc.go index d2cf7402..161d6905 100644 --- a/internal/kensa/scanfunc.go +++ b/internal/kensa/scanfunc.go @@ -76,6 +76,11 @@ type ScanFuncDeps struct { // changes, so a Settings edit applies to the NEXT scan without a // restart. Nil keeps the boot-loaded corpus (built-in defaults). Variables func(ctx context.Context) (map[string]string, error) + // Profiles is the per-host connection memory (nil disables + // learning). The scan transport reads it to lead with the host's + // known-good SSH auth method + sudo mode, and writes back what it + // learns. *connprofile.Store in production. + Profiles ConnProfile } // NewProductionScanFunc loads the rule corpus once at construction @@ -92,9 +97,10 @@ func NewProductionScanFunc(deps ScanFuncDeps) (ScanFunc, error) { } corpus := &corpusCache{rules: rules, dir: deps.RulesDir} factory := &TransportFactory{ - Resolve: deps.Credentials.Resolve, - Mode: deps.HostKeyMode, - Store: deps.KnownHosts, + Resolve: deps.Credentials.Resolve, + Mode: deps.HostKeyMode, + Store: deps.KnownHosts, + Profiles: deps.Profiles, } svc, err := newScanService(factory) if err != nil { diff --git a/internal/kensa/transport.go b/internal/kensa/transport.go index dddfdc26..1b205cc4 100644 --- a/internal/kensa/transport.go +++ b/internal/kensa/transport.go @@ -30,6 +30,7 @@ import ( "github.com/google/uuid" cryptossh "golang.org/x/crypto/ssh" + "github.com/Hanalyx/openwatch/internal/connprofile" "github.com/Hanalyx/openwatch/internal/credential" owssh "github.com/Hanalyx/openwatch/internal/ssh" ) @@ -49,6 +50,17 @@ var ( // Production wires credential.Service.Resolve; tests inject fakes. type CredentialResolver func(ctx context.Context, hostID uuid.UUID) (*credential.Credential, error) +// ConnProfile is the per-host connection memory the scan transport reads +// to lead with the known-good SSH auth method + sudo mode, and writes when +// it learns (or re-learns) them. nil disables learning — the transport +// then behaves statelessly (key-first auth, probe sudo every connection). +// *connprofile.Store satisfies it; tests pass a fake or nil. +type ConnProfile interface { + Get(ctx context.Context, hostID uuid.UUID) (connprofile.Profile, error) + RecordSSHAuth(ctx context.Context, hostID uuid.UUID, m connprofile.SSHAuthMethod) error + RecordSudoMode(ctx context.Context, hostID uuid.UUID, m connprofile.SudoMode) error +} + // TransportFactory implements kensa api.TransportFactory for the // long-lived Kensa instance. One factory serves every scan: Connect // resolves the target host's credential per call, keyed by the host id @@ -62,6 +74,8 @@ type TransportFactory struct { Mode owssh.Mode // Store is the known-hosts store backing Mode. Store owssh.KnownHostsStore + // Profiles is the per-host connection memory (nil disables learning). + Profiles ConnProfile } // Connect resolves the host's credential and dials @@ -87,15 +101,101 @@ func (f *TransportFactory) Connect(ctx context.Context, host kensaapi.HostConfig if port == 0 { port = 22 } + + // Lead with the host's recorded SSH auth method + sudo mode so the + // common case is one publickey-or-password attempt and one sudo form, + // not a doomed key attempt or a wasted `sudo -n` round-trip. + var prefer string + var knownSudo connprofile.SudoMode + if f.Profiles != nil { + if p, perr := f.Profiles.Get(ctx, hostID); perr == nil { + prefer = sshPrefer(p.SSHAuthMethod) + knownSudo = p.SudoMode + } + } + + var observed string client, err := owssh.Dial(ctx, host.Hostname, port, cred, owssh.DialOptions{ - Mode: f.Mode, - Store: f.Store, - Timeout: owssh.DefaultDialTimeout, + Mode: f.Mode, + Store: f.Store, + Timeout: owssh.DefaultDialTimeout, + PreferAuth: prefer, + ObservedAuth: &observed, }) if err != nil { return nil, fmt.Errorf("kensa transport: dial %s: %w", host.Hostname, err) } - return &sshTransport{client: client, sudo: sudo}, nil + if f.Profiles != nil && observed != "" { + _ = f.Profiles.RecordSSHAuth(ctx, hostID, connprofile.SSHAuthMethod(observed)) + } + + t := &sshTransport{client: client, sudo: sudo, password: cred.Password} + + // Decide how to reach root, once per connection, and reuse it for + // every command. We cannot infer "sudo refused" from a real check's + // non-zero exit (checks branch on exit codes), so a dedicated `true` + // sentinel removes the ambiguity. The recorded mode only picks which + // form to try first — the probe still confirms it, so a stale hint + // (sudoers changed) self-heals. + switch { + case sudo: + t.mode = probeSudoMode(ctx, client, cred.Password, knownSudo) + case host.Sudo: // requested, but the login user is already root + t.mode = connprofile.SudoRoot + default: // no escalation requested — nothing to learn + t.mode = connprofile.SudoUnknown + } + if f.Profiles != nil && t.mode != connprofile.SudoUnknown && t.mode != knownSudo { + _ = f.Profiles.RecordSudoMode(ctx, hostID, t.mode) + } + return t, nil +} + +// sshPrefer maps a recorded SSH auth method to the ssh dial-layer +// preference token. Unknown -> "" (historical key-first order). +func sshPrefer(m connprofile.SSHAuthMethod) string { + switch m { + case connprofile.AuthKey: + return owssh.PreferKey + case connprofile.AuthPassword: + return owssh.PreferPassword + } + return "" +} + +// probeSudoMode determines how the connection reaches root by running the +// innocuous `true` under each sudo form, leading with `prefer` so a host +// with a recorded mode usually needs a single round-trip. Returns +// SudoUnknown when neither form works (no NOPASSWD and no usable password) +// — the transport then degrades to `sudo -n`, exactly as before. +func probeSudoMode(ctx context.Context, client *cryptossh.Client, password string, prefer connprofile.SudoMode) connprofile.SudoMode { + tryN := func() bool { + res, err := runRaw(ctx, client, "sudo -n true", nil) + return err == nil && res != nil && res.ExitCode == 0 + } + tryS := func() bool { + if password == "" { + return false + } + res, err := runRaw(ctx, client, "sudo -S -p '' true", []byte(password+"\n")) + return err == nil && res != nil && res.ExitCode == 0 + } + if prefer == connprofile.SudoPassword { + if tryS() { + return connprofile.SudoPassword + } + if tryN() { + return connprofile.SudoNopasswd + } + return connprofile.SudoUnknown + } + if tryN() { + return connprofile.SudoNopasswd + } + if tryS() { + return connprofile.SudoPassword + } + return connprofile.SudoUnknown } // effectiveCredAndSudo applies the per-host username override @@ -113,17 +213,53 @@ func effectiveCredAndSudo(cred *credential.Credential, host kensaapi.HostConfig) // sshTransport is one live SSH connection; each Run opens a fresh // session on it (sessions are cheap; the TCP+handshake is shared). +// +// mode + password are set once at Connect and read-only thereafter, so +// concurrent Run calls (Kensa may parallelize rule checks on one host) +// need no synchronization. type sshTransport struct { - client *cryptossh.Client - sudo bool + client *cryptossh.Client + sudo bool + password string + mode connprofile.SudoMode } -// Run executes cmd on the host. A non-zero remote exit code is a valid -// *CommandResult (checks branch on exit codes); only transport-level -// failures (session open, missing exit status, ctx cancellation) -// return a non-nil error. Spec AC-20. +// Run executes cmd on the host, wrapping it for privilege escalation per +// the connection's determined sudo mode. A non-zero remote exit code is a +// valid *CommandResult (checks branch on exit codes); only transport-level +// failures (session open, missing exit status, ctx cancellation) return a +// non-nil error. Spec AC-19, AC-20. func (t *sshTransport) Run(ctx context.Context, cmd string) (*kensaapi.CommandResult, error) { - sess, err := t.client.NewSession() + line, stdin := t.wrap(cmd) + return runRaw(ctx, t.client, line, stdin) +} + +// wrap renders the remote command line and the optional stdin payload for +// the connection's privilege mode: +// - no sudo (root login, or escalation not requested): command verbatim. +// - password sudo: `sudo -S -p ” sh -c ''` with the credential +// password (newline-terminated) on stdin. No `-k`: the scan issues +// many commands per host, so we let sudo's own timestamp cache short- +// circuit when it can and simply re-supply the password otherwise — +// unlike the single-shot liveness/discovery probes, which `-k` to fail +// a stale wrong password fast. +// - otherwise (nopasswd / unknown): `sudo -n sh -c ''`. On unknown +// this is the historical degrade-gracefully behaviour. +func (t *sshTransport) wrap(cmd string) (line string, stdin []byte) { + if !t.sudo { + return cmd, nil + } + if t.mode == connprofile.SudoPassword { + return "sudo -S -p '' sh -c '" + escapeSingleQuotes(cmd) + "'", []byte(t.password + "\n") + } + return "sudo -n sh -c '" + escapeSingleQuotes(cmd) + "'", nil +} + +// runRaw executes line on a fresh session of client, optionally feeding +// stdin (the sudo -S password) to the remote process. Shared by Run and +// the connection-time sudo probe. +func runRaw(ctx context.Context, client *cryptossh.Client, line string, stdin []byte) (*kensaapi.CommandResult, error) { + sess, err := client.NewSession() if err != nil { return nil, fmt.Errorf("kensa transport: new session: %w", err) } @@ -132,9 +268,12 @@ func (t *sshTransport) Run(ctx context.Context, cmd string) (*kensaapi.CommandRe var stdout, stderr bytes.Buffer sess.Stdout = &stdout sess.Stderr = &stderr + if stdin != nil { + sess.Stdin = bytes.NewReader(stdin) + } start := time.Now() - if err := sess.Start(commandLine(cmd, t.sudo)); err != nil { + if err := sess.Start(line); err != nil { return nil, fmt.Errorf("kensa transport: start: %w", err) } @@ -187,16 +326,11 @@ func (t *sshTransport) ControlChannelSensitive() bool { return false } // Close terminates the underlying SSH connection. func (t *sshTransport) Close() error { return t.client.Close() } -// commandLine renders the remote command per the api.Transport -// contract: with sudo, wrap as sudo -n sh -c with the command -// single-quoted and embedded single quotes escaped (quote, backslash -// quote, quote); without sudo, the command passes through unmodified. -// Spec AC-19. -func commandLine(cmd string, sudo bool) string { - if !sudo { - return cmd - } - return "sudo -n sh -c '" + strings.ReplaceAll(cmd, "'", `'\''`) + "'" +// escapeSingleQuotes renders cmd safe to embed inside a single-quoted +// `sh -c '...'` wrapper: each embedded single quote becomes the +// quote / backslash-quote / quote idiom. Spec AC-19. +func escapeSingleQuotes(cmd string) string { + return strings.ReplaceAll(cmd, "'", `'\''`) } // trimOneTrailingNewline removes exactly one trailing newline (LF or diff --git a/internal/kensa/transport_test.go b/internal/kensa/transport_test.go index eb7b1bc5..8124ccb9 100644 --- a/internal/kensa/transport_test.go +++ b/internal/kensa/transport_test.go @@ -14,25 +14,41 @@ import ( "path/filepath" "strings" "testing" + + "github.com/Hanalyx/openwatch/internal/connprofile" ) // @ac AC-19 func TestCommandLine_SudoWrapsAndQuotes(t *testing.T) { t.Run("system-kensa-executor/AC-19", func(t *testing.T) { - // Sudo disabled: pass-through, byte for byte. - if got := commandLine(`grep -q "^x" /etc/f`, false); got != `grep -q "^x" /etc/f` { - t.Errorf("non-sudo command modified: %q", got) + // Sudo disabled: pass-through, byte for byte, no stdin. + noSudo := &sshTransport{sudo: false} + if line, stdin := noSudo.wrap(`grep -q "^x" /etc/f`); line != `grep -q "^x" /etc/f` || stdin != nil { + t.Errorf("non-sudo wrap = %q (stdin=%v), want passthrough no-stdin", line, stdin) } - // Sudo enabled: sudo -n sh -c wrapping per the api.Transport - // contract. - if got, want := commandLine("systemctl is-active sshd", true), - `sudo -n sh -c 'systemctl is-active sshd'`; got != want { - t.Errorf("sudo wrap = %q, want %q", got, want) + + // NOPASSWD mode: sudo -n sh -c wrapping, no stdin. + nopasswd := &sshTransport{sudo: true, mode: connprofile.SudoNopasswd} + if line, stdin := nopasswd.wrap("systemctl is-active sshd"); line != `sudo -n sh -c 'systemctl is-active sshd'` || stdin != nil { + t.Errorf("nopasswd wrap = %q (stdin=%v)", line, stdin) } // Embedded single quotes survive via the '\'' idiom. - if got, want := commandLine(`echo it's`, true), - `sudo -n sh -c 'echo it'\''s'`; got != want { - t.Errorf("quote escape = %q, want %q", got, want) + if line, _ := nopasswd.wrap(`echo it's`); line != `sudo -n sh -c 'echo it'\''s'` { + t.Errorf("quote escape = %q", line) + } + + // Password mode: sudo -S -p '' sh -c wrapping, password (newline + // terminated) on stdin, never in the command line. + pw := &sshTransport{sudo: true, mode: connprofile.SudoPassword, password: "s3cr3t"} + line, stdin := pw.wrap("cat /etc/shadow") + if line != `sudo -S -p '' sh -c 'cat /etc/shadow'` { + t.Errorf("password wrap line = %q", line) + } + if string(stdin) != "s3cr3t\n" { + t.Errorf("password stdin = %q, want %q", stdin, "s3cr3t\n") + } + if strings.Contains(line, "s3cr3t") { + t.Error("password leaked into the command line") } }) } diff --git a/internal/ssh/authorder.go b/internal/ssh/authorder.go new file mode 100644 index 00000000..1a1a8303 --- /dev/null +++ b/internal/ssh/authorder.go @@ -0,0 +1,94 @@ +package ssh + +import ( + "sync" + + "golang.org/x/crypto/ssh" + + "github.com/Hanalyx/openwatch/internal/credential" +) + +// Auth-method preference tokens. Plain strings (not a typed enum) so the +// ssh package stays decoupled from connprofile — callers translate. +const ( + PreferKey = "key" + PreferPassword = "password" +) + +// authObserver records which auth method crypto/ssh last attempted during +// a handshake. Because the client stops at the first method the server +// accepts, the last-attempted method after a SUCCESSFUL handshake is the +// one that authenticated. Concurrency-safe: a single ClientConfig is only +// used by one handshake, but the callbacks may fire from the handshake +// goroutine. +type authObserver struct { + mu sync.Mutex + last string +} + +func (o *authObserver) note(m string) { + o.mu.Lock() + o.last = m + o.mu.Unlock() +} + +// Last returns the method that authenticated ("key" | "password"), or "" +// if no method-bearing callback fired (e.g. no auth attempted). +func (o *authObserver) Last() string { + o.mu.Lock() + defer o.mu.Unlock() + return o.last +} + +// orderedAuthMethods builds the crypto/ssh auth-method list from cred, +// leading with `prefer` when that method's material is present, and wraps +// each method in a callback that records its attempt on obs. +// +// Both available methods are ALWAYS offered — `prefer` controls order, not +// inclusion. A stale preference (e.g. the host's key was rotated out) still +// falls back to the other method within the same handshake; the observer +// then records the method that actually worked so the next connection +// leads with it. This is the "preference, not a lock" property: the hint +// optimizes the common case and self-heals when the host changes. +func orderedAuthMethods(cred *credential.Credential, prefer string, obs *authObserver) ([]ssh.AuthMethod, error) { + var keyM, pwM ssh.AuthMethod + + if cred.PrivateKey != "" { + signer, err := parseSigner([]byte(cred.PrivateKey), cred.PrivateKeyPassphrase) + if err != nil { + return nil, err + } + keyM = ssh.PublicKeysCallback(func() ([]ssh.Signer, error) { + obs.note(PreferKey) + return []ssh.Signer{signer}, nil + }) + } + if cred.Password != "" { + pw := cred.Password + pwM = ssh.PasswordCallback(func() (string, error) { + obs.note(PreferPassword) + return pw, nil + }) + } + + out := make([]ssh.AuthMethod, 0, 2) + if prefer == PreferPassword { + // Lead with password, then key as fallback. + if pwM != nil { + out = append(out, pwM) + } + if keyM != nil { + out = append(out, keyM) + } + return out, nil + } + // Default order (prefer == "key" or unset): key first, then password. + // Matches the historical pre-learning behaviour. + if keyM != nil { + out = append(out, keyM) + } + if pwM != nil { + out = append(out, pwM) + } + return out, nil +} diff --git a/internal/ssh/authorder_test.go b/internal/ssh/authorder_test.go new file mode 100644 index 00000000..8b39384f --- /dev/null +++ b/internal/ssh/authorder_test.go @@ -0,0 +1,59 @@ +// @spec system-connection-profile +// +// AC traceability (this file): +// +// AC-04 TestOrderedAuthMethods_PreferenceNotExclusion / TestAuthObserver_RecordsLast + +package ssh + +import "testing" + +// @ac AC-04 +// AC-04: every available auth method is offered (preference orders, does +// not exclude), so a stale hint still falls back within the one handshake. +func TestOrderedAuthMethods_PreferenceNotExclusion(t *testing.T) { + t.Run("system-connection-profile/AC-04", func(t *testing.T) { + both, _ := generateEd25519CredAndAuthKey(t) + both.Password = "pw" // promote to a key+password credential + + // Both methods offered regardless of which is preferred. + for _, prefer := range []string{"", PreferKey, PreferPassword} { + m, err := orderedAuthMethods(both, prefer, &authObserver{}) + if err != nil { + t.Fatalf("prefer %q: %v", prefer, err) + } + if len(m) != 2 { + t.Errorf("prefer %q: %d methods, want 2 (preference is order, not exclusion)", prefer, len(m)) + } + } + + // Key-only and password-only creds offer exactly their one method. + keyOnly := *both + keyOnly.Password = "" + if m, _ := orderedAuthMethods(&keyOnly, "", &authObserver{}); len(m) != 1 { + t.Errorf("key-only: %d methods, want 1", len(m)) + } + pwOnly := *both + pwOnly.PrivateKey = "" + if m, _ := orderedAuthMethods(&pwOnly, "", &authObserver{}); len(m) != 1 { + t.Errorf("password-only: %d methods, want 1", len(m)) + } + }) +} + +// @ac AC-04 +// AC-04 (observation): the observer reports the last method attempted, +// which after a successful handshake is the one that authenticated. +func TestAuthObserver_RecordsLast(t *testing.T) { + t.Run("system-connection-profile/AC-04", func(t *testing.T) { + o := &authObserver{} + if o.Last() != "" { + t.Errorf("fresh observer Last = %q, want empty", o.Last()) + } + o.note(PreferKey) + o.note(PreferPassword) // key rejected, password accepted -> last wins + if o.Last() != PreferPassword { + t.Errorf("Last = %q, want %q", o.Last(), PreferPassword) + } + }) +} diff --git a/internal/ssh/dial.go b/internal/ssh/dial.go index bbbbb772..5c1e8248 100644 --- a/internal/ssh/dial.go +++ b/internal/ssh/dial.go @@ -37,6 +37,20 @@ type DialOptions struct { Mode Mode Store KnownHostsStore Timeout time.Duration + + // PreferAuth, when "key" or "password", offers that auth method + // FIRST (the other is still offered as fallback). Empty preserves the + // historical key-first order. Callers set this from the host's + // recorded connection profile to avoid a doomed publickey attempt on + // a password-only host (which counts against MaxAuthTries / trips + // fail2ban). + PreferAuth string + + // ObservedAuth, when non-nil, receives the auth method that actually + // authenticated ("key" | "password") after a successful dial. Callers + // persist it so the next connection leads with it. Untouched on dial + // failure. + ObservedAuth *string } // netDial is the network-level dial function. Production uses @@ -61,7 +75,11 @@ func Dial(ctx context.Context, host string, port int, cred *credential.Credentia opts.Store = NewMemoryStore() } - authMethods, err := authMethodsFor(cred) + if cred == nil { + return nil, ErrNoAuthMethod + } + obs := &authObserver{} + authMethods, err := orderedAuthMethods(cred, opts.PreferAuth, obs) if err != nil { return nil, err } @@ -107,28 +125,12 @@ func Dial(ctx context.Context, host string, port int, cred *credential.Credentia _ = conn.Close() return nil, classifyHandshakeErr(err) } - return ssh.NewClient(sshConn, chans, reqs), nil -} - -// authMethodsFor produces the ssh.AuthMethod list from a resolved -// credential. Public-key auth is preferred when available; password -// is added if cred.AuthMethod allows it. -func authMethodsFor(cred *credential.Credential) ([]ssh.AuthMethod, error) { - if cred == nil { - return nil, ErrNoAuthMethod + // Handshake succeeded: the last method the observer saw is the one + // that authenticated. Report it so the caller can persist the hint. + if opts.ObservedAuth != nil { + *opts.ObservedAuth = obs.Last() } - out := []ssh.AuthMethod{} - if cred.PrivateKey != "" { - signer, err := parseSigner([]byte(cred.PrivateKey), cred.PrivateKeyPassphrase) - if err != nil { - return nil, err - } - out = append(out, ssh.PublicKeys(signer)) - } - if cred.Password != "" { - out = append(out, ssh.Password(cred.Password)) - } - return out, nil + return ssh.NewClient(sshConn, chans, reqs), nil } // classifyHandshakeErr maps ssh.NewClientConn errors to the package's diff --git a/internal/systemconfig/security_test.go b/internal/systemconfig/security_test.go new file mode 100644 index 00000000..778078e1 --- /dev/null +++ b/internal/systemconfig/security_test.go @@ -0,0 +1,21 @@ +// @spec system-connection-profile +// +// AC traceability (this file): +// +// AC-06 TestDefaultSecurity_FallbackOnByDefault + +package systemconfig + +import "testing" + +// @ac AC-06 +// AC-06: the sudo -S password fallback is ON by default. OpenWatch +// supports the full SSH matrix out of the box; the flag is a kill-switch, +// not an opt-in. +func TestDefaultSecurity_FallbackOnByDefault(t *testing.T) { + t.Run("system-connection-profile/AC-06", func(t *testing.T) { + if !DefaultSecurity().AllowCredentialSudoPassword { + t.Error("DefaultSecurity().AllowCredentialSudoPassword = false, want true (default-on kill-switch)") + } + }) +} diff --git a/internal/systemconfig/types.go b/internal/systemconfig/types.go index f47b3b07..a334afd6 100644 --- a/internal/systemconfig/types.go +++ b/internal/systemconfig/types.go @@ -111,18 +111,22 @@ var ErrInvalidConfig = errors.New("systemconfig: invalid config") // Spec: system-ssh-connectivity v1.1.0 C-09..C-12. // // AllowCredentialSudoPassword is the policy knob that gates the sudo -S -// password fallback in the SSH dial layer. Defaults to false (opt-in). -// When false, the collector and discovery probes behave exactly as in -// v1.0.0 — every sudo command goes through `sudo -n` and degrades -// gracefully on NOPASSWD-not-configured hosts. +// password fallback in the SSH dial layer. Defaults to TRUE: OpenWatch +// supports the full SSH matrix out of the box (key or password auth, +// NOPASSWD or password sudo), so a host that needs a sudo password is +// reachable on a fresh install with no extra configuration. The flag is +// retained as a kill-switch — a site that forbids feeding credential +// passwords to remote sudo can set it false, and every privilege path +// degrades to `sudo -n` only. type SecurityConfig struct { AllowCredentialSudoPassword bool `json:"allow_credential_sudo_password"` } -// DefaultSecurity returns the baked-in defaults. Fallback is OFF. +// DefaultSecurity returns the baked-in defaults. The sudo -S password +// fallback is ON by default (kill-switch, not opt-in). func DefaultSecurity() SecurityConfig { return SecurityConfig{ - AllowCredentialSudoPassword: false, + AllowCredentialSudoPassword: true, } } diff --git a/specs/system/connection-profile.spec.yaml b/specs/system/connection-profile.spec.yaml new file mode 100644 index 00000000..a66d5edc --- /dev/null +++ b/specs/system/connection-profile.spec.yaml @@ -0,0 +1,88 @@ +spec: + id: system-connection-profile + title: Per-host SSH connection learning (auth method + sudo mode) + version: "1.0.0" + status: approved + tier: 2 + + context: + system: openwatch-go + feature: SSH connection memory shared by all host-communication paths + description: > + OpenWatch talks to a managed host from several paths (liveness + privilege probe, OS discovery, OS intelligence collection, and the + compliance scan). Each must decide, per connection, which SSH auth + method to offer first (key or password) and how to escalate to root + (no sudo, NOPASSWD via `sudo -n`, or a sudo password via `sudo -S`). + Without memory every connection re-discovers this: it offers an + unauthorized key (a failed publickey attempt that counts against + MaxAuthTries and can trip fail2ban) and runs a doomed `sudo -n` on a + host that needs a password. This spec records what last worked, per + host, so each path leads with the known-good choice — a hint, never a + lock: callers still fall back and rewrite the record when the host + changes, so a stale hint self-heals. + related_specs: + - system-ssh-connectivity + - system-kensa-executor + + objective: + summary: > + Support the full SSH matrix on every path — key OR password auth, + NOPASSWD OR password sudo — out of the box, and remember per host + which combination works so repeat connections avoid wasted/failed + attempts. + scope: + includes: + - host_connection_profile table + connprofile store (Get / Record) + - Dial-layer auth-method ordering (PreferAuth) + observation (ObservedAuth) + - Compliance-scan sudo -S support with per-connection sudo-mode probe + learning + - Default-on sudo -S password fallback (kill-switch retained) + excludes: + - Settings UI/API to toggle the kill-switch (DB-only for now) + - Extending auth/sudo learning to the discovery, intelligence, and + liveness paths (the substrate + dial mechanism land here; wiring + those paths is a follow-up) + + constraints: + - id: C-01 + description: The connection profile MUST be a hint, not a lock — a recorded value only orders the attempts; the path MUST still fall back to the other method and overwrite the record when the working choice changes. + type: technical + enforcement: error + - id: C-02 + description: Recording one dimension (ssh_auth_method or sudo_mode) MUST NOT clobber the other; an unknown value MUST be a no-op. + type: technical + enforcement: error + - id: C-03 + description: The sudo password MUST be delivered to the remote sudo via the session stdin pipe, never embedded in the command line / argv. + type: security + enforcement: error + - id: C-04 + description: The compliance scan MUST determine the sudo mode once per connection via an innocuous `true` sentinel (not by inferring refusal from a real check's non-zero exit) and reuse it for every command on that connection. + type: technical + enforcement: error + + acceptance_criteria: + - id: AC-01 + description: connprofile.Store.Get on a host with no recorded profile returns a zero Profile (both dimensions unknown) and a nil error. + priority: high + references_constraints: [C-02] + - id: AC-02 + description: Recording one dimension preserves the other (COALESCE upsert), and re-recording a dimension overwrites it. + priority: critical + references_constraints: [C-01, C-02] + - id: AC-03 + description: Recording an unknown SSH auth method or sudo mode is a no-op and creates no row. + priority: high + references_constraints: [C-02] + - id: AC-04 + description: ssh.Dial offers every available auth method (preference is order, not exclusion); DialOptions.PreferAuth leads with the named method and DialOptions.ObservedAuth reports the method that actually authenticated after a successful dial. + priority: critical + references_constraints: [C-01] + - id: AC-05 + description: The scan transport wraps a command per the connection's sudo mode — NOPASSWD as `sudo -n sh -c ''` with no stdin, password as `sudo -S -p '' sh -c ''` with the credential password (newline-terminated) on stdin and never in the command line, and no-sudo verbatim. + priority: critical + references_constraints: [C-03, C-04] + - id: AC-06 + description: The sudo -S password fallback is ON by default (DefaultSecurity().AllowCredentialSudoPassword is true), retained as a kill-switch rather than an opt-in. + priority: high + references_constraints: [C-01] diff --git a/specs/system/kensa-executor.spec.yaml b/specs/system/kensa-executor.spec.yaml index 0bd4fa8f..55bd1eb6 100644 --- a/specs/system/kensa-executor.spec.yaml +++ b/specs/system/kensa-executor.spec.yaml @@ -139,7 +139,7 @@ spec: type: technical enforcement: error - id: C-15 - description: The package MUST provide an in-memory SSH transport implementing kensa api.Transport over internal/ssh.Dial (x/crypto, matching every other OpenWatch host-communication path). The TransportFactory resolves the target host's credential per Connect via an injected CredentialResolver, keyed by the host id carried in HostConfig.FleetID; HostConfig.User, when set, overrides the credential username (hosts.username); sudo is downgraded when the effective user is root. HostConfig.KeyPath MUST be ignored — authentication uses the in-memory *credential.Credential only; no key material touches disk. Run MUST wrap the command in `sudo -n sh -c ''` (single-quote-escaped) when sudo is enabled; a non-zero remote exit code is a valid CommandResult, NOT a transport error. Put and Get MUST return explicit not-implemented errors until remediation needs them (the scan path never calls them); ControlChannelSensitive MUST report false for the scan-only transport + description: The package MUST provide an in-memory SSH transport implementing kensa api.Transport over internal/ssh.Dial (x/crypto, matching every other OpenWatch host-communication path). The TransportFactory resolves the target host's credential per Connect via an injected CredentialResolver, keyed by the host id carried in HostConfig.FleetID; HostConfig.User, when set, overrides the credential username (hosts.username); sudo is downgraded when the effective user is root. HostConfig.KeyPath MUST be ignored — authentication uses the in-memory *credential.Credential only; no key material touches disk. Run MUST wrap the command for privilege escalation when sudo is enabled — `sudo -n sh -c ''` for NOPASSWD or `sudo -S -p '' sh -c ''` with the credential password on stdin (never argv) per the connection's learned sudo mode (system-connection-profile), single-quote-escaped; a non-zero remote exit code is a valid CommandResult, NOT a transport error. Put and Get MUST return explicit not-implemented errors until remediation needs them (the scan path never calls them); ControlChannelSensitive MUST report false for the scan-only transport type: technical enforcement: error - id: C-16 @@ -219,7 +219,7 @@ spec: priority: critical references_constraints: [C-13] - id: AC-19 - description: With sudo enabled, the transport rewrites a command C as `sudo -n sh -c ''` with embedded single quotes escaped via the '\'' idiom (a command containing `it's` survives quoting intact); with sudo disabled the command passes through unmodified. + description: With sudo enabled, the transport rewrites a command C as `sudo -n sh -c ''` (NOPASSWD mode) or, when the connection's learned sudo mode is password, `sudo -S -p '' sh -c ''` with the credential password delivered on stdin and never in argv (see system-connection-profile); embedded single quotes are escaped via the '\'' idiom (a command containing `it's` survives quoting intact); with sudo disabled the command passes through unmodified. priority: critical references_constraints: [C-15] - id: AC-20 diff --git a/specs/system/ssh-connectivity.spec.yaml b/specs/system/ssh-connectivity.spec.yaml index d2d43916..9bd9053f 100644 --- a/specs/system/ssh-connectivity.spec.yaml +++ b/specs/system/ssh-connectivity.spec.yaml @@ -63,7 +63,7 @@ spec: type: technical enforcement: error - id: C-09 - description: 'v1.2.0 — The credential-password sudo fallback applies to EVERY SSH-using subsystem that issues a sudo command, namely: (1) the compliance collector via ssh.RunSudo, (2) the liveness privilege probe in internal/sshprivilege, (3) OS discovery firewall queries in internal/intelligence/discovery. When the initial `sudo -n ` returns non-zero, the call site MAY retry as `sudo -S -k -p '' ` ONLY when ALL of these hold — (a) systemconfig.SecurityConfig.AllowCredentialSudoPassword is true, (b) cred.AuthMethod ∈ {password, both}, (c) cred.Password is non-empty. Any one condition false → no retry; the original sudo -n failure propagates with usedFallback=false. The conditions and the retry shape are IDENTICAL across the three call sites — drift between them is forbidden.' + description: 'v1.2.0 — The credential-password sudo fallback applies to EVERY SSH-using subsystem that issues a sudo command, namely: (1) the compliance collector via ssh.RunSudo, (2) the liveness privilege probe in internal/sshprivilege, (3) OS discovery firewall queries in internal/intelligence/discovery. When the initial `sudo -n ` returns non-zero, the call site MAY retry as `sudo -S -k -p '' ` ONLY when ALL of these hold — (a) systemconfig.SecurityConfig.AllowCredentialSudoPassword is true, (b) cred.AuthMethod ∈ {password, both}, (c) cred.Password is non-empty. Any one condition false → no retry; the original sudo -n failure propagates with usedFallback=false. The conditions and the retry shape are IDENTICAL across the three call sites — drift between them is forbidden. NOTE: the compliance scan path ALSO supports credential-password sudo, but via the per-connection sudo-mode probe defined in system-connection-profile (a once-per-connection `true` sentinel, then `sudo -S -p ''` without `-k` given the high command volume) — it is deliberately NOT part of this inline `sudo -n`→`sudo -S -k` retry shape and is exempt from the identical-shape rule. Separately, AllowCredentialSudoPassword now DEFAULTS to true (a kill-switch, not opt-in); the per-call gating above is unchanged.' type: security enforcement: error - id: C-10 diff --git a/specter.yaml b/specter.yaml index 8017b0b2..07cf6a99 100644 --- a/specter.yaml +++ b/specter.yaml @@ -46,6 +46,7 @@ domains: - api-activity - system-host-inventory - system-ssh-connectivity + - system-connection-profile - api-auth - api-users - api-credentials From 1d522cc2cfd9a8c256382b890e4a17e08387ffa7 Mon Sep 17 00:00:00 2001 From: Remylus Losius Date: Mon, 15 Jun 2026 20:35:08 -0400 Subject: [PATCH 2/2] fix(kensa): gate scan sudo -S on the kill-switch + auth method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verification (4-agent review of the prior commit) found the compliance scan path bypassed two gates the collector / liveness / discovery paths all enforce: - HIGH: the scan ignored systemconfig AllowCredentialSudoPassword, so the kill-switch could not disable password-sudo on the busiest path (now default-on, so it was live by default). - MEDIUM: the scan fed any non-empty cred.Password to sudo -S regardless of the credential's auth method. Fix: thread a SudoPasswordPolicy seam through TransportFactory (wired in main.go/worker.go to systemconfig LoadSecurity, mirroring the collector's SudoPolicyLoader) and gate the scan's sudo password via sudoPasswordFor — allowed only when the kill-switch is on AND auth method is password|both. When disallowed the transport gets an empty sudo password, so probeSudoMode never attempts sudo -S and the connection degrades to sudo -n. This gates ONLY the sudo use of the password; SSH password AUTH stays independent (the dial already used the full credential). Also from the review (LOW): - Correct the sshprivilege package doc that wrongly claimed the scan uses ssh.RunSudo with an identical retry shape. - Add the single-factor caveat to the ObservedAuth doc (authObserver / dial.go): Last() is the authenticating method under OpenWatch's alternative-methods model; under true SSH MFA it is only the final factor, never persisted on failure. Spec system-connection-profile gains C-05 + AC-07; sudoPasswordFor unit test covers the gate matrix (kill-switch off, key-only, password, both). --- cmd/openwatch/main.go | 4 ++ cmd/openwatch/worker.go | 4 ++ internal/kensa/connprofile_wrap_test.go | 37 +++++++++++++++++ internal/kensa/scanfunc.go | 7 ++++ internal/kensa/transport.go | 50 ++++++++++++++++++++++- internal/ssh/authorder.go | 17 +++++--- internal/ssh/dial.go | 3 +- internal/sshprivilege/privilege.go | 10 +++-- specs/system/connection-profile.spec.yaml | 8 ++++ 9 files changed, 129 insertions(+), 11 deletions(-) diff --git a/cmd/openwatch/main.go b/cmd/openwatch/main.go index 77017c74..e7201c86 100644 --- a/cmd/openwatch/main.go +++ b/cmd/openwatch/main.go @@ -508,6 +508,10 @@ func cmdServe(cfg *config.Config, _ []string, stdout, stderr *os.File) int { return vars, err }, Profiles: connprofile.NewStore(pool), + Policy: func(ctx context.Context) (bool, error) { + cfg, err := cfgStore.LoadSecurity(ctx) + return cfg.AllowCredentialSudoPassword, err + }, }); scanErr != nil { slog.WarnContext(bootCtx, "kensa scan wiring unavailable — on-demand scans will fail until the kensa-rules package is installed (or OPENWATCH_KENSA_RULES_DIR set)", slog.String("error", scanErr.Error())) diff --git a/cmd/openwatch/worker.go b/cmd/openwatch/worker.go index ad7ce13e..f2396111 100644 --- a/cmd/openwatch/worker.go +++ b/cmd/openwatch/worker.go @@ -185,6 +185,10 @@ func cmdWorker(cfg *config.Config, args []string, stdout, stderr *os.File) int { return vars, err }, Profiles: connprofile.NewStore(pool), + Policy: func(ctx context.Context) (bool, error) { + cfg, err := varStore.LoadSecurity(ctx) + return cfg.AllowCredentialSudoPassword, err + }, }) if err != nil { slog.ErrorContext(bootCtx, "kensa scan wiring failed — is the kensa-rules package installed (or OPENWATCH_KENSA_RULES_DIR set)?", diff --git a/internal/kensa/connprofile_wrap_test.go b/internal/kensa/connprofile_wrap_test.go index 3f5f761d..b36d26ff 100644 --- a/internal/kensa/connprofile_wrap_test.go +++ b/internal/kensa/connprofile_wrap_test.go @@ -3,6 +3,7 @@ // AC traceability (this file): // // AC-05 TestWrap_BySudoMode +// AC-07 TestSudoPasswordFor_GatedByPolicyAndAuthMethod package kensa @@ -11,8 +12,44 @@ import ( "testing" "github.com/Hanalyx/openwatch/internal/connprofile" + "github.com/Hanalyx/openwatch/internal/credential" ) +// @ac AC-07 +// AC-07: the scan's sudo password is gated by the SAME two conditions the +// collector/liveness/discovery paths enforce — the AllowCredentialSudoPassword +// kill-switch AND the credential auth method (password|both). When either +// fails the transport gets no sudo password, so it never attempts sudo -S. +func TestSudoPasswordFor_GatedByPolicyAndAuthMethod(t *testing.T) { + t.Run("system-connection-profile/AC-07", func(t *testing.T) { + pwBoth := &credential.Credential{AuthMethod: credential.AuthBoth, Password: "pw"} + pwOnly := &credential.Credential{AuthMethod: credential.AuthPassword, Password: "pw"} + keyOnly := &credential.Credential{AuthMethod: credential.AuthSSHKey, Password: ""} + // A key credential that somehow carries a password must still be gated out. + keyWithPw := &credential.Credential{AuthMethod: credential.AuthSSHKey, Password: "pw"} + + cases := []struct { + name string + cred *credential.Credential + allowed bool + want string + }{ + {"kill-switch off blocks password+both", pwBoth, false, ""}, + {"kill-switch off blocks password-only", pwOnly, false, ""}, + {"allowed + both -> password", pwBoth, true, "pw"}, + {"allowed + password -> password", pwOnly, true, "pw"}, + {"allowed + ssh_key (no password) -> empty", keyOnly, true, ""}, + {"allowed + ssh_key carrying a password -> empty (auth-method gate)", keyWithPw, true, ""}, + {"nil credential -> empty", nil, true, ""}, + } + for _, c := range cases { + if got := sudoPasswordFor(c.cred, c.allowed); got != c.want { + t.Errorf("%s: sudoPasswordFor = %q, want %q", c.name, got, c.want) + } + } + }) +} + // @ac AC-05 // AC-05: the scan transport wraps a command per the connection's learned // sudo mode — no sudo verbatim, NOPASSWD via `sudo -n sh -c`, and password diff --git a/internal/kensa/scanfunc.go b/internal/kensa/scanfunc.go index 161d6905..8608b326 100644 --- a/internal/kensa/scanfunc.go +++ b/internal/kensa/scanfunc.go @@ -81,6 +81,12 @@ type ScanFuncDeps struct { // known-good SSH auth method + sudo mode, and writes back what it // learns. *connprofile.Store in production. Profiles ConnProfile + // Policy gates whether the credential password may be used for + // sudo -S (the AllowCredentialSudoPassword kill-switch). nil => + // allowed (default-on). Production wires systemconfig LoadSecurity so + // the scan honors the same switch as the collector/liveness/discovery + // paths. + Policy SudoPasswordPolicy } // NewProductionScanFunc loads the rule corpus once at construction @@ -101,6 +107,7 @@ func NewProductionScanFunc(deps ScanFuncDeps) (ScanFunc, error) { Mode: deps.HostKeyMode, Store: deps.KnownHosts, Profiles: deps.Profiles, + Policy: deps.Policy, } svc, err := newScanService(factory) if err != nil { diff --git a/internal/kensa/transport.go b/internal/kensa/transport.go index 1b205cc4..39a8a477 100644 --- a/internal/kensa/transport.go +++ b/internal/kensa/transport.go @@ -61,6 +61,31 @@ type ConnProfile interface { RecordSudoMode(ctx context.Context, hostID uuid.UUID, m connprofile.SudoMode) error } +// SudoPasswordPolicy reports whether the operator permits feeding the +// credential password to remote sudo (systemconfig +// SecurityConfig.AllowCredentialSudoPassword — the kill-switch). It is the +// SAME gate the collector / liveness / discovery paths consult; the scan +// MUST honor it too or the switch silently fails open on the busiest path. +// Returning an error is treated as "allowed" to match LoadSecurity's +// missing-row fallback (default-on). +type SudoPasswordPolicy func(ctx context.Context) (allowed bool, err error) + +// sudoPasswordFor returns the password the transport may use for `sudo -S`, +// or "" when password-sudo is not permitted. It applies the SAME two gates +// the other SSH paths enforce: (1) the kill-switch, (2) the credential's +// auth method must allow password material. Note this gates only the SUDO +// use of the password — SSH password AUTH is independent and still uses +// cred.Password directly in the dial. +func sudoPasswordFor(cred *credential.Credential, allowed bool) string { + if !allowed || cred == nil || cred.Password == "" { + return "" + } + if cred.AuthMethod != credential.AuthPassword && cred.AuthMethod != credential.AuthBoth { + return "" + } + return cred.Password +} + // TransportFactory implements kensa api.TransportFactory for the // long-lived Kensa instance. One factory serves every scan: Connect // resolves the target host's credential per call, keyed by the host id @@ -76,6 +101,11 @@ type TransportFactory struct { Store owssh.KnownHostsStore // Profiles is the per-host connection memory (nil disables learning). Profiles ConnProfile + // Policy gates whether the credential password may be used for sudo -S + // (the AllowCredentialSudoPassword kill-switch). nil => allowed + // (default-on, matching DefaultSecurity); production wires the real + // systemconfig loader so a flipped switch disables scan password-sudo. + Policy SudoPasswordPolicy } // Connect resolves the host's credential and dials @@ -129,7 +159,21 @@ func (f *TransportFactory) Connect(ctx context.Context, host kensaapi.HostConfig _ = f.Profiles.RecordSSHAuth(ctx, hostID, connprofile.SSHAuthMethod(observed)) } - t := &sshTransport{client: client, sudo: sudo, password: cred.Password} + // Gate the SUDO use of the password on the kill-switch + auth method — + // the same two conditions the collector / liveness / discovery paths + // enforce. When disallowed the transport gets no sudo password, so the + // probe never attempts sudo -S and the connection degrades to sudo -n. + // (SSH password AUTH above is unaffected: the dial already used the + // full credential.) + sudoAllowed := true + if f.Policy != nil { + if v, perr := f.Policy(ctx); perr == nil { + sudoAllowed = v + } + } + sudoPassword := sudoPasswordFor(cred, sudoAllowed) + + t := &sshTransport{client: client, sudo: sudo, password: sudoPassword} // Decide how to reach root, once per connection, and reuse it for // every command. We cannot infer "sudo refused" from a real check's @@ -139,7 +183,9 @@ func (f *TransportFactory) Connect(ctx context.Context, host kensaapi.HostConfig // (sudoers changed) self-heals. switch { case sudo: - t.mode = probeSudoMode(ctx, client, cred.Password, knownSudo) + // probe with the GATED password: when password-sudo is disallowed + // sudoPassword is "", so probeSudoMode never attempts sudo -S. + t.mode = probeSudoMode(ctx, client, sudoPassword, knownSudo) case host.Sudo: // requested, but the login user is already root t.mode = connprofile.SudoRoot default: // no escalation requested — nothing to learn diff --git a/internal/ssh/authorder.go b/internal/ssh/authorder.go index 1a1a8303..35168e75 100644 --- a/internal/ssh/authorder.go +++ b/internal/ssh/authorder.go @@ -16,11 +16,18 @@ const ( ) // authObserver records which auth method crypto/ssh last attempted during -// a handshake. Because the client stops at the first method the server -// accepts, the last-attempted method after a SUCCESSFUL handshake is the -// one that authenticated. Concurrency-safe: a single ClientConfig is only -// used by one handshake, but the callbacks may fire from the handshake -// goroutine. +// a handshake. The note fires when a method is ATTEMPTED (its callback is +// invoked), not when it is accepted. For SINGLE-FACTOR auth — OpenWatch's +// model, where key/password/both are ALTERNATIVE methods, never a required +// sequence — the client stops at the first method the server accepts +// (authSuccess), so the last-attempted method after a SUCCESSFUL handshake +// is the one that authenticated. Under true SSH multi-factor (e.g. sshd +// AuthenticationMethods "publickey,password") Last() would record only the +// final factor; OpenWatch does not use SSH MFA, and even then a wrong hint +// would at worst reorder the next dial (both methods stay offered) — never +// a failure, and nothing is persisted on an unsuccessful handshake. +// Concurrency-safe: a single ClientConfig is only used by one handshake, +// but the callbacks may fire from the handshake goroutine. type authObserver struct { mu sync.Mutex last string diff --git a/internal/ssh/dial.go b/internal/ssh/dial.go index 5c1e8248..d03c866d 100644 --- a/internal/ssh/dial.go +++ b/internal/ssh/dial.go @@ -126,7 +126,8 @@ func Dial(ctx context.Context, host string, port int, cred *credential.Credentia return nil, classifyHandshakeErr(err) } // Handshake succeeded: the last method the observer saw is the one - // that authenticated. Report it so the caller can persist the hint. + // that authenticated (for single-factor auth — see authObserver). Report + // it so the caller can persist the hint. if opts.ObservedAuth != nil { *opts.ObservedAuth = obs.Last() } diff --git a/internal/sshprivilege/privilege.go b/internal/sshprivilege/privilege.go index ce1c89f8..ed828637 100644 --- a/internal/sshprivilege/privilege.go +++ b/internal/sshprivilege/privilege.go @@ -7,9 +7,13 @@ // initial `sudo -n true` returns non-zero AND the credential carries // a non-empty Password AND the policy is on, the probe retries via // `sudo -S -k -p ” true` with the password fed through stdin. The -// retry shape is identical to the scan path's ssh.RunSudo and to -// discovery.probeFirewall — drift between the three is forbidden by -// the spec's C-09. +// retry shape is identical across the THREE inline-retry call sites — +// this probe, the collector's ssh.RunSudo, and discovery.probeFirewall — +// and drift between them is forbidden by the spec's C-09. (The compliance +// scan also supports password sudo, but via a different shape — a +// per-connection sudo-mode probe in internal/kensa, see +// system-connection-profile — so it is intentionally not part of this +// trio; it consults the SAME kill-switch + auth-method gate.) // // This package is kept OUT of internal/liveness because the liveness // package's AC-14 invariant forbids credential + crypto/ssh imports. diff --git a/specs/system/connection-profile.spec.yaml b/specs/system/connection-profile.spec.yaml index a66d5edc..67de607a 100644 --- a/specs/system/connection-profile.spec.yaml +++ b/specs/system/connection-profile.spec.yaml @@ -60,6 +60,10 @@ spec: description: The compliance scan MUST determine the sudo mode once per connection via an innocuous `true` sentinel (not by inferring refusal from a real check's non-zero exit) and reuse it for every command on that connection. type: technical enforcement: error + - id: C-05 + description: The compliance scan MUST gate its sudo -S use of the credential password on the SAME two conditions the collector/liveness/discovery paths enforce — the AllowCredentialSudoPassword kill-switch is on AND the credential auth method is password or both. When either fails, the scan MUST NOT attempt sudo -S (it degrades to sudo -n). This gates only the sudo use of the password; SSH password authentication is independent. + type: security + enforcement: error acceptance_criteria: - id: AC-01 @@ -86,3 +90,7 @@ spec: description: The sudo -S password fallback is ON by default (DefaultSecurity().AllowCredentialSudoPassword is true), retained as a kill-switch rather than an opt-in. priority: high references_constraints: [C-01] + - id: AC-07 + description: The scan resolves a sudo password only when the kill-switch is on AND the credential auth method is password or both; a key-only credential or a kill-switch set false yields no sudo password (sudoPasswordFor returns empty), so the scan never attempts sudo -S in those cases. + priority: critical + references_constraints: [C-05]