diff --git a/cmd/openwatch/main.go b/cmd/openwatch/main.go index 15e676a4..751d7ef3 100644 --- a/cmd/openwatch/main.go +++ b/cmd/openwatch/main.go @@ -388,7 +388,9 @@ func cmdServe(cfg *config.Config, _ []string, stdout, stderr *os.File) int { // SecurityConfig reader so the firewall probe can retry a // sudo -n failure via sudo -S -k with the credential password // — same gating as the collector + the privilege probe. - WithPolicyLoader(cfgStore) + WithPolicyLoader(cfgStore). + // Sudo-mode learning for the firewall probe (system-connection-profile). + WithProfiles(connStore) // OS Intelligence collector — runs one RunCycle per host: SSH // session, snapshot.Collect (packages/services/users/network/etc.), @@ -414,7 +416,9 @@ func cmdServe(cfg *config.Config, _ []string, stdout, stderr *os.File) int { WithSudoPolicyLoader(func(ctx context.Context) (owssh.SudoPolicy, error) { cfg, err := cfgStore.LoadSecurity(ctx) return owssh.SudoPolicy{AllowCredentialPassword: cfg.AllowCredentialSudoPassword}, err - }) + }). + // Sudo-mode learning across the cycle's sudo commands (system-connection-profile). + WithProfiles(connStore) // Intelligence scheduler — cron-like loop that picks "due" hosts // from host_intelligence_state.next_intelligence_at and dispatches diff --git a/internal/intelligence/collector/collector.go b/internal/intelligence/collector/collector.go index fa71781c..eff67ca3 100644 --- a/internal/intelligence/collector/collector.go +++ b/internal/intelligence/collector/collector.go @@ -70,6 +70,16 @@ type Publisher interface { // via systemconfig.Store.LoadSecurity. Tests substitute a constant. type SudoPolicyLoader func(ctx context.Context) (owssh.SudoPolicy, error) +// ConnProfileStore is the subset of connprofile the collector uses to +// learn the host's SUDO mode: lead each cycle's sudo commands with the +// recorded mode and record the mode that actually worked. nil disables +// sudo-mode learning. 
(SSH auth-method learning is handled separately by +// the profile-aware transport.) Spec system-connection-profile v1.2.0. +type ConnProfileStore interface { + Get(ctx context.Context, hostID uuid.UUID) (connprofile.Profile, error) + RecordSudoMode(ctx context.Context, hostID uuid.UUID, m connprofile.SudoMode) error +} + // Service is the OS Intelligence collector. Construct via NewService. type Service struct { pool *pgxpool.Pool @@ -79,6 +89,7 @@ type Service struct { lookup HostLookup transport SSHTransport sudoPolicy SudoPolicyLoader + profiles ConnProfileStore } // NewService constructs a Service. emit + bus may be nil — RunCycle @@ -114,6 +125,15 @@ func (s *Service) WithSudoPolicyLoader(l SudoPolicyLoader) *Service { return s } +// WithProfiles enables per-host sudo-mode learning: each cycle leads its +// sudo commands with the host's recorded mode and records the mode that +// worked. nil (the default) keeps the historical sudo -n-first probing. +// Spec system-connection-profile v1.2.0 C-07 / AC-10. +func (s *Service) WithProfiles(p ConnProfileStore) *Service { + s.profiles = p + return s +} + // hostFacts is the internal hand-off used by runCycleWithTransport so // tests can build it directly. cred can be nil when the stub transport // ignores credentials. @@ -253,12 +273,33 @@ func (s *Service) runCycleWithTransport(ctx context.Context, hf hostFacts) (Snap } sudoFallbackCount := 0 + // Sudo-mode learning: lead this cycle's sudo commands with the host's + // recorded mode (skips the doomed `sudo -n` on a password-sudo host), + // observe what actually worked, and record it once at cycle end. + // sudoPrefer threads the observation forward so later sudo commands in + // the same cycle also lead correctly. Spec system-connection-profile + // v1.2.0 C-07. 
+ var knownSudo, learnedSudo connprofile.SudoMode + if s.profiles != nil { + if p, perr := s.profiles.Get(ctx, hf.HostID); perr == nil { + knownSudo = p.SudoMode + } + } + sudoPrefer := string(knownSudo) + observeSudo := func(observed string) { + if observed != "" { + learnedSudo = connprofile.SudoMode(observed) + sudoPrefer = observed + } + } + snap := Snapshot{CollectedAt: time.Now().UTC()} if out, code, err := sess.Run(ctx, "cat /etc/passwd"); err == nil && code == 0 { // Spec v1.1.0 C-09: sudo -n first; sudo -S -k with cred.Password // on fallback if policy + credential allow. - shadow, scode, used, serr := owssh.RunSudo(ctx, sess, hf.Cred, policy, "cat /etc/shadow") + shadow, scode, used, observed, serr := owssh.RunSudo(ctx, sess, hf.Cred, policy, sudoPrefer, "cat /etc/shadow") + observeSudo(observed) if used { sudoFallbackCount++ } @@ -385,7 +426,8 @@ func (s *Service) runCycleWithTransport(ctx context.Context, hf hostFacts) (Snap // (sudo denied) silently drop the entry — partial success. if path == "/etc/shadow" { // Spec v1.1.0 C-09 — same gating as the shadow read above. - out, code, used, err := owssh.RunSudo(ctx, sess, hf.Cred, policy, "sha256sum "+path) + out, code, used, observed, err := owssh.RunSudo(ctx, sess, hf.Cred, policy, sudoPrefer, "sha256sum "+path) + observeSudo(observed) if used { sudoFallbackCount++ } @@ -412,6 +454,13 @@ func (s *Service) runCycleWithTransport(ctx context.Context, hf hostFacts) (Snap // covers ufw-inactive Ubuntu hosts (count=0 via the non-sudo // fallback inside the heredoc). + // Record the learned sudo mode once per cycle — only when a form was + // confirmed AND it differs from what was already stored (a no-op + // upsert otherwise). Spec system-connection-profile v1.2.0 C-07. 
+ if s.profiles != nil && learnedSudo != connprofile.SudoUnknown && learnedSudo != knownSudo { + _ = s.profiles.RecordSudoMode(ctx, hf.HostID, learnedSudo) + } + return snap, sudoFallbackCount, nil } diff --git a/internal/intelligence/discovery/discovery.go b/internal/intelligence/discovery/discovery.go index 19e8202a..d3804b7d 100644 --- a/internal/intelligence/discovery/discovery.go +++ b/internal/intelligence/discovery/discovery.go @@ -149,6 +149,17 @@ type PolicyLoader interface { LoadSecurity(ctx context.Context) (systemconfig.SecurityConfig, error) } +// SudoProfileStore is the subset of connprofile the discovery service +// uses to learn the host's SUDO mode for the firewall probe: lead with +// the recorded mode and record the mode a sudo firewall command confirms. +// nil disables sudo-mode learning. (SSH auth-method learning is handled +// separately by the profile-aware transport.) Spec system-connection- +// profile v1.2.0. +type SudoProfileStore interface { + Get(ctx context.Context, hostID uuid.UUID) (connprofile.Profile, error) + RecordSudoMode(ctx context.Context, hostID uuid.UUID, m connprofile.SudoMode) error +} + type Service struct { pool *pgxpool.Pool credSvc *credential.Service @@ -157,6 +168,7 @@ type Service struct { lookup HostLookup transport SSHTransport policy PolicyLoader + profiles SudoProfileStore } // NewService constructs a Service. emit + bus may be nil — Discover @@ -203,6 +215,15 @@ func (s *Service) WithPolicyLoader(p PolicyLoader) *Service { return s } +// WithProfiles enables per-host sudo-mode learning for the firewall probe: +// lead with the host's recorded sudo mode and record the mode a sudo +// firewall command confirms. nil (the default) keeps the historical +// sudo -n-first probing. Spec system-connection-profile v1.2.0 C-07. 
+func (s *Service) WithProfiles(p SudoProfileStore) *Service { + s.profiles = p + return s +} + // hostFacts is the internal hand-off from Discover (which knows the // hostID and pulls addr + cred) to discoverWithTransport (which only // needs the prepared tuple). Tests build it directly. @@ -340,10 +361,25 @@ func (s *Service) discoverWithTransport(ctx context.Context, hf hostFacts) (Syst cfg.policy = sec } } - if svc, status, ok := probeFirewall(ctx, sess, cfg); ok { + // Sudo-mode learning: lead the firewall probe with the host's recorded + // mode, and record the mode a sudo command confirms. Best-effort — a + // lookup miss leads in the default order. Spec system-connection- + // profile v1.2.0 C-07. + var knownSudo connprofile.SudoMode + if s.profiles != nil { + if p, perr := s.profiles.Get(ctx, hf.HostID); perr == nil { + knownSudo = p.SudoMode + cfg.prefer = knownSudo + } + } + svc, status, learnedSudo, ok := probeFirewall(ctx, sess, cfg) + if ok { facts.FirewallService = svc facts.FirewallStatus = status } + if s.profiles != nil && learnedSudo != connprofile.SudoUnknown && learnedSudo != knownSudo { + _ = s.profiles.RecordSudoMode(ctx, hf.HostID, learnedSudo) + } return facts, nil } diff --git a/internal/intelligence/discovery/firewall_fallback_test.go b/internal/intelligence/discovery/firewall_fallback_test.go index 878a72fd..a7b7847b 100644 --- a/internal/intelligence/discovery/firewall_fallback_test.go +++ b/internal/intelligence/discovery/firewall_fallback_test.go @@ -15,6 +15,7 @@ package discovery import ( "testing" + "github.com/Hanalyx/openwatch/internal/connprofile" "github.com/Hanalyx/openwatch/internal/credential" "github.com/Hanalyx/openwatch/internal/systemconfig" ) @@ -56,7 +57,7 @@ func TestProbeFirewall_PasswordFallback_UFWSuccess(t *testing.T) { policy: systemconfig.SecurityConfig{AllowCredentialSudoPassword: true}, cred: validHostCred(), } - svc, status, ok := probeFirewall(testCtx(t), sess, cfg) + svc, status, learned, ok := 
probeFirewall(testCtx(t), sess, cfg) if !ok { t.Fatalf("ok: want true (fallback succeeded), got false") } @@ -66,6 +67,10 @@ func TestProbeFirewall_PasswordFallback_UFWSuccess(t *testing.T) { if status != "active" { t.Errorf("status: want active, got %q", status) } + // The sudo -S fallback confirmed password sudo — learned mode. + if learned != connprofile.SudoPassword { + t.Errorf("learned sudo mode: want %q, got %q", connprofile.SudoPassword, learned) + } // The fallback call MUST have been issued through RunWithStdin // with the credential password on stdin. if got := len(stub.stdinCalls); got == 0 { @@ -106,10 +111,14 @@ func TestProbeFirewall_PasswordFallback_PolicyOff(t *testing.T) { policy: systemconfig.SecurityConfig{AllowCredentialSudoPassword: false}, cred: validHostCred(), } - _, _, ok := probeFirewall(testCtx(t), sess, cfg) + _, _, learned, ok := probeFirewall(testCtx(t), sess, cfg) if ok { t.Errorf("ok: want false (policy off, no sudo path succeeded), got true") } + // Nothing confirmed sudo (policy off, every form denied). + if learned != connprofile.SudoUnknown { + t.Errorf("learned sudo mode: want unknown, got %q", learned) + } if got := len(stub.stdinCalls); got != 0 { t.Errorf("RunWithStdin called %d times with policy off; want 0", got) } @@ -133,13 +142,75 @@ func TestProbeFirewall_NoFallbackOnSudoNSuccess(t *testing.T) { policy: systemconfig.SecurityConfig{AllowCredentialSudoPassword: true}, cred: validHostCred(), } - svc, status, ok := probeFirewall(testCtx(t), sess, cfg) + svc, status, learned, ok := probeFirewall(testCtx(t), sess, cfg) if !ok || svc != "firewalld" || status != "active" { t.Errorf("first-firewall hit: svc=%q status=%q ok=%v", svc, status, ok) } + // Sudoless firewalld hit first — no sudo command ran, so the probe + // confirms no sudo mode (learning stays with whatever liveness knows). 
+ if learned != connprofile.SudoUnknown { + t.Errorf("learned sudo mode: want unknown (sudoless path), got %q", learned) + } // Zero RunWithStdin calls — fallback never engaged. if got := len(stub.stdinCalls); got != 0 { t.Errorf("RunWithStdin called %d times though sudo -n was not even attempted", got) } }) } + +// @spec system-connection-profile +// @ac AC-11 +// AC-11 (discovery sudo): the firewall probe opportunistically reports the +// sudo mode a real sudo command confirms — NOPASSWD here (sudo -n ufw +// status exits 0) — and leads with sudo -S when the host is recorded as +// needing a password. +func TestProbeFirewall_SudoModeLearning(t *testing.T) { + t.Run("system-connection-profile/AC-11", func(t *testing.T) { + // NOPASSWD host: firewalld absent, sudo -n ufw status succeeds. + stub := newStubSSHTransport() + stub.SeedAll() + stub.outputs["sudo -n ufw status"] = stubResult{out: []byte("Status: active\n"), exitCode: 0} + + sess, _ := stub.Dial(testCtx(t), "host", 22, validHostCred()) + cfg := sudoFallbackConfig{ + policy: systemconfig.SecurityConfig{AllowCredentialSudoPassword: true}, + cred: validHostCred(), + } + svc, _, learned, ok := probeFirewall(testCtx(t), sess, cfg) + if !ok || svc != "ufw" { + t.Fatalf("probe: ok=%v svc=%q, want true/ufw", ok, svc) + } + if learned != connprofile.SudoNopasswd { + t.Errorf("learned: want %q, got %q", connprofile.SudoNopasswd, learned) + } + // NOPASSWD confirmed via sudo -n: no password fed to stdin. + if got := len(stub.stdinCalls); got != 0 { + t.Errorf("RunWithStdin called %d times on a NOPASSWD host; want 0", got) + } + }) + + t.Run("known password host leads with sudo -S", func(t *testing.T) { + stub := newStubSSHTransport() + stub.SeedAll() + // Only sudo -S ufw status is seeded; sudo -n is left unseeded (127). 
+ stub.outputs["sudo -S -k -p '' ufw status"] = stubResult{out: []byte("Status: active\n"), exitCode: 0} + + sess, _ := stub.Dial(testCtx(t), "host", 22, validHostCred()) + cfg := sudoFallbackConfig{ + policy: systemconfig.SecurityConfig{AllowCredentialSudoPassword: true}, + cred: validHostCred(), + prefer: connprofile.SudoPassword, + } + svc, _, learned, ok := probeFirewall(testCtx(t), sess, cfg) + if !ok || svc != "ufw" { + t.Fatalf("probe: ok=%v svc=%q, want true/ufw", ok, svc) + } + if learned != connprofile.SudoPassword { + t.Errorf("learned: want %q, got %q", connprofile.SudoPassword, learned) + } + // Led with sudo -S: the password was fed on the first ufw attempt. + if len(stub.stdinCalls) == 0 || stub.stdinCalls[0].cmd != "sudo -S -k -p '' ufw status" { + t.Errorf("did not lead with sudo -S: stdinCalls=%+v", stub.stdinCalls) + } + }) +} diff --git a/internal/intelligence/discovery/helpers.go b/internal/intelligence/discovery/helpers.go index 99f23881..0a779376 100644 --- a/internal/intelligence/discovery/helpers.go +++ b/internal/intelligence/discovery/helpers.go @@ -5,6 +5,7 @@ import ( "strconv" "strings" + "github.com/Hanalyx/openwatch/internal/connprofile" "github.com/Hanalyx/openwatch/internal/credential" "github.com/Hanalyx/openwatch/internal/systemconfig" ) @@ -99,6 +100,11 @@ func parseGB(s string) int { type sudoFallbackConfig struct { policy systemconfig.SecurityConfig cred *credential.Credential + // prefer is the host's learned sudo mode. When connprofile.SudoPassword + // AND the fallback is permitted, runSudoWithFallback leads with + // `sudo -S` and skips the doomed `sudo -n` round-trip. Spec + // system-connection-profile v1.2.0 C-07. + prefer connprofile.SudoMode } // canFallback returns true iff (a) the policy allows the credential @@ -124,18 +130,44 @@ func (c sudoFallbackConfig) canFallback() bool { // reattempts on a failed retry (-k invalidates the host's sudo // timestamp cache so a wrong password trips pam once, not three // times). 
Spec C-11 / AC-17. -func runSudoWithFallback(ctx context.Context, sess SSHSession, sudoCmd string, cfg sudoFallbackConfig) ([]byte, int, error) { - out, code, err := sess.Run(ctx, "sudo -n "+sudoCmd) +// The returned observed is the sudo mode confirmed to work this call +// (connprofile.SudoNopasswd / SudoPassword on an exit-0 of a given form, +// SudoUnknown otherwise — never a misobservation from a command that +// failed for its own reasons). +func runSudoWithFallback(ctx context.Context, sess SSHSession, sudoCmd string, cfg sudoFallbackConfig) (out []byte, code int, observed connprofile.SudoMode, err error) { + // Lead with `sudo -S` when the host is known to need a password and the + // fallback is permitted — skips the doomed `sudo -n`. Both forms are + // still attempted on a miss (a hint, not a lock), so a stale mode + // self-heals. + if cfg.prefer == connprofile.SudoPassword && cfg.canFallback() { + pw := append([]byte(cfg.cred.Password), '\n') + o, c, e := sess.RunWithStdin(ctx, "sudo -S -k -p '' "+sudoCmd, pw) + if e == nil && c == 0 { + return o, c, connprofile.SudoPassword, nil + } + // sudo -S did not confirm; the host may have gained NOPASSWD. + o2, c2, e2 := sess.Run(ctx, "sudo -n "+sudoCmd) + if e2 == nil && c2 == 0 { + return o2, c2, connprofile.SudoNopasswd, nil + } + return o, c, connprofile.SudoUnknown, e + } + + out, code, err = sess.Run(ctx, "sudo -n "+sudoCmd) if err == nil && code == 0 { - return out, code, nil + return out, code, connprofile.SudoNopasswd, nil } if !cfg.canFallback() { - return out, code, err + return out, code, connprofile.SudoUnknown, err } // Pipe the password (with a trailing newline so sudo flushes // the line) into the remote process's stdin. 
pw := append([]byte(cfg.cred.Password), '\n') - return sess.RunWithStdin(ctx, "sudo -S -k -p '' "+sudoCmd, pw) + fOut, fCode, fErr := sess.RunWithStdin(ctx, "sudo -S -k -p '' "+sudoCmd, pw) + if fErr == nil && fCode == 0 { + return fOut, fCode, connprofile.SudoPassword, nil + } + return fOut, fCode, connprofile.SudoUnknown, fErr } // probeFirewall tries each known firewall service in order. The first @@ -148,28 +180,47 @@ func runSudoWithFallback(ctx context.Context, sess SSHSession, sudoCmd string, c // the policy permits retries through `sudo -S -k -p ” ` before // the probe falls through to the next firewall. Spec // system-ssh-connectivity v1.2.0 C-09 / AC-20. -func probeFirewall(ctx context.Context, sess SSHSession, cfg sudoFallbackConfig) (service, status string, ok bool) { +// +// The returned learned is the sudo mode opportunistically confirmed by a +// sudo firewall command (SudoUnknown when none answered via sudo — e.g. +// a sudoless firewalld host, or one with no firewall tool). Spec +// system-connection-profile v1.2.0 C-07 / AC-11. +func probeFirewall(ctx context.Context, sess SSHSession, cfg sudoFallbackConfig) (service, status string, learned connprofile.SudoMode, ok bool) { + // note records the strongest confirmation a sudo attempt produced. + note := func(observed connprofile.SudoMode) { + if observed != connprofile.SudoUnknown { + learned = observed + } + } // firewalld via systemctl is sudoless on many distros. if out, code, err := sess.Run(ctx, "systemctl is-active firewalld"); err == nil && code == 0 { - return "firewalld", strings.TrimSpace(string(out)), true + return "firewalld", strings.TrimSpace(string(out)), learned, true } // ufw — Debian / Ubuntu. Needs sudo on most distros for `status`. 
- if out, code, err := runSudoWithFallback(ctx, sess, "ufw status", cfg); err == nil && code == 0 { - return "ufw", firstWord(string(out)), true + out, code, observed, err := runSudoWithFallback(ctx, sess, "ufw status", cfg) + note(observed) + if err == nil && code == 0 { + return "ufw", firstWord(string(out)), learned, true } // nftables. - if _, code, err := runSudoWithFallback(ctx, sess, "nft list ruleset", cfg); err == nil && code == 0 { - return "nftables", "active", true + _, code, observed, err = runSudoWithFallback(ctx, sess, "nft list ruleset", cfg) + note(observed) + if err == nil && code == 0 { + return "nftables", "active", learned, true } // iptables fallback. - if _, code, err := runSudoWithFallback(ctx, sess, "iptables -L", cfg); err == nil && code == 0 { - return "iptables", "active", true + _, code, observed, err = runSudoWithFallback(ctx, sess, "iptables -L", cfg) + note(observed) + if err == nil && code == 0 { + return "iptables", "active", learned, true } // firewall-cmd as last resort (RHEL). - if out, code, err := runSudoWithFallback(ctx, sess, "firewall-cmd --state", cfg); err == nil && code == 0 { - return "firewalld", strings.TrimSpace(string(out)), true + out, code, observed, err = runSudoWithFallback(ctx, sess, "firewall-cmd --state", cfg) + note(observed) + if err == nil && code == 0 { + return "firewalld", strings.TrimSpace(string(out)), learned, true } - return "", "", false + return "", "", learned, false } func firstWord(s string) string { diff --git a/internal/ssh/sudo.go b/internal/ssh/sudo.go index ad5b4473..993be589 100644 --- a/internal/ssh/sudo.go +++ b/internal/ssh/sudo.go @@ -37,6 +37,17 @@ type SudoPolicy struct { AllowCredentialPassword bool } +// Sudo-mode tokens for RunSudo's prefer (in) and observed (out). Plain +// strings — not connprofile's typed enum — so this dial-layer file stays +// decoupled from connprofile, exactly as PreferKey/PreferPassword do for +// the auth method. 
The values match connprofile.SudoNopasswd / +// connprofile.SudoPassword, so a string()/SudoMode() cast round-trips at +// the call site. An empty prefer/observed means "unknown — no preference". +const ( + SudoNopasswd = "nopasswd" + SudoPassword = "password" +) + // RunSudo executes `cmd` as root via sudo. The pipeline is: // // 1. Always try `sudo -n ` first. NOPASSWD hosts return exit 0 @@ -52,43 +63,76 @@ type SudoPolicy struct { // credential cache before each attempt so a wrong password fails // fast (no PAM retry counter increment, no host-side lockout). // +// prefer (a Sudo* token, or "" for unknown) is the host's learned sudo +// mode: when it is SudoPassword AND the credential can supply a password, +// RunSudo leads with `sudo -S` and skips the doomed `sudo -n` round-trip. +// Both forms are still attempted on a miss (a hint, not a lock), so a +// stale preference self-heals. +// // Returns the final stdout, the final exit code, a bool indicating // whether the password fallback was used (callers aggregate this for -// the per-cycle audit emission), and any transport error. +// the per-cycle audit emission), the sudo mode OBSERVED to work this +// call (a Sudo* token, or "" when neither form was confirmed), and any +// transport error. observed is set ONLY on a confirmed exit-0 of a given +// form — a real command that exits non-zero for its own reasons never +// produces a (mis)observation, so callers can safely record it. // // Source-inspection-friendly: the password is taken from cred.Password // and passed as the `stdin` argument of RunWithStdin. It does NOT // appear in the `cmd` string anywhere — see ssh_test.go AC-15. // // Spec: system-ssh-connectivity v1.1.0 C-09 / C-10 / C-11 / C-12, -// AC-11..AC-17. +// AC-11..AC-17; system-connection-profile v1.2.0 C-07 (sudo-mode learning). 
func RunSudo( ctx context.Context, sess SudoSession, cred *credential.Credential, policy SudoPolicy, + prefer string, cmd string, -) (stdout []byte, exitCode int, usedFallback bool, err error) { - // Phase 1: sudo -n. The exact wire-shape predates v1.1.0 — every - // existing collector / discovery call site sent this same prefix. +) (stdout []byte, exitCode int, usedFallback bool, observed string, err error) { + canPassword := policy.AllowCredentialPassword && + cred != nil && cred.Password != "" && + (cred.AuthMethod == credential.AuthPassword || cred.AuthMethod == credential.AuthBoth) + + // Lead with `sudo -S` when the host is known to need a password and we + // can supply one — the common steady-state case for a password-sudo + // host, where `sudo -n` would just waste a round-trip. On a miss we + // still fall back to `sudo -n` (the host may have gained NOPASSWD). + if prefer == SudoPassword && canPassword { + pwIn := append([]byte(cred.Password), '\n') + fOut, fCode, fErr := sess.RunWithStdin(ctx, "sudo -S -k -p '' "+cmd, pwIn) + if fErr != nil { + return fOut, fCode, true, "", fErr + } + if fCode == 0 { + return fOut, fCode, true, SudoPassword, nil + } + // sudo -S did not succeed; try NOPASSWD in case the host changed. + nOut, nCode, nErr := sess.Run(ctx, "sudo -n "+cmd) + if nErr == nil && nCode == 0 { + return nOut, nCode, true, SudoNopasswd, nil + } + // Neither confirmed; surface the password attempt (more + // informative) with no observation to record. + return fOut, fCode, true, "", nil + } + + // Default order — Phase 1: sudo -n. The exact wire-shape predates + // v1.1.0 — every existing collector / discovery call site sent this + // same prefix. out, code, err := sess.Run(ctx, "sudo -n "+cmd) if err != nil { - return out, code, false, err + return out, code, false, "", err } if code == 0 { // NOPASSWD path. C-12: password fallback MUST NOT execute. - return out, code, false, nil + return out, code, false, SudoNopasswd, nil } // Phase 2 gating. 
Any miss → return the sudo -n failure verbatim. - if !policy.AllowCredentialPassword { - return out, code, false, nil - } - if cred == nil || cred.Password == "" { - return out, code, false, nil - } - if cred.AuthMethod != credential.AuthPassword && cred.AuthMethod != credential.AuthBoth { - return out, code, false, nil + if !canPassword { + return out, code, false, "", nil } // Phase 3: `sudo -S -k -p '' `. The password is appended with @@ -99,5 +143,9 @@ func RunSudo( // retries that would lock the host user. pwIn := append([]byte(cred.Password), '\n') fOut, fCode, fErr := sess.RunWithStdin(ctx, "sudo -S -k -p '' "+cmd, pwIn) - return fOut, fCode, true, fErr + obs := "" + if fErr == nil && fCode == 0 { + obs = SudoPassword + } + return fOut, fCode, true, obs, fErr } diff --git a/internal/ssh/sudo_test.go b/internal/ssh/sudo_test.go index 031086bc..9979bbfd 100644 --- a/internal/ssh/sudo_test.go +++ b/internal/ssh/sudo_test.go @@ -72,7 +72,7 @@ func TestRunSudo_NopasswdShortCircuits(t *testing.T) { } policy := SudoPolicy{AllowCredentialPassword: true} - out, code, used, err := RunSudo(context.Background(), sess, cred, policy, "cat /etc/shadow") + out, code, used, _, err := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") if err != nil { t.Fatalf("unexpected err: %v", err) } @@ -104,7 +104,7 @@ func TestRunSudo_FallbackEngagesWhenAllowed(t *testing.T) { } policy := SudoPolicy{AllowCredentialPassword: true} - out, code, used, err := RunSudo(context.Background(), sess, cred, policy, "cat /etc/shadow") + out, code, used, _, err := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") if err != nil { t.Fatalf("unexpected err: %v", err) } @@ -142,7 +142,7 @@ func TestRunSudo_NoFallbackWhenPolicyDisabled(t *testing.T) { } policy := SudoPolicy{AllowCredentialPassword: false} - _, code, used, err := RunSudo(context.Background(), sess, cred, policy, "cat /etc/shadow") + _, code, used, _, err := RunSudo(context.Background(), sess, 
cred, policy, "", "cat /etc/shadow") if err != nil { t.Fatalf("unexpected err: %v", err) } @@ -170,7 +170,7 @@ func TestRunSudo_NoFallbackWhenSshKeyOnly(t *testing.T) { } policy := SudoPolicy{AllowCredentialPassword: true} - _, _, used, err := RunSudo(context.Background(), sess, cred, policy, "cat /etc/shadow") + _, _, used, _, err := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") if err != nil { t.Fatalf("unexpected err: %v", err) } @@ -197,7 +197,7 @@ func TestRunSudo_WrongPasswordNoRetry(t *testing.T) { } policy := SudoPolicy{AllowCredentialPassword: true} - _, code, used, err := RunSudo(context.Background(), sess, cred, policy, "cat /etc/shadow") + _, code, used, _, err := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") if err != nil { t.Fatalf("unexpected err: %v", err) } @@ -253,7 +253,7 @@ func TestRunSudo_TransportErrorBubblesUp(t *testing.T) { sess := &stubSession{transportErr: errors.New("session closed")} policy := SudoPolicy{AllowCredentialPassword: true} cred := &credential.Credential{AuthMethod: credential.AuthBoth, Password: "x"} - _, _, used, err := RunSudo(context.Background(), sess, cred, policy, "ls") + _, _, used, _, err := RunSudo(context.Background(), sess, cred, policy, "", "ls") if err == nil || !strings.Contains(err.Error(), "session closed") { t.Errorf("transport error not propagated: err=%v", err) } @@ -261,3 +261,73 @@ func TestRunSudo_TransportErrorBubblesUp(t *testing.T) { t.Error("transport error triggered fallback (should bypass)") } } + +// @spec system-connection-profile +// @ac AC-10 +// AC-10 (collector/discovery half, sudo): RunSudo reports the sudo mode +// observed to work (SudoNopasswd / SudoPassword on a confirmed exit-0, +// "" otherwise), and when told prefer=SudoPassword it leads with sudo -S +// and skips the doomed sudo -n round-trip. 
+func TestRunSudo_SudoModeLearning(t *testing.T) { + cred := &credential.Credential{AuthMethod: credential.AuthBoth, Password: "hunter2"} // pragma: allowlist secret + policy := SudoPolicy{AllowCredentialPassword: true} + + t.Run("system-connection-profile/AC-10", func(t *testing.T) { + // Known password host: lead with sudo -S, no sudo -n attempt. + sess := &stubSession{fallbackOK: true} + _, code, used, observed, err := RunSudo(context.Background(), sess, cred, policy, SudoPassword, "cat /etc/shadow") + if err != nil || code != 0 { + t.Fatalf("want success, got code=%d err=%v", code, err) + } + if !used { + t.Error("usedFallback=false; leading with sudo -S should count as fallback") + } + if observed != SudoPassword { + t.Errorf("observed=%q, want %q", observed, SudoPassword) + } + if len(sess.runCalls) != 0 { + t.Errorf("lead-with-password issued %d sudo -n call(s), want 0", len(sess.runCalls)) + } + if len(sess.stdinCalls) != 1 { + t.Errorf("want exactly 1 sudo -S call, got %d", len(sess.stdinCalls)) + } + }) + + t.Run("nopasswd observed on default order", func(t *testing.T) { + sess := &stubSession{nopasswdSucceeds: true} + _, _, used, observed, _ := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") + if used { + t.Error("usedFallback=true on a NOPASSWD host") + } + if observed != SudoNopasswd { + t.Errorf("observed=%q, want %q", observed, SudoNopasswd) + } + }) + + t.Run("password observed via default-order fallback", func(t *testing.T) { + sess := &stubSession{nopasswdSucceeds: false, fallbackOK: true} + _, _, _, observed, _ := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") + if observed != SudoPassword { + t.Errorf("observed=%q, want %q", observed, SudoPassword) + } + }) + + t.Run("no observation when neither form confirms", func(t *testing.T) { + // sudo -n denied, password rejected: nothing definitively worked. 
+ sess := &stubSession{nopasswdSucceeds: false, fallbackOK: false} + _, _, _, observed, _ := RunSudo(context.Background(), sess, cred, policy, "", "cat /etc/shadow") + if observed != "" { + t.Errorf("observed=%q, want empty (ambiguous)", observed) + } + }) + + t.Run("stale password hint self-heals to nopasswd", func(t *testing.T) { + // prefer=password but the host now grants NOPASSWD and rejects the + // piped password: lead sudo -S (fails), fall back to sudo -n (ok). + sess := &stubSession{nopasswdSucceeds: true, fallbackOK: false} + _, code, _, observed, _ := RunSudo(context.Background(), sess, cred, policy, SudoPassword, "cat /etc/shadow") + if code != 0 || observed != SudoNopasswd { + t.Errorf("self-heal: code=%d observed=%q, want code=0 observed=%q", code, observed, SudoNopasswd) + } + }) +} diff --git a/internal/sshprivilege/privilege.go b/internal/sshprivilege/privilege.go index 191af50a..51160b2a 100644 --- a/internal/sshprivilege/privilege.go +++ b/internal/sshprivilege/privilege.go @@ -84,11 +84,16 @@ type Dialer interface { } // ConnProfileStore is the subset of connprofile the probe uses to lead the -// dial with the host's known-good SSH auth method and record what -// authenticated. nil (the default) disables learning. +// dial with the host's known-good SSH auth method AND its sudo mode, and +// to record what actually worked. nil (the default) disables learning. +// +// The liveness probe is the authoritative sudo-mode learner: it runs an +// innocuous `true` sentinel every cycle (~5 min), so unlike the +// opportunistic discovery/collector paths it reliably confirms the mode. type ConnProfileStore interface { Get(ctx context.Context, hostID uuid.UUID) (connprofile.Profile, error) RecordSSHAuth(ctx context.Context, hostID uuid.UUID, m connprofile.SSHAuthMethod) error + RecordSudoMode(ctx context.Context, hostID uuid.UUID, m connprofile.SudoMode) error } // probeConfig accumulates the optional dependencies a Probe needs. 
@@ -162,14 +167,16 @@ func Probe(resolver Resolver, opts ...ProbeOption) liveness.PrivilegeProbeFunc {
 			return true, false, fmt.Errorf("resolve credential: %w", rerr)
 		}
 
-		// Learning: lead the dial with the host's recorded auth method
-		// (if a profile store is wired and a row exists), then record the
-		// method that actually authenticated. Both are best-effort: a
-		// lookup miss just dials in the default order.
+		// Learning: lead the dial with the host's recorded auth method AND
+		// sudo mode (if a profile store is wired and a row exists), then
+		// record what actually worked. Both are best-effort: a lookup miss
+		// just dials/escalates in the default order.
 		var prefer connprofile.SSHAuthMethod
+		var knownSudo connprofile.SudoMode
 		if cfg.profiles != nil {
 			if p, gerr := cfg.profiles.Get(ctx, id); gerr == nil {
 				prefer = p.SSHAuthMethod
+				knownSudo = p.SudoMode
 			}
 		}
 
@@ -183,27 +190,89 @@ func Probe(resolver Resolver, opts ...ProbeOption) liveness.PrivilegeProbeFunc {
 			_ = cfg.profiles.RecordSSHAuth(ctx, id, observed)
 		}
 
-		// Layer 1: sudo -n true. The 80% case where NOPASSWD is set.
-		out, code, runErr := exec.Run(ctx, "sudo -n true")
-		if runErr == nil && code == 0 {
-			return true, true, nil
-		}
-
-		// Layer 2 (v1.2.0): sudo -S -k -p '' true. Only when the
-		// policy + credential permit. Per spec C-09 / AC-19, the
-		// auth method must allow password material AND the password
-		// field must be populated.
-		if !canFallback(ctx, cfg.policy, cred) {
-			return true, false, fmt.Errorf("sudo -n true: exit %d: %s", code, strings.TrimSpace(string(out)))
+		ok, sudoMode, sudoErr := probeSudo(ctx, exec, cred, cfg.policy, knownSudo)
+		// Record only a CONFIRMED mode that differs from the stored one;
+		// best-effort, so a store error never fails the probe.
+		if cfg.profiles != nil && sudoMode != connprofile.SudoUnknown && sudoMode != knownSudo {
+			_ = cfg.profiles.RecordSudoMode(ctx, id, sudoMode)
 		}
+		return true, ok, sudoErr
+	}
+}
 
+// probeSudo determines whether sudo works and in which mode, by running
+// the innocuous `true` sentinel under each form. It leads with `sudo -S`
+// when the host is known to need a password (knownSudo == SudoPassword)
+// and the policy + credential permit one — skipping the doomed `sudo -n`.
+// Otherwise it keeps the historical `sudo -n` first order. The recorded
+// mode is a hint, not a lock: when the leading form fails the other form
+// is still tried (`sudo -S` only if the password gate is open), so a stale
+// mode self-heals within the same probe.
+//
+// Returns ok (sudo usable), the mode CONFIRMED to work (SudoUnknown when
+// neither did), and on failure the same diagnostic error the pre-learning
+// probe returned regardless of which form led: the `sudo -S` error when
+// the password gate is open, the `sudo -n` error when it is not
+// (preserving spec AC-18/AC-19/AC-21 behaviour). Spec
+// system-connection-profile v1.2.0 C-07.
+func probeSudo(
+	ctx context.Context,
+	exec SessionExecutor,
+	cred *credential.Credential,
+	policy PolicyLoader,
+	knownSudo connprofile.SudoMode,
+) (ok bool, mode connprofile.SudoMode, err error) {
+	canPassword := canFallback(ctx, policy, cred)
+
+	runN := func() (bool, []byte, int) {
+		out, code, runErr := exec.Run(ctx, "sudo -n true")
+		return runErr == nil && code == 0, out, code
+	}
+	// runS is only invoked after canPassword has been checked.
+	runS := func() (bool, []byte, int) {
 		stdin := bytes.NewReader([]byte(cred.Password + "\n"))
-		out2, code2, runErr2 := exec.RunWithStdin(ctx, "sudo -S -k -p '' true", stdin)
-		if runErr2 == nil && code2 == 0 {
-			return true, true, nil
+		out, code, runErr := exec.RunWithStdin(ctx, "sudo -S -k -p '' true", stdin)
+		return runErr == nil && code == 0, out, code
+	}
+
+	// Lead with sudo -S on a known password-sudo host.
+	if knownSudo == connprofile.SudoPassword && canPassword {
+		goodS, outS, codeS := runS()
+		if goodS {
+			return true, connprofile.SudoPassword, nil
+		}
+		// sudo -S did not confirm; the host may have gained NOPASSWD.
+		if goodN, _, _ := runN(); goodN {
+			return true, connprofile.SudoNopasswd, nil
 		}
-		return true, false, fmt.Errorf("sudo -S -k -p '' true: exit %d: %s", code2, strings.TrimSpace(string(out2)))
+		// Both forms failed with the password gate open: report the
+		// sudo -S diagnostic, exactly as the default order does below, so
+		// the error does not depend on which form happened to lead.
+		return false, connprofile.SudoUnknown,
+			fmt.Errorf("sudo -S -k -p '' true: exit %d: %s", codeS, strings.TrimSpace(string(outS)))
+	}
+
+	// Default order — Layer 1: sudo -n true. The 80% case where NOPASSWD
+	// is set.
+	good, out, code := runN()
+	if good {
+		return true, connprofile.SudoNopasswd, nil
+	}
+
+	// Layer 2 (v1.2.0): sudo -S -k -p '' true. Only when the policy +
+	// credential permit. Per spec C-09 / AC-19, the auth method must allow
+	// password material AND the password field must be populated.
+	if !canPassword {
+		return false, connprofile.SudoUnknown,
+			fmt.Errorf("sudo -n true: exit %d: %s", code, strings.TrimSpace(string(out)))
+	}
+
+	good2, out2, code2 := runS()
+	if good2 {
+		return true, connprofile.SudoPassword, nil
 	}
+	return false, connprofile.SudoUnknown,
+		fmt.Errorf("sudo -S -k -p '' true: exit %d: %s", code2, strings.TrimSpace(string(out2)))
 }
 
 // canFallback returns true iff the policy is on AND the credential is
diff --git a/internal/sshprivilege/privilege_test.go b/internal/sshprivilege/privilege_test.go
index 487667a9..4197dfe6 100644
--- a/internal/sshprivilege/privilege_test.go
+++ b/internal/sshprivilege/privilege_test.go
@@ -408,17 +408,19 @@ func TestPrivilegeProbe_NoFallbackOnSudoNSuccess(t *testing.T) {
 
 // stubProfiles is an in-memory connprofile store for the learning tests.
type stubProfiles struct { - mu sync.Mutex - prefer connprofile.SSHAuthMethod - recorded connprofile.SSHAuthMethod - getErr error + mu sync.Mutex + prefer connprofile.SSHAuthMethod + preferSudo connprofile.SudoMode + recorded connprofile.SSHAuthMethod + recordedSudo connprofile.SudoMode + getErr error } func (s *stubProfiles) Get(_ context.Context, _ uuid.UUID) (connprofile.Profile, error) { if s.getErr != nil { return connprofile.Profile{}, s.getErr } - return connprofile.Profile{SSHAuthMethod: s.prefer}, nil + return connprofile.Profile{SSHAuthMethod: s.prefer, SudoMode: s.preferSudo}, nil } func (s *stubProfiles) RecordSSHAuth(_ context.Context, _ uuid.UUID, m connprofile.SSHAuthMethod) error { @@ -428,6 +430,13 @@ func (s *stubProfiles) RecordSSHAuth(_ context.Context, _ uuid.UUID, m connprofi return nil } +func (s *stubProfiles) RecordSudoMode(_ context.Context, _ uuid.UUID, m connprofile.SudoMode) error { + s.mu.Lock() + s.recordedSudo = m + s.mu.Unlock() + return nil +} + // @spec system-connection-profile // @ac AC-09 // AC-09 (liveness half): when a profile store is wired, the probe leads @@ -471,3 +480,57 @@ func TestPrivilegeProbe_AuthLearning(t *testing.T) { } }) } + +// @spec system-connection-profile +// @ac AC-12 +// AC-12 (liveness sudo): the probe records the sudo mode it confirms via +// the `true` sentinel, and on a host known to need a password it leads +// with sudo -S, skipping the doomed sudo -n. +func TestPrivilegeProbe_SudoModeLearning(t *testing.T) { + t.Run("system-connection-profile/AC-12", func(t *testing.T) { + hostID := liveness.HostID(uuid.Must(uuid.NewV7()).String()) + // NOPASSWD host: sudo -n true succeeds → record nopasswd. 
+ exec := &stubExec{outcomes: map[string]execResult{"sudo -n true": {code: 0}}} + profiles := &stubProfiles{} + + probe := Probe( + stubResolver{cred: validCred()}, + WithPolicyLoader(stubPolicy{cfg: systemconfig.SecurityConfig{AllowCredentialSudoPassword: true}}), + WithDialer(&stubDialer{exec: exec}), + WithProfiles(profiles), + ) + if _, ok, err := probe(context.Background(), hostID, "192.0.2.1:22", 2*time.Second); !ok { + t.Fatalf("ok: want true, got false; err=%v", err) + } + if profiles.recordedSudo != connprofile.SudoNopasswd { + t.Errorf("record: want sudo mode=nopasswd, got %q", profiles.recordedSudo) + } + }) + + t.Run("known password host leads with sudo -S", func(t *testing.T) { + hostID := liveness.HostID(uuid.Must(uuid.NewV7()).String()) + // Only sudo -S succeeds; sudo -n is NOT seeded (would be exit 1). + exec := &stubExec{outcomes: map[string]execResult{"sudo -S -k -p '' true": {code: 0}}} + profiles := &stubProfiles{preferSudo: connprofile.SudoPassword} + + probe := Probe( + stubResolver{cred: validCred()}, + WithPolicyLoader(stubPolicy{cfg: systemconfig.SecurityConfig{AllowCredentialSudoPassword: true}}), + WithDialer(&stubDialer{exec: exec}), + WithProfiles(profiles), + ) + if _, ok, err := probe(context.Background(), hostID, "192.0.2.1:22", 2*time.Second); !ok { + t.Fatalf("ok: want true, got false; err=%v", err) + } + // Led with sudo -S: no sudo -n call recorded, and mode stays + // password (already known, no re-record needed). 
+ for _, c := range exec.calls { + if c.cmd == "sudo -n true" { + t.Errorf("led with sudo -n on a known password host: %+v", exec.calls) + } + } + if profiles.recordedSudo != "" { + t.Errorf("re-record: want none (mode unchanged), got %q", profiles.recordedSudo) + } + }) +} diff --git a/specs/system/connection-profile.spec.yaml b/specs/system/connection-profile.spec.yaml index 22efeddf..5dec885a 100644 --- a/specs/system/connection-profile.spec.yaml +++ b/specs/system/connection-profile.spec.yaml @@ -1,7 +1,7 @@ spec: id: system-connection-profile title: Per-host SSH connection learning (auth method + sudo mode) - version: "1.1.0" + version: "1.2.0" status: approved tier: 2 @@ -41,12 +41,18 @@ spec: (collector), and liveness privilege-probe paths (v1.1.0) — each leads the dial with the host's recorded method and records what authenticated via the shared connprofile store + - Sudo-mode learning wired into those same three paths (v1.2.0) — + each leads sudo with the host's recorded mode and records the + mode confirmed to work. 
The liveness probe is the authoritative + learner (innocuous `true` sentinel every cycle); discovery and + collector learn opportunistically from their real sudo commands + (only a confirmed exit-0 records a mode, never a misobservation) excludes: - Settings UI/API to toggle the kill-switch (DB-only for now) - - sudo-mode (NOPASSWD vs password) learning for the discovery, - intelligence, and liveness paths — those still probe sudo mode each - cycle; only the SSH auth-method dimension is wired for them so far - (the compliance scan learns both) + - A dedicated `true`-sentinel sudo probe in discovery/collector — + they reuse their existing real sudo commands rather than adding a + round-trip, deferring to the liveness probe for authoritative + confirmation (system-connection-profile C-04 binds only the scan) constraints: - id: C-01 @@ -73,6 +79,10 @@ spec: description: The OS discovery, OS intelligence (collector), and liveness privilege-probe paths MUST lead the SSH dial with the host's recorded auth method and record the method that authenticated, using the shared connprofile store. This learning MUST be best-effort — a missing host id, an absent profile row, or a store error MUST dial in the default order and MUST NOT fail the connection. type: technical enforcement: error + - id: C-07 + description: The OS discovery, OS intelligence (collector), and liveness privilege-probe paths MUST lead sudo with the host's recorded sudo mode (leading with `sudo -S` only when the mode is password AND the credential password may be fed per C-05's gate) and MUST record the mode confirmed to work. To avoid misrecording, a mode MUST be recorded ONLY on a confirmed exit-0 of a given sudo form — never inferred from a command that exited non-zero for its own reasons. Learning MUST be best-effort and MUST NOT change the sudo password gate (C-05) or fail the connection on a store miss/error. 
+ type: technical + enforcement: error acceptance_criteria: - id: AC-01 @@ -111,3 +121,15 @@ spec: description: The liveness privilege probe, when WithProfiles is set, leads the dial with the host's recorded SSH auth method and records the method that authenticated; with no profile store wired it passes no preference and records nothing. A profile lookup error is non-fatal (dials in the default order). priority: high references_constraints: [C-06] + - id: AC-10 + description: ssh.RunSudo reports the sudo mode OBSERVED to work — SudoNopasswd when `sudo -n` exits 0, SudoPassword when `sudo -S` exits 0, and empty otherwise (never a misobservation from a command that failed for its own reasons). When told prefer=SudoPassword and the password gate is satisfied it leads with `sudo -S`, issuing zero `sudo -n` calls; a stale password preference self-heals by falling back to `sudo -n`. This is the shared primitive the collector threads (lead with the recorded mode, record the confirmed mode once per cycle). + priority: critical + references_constraints: [C-07] + - id: AC-11 + description: The discovery firewall probe leads its sudo firewall commands with the host's recorded sudo mode and opportunistically records the mode a sudo command confirms (SudoUnknown when none answered via sudo — e.g. a sudoless firewalld host); recording is skipped when the confirmed mode equals the stored one. + priority: high + references_constraints: [C-07] + - id: AC-12 + description: The liveness privilege probe records the sudo mode it confirms via the `true` sentinel (the authoritative learner, run every cycle), and on a host recorded as needing a password it leads with `sudo -S` (zero `sudo -n` calls); a confirmed mode equal to the stored one is not re-recorded. + priority: high + references_constraints: [C-07]