diff --git a/Dashboard/MainWindow.xaml.cs b/Dashboard/MainWindow.xaml.cs index 84341f8c..54b811a3 100644 --- a/Dashboard/MainWindow.xaml.cs +++ b/Dashboard/MainWindow.xaml.cs @@ -169,8 +169,11 @@ reach it directly rather than forwarding through EmailAlertService (E3c Phase 6) _alertHistoryStore = new JsonAlertHistoryStore(_preferencesService); /* Webhook service is constructed first and injected into the email service (Plan E E3c): the shared lib service carries no Current static, so Dashboard - keeps this handle for the email fan-out and any MCP/health consumers. */ - _webhookAlertService = new WebhookAlertService(alertSettings, EmailAlertService.Branding, new LoggerAdapter()); + keeps this handle for the email fan-out and any MCP/health consumers. The + history store (built just above) is passed so the webhook cooldown is seeded + across restart (#1145) — without it a restart inside the cooldown window + re-posts a Teams/Slack alert delivered just before the restart. */ + _webhookAlertService = new WebhookAlertService(alertSettings, EmailAlertService.Branding, new LoggerAdapter(), _alertHistoryStore); _emailAlertService = new EmailAlertService(alertSettings, _alertHistoryStore, _webhookAlertService, new LoggerAdapter()); _alertCheckTimer = new DispatcherTimer(); diff --git a/Dashboard/Services/JsonAlertHistoryStore.cs b/Dashboard/Services/JsonAlertHistoryStore.cs index 842084bc..16b78e69 100644 --- a/Dashboard/Services/JsonAlertHistoryStore.cs +++ b/Dashboard/Services/JsonAlertHistoryStore.cs @@ -124,6 +124,35 @@ public Task RecordAlertAsync(AlertHistoryRecord record) } } + /// + /// Returns the UTC time the most recent alert webhook was successfully + /// sent for this server/metric, scanned from the in-memory alert log + /// (loaded from alert_history.json on startup) — or null if none. 
Seeds + /// the webhook cooldown after restart so a Teams/Slack alert posted + /// shortly before a restart is not re-posted afterward (#1145, mirroring + /// the email seed #981). + /// + /// + /// Dashboard records webhook deliveries as their own alert-log rows with + /// NotificationType == "webhook" (written only on a successful post), so + /// the type alone implies success — no SendError filter is needed. + /// + public Task GetLastWebhookSentUtcAsync(string serverId, string metricName) + { + lock (_alertLogLock) + { + DateTime? max = null; + foreach (var entry in _alertLog) + { + if (entry.ServerId != serverId) continue; + if (entry.MetricName != metricName) continue; + if (entry.NotificationType != "webhook") continue; + if (max == null || entry.AlertTime > max.Value) max = entry.AlertTime; + } + return Task.FromResult(max); + } + } + /// /// Returns the AlertTime of the most recent log entry for the given /// (serverId, metricName), regardless of notification channel or send diff --git a/Lite.Tests/DuckDbSchemaTests.cs b/Lite.Tests/DuckDbSchemaTests.cs index f1084ad1..e18ea4c6 100644 --- a/Lite.Tests/DuckDbSchemaTests.cs +++ b/Lite.Tests/DuckDbSchemaTests.cs @@ -138,8 +138,9 @@ public void SchemaStatements_MatchTableCount() foreach (var _ in Schema.GetAllTableStatements()) tableCount++; - /* 31 tables from Schema (schema_version is created separately by DuckDbInitializer) */ - Assert.Equal(31, tableCount); + /* 32 tables from Schema (schema_version is created separately by DuckDbInitializer). + Includes config_edge_trigger_watermarks added for #1145. */ + Assert.Equal(32, tableCount); } [Fact] diff --git a/Lite.Tests/StoreRoundTripTests.cs b/Lite.Tests/StoreRoundTripTests.cs index 5646c2fb..38c5e8d0 100644 --- a/Lite.Tests/StoreRoundTripTests.cs +++ b/Lite.Tests/StoreRoundTripTests.cs @@ -124,6 +124,70 @@ unfiltered read still returns the row. 
*/ Assert.Null(await store.GetLastAlertTimeAsync("3", "Missing")); } + [Fact] + public async Task GetLastWebhookSentUtc_FiltersToWebhookRows_IncludingEmailWebhook() + { + await _duckDb.InitializeAsync(); + var store = new DuckDbAlertHistoryStore(_duckDb); + + /* #1145: only rows whose notification_type implies a webhook delivered seed the webhook + cooldown. email-only / tray rows must NOT; an 'email+webhook' row counts even though its + send_error is the EMAIL failure (the webhook still delivered), because send_error tracks + the email channel, not the webhook. */ + await RecordAsync(store, "5", "Blocking Detected", "email", null); // email only + await RecordAsync(store, "5", "Blocking Detected", "tray", null); // tray only + await RecordAsync(store, "5", "Blocking Detected", "webhook", null); // webhook + await RecordAsync(store, "5", "Blocking Detected", "email+webhook", "smtp boom"); // webhook sent, email failed (latest) + + var lastWebhook = await store.GetLastWebhookSentUtcAsync("5", "Blocking Detected"); + var lastAny = await store.GetLastAlertTimeAsync("5", "Blocking Detected"); + + Assert.NotNull(lastWebhook); + /* The email+webhook row is the last written, so it is both the unfiltered max and the + webhook-filtered max. */ + Assert.Equal(lastAny!.Value, lastWebhook!.Value); + + /* A metric with only email/tray rows → no webhook seed. */ + await RecordAsync(store, "5", "EmailOnly", "email", null); + await RecordAsync(store, "5", "EmailOnly", "tray", null); + Assert.Null(await store.GetLastWebhookSentUtcAsync("5", "EmailOnly")); + + /* Unknown metric → null. */ + Assert.Null(await store.GetLastWebhookSentUtcAsync("5", "Missing")); + } + + [Fact] + public async Task EdgeTriggerWatermark_SaveLoad_RoundTripsAndUpserts() + { + await _duckDb.InitializeAsync(); + var store = new DuckDbAlertHistoryStore(_duckDb); + + /* #1145: empty table → empty load. 
*/ + Assert.Empty(await store.LoadEdgeTriggerWatermarksAsync()); + + await store.SaveEdgeTriggerWatermarkAsync(1, "Blocking Detected", 4); + await store.SaveEdgeTriggerWatermarkAsync(1, "Deadlocks Detected", 2); + await store.SaveEdgeTriggerWatermarkAsync(2, "Blocking Detected", 7); + + var loaded = await store.LoadEdgeTriggerWatermarksAsync(); + Assert.Equal(3, loaded.Count); + Assert.Contains(loaded, r => r.ServerId == 1 && r.MetricName == "Blocking Detected" && r.Watermark == 4); + Assert.Contains(loaded, r => r.ServerId == 1 && r.MetricName == "Deadlocks Detected" && r.Watermark == 2); + Assert.Contains(loaded, r => r.ServerId == 2 && r.MetricName == "Blocking Detected" && r.Watermark == 7); + + /* Upsert on the (server_id, metric_name) primary key: same key overwrites, no dup row. */ + await store.SaveEdgeTriggerWatermarkAsync(1, "Blocking Detected", 9); + loaded = await store.LoadEdgeTriggerWatermarksAsync(); + Assert.Equal(3, loaded.Count); + Assert.Contains(loaded, r => r.ServerId == 1 && r.MetricName == "Blocking Detected" && r.Watermark == 9); + + /* A reset to 0 (the window drained) persists too — so a restart restores 0, not a stale count. 
*/ + await store.SaveEdgeTriggerWatermarkAsync(1, "Blocking Detected", 0); + loaded = await store.LoadEdgeTriggerWatermarksAsync(); + Assert.Equal(3, loaded.Count); + Assert.Contains(loaded, r => r.ServerId == 1 && r.MetricName == "Blocking Detected" && r.Watermark == 0); + } + [Fact] public async Task MuteRuleStore_InsertUpdateSetEnabledDeleteExpire_RoundTrips() { diff --git a/Lite.Tests/WebhookCooldownSeedTests.cs b/Lite.Tests/WebhookCooldownSeedTests.cs new file mode 100644 index 00000000..e144c0c4 --- /dev/null +++ b/Lite.Tests/WebhookCooldownSeedTests.cs @@ -0,0 +1,110 @@ +using System; +using System.Threading.Tasks; +using PerformanceMonitor.Notifications; +using PerformanceMonitorLite; +using PerformanceMonitorLite.Services; +using Xunit; + +namespace PerformanceMonitorLite.Tests; + +/// +/// #1145: the shared WebhookAlertService must seed its per-(serverId, metricName) +/// cooldown from alert history on first use, so a Teams/Slack alert posted shortly before an app +/// restart is not re-posted on the first post-restart sweep — the guarantee #981 gave the email +/// channel. The cooldown is time-bounded (EmailCooldownMinutes), so it only covers a restart +/// inside the cooldown window; the time-independent edge-trigger watermark persistence (Lite) +/// covers the rest. These tests use a dead webhook URL: a SUPPRESSED post never touches the +/// network (the cooldown short-circuits first), while an ATTEMPTED post fails against the dead +/// URL and increments the Teams failure counter — the observable proxy for "did it try to post". +/// +public class WebhookCooldownSeedTests +{ + private static WebhookAlertService MakeService(IAlertHistoryStore? 
history, FakeWebhookSettings settings) + => new(settings, EmailAlertService.Branding, new AppLoggerAdapter(), history); + + private static FakeWebhookSettings EnabledTeamsSettings() => new() + { + TeamsWebhookEnabled = true, + TeamsWebhookUrl = "http://localhost:1/never", // closed port -> connection refused, fast deterministic failure + EmailCooldownMinutes = 15 + }; + + [Fact] + public async Task SeedsCooldownFromHistory_WithinWindow_SuppressesRepostAfterRestart() + { + // A webhook delivered "just now", then a restart (fresh service = empty in-memory cooldown). + var history = new FakeHistoryStore { LastWebhookSent = DateTime.UtcNow }; + var svc = MakeService(history, EnabledTeamsSettings()); + + var sent = await svc.TrySendWebhookAlertsAsync("Deadlocks Detected", "Srv", "4", "1", "1"); + + Assert.False(sent); // suppressed + Assert.Equal(1, history.GetLastWebhookSentCallCount); // the seed was consulted + Assert.Equal(0, svc.GetTeamsHealth().ConsecutiveFailures); // and NO post was attempted + } + + [Fact] + public async Task SeedFromHistory_OlderThanCooldown_DoesNotSuppress() + { + // gotqn's repro: the restart is 17 min after the send, beyond the 15-min cooldown. The + // cooldown seed must NOT suppress here — that's exactly why the Lite watermark persistence + // is also needed. The post is attempted (and fails against the dead URL). 
+ var history = new FakeHistoryStore { LastWebhookSent = DateTime.UtcNow.AddMinutes(-17) }; + var svc = MakeService(history, EnabledTeamsSettings()); + + var sent = await svc.TrySendWebhookAlertsAsync("Deadlocks Detected", "Srv", "4", "1", "1"); + + Assert.False(sent); // dead URL -> post failed + Assert.Equal(1, history.GetLastWebhookSentCallCount); // seed consulted + Assert.Equal(1, svc.GetTeamsHealth().ConsecutiveFailures); // but it WAS attempted (not suppressed) + } + + [Fact] + public async Task NullHistoryStore_NoSeed_AttemptsPost() + { + // The legacy/test path passes no history store: pre-#1145 in-memory-only cooldown, so a + // fresh service attempts the post. + var svc = MakeService(history: null, EnabledTeamsSettings()); + + var sent = await svc.TrySendWebhookAlertsAsync("Deadlocks Detected", "Srv", "4", "1", "1"); + + Assert.False(sent); + Assert.Equal(1, svc.GetTeamsHealth().ConsecutiveFailures); + } + + private sealed class FakeWebhookSettings : IAlertSettings + { + public bool SmtpEnabled => false; + public string SmtpServer => ""; + public int SmtpPort => 25; + public bool SmtpUseSsl => false; + public string SmtpUsername => ""; + public string SmtpFromAddress => ""; + public string SmtpRecipients => ""; + public string? GetSmtpPassword() => null; + public int EmailCooldownMinutes { get; set; } = 15; + public bool TeamsWebhookEnabled { get; set; } + public string TeamsWebhookUrl { get; set; } = ""; + public string TeamsProxyAddress => ""; + public bool SlackWebhookEnabled { get; set; } + public string SlackWebhookUrl { get; set; } = ""; + public string SlackProxyAddress => ""; + public double AnalysisNotifySeverity => 1.5; + public int AnalysisNotifyCooldownMinutes => 360; + } + + private sealed class FakeHistoryStore : IAlertHistoryStore + { + public DateTime? 
LastWebhookSent { get; set; } + public int GetLastWebhookSentCallCount { get; private set; } + + public Task RecordAlertAsync(AlertHistoryRecord record) => Task.CompletedTask; + public Task GetLastEmailSentUtcAsync(string serverId, string metricName) => Task.FromResult(null); + public Task GetLastWebhookSentUtcAsync(string serverId, string metricName) + { + GetLastWebhookSentCallCount++; + return Task.FromResult(LastWebhookSent); + } + public Task GetLastAlertTimeAsync(string serverId, string metricName) => Task.FromResult(null); + } +} diff --git a/Lite/Database/Schema.cs b/Lite/Database/Schema.cs index 1b059697..7ff8ce04 100644 --- a/Lite/Database/Schema.cs +++ b/Lite/Database/Schema.cs @@ -768,6 +768,20 @@ CREATE TABLE IF NOT EXISTS config_alert_log ( context_json VARCHAR )"; + /* Edge-trigger watermarks for the rolling-count blocking/deadlock alert gate (#1091). + Persisted so the watermark survives an app restart (#1145): without it the in-memory + watermark resets to 0 and the first post-restart sweep re-fires the same alert (and + re-posts the same webhook) for events still lingering in the 1-hour lookback window. + Keyed (server_id, metric_name); one short row per server/metric, upserted on change. 
*/ + public const string CreateEdgeTriggerWatermarksTable = @" +CREATE TABLE IF NOT EXISTS config_edge_trigger_watermarks ( + server_id INTEGER NOT NULL, + metric_name VARCHAR NOT NULL, + watermark INTEGER NOT NULL, + updated_at TIMESTAMP NOT NULL, + PRIMARY KEY (server_id, metric_name) +)"; + public const string CreateMuteRulesTable = @" CREATE TABLE IF NOT EXISTS config_mute_rules ( id VARCHAR NOT NULL PRIMARY KEY, @@ -829,6 +843,7 @@ public static IEnumerable GetAllTableStatements() yield return CreateServerPropertiesTable; yield return CreateSessionStatsTable; yield return CreateAlertLogTable; + yield return CreateEdgeTriggerWatermarksTable; yield return CreateMuteRulesTable; yield return CreateDismissedArchiveAlertsTable; } diff --git a/Lite/MainWindow.xaml.cs b/Lite/MainWindow.xaml.cs index 6bc9298d..d9b7778f 100644 --- a/Lite/MainWindow.xaml.cs +++ b/Lite/MainWindow.xaml.cs @@ -68,6 +68,8 @@ reflects all conditions. _lastBadgeCounts lets the sweep re-render with the last private readonly IAlertSettings _alertSettings = new AppAlertSettings(); private readonly MuteRuleService _muteRuleService; private EmailAlertService _emailAlertService; + /* Held so the edge-trigger watermark seed/persist (#1145) can reach the store directly. */ + private readonly DuckDbAlertHistoryStore _alertHistoryStore; /* Track active alert states for resolved notifications */ private readonly Dictionary _activeCpuAlert = new(); @@ -98,6 +100,14 @@ and reset to 0 when the window empties so the next event alerts again. */ private readonly Dictionary _lastAlertedBlockingCount = new(); private readonly Dictionary _lastAlertedDeadlockCount = new(); + /* Persistence for the two watermarks above (#1145): seeded from the alert store at + startup (SeedEdgeTriggerWatermarksAsync) and upserted on change, so a restart does + not reset the watermark to 0 and re-fire / re-post a webhook for events still + lingering in the rolling 1-hour lookback window. 
The metric_name values are the + persisted-row keys; they need not match the alert "Detected" metric names. */ + private const string BlockingWatermarkMetric = "Blocking Detected"; + private const string DeadlockWatermarkMetric = "Deadlocks Detected"; + public MainWindow() { InitializeComponent(); @@ -105,12 +115,14 @@ public MainWindow() // Initialize services (with loggers wired to AppLogger) _databaseInitializer = new DuckDbInitializer(App.DatabasePath, new AppLoggerAdapter()); /* Webhook service is constructed first and injected into the email service - (Plan E E3c): the shared send core fans out to it. */ + (Plan E E3c): the shared send core fans out to it. The history store is shared + by both so the webhook service can seed its cooldown across restart (#1145). */ + _alertHistoryStore = new DuckDbAlertHistoryStore(_databaseInitializer); var webhookAlertService = new WebhookAlertService( - _alertSettings, EmailAlertService.Branding, new AppLoggerAdapter()); + _alertSettings, EmailAlertService.Branding, new AppLoggerAdapter(), _alertHistoryStore); _emailAlertService = new EmailAlertService( _alertSettings, - new DuckDbAlertHistoryStore(_databaseInitializer), + _alertHistoryStore, webhookAlertService, new AppLoggerAdapter()); _muteRuleService = new MuteRuleService( @@ -169,6 +181,14 @@ private async void MainWindow_Loaded(object sender, RoutedEventArgs e) // Initialize the DuckDB database await _databaseInitializer.InitializeAsync(); + /* Restore edge-trigger watermarks now — after the DB (and the watermark table) + exist, but before ANY alert sweep can read the watermark dicts. RefreshServerList() + below ends in a fire-and-forget RefreshOverviewAsync() → CheckPerformanceAlerts, so + seeding here (not just before the explicit RefreshOverviewAsync later) keeps a + restart from re-firing / re-posting alerts for events still in the lookback + window (#1145), independent of whether the DuckDB reads ever yield. 
*/ + await SeedEdgeTriggerWatermarksAsync(); + // Initialize the collection engine (with loggers wired to AppLogger) _collectorService = new RemoteCollectorService( _databaseInitializer, @@ -1536,6 +1556,36 @@ private void CheckConnectionsAndNotify() } } + /// + /// Seeds the in-memory edge-trigger watermarks from the persisted store (#1145) so a + /// restart does not reset them to 0 and re-fire / re-post a webhook for blocking/deadlock + /// events still lingering in the rolling 1-hour lookback window. Runs once at startup, + /// after the DB is initialized and before the first alert sweep. + /// + private async Task SeedEdgeTriggerWatermarksAsync() + { + try + { + var rows = await _alertHistoryStore.LoadEdgeTriggerWatermarksAsync(); + foreach (var (serverId, metricName, watermark) in rows) + { + var key = serverId.ToString(); + if (metricName == BlockingWatermarkMetric) + { + _lastAlertedBlockingCount[key] = watermark; + } + else if (metricName == DeadlockWatermarkMetric) + { + _lastAlertedDeadlockCount[key] = watermark; + } + } + } + catch (Exception ex) + { + AppLogger.Error("Alerts", $"Failed to seed edge-trigger watermarks: {ex.Message}"); + } + } + private async void CheckPerformanceAlerts(ServerSummaryItem summary) { if (!App.AlertsEnabled || _trayService == null) return; @@ -1644,6 +1694,13 @@ await _emailAlertService.TrySendAlertEmailAsync( ? RollingCountAlertGate.Evaluate(effectiveBlockingCount, App.AlertBlockingThreshold, blockingWatermark, blockingCooldownElapsed, suppressPopups) : new RollingCountAlertGate.Decision(false, false, 0); _lastAlertedBlockingCount[key] = blockingDecision.Watermark; + /* Persist the watermark across restart so the same blocked-process reports aren't + re-alerted (and re-posted to Teams/Slack) on the first post-restart sweep (#1145). + On-change only — the gate returns the same watermark on most sweeps. 
*/ + if (blockingDecision.Watermark != blockingWatermark) + { + await _alertHistoryStore.SaveEdgeTriggerWatermarkAsync(summary.ServerId, BlockingWatermarkMetric, blockingDecision.Watermark); + } bool wasBlockingActive = _activeBlockingAlert.TryGetValue(key, out var wasBlocking) && wasBlocking; _activeBlockingAlert[key] = blockingDecision.Active; @@ -1715,6 +1772,12 @@ await SendDetectedAlertAsync( ? RollingCountAlertGate.Evaluate(effectiveDeadlockCount, App.AlertDeadlockThreshold, deadlockWatermark, deadlockCooldownElapsed, suppressPopups) : new RollingCountAlertGate.Decision(false, false, 0); _lastAlertedDeadlockCount[key] = deadlockDecision.Watermark; + /* Persist the watermark across restart so the same deadlocks aren't re-alerted (and + re-posted to Teams/Slack) on the first post-restart sweep (#1145). On-change only. */ + if (deadlockDecision.Watermark != deadlockWatermark) + { + await _alertHistoryStore.SaveEdgeTriggerWatermarkAsync(summary.ServerId, DeadlockWatermarkMetric, deadlockDecision.Watermark); + } bool wasDeadlockActive = _activeDeadlockAlert.TryGetValue(key, out var wasDeadlock) && wasDeadlock; _activeDeadlockAlert[key] = deadlockDecision.Active; diff --git a/Lite/Services/DuckDbAlertHistoryStore.cs b/Lite/Services/DuckDbAlertHistoryStore.cs index cbdb4d5e..8fd4e188 100644 --- a/Lite/Services/DuckDbAlertHistoryStore.cs +++ b/Lite/Services/DuckDbAlertHistoryStore.cs @@ -7,6 +7,7 @@ */ using System; +using System.Collections.Generic; using System.Threading.Tasks; using PerformanceMonitor.Notifications; using PerformanceMonitorLite.Database; @@ -155,6 +156,60 @@ is explicit (the cooldown subtraction is tick math regardless). */ } } + /// + /// Returns the UTC time the most recent alert webhook was successfully sent + /// for this server/metric, read from config_alert_log — or null if none. 
+ /// Seeds the webhook cooldown after restart so a Teams/Slack alert posted + /// shortly before a restart is not re-posted afterward (#1145, mirroring the + /// email seed #981). + /// + public async Task GetLastWebhookSentUtcAsync(string serverId, string metricName) + { + var sid = int.TryParse(serverId, out var s) ? s : 0; + try + { + /* Use injected initializer, fall back to creating one from App.DatabasePath */ + var duckDb = _duckDb; + if (duckDb == null) + { + var dbPath = App.DatabasePath; + if (string.IsNullOrEmpty(dbPath)) return null; + duckDb = new DuckDbInitializer(dbPath); + } + + using var readLock = duckDb.AcquireReadLock(); + using var connection = duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var command = connection.CreateCommand(); + /* A successful webhook send is logged with a notification_type of + 'webhook' / 'email+webhook' — those types are only ever written + when WebhookSent is true, so the type alone implies success. + send_error tracks the EMAIL channel, so it is NOT filtered on: + an email-failed-but-webhook-sent row must still seed the cooldown. */ + command.CommandText = @" +SELECT MAX(alert_time) +FROM config_alert_log +WHERE server_id = $1 +AND metric_name = $2 +AND notification_type IN ('webhook', 'email+webhook')"; + command.Parameters.Add(new DuckDB.NET.Data.DuckDBParameter { Value = sid }); + command.Parameters.Add(new DuckDB.NET.Data.DuckDBParameter { Value = metricName }); + + var result = await command.ExecuteScalarAsync(); + if (result == null || result == DBNull.Value) return null; + + /* alert_time is written as DateTime.UtcNow; tag it UTC so the kind + is explicit (the cooldown subtraction is tick math regardless). 
*/ + return DateTime.SpecifyKind(Convert.ToDateTime(result), DateTimeKind.Utc); + } + catch (Exception ex) + { + AppLogger.Error("WebhookAlert", $"Could not read persisted webhook cooldown: {ex.Message}"); + return null; + } + } + /// /// Returns the UTC time of the most recent alert_log row for this /// (serverId, metricName), regardless of notification channel or @@ -207,4 +262,86 @@ FROM config_alert_log return null; } } + + /// + /// Loads all persisted edge-trigger watermarks (#1145), one entry per + /// (server_id, metric_name). The caller seeds its in-memory watermark dicts + /// from these at startup, before the first alert sweep, so a restart does not + /// reset the watermark to 0 and re-fire (and re-post a webhook for) events + /// still lingering in the rolling lookback window. + /// + public async Task> LoadEdgeTriggerWatermarksAsync() + { + var result = new List<(int, string, int)>(); + try + { + var duckDb = _duckDb; + if (duckDb == null) + { + var dbPath = App.DatabasePath; + if (string.IsNullOrEmpty(dbPath)) return result; + duckDb = new DuckDbInitializer(dbPath); + } + + using var readLock = duckDb.AcquireReadLock(); + using var connection = duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var command = connection.CreateCommand(); + command.CommandText = @" +SELECT server_id, metric_name, watermark +FROM config_edge_trigger_watermarks"; + + using var reader = await command.ExecuteReaderAsync(); + while (await reader.ReadAsync()) + { + result.Add((Convert.ToInt32(reader.GetValue(0)), reader.GetString(1), Convert.ToInt32(reader.GetValue(2)))); + } + } + catch (Exception ex) + { + AppLogger.Error("Alerts", $"Could not load edge-trigger watermarks: {ex.Message}"); + } + return result; + } + + /// + /// Upserts one edge-trigger watermark (#1145). 
Called on-change only — the gate + /// returns the same watermark on the vast majority of sweeps — so this is a + /// low-frequency write that piggybacks on the existing alert-store write lock. + /// + public async Task SaveEdgeTriggerWatermarkAsync(int serverId, string metricName, int watermark) + { + try + { + var duckDb = _duckDb; + if (duckDb == null) + { + var dbPath = App.DatabasePath; + if (string.IsNullOrEmpty(dbPath)) return; + duckDb = new DuckDbInitializer(dbPath); + } + + using var writeLock = duckDb.AcquireWriteLock(); + using var connection = duckDb.CreateConnection(); + await connection.OpenAsync(); + + using var command = connection.CreateCommand(); + /* INSERT OR REPLACE upserts on the (server_id, metric_name) primary key — + one stable row per server/metric, overwritten each time the watermark moves. */ + command.CommandText = @" +INSERT OR REPLACE INTO config_edge_trigger_watermarks (server_id, metric_name, watermark, updated_at) +VALUES ($1, $2, $3, $4)"; + command.Parameters.Add(new DuckDB.NET.Data.DuckDBParameter { Value = serverId }); + command.Parameters.Add(new DuckDB.NET.Data.DuckDBParameter { Value = metricName }); + command.Parameters.Add(new DuckDB.NET.Data.DuckDBParameter { Value = watermark }); + command.Parameters.Add(new DuckDB.NET.Data.DuckDBParameter { Value = DateTime.UtcNow }); + + await command.ExecuteNonQueryAsync(); + } + catch (Exception ex) + { + AppLogger.Error("Alerts", $"Could not persist edge-trigger watermark ({metricName}): {ex.Message}"); + } + } } diff --git a/PerformanceMonitor.Notifications/IAlertHistoryStore.cs b/PerformanceMonitor.Notifications/IAlertHistoryStore.cs index f017160d..76c98800 100644 --- a/PerformanceMonitor.Notifications/IAlertHistoryStore.cs +++ b/PerformanceMonitor.Notifications/IAlertHistoryStore.cs @@ -41,6 +41,16 @@ public interface IAlertHistoryStore /// Task GetLastEmailSentUtcAsync(string serverId, string metricName); + /// + /// MAX(alert_time) filtered to a *successful webhook send* — 
seeds the webhook + /// cooldown across restart so a Teams/Slack alert delivered shortly before a restart + /// is not re-posted afterward (#1145, mirroring the email seed #981). The + /// notification_type already implies the webhook delivered (it's only written on a + /// successful post), and send_error tracks the EMAIL channel, so it is NOT filtered on. + /// Lite: notification_type IN ('webhook','email+webhook'). Dash: NotificationType == "webhook". + /// + Task GetLastWebhookSentUtcAsync(string serverId, string metricName); + /// /// MAX(alert_time) UNFILTERED (any channel/result) — seeds the analysis /// per-finding cooldown across restart. Stamped unconditionally upstream. diff --git a/PerformanceMonitor.Notifications/WebhookAlertService.cs b/PerformanceMonitor.Notifications/WebhookAlertService.cs index ed297e05..506b0742 100644 --- a/PerformanceMonitor.Notifications/WebhookAlertService.cs +++ b/PerformanceMonitor.Notifications/WebhookAlertService.cs @@ -41,17 +41,28 @@ at the email / in-app dialog instead. */ private readonly IAlertSettings _settings; private readonly AlertBranding _branding; private readonly ILogger _logger; + private readonly IAlertHistoryStore? _historyStore; private int _consecutiveTeamsFailures; private string? _lastTeamsError; private int _consecutiveSlackFailures; private string? _lastSlackError; - public WebhookAlertService(IAlertSettings settings, AlertBranding branding, ILogger logger) + /// + /// Optional alert-history store used to seed the per-(serverId, metricName) webhook + /// cooldown across an app restart (#1145, mirroring the email seed #981). When null the + /// cooldown is purely in-memory (the pre-#1145 behavior) — the test call sites pass null. + /// + public WebhookAlertService( + IAlertSettings settings, + AlertBranding branding, + ILogger logger, + IAlertHistoryStore? 
historyStore = null) { _settings = settings; _branding = branding; _logger = logger; + _historyStore = historyStore; } /// @@ -69,6 +80,20 @@ public async Task TrySendWebhookAlertsAsync( try { var cooldownKey = $"webhook:{serverId}:{metricName}"; + + /* Seed the in-memory cooldown from the alert log the first time this key is + seen, so a Teams/Slack alert posted shortly before an app restart is not + immediately re-posted afterward (#1145, mirroring the email seed #981). The + in-memory dictionary is authoritative once seeded. */ + if (_historyStore is not null && !_cooldowns.ContainsKey(cooldownKey)) + { + var lastPersistedSend = await _historyStore.GetLastWebhookSentUtcAsync(serverId, metricName); + if (lastPersistedSend.HasValue) + { + _cooldowns.TryAdd(cooldownKey, lastPersistedSend.Value); + } + } + if (_cooldowns.TryGetValue(cooldownKey, out var lastSent) && DateTime.UtcNow - lastSent < TimeSpan.FromMinutes(_settings.EmailCooldownMinutes)) {