Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Dashboard/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,11 @@ reach it directly rather than forwarding through EmailAlertService (E3c Phase 6)
_alertHistoryStore = new JsonAlertHistoryStore(_preferencesService);
/* Webhook service is constructed first and injected into the email service
(Plan E E3c): the shared lib service carries no Current static, so Dashboard
keeps this handle for the email fan-out and any MCP/health consumers. */
_webhookAlertService = new WebhookAlertService(alertSettings, EmailAlertService.Branding, new LoggerAdapter<WebhookAlertService>());
keeps this handle for the email fan-out and any MCP/health consumers. The
history store (built on the line above) is passed so the webhook cooldown is seeded
across restart (#1145) — without it a restart inside the cooldown window
re-posts a Teams/Slack alert delivered just before the restart. */
_webhookAlertService = new WebhookAlertService(alertSettings, EmailAlertService.Branding, new LoggerAdapter<WebhookAlertService>(), _alertHistoryStore);
_emailAlertService = new EmailAlertService(alertSettings, _alertHistoryStore, _webhookAlertService, new LoggerAdapter<EmailAlertService>());

_alertCheckTimer = new DispatcherTimer();
Expand Down
29 changes: 29 additions & 0 deletions Dashboard/Services/JsonAlertHistoryStore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,35 @@ public Task RecordAlertAsync(AlertHistoryRecord record)
}
}

/// <summary>
/// Looks up the newest <c>AlertTime</c> among the in-memory alert-log entries
/// (loaded from alert_history.json on startup) that belong to the given
/// server/metric and carry the "webhook" notification type. The value is used
/// to seed the webhook cooldown after a restart, so a Teams/Slack alert posted
/// shortly before the restart is not posted again afterward (#1145, the
/// webhook counterpart of the email seed from #981).
/// </summary>
/// <remarks>
/// Dashboard logs each webhook delivery as its own alert-log row with
/// NotificationType == "webhook", and writes that row only after a successful
/// post. The type therefore already implies success, which is why no
/// SendError check appears here.
/// </remarks>
/// <param name="serverId">Server identifier an entry must match exactly.</param>
/// <param name="metricName">Metric name an entry must match exactly.</param>
/// <returns>
/// A completed task holding the latest matching <c>AlertTime</c>, or
/// <c>null</c> when no entry matches.
/// </returns>
public Task<DateTime?> GetLastWebhookSentUtcAsync(string serverId, string metricName)
{
    DateTime? latest = null;

    lock (_alertLogLock)
    {
        foreach (var row in _alertLog)
        {
            /* Same three filters as before, evaluated in the same order. */
            var isWebhookRowForKey = row.ServerId == serverId
                && row.MetricName == metricName
                && row.NotificationType == "webhook";
            if (!isWebhookRowForKey)
            {
                continue;
            }

            if (!latest.HasValue || row.AlertTime > latest.Value)
            {
                latest = row.AlertTime;
            }
        }
    }

    return Task.FromResult(latest);
}

/// <summary>
/// Returns the AlertTime of the most recent log entry for the given
/// (serverId, metricName), regardless of notification channel or send
Expand Down
5 changes: 3 additions & 2 deletions Lite.Tests/DuckDbSchemaTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,9 @@ public void SchemaStatements_MatchTableCount()
foreach (var _ in Schema.GetAllTableStatements())
tableCount++;

/* 31 tables from Schema (schema_version is created separately by DuckDbInitializer) */
Assert.Equal(31, tableCount);
/* 32 tables from Schema (schema_version is created separately by DuckDbInitializer).
Includes config_edge_trigger_watermarks added for #1145. */
Assert.Equal(32, tableCount);
}

[Fact]
Expand Down
64 changes: 64 additions & 0 deletions Lite.Tests/StoreRoundTripTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,70 @@ unfiltered read still returns the row. */
Assert.Null(await store.GetLastAlertTimeAsync("3", "Missing"));
}

[Fact]
public async Task GetLastWebhookSentUtc_FiltersToWebhookRows_IncludingEmailWebhook()
{
    const string serverId = "5";
    const string mixedMetric = "Blocking Detected";
    const string emailOnlyMetric = "EmailOnly";

    await _duckDb.InitializeAsync();
    var store = new DuckDbAlertHistoryStore(_duckDb);

    /* #1145: the webhook cooldown is seeded only from rows whose notification_type implies
       a webhook was delivered. Rows for email-only or tray notifications must be ignored.
       An 'email+webhook' row still counts even when send_error is set, because send_error
       describes the EMAIL channel — the webhook itself was delivered. */
    await RecordAsync(store, serverId, mixedMetric, "email", null);                      // email only
    await RecordAsync(store, serverId, mixedMetric, "tray", null);                       // tray only
    await RecordAsync(store, serverId, mixedMetric, "webhook", null);                    // webhook
    await RecordAsync(store, serverId, mixedMetric, "email+webhook", "smtp boom");       // webhook sent, email failed (latest)

    var webhookSeed = await store.GetLastWebhookSentUtcAsync(serverId, mixedMetric);
    var newestOfAnyType = await store.GetLastAlertTimeAsync(serverId, mixedMetric);

    Assert.NotNull(webhookSeed);
    /* The email+webhook row was written last, so the unfiltered maximum and the
       webhook-filtered maximum must be the same timestamp. */
    Assert.Equal(newestOfAnyType!.Value, webhookSeed!.Value);

    /* Only email/tray rows exist for this metric, so there is nothing to seed from. */
    await RecordAsync(store, serverId, emailOnlyMetric, "email", null);
    await RecordAsync(store, serverId, emailOnlyMetric, "tray", null);
    Assert.Null(await store.GetLastWebhookSentUtcAsync(serverId, emailOnlyMetric));

    /* A metric that was never recorded yields null as well. */
    Assert.Null(await store.GetLastWebhookSentUtcAsync(serverId, "Missing"));
}

[Fact]
public async Task EdgeTriggerWatermark_SaveLoad_RoundTripsAndUpserts()
{
    const string blocking = "Blocking Detected";
    const string deadlocks = "Deadlocks Detected";

    await _duckDb.InitializeAsync();
    var store = new DuckDbAlertHistoryStore(_duckDb);

    /* #1145: nothing saved yet, so the load comes back empty. */
    Assert.Empty(await store.LoadEdgeTriggerWatermarksAsync());

    await store.SaveEdgeTriggerWatermarkAsync(1, blocking, 4);
    await store.SaveEdgeTriggerWatermarkAsync(1, deadlocks, 2);
    await store.SaveEdgeTriggerWatermarkAsync(2, blocking, 7);

    /* Three distinct (server_id, metric_name) keys round-trip with their values. */
    var initialRows = await store.LoadEdgeTriggerWatermarksAsync();
    Assert.Equal(3, initialRows.Count);
    Assert.Contains(initialRows, w => w.ServerId == 1 && w.MetricName == blocking && w.Watermark == 4);
    Assert.Contains(initialRows, w => w.ServerId == 1 && w.MetricName == deadlocks && w.Watermark == 2);
    Assert.Contains(initialRows, w => w.ServerId == 2 && w.MetricName == blocking && w.Watermark == 7);

    /* Saving an existing (server_id, metric_name) key overwrites it: the row count is
       unchanged and the new value is visible. */
    await store.SaveEdgeTriggerWatermarkAsync(1, blocking, 9);
    var rowsAfterUpsert = await store.LoadEdgeTriggerWatermarksAsync();
    Assert.Equal(3, rowsAfterUpsert.Count);
    Assert.Contains(rowsAfterUpsert, w => w.ServerId == 1 && w.MetricName == blocking && w.Watermark == 9);

    /* Writing 0 (the window drained) is persisted like any other value, so a restart
       restores 0 rather than a stale count. */
    await store.SaveEdgeTriggerWatermarkAsync(1, blocking, 0);
    var rowsAfterReset = await store.LoadEdgeTriggerWatermarksAsync();
    Assert.Equal(3, rowsAfterReset.Count);
    Assert.Contains(rowsAfterReset, w => w.ServerId == 1 && w.MetricName == blocking && w.Watermark == 0);
}

[Fact]
public async Task MuteRuleStore_InsertUpdateSetEnabledDeleteExpire_RoundTrips()
{
Expand Down
110 changes: 110 additions & 0 deletions Lite.Tests/WebhookCooldownSeedTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
using System;
using System.Threading.Tasks;
using PerformanceMonitor.Notifications;
using PerformanceMonitorLite;
using PerformanceMonitorLite.Services;
using Xunit;

namespace PerformanceMonitorLite.Tests;

/// <summary>
/// #1145: verifies that the shared <see cref="WebhookAlertService"/> seeds its
/// per-(serverId, metricName) cooldown from alert history the first time it is used. That way a
/// Teams/Slack alert posted shortly before an app restart is not posted again on the first
/// sweep after the restart — the same guarantee #981 introduced for the email channel.
/// The cooldown is time-bounded (EmailCooldownMinutes), so the seed only helps when the restart
/// happens inside the cooldown window; the time-independent edge-trigger watermark persistence
/// (Lite) handles everything else.
/// <para>
/// All tests point the service at a dead webhook URL. A SUPPRESSED post never reaches the
/// network because the cooldown short-circuits first; an ATTEMPTED post fails against the dead
/// URL and bumps the Teams failure counter, which serves as the observable proxy for
/// "did it try to post".
/// </para>
/// </summary>
public class WebhookCooldownSeedTests
{
    /* Closed port -> connection refused: a fast, deterministic failure. */
    private const string DeadWebhookUrl = "http://localhost:1/never";

    private const int CooldownMinutes = 15;

    /* Builds a fresh service instance, i.e. one with an empty in-memory cooldown. */
    private static WebhookAlertService CreateService(IAlertHistoryStore? historyStore, FakeWebhookSettings settings)
    {
        return new WebhookAlertService(
            settings,
            EmailAlertService.Branding,
            new AppLoggerAdapter<WebhookAlertService>(),
            historyStore);
    }

    /* Settings with only the Teams webhook switched on, aimed at the dead URL. */
    private static FakeWebhookSettings CreateTeamsSettings()
    {
        return new FakeWebhookSettings
        {
            TeamsWebhookEnabled = true,
            TeamsWebhookUrl = DeadWebhookUrl,
            EmailCooldownMinutes = CooldownMinutes
        };
    }

    [Fact]
    public async Task SeedsCooldownFromHistory_WithinWindow_SuppressesRepostAfterRestart()
    {
        // History says a webhook went out "just now"; the new service instance stands in for
        // the restarted app with nothing in its in-memory cooldown.
        var historyStore = new FakeHistoryStore { LastWebhookSent = DateTime.UtcNow };
        var service = CreateService(historyStore, CreateTeamsSettings());

        var sent = await service.TrySendWebhookAlertsAsync("Deadlocks Detected", "Srv", "4", "1", "1");

        Assert.False(sent);                                            // suppressed
        Assert.Equal(1, historyStore.GetLastWebhookSentCallCount);     // the seed was consulted
        Assert.Equal(0, service.GetTeamsHealth().ConsecutiveFailures); // and NO post was attempted
    }

    [Fact]
    public async Task SeedFromHistory_OlderThanCooldown_DoesNotSuppress()
    {
        // gotqn's repro: the restart happens 17 min after the send, past the 15-min cooldown.
        // The seed must NOT suppress in that case — which is precisely why the Lite watermark
        // persistence is needed too. The post is attempted and fails against the dead URL.
        var historyStore = new FakeHistoryStore { LastWebhookSent = DateTime.UtcNow.AddMinutes(-17) };
        var service = CreateService(historyStore, CreateTeamsSettings());

        var sent = await service.TrySendWebhookAlertsAsync("Deadlocks Detected", "Srv", "4", "1", "1");

        Assert.False(sent);                                            // dead URL -> post failed
        Assert.Equal(1, historyStore.GetLastWebhookSentCallCount);     // seed consulted
        Assert.Equal(1, service.GetTeamsHealth().ConsecutiveFailures); // but it WAS attempted (not suppressed)
    }

    [Fact]
    public async Task NullHistoryStore_NoSeed_AttemptsPost()
    {
        // Legacy/test wiring supplies no history store, which leaves the pre-#1145
        // in-memory-only cooldown: a fresh service goes ahead and attempts the post.
        var service = CreateService(historyStore: null, CreateTeamsSettings());

        var sent = await service.TrySendWebhookAlertsAsync("Deadlocks Detected", "Srv", "4", "1", "1");

        Assert.False(sent);
        Assert.Equal(1, service.GetTeamsHealth().ConsecutiveFailures);
    }

    /* Minimal IAlertSettings: SMTP members are fixed values, webhook members are settable. */
    private sealed class FakeWebhookSettings : IAlertSettings
    {
        /* SMTP channel */
        public bool SmtpEnabled => false;
        public string SmtpServer => "";
        public int SmtpPort => 25;
        public bool SmtpUseSsl => false;
        public string SmtpUsername => "";
        public string SmtpFromAddress => "";
        public string SmtpRecipients => "";
        public string? GetSmtpPassword() => null;
        public int EmailCooldownMinutes { get; set; } = 15;

        /* Teams channel */
        public bool TeamsWebhookEnabled { get; set; }
        public string TeamsWebhookUrl { get; set; } = "";
        public string TeamsProxyAddress => "";

        /* Slack channel */
        public bool SlackWebhookEnabled { get; set; }
        public string SlackWebhookUrl { get; set; } = "";
        public string SlackProxyAddress => "";

        /* Analysis notifications */
        public double AnalysisNotifySeverity => 1.5;
        public int AnalysisNotifyCooldownMinutes => 360;
    }

    /* History store stub: returns a canned webhook timestamp and counts how often it is asked. */
    private sealed class FakeHistoryStore : IAlertHistoryStore
    {
        public DateTime? LastWebhookSent { get; set; }
        public int GetLastWebhookSentCallCount { get; private set; }

        public Task RecordAlertAsync(AlertHistoryRecord record)
        {
            return Task.CompletedTask;
        }

        public Task<DateTime?> GetLastEmailSentUtcAsync(string serverId, string metricName)
        {
            return Task.FromResult<DateTime?>(null);
        }

        public Task<DateTime?> GetLastWebhookSentUtcAsync(string serverId, string metricName)
        {
            GetLastWebhookSentCallCount++;
            return Task.FromResult(LastWebhookSent);
        }

        public Task<DateTime?> GetLastAlertTimeAsync(string serverId, string metricName)
        {
            return Task.FromResult<DateTime?>(null);
        }
    }
}
15 changes: 15 additions & 0 deletions Lite/Database/Schema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,20 @@ CREATE TABLE IF NOT EXISTS config_alert_log (
context_json VARCHAR
)";

/* Edge-trigger watermarks for the rolling-count blocking/deadlock alert gate (#1091).
Persisted so the watermark survives an app restart (#1145): without it the in-memory
watermark resets to 0 and the first post-restart sweep re-fires the same alert (and
re-posts the same webhook) for events still lingering in the 1-hour lookback window.

Columns:
server_id, metric_name - composite primary key, so there is at most one row per
server/metric pair.
watermark - the persisted watermark value for that pair.
updated_at - NOTE(review): presumably the time of the most recent upsert; confirm
against the code that writes this table.

One short row per server/metric, upserted on change. */
public const string CreateEdgeTriggerWatermarksTable = @"
CREATE TABLE IF NOT EXISTS config_edge_trigger_watermarks (
server_id INTEGER NOT NULL,
metric_name VARCHAR NOT NULL,
watermark INTEGER NOT NULL,
updated_at TIMESTAMP NOT NULL,
PRIMARY KEY (server_id, metric_name)
)";

public const string CreateMuteRulesTable = @"
CREATE TABLE IF NOT EXISTS config_mute_rules (
id VARCHAR NOT NULL PRIMARY KEY,
Expand Down Expand Up @@ -829,6 +843,7 @@ public static IEnumerable<string> GetAllTableStatements()
yield return CreateServerPropertiesTable;
yield return CreateSessionStatsTable;
yield return CreateAlertLogTable;
yield return CreateEdgeTriggerWatermarksTable;
yield return CreateMuteRulesTable;
yield return CreateDismissedArchiveAlertsTable;
}
Expand Down
Loading
Loading