Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions Dashboard.Tests/FactScorerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,52 @@ public void BuildNarrowMemoryFact_NullInputs_ReturnsNull()
Assert.Null(FactRemediation.BuildNarrowMemoryFact(1, null, 24000));
Assert.Null(FactRemediation.BuildNarrowMemoryFact(1, 28672, null));
}

/* ── WS5: server-health advisory facts (LPIM / IFI / memory dumps) ── */

// Each WS5 server-health fact scores its 0.4 advisory base ONLY when the value is bad, and 0
// otherwise — mirroring the WS3 server-config keys. The collectors gate only the LPIM fact
// (Express edition / small RAM); the IFI and memory-dump facts are emitted whenever the value
// is known, healthy or not, so the scorer itself must be bad-only — which is what this pins.
[Theory]
// IFI: Value carries the "enabled" bit — 0 (off) earns the advisory base, 1 (on) earns nothing.
[InlineData("CONFIG_IFI_DISABLED", 0, 0.4)]
[InlineData("CONFIG_IFI_DISABLED", 1, 0.0)]
// LPIM: same enabled-bit convention as IFI.
[InlineData("CONFIG_LPIM_DISABLED", 0, 0.4)]
[InlineData("CONFIG_LPIM_DISABLED", 1, 0.0)]
// Memory dumps: Value carries a count — any positive count earns the base, zero earns nothing.
[InlineData("SERVER_MEMORY_DUMPS", 1, 0.4)]
[InlineData("SERVER_MEMORY_DUMPS", 7, 0.4)]
[InlineData("SERVER_MEMORY_DUMPS", 0, 0.0)]
public void ServerHealthFact_ScoresBadValueOnly(string key, long value, double expected)
{
    // Arrange: a single config-sourced fact for the key/value pair under test.
    var scored = new List<Fact>();
    scored.Add(new Fact { Source = "config", Key = key, Value = value });

    // Act: score the list in place.
    new FactScorer().ScoreAll(scored);

    // Assert: compare to four decimal places so floating-point noise cannot fail the row.
    var actual = scored[0].BaseSeverity;
    Assert.Equal(expected, actual, precision: 4);
}

// Dead-fact guard: every WS5 server-health root key must resolve to an advice block with a
// headline, investigation text and remediation text. A fact that roots but renders no advice
// is the P1 dead-fact bug class.
[Theory]
[InlineData("CONFIG_IFI_DISABLED")]
[InlineData("CONFIG_LPIM_DISABLED")]
[InlineData("SERVER_MEMORY_DUMPS")]
public void ServerHealthKeys_HaveAdviceBlocks(string key)
{
    var block = FactAdvice.GetForFactKey(key);
    Assert.NotNull(block);

    // Each of the three rendered text sections must carry real content.
    var headline = block!.Headline;
    Assert.False(string.IsNullOrWhiteSpace(headline));

    var investigation = block.Investigation;
    Assert.False(string.IsNullOrWhiteSpace(investigation));

    var remediation = block.Remediation;
    Assert.False(string.IsNullOrWhiteSpace(remediation));

    // These facts are advise-only, so the bare advice block carries no generated Apply T-SQL.
    var applyScript = block.RemediationTsql;
    Assert.Null(applyScript);
}
}
41 changes: 41 additions & 0 deletions Dashboard.Tests/InferenceEngineTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,47 @@ public void ServerConfigFacts_AllFour_RootFourDistinctFindings()
Assert.Contains(stories, s => s.RootFactKey == "CONFIG_MIN_MAX_MEMORY_NARROW");
}

// WS5: each server-health advisory fact at its 0.4 advisory base roots a standalone
// recommendation, below the 0.5 incident threshold — because each is a config-advisory root
// key. One finding per fact (they are leaves with no edges between them). Mirrors the WS3
// ServerConfigFact_RootsStandalone_BelowMinimumSeverity test.
//
// Every row supplies a Value that is "bad" for its key, so the Value/Severity pair is one the
// scorer can really produce: 0 means "disabled" for the IFI/LPIM enabled-bit facts, while
// SERVER_MEMORY_DUMPS carries a dump count and only scores when that count is above zero.
[Theory]
[InlineData("CONFIG_IFI_DISABLED", 0)]
[InlineData("CONFIG_LPIM_DISABLED", 0)]
[InlineData("SERVER_MEMORY_DUMPS", 3)]
public void ServerHealthFact_RootsStandalone_BelowMinimumSeverity(string key, long badValue)
{
    // Arrange: an empty relationship graph, so the fact has no edges and must root on its own.
    var engine = new InferenceEngine(new RelationshipGraph());
    var facts = new List<Fact>
    {
        new() { Key = key, Source = "config", Value = badValue, Severity = 0.4 }
    };

    // Act
    var stories = engine.BuildStories(facts);

    // Assert: the fact is the root of one of the returned stories.
    Assert.Contains(stories, s => s.RootFactKey == key);
}

// Feeding all three server-health facts through the engine at once must still surface each
// key as the root of a story: with an empty relationship graph there are no edges between
// them, so none of them can be consumed by another.
[Fact]
public void ServerHealthFacts_AllThree_RootThreeDistinctFindings()
{
    // Arrange: one fact per key, each at the 0.4 advisory severity.
    var input = new List<Fact>
    {
        new Fact { Key = "CONFIG_IFI_DISABLED", Source = "config", Value = 0, Severity = 0.4 },
        new Fact { Key = "CONFIG_LPIM_DISABLED", Source = "config", Value = 0, Severity = 0.4 },
        new Fact { Key = "SERVER_MEMORY_DUMPS", Source = "config", Value = 3, Severity = 0.4 },
    };
    var inference = new InferenceEngine(new RelationshipGraph());

    // Act
    var stories = inference.BuildStories(input);

    // Assert: every key roots a story of its own.
    var expectedRoots = new[] { "CONFIG_IFI_DISABLED", "CONFIG_LPIM_DISABLED", "SERVER_MEMORY_DUMPS" };
    foreach (var expectedRoot in expectedRoots)
    {
        Assert.Contains(stories, s => s.RootFactKey == expectedRoot);
    }
}

// Regression: a duplicated fact key (e.g. a collector returning two rows for the same
// setting) once aborted the entire analysis — BuildStories built its lookup with a raw
// ToDictionary(f => f.Key), which throws on a duplicate. It must now dedupe and survive.
Expand Down
79 changes: 78 additions & 1 deletion Dashboard/Analysis/SqlServerFactCollector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1985,7 +1985,10 @@ SELECT TOP 1
cores_per_socket,
is_hadr_enabled,
edition,
product_version
product_version,
lock_pages_in_memory,
instant_file_initialization_enabled,
memory_dump_count
FROM collect.server_properties
ORDER BY collection_time DESC";

Expand All @@ -1998,6 +2001,10 @@ FROM collect.server_properties
var socketCount = reader.IsDBNull(3) ? 0 : Convert.ToInt32(reader.GetValue(3));
var coresPerSocket = reader.IsDBNull(4) ? 0 : Convert.ToInt32(reader.GetValue(4));
var hadrEnabled = !reader.IsDBNull(5) && Convert.ToBoolean(reader.GetValue(5));
var edition = reader.IsDBNull(6) ? string.Empty : reader.GetString(6);
bool? lpim = reader.IsDBNull(8) ? (bool?)null : Convert.ToBoolean(reader.GetValue(8));
bool? ifi = reader.IsDBNull(9) ? (bool?)null : Convert.ToBoolean(reader.GetValue(9));
int? dumpCount = reader.IsDBNull(10) ? (int?)null : Convert.ToInt32(reader.GetValue(10));

if (cpuCount == 0) return;

Expand All @@ -2017,13 +2024,83 @@ FROM collect.server_properties
["hadr_enabled"] = hadrEnabled ? 1 : 0
}
});

// WS5 server-health advisories (advise-only). The edition/RAM gating for the LPIM fact lives
// in EmitServerHealthFacts (noise control); the IFI and memory-dump facts are emitted whenever
// the value is known and rely on the scorer giving a healthy Value a score of 0. Shared with
// Lite — keep the rules identical (see DuckDbFactCollector).
EmitServerHealthFacts(context, facts, edition, physicalMemMb, lpim, ifi, dumpCount);
}
catch (Exception ex)
{
Logger.Error("SqlServerFactCollector.CollectServerPropertiesFactsAsync failed", ex);
}
}

// RAM floor below which LPIM-off is not worth flagging — on a small buffer pool the OS paging
// SQL out is not the practical risk it is on a large dedicated host. Shared rule with Lite.
private const long LpimAdvisoryMinPhysicalMemoryMb = 32 * 1024;

/// <summary>
/// Appends the WS5 advise-only server-health facts to <paramref name="facts"/>, using the
/// values read from the latest server_properties row. A null input means "unknown" and
/// produces no fact for that setting.
/// <list type="bullet">
/// <item>CONFIG_IFI_DISABLED — added whenever the IFI bit is known; Value is the enabled bit.</item>
/// <item>CONFIG_LPIM_DISABLED — added only when the LPIM bit is known, the edition text does
/// not contain "Express", and physical memory is at least
/// <see cref="LpimAdvisoryMinPhysicalMemoryMb"/>; Value is the enabled bit.</item>
/// <item>SERVER_MEMORY_DUMPS — added whenever the dump count is known; Value is the count.</item>
/// </list>
/// </summary>
/// <param name="context">Analysis context; its ServerId is stamped on every emitted fact.</param>
/// <param name="facts">Fact list that receives the new facts.</param>
/// <param name="edition">Edition text; only checked for the substring "Express" (case-insensitive).</param>
/// <param name="physicalMemMb">Physical memory in MB, compared against the LPIM RAM floor.</param>
/// <param name="lockPagesInMemory">LPIM enabled bit, or null when unknown.</param>
/// <param name="instantFileInit">IFI enabled bit, or null when unknown.</param>
/// <param name="memoryDumpCount">Memory dump count, or null when unknown.</param>
private static void EmitServerHealthFacts(
    AnalysisContext context, List<Fact> facts, string edition, long physicalMemMb,
    bool? lockPagesInMemory, bool? instantFileInit, int? memoryDumpCount)
{
    // Edition is inspected before anything is added, so the LPIM gate is decided up front.
    var isExpress = edition.Contains("Express", StringComparison.OrdinalIgnoreCase);
    var lpimWorthFlagging = !isExpress && physicalMemMb >= LpimAdvisoryMinPhysicalMemoryMb;

    if (instantFileInit is bool ifiEnabled)
    {
        var ifiBit = ifiEnabled ? 1 : 0;
        var ifiFact = new Fact
        {
            Source = "config",
            Key = "CONFIG_IFI_DISABLED",
            Value = ifiBit,
            ServerId = context.ServerId,
            Metadata = new Dictionary<string, double>
            {
                ["instant_file_initialization_enabled"] = ifiBit
            }
        };
        facts.Add(ifiFact);
    }

    if (lockPagesInMemory is bool lpimEnabled && lpimWorthFlagging)
    {
        var lpimBit = lpimEnabled ? 1 : 0;
        var lpimFact = new Fact
        {
            Source = "config",
            Key = "CONFIG_LPIM_DISABLED",
            Value = lpimBit,
            ServerId = context.ServerId,
            Metadata = new Dictionary<string, double>
            {
                ["lock_pages_in_memory"] = lpimBit,
                ["physical_memory_mb"] = physicalMemMb
            }
        };
        facts.Add(lpimFact);
    }

    if (memoryDumpCount is int dumps)
    {
        var dumpFact = new Fact
        {
            Source = "config",
            Key = "SERVER_MEMORY_DUMPS",
            Value = dumps,
            ServerId = context.ServerId,
            Metadata = new Dictionary<string, double>
            {
                ["memory_dump_count"] = dumps
            }
        };
        facts.Add(dumpFact);
    }
}

/// <summary>
/// Collects disk space facts from database_size_stats: volume free space, file sizes.
/// </summary>
Expand Down
76 changes: 75 additions & 1 deletion Lite/Analysis/DuckDbFactCollector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1844,7 +1844,8 @@ private async Task CollectServerPropertiesFactsAsync(AnalysisContext context, Li
using var cmd = connection.CreateCommand();
cmd.CommandText = @"
SELECT COALESCE(vcore_count, cpu_count) AS cpu_count, hyperthread_ratio, physical_memory_mb,
socket_count, cores_per_socket, is_hadr_enabled, edition, product_version
socket_count, cores_per_socket, is_hadr_enabled, edition, product_version,
lock_pages_in_memory, instant_file_initialization_enabled, memory_dump_count
FROM server_properties
WHERE server_id = $1
ORDER BY collection_time DESC
Expand All @@ -1861,6 +1862,10 @@ ORDER BY collection_time DESC
var socketCount = reader.IsDBNull(3) ? 0 : Convert.ToInt32(reader.GetValue(3));
var coresPerSocket = reader.IsDBNull(4) ? 0 : Convert.ToInt32(reader.GetValue(4));
var hadrEnabled = !reader.IsDBNull(5) && Convert.ToBoolean(reader.GetValue(5));
var edition = reader.IsDBNull(6) ? string.Empty : reader.GetString(6);
bool? lpim = reader.IsDBNull(8) ? (bool?)null : Convert.ToBoolean(reader.GetValue(8));
bool? ifi = reader.IsDBNull(9) ? (bool?)null : Convert.ToBoolean(reader.GetValue(9));
int? dumpCount = reader.IsDBNull(10) ? (int?)null : Convert.ToInt32(reader.GetValue(10));

if (cpuCount == 0) return;

Expand All @@ -1880,10 +1885,79 @@ ORDER BY collection_time DESC
["hadr_enabled"] = hadrEnabled ? 1 : 0
}
});

// WS5 server-health advisories (advise-only). Gating mirrors the Dashboard collector so
// both apps agree on what is worth flagging: only the LPIM fact is suppressed (Express /
// small RAM); the IFI and memory-dump facts are emitted whenever the value is known and
// score 0 when healthy.
EmitServerHealthFacts(context, facts, edition, physicalMemMb, lpim, ifi, dumpCount);
}
catch { /* Table may not exist or have no data */ }
}

// RAM floor below which LPIM-off is not worth flagging — shared rule with the Dashboard
// SqlServerFactCollector (small buffer pools do not suffer from OS paging the way large ones do).
private const long LpimAdvisoryMinPhysicalMemoryMb = 32 * 1024;

/// <summary>
/// Adds the WS5 advise-only server-health facts (IFI / LPIM / memory dumps) built from the
/// latest server_properties values. Each nullable input is skipped when it has no value.
/// • IFI: added whenever the bit is known; Value is the enabled bit (1 = on, 0 = off).
/// • LPIM: added only when the bit is known, the edition text does not contain "Express",
///   and physical memory is at least <see cref="LpimAdvisoryMinPhysicalMemoryMb"/>; Value is
///   the enabled bit.
/// • Dumps: added whenever the count is known; Value is the count itself.
/// </summary>
private static void EmitServerHealthFacts(
    AnalysisContext context, List<Fact> facts, string edition, long physicalMemMb,
    bool? lockPagesInMemory, bool? instantFileInit, int? memoryDumpCount)
{
    var isExpress = edition.Contains("Express", StringComparison.OrdinalIgnoreCase);

    var ifiKnown = instantFileInit.HasValue;
    if (ifiKnown)
    {
        var enabledBit = instantFileInit.Value ? 1 : 0;
        facts.Add(new Fact
        {
            Source = "config",
            Key = "CONFIG_IFI_DISABLED",
            Value = enabledBit,
            ServerId = context.ServerId,
            Metadata = new Dictionary<string, double>
            {
                ["instant_file_initialization_enabled"] = enabledBit
            }
        });
    }

    // LPIM is the only gated fact: Express editions and hosts under the RAM floor are skipped.
    var hasEnoughRam = physicalMemMb >= LpimAdvisoryMinPhysicalMemoryMb;
    var lpimApplies = lockPagesInMemory.HasValue && !isExpress && hasEnoughRam;
    if (lpimApplies)
    {
        var enabledBit = lockPagesInMemory.Value ? 1 : 0;
        facts.Add(new Fact
        {
            Source = "config",
            Key = "CONFIG_LPIM_DISABLED",
            Value = enabledBit,
            ServerId = context.ServerId,
            Metadata = new Dictionary<string, double>
            {
                ["lock_pages_in_memory"] = enabledBit,
                ["physical_memory_mb"] = physicalMemMb
            }
        });
    }

    var dumpsKnown = memoryDumpCount.HasValue;
    if (dumpsKnown)
    {
        var dumpTotal = memoryDumpCount.Value;
        facts.Add(new Fact
        {
            Source = "config",
            Key = "SERVER_MEMORY_DUMPS",
            Value = dumpTotal,
            ServerId = context.ServerId,
            Metadata = new Dictionary<string, double>
            {
                ["memory_dump_count"] = dumpTotal
            }
        });
    }
}

/// <summary>
/// Collects disk space facts from database_size_stats: volume free space, file sizes.
/// </summary>
Expand Down
18 changes: 17 additions & 1 deletion Lite/Database/DuckDbInitializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public void Dispose()
/// <summary>
/// Current schema version. Increment this when schema changes require table rebuilds.
/// </summary>
internal const int CurrentSchemaVersion = 26;
internal const int CurrentSchemaVersion = 27;

private readonly string _archivePath;

Expand Down Expand Up @@ -716,6 +716,22 @@ New tables only — no existing table changes needed. Tables created by
_logger?.LogWarning("Migration to v26 encountered an error (non-fatal): {Error}", ex.Message);
}
}

// v27: add the three server-health columns (lock_pages_in_memory,
// instant_file_initialization_enabled, memory_dump_count) to an existing server_properties
// table. Each statement uses ADD COLUMN IF NOT EXISTS, so a column that is already present
// is left alone.
if (fromVersion < 27)
{
_logger?.LogInformation("Running migration to v27: adding server-health columns (LPIM/IFI/memory dumps) to server_properties");
try
{
// One ALTER per column, run in sequence.
await ExecuteNonQueryAsync(connection, "ALTER TABLE server_properties ADD COLUMN IF NOT EXISTS lock_pages_in_memory BOOLEAN");
await ExecuteNonQueryAsync(connection, "ALTER TABLE server_properties ADD COLUMN IF NOT EXISTS instant_file_initialization_enabled BOOLEAN");
await ExecuteNonQueryAsync(connection, "ALTER TABLE server_properties ADD COLUMN IF NOT EXISTS memory_dump_count INTEGER");
}
catch (Exception ex)
{
// Unlike the v26 step, which only logs a warning marked non-fatal, a failure here is
// logged as an error and rethrown to the caller.
_logger?.LogError(ex, "Migration to v27 failed");
throw;
}
}
}

/// <summary>
Expand Down
5 changes: 4 additions & 1 deletion Lite/Database/Schema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,10 @@ CREATE TABLE IF NOT EXISTS server_properties (
is_clustered BOOLEAN,
enterprise_features VARCHAR,
service_objective VARCHAR,
vcore_count INTEGER
vcore_count INTEGER,
lock_pages_in_memory BOOLEAN,
instant_file_initialization_enabled BOOLEAN,
memory_dump_count INTEGER
)";

public const string CreateServerPropertiesIndex = @"
Expand Down
Loading
Loading