diff --git a/apps/api/src/anomaly/anomaly.types.ts b/apps/api/src/anomaly/anomaly.types.ts new file mode 100644 index 00000000..aeb926be --- /dev/null +++ b/apps/api/src/anomaly/anomaly.types.ts @@ -0,0 +1,120 @@ +import type { SpikeDetectorConfig } from '@proprietary/anomaly-detection/types'; + +export enum MetricType { + connections = 'connections', + ops_per_sec = 'ops_per_sec', + memory_used = 'memory_used', + input_kbps = 'input_kbps', + output_kbps = 'output_kbps', + slowlog_last_id = 'slowlog_last_id', + acl_denied = 'acl_denied', + evicted_keys = 'evicted_keys', + blocked_clients = 'blocked_clients', + keyspace_misses = 'keyspace_misses', + fragmentation_ratio = 'fragmentation_ratio', +} + +export interface DetectorConfig { + warningZScore?: number; + criticalZScore?: number; + warningAbsolute?: number; + criticalAbsolute?: number; + consecutiveRequired?: number; + cooldownMs?: number; +} + +export type DetectorConfigMap = Partial>; + +const NO_ABSOLUTE = Number.POSITIVE_INFINITY; + +const zScoreDefaults = { + warningZScore: 2.0, + criticalZScore: 3.0, + warningAbsolute: NO_ABSOLUTE, + criticalAbsolute: NO_ABSOLUTE, + consecutiveRequired: 3, + cooldownMs: 60_000, +} satisfies Required; + +/** Defaults mirror hardcoded thresholds in proprietary/anomaly-detection/anomaly.service.ts */ +export const DETECTOR_DEFAULTS: Record> = { + [MetricType.connections]: { ...zScoreDefaults }, + [MetricType.ops_per_sec]: { ...zScoreDefaults }, + [MetricType.memory_used]: { + ...zScoreDefaults, + warningZScore: 2.5, + criticalZScore: 3.5, + }, + [MetricType.input_kbps]: { ...zScoreDefaults }, + [MetricType.output_kbps]: { ...zScoreDefaults }, + [MetricType.slowlog_last_id]: { + ...zScoreDefaults, + warningZScore: 1.5, + criticalZScore: 2.5, + consecutiveRequired: 1, + cooldownMs: 30_000, + }, + [MetricType.acl_denied]: { + ...zScoreDefaults, + warningZScore: 1.5, + criticalZScore: 2.5, + warningAbsolute: 10, + criticalAbsolute: 50, + consecutiveRequired: 2, + cooldownMs: 30_000, + }, + [MetricType.evicted_keys]: { + ...zScoreDefaults, + consecutiveRequired: 2, + cooldownMs: 30_000, + }, + [MetricType.blocked_clients]: { ...zScoreDefaults }, + [MetricType.keyspace_misses]: { ...zScoreDefaults }, + [MetricType.fragmentation_ratio]: { + ...zScoreDefaults, + warningAbsolute: 1.5, + criticalAbsolute: 2.0, + consecutiveRequired: 5, + cooldownMs: 120_000, + }, +}; + +export function resolveDetectorConfig( + metric: MetricType, + overrides: DetectorConfigMap, +): Required { + return { + ...DETECTOR_DEFAULTS[metric], + ...overrides[metric], + }; +} + +/** Strips Infinity sentinels before JSON serialization. Omits absolute fields + * when they represent "no threshold configured" (i.e., Infinity). */ +export function toApiDetectorConfig(config: Required): DetectorConfig { + const out: DetectorConfig = { + warningZScore: config.warningZScore, + criticalZScore: config.criticalZScore, + consecutiveRequired: config.consecutiveRequired, + cooldownMs: config.cooldownMs, + }; + if (isFinite(config.warningAbsolute)) out.warningAbsolute = config.warningAbsolute; + if (isFinite(config.criticalAbsolute)) out.criticalAbsolute = config.criticalAbsolute; + return out; +} + +export function toSpikeDetectorConfig(config: Required): SpikeDetectorConfig { + return { + warningZScore: config.warningZScore, + criticalZScore: config.criticalZScore, + warningThreshold: config.warningAbsolute, + criticalThreshold: config.criticalAbsolute, + consecutiveRequired: config.consecutiveRequired, + cooldownMs: config.cooldownMs, + }; +} + +/** Generic z-score defaults for metrics without API config (e.g. cpu_utilization). */ +export const DEFAULT_SPIKE_CONFIG: SpikeDetectorConfig = toSpikeDetectorConfig(zScoreDefaults); + +export const API_METRIC_TYPES = Object.values(MetricType); diff --git a/apps/api/src/metric-forecasting/__tests__/metric-forecasting.service.spec.ts b/apps/api/src/metric-forecasting/__tests__/metric-forecasting.service.spec.ts index 33cfff52..58c64a37 100644 --- a/apps/api/src/metric-forecasting/__tests__/metric-forecasting.service.spec.ts +++ b/apps/api/src/metric-forecasting/__tests__/metric-forecasting.service.spec.ts @@ -20,6 +20,7 @@ function mockGlobalSettings(overrides?: Partial): AppSettings { metricForecastingDefaultRollingWindowMs: 21600000, metricForecastingDefaultAlertThresholdMs: 7200000, inferenceSlaConfig: {}, + anomalyDetectorConfig: {}, createdAt: Date.now(), updatedAt: Date.now(), ...overrides, diff --git a/apps/api/src/prometheus/prometheus.service.ts b/apps/api/src/prometheus/prometheus.service.ts index bb4295b3..8f0d5121 100644 --- a/apps/api/src/prometheus/prometheus.service.ts +++ b/apps/api/src/prometheus/prometheus.service.ts @@ -175,6 +175,7 @@ export class PrometheusService extends MultiConnectionPoller implements OnModule private anomalyBySeverity: Gauge; private anomalyByMetric: Gauge; private correlatedGroupsTotal: Counter; + private detectorConfigUpdatesTotal: Counter; private correlatedGroupsBySeverity: Gauge; private correlatedGroupsByPattern: Gauge; private anomalyDetectionBufferReady: Gauge; @@ -599,6 +600,11 @@ export class PrometheusService extends MultiConnectionPoller implements OnModule labelNames: ['connection', 'pattern', 'severity'], registers: [this.registry], }); + this.detectorConfigUpdatesTotal = new Counter({ + name: 'betterdb_detector_config_updates_total', + help: 'Total anomaly detector threshold config PATCH requests', + registers: [this.registry], + }); this.anomalyEventsCurrent = this.createGauge('anomaly_events_current', 'Unresolved anomalies', [ 'severity', ]); @@ -1358,6 +1364,10 @@ export class PrometheusService extends MultiConnectionPoller implements OnModule this.pollsTotal.labels(connLabel).inc(); } + incrementDetectorConfigUpdates(): void { + this.detectorConfigUpdatesTotal.inc(); + } + updateVectorIndexMetrics( connectionId: string, indexes: ReadonlyArray<{ diff --git a/apps/api/src/settings/dto/update-anomaly-detectors.dto.ts b/apps/api/src/settings/dto/update-anomaly-detectors.dto.ts new file mode 100644 index 00000000..abb2de51 --- /dev/null +++ b/apps/api/src/settings/dto/update-anomaly-detectors.dto.ts @@ -0,0 +1,157 @@ +import { ApiPropertyOptional } from '@nestjs/swagger'; +import { Type } from 'class-transformer'; +import { + IsInt, + IsNumber, + IsOptional, + Max, + Min, + Validate, + ValidateNested, + ValidationArguments, + ValidatorConstraint, + ValidatorConstraintInterface, +} from 'class-validator'; + +@ValidatorConstraint({ name: 'criticalGreaterThanWarning', async: false }) +class CriticalGreaterThanWarningValidator implements ValidatorConstraintInterface { + validate(criticalZScore: number, args: ValidationArguments): boolean { + const obj = args.object as DetectorConfigDto; + if (obj.warningZScore === undefined || criticalZScore === undefined) { + return true; + } + return obj.warningZScore < criticalZScore; + } + + defaultMessage(): string { + return 'warningZScore must be less than criticalZScore when both are provided'; + } +} + +@ValidatorConstraint({ name: 'criticalAbsoluteGreaterThanWarning', async: false }) +class CriticalAbsoluteGreaterThanWarningValidator + implements ValidatorConstraintInterface { + validate(criticalAbsolute: number, args: ValidationArguments): boolean { + const obj = args.object as DetectorConfigDto; + if (obj.warningAbsolute === undefined || criticalAbsolute === undefined) { + return true; + } + return obj.warningAbsolute < criticalAbsolute; + } + + defaultMessage(): string { + return 'warningAbsolute must be less than criticalAbsolute when both are provided'; + } +} + +export class DetectorConfigDto { + @ApiPropertyOptional({ minimum: 0.5, maximum: 10 }) + @IsOptional() + @IsNumber() + @Min(0.5) + @Max(10) + warningZScore?: number; + + @ApiPropertyOptional({ minimum: 1, maximum: 15 }) + @IsOptional() + @IsNumber() + @Min(1) + @Max(15) + @Validate(CriticalGreaterThanWarningValidator) + criticalZScore?: number; + + @ApiPropertyOptional({ minimum: 0 }) + @IsOptional() + @IsNumber() + @Min(0) + warningAbsolute?: number; + + @ApiPropertyOptional({ minimum: 0 }) + @IsOptional() + @IsNumber() + @Min(0) + @Validate(CriticalAbsoluteGreaterThanWarningValidator) + criticalAbsolute?: number; + + @ApiPropertyOptional({ minimum: 1, maximum: 20 }) + @IsOptional() + @IsInt() + @Min(1) + @Max(20) + consecutiveRequired?: number; + + @ApiPropertyOptional({ minimum: 1000, maximum: 3600000 }) + @IsOptional() + @IsInt() + @Min(1000) + @Max(3600000) + cooldownMs?: number; +} + +export class UpdateAnomalyDetectorsDto { + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + connections?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + ops_per_sec?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + memory_used?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + input_kbps?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + output_kbps?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + slowlog_last_id?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + acl_denied?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + evicted_keys?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + blocked_clients?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + keyspace_misses?: DetectorConfigDto; + + @ApiPropertyOptional({ type: DetectorConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => DetectorConfigDto) + fragmentation_ratio?: DetectorConfigDto; +} diff --git a/apps/api/src/settings/settings-anomaly-detectors.spec.ts b/apps/api/src/settings/settings-anomaly-detectors.spec.ts new file mode 100644 index 00000000..fa541d5f --- /dev/null +++ b/apps/api/src/settings/settings-anomaly-detectors.spec.ts @@ -0,0 +1,219 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { BadRequestException, ValidationPipe } from '@nestjs/common'; +import { Test, TestingModule } from '@nestjs/testing'; +import { ANOMALY_SERVICE } from '@betterdb/shared'; +import { + API_METRIC_TYPES, + DETECTOR_DEFAULTS, + MetricType, + resolveDetectorConfig, + toApiDetectorConfig, +} from '../anomaly/anomaly.types'; +import { UpdateAnomalyDetectorsDto } from './dto/update-anomaly-detectors.dto'; +import { PrometheusService } from '../prometheus/prometheus.service'; +import { SettingsController } from './settings.controller'; +import { SettingsService } from './settings.service'; + +describe('Settings anomaly detector thresholds', () => { + let controller: SettingsController; + let settingsService: { + getDetectorConfig: jest.Mock; + updateDetectorConfig: jest.Mock; + updateSettings: jest.Mock; + }; + let anomalyService: { reloadDetectorConfig: jest.Mock }; + let prometheusService: { incrementDetectorConfigUpdates: jest.Mock }; + + beforeEach(async () => { + settingsService = { + getDetectorConfig: jest.fn().mockResolvedValue({}), + updateDetectorConfig: jest.fn().mockImplementation(async (map) => map), + updateSettings: jest.fn().mockResolvedValue({ + settings: { anomalyDetectorConfig: {} }, + source: 'database', + requiresRestart: false, + }), + }; + anomalyService = { reloadDetectorConfig: jest.fn() }; + prometheusService = { incrementDetectorConfigUpdates: jest.fn() }; + + const module: TestingModule = await Test.createTestingModule({ + controllers: [SettingsController], + providers: [ + { provide: SettingsService, useValue: settingsService }, + { provide: PrometheusService, useValue: prometheusService }, + { provide: ANOMALY_SERVICE, useValue: anomalyService }, + ], + }).compile(); + + controller = module.get(SettingsController); + }); + + describe('GET /settings/anomaly/detectors', () => { + it('returns defaults when no overrides are stored', async () => { + const result = await controller.getAnomalyDetectors(); + expect(result.overrides).toEqual({}); + for (const metric of API_METRIC_TYPES) { + expect(result.resolved[metric]).toEqual(toApiDetectorConfig(DETECTOR_DEFAULTS[metric])); + } + }); + + it('GET response omits Infinity absolute fields, includes real ones', async () => { + const result = await controller.getAnomalyDetectors(); + // connections has no absolute threshold — fields should be absent + expect('warningAbsolute' in result.defaults.connections).toBe(false); + expect('criticalAbsolute' in result.defaults.connections).toBe(false); + // acl_denied has real absolute thresholds — fields should be present + expect(result.defaults.acl_denied.warningAbsolute).toBe(10); + expect(result.defaults.acl_denied.criticalAbsolute).toBe(50); + }); + + it('merges stored overrides with defaults per field', async () => { + settingsService.getDetectorConfig.mockResolvedValue({ + [MetricType.connections]: { warningZScore: 2.5 }, + }); + const result = await controller.getAnomalyDetectors(); + expect(result.resolved.connections.warningZScore).toBe(2.5); + expect(result.resolved.connections.criticalZScore).toBe( + DETECTOR_DEFAULTS[MetricType.connections].criticalZScore, + ); + expect(result.resolved.memory_used).toEqual( + toApiDetectorConfig(DETECTOR_DEFAULTS[MetricType.memory_used]), + ); + }); + }); + + describe('PATCH /settings/anomaly/detectors', () => { + const validationPipe = new ValidationPipe({ whitelist: true, forbidNonWhitelisted: true }); + + it('persists partial updates and hot-reloads', async () => { + const merged = { [MetricType.connections]: { warningZScore: 2.5, consecutiveRequired: 5 } }; + settingsService.updateDetectorConfig.mockResolvedValue(merged); + + const result = await controller.patchAnomalyDetectors({ + connections: { warningZScore: 2.5, consecutiveRequired: 5 }, + }); + + expect(result).toEqual({ message: 'Detector config updated', config: merged }); + expect(settingsService.updateDetectorConfig).toHaveBeenCalledWith({ + connections: { warningZScore: 2.5, consecutiveRequired: 5 }, + }); + expect(anomalyService.reloadDetectorConfig).toHaveBeenCalledWith(merged); + expect(prometheusService.incrementDetectorConfigUpdates).toHaveBeenCalled(); + }); + + it('rejects invalid warningZScore', async () => { + await expect( + validationPipe.transform( + { connections: { warningZScore: -1 } }, + { type: 'body', metatype: UpdateAnomalyDetectorsDto }, + ), + ).rejects.toBeInstanceOf(BadRequestException); + }); + + it('rejects unknown metric keys', async () => { + await expect( + validationPipe.transform( + { fake_metric: { warningZScore: 2.5 } }, + { type: 'body', metatype: UpdateAnomalyDetectorsDto }, + ), + ).rejects.toBeInstanceOf(BadRequestException); + }); + + it('rejects warningZScore greater than or equal to criticalZScore', async () => { + await expect( + validationPipe.transform( + { connections: { warningZScore: 5, criticalZScore: 2 } }, + { type: 'body', metatype: UpdateAnomalyDetectorsDto }, + ), + ).rejects.toBeInstanceOf(BadRequestException); + }); + + it('rejects partial PATCH that inverts Z-scores against stored config', async () => { + settingsService.updateDetectorConfig.mockRejectedValue( + new BadRequestException( + 'connections: warningZScore (9.5) must be less than criticalZScore (3.0)', + ), + ); + await expect( + controller.patchAnomalyDetectors({ connections: { warningZScore: 9.5 } }), + ).rejects.toBeInstanceOf(BadRequestException); + }); + + it('rejects warningAbsolute >= criticalAbsolute in same payload', async () => { + await expect( + validationPipe.transform( + { acl_denied: { warningAbsolute: 100, criticalAbsolute: 10 } }, + { type: 'body', metatype: UpdateAnomalyDetectorsDto }, + ), + ).rejects.toBeInstanceOf(BadRequestException); + }); + }); + + describe('POST /settings/anomaly/detectors/reset', () => { + it('clears stored overrides and hot-reloads empty config', async () => { + const result = await controller.resetAnomalyDetectors(); + + expect(result).toEqual({ message: 'Detector config reset to defaults' }); + expect(settingsService.updateSettings).toHaveBeenCalledWith({ anomalyDetectorConfig: {} }); + expect(anomalyService.reloadDetectorConfig).toHaveBeenCalledWith({}); + }); + }); + + describe('SettingsService.updateDetectorConfig field merge', () => { + it('merges at field level within a metric', async () => { + const storageClient = { + getSettings: jest.fn().mockResolvedValue({ + id: 1, + auditPollIntervalMs: 60000, + clientAnalyticsPollIntervalMs: 60000, + anomalyPollIntervalMs: 1000, + anomalyCacheTtlMs: 3600000, + anomalyPrometheusIntervalMs: 30000, + metricForecastingEnabled: true, + metricForecastingDefaultRollingWindowMs: 21600000, + metricForecastingDefaultAlertThresholdMs: 7200000, + inferenceSlaConfig: {}, + anomalyDetectorConfig: { + connections: { warningZScore: 2.5, consecutiveRequired: 5 }, + }, + createdAt: 1, + updatedAt: 1, + }), + updateSettings: jest.fn().mockImplementation(async (updates) => ({ + ...(await storageClient.getSettings()), + ...updates, + anomalyDetectorConfig: updates.anomalyDetectorConfig, + })), + saveSettings: jest.fn(), + }; + + const configService = { + get: jest.fn((_key: string, defaultValue: string) => defaultValue), + }; + + const svc = new SettingsService(storageClient as any, configService as any); + svc['cachedSettings'] = await storageClient.getSettings(); + + const result = await svc.updateDetectorConfig({ + connections: { warningZScore: 2.8 }, + }); + + expect(result.connections).toEqual({ + warningZScore: 2.8, + consecutiveRequired: 5, + }); + }); + }); + + describe('resolveDetectorConfig', () => { + it('field-level merge matches service expectations', () => { + const existing = { connections: { warningZScore: 2.5, consecutiveRequired: 5 } }; + const merged = { + connections: { ...existing.connections, warningZScore: 2.8 }, + }; + expect(resolveDetectorConfig(MetricType.connections, merged).warningZScore).toBe(2.8); + expect(resolveDetectorConfig(MetricType.connections, merged).consecutiveRequired).toBe(5); + }); + }); +}); diff --git a/apps/api/src/settings/settings.controller.ts b/apps/api/src/settings/settings.controller.ts index 559650d8..96b80d6f 100644 --- a/apps/api/src/settings/settings.controller.ts +++ b/apps/api/src/settings/settings.controller.ts @@ -1,12 +1,54 @@ -import { Controller, Get, Put, Post, Body } from '@nestjs/common'; -import { ApiTags, ApiOperation, ApiResponse } from '@nestjs/swagger'; +import { + Controller, + Get, + Put, + Post, + Patch, + Body, + HttpCode, + UsePipes, + ValidationPipe, + Inject, + Optional, + OnModuleInit, + Logger, +} from '@nestjs/common'; +import { ApiTags, ApiOperation, ApiResponse, ApiOkResponse } from '@nestjs/swagger'; import { SettingsUpdateRequest, SettingsResponse } from '@betterdb/shared'; +import { ANOMALY_SERVICE } from '@betterdb/shared'; +import type { IAnomalyService } from '@betterdb/shared'; +import { + API_METRIC_TYPES, + DETECTOR_DEFAULTS, + DetectorConfig, + DetectorConfigMap, + MetricType, + resolveDetectorConfig, + toApiDetectorConfig, +} from '../anomaly/anomaly.types'; +import { UpdateAnomalyDetectorsDto, DetectorConfigDto } from './dto/update-anomaly-detectors.dto'; +import { PrometheusService } from '../prometheus/prometheus.service'; import { SettingsService } from './settings.service'; @ApiTags('settings') @Controller('settings') -export class SettingsController { - constructor(private readonly settingsService: SettingsService) {} +export class SettingsController implements OnModuleInit { + private readonly logger = new Logger(SettingsController.name); + + constructor( + private readonly settingsService: SettingsService, + private readonly prometheusService: PrometheusService, + @Optional() @Inject(ANOMALY_SERVICE) private readonly anomalyService?: IAnomalyService, + ) {} + + onModuleInit(): void { + if (!this.anomalyService) { + this.logger.warn( + 'AnomalyService not injected into SettingsController — ' + + 'detector config hot-reload will be skipped on PATCH', + ); + } + } @Get() @ApiOperation({ summary: 'Get current application settings' }) @@ -28,4 +70,72 @@ export class SettingsController { async resetSettings(): Promise { return this.settingsService.resetToDefaults(); } + + @Get('anomaly/detectors') + @ApiOperation({ summary: 'Get anomaly detector threshold configuration' }) + @ApiOkResponse() + async getAnomalyDetectors(): Promise<{ + defaults: Record; + overrides: DetectorConfigMap; + resolved: Record; + }> { + const overrides = await this.settingsService.getDetectorConfig(); + + const sanitize = (cfg: Required) => toApiDetectorConfig(cfg); + + const defaults = Object.fromEntries( + API_METRIC_TYPES.map((m) => [m, sanitize(DETECTOR_DEFAULTS[m])]), + ) as Record; + + const resolved = Object.fromEntries( + API_METRIC_TYPES.map((m) => [m, sanitize(resolveDetectorConfig(m, overrides))]), + ) as Record; + + return { defaults, overrides, resolved }; + } + + @Patch('anomaly/detectors') + @HttpCode(200) + @ApiOperation({ summary: 'Update per-metric anomaly detector thresholds' }) + @UsePipes(new ValidationPipe({ whitelist: true, forbidNonWhitelisted: true })) + async patchAnomalyDetectors( + @Body() dto: UpdateAnomalyDetectorsDto, + ): Promise<{ message: string; config: DetectorConfigMap }> { + const configMap = this.dtoToConfigMap(dto); + const result = await this.settingsService.updateDetectorConfig(configMap); + this.anomalyService?.reloadDetectorConfig(result); + this.prometheusService.incrementDetectorConfigUpdates(); + return { message: 'Detector config updated', config: result }; + } + + @Post('anomaly/detectors/reset') + @HttpCode(200) + @ApiOperation({ summary: 'Reset all anomaly detector thresholds to defaults' }) + async resetAnomalyDetectors(): Promise<{ message: string }> { + await this.settingsService.updateSettings({ anomalyDetectorConfig: {} }); + this.anomalyService?.reloadDetectorConfig({}); + return { message: 'Detector config reset to defaults' }; + } + + private dtoToConfigMap(dto: UpdateAnomalyDetectorsDto): DetectorConfigMap { + const map: DetectorConfigMap = {}; + for (const metric of API_METRIC_TYPES) { + const entry = dto[metric]; + if (entry !== undefined) { + map[metric] = this.stripUndefined(entry); + } + } + return map; + } + + private stripUndefined(dto: DetectorConfigDto): DetectorConfigMap[MetricType] { + const out: DetectorConfigMap[MetricType] = {}; + if (dto.warningZScore !== undefined) out.warningZScore = dto.warningZScore; + if (dto.criticalZScore !== undefined) out.criticalZScore = dto.criticalZScore; + if (dto.warningAbsolute !== undefined) out.warningAbsolute = dto.warningAbsolute; + if (dto.criticalAbsolute !== undefined) out.criticalAbsolute = dto.criticalAbsolute; + if (dto.consecutiveRequired !== undefined) out.consecutiveRequired = dto.consecutiveRequired; + if (dto.cooldownMs !== undefined) out.cooldownMs = dto.cooldownMs; + return out; + } } diff --git a/apps/api/src/settings/settings.module.ts b/apps/api/src/settings/settings.module.ts index e3eaf1e2..1c7a2793 100644 --- a/apps/api/src/settings/settings.module.ts +++ b/apps/api/src/settings/settings.module.ts @@ -2,10 +2,11 @@ import { Module, Global } from '@nestjs/common'; import { SettingsService } from './settings.service'; import { SettingsController } from './settings.controller'; import { StorageModule } from '../storage/storage.module'; +import { PrometheusModule } from '../prometheus/prometheus.module'; @Global() @Module({ - imports: [StorageModule], + imports: [StorageModule, PrometheusModule], providers: [SettingsService], controllers: [SettingsController], exports: [SettingsService], diff --git a/apps/api/src/settings/settings.service.ts b/apps/api/src/settings/settings.service.ts index 88ab3892..189e308a 100644 --- a/apps/api/src/settings/settings.service.ts +++ b/apps/api/src/settings/settings.service.ts @@ -1,6 +1,7 @@ -import { Injectable, Inject, OnModuleInit, OnModuleDestroy, Logger } from '@nestjs/common'; +import { BadRequestException, Injectable, Inject, OnModuleInit, OnModuleDestroy, Logger } from '@nestjs/common'; import { ConfigService } from '@nestjs/config'; import { AppSettings, SettingsUpdateRequest, SettingsResponse } from '@betterdb/shared'; +import { DetectorConfigMap, MetricType, resolveDetectorConfig } from '../anomaly/anomaly.types'; import { StoragePort } from '../common/interfaces/storage-port.interface'; @Injectable() @@ -74,6 +75,7 @@ export class SettingsService implements OnModuleInit, OnModuleDestroy { 10, ), inferenceSlaConfig: {}, + anomalyDetectorConfig: {}, createdAt: now, updatedAt: now, }; @@ -126,6 +128,46 @@ export class SettingsService implements OnModuleInit, OnModuleDestroy { }; } + async getDetectorConfig(): Promise { + const stored = this.getCachedSettings().anomalyDetectorConfig; + return stored ?? {}; + } + + async updateDetectorConfig(overrides: DetectorConfigMap): Promise { + const existing = await this.getDetectorConfig(); + const merged: DetectorConfigMap = { ...existing }; + + for (const key of Object.keys(overrides) as MetricType[]) { + merged[key] = { + ...existing[key], + ...overrides[key], + }; + } + + for (const key of Object.keys(merged) as MetricType[]) { + const resolved = resolveDetectorConfig(key as MetricType, merged); + + if (resolved.warningZScore >= resolved.criticalZScore) { + throw new BadRequestException( + `${key}: warningZScore (${resolved.warningZScore}) must be less than ` + + `criticalZScore (${resolved.criticalZScore}) after merging with stored config`, + ); + } + + const hasWarningAbs = resolved.warningAbsolute !== Number.POSITIVE_INFINITY; + const hasCriticalAbs = resolved.criticalAbsolute !== Number.POSITIVE_INFINITY; + if (hasWarningAbs && hasCriticalAbs && resolved.warningAbsolute >= resolved.criticalAbsolute) { + throw new BadRequestException( + `${key}: warningAbsolute (${resolved.warningAbsolute}) must be less than ` + + `criticalAbsolute (${resolved.criticalAbsolute}) after merging with stored config`, + ); + } + } + + const updated = await this.updateSettings({ anomalyDetectorConfig: merged }); + return updated.settings.anomalyDetectorConfig as DetectorConfigMap; + } + async resetToDefaults(): Promise { await this.initializeFromEnv(); const settings = await this.storageClient.getSettings(); diff --git a/apps/api/src/storage/adapters/__tests__/inference-sla-config.spec.ts b/apps/api/src/storage/adapters/__tests__/inference-sla-config.spec.ts index bb057996..03d7e2bf 100644 --- a/apps/api/src/storage/adapters/__tests__/inference-sla-config.spec.ts +++ b/apps/api/src/storage/adapters/__tests__/inference-sla-config.spec.ts @@ -15,6 +15,7 @@ function buildSettings(overrides: Partial = {}): AppSettings { metricForecastingDefaultRollingWindowMs: 21_600_000, metricForecastingDefaultAlertThresholdMs: 7_200_000, inferenceSlaConfig: {}, + anomalyDetectorConfig: {}, updatedAt: now, createdAt: now, ...overrides, diff --git a/apps/api/src/storage/adapters/base-sql.adapter.ts b/apps/api/src/storage/adapters/base-sql.adapter.ts index 8262e313..a58c9fe8 100644 --- a/apps/api/src/storage/adapters/base-sql.adapter.ts +++ b/apps/api/src/storage/adapters/base-sql.adapter.ts @@ -270,6 +270,7 @@ export class RowMappers { metricForecastingDefaultRollingWindowMs: row.throughput_forecasting_default_rolling_window_ms, metricForecastingDefaultAlertThresholdMs: row.throughput_forecasting_default_alert_threshold_ms, inferenceSlaConfig: this.dialect.fromJson(row.inference_sla_config) ?? {}, + anomalyDetectorConfig: this.dialect.fromJson(row.anomaly_detector_config) ?? {}, updatedAt: typeof row.updated_at === 'string' ? parseInt(row.updated_at, 10) : row.updated_at, createdAt: typeof row.created_at === 'string' ? parseInt(row.created_at, 10) : row.created_at, }; diff --git a/apps/api/src/storage/adapters/memory.adapter.ts b/apps/api/src/storage/adapters/memory.adapter.ts index d9392735..aa743d03 100644 --- a/apps/api/src/storage/adapters/memory.adapter.ts +++ b/apps/api/src/storage/adapters/memory.adapter.ts @@ -774,6 +774,9 @@ export class MemoryAdapter implements StoragePort { if (updates.inferenceSlaConfig !== undefined) { validUpdates.inferenceSlaConfig = updates.inferenceSlaConfig; } + if (updates.anomalyDetectorConfig !== undefined) { + validUpdates.anomalyDetectorConfig = updates.anomalyDetectorConfig; + } this.settings = { ...this.settings, diff --git a/apps/api/src/storage/adapters/postgres.adapter.ts b/apps/api/src/storage/adapters/postgres.adapter.ts index 065fc244..f34e5c5d 100644 --- a/apps/api/src/storage/adapters/postgres.adapter.ts +++ b/apps/api/src/storage/adapters/postgres.adapter.ts @@ -1239,6 +1239,7 @@ export class PostgresAdapter implements StoragePort { ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS throughput_forecasting_default_rolling_window_ms INTEGER NOT NULL DEFAULT 21600000; ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS throughput_forecasting_default_alert_threshold_ms INTEGER NOT NULL DEFAULT 7200000; ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS inference_sla_config JSONB NOT NULL DEFAULT '{}'::JSONB; + ALTER TABLE app_settings ADD COLUMN IF NOT EXISTS anomaly_detector_config JSONB NOT NULL DEFAULT '{}'::JSONB; CREATE TABLE IF NOT EXISTS metric_forecast_settings ( connection_id TEXT NOT NULL, @@ -2502,9 +2503,9 @@ export class PostgresAdapter implements StoragePort { id, audit_poll_interval_ms, client_analytics_poll_interval_ms, anomaly_poll_interval_ms, anomaly_cache_ttl_ms, anomaly_prometheus_interval_ms, throughput_forecasting_enabled, throughput_forecasting_default_rolling_window_ms, throughput_forecasting_default_alert_threshold_ms, - inference_sla_config, + inference_sla_config, anomaly_detector_config, updated_at, created_at - ) VALUES (1, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + ) VALUES (1, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) ON CONFLICT(id) DO UPDATE SET audit_poll_interval_ms = EXCLUDED.audit_poll_interval_ms, client_analytics_poll_interval_ms = EXCLUDED.client_analytics_poll_interval_ms, @@ -2515,6 +2516,7 @@ export class PostgresAdapter implements StoragePort { throughput_forecasting_default_rolling_window_ms = EXCLUDED.throughput_forecasting_default_rolling_window_ms, throughput_forecasting_default_alert_threshold_ms = EXCLUDED.throughput_forecasting_default_alert_threshold_ms, inference_sla_config = EXCLUDED.inference_sla_config, + anomaly_detector_config = EXCLUDED.anomaly_detector_config, updated_at = EXCLUDED.updated_at`, [ settings.auditPollIntervalMs, @@ -2526,6 +2528,7 @@ export class PostgresAdapter implements StoragePort { settings.metricForecastingDefaultRollingWindowMs, settings.metricForecastingDefaultAlertThresholdMs, JSON.stringify(settings.inferenceSlaConfig ?? {}), + JSON.stringify(settings.anomalyDetectorConfig ?? {}), now, settings.createdAt || now, ], diff --git a/apps/api/src/storage/adapters/sqlite.adapter.ts b/apps/api/src/storage/adapters/sqlite.adapter.ts index 2013d167..f5d12047 100644 --- a/apps/api/src/storage/adapters/sqlite.adapter.ts +++ b/apps/api/src/storage/adapters/sqlite.adapter.ts @@ -223,6 +223,7 @@ export class SqliteAdapter implements StoragePort { type: 'INTEGER NOT NULL DEFAULT 7200000', }, { name: 'inference_sla_config', type: "TEXT NOT NULL DEFAULT '{}'" }, + { name: 'anomaly_detector_config', type: "TEXT NOT NULL DEFAULT '{}'" }, ]; for (const col of appSettingsMigrations) { if (!settingsColumns.has(col.name)) { @@ -2253,9 +2254,9 @@ export class SqliteAdapter implements StoragePort { id, audit_poll_interval_ms, client_analytics_poll_interval_ms, anomaly_poll_interval_ms, anomaly_cache_ttl_ms, anomaly_prometheus_interval_ms, throughput_forecasting_enabled, throughput_forecasting_default_rolling_window_ms, throughput_forecasting_default_alert_threshold_ms, - inference_sla_config, + inference_sla_config, anomaly_detector_config, updated_at, created_at - ) VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ) VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(id) DO UPDATE SET audit_poll_interval_ms = excluded.audit_poll_interval_ms, client_analytics_poll_interval_ms = excluded.client_analytics_poll_interval_ms, @@ -2266,6 +2267,7 @@ export class SqliteAdapter implements StoragePort { throughput_forecasting_default_rolling_window_ms = excluded.throughput_forecasting_default_rolling_window_ms, throughput_forecasting_default_alert_threshold_ms = excluded.throughput_forecasting_default_alert_threshold_ms, inference_sla_config = excluded.inference_sla_config, + anomaly_detector_config = excluded.anomaly_detector_config, updated_at = excluded.updated_at `); @@ -2279,6 +2281,7 @@ export class SqliteAdapter implements StoragePort { settings.metricForecastingDefaultRollingWindowMs, settings.metricForecastingDefaultAlertThresholdMs, JSON.stringify(settings.inferenceSlaConfig ?? {}), + JSON.stringify(settings.anomalyDetectorConfig ?? {}), now, settings.createdAt || now, ); @@ -3494,6 +3497,57 @@ export class SqliteAdapter implements StoragePort { async createCacheProposal(input: CreateCacheProposalInput): Promise { if (!this.db) throw new Error('Database not initialized'); + + // Guard: enforce pending-uniqueness (partial indexes are unreliable on some SQLite versions) + if (input.proposal_type === 'tool_ttl_adjust') { + const toolName = + (input.proposal_payload as { tool_name?: string })?.tool_name ?? null; + const normalizedTool = toolName ?? '__betterdb_null__'; + const existing = this.db + .prepare( + `SELECT id FROM cache_proposals + WHERE connection_id = ? + AND cache_name = ? + AND proposal_type = ? + AND status = 'pending' + AND COALESCE(json_extract(proposal_payload, '$.tool_name'), '__betterdb_null__') = ? + LIMIT 1`, + ) + .get(input.connection_id, input.cache_name, input.proposal_type, normalizedTool); + if (existing) { + throw new Error( + `UNIQUE constraint failed: pending proposal already exists for ` + + `(${input.connection_id}, ${input.cache_name}, ${input.proposal_type})`, + ); + } + } else if (input.proposal_type === 'threshold_adjust') { + const category = + (input.proposal_payload as { category?: string | null })?.category ?? null; + const normalizedCategory = category ?? '__betterdb_null__'; + const existing = this.db + .prepare( + `SELECT id FROM cache_proposals + WHERE connection_id = ? + AND cache_name = ? + AND proposal_type = ? + AND status = 'pending' + AND COALESCE(json_extract(proposal_payload, '$.category'), '__betterdb_null__') = ? + LIMIT 1`, + ) + .get( + input.connection_id, + input.cache_name, + input.proposal_type, + normalizedCategory, + ); + if (existing) { + throw new Error( + `UNIQUE constraint failed: pending proposal already exists for ` + + `(${input.connection_id}, ${input.cache_name}, ${input.proposal_type})`, + ); + } + } + const proposedAt = input.proposed_at ?? Date.now(); const expiresAt = input.expires_at ?? proposedAt + PROPOSAL_DEFAULT_EXPIRY_MS; this.db diff --git a/docs/anomaly-detection.md b/docs/anomaly-detection.md index 1e7617a4..5c57e93e 100644 --- a/docs/anomaly-detection.md +++ b/docs/anomaly-detection.md @@ -494,6 +494,43 @@ curl -X PUT http://localhost:3001/settings \ **Note**: Changing `anomalyPollIntervalMs` affects detection sensitivity. Faster polling = quicker detection but higher overhead. +### Per-metric detector thresholds + +Tune Z-score, absolute, consecutive-sample, and cooldown thresholds per metric at runtime (no restart). Partial PATCH bodies only override the fields you send. + +```bash +# View defaults, stored overrides, and fully resolved config +curl http://localhost:3001/settings/anomaly/detectors + +# Partial update — only changes warningZScore and consecutiveRequired for connections +curl -X PATCH http://localhost:3001/settings/anomaly/detectors \ + -H "Content-Type: application/json" \ + -d '{"connections": {"warningZScore": 2.5, "consecutiveRequired": 5}}' +``` + +**Configurable fields** (all optional per metric): + +| Field | Description | +|-------|-------------| +| `warningZScore` | Z-score threshold for WARNING (0.5–10) | +| `criticalZScore` | Z-score threshold for CRITICAL (1–15, must be > warning when both set) | +| `warningAbsolute` | Absolute value threshold for WARNING | +| `criticalAbsolute` | Absolute value threshold for CRITICAL | +| `consecutiveRequired` | Consecutive samples above threshold before alerting (1–20) | +| `cooldownMs` | Minimum ms between alerts for the same metric (1000–3600000) | + +**Metrics**: `connections`, `ops_per_sec`, `memory_used`, `input_kbps`, `output_kbps`, `slowlog_last_id`, `acl_denied`, `evicted_keys`, `blocked_clients`, `keyspace_misses`, `fragmentation_ratio`. + +`replication_role` is detected via state-diff (not z-score) and is not tunable through this API. + +Reset all overrides: + +```bash +curl -X POST http://localhost:3001/settings/anomaly/detectors/reset +``` + +Overrides persist in postgres/sqlite settings storage and survive restarts. The memory storage backend resets overrides on restart (same as other settings). + ### Disabling Detection To completely disable anomaly detection: @@ -511,6 +548,16 @@ docker run -e ANOMALY_DETECTION_ENABLED=false betterdb/monitor ## API Endpoints +### Get / update detector thresholds + +```http +GET /settings/anomaly/detectors +PATCH /settings/anomaly/detectors +POST /settings/anomaly/detectors/reset +``` + +See [Per-metric detector thresholds](#per-metric-detector-thresholds) above for request examples and field reference. + ### Get Recent Anomaly Events ```http diff --git a/docs/prometheus-metrics.md b/docs/prometheus-metrics.md index b35ea882..b12b683f 100644 --- a/docs/prometheus-metrics.md +++ b/docs/prometheus-metrics.md @@ -263,6 +263,7 @@ Real-time anomaly detection system metrics. | `betterdb_anomaly_events_current` | gauge | `severity` | Unresolved anomalies by severity | `3` | | `betterdb_anomaly_by_severity` | gauge | `severity` | Anomalies in last hour by severity | `12` | | `betterdb_anomaly_by_metric` | gauge | `metric_type` | Anomalies in last hour by metric | `8` | +| `betterdb_detector_config_updates_total` | counter | - | PATCH requests to `/settings/anomaly/detectors` | `3` | **Label Values**: - `severity`: `info`, `warning`, `critical` diff --git a/packages/shared/src/types/anomaly-detector-settings.types.ts b/packages/shared/src/types/anomaly-detector-settings.types.ts new file mode 100644 index 00000000..c37235cb --- /dev/null +++ b/packages/shared/src/types/anomaly-detector-settings.types.ts @@ -0,0 +1,13 @@ +export interface AnomalyDetectorConfigEntry { + warningZScore?: number; + criticalZScore?: number; + warningAbsolute?: number; + criticalAbsolute?: number; + consecutiveRequired?: number; + cooldownMs?: number; +} + +/** Persisted per-metric detector threshold overrides (partial fields only). */ +export type AnomalyDetectorConfigMap = Partial< + Record +>; diff --git a/packages/shared/src/types/anomaly.ts b/packages/shared/src/types/anomaly.ts index 9e945b2e..7f1be590 100644 --- a/packages/shared/src/types/anomaly.ts +++ b/packages/shared/src/types/anomaly.ts @@ -1,3 +1,4 @@ +import type { AnomalyDetectorConfigMap } from './anomaly-detector-settings.types'; import { AnomalyWarmupStatus } from './health'; export const ANOMALY_SERVICE = 'ANOMALY_SERVICE'; @@ -8,4 +9,5 @@ export const ANOMALY_SERVICE = 'ANOMALY_SERVICE'; */ export interface IAnomalyService { getWarmupStatus(): AnomalyWarmupStatus; + reloadDetectorConfig(overrides: AnomalyDetectorConfigMap): void; } diff --git a/packages/shared/src/types/settings.types.ts b/packages/shared/src/types/settings.types.ts index eef7a701..78bd440a 100644 --- a/packages/shared/src/types/settings.types.ts +++ b/packages/shared/src/types/settings.types.ts @@ -1,3 +1,4 @@ +import type { AnomalyDetectorConfigMap } from './anomaly-detector-settings.types'; import type { InferenceSlaConfig } from './inference-latency'; export interface AppSettings { @@ -17,6 +18,8 @@ export interface AppSettings { inferenceSlaConfig: InferenceSlaConfig; + anomalyDetectorConfig: AnomalyDetectorConfigMap; + updatedAt: number; createdAt: number; } diff --git a/proprietary/anomaly-detection/__tests__/anomaly.service.spec.ts b/proprietary/anomaly-detection/__tests__/anomaly.service.spec.ts index 27f94791..8eac1e9f 100644 --- a/proprietary/anomaly-detection/__tests__/anomaly.service.spec.ts +++ b/proprietary/anomaly-detection/__tests__/anomaly.service.spec.ts @@ -639,6 +639,16 @@ describe('AnomalyService', () => { const config = detectors.get(MetricType.CPU_UTILIZATION).getConfig(); expect(config.detectDrops).toBe(true); }); + + it('CPU detector uses DEFAULT_SPIKE_CONFIG thresholds, not connections', async () => { + const { DEFAULT_SPIKE_CONFIG } = await import('@app/anomaly/anomaly.types'); + await poll(); + const detectors: Map = (service as any).detectors.get('conn-1'); + const config = detectors.get(MetricType.CPU_UTILIZATION).getConfig(); + expect(config.warningZScore).toBe(DEFAULT_SPIKE_CONFIG.warningZScore); + expect(config.criticalZScore).toBe(DEFAULT_SPIKE_CONFIG.criticalZScore); + expect(config.detectDrops).toBe(true); + }); }); // ─── Buffer Initialization ────────────────────────────────────────────── diff --git a/proprietary/anomaly-detection/anomaly.service.ts b/proprietary/anomaly-detection/anomaly.service.ts index 6e980168..30812625 100644 --- a/proprietary/anomaly-detection/anomaly.service.ts +++ b/proprietary/anomaly-detection/anomaly.service.ts @@ -10,6 +10,14 @@ import { ConnectionRegistry } from '@app/connections/connection-registry.service import { MetricBuffer } from './metric-buffer'; import { SpikeDetector } from './spike-detector'; import { Correlator } from './correlator'; +import { + DEFAULT_SPIKE_CONFIG, + DetectorConfigMap, + DETECTOR_DEFAULTS, + MetricType as ApiMetricType, + resolveDetectorConfig, + toSpikeDetectorConfig, +} from '@app/anomaly/anomaly.types'; import { MetricType, AnomalyEvent, @@ -45,6 +53,7 @@ export class AnomalyService extends MultiConnectionPoller implements OnModuleIni private readonly maxRecentGroups = 100; private readonly metricExtractors: Map; + private detectorOverrides: DetectorConfigMap = {}; private readonly correlationIntervalMs = 5000; private correlationInterval: NodeJS.Timeout | null = null; private prometheusSummaryInterval: NodeJS.Timeout | null = null; @@ -78,7 +87,8 @@ export class AnomalyService extends MultiConnectionPoller implements OnModuleIni return this.settingsService.getCachedSettings().anomalyPrometheusIntervalMs; } - onModuleInit() { + async onModuleInit() { + this.detectorOverrides = await this.settingsService.getDetectorConfig(); this.logger.log('Starting anomaly detection service...'); // Start multi-connection polling @@ -145,47 +155,39 @@ export class AnomalyService extends MultiConnectionPoller implements OnModuleIni ]); } - private initializeBuffersAndDetectorsForConnection(connectionId: string): void { - // Define custom configs for specific metrics - const configs: Partial> = { - [MetricType.ACL_DENIED]: { - warningZScore: 1.5, - criticalZScore: 2.5, - warningThreshold: 10, - criticalThreshold: 50, - consecutiveRequired: 2, - cooldownMs: 30000, - }, - [MetricType.MEMORY_USED]: { - warningZScore: 2.5, - criticalZScore: 3.5, - consecutiveRequired: 3, - cooldownMs: 60000, - }, - [MetricType.EVICTED_KEYS]: { - warningZScore: 2.0, - criticalZScore: 3.0, - consecutiveRequired: 2, - cooldownMs: 30000, - }, - [MetricType.FRAGMENTATION_RATIO]: { - warningZScore: 2.0, - criticalZScore: 3.0, - warningThreshold: 1.5, - criticalThreshold: 2.0, - consecutiveRequired: 5, - cooldownMs: 120000, - }, - [MetricType.CPU_UTILIZATION]: { - warningZScore: 2.0, - criticalZScore: 3.0, - consecutiveRequired: 3, - cooldownMs: 60000, - detectDrops: true, - }, - }; + reloadDetectorConfig(overrides: DetectorConfigMap): void { + this.detectorOverrides = overrides; + this.applyDetectorConfigToAllConnections(); + } + + private applyDetectorConfigToAllConnections(): void { + for (const detectors of this.detectors.values()) { + for (const [metricType, detector] of detectors.entries()) { + detector.updateConfig(this.resolveSpikeConfig(metricType)); + } + } + } - // Initialize buffers and detectors for all metrics + private isApiMetric(metric: MetricType): boolean { + return (metric as string) in DETECTOR_DEFAULTS; + } + + private resolveSpikeConfig(metric: MetricType): SpikeDetectorConfig { + if (this.isApiMetric(metric)) { + return toSpikeDetectorConfig( + resolveDetectorConfig( + metric as unknown as ApiMetricType, + this.detectorOverrides, + ), + ); + } + if (metric === MetricType.CPU_UTILIZATION) { + return { ...DEFAULT_SPIKE_CONFIG, detectDrops: true }; + } + return {}; + } + + private initializeBuffersAndDetectorsForConnection(connectionId: string): void { const connectionBuffers = new Map(); const connectionDetectors = new Map(); @@ -193,8 +195,10 @@ export class AnomalyService extends MultiConnectionPoller implements OnModuleIni // REPLICATION_ROLE, CLUSTER_STATE, SLOWLOG_LAST_ID, and deprecated SLOWLOG_COUNT are handled outside the normal extractor loop if (metricType === MetricType.REPLICATION_ROLE || metricType === MetricType.CLUSTER_STATE || metricType === MetricType.SLOWLOG_LAST_ID || metricType === MetricType.SLOWLOG_COUNT) continue; connectionBuffers.set(metricType, new MetricBuffer(metricType)); - const config = configs[metricType] || {}; - connectionDetectors.set(metricType, new SpikeDetector(metricType, config)); + connectionDetectors.set( + metricType, + new SpikeDetector(metricType, this.resolveSpikeConfig(metricType)), + ); } this.buffers.set(connectionId, connectionBuffers); @@ -286,12 +290,13 @@ export class AnomalyService extends MultiConnectionPoller implements OnModuleIni // Lazily create buffer/detector on first available data if (!buffers.has(MetricType.SLOWLOG_LAST_ID)) { buffers.set(MetricType.SLOWLOG_LAST_ID, new MetricBuffer(MetricType.SLOWLOG_LAST_ID)); - detectors.set(MetricType.SLOWLOG_LAST_ID, new SpikeDetector(MetricType.SLOWLOG_LAST_ID, { - warningZScore: 1.5, - criticalZScore: 2.5, - consecutiveRequired: 1, - cooldownMs: 30000, - })); + detectors.set( + MetricType.SLOWLOG_LAST_ID, + new SpikeDetector( + MetricType.SLOWLOG_LAST_ID, + this.resolveSpikeConfig(MetricType.SLOWLOG_LAST_ID), + ), + ); } const slowlogBuffer = buffers.get(MetricType.SLOWLOG_LAST_ID)!; @@ -305,7 +310,8 @@ export class AnomalyService extends MultiConnectionPoller implements OnModuleIni } } - // Replication role state-change detection (not z-score based) + // Replication role state-change detection (not z-score based). + // Not exposed via /settings/anomaly/detectors — no SpikeDetector exists for this metric. const roleStr = info['role']; if (roleStr) { const currentRole = roleStr === 'master' ? 1 : (roleStr === 'slave' || roleStr === 'replica') ? 0 : -1; diff --git a/proprietary/anomaly-detection/spike-detector.ts b/proprietary/anomaly-detection/spike-detector.ts index 8ff7506c..193f33ee 100644 --- a/proprietary/anomaly-detection/spike-detector.ts +++ b/proprietary/anomaly-detection/spike-detector.ts @@ -168,4 +168,16 @@ export class SpikeDetector { getConfig(): Required { return { ...this.config }; } + + updateConfig(config: SpikeDetectorConfig = {}): void { + this.config = { + warningZScore: config.warningZScore ?? 2.0, + criticalZScore: config.criticalZScore ?? 3.0, + warningThreshold: config.warningThreshold ?? Number.POSITIVE_INFINITY, + criticalThreshold: config.criticalThreshold ?? Number.POSITIVE_INFINITY, + consecutiveRequired: config.consecutiveRequired ?? 3, + cooldownMs: config.cooldownMs ?? 60000, + detectDrops: config.detectDrops !== undefined ? config.detectDrops : this.config.detectDrops, + }; + } }