Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions apps/api/src/anomaly/anomaly.types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import type { SpikeDetectorConfig } from '@proprietary/anomaly-detection/types';

/**
 * Metrics whose anomaly detector thresholds are configurable.
 *
 * Every member has an entry in DETECTOR_DEFAULTS and may appear as a key of
 * DetectorConfigMap. Each member's string value is identical to its name, so
 * the enum values double as the keys used in override maps.
 */
export enum MetricType {
connections = 'connections',
ops_per_sec = 'ops_per_sec',
memory_used = 'memory_used',
input_kbps = 'input_kbps',
output_kbps = 'output_kbps',
slowlog_last_id = 'slowlog_last_id',
acl_denied = 'acl_denied',
evicted_keys = 'evicted_keys',
blocked_clients = 'blocked_clients',
keyspace_misses = 'keyspace_misses',
fragmentation_ratio = 'fragmentation_ratio',
}

/**
 * Tunable thresholds for a single metric's detector.
 *
 * Every field is optional so the same shape can describe a partial override;
 * resolveDetectorConfig fills the missing fields from DETECTOR_DEFAULTS.
 */
export interface DetectorConfig {
// Z-score thresholds for the warning and critical levels.
warningZScore?: number;
criticalZScore?: number;
// Absolute-value thresholds. Number.POSITIVE_INFINITY is used in this file as
// the "no absolute threshold configured" sentinel (see NO_ABSOLUTE).
warningAbsolute?: number;
criticalAbsolute?: number;
// NOTE(review): presumably the number of consecutive anomalous samples needed
// before an event fires — confirm against the spike detector implementation.
consecutiveRequired?: number;
// Cooldown duration in milliseconds.
cooldownMs?: number;
}

/** Per-metric overrides; metrics without an entry fall back to their defaults. */
export type DetectorConfigMap = Partial<Record<MetricType, DetectorConfig>>;

/**
 * Sentinel for "no absolute threshold configured". toApiDetectorConfig omits
 * fields holding this value before the config is serialized.
 */
const NO_ABSOLUTE = Number.POSITIVE_INFINITY;

/**
 * Baseline configuration that every DETECTOR_DEFAULTS entry starts from: pure
 * z-score detection with no absolute thresholds.
 *
 * `satisfies` verifies that every DetectorConfig field is present without
 * widening the inferred literal type.
 */
const zScoreDefaults = {
warningZScore: 2.0,
criticalZScore: 3.0,
warningAbsolute: NO_ABSOLUTE,
criticalAbsolute: NO_ABSOLUTE,
consecutiveRequired: 3,
cooldownMs: 60_000,
} satisfies Required<DetectorConfig>;
Comment thread
cursor[bot] marked this conversation as resolved.

/**
 * Fully populated default configuration for every MetricType.
 *
 * Defaults mirror hardcoded thresholds in proprietary/anomaly-detection/anomaly.service.ts.
 * Each entry spreads zScoreDefaults into a fresh object and then overrides only
 * the fields that differ from the baseline, so entries never share a reference.
 */
export const DETECTOR_DEFAULTS: Record<MetricType, Required<DetectorConfig>> = {
[MetricType.connections]: { ...zScoreDefaults },
[MetricType.ops_per_sec]: { ...zScoreDefaults },
// Higher z-score thresholds than the baseline (2.5 / 3.5 instead of 2.0 / 3.0).
[MetricType.memory_used]: {
...zScoreDefaults,
warningZScore: 2.5,
criticalZScore: 3.5,
},
[MetricType.input_kbps]: { ...zScoreDefaults },
[MetricType.output_kbps]: { ...zScoreDefaults },
// Lower z-score thresholds, a single required sample and a 30 s cooldown.
[MetricType.slowlog_last_id]: {
...zScoreDefaults,
warningZScore: 1.5,
criticalZScore: 2.5,
consecutiveRequired: 1,
cooldownMs: 30_000,
},
// One of the two metrics with finite absolute thresholds (10 / 50).
[MetricType.acl_denied]: {
...zScoreDefaults,
warningZScore: 1.5,
criticalZScore: 2.5,
warningAbsolute: 10,
criticalAbsolute: 50,
consecutiveRequired: 2,
cooldownMs: 30_000,
},
[MetricType.evicted_keys]: {
...zScoreDefaults,
consecutiveRequired: 2,
cooldownMs: 30_000,
},
[MetricType.blocked_clients]: { ...zScoreDefaults },
[MetricType.keyspace_misses]: { ...zScoreDefaults },
// Finite absolute thresholds (1.5 / 2.0), five required samples and a 120 s cooldown.
[MetricType.fragmentation_ratio]: {
...zScoreDefaults,
warningAbsolute: 1.5,
criticalAbsolute: 2.0,
consecutiveRequired: 5,
cooldownMs: 120_000,
},
};

/**
 * Builds the effective detector configuration for one metric by layering the
 * caller-supplied overrides on top of that metric's entry in DETECTOR_DEFAULTS.
 *
 * Override fields whose value is `undefined` or `null` are ignored. A plain
 * object spread would copy them and replace a valid default with a non-number,
 * silently breaking the `Required<DetectorConfig>` contract (this happens with
 * DTO instances that carry explicitly-undefined fields, or with `null`s read
 * back from persisted JSON).
 *
 * This function only merges; it does not check cross-field invariants such as
 * warning < critical.
 *
 * @param metric - Metric whose configuration is being resolved.
 * @param overrides - Per-metric partial overrides; may lack an entry for `metric`.
 * @returns A new object with every field populated; the defaults are not mutated.
 */
export function resolveDetectorConfig(
  metric: MetricType,
  overrides: DetectorConfigMap,
): Required<DetectorConfig> {
  // Copy first so neither the shared defaults nor the override object is mutated.
  const resolved: Required<DetectorConfig> = { ...DETECTOR_DEFAULTS[metric] };

  const override = overrides[metric];
  if (override == null) {
    return resolved;
  }

  for (const key of Object.keys(override) as Array<keyof DetectorConfig>) {
    const value = override[key];
    // `!= null` rejects both undefined and null while keeping 0 and other falsy numbers.
    if (value != null) {
      resolved[key] = value;
    }
  }
  return resolved;
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Partial overrides can invert warning/critical Z-score ordering

Medium Severity

resolveDetectorConfig naively merges overrides onto defaults without validating cross-field invariants. Because the DTO validator (CriticalGreaterThanWarningValidator) only checks warningZScore < criticalZScore when both fields are in the same request payload, a partial PATCH like { connections: { warningZScore: 9.5 } } passes validation but produces a resolved config where warningZScore (9.5) exceeds the default criticalZScore (3.0). The spike detector then operates with an inverted threshold, likely suppressing warnings entirely or producing nonsensical severity classifications.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 93684a1. Configure here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in settings.service.ts — updateDetectorConfig now validates the fully resolved state (defaults + stored + new delta) before persisting.


/**
 * Produces the API-facing form of a fully resolved detector config.
 *
 * The z-score, consecutive-sample and cooldown fields are always copied. The
 * two absolute thresholds are included only when they hold a finite number, so
 * the Infinity sentinel that stands for "no threshold configured" never reaches
 * JSON serialization.
 *
 * @param config - Fully populated detector configuration.
 * @returns A new object; `warningAbsolute` / `criticalAbsolute` are absent when non-finite.
 */
export function toApiDetectorConfig(config: Required<DetectorConfig>): DetectorConfig {
  const { warningZScore, criticalZScore, consecutiveRequired, cooldownMs } = config;
  const { warningAbsolute, criticalAbsolute } = config;

  const apiConfig: DetectorConfig = {
    warningZScore,
    criticalZScore,
    consecutiveRequired,
    cooldownMs,
  };

  if (isFinite(warningAbsolute)) {
    apiConfig.warningAbsolute = warningAbsolute;
  }
  if (isFinite(criticalAbsolute)) {
    apiConfig.criticalAbsolute = criticalAbsolute;
  }
  return apiConfig;
}

/**
 * Maps a fully resolved detector config onto the spike detector's config shape.
 *
 * All values are passed through unchanged; the only difference is naming:
 * `warningAbsolute` becomes `warningThreshold` and `criticalAbsolute` becomes
 * `criticalThreshold`. Infinity sentinels are kept as-is.
 *
 * @param config - Fully populated detector configuration.
 * @returns A new SpikeDetectorConfig object.
 */
export function toSpikeDetectorConfig(config: Required<DetectorConfig>): SpikeDetectorConfig {
  const {
    warningZScore,
    criticalZScore,
    warningAbsolute: warningThreshold,
    criticalAbsolute: criticalThreshold,
    consecutiveRequired,
    cooldownMs,
  } = config;

  return {
    warningZScore,
    criticalZScore,
    warningThreshold,
    criticalThreshold,
    consecutiveRequired,
    cooldownMs,
  };
}

/**
 * Generic z-score defaults for metrics without API config (e.g. cpu_utilization).
 * Derived from zScoreDefaults, so both absolute thresholds are Infinity.
 */
export const DEFAULT_SPIKE_CONFIG: SpikeDetectorConfig = toSpikeDetectorConfig(zScoreDefaults);

/** Every MetricType value, i.e. the metrics that have an entry in DETECTOR_DEFAULTS. */
export const API_METRIC_TYPES = Object.values(MetricType);
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ function mockGlobalSettings(overrides?: Partial<AppSettings>): AppSettings {
metricForecastingDefaultRollingWindowMs: 21600000,
metricForecastingDefaultAlertThresholdMs: 7200000,
inferenceSlaConfig: {},
anomalyDetectorConfig: {},
createdAt: Date.now(),
updatedAt: Date.now(),
...overrides,
Expand Down
10 changes: 10 additions & 0 deletions apps/api/src/prometheus/prometheus.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ export class PrometheusService extends MultiConnectionPoller implements OnModule
private anomalyBySeverity: Gauge;
private anomalyByMetric: Gauge;
private correlatedGroupsTotal: Counter;
private detectorConfigUpdatesTotal: Counter;
private correlatedGroupsBySeverity: Gauge;
private correlatedGroupsByPattern: Gauge;
private anomalyDetectionBufferReady: Gauge;
Expand Down Expand Up @@ -599,6 +600,11 @@ export class PrometheusService extends MultiConnectionPoller implements OnModule
labelNames: ['connection', 'pattern', 'severity'],
registers: [this.registry],
});
this.detectorConfigUpdatesTotal = new Counter({
name: 'betterdb_detector_config_updates_total',
help: 'Total anomaly detector threshold config PATCH requests',
registers: [this.registry],
});
this.anomalyEventsCurrent = this.createGauge('anomaly_events_current', 'Unresolved anomalies', [
'severity',
]);
Expand Down Expand Up @@ -1358,6 +1364,10 @@ export class PrometheusService extends MultiConnectionPoller implements OnModule
this.pollsTotal.labels(connLabel).inc();
}

/** Increments the `betterdb_detector_config_updates_total` counter. */
incrementDetectorConfigUpdates(): void {
this.detectorConfigUpdatesTotal.inc();
}

updateVectorIndexMetrics(
connectionId: string,
indexes: ReadonlyArray<{
Expand Down
157 changes: 157 additions & 0 deletions apps/api/src/settings/dto/update-anomaly-detectors.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import { ApiPropertyOptional } from '@nestjs/swagger';
import { Type } from 'class-transformer';
import {
IsInt,
IsNumber,
IsOptional,
Max,
Min,
Validate,
ValidateNested,
ValidationArguments,
ValidatorConstraint,
ValidatorConstraintInterface,
} from 'class-validator';

/**
 * Cross-field constraint attached to `criticalZScore`: when the same payload
 * also carries `warningZScore`, the warning value must be strictly smaller.
 * If either value is missing there is nothing to compare and the check passes.
 */
@ValidatorConstraint({ name: 'criticalGreaterThanWarning', async: false })
class CriticalGreaterThanWarningValidator implements ValidatorConstraintInterface {
  /**
   * @param criticalZScore - Value of the decorated property.
   * @param args - Validation context; `args.object` is the DTO being validated.
   * @returns `true` when the pair is valid or incomplete, `false` otherwise.
   */
  validate(criticalZScore: number, args: ValidationArguments): boolean {
    const { warningZScore } = args.object as DetectorConfigDto;

    if (warningZScore !== undefined && criticalZScore !== undefined) {
      return warningZScore < criticalZScore;
    }
    // Only one side of the pair was supplied, so ordering cannot be violated here.
    return true;
  }

  /** Message reported when `validate` returns `false`. */
  defaultMessage(): string {
    return 'warningZScore must be less than criticalZScore when both are provided';
  }
}
Comment thread
cursor[bot] marked this conversation as resolved.

/**
 * Cross-field constraint attached to `criticalAbsolute`: when the same payload
 * also carries `warningAbsolute`, the warning value must be strictly smaller.
 * If either value is missing there is nothing to compare and the check passes.
 */
@ValidatorConstraint({ name: 'criticalAbsoluteGreaterThanWarning', async: false })
class CriticalAbsoluteGreaterThanWarningValidator implements ValidatorConstraintInterface {
  /**
   * @param criticalAbsolute - Value of the decorated property.
   * @param args - Validation context; `args.object` is the DTO being validated.
   * @returns `true` when the pair is valid or incomplete, `false` otherwise.
   */
  validate(criticalAbsolute: number, args: ValidationArguments): boolean {
    const { warningAbsolute } = args.object as DetectorConfigDto;

    if (warningAbsolute !== undefined && criticalAbsolute !== undefined) {
      return warningAbsolute < criticalAbsolute;
    }
    // Only one side of the pair was supplied, so ordering cannot be violated here.
    return true;
  }

  /** Message reported when `validate` returns `false`. */
  defaultMessage(): string {
    return 'warningAbsolute must be less than criticalAbsolute when both are provided';
  }
}

/**
 * Request-body shape for one metric's detector overrides.
 *
 * Every field is optional, so a payload may change any subset of thresholds.
 * The per-field decorators bound each value individually. The warning/critical
 * ordering is checked by the validators attached to the two `critical*` fields,
 * and only when both members of a pair are present in the same payload.
 */
export class DetectorConfigDto {
  /** Warning z-score threshold; accepted range 0.5–10. */
  @ApiPropertyOptional({ minimum: 0.5, maximum: 10 })
  @IsOptional()
  @IsNumber()
  @Min(0.5)
  @Max(10)
  warningZScore?: number;

  /** Critical z-score threshold; accepted range 1–15 and above `warningZScore` when both are sent. */
  @ApiPropertyOptional({ minimum: 1, maximum: 15 })
  @IsOptional()
  @IsNumber()
  @Min(1)
  @Max(15)
  @Validate(CriticalGreaterThanWarningValidator)
  criticalZScore?: number;

  /** Warning absolute threshold; must be non-negative. */
  @ApiPropertyOptional({ minimum: 0 })
  @IsOptional()
  @IsNumber()
  @Min(0)
  warningAbsolute?: number;

  /** Critical absolute threshold; non-negative and above `warningAbsolute` when both are sent. */
  @ApiPropertyOptional({ minimum: 0 })
  @IsOptional()
  @IsNumber()
  @Min(0)
  @Validate(CriticalAbsoluteGreaterThanWarningValidator)
  criticalAbsolute?: number;

  /** Integer in the range 1–20. */
  @ApiPropertyOptional({ minimum: 1, maximum: 20 })
  @IsOptional()
  @IsInt()
  @Min(1)
  @Max(20)
  consecutiveRequired?: number;

  /** Cooldown in milliseconds; integer in the range 1000–3600000 (one second to one hour). */
  @ApiPropertyOptional({ minimum: 1000, maximum: 3600000 })
  @IsOptional()
  @IsInt()
  @Min(1000)
  @Max(3600000)
  cooldownMs?: number;
}

/**
 * Request payload for updating anomaly detector thresholds.
 *
 * Holds one optional DetectorConfigDto per metric; metrics that are omitted
 * carry no override in this payload. The property names are the same strings
 * as the MetricType enum values declared in anomaly/anomaly.types.ts — nothing
 * in this class enforces that correspondence, so the two lists must be kept in
 * sync by hand when a metric is added.
 *
 * Each property uses the same decorator stack: @ValidateNested runs the nested
 * DTO's validators, and @Type tells class-transformer which class to
 * instantiate for the nested plain object.
 */
export class UpdateAnomalyDetectorsDto {
@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
connections?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
ops_per_sec?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
memory_used?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
input_kbps?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
output_kbps?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
slowlog_last_id?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
acl_denied?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
evicted_keys?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
blocked_clients?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
keyspace_misses?: DetectorConfigDto;

@ApiPropertyOptional({ type: DetectorConfigDto })
@IsOptional()
@ValidateNested()
@Type(() => DetectorConfigDto)
fragmentation_ratio?: DetectorConfigDto;
}
Loading
Loading