From fa11b5b33eb14379b70fca8a8695cd9f2e571881 Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 15:31:11 +0300 Subject: [PATCH 1/9] feat(cache-proposals): pure confidence-score function for threshold recommendations --- .../__tests__/confidence-score.spec.ts | 90 +++++++++++++++++++ .../cache-proposals/confidence-score.ts | 66 ++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 proprietary/cache-proposals/__tests__/confidence-score.spec.ts create mode 100644 proprietary/cache-proposals/confidence-score.ts diff --git a/proprietary/cache-proposals/__tests__/confidence-score.spec.ts b/proprietary/cache-proposals/__tests__/confidence-score.spec.ts new file mode 100644 index 00000000..7ac92010 --- /dev/null +++ b/proprietary/cache-proposals/__tests__/confidence-score.spec.ts @@ -0,0 +1,90 @@ +import { + computeConfidence, + TARGET_SAMPLES, + SIGNAL_SAT, + FRESHNESS_WINDOW_MS, + TIGHTEN_BOUNDARY, + LOOSEN_BOUNDARY, +} from '../confidence-score'; + +describe('computeConfidence', () => { + const now = 1_700_000_000_000; + const baseInput = { + sampleCount: TARGET_SAMPLES, + signalRate: SIGNAL_SAT, + signalBoundary: TIGHTEN_BOUNDARY, + latestRecordedAt: now, + now, + }; + + it('returns 1.0 score when every component is saturated', () => { + const result = computeConfidence(baseInput); + expect(result.score).toBeCloseTo(1.0, 5); + expect(result.breakdown).toEqual({ sample: 1, signal: 1, freshness: 1 }); + }); + + it('drives score toward 0 when signal is at the decision boundary', () => { + const result = computeConfidence({ ...baseInput, signalRate: TIGHTEN_BOUNDARY }); + expect(result.breakdown.signal).toBe(0); + expect(result.score).toBe(0); + }); + + it('caps sample component at 1 past TARGET_SAMPLES', () => { + const result = computeConfidence({ ...baseInput, sampleCount: TARGET_SAMPLES * 10 }); + expect(result.breakdown.sample).toBe(1); + }); + + it('scales sample component linearly below TARGET_SAMPLES', () => { + const result = computeConfidence({ ...baseInput, sampleCount: TARGET_SAMPLES / 2 }); + expect(result.breakdown.sample).toBeCloseTo(0.5, 5); + }); + + it('reports 0 freshness when samples are older than FRESHNESS_WINDOW_MS', () => { + const result = computeConfidence({ + ...baseInput, + latestRecordedAt: now - FRESHNESS_WINDOW_MS - 1, + }); + expect(result.breakdown.freshness).toBe(0); + expect(result.score).toBe(0); + }); + + it('clamps freshness to 1 under clock skew (latestRecordedAt > now)', () => { + const result = computeConfidence({ + ...baseInput, + latestRecordedAt: now + 10_000, + }); + expect(result.breakdown.freshness).toBe(1); + expect(Number.isFinite(result.score)).toBe(true); + expect(result.score).toBeLessThanOrEqual(1); + }); + + it('short-circuits to 0 when sampleCount is 0', () => { + const result = computeConfidence({ ...baseInput, sampleCount: 0 }); + expect(result.score).toBe(0); + expect(result.breakdown).toEqual({ sample: 0, signal: 1, freshness: 1 }); + }); + + it('uses the LOOSEN boundary correctly', () => { + const result = computeConfidence({ + ...baseInput, + signalRate: LOOSEN_BOUNDARY, + signalBoundary: LOOSEN_BOUNDARY, + }); + expect(result.breakdown.signal).toBe(0); + }); + + it('always returns components in [0, 1] regardless of input', () => { + const result = computeConfidence({ + ...baseInput, + sampleCount: -10, + signalRate: 5, + latestRecordedAt: now - FRESHNESS_WINDOW_MS * 100, + }); + for (const v of Object.values(result.breakdown)) { + expect(v).toBeGreaterThanOrEqual(0); + expect(v).toBeLessThanOrEqual(1); + } + expect(result.score).toBeGreaterThanOrEqual(0); + expect(result.score).toBeLessThanOrEqual(1); + }); +}); diff --git a/proprietary/cache-proposals/confidence-score.ts b/proprietary/cache-proposals/confidence-score.ts new file mode 100644 index 00000000..f62334e5 --- /dev/null +++ b/proprietary/cache-proposals/confidence-score.ts @@ -0,0 +1,66 @@ +/** + * Pure scoring math for TIGHTEN / LOOSEN proposals. No IO, no DB, no + * NestJS — pure function so the math is unit-testable in isolation. + * + * Geometric mean of three 0–1 components: sample count, signal strength, + * and sample freshness. One weak component drags the whole score down, + * which matches the intent — 1000 samples should not rescue a borderline + * signal. + */ + +export const TARGET_SAMPLES = 200; +export const SIGNAL_SAT = 0.8; +export const FRESHNESS_WINDOW_MS = 3_600_000; +export const TIGHTEN_BOUNDARY = 0.2; +export const LOOSEN_BOUNDARY = 0.3; + +export interface ConfidenceComponents { + sample: number; + signal: number; + freshness: number; +} + +export interface ConfidenceResult { + score: number; + breakdown: ConfidenceComponents; +} + +export interface ConfidenceInput { + sampleCount: number; + /** `uncertainHitRate` for TIGHTEN, `nearMissRate` for LOOSEN. */ + signalRate: number; + /** Decision boundary the engine used: `TIGHTEN_BOUNDARY` or `LOOSEN_BOUNDARY`. */ + signalBoundary: number; + /** Epoch ms of the most recent sample in the filtered window. */ + latestRecordedAt: number; + /** Epoch ms representing "now" (injected for testability). */ + now: number; +} + +const clamp01 = (n: number): number => { + if (!Number.isFinite(n) || n <= 0) { + return 0; + } + if (n >= 1) { + return 1; + } + return n; +}; + +export function computeConfidence(input: ConfidenceInput): ConfidenceResult { + const sample = clamp01(input.sampleCount / TARGET_SAMPLES); + const signal = clamp01( + (input.signalRate - input.signalBoundary) / (SIGNAL_SAT - input.signalBoundary), + ); + const ageMs = input.now - input.latestRecordedAt; + const freshness = ageMs <= 0 ? 1 : clamp01(1 - ageMs / FRESHNESS_WINDOW_MS); + + const breakdown: ConfidenceComponents = { sample, signal, freshness }; + + if (sample === 0 || signal === 0 || freshness === 0) { + return { score: 0, breakdown }; + } + + const score = Math.cbrt(sample * signal * freshness); + return { score: clamp01(score), breakdown }; +} From f5537e68214098c786788d88e2565e571685155d Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 15:37:02 +0300 Subject: [PATCH 2/9] chore(cache-proposals): expand freshness ternary to braced if/else --- proprietary/cache-proposals/confidence-score.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/proprietary/cache-proposals/confidence-score.ts b/proprietary/cache-proposals/confidence-score.ts index f62334e5..a82f7e2c 100644 --- a/proprietary/cache-proposals/confidence-score.ts +++ b/proprietary/cache-proposals/confidence-score.ts @@ -53,7 +53,12 @@ export function computeConfidence(input: ConfidenceInput): ConfidenceResult { (input.signalRate - input.signalBoundary) / (SIGNAL_SAT - input.signalBoundary), ); const ageMs = input.now - input.latestRecordedAt; - const freshness = ageMs <= 0 ? 1 : clamp01(1 - ageMs / FRESHNESS_WINDOW_MS); + let freshness: number; + if (ageMs <= 0) { + freshness = 1; + } else { + freshness = clamp01(1 - ageMs / FRESHNESS_WINDOW_MS); + } const breakdown: ConfidenceComponents = { sample, signal, freshness }; From 195fd5cb777f6763eef2ccd5688a37936808f7e5 Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 15:39:19 +0300 Subject: [PATCH 3/9] feat(cache-proposals): add confidence fields to ThresholdRecommendation type --- proprietary/cache-proposals/cache-readonly.types.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/proprietary/cache-proposals/cache-readonly.types.ts b/proprietary/cache-proposals/cache-readonly.types.ts index f26d38b7..e7751029 100644 --- a/proprietary/cache-proposals/cache-readonly.types.ts +++ b/proprietary/cache-proposals/cache-readonly.types.ts @@ -82,6 +82,12 @@ export const THRESHOLD_REASONINGS = { `Declaring optimal to break the cycle.`, } as const; +export interface ThresholdRecommendationConfidenceBreakdown { + sample: number; + signal: number; + freshness: number; +} + export interface ThresholdRecommendation { category: string; sample_count: number; @@ -100,6 +106,8 @@ export interface ThresholdRecommendation { metrics_snapshot?: TuningMetricsSnapshot; dampening_factor?: number; consecutive_same_direction?: number; + confidence_score: number | null; + confidence_breakdown: ThresholdRecommendationConfidenceBreakdown | null; } export interface TuningHistoryEntry { From 2f8a2050f58d5506a87072e4d56f7c309251be2c Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 15:42:22 +0300 Subject: [PATCH 4/9] feat(cache-proposals): surface confidence score on threshold recommendations --- .../cache-proposals/cache-readonly.service.ts | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/proprietary/cache-proposals/cache-readonly.service.ts b/proprietary/cache-proposals/cache-readonly.service.ts index 2021ce01..ef2c2a54 100644 --- a/proprietary/cache-proposals/cache-readonly.service.ts +++ b/proprietary/cache-proposals/cache-readonly.service.ts @@ -19,6 +19,7 @@ import type { SimilarityDistribution, SimilarityDistributionBucket, ThresholdRecommendation, + ThresholdRecommendationConfidenceBreakdown, ThresholdRecommendationKind, ToolEffectivenessEntry, ToolEffectivenessRecommendation, @@ -26,6 +27,11 @@ import type { TuningMetricsSnapshot, } from './cache-readonly.types'; import { DatabasePort } from '@app/common/interfaces/database-port.interface'; +import { + computeConfidence, + TIGHTEN_BOUNDARY, + LOOSEN_BOUNDARY, +} from './confidence-score'; export type { CacheHealth, @@ -212,6 +218,8 @@ export class CacheReadonlyService { avg_miss_similarity: 0, recommendation: THRESHOLD_RECOMMENDATIONS.INSUFFICIENT_DATA, reasoning: THRESHOLD_REASONINGS.insufficientData(sampleCount, minSamples), + confidence_score: null, + confidence_breakdown: null, }; } const hits = filtered.filter((s) => s.result === 'hit'); @@ -384,6 +392,35 @@ export class CacheReadonlyService { } } + let confidence_score: number | null = null; + let confidence_breakdown: ThresholdRecommendationConfidenceBreakdown | null = null; + if ( + recommendation === THRESHOLD_RECOMMENDATIONS.TIGHTEN || + recommendation === THRESHOLD_RECOMMENDATIONS.LOOSEN + ) { + const isTighten = recommendation === THRESHOLD_RECOMMENDATIONS.TIGHTEN; + const latestRecordedAt = filtered.reduce( + (acc, s) => (s.recordedAt > acc ? s.recordedAt : acc), + 0, + ); + const result = computeConfidence({ + sampleCount, + signalRate: isTighten ? uncertainHitRate : nearMissRate, + signalBoundary: isTighten ? TIGHTEN_BOUNDARY : LOOSEN_BOUNDARY, + latestRecordedAt, + now: Date.now(), + }); + confidence_score = result.score; + confidence_breakdown = result.breakdown; + this.logger.log( + `thresholdRecommendation ${recommendation} cache=${cache.name} ` + + `category=${categoryLabel} score=${confidence_score.toFixed(3)} ` + + `sample=${result.breakdown.sample.toFixed(3)} ` + + `signal=${result.breakdown.signal.toFixed(3)} ` + + `freshness=${result.breakdown.freshness.toFixed(3)}`, + ); + } + return { category: categoryLabel, sample_count: sampleCount, @@ -400,6 +437,8 @@ export class CacheReadonlyService { metrics_snapshot: currentMetrics, dampening_factor: dampeningFactor, consecutive_same_direction: consecutiveSameDirection, + confidence_score, + confidence_breakdown, }; } From 2c235f579cb5de2aa355ae194d34863e8b674d5a Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 15:46:43 +0300 Subject: [PATCH 5/9] chore(cache-proposals): expand latestRecordedAt reduce to braced form --- proprietary/cache-proposals/cache-readonly.service.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/proprietary/cache-proposals/cache-readonly.service.ts b/proprietary/cache-proposals/cache-readonly.service.ts index ef2c2a54..4831dcfe 100644 --- a/proprietary/cache-proposals/cache-readonly.service.ts +++ b/proprietary/cache-proposals/cache-readonly.service.ts @@ -399,10 +399,12 @@ export class CacheReadonlyService { recommendation === THRESHOLD_RECOMMENDATIONS.LOOSEN ) { const isTighten = recommendation === THRESHOLD_RECOMMENDATIONS.TIGHTEN; - const latestRecordedAt = filtered.reduce( - (acc, s) => (s.recordedAt > acc ? s.recordedAt : acc), - 0, - ); + const latestRecordedAt = filtered.reduce((acc, s) => { + if (s.recordedAt > acc) { + return s.recordedAt; + } + return acc; + }, 0); const result = computeConfidence({ sampleCount, signalRate: isTighten ? uncertainHitRate : nearMissRate, From 216e15ce69920f2881f163d4ab33d0817028b5c5 Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 15:49:59 +0300 Subject: [PATCH 6/9] test(cache-proposals): assert confidence fields flow through threshold recommendation --- .../__tests__/cache-readonly.service.spec.ts | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts b/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts index b1577779..1761bae6 100644 --- a/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts +++ b/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts @@ -286,6 +286,96 @@ describe('CacheReadonlyService', () => { expect(result.recommendation).toBe('tighten_threshold'); expect(result.recommended_threshold).toBeLessThan(0.1); }); + + it('returns null confidence fields for insufficient_data', async () => { + const { service, client } = await buildService(); + seedSimilarityWindow( + client, + SEMANTIC_NAME, + Array.from({ length: 5 }, (_, i) => ({ + score: 0.05, + result: 'hit' as const, + category: 'all', + ts: Date.now() + i, + })), + ); + const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, { + minSamples: 10, + }); + expect(result.recommendation).toBe('insufficient_data'); + expect(result.confidence_score).toBeNull(); + expect(result.confidence_breakdown).toBeNull(); + }); + + it('populates confidence_score and breakdown on tighten_threshold', async () => { + const { service, client } = await buildService(); + const now = Date.now(); + const samples = [ + ...Array.from({ length: 85 }, (_, i) => ({ + score: 0.02 + (i % 5) * 0.01, + result: 'hit' as const, + category: 'all', + ts: now + i, + })), + ...Array.from({ length: 10 }, (_, i) => ({ + score: 0.08 + i * 0.001, + result: 'hit' as const, + category: 'all', + ts: now + 100 + i, + })), + ...Array.from({ length: 5 }, (_, i) => ({ + score: 0.2, + result: 'miss' as const, + category: 'all', + ts: now + 200 + i, + })), + ]; + seedSimilarityWindow(client, SEMANTIC_NAME, samples); + const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, { + minSamples: 50, + }); + expect(result.recommendation).toBe('tighten_threshold'); + expect(result.confidence_score).not.toBeNull(); + expect(result.confidence_score).toBeGreaterThan(0); + expect(result.confidence_score).toBeLessThanOrEqual(1); + expect(result.confidence_breakdown).not.toBeNull(); + const breakdown = result.confidence_breakdown!; + expect(breakdown.sample).toBeGreaterThan(0); + expect(breakdown.signal).toBeGreaterThan(0); + expect(breakdown.freshness).toBeGreaterThan(0); + }); + + it('drives confidence to 0 when samples are stale', async () => { + const { service, client } = await buildService(); + const twoHoursAgo = Date.now() - 2 * 3_600_000; + const samples = [ + ...Array.from({ length: 85 }, (_, i) => ({ + score: 0.02 + (i % 5) * 0.01, + result: 'hit' as const, + category: 'all', + ts: twoHoursAgo + i, + })), + ...Array.from({ length: 10 }, (_, i) => ({ + score: 0.08 + i * 0.001, + result: 'hit' as const, + category: 'all', + ts: twoHoursAgo + 100 + i, + })), + ...Array.from({ length: 5 }, (_, i) => ({ + score: 0.2, + result: 'miss' as const, + category: 'all', + ts: twoHoursAgo + 100 + i, + })), + ]; + seedSimilarityWindow(client, SEMANTIC_NAME, samples); + const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, { + minSamples: 50, + }); + expect(result.recommendation).toBe('tighten_threshold'); + expect(result.confidence_score).toBe(0); + expect(result.confidence_breakdown!.freshness).toBe(0); + }); }); describe('toolEffectiveness', () => { From 6fde8233c06f2dcf2f6871c25b1f44b82ffa318b Mon Sep 17 00:00:00 2001 From: jamby77 Date: Wed, 27 May 2026 17:08:39 +0300 Subject: [PATCH 7/9] fix(cache-proposals): align LOOSEN_BOUNDARY with engine's actual cutoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The engine triggers LOOSEN when nearMissRate > 0.25, but the confidence score was using 0.3 as the signal boundary — creating a dead zone where recommendations between 0.25 and 0.3 collapsed to score 0 because the signal component went negative. --- proprietary/cache-proposals/confidence-score.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proprietary/cache-proposals/confidence-score.ts b/proprietary/cache-proposals/confidence-score.ts index a82f7e2c..28dae2e9 100644 --- a/proprietary/cache-proposals/confidence-score.ts +++ b/proprietary/cache-proposals/confidence-score.ts @@ -12,7 +12,7 @@ export const TARGET_SAMPLES = 200; export const SIGNAL_SAT = 0.8; export const FRESHNESS_WINDOW_MS = 3_600_000; export const TIGHTEN_BOUNDARY = 0.2; -export const LOOSEN_BOUNDARY = 0.3; +export const LOOSEN_BOUNDARY = 0.25; export interface ConfidenceComponents { sample: number; From 7f8d0e54b58ab2dcc5685afc8ea349192af36084 Mon Sep 17 00:00:00 2001 From: jamby77 Date: Thu, 28 May 2026 12:19:38 +0300 Subject: [PATCH 8/9] fix(cache-proposals): map confidence signal to the engine's actual decision path The engine has four TIGHTEN/LOOSEN paths (uncertain_hits, distant_hits, near_misses, low_hit_rate). The confidence-score wiring was passing the wrong signal/boundary for distant_hits (used uncertainHitRate instead of distantHitRate) and low_hit_rate (used nearMissRate, which is below its own decision cutoff on that path), collapsing the score to 0 in both cases. Capture the engine's chosen signalRate alongside the existing `signal` discriminator, add per-signal boundary constants, and route both into the confidence calc. Reported by @KIvanow on PR #224. --- .../__tests__/cache-readonly.service.spec.ts | 85 +++++++++++++++++++ .../cache-proposals/cache-readonly.service.ts | 30 ++++--- .../cache-proposals/confidence-score.ts | 22 +++++ 3 files changed, 123 insertions(+), 14 deletions(-) diff --git a/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts b/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts index 1761bae6..7df164ed 100644 --- a/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts +++ b/proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts @@ -376,6 +376,91 @@ describe('CacheReadonlyService', () => { expect(result.confidence_score).toBe(0); expect(result.confidence_breakdown!.freshness).toBe(0); }); + + it('populates non-zero confidence on low_hit_rate LOOSEN even when nearMissRate is low', async () => { + const { service, client } = await buildService(); + const now = Date.now(); + // Defaults: threshold=0.10, uncertainty_band=0.05. + // near-misses: (0.10, 0.15] + // close-misses: (0.10, 0.20] + // We want CLOSE but NOT near, so misses at score 0.18. + const samples = [ + ...Array.from({ length: 3 }, (_, i) => ({ + score: 0.03, + result: 'hit' as const, + category: 'all', + ts: now + i, + })), + ...Array.from({ length: 100 }, (_, i) => ({ + score: 0.18, + result: 'miss' as const, + category: 'all', + ts: now + 10 + i, + })), + ...Array.from({ length: 97 }, (_, i) => ({ + score: 0.5, + result: 'miss' as const, + category: 'all', + ts: now + 200 + i, + })), + ]; + seedSimilarityWindow(client, SEMANTIC_NAME, samples); + const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, { + minSamples: 50, + }); + expect(result.recommendation).toBe('loosen_threshold'); + expect(result.signal).toBe('low_hit_rate'); + expect(result.confidence_score).toBeGreaterThan(0); + expect(result.confidence_breakdown!.signal).toBeGreaterThan(0); + }); + + it('populates non-zero confidence on distant_hits TIGHTEN', async () => { + const { service, client } = await buildService(); + // Tighten the uncertainty_band so distant != uncertain (the engine's + // distant_hits branch is unreachable with the default band=0.05). + // threshold=0.10, band=0.01: + // midpoint=0.05 → distant: score > 0.05 + // uncertain: score >= 0.09 (threshold - band) + client.hashes[`${SEMANTIC_NAME}:__config`] = { + threshold: '0.10', + uncertainty_band: '0.01', + category_thresholds: '{}', + }; + const now = Date.now(); + // Need hitRate > 0.8, distantHitRate > 0.25, hits.length >= 20, + // uncertainFractionOfAll ≤ 0.15 so the uncertain_hits branch skips. + const samples = [ + // 70 strong hits (score 0.02 — neither uncertain nor distant). + ...Array.from({ length: 70 }, (_, i) => ({ + score: 0.02, + result: 'hit' as const, + category: 'all', + ts: now + i, + })), + // 30 distant-but-not-uncertain hits (score 0.07 — distant, not uncertain). + ...Array.from({ length: 30 }, (_, i) => ({ + score: 0.07, + result: 'hit' as const, + category: 'all', + ts: now + 100 + i, + })), + // 10 misses → hitRate = 100/110 ≈ 0.91 (> 0.8). nearMissRate = 0. + ...Array.from({ length: 10 }, (_, i) => ({ + score: 0.5, + result: 'miss' as const, + category: 'all', + ts: now + 200 + i, + })), + ]; + seedSimilarityWindow(client, SEMANTIC_NAME, samples); + const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, { + minSamples: 50, + }); + expect(result.recommendation).toBe('tighten_threshold'); + expect(result.signal).toBe('distant_hits'); + expect(result.confidence_score).toBeGreaterThan(0); + expect(result.confidence_breakdown!.signal).toBeGreaterThan(0); + }); }); describe('toolEffectiveness', () => { diff --git a/proprietary/cache-proposals/cache-readonly.service.ts b/proprietary/cache-proposals/cache-readonly.service.ts index 4831dcfe..57b9a0a0 100644 --- a/proprietary/cache-proposals/cache-readonly.service.ts +++ b/proprietary/cache-proposals/cache-readonly.service.ts @@ -27,11 +27,7 @@ import type { TuningMetricsSnapshot, } from './cache-readonly.types'; import { DatabasePort } from '@app/common/interfaces/database-port.interface'; -import { - computeConfidence, - TIGHTEN_BOUNDARY, - LOOSEN_BOUNDARY, -} from './confidence-score'; +import { computeConfidence, signalBoundaryFor } from './confidence-score'; export type { CacheHealth, @@ -259,6 +255,7 @@ export class CacheReadonlyService { let recommendedThreshold: number | undefined; let reasoning: string; let signal: string | undefined; + let signalRate: number | undefined; const currentMetrics = { hit_rate: hitRate, @@ -276,6 +273,7 @@ export class CacheReadonlyService { if (uncertainFractionOfAll > 0.15) { recommendation = THRESHOLD_RECOMMENDATIONS.TIGHTEN; signal = 'uncertain_hits'; + signalRate = uncertainHitRate; const step = config.uncertainty_band * 0.6; recommendedThreshold = Math.max(0, threshold - step); reasoning = THRESHOLD_REASONINGS.tighten(uncertainHitRate); @@ -295,6 +293,7 @@ export class CacheReadonlyService { // 2× uncertainty_band to avoid overshooting on the first cycle. recommendation = THRESHOLD_RECOMMENDATIONS.TIGHTEN; signal = 'distant_hits'; + signalRate = distantHitRate; const sortedHitScores = hits.map((s) => s.score).sort((a, b) => a - b); const p75 = sortedHitScores[Math.floor(sortedHitScores.length * 0.75)]; const target = p75 + config.uncertainty_band * 0.3; @@ -309,6 +308,7 @@ export class CacheReadonlyService { // Many near-misses just above the threshold — probably too strict. recommendation = THRESHOLD_RECOMMENDATIONS.LOOSEN; signal = 'near_misses'; + signalRate = nearMissRate; recommendedThreshold = threshold + avgNearMissDelta; reasoning = THRESHOLD_REASONINGS.loosen(nearMissRate); } else if (hitRate < 0.05 && misses.length >= 20) { @@ -317,9 +317,11 @@ export class CacheReadonlyService { const closeMisses = misses.filter( (s) => s.score > threshold && s.score <= threshold + config.uncertainty_band * 2, ); - if (closeMisses.length / misses.length > 0.1) { + const closeMissFraction = closeMisses.length / misses.length; + if (closeMissFraction > 0.1) { recommendation = THRESHOLD_RECOMMENDATIONS.LOOSEN; signal = 'low_hit_rate'; + signalRate = closeMissFraction; const step = config.uncertainty_band * 0.6; recommendedThreshold = threshold + step; reasoning = THRESHOLD_REASONINGS.loosenLowHitRate(hitRate); @@ -394,11 +396,11 @@ export class CacheReadonlyService { let confidence_score: number | null = null; let confidence_breakdown: ThresholdRecommendationConfidenceBreakdown | null = null; - if ( + const isActionable = recommendation === THRESHOLD_RECOMMENDATIONS.TIGHTEN || - recommendation === THRESHOLD_RECOMMENDATIONS.LOOSEN - ) { - const isTighten = recommendation === THRESHOLD_RECOMMENDATIONS.TIGHTEN; + recommendation === THRESHOLD_RECOMMENDATIONS.LOOSEN; + const signalBoundary = signalBoundaryFor(signal); + if (isActionable && signalRate !== undefined && signalBoundary !== null) { const latestRecordedAt = filtered.reduce((acc, s) => { if (s.recordedAt > acc) { return s.recordedAt; @@ -407,8 +409,8 @@ export class CacheReadonlyService { }, 0); const result = computeConfidence({ sampleCount, - signalRate: isTighten ? uncertainHitRate : nearMissRate, - signalBoundary: isTighten ? TIGHTEN_BOUNDARY : LOOSEN_BOUNDARY, + signalRate, + signalBoundary, latestRecordedAt, now: Date.now(), }); @@ -416,9 +418,9 @@ export class CacheReadonlyService { confidence_breakdown = result.breakdown; this.logger.log( `thresholdRecommendation ${recommendation} cache=${cache.name} ` + - `category=${categoryLabel} score=${confidence_score.toFixed(3)} ` + + `category=${categoryLabel} signal=${signal} score=${confidence_score.toFixed(3)} ` + `sample=${result.breakdown.sample.toFixed(3)} ` + - `signal=${result.breakdown.signal.toFixed(3)} ` + + `signal_strength=${result.breakdown.signal.toFixed(3)} ` + `freshness=${result.breakdown.freshness.toFixed(3)}`, ); } diff --git a/proprietary/cache-proposals/confidence-score.ts b/proprietary/cache-proposals/confidence-score.ts index 28dae2e9..5d692271 100644 --- a/proprietary/cache-proposals/confidence-score.ts +++ b/proprietary/cache-proposals/confidence-score.ts @@ -11,8 +11,30 @@ export const TARGET_SAMPLES = 200; export const SIGNAL_SAT = 0.8; export const FRESHNESS_WINDOW_MS = 3_600_000; + +// Engine decision boundaries — each matches the trigger cutoff in +// cache-readonly.service.ts thresholdRecommendation() so the signal +// component starts at 0 right at the boundary and grows toward 1. export const TIGHTEN_BOUNDARY = 0.2; +export const DISTANT_HITS_BOUNDARY = 0.25; export const LOOSEN_BOUNDARY = 0.25; +export const LOW_HIT_RATE_BOUNDARY = 0.1; + +export function signalBoundaryFor(signal: string | undefined): number | null { + if (signal === 'uncertain_hits') { + return TIGHTEN_BOUNDARY; + } + if (signal === 'distant_hits') { + return DISTANT_HITS_BOUNDARY; + } + if (signal === 'near_misses') { + return LOOSEN_BOUNDARY; + } + if (signal === 'low_hit_rate') { + return LOW_HIT_RATE_BOUNDARY; + } + return null; +} export interface ConfidenceComponents { sample: number; From bbcd2c9b43d42f914a9091fa7411652680cd4ccc Mon Sep 17 00:00:00 2001 From: jamby77 Date: Thu, 28 May 2026 12:30:31 +0300 Subject: [PATCH 9/9] test(cache-proposals): lock down signalBoundaryFor + reproduce mapping bug Adds unit tests that: - assert signalBoundaryFor returns the right boundary for each of the four engine signals, and null for unknown/undefined input. - document the original mapping bug: feeding a path's rate against the wrong boundary (the old wiring) yields signal=0 and score=0, while feeding the right pair yields signal>0 and score>0. Two cases cover the low_hit_rate and distant_hits paths. Belt-and-suspenders over the existing service-spec regression guards. --- .../__tests__/confidence-score.spec.ts | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/proprietary/cache-proposals/__tests__/confidence-score.spec.ts b/proprietary/cache-proposals/__tests__/confidence-score.spec.ts index 7ac92010..ff7727a5 100644 --- a/proprietary/cache-proposals/__tests__/confidence-score.spec.ts +++ b/proprietary/cache-proposals/__tests__/confidence-score.spec.ts @@ -1,10 +1,13 @@ import { computeConfidence, + signalBoundaryFor, TARGET_SAMPLES, SIGNAL_SAT, FRESHNESS_WINDOW_MS, TIGHTEN_BOUNDARY, + DISTANT_HITS_BOUNDARY, LOOSEN_BOUNDARY, + LOW_HIT_RATE_BOUNDARY, } from '../confidence-score'; describe('computeConfidence', () => { @@ -88,3 +91,76 @@ describe('computeConfidence', () => { expect(result.score).toBeLessThanOrEqual(1); }); }); + +describe('signalBoundaryFor', () => { + it('returns the matching boundary for each known signal', () => { + expect(signalBoundaryFor('uncertain_hits')).toBe(TIGHTEN_BOUNDARY); + expect(signalBoundaryFor('distant_hits')).toBe(DISTANT_HITS_BOUNDARY); + expect(signalBoundaryFor('near_misses')).toBe(LOOSEN_BOUNDARY); + expect(signalBoundaryFor('low_hit_rate')).toBe(LOW_HIT_RATE_BOUNDARY); + }); + + it('returns null for unknown or undefined signals', () => { + expect(signalBoundaryFor(undefined)).toBeNull(); + expect(signalBoundaryFor('')).toBeNull(); + expect(signalBoundaryFor('typo_signal')).toBeNull(); + }); +}); + +describe('regression: mapping bug repro (PR #224 review by KIvanow)', () => { + // The original wiring passed signalRate=nearMissRate / boundary=LOOSEN_BOUNDARY + // for ALL LOOSEN paths, including low_hit_rate. On the low_hit_rate path the + // engine has already fallen through `nearMissRate > 0.25`, so the rate is + // below the boundary — the signal component collapses to 0 → whole score 0. + // These two tests show the math is correct given inputs; the bug was the + // mapping that fed the wrong inputs. + const now = 1_700_000_000_000; + const freshFullSamples = { + sampleCount: TARGET_SAMPLES, + latestRecordedAt: now, + now, + }; + + it('low_hit_rate path: passing nearMissRate (below LOOSEN_BOUNDARY) yields signal=0 / score=0', () => { + // Simulate the buggy wiring on a low_hit_rate triggered LOOSEN where + // nearMissRate happens to be 0.10 — well below the 0.25 LOOSEN_BOUNDARY. + const buggy = computeConfidence({ + ...freshFullSamples, + signalRate: 0.1, + signalBoundary: LOOSEN_BOUNDARY, + }); + expect(buggy.breakdown.signal).toBe(0); + expect(buggy.score).toBe(0); + + // Correct wiring on the same engine state: closeMissFraction = 0.5, + // boundary = LOW_HIT_RATE_BOUNDARY (0.1). Now there's a real signal. + const fixed = computeConfidence({ + ...freshFullSamples, + signalRate: 0.5, + signalBoundary: LOW_HIT_RATE_BOUNDARY, + }); + expect(fixed.breakdown.signal).toBeGreaterThan(0); + expect(fixed.score).toBeGreaterThan(0); + }); + + it('distant_hits path: passing uncertainHitRate (below TIGHTEN_BOUNDARY) yields signal=0 / score=0', () => { + // Symmetric: distant_hits TIGHTEN fires when uncertainHitRate ≤ 0.2 but + // distantHitRate > 0.25. Buggy wiring passed uncertainHitRate to the + // confidence calc. + const buggy = computeConfidence({ + ...freshFullSamples, + signalRate: 0.15, + signalBoundary: TIGHTEN_BOUNDARY, + }); + expect(buggy.breakdown.signal).toBe(0); + expect(buggy.score).toBe(0); + + const fixed = computeConfidence({ + ...freshFullSamples, + signalRate: 0.4, + signalBoundary: DISTANT_HITS_BOUNDARY, + }); + expect(fixed.breakdown.signal).toBeGreaterThan(0); + expect(fixed.score).toBeGreaterThan(0); + }); +});