Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions proprietary/cache-proposals/__tests__/cache-readonly.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,181 @@ describe('CacheReadonlyService', () => {
expect(result.recommendation).toBe('tighten_threshold');
expect(result.recommended_threshold).toBeLessThan(0.1);
});

it('returns null confidence fields for insufficient_data', async () => {
  // Only five samples are seeded while the caller demands at least ten, so the
  // test expects an `insufficient_data` verdict with both confidence fields null.
  const { service, client } = await buildService();

  const sparseWindow = [0, 1, 2, 3, 4].map((offset) => ({
    score: 0.05,
    result: 'hit' as const,
    category: 'all',
    ts: Date.now() + offset,
  }));
  seedSimilarityWindow(client, SEMANTIC_NAME, sparseWindow);

  const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, {
    minSamples: 10,
  });

  expect(result.recommendation).toBe('insufficient_data');
  expect(result.confidence_score).toBeNull();
  expect(result.confidence_breakdown).toBeNull();
});

it('populates confidence_score and breakdown on tighten_threshold', async () => {
  const { service, client } = await buildService();
  const now = Date.now();

  // Builds `size` samples whose score comes from `scoreAt(idx)` and whose
  // timestamps increase by 1 starting at `firstTs`.
  const batch = (
    size: number,
    result: 'hit' | 'miss',
    scoreAt: (idx: number) => number,
    firstTs: number,
  ) =>
    Array.from({ length: size }, (_, idx) => ({
      score: scoreAt(idx),
      result,
      category: 'all',
      ts: firstTs + idx,
    }));

  // 85 hits cycling through five low scores, 10 hits just above 0.08,
  // and 5 misses at 0.2 — 100 samples in total.
  const samples = [
    ...batch(85, 'hit', (idx) => 0.02 + (idx % 5) * 0.01, now),
    ...batch(10, 'hit', (idx) => 0.08 + idx * 0.001, now + 100),
    ...batch(5, 'miss', () => 0.2, now + 200),
  ];
  seedSimilarityWindow(client, SEMANTIC_NAME, samples);

  const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, {
    minSamples: 50,
  });

  expect(result.recommendation).toBe('tighten_threshold');

  // The score must be present and lie in (0, 1].
  expect(result.confidence_score).not.toBeNull();
  expect(result.confidence_score).toBeGreaterThan(0);
  expect(result.confidence_score).toBeLessThanOrEqual(1);

  // Every breakdown component must contribute something positive.
  expect(result.confidence_breakdown).not.toBeNull();
  const breakdown = result.confidence_breakdown!;
  for (const component of [breakdown.sample, breakdown.signal, breakdown.freshness]) {
    expect(component).toBeGreaterThan(0);
  }
});

it('drives confidence to 0 when samples are stale', async () => {
  const { service, client } = await buildService();
  // Every sample is stamped roughly two hours in the past. The assertions below
  // rely on that being old enough for the freshness component to be 0
  // (NOTE(review): the freshness window itself is defined outside this test).
  const twoHoursAgo = Date.now() - 2 * 3_600_000;
  // Same sample mix as the fresh tighten_threshold scenario. Each group gets its
  // own timestamp base (0 / 100 / 200) so no two samples share a `ts`.
  const samples = [
    ...Array.from({ length: 85 }, (_, i) => ({
      score: 0.02 + (i % 5) * 0.01,
      result: 'hit' as const,
      category: 'all',
      ts: twoHoursAgo + i,
    })),
    ...Array.from({ length: 10 }, (_, i) => ({
      score: 0.08 + i * 0.001,
      result: 'hit' as const,
      category: 'all',
      ts: twoHoursAgo + 100 + i,
    })),
    ...Array.from({ length: 5 }, (_, i) => ({
      score: 0.2,
      result: 'miss' as const,
      category: 'all',
      // Base 200, not 100: a base of 100 would collide with the hit group above.
      ts: twoHoursAgo + 200 + i,
    })),
  ];
  seedSimilarityWindow(client, SEMANTIC_NAME, samples);
  const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, {
    minSamples: 50,
  });
  // The recommendation itself is still produced; only its confidence collapses.
  expect(result.recommendation).toBe('tighten_threshold');
  expect(result.confidence_score).toBe(0);
  expect(result.confidence_breakdown!.freshness).toBe(0);
});

it('populates non-zero confidence on low_hit_rate LOOSEN even when nearMissRate is low', async () => {
  const { service, client } = await buildService();
  const now = Date.now();

  // Produces `count` identical-score samples with consecutive timestamps.
  const repeat = (count: number, score: number, result: 'hit' | 'miss', firstTs: number) =>
    Array.from({ length: count }, (_, idx) => ({
      score,
      result,
      category: 'all',
      ts: firstTs + idx,
    }));

  // Scenario notes (assuming defaults threshold=0.10, uncertainty_band=0.05):
  //   near-misses fall in (0.10, 0.15], close-misses in (0.10, 0.20].
  // The misses at 0.18 are therefore "close" without being "near", which is
  // the combination this test needs.
  const samples = [
    ...repeat(3, 0.03, 'hit', now),
    ...repeat(100, 0.18, 'miss', now + 10),
    ...repeat(97, 0.5, 'miss', now + 200),
  ];
  seedSimilarityWindow(client, SEMANTIC_NAME, samples);

  const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, {
    minSamples: 50,
  });

  expect(result.recommendation).toBe('loosen_threshold');
  expect(result.signal).toBe('low_hit_rate');
  // Both the overall score and its signal component must be positive.
  expect(result.confidence_score).toBeGreaterThan(0);
  expect(result.confidence_breakdown!.signal).toBeGreaterThan(0);
});

it('populates non-zero confidence on distant_hits TIGHTEN', async () => {
  const { service, client } = await buildService();

  // Narrow the uncertainty band so "distant" and "uncertain" stop overlapping
  // (per the scenario notes, the distant_hits branch cannot be reached with the
  // default band of 0.05). With threshold=0.10 and band=0.01:
  //   midpoint = 0.05          → a hit is distant when score > 0.05
  //   threshold - band = 0.09  → a hit is uncertain when score >= 0.09
  const configKey = `${SEMANTIC_NAME}:__config`;
  client.hashes[configKey] = {
    threshold: '0.10',
    uncertainty_band: '0.01',
    category_thresholds: '{}',
  };

  const now = Date.now();

  // Produces `count` identical-score samples with consecutive timestamps.
  const repeat = (count: number, score: number, result: 'hit' | 'miss', firstTs: number) =>
    Array.from({ length: count }, (_, idx) => ({
      score,
      result,
      category: 'all',
      ts: firstTs + idx,
    }));

  // Targets: hitRate > 0.8, distantHitRate > 0.25, at least 20 hits, and an
  // uncertain fraction of at most 0.15 so the uncertain_hits branch is skipped.
  const samples = [
    // 70 strong hits at 0.02: neither uncertain nor distant.
    ...repeat(70, 0.02, 'hit', now),
    // 30 hits at 0.07: distant, but still below the uncertain cutoff.
    ...repeat(30, 0.07, 'hit', now + 100),
    // 10 far misses: hitRate = 100/110 ≈ 0.91 (> 0.8) and nearMissRate = 0.
    ...repeat(10, 0.5, 'miss', now + 200),
  ];
  seedSimilarityWindow(client, SEMANTIC_NAME, samples);

  const result = await service.thresholdRecommendation(CONNECTION_ID, SEMANTIC_NAME, {
    minSamples: 50,
  });

  expect(result.recommendation).toBe('tighten_threshold');
  expect(result.signal).toBe('distant_hits');
  expect(result.confidence_score).toBeGreaterThan(0);
  expect(result.confidence_breakdown!.signal).toBeGreaterThan(0);
});
});

describe('toolEffectiveness', () => {
Expand Down
166 changes: 166 additions & 0 deletions proprietary/cache-proposals/__tests__/confidence-score.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import {
computeConfidence,
signalBoundaryFor,
TARGET_SAMPLES,
SIGNAL_SAT,
FRESHNESS_WINDOW_MS,
TIGHTEN_BOUNDARY,
DISTANT_HITS_BOUNDARY,
LOOSEN_BOUNDARY,
LOW_HIT_RATE_BOUNDARY,
} from '../confidence-score';

describe('computeConfidence', () => {
  const now = 1_700_000_000_000;

  // Baseline: sample count at target, signal rate at saturation, and the latest
  // sample recorded exactly at `now`. Each test overrides only what it probes.
  const baseInput = {
    sampleCount: TARGET_SAMPLES,
    signalRate: SIGNAL_SAT,
    signalBoundary: TIGHTEN_BOUNDARY,
    latestRecordedAt: now,
    now,
  };

  type ConfidenceInput = Parameters<typeof computeConfidence>[0];

  // Runs computeConfidence on the baseline with the given fields replaced.
  const confidenceWith = (overrides: Partial<ConfidenceInput>) =>
    computeConfidence({ ...baseInput, ...overrides });

  it('returns 1.0 score when every component is saturated', () => {
    const { score, breakdown } = computeConfidence(baseInput);
    expect(score).toBeCloseTo(1.0, 5);
    expect(breakdown).toEqual({ sample: 1, signal: 1, freshness: 1 });
  });

  it('drives score toward 0 when signal is at the decision boundary', () => {
    const { score, breakdown } = confidenceWith({ signalRate: TIGHTEN_BOUNDARY });
    expect(breakdown.signal).toBe(0);
    expect(score).toBe(0);
  });

  it('caps sample component at 1 past TARGET_SAMPLES', () => {
    const { breakdown } = confidenceWith({ sampleCount: TARGET_SAMPLES * 10 });
    expect(breakdown.sample).toBe(1);
  });

  it('scales sample component linearly below TARGET_SAMPLES', () => {
    const { breakdown } = confidenceWith({ sampleCount: TARGET_SAMPLES / 2 });
    expect(breakdown.sample).toBeCloseTo(0.5, 5);
  });

  it('reports 0 freshness when samples are older than FRESHNESS_WINDOW_MS', () => {
    // One millisecond past the window edge.
    const staleAt = now - FRESHNESS_WINDOW_MS - 1;
    const { score, breakdown } = confidenceWith({ latestRecordedAt: staleAt });
    expect(breakdown.freshness).toBe(0);
    expect(score).toBe(0);
  });

  it('clamps freshness to 1 under clock skew (latestRecordedAt > now)', () => {
    // The latest sample claims to be 10 seconds in the future.
    const { score, breakdown } = confidenceWith({ latestRecordedAt: now + 10_000 });
    expect(breakdown.freshness).toBe(1);
    expect(Number.isFinite(score)).toBe(true);
    expect(score).toBeLessThanOrEqual(1);
  });

  it('short-circuits to 0 when sampleCount is 0', () => {
    const { score, breakdown } = confidenceWith({ sampleCount: 0 });
    expect(score).toBe(0);
    expect(breakdown).toEqual({ sample: 0, signal: 1, freshness: 1 });
  });

  it('uses the LOOSEN boundary correctly', () => {
    // Rate equal to its own boundary should leave no signal at all.
    const { breakdown } = confidenceWith({
      signalRate: LOOSEN_BOUNDARY,
      signalBoundary: LOOSEN_BOUNDARY,
    });
    expect(breakdown.signal).toBe(0);
  });

  it('always returns components in [0, 1] regardless of input', () => {
    // Deliberately out-of-range inputs: negative count, rate above 1, very old data.
    const { score, breakdown } = confidenceWith({
      sampleCount: -10,
      signalRate: 5,
      latestRecordedAt: now - FRESHNESS_WINDOW_MS * 100,
    });
    Object.values(breakdown).forEach((component) => {
      expect(component).toBeGreaterThanOrEqual(0);
      expect(component).toBeLessThanOrEqual(1);
    });
    expect(score).toBeGreaterThanOrEqual(0);
    expect(score).toBeLessThanOrEqual(1);
  });
});

describe('signalBoundaryFor', () => {
  it('returns the matching boundary for each known signal', () => {
    // Signal name → boundary constant it is expected to resolve to.
    const expectedBoundaries = [
      ['uncertain_hits', TIGHTEN_BOUNDARY],
      ['distant_hits', DISTANT_HITS_BOUNDARY],
      ['near_misses', LOOSEN_BOUNDARY],
      ['low_hit_rate', LOW_HIT_RATE_BOUNDARY],
    ] as const;

    for (const [signal, boundary] of expectedBoundaries) {
      expect(signalBoundaryFor(signal)).toBe(boundary);
    }
  });

  it('returns null for unknown or undefined signals', () => {
    // Missing, empty, and misspelled signal names all resolve to null.
    for (const unrecognised of [undefined, '', 'typo_signal']) {
      expect(signalBoundaryFor(unrecognised)).toBeNull();
    }
  });
});

describe('regression: mapping bug repro (PR #224 review by KIvanow)', () => {
  // Background: the first wiring fed signalRate=nearMissRate with
  // boundary=LOOSEN_BOUNDARY into every LOOSEN path, low_hit_rate included.
  // By the time the low_hit_rate path runs, the engine has already failed the
  // `nearMissRate > 0.25` check, so the rate sits below the boundary, the signal
  // component becomes 0, and the whole score follows it to 0.
  // The tests below demonstrate that the arithmetic is sound for the inputs it
  // receives; what was broken was the mapping that chose those inputs.
  const now = 1_700_000_000_000;

  // Full sample count and a latest sample at `now`, so only the signal varies.
  const freshFullSamples = {
    sampleCount: TARGET_SAMPLES,
    latestRecordedAt: now,
    now,
  };

  // Computes confidence for a given rate/boundary pair on fresh, full samples.
  const confidenceFor = (signalRate: number, signalBoundary: number) =>
    computeConfidence({ ...freshFullSamples, signalRate, signalBoundary });

  it('low_hit_rate path: passing nearMissRate (below LOOSEN_BOUNDARY) yields signal=0 / score=0', () => {
    // Buggy wiring: a LOOSEN triggered by low_hit_rate where nearMissRate is
    // 0.10, well under the 0.25 LOOSEN_BOUNDARY.
    const buggy = confidenceFor(0.1, LOOSEN_BOUNDARY);
    expect(buggy.breakdown.signal).toBe(0);
    expect(buggy.score).toBe(0);

    // Correct wiring for the same engine state: closeMissFraction = 0.5 measured
    // against LOW_HIT_RATE_BOUNDARY (0.1), which leaves a real signal.
    const fixed = confidenceFor(0.5, LOW_HIT_RATE_BOUNDARY);
    expect(fixed.breakdown.signal).toBeGreaterThan(0);
    expect(fixed.score).toBeGreaterThan(0);
  });

  it('distant_hits path: passing uncertainHitRate (below TIGHTEN_BOUNDARY) yields signal=0 / score=0', () => {
    // Mirror case: a distant_hits TIGHTEN fires when uncertainHitRate ≤ 0.2 while
    // distantHitRate > 0.25. The buggy wiring handed uncertainHitRate to the
    // confidence calculation.
    const buggy = confidenceFor(0.15, TIGHTEN_BOUNDARY);
    expect(buggy.breakdown.signal).toBe(0);
    expect(buggy.score).toBe(0);

    // Correct wiring: the distant-hit rate against its own boundary.
    const fixed = confidenceFor(0.4, DISTANT_HITS_BOUNDARY);
    expect(fixed.breakdown.signal).toBeGreaterThan(0);
    expect(fixed.score).toBeGreaterThan(0);
  });
});
Loading
Loading