diff --git a/apps/backend/app.ts b/apps/backend/app.ts index 29250af4..ecb01085 100644 --- a/apps/backend/app.ts +++ b/apps/backend/app.ts @@ -22,6 +22,7 @@ import { playlist_route } from './routes/playlist.route.js'; import { startPlaylistProxy, stopPlaylistProxy } from './services/playlist-proxy.service.js'; import { startAlbumPlaysRefresh, stopAlbumPlaysRefresh } from './services/album-plays-refresh.service.js'; import { setupCdcWebSocket, shutdownCdcWebSocket } from './services/cdc/index.js'; +import { startRotationTracksCacheWarm } from './services/rotation-tracks-cache-warm.service.js'; import { drainInFlightEnrichments } from './services/metadata/enrichment.service.js'; import { activeShow } from './middleware/checkActiveShow.js'; import errorHandler from './middleware/errorHandler.js'; @@ -132,6 +133,12 @@ const server = app.listen(port, () => { startPlaylistProxy(); startAlbumPlaysRefresh(); void setupCdcWebSocket(server); + // One-shot warm of the rotation-tracks picker LRUs in + // `library.service.ts`. Fire-and-forget — the walk shares the LML + // semaphore with concurrent traffic, and the LRUs are process-local so + // the work would otherwise have to happen on the first picker open per + // row after every restart. See `services/rotation-tracks-cache-warm.service.ts`. + startRotationTracksCacheWarm(); }); // Strictly greater than the LML client's 30 s AbortController diff --git a/apps/backend/services/library.service.ts b/apps/backend/services/library.service.ts index 3d0fd986..c2cbad5c 100644 --- a/apps/backend/services/library.service.ts +++ b/apps/backend/services/library.service.ts @@ -370,6 +370,20 @@ export function __resetRotationLmlLookupCacheForTests(): void { rotationLmlNegativeCache.clear(); } +/** + * Current sizes of the rotation LML LRUs. Used by the rotation-tracks + * cache-warm service (BS#998) to classify warm-pass outcomes per row + * (positive-cache-hit vs negative-cache-hit) without coupling to LRU + * internals. Underscored so it stays out of the public service surface — + * production callers should not depend on cache shape. + */ +export function __rotationLmlCacheSizesForWarm(): { positive: number; negative: number } { + return { + positive: rotationLmlPositiveCache.size, + negative: rotationLmlNegativeCache.size, + }; +} + /** * Look up the resolved Discogs release id for a rotation row by its id. * diff --git a/apps/backend/services/rotation-tracks-cache-warm.service.ts b/apps/backend/services/rotation-tracks-cache-warm.service.ts new file mode 100644 index 00000000..ee0edbc0 --- /dev/null +++ b/apps/backend/services/rotation-tracks-cache-warm.service.ts @@ -0,0 +1,177 @@ +/** + * One-shot warm pass for the rotation-tracks picker LRU caches in + * `library.service.ts` (BS#987's `rotationLmlPositiveCache` + + * `rotationLmlNegativeCache`). + * + * Why this lives in the API process and not in a `jobs/` package: + * The LRUs are process-local (`new LRUCache(...)` at module scope), so a + * sidecar one-shot job would warm its own copy and exit — useless for the + * long-lived API process that serves `GET /library/rotation/:id/tracks`. + * Tubafrenzy's `RotationTracklistCache.warmCache` does the equivalent on + * JVM boot. + * + * What gets warmed: + * For every active rotation row (`kill_date IS NULL OR kill_date > + * CURRENT_DATE` — the same predicate `getRotationFromDB` uses), the warmer + * calls `getDiscogsReleaseIdByRotationId` end-to-end so it goes through + * the same three-tier resolver real picker opens take. That means the + * warm walk shares the LML chokepoint with concurrent user traffic — + * `lookupSemaphore` (5 permits) and the rate-limit token bucket throttle + * it naturally, and the per-call 5 s budget from #993 caps tail latency. + * Rows resolved via tier 1 or tier 2 (DB lookup, no LML call) cost ~1 ms; + * rows that fall through to tier 3 spend a semaphore permit but populate + * either the positive or the negative LRU so the next picker open is + * instant. + * + * Fire-and-forget on app boot: + * We don't await the warm before `app.listen` resolves — health checks + * and live traffic should not pay startup latency for a best-effort + * optimization. A single row's failure is captured to Sentry and logged + * but does not stop the walk; the warm just continues with the next id. + * On process restart, the warmer re-runs (the LRU is in RAM); ~310 rows + * × ~few-second tail per LML call ≈ a few minutes of background traffic + * per restart, which is acceptable on a process that runs for days. + * + * Exported API: + * warmRotationTracksCache() — perform a single walk and return the + * counters (used by `start...` and tests). + * startRotationTracksCacheWarm() — fire-and-forget kickoff; called once + * from `app.ts` post-`listen`. + */ +import * as Sentry from '@sentry/node'; +import { sql } from 'drizzle-orm'; +import { db, rotation } from '@wxyc/database'; +import { getDiscogsReleaseIdByRotationId, __rotationLmlCacheSizesForWarm } from './library.service.js'; + +const LOG_PREFIX = '[rotation-tracks-cache-warm]'; + +/** + * Log progress every N rows. 50 keeps the boot log readable for the prod + * row count (~310) — about six progress lines plus a final summary. + */ +const PROGRESS_LOG_EVERY = 50; + +export interface WarmCounters { + /** Total rows walked. */ + scanned: number; + /** Rows that resolved via tier 1 or 2 (no LML call). */ + preResolved: number; + /** Rows that hit LML and got a release id (positive cache populated). */ + lmlPositive: number; + /** Rows that hit LML and got nothing (negative cache populated). */ + lmlNegative: number; + /** Rows where `getDiscogsReleaseIdByRotationId` threw. */ + errors: number; + /** Wall-clock duration of the walk in ms. */ + elapsedMs: number; +} + +/** + * Walk every active rotation row, calling `getDiscogsReleaseIdByRotationId` + * for each so the per-`rotation_id` LRUs in `library.service.ts` are populated. + * + * Sequential by design — `getDiscogsReleaseIdByRotationId` itself acquires + * the `lookupSemaphore` permit when it falls through to LML, so the upper + * bound on outstanding LML calls remains 5. Driving extra concurrency here + * would only deepen the semaphore queue without raising throughput, while + * stealing fairness from concurrent user requests. + * + * The before/after delta of the positive + negative LRU sizes (read via the + * test-only sizes accessor) reconstructs the LML-positive vs LML-negative + * tally without coupling this service to the LRU internals or requiring a + * second pass. + */ +export async function warmRotationTracksCache(): Promise { + const startTime = Date.now(); + const startSizes = __rotationLmlCacheSizesForWarm(); + + const rows = await db + .select({ id: rotation.id }) + .from(rotation) + .where(sql`${rotation.kill_date} > CURRENT_DATE OR ${rotation.kill_date} IS NULL`); + + const counters: WarmCounters = { + scanned: 0, + preResolved: 0, + lmlPositive: 0, + lmlNegative: 0, + errors: 0, + elapsedMs: 0, + }; + + console.log(`${LOG_PREFIX} starting walk over ${rows.length} active rotation row(s)`); + + for (const row of rows) { + const rotationId = row.id as unknown as number; + counters.scanned += 1; + + try { + const beforeSizes = __rotationLmlCacheSizesForWarm(); + const result = await getDiscogsReleaseIdByRotationId(rotationId); + const afterSizes = __rotationLmlCacheSizesForWarm(); + + // Per-row LRU-delta classifier: if a positive entry was added during + // this iteration the row was a fresh tier-3 hit; same for negative. + // No size change means the row resolved via tier 1 or tier 2 (or hit + // a previously-cached LML result, which only happens on a duplicate + // rotation_id — impossible from this query but cheap to handle as + // "pre-resolved" since we did no LML work). + if (afterSizes.positive > beforeSizes.positive) { + counters.lmlPositive += 1; + } else if (afterSizes.negative > beforeSizes.negative) { + counters.lmlNegative += 1; + } else if (result !== null) { + counters.preResolved += 1; + } else { + // Null with no LRU growth: tier 1+2 missed and LML was either + // unconfigured, returned the same negative we'd already cached, or + // declined the row due to NULL artist_name/album_title. None of + // these populate either LRU, so we fold them into preResolved for + // the summary (the picker won't pay an LML round-trip on next open + // either way — it'll degrade to free-text immediately). + counters.preResolved += 1; + } + } catch (err) { + counters.errors += 1; + Sentry.captureException(err, { + tags: { subsystem: 'rotation-tracks-cache-warm' }, + extra: { rotation_id: rotationId }, + }); + console.warn(`${LOG_PREFIX} row ${rotationId} failed: ${(err as Error).message}`); + } + + if (counters.scanned % PROGRESS_LOG_EVERY === 0) { + console.log( + `${LOG_PREFIX} progress: scanned=${counters.scanned}/${rows.length} ` + + `preResolved=${counters.preResolved} lmlPositive=${counters.lmlPositive} ` + + `lmlNegative=${counters.lmlNegative} errors=${counters.errors}` + ); + } + } + + counters.elapsedMs = Date.now() - startTime; + + console.log( + `${LOG_PREFIX} done: scanned=${counters.scanned} preResolved=${counters.preResolved} ` + + `lmlPositive=${counters.lmlPositive} lmlNegative=${counters.lmlNegative} ` + + `errors=${counters.errors} elapsedMs=${counters.elapsedMs} ` + + `(starting cache sizes positive=${startSizes.positive} negative=${startSizes.negative})` + ); + + return counters; +} + +/** + * Fire-and-forget kickoff for app boot. Returns immediately; the walk runs + * in the background. A top-level walk failure (DB outage, for instance) is + * captured to Sentry and logged — boot should not depend on the walk + * succeeding. + */ +export function startRotationTracksCacheWarm(): void { + void warmRotationTracksCache().catch((err) => { + Sentry.captureException(err, { + tags: { subsystem: 'rotation-tracks-cache-warm' }, + }); + console.error(`${LOG_PREFIX} walk aborted:`, err); + }); +} diff --git a/tests/unit/services/rotation-tracks-cache-warm.service.test.ts b/tests/unit/services/rotation-tracks-cache-warm.service.test.ts new file mode 100644 index 00000000..629c8e4b --- /dev/null +++ b/tests/unit/services/rotation-tracks-cache-warm.service.test.ts @@ -0,0 +1,215 @@ +/** + * Unit tests for the rotation-tracks picker LRU warm pass (BS#998). + * + * Coverage: + * 1. The walk fetches active rotation rows (kill_date IS NULL OR > today), + * calls `getDiscogsReleaseIdByRotationId` for each, and emits a final + * summary log line with the expected counters. + * 2. A single row's failure is captured to Sentry and does not halt the + * walk; subsequent rows are still visited. + * 3. The counter classification works: tier-3 positive vs negative cache + * additions are correctly tallied via the LRU-sizes delta. + * 4. `startRotationTracksCacheWarm` is fire-and-forget: a thrown error in + * the top-level walk is caught and Sentry-captured rather than escaping + * to the caller (which would be `app.listen`'s callback). + * + * The shared `db` and `getDiscogsReleaseIdByRotationId` (via library.service) + * are mocked. We hold the LRU-sizes accessor stub so we can simulate + * positive/negative cache growth. + */ +import { jest, describe, test, expect, beforeEach, afterEach } from '@jest/globals'; +import { db } from '../../mocks/database.mock'; + +// `library.service` exposes both the picker resolver we drive end-to-end +// and the LRU-sizes accessor we read for the counter classifier. Mock at +// the module boundary so the warm service receives our test doubles. +const mockGetDiscogsReleaseIdByRotationId = jest.fn<(id: number) => Promise>(); +type Sizes = { positive: number; negative: number }; +const sizesRef: { current: Sizes } = { current: { positive: 0, negative: 0 } }; +const mockRotationLmlCacheSizesForWarm = jest.fn<() => Sizes>(() => sizesRef.current); + +jest.mock('../../../apps/backend/services/library.service', () => ({ + getDiscogsReleaseIdByRotationId: mockGetDiscogsReleaseIdByRotationId, + __rotationLmlCacheSizesForWarm: mockRotationLmlCacheSizesForWarm, +})); + +const mockCaptureException = jest.fn(); +jest.mock('@sentry/node', () => ({ + captureException: (...args: unknown[]) => mockCaptureException(...args), +})); + +import { + warmRotationTracksCache, + startRotationTracksCacheWarm, +} from '../../../apps/backend/services/rotation-tracks-cache-warm.service'; + +type MockedFn = jest.Mock<(...args: unknown[]) => unknown>; + +const selectMock = db.select as unknown as MockedFn; + +/** + * Stub the `db.select().from().where()` chain so the warm pass receives the + * given list of active rotation ids. The query has no further chained + * methods after `.where()`, so the chain resolves directly when awaited. + */ +function mockActiveRotationRows(ids: number[]): void { + const fromFn = jest.fn(); + const whereFn = jest.fn<() => Promise>>().mockResolvedValue(ids.map((id) => ({ id }))); + fromFn.mockReturnValue({ where: whereFn }); + selectMock.mockReturnValue({ from: fromFn }); +} + +/** + * Make `getDiscogsReleaseIdByRotationId` advance the LRU sizes accessor as + * the real resolver would. Used by the tier-3 classification test. + */ +function resolverSideEffect(grow: 'positive' | 'negative' | 'none', returnValue: number | null): void { + mockGetDiscogsReleaseIdByRotationId.mockImplementationOnce(() => { + if (grow === 'positive') { + sizesRef.current = { ...sizesRef.current, positive: sizesRef.current.positive + 1 }; + } else if (grow === 'negative') { + sizesRef.current = { ...sizesRef.current, negative: sizesRef.current.negative + 1 }; + } + return Promise.resolve(returnValue); + }); +} + +describe('rotation-tracks-cache-warm.service', () => { + let consoleLogSpy: jest.SpiedFunction; + let consoleWarnSpy: jest.SpiedFunction; + let consoleErrorSpy: jest.SpiedFunction; + + beforeEach(() => { + selectMock.mockReset(); + mockGetDiscogsReleaseIdByRotationId.mockReset(); + mockCaptureException.mockReset(); + sizesRef.current = { positive: 0, negative: 0 }; + consoleLogSpy = jest.spyOn(console, 'log').mockImplementation(() => undefined); + consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation(() => undefined); + consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation(() => undefined); + }); + + afterEach(() => { + consoleLogSpy.mockRestore(); + consoleWarnSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + }); + + describe('warmRotationTracksCache', () => { + test('visits every active rotation row in id order', async () => { + mockActiveRotationRows([10, 20, 30]); + mockGetDiscogsReleaseIdByRotationId.mockResolvedValue(null); + + await warmRotationTracksCache(); + + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenCalledTimes(3); + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenNthCalledWith(1, 10); + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenNthCalledWith(2, 20); + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenNthCalledWith(3, 30); + }); + + test('summary counters: classifies tier-3 positive vs negative vs pre-resolved per row', async () => { + // Row 10 — tier 1/2 hit (resolver returns id, LRU sizes unchanged). + // Row 20 — tier-3 LML positive (LRU positive size grows by 1). + // Row 30 — tier-3 LML negative (LRU negative size grows by 1). + // Row 40 — tier 1/2 NULL (resolver returns null, LRU sizes unchanged). + mockActiveRotationRows([10, 20, 30, 40]); + resolverSideEffect('none', 12345); + resolverSideEffect('positive', 4080); + resolverSideEffect('negative', null); + resolverSideEffect('none', null); + + const counters = await warmRotationTracksCache(); + + expect(counters.scanned).toBe(4); + expect(counters.preResolved).toBe(2); + expect(counters.lmlPositive).toBe(1); + expect(counters.lmlNegative).toBe(1); + expect(counters.errors).toBe(0); + expect(counters.elapsedMs).toBeGreaterThanOrEqual(0); + }); + + test('does not halt when a single row throws — sibling rows still visited and captured to Sentry', async () => { + mockActiveRotationRows([10, 20, 30]); + mockGetDiscogsReleaseIdByRotationId.mockResolvedValueOnce(123); + mockGetDiscogsReleaseIdByRotationId.mockRejectedValueOnce(new Error('LML timeout')); + mockGetDiscogsReleaseIdByRotationId.mockResolvedValueOnce(null); + + const counters = await warmRotationTracksCache(); + + // Walk visited all three rows despite the middle row throwing. + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenCalledTimes(3); + expect(counters.scanned).toBe(3); + expect(counters.errors).toBe(1); + expect(mockCaptureException).toHaveBeenCalledTimes(1); + const [capturedErr, captureContext] = mockCaptureException.mock.calls[0] as [ + Error, + { tags?: Record; extra?: Record }, + ]; + expect(capturedErr).toBeInstanceOf(Error); + expect(capturedErr.message).toBe('LML timeout'); + expect(captureContext.tags?.subsystem).toBe('rotation-tracks-cache-warm'); + expect(captureContext.extra?.rotation_id).toBe(20); + }); + + test('emits a final summary log line carrying the expected counters', async () => { + mockActiveRotationRows([10, 20]); + resolverSideEffect('positive', 4080); + resolverSideEffect('negative', null); + + await warmRotationTracksCache(); + + // Final summary line is the last call to console.log emitted by the + // walk. Match on the counter shape rather than exact text so a minor + // copy edit doesn't break the assertion. + const finalLog = consoleLogSpy.mock.calls + .map((args) => args[0]) + .filter((line): line is string => typeof line === 'string') + .reverse() + .find((line) => line.includes('done:')); + expect(finalLog).toBeDefined(); + expect(finalLog).toContain('scanned=2'); + expect(finalLog).toContain('lmlPositive=1'); + expect(finalLog).toContain('lmlNegative=1'); + expect(finalLog).toContain('errors=0'); + expect(finalLog).toMatch(/elapsedMs=\d+/); + }); + + test('zero active rows is a clean no-op (no resolver calls, no errors)', async () => { + mockActiveRotationRows([]); + + const counters = await warmRotationTracksCache(); + + expect(mockGetDiscogsReleaseIdByRotationId).not.toHaveBeenCalled(); + expect(counters.scanned).toBe(0); + expect(counters.errors).toBe(0); + }); + }); + + describe('startRotationTracksCacheWarm', () => { + test('returns synchronously and does not throw when the walk fails at the top level', async () => { + // A DB outage at the SELECT step would reject before the per-row loop + // even begins; the kick-off must catch it so a transient blip during + // app boot can't crash the listen callback or emit an unhandled + // rejection. + const fromFn = jest.fn(); + const whereFn = jest.fn<() => Promise>>().mockRejectedValue(new Error('db down')); + fromFn.mockReturnValue({ where: whereFn }); + selectMock.mockReturnValue({ from: fromFn }); + + expect(() => startRotationTracksCacheWarm()).not.toThrow(); + + // Let the background promise settle so we can assert the catch arm + // captured to Sentry. + await new Promise((resolve) => setImmediate(resolve)); + + expect(mockCaptureException).toHaveBeenCalledTimes(1); + const [capturedErr, captureContext] = mockCaptureException.mock.calls[0] as [ + Error, + { tags?: Record }, + ]; + expect(capturedErr.message).toBe('db down'); + expect(captureContext.tags?.subsystem).toBe('rotation-tracks-cache-warm'); + }); + }); +});