From 240d56f7433206dc6241f06eeb8bad65d6dd6885 Mon Sep 17 00:00:00 2001 From: Jake Bromberg Date: Fri, 22 May 2026 10:22:31 -0700 Subject: [PATCH] feat(library): warm the rotation-tracks picker LRU on backend boot (#998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `getDiscogsReleaseIdByRotationId`'s process-local LRUs (rotationLml{Positive,Negative}Cache, BS#987) start cold on every restart, so the first picker open per active rotation row pays a full LML round-trip — bounded at 5 s by BS#993 but still a user-visible stall. Tubafrenzy's RotationTracklistCache.warmCache does the equivalent on JVM boot. New `rotation-tracks-cache-warm.service.ts` walks the ~310 active rotation rows (kill_date IS NULL OR > CURRENT_DATE, matching getRotationFromDB) and calls the resolver end-to-end so the existing lookupSemaphore (5 permits), token bucket, and 5 s per-call timeout all apply unchanged. Sequential by design — driving extra concurrency would only deepen the queue. Fire-and-forget on boot so health checks don't pay startup latency; per-row failures are Sentry-captured with subsystem=rotation-tracks-cache-warm + rotation_id but don't halt the walk. Counter classification reads the LRU sizes accessor before and after each resolver call to tally preResolved vs lmlPositive vs lmlNegative without coupling to LRU internals. Progress log every 50 rows; final summary carries scanned + the four breakdowns + elapsedMs. Closes #998 --- apps/backend/app.ts | 7 + apps/backend/services/library.service.ts | 14 ++ .../rotation-tracks-cache-warm.service.ts | 177 ++++++++++++++ ...rotation-tracks-cache-warm.service.test.ts | 215 ++++++++++++++++++ 4 files changed, 413 insertions(+) create mode 100644 apps/backend/services/rotation-tracks-cache-warm.service.ts create mode 100644 tests/unit/services/rotation-tracks-cache-warm.service.test.ts diff --git a/apps/backend/app.ts b/apps/backend/app.ts index 29250af4..ecb01085 100644 --- a/apps/backend/app.ts +++ b/apps/backend/app.ts @@ -22,6 +22,7 @@ import { playlist_route } from './routes/playlist.route.js'; import { startPlaylistProxy, stopPlaylistProxy } from './services/playlist-proxy.service.js'; import { startAlbumPlaysRefresh, stopAlbumPlaysRefresh } from './services/album-plays-refresh.service.js'; import { setupCdcWebSocket, shutdownCdcWebSocket } from './services/cdc/index.js'; +import { startRotationTracksCacheWarm } from './services/rotation-tracks-cache-warm.service.js'; import { drainInFlightEnrichments } from './services/metadata/enrichment.service.js'; import { activeShow } from './middleware/checkActiveShow.js'; import errorHandler from './middleware/errorHandler.js'; @@ -132,6 +133,12 @@ const server = app.listen(port, () => { startPlaylistProxy(); startAlbumPlaysRefresh(); void setupCdcWebSocket(server); + // One-shot warm of the rotation-tracks picker LRUs in + // `library.service.ts`. Fire-and-forget — the walk shares the LML + // semaphore with concurrent traffic, and the LRUs are process-local so + // the work would otherwise have to happen on the first picker open per + // row after every restart. See `services/rotation-tracks-cache-warm.service.ts`. + startRotationTracksCacheWarm(); }); // Strictly greater than the LML client's 30 s AbortController diff --git a/apps/backend/services/library.service.ts b/apps/backend/services/library.service.ts index 3d0fd986..c2cbad5c 100644 --- a/apps/backend/services/library.service.ts +++ b/apps/backend/services/library.service.ts @@ -370,6 +370,20 @@ export function __resetRotationLmlLookupCacheForTests(): void { rotationLmlNegativeCache.clear(); } +/** + * Current sizes of the rotation LML LRUs. Used by the rotation-tracks + * cache-warm service (BS#998) to classify warm-pass outcomes per row + * (positive-cache-hit vs negative-cache-hit) without coupling to LRU + * internals. Underscored so it stays out of the public service surface — + * production callers should not depend on cache shape. + */ +export function __rotationLmlCacheSizesForWarm(): { positive: number; negative: number } { + return { + positive: rotationLmlPositiveCache.size, + negative: rotationLmlNegativeCache.size, + }; +} + /** * Look up the resolved Discogs release id for a rotation row by its id. * diff --git a/apps/backend/services/rotation-tracks-cache-warm.service.ts b/apps/backend/services/rotation-tracks-cache-warm.service.ts new file mode 100644 index 00000000..ee0edbc0 --- /dev/null +++ b/apps/backend/services/rotation-tracks-cache-warm.service.ts @@ -0,0 +1,177 @@ +/** + * One-shot warm pass for the rotation-tracks picker LRU caches in + * `library.service.ts` (BS#987's `rotationLmlPositiveCache` + + * `rotationLmlNegativeCache`). + * + * Why this lives in the API process and not in a `jobs/` package: + * The LRUs are process-local (`new LRUCache(...)` at module scope), so a + * sidecar one-shot job would warm its own copy and exit — useless for the + * long-lived API process that serves `GET /library/rotation/:id/tracks`. + * Tubafrenzy's `RotationTracklistCache.warmCache` does the equivalent on + * JVM boot. + * + * What gets warmed: + * For every active rotation row (`kill_date IS NULL OR kill_date > + * CURRENT_DATE` — the same predicate `getRotationFromDB` uses), the warmer + * calls `getDiscogsReleaseIdByRotationId` end-to-end so it goes through + * the same three-tier resolver real picker opens take. That means the + * warm walk shares the LML chokepoint with concurrent user traffic — + * `lookupSemaphore` (5 permits) and the rate-limit token bucket throttle + * it naturally, and the per-call 5 s budget from #993 caps tail latency. + * Rows resolved via tier 1 or tier 2 (DB lookup, no LML call) cost ~1 ms; + * rows that fall through to tier 3 spend a semaphore permit but populate + * either the positive or the negative LRU so the next picker open is + * instant. + * + * Fire-and-forget on app boot: + * We don't await the warm before `app.listen` resolves — health checks + * and live traffic should not pay startup latency for a best-effort + * optimization. A single row's failure is captured to Sentry and logged + * but does not stop the walk; the warm just continues with the next id. + * On process restart, the warmer re-runs (the LRU is in RAM); ~310 rows + * × ~few-second tail per LML call ≈ a few minutes of background traffic + * per restart, which is acceptable on a process that runs for days. + * + * Exported API: + * warmRotationTracksCache() — perform a single walk and return the + * counters (used by `start...` and tests). + * startRotationTracksCacheWarm() — fire-and-forget kickoff; called once + * from `app.ts` post-`listen`. + */ +import * as Sentry from '@sentry/node'; +import { sql } from 'drizzle-orm'; +import { db, rotation } from '@wxyc/database'; +import { getDiscogsReleaseIdByRotationId, __rotationLmlCacheSizesForWarm } from './library.service.js'; + +const LOG_PREFIX = '[rotation-tracks-cache-warm]'; + +/** + * Log progress every N rows. 50 keeps the boot log readable for the prod + * row count (~310) — about six progress lines plus a final summary. + */ +const PROGRESS_LOG_EVERY = 50; + +export interface WarmCounters { + /** Total rows walked. */ + scanned: number; + /** Rows that resolved via tier 1 or 2 (no LML call). */ + preResolved: number; + /** Rows that hit LML and got a release id (positive cache populated). */ + lmlPositive: number; + /** Rows that hit LML and got nothing (negative cache populated). */ + lmlNegative: number; + /** Rows where `getDiscogsReleaseIdByRotationId` threw. */ + errors: number; + /** Wall-clock duration of the walk in ms. */ + elapsedMs: number; +} + +/** + * Walk every active rotation row, calling `getDiscogsReleaseIdByRotationId` + * for each so the per-`rotation_id` LRUs in `library.service.ts` are populated. + * + * Sequential by design — `getDiscogsReleaseIdByRotationId` itself acquires + * the `lookupSemaphore` permit when it falls through to LML, so the upper + * bound on outstanding LML calls remains 5. Driving extra concurrency here + * would only deepen the semaphore queue without raising throughput, while + * stealing fairness from concurrent user requests. + * + * The before/after delta of the positive + negative LRU sizes (read via the + * test-only sizes accessor) reconstructs the LML-positive vs LML-negative + * tally without coupling this service to the LRU internals or requiring a + * second pass. + */ +export async function warmRotationTracksCache(): Promise { + const startTime = Date.now(); + const startSizes = __rotationLmlCacheSizesForWarm(); + + const rows = await db + .select({ id: rotation.id }) + .from(rotation) + .where(sql`${rotation.kill_date} > CURRENT_DATE OR ${rotation.kill_date} IS NULL`); + + const counters: WarmCounters = { + scanned: 0, + preResolved: 0, + lmlPositive: 0, + lmlNegative: 0, + errors: 0, + elapsedMs: 0, + }; + + console.log(`${LOG_PREFIX} starting walk over ${rows.length} active rotation row(s)`); + + for (const row of rows) { + const rotationId = row.id as unknown as number; + counters.scanned += 1; + + try { + const beforeSizes = __rotationLmlCacheSizesForWarm(); + const result = await getDiscogsReleaseIdByRotationId(rotationId); + const afterSizes = __rotationLmlCacheSizesForWarm(); + + // Per-row LRU-delta classifier: if a positive entry was added during + // this iteration the row was a fresh tier-3 hit; same for negative. + // No size change means the row resolved via tier 1 or tier 2 (or hit + // a previously-cached LML result, which only happens on a duplicate + // rotation_id — impossible from this query but cheap to handle as + // "pre-resolved" since we did no LML work). + if (afterSizes.positive > beforeSizes.positive) { + counters.lmlPositive += 1; + } else if (afterSizes.negative > beforeSizes.negative) { + counters.lmlNegative += 1; + } else if (result !== null) { + counters.preResolved += 1; + } else { + // Null with no LRU growth: tier 1+2 missed and LML was either + // unconfigured, returned the same negative we'd already cached, or + // declined the row due to NULL artist_name/album_title. None of + // these populate either LRU, so we fold them into preResolved for + // the summary (the picker won't pay an LML round-trip on next open + // either way — it'll degrade to free-text immediately). + counters.preResolved += 1; + } + } catch (err) { + counters.errors += 1; + Sentry.captureException(err, { + tags: { subsystem: 'rotation-tracks-cache-warm' }, + extra: { rotation_id: rotationId }, + }); + console.warn(`${LOG_PREFIX} row ${rotationId} failed: ${(err as Error).message}`); + } + + if (counters.scanned % PROGRESS_LOG_EVERY === 0) { + console.log( + `${LOG_PREFIX} progress: scanned=${counters.scanned}/${rows.length} ` + + `preResolved=${counters.preResolved} lmlPositive=${counters.lmlPositive} ` + + `lmlNegative=${counters.lmlNegative} errors=${counters.errors}` + ); + } + } + + counters.elapsedMs = Date.now() - startTime; + + console.log( + `${LOG_PREFIX} done: scanned=${counters.scanned} preResolved=${counters.preResolved} ` + + `lmlPositive=${counters.lmlPositive} lmlNegative=${counters.lmlNegative} ` + + `errors=${counters.errors} elapsedMs=${counters.elapsedMs} ` + + `(starting cache sizes positive=${startSizes.positive} negative=${startSizes.negative})` + ); + + return counters; +} + +/** + * Fire-and-forget kickoff for app boot. Returns immediately; the walk runs + * in the background. A top-level walk failure (DB outage, for instance) is + * captured to Sentry and logged — boot should not depend on the walk + * succeeding. + */ +export function startRotationTracksCacheWarm(): void { + void warmRotationTracksCache().catch((err) => { + Sentry.captureException(err, { + tags: { subsystem: 'rotation-tracks-cache-warm' }, + }); + console.error(`${LOG_PREFIX} walk aborted:`, err); + }); +} diff --git a/tests/unit/services/rotation-tracks-cache-warm.service.test.ts b/tests/unit/services/rotation-tracks-cache-warm.service.test.ts new file mode 100644 index 00000000..629c8e4b --- /dev/null +++ b/tests/unit/services/rotation-tracks-cache-warm.service.test.ts @@ -0,0 +1,215 @@ +/** + * Unit tests for the rotation-tracks picker LRU warm pass (BS#998). + * + * Coverage: + * 1. The walk fetches active rotation rows (kill_date IS NULL OR > today), + * calls `getDiscogsReleaseIdByRotationId` for each, and emits a final + * summary log line with the expected counters. + * 2. A single row's failure is captured to Sentry and does not halt the + * walk; subsequent rows are still visited. + * 3. The counter classification works: tier-3 positive vs negative cache + * additions are correctly tallied via the LRU-sizes delta. + * 4. `startRotationTracksCacheWarm` is fire-and-forget: a thrown error in + * the top-level walk is caught and Sentry-captured rather than escaping + * to the caller (which would be `app.listen`'s callback). + * + * The shared `db` and `getDiscogsReleaseIdByRotationId` (via library.service) + * are mocked. We hold the LRU-sizes accessor stub so we can simulate + * positive/negative cache growth. + */ +import { jest, describe, test, expect, beforeEach, afterEach } from '@jest/globals'; +import { db } from '../../mocks/database.mock'; + +// `library.service` exposes both the picker resolver we drive end-to-end +// and the LRU-sizes accessor we read for the counter classifier. Mock at +// the module boundary so the warm service receives our test doubles. +const mockGetDiscogsReleaseIdByRotationId = jest.fn<(id: number) => Promise>(); +type Sizes = { positive: number; negative: number }; +const sizesRef: { current: Sizes } = { current: { positive: 0, negative: 0 } }; +const mockRotationLmlCacheSizesForWarm = jest.fn<() => Sizes>(() => sizesRef.current); + +jest.mock('../../../apps/backend/services/library.service', () => ({ + getDiscogsReleaseIdByRotationId: mockGetDiscogsReleaseIdByRotationId, + __rotationLmlCacheSizesForWarm: mockRotationLmlCacheSizesForWarm, +})); + +const mockCaptureException = jest.fn(); +jest.mock('@sentry/node', () => ({ + captureException: (...args: unknown[]) => mockCaptureException(...args), +})); + +import { + warmRotationTracksCache, + startRotationTracksCacheWarm, +} from '../../../apps/backend/services/rotation-tracks-cache-warm.service'; + +type MockedFn = jest.Mock<(...args: unknown[]) => unknown>; + +const selectMock = db.select as unknown as MockedFn; + +/** + * Stub the `db.select().from().where()` chain so the warm pass receives the + * given list of active rotation ids. The query has no further chained + * methods after `.where()`, so the chain resolves directly when awaited. + */ +function mockActiveRotationRows(ids: number[]): void { + const fromFn = jest.fn(); + const whereFn = jest.fn<() => Promise>>().mockResolvedValue(ids.map((id) => ({ id }))); + fromFn.mockReturnValue({ where: whereFn }); + selectMock.mockReturnValue({ from: fromFn }); +} + +/** + * Make `getDiscogsReleaseIdByRotationId` advance the LRU sizes accessor as + * the real resolver would. Used by the tier-3 classification test. + */ +function resolverSideEffect(grow: 'positive' | 'negative' | 'none', returnValue: number | null): void { + mockGetDiscogsReleaseIdByRotationId.mockImplementationOnce(() => { + if (grow === 'positive') { + sizesRef.current = { ...sizesRef.current, positive: sizesRef.current.positive + 1 }; + } else if (grow === 'negative') { + sizesRef.current = { ...sizesRef.current, negative: sizesRef.current.negative + 1 }; + } + return Promise.resolve(returnValue); + }); +} + +describe('rotation-tracks-cache-warm.service', () => { + let consoleLogSpy: jest.SpiedFunction; + let consoleWarnSpy: jest.SpiedFunction; + let consoleErrorSpy: jest.SpiedFunction; + + beforeEach(() => { + selectMock.mockReset(); + mockGetDiscogsReleaseIdByRotationId.mockReset(); + mockCaptureException.mockReset(); + sizesRef.current = { positive: 0, negative: 0 }; + consoleLogSpy = jest.spyOn(console, 'log').mockImplementation(() => undefined); + consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation(() => undefined); + consoleErrorSpy = jest.spyOn(console, 'error').mockImplementation(() => undefined); + }); + + afterEach(() => { + consoleLogSpy.mockRestore(); + consoleWarnSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + }); + + describe('warmRotationTracksCache', () => { + test('visits every active rotation row in id order', async () => { + mockActiveRotationRows([10, 20, 30]); + mockGetDiscogsReleaseIdByRotationId.mockResolvedValue(null); + + await warmRotationTracksCache(); + + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenCalledTimes(3); + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenNthCalledWith(1, 10); + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenNthCalledWith(2, 20); + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenNthCalledWith(3, 30); + }); + + test('summary counters: classifies tier-3 positive vs negative vs pre-resolved per row', async () => { + // Row 10 — tier 1/2 hit (resolver returns id, LRU sizes unchanged). + // Row 20 — tier-3 LML positive (LRU positive size grows by 1). + // Row 30 — tier-3 LML negative (LRU negative size grows by 1). + // Row 40 — tier 1/2 NULL (resolver returns null, LRU sizes unchanged). + mockActiveRotationRows([10, 20, 30, 40]); + resolverSideEffect('none', 12345); + resolverSideEffect('positive', 4080); + resolverSideEffect('negative', null); + resolverSideEffect('none', null); + + const counters = await warmRotationTracksCache(); + + expect(counters.scanned).toBe(4); + expect(counters.preResolved).toBe(2); + expect(counters.lmlPositive).toBe(1); + expect(counters.lmlNegative).toBe(1); + expect(counters.errors).toBe(0); + expect(counters.elapsedMs).toBeGreaterThanOrEqual(0); + }); + + test('does not halt when a single row throws — sibling rows still visited and captured to Sentry', async () => { + mockActiveRotationRows([10, 20, 30]); + mockGetDiscogsReleaseIdByRotationId.mockResolvedValueOnce(123); + mockGetDiscogsReleaseIdByRotationId.mockRejectedValueOnce(new Error('LML timeout')); + mockGetDiscogsReleaseIdByRotationId.mockResolvedValueOnce(null); + + const counters = await warmRotationTracksCache(); + + // Walk visited all three rows despite the middle row throwing. + expect(mockGetDiscogsReleaseIdByRotationId).toHaveBeenCalledTimes(3); + expect(counters.scanned).toBe(3); + expect(counters.errors).toBe(1); + expect(mockCaptureException).toHaveBeenCalledTimes(1); + const [capturedErr, captureContext] = mockCaptureException.mock.calls[0] as [ + Error, + { tags?: Record; extra?: Record }, + ]; + expect(capturedErr).toBeInstanceOf(Error); + expect(capturedErr.message).toBe('LML timeout'); + expect(captureContext.tags?.subsystem).toBe('rotation-tracks-cache-warm'); + expect(captureContext.extra?.rotation_id).toBe(20); + }); + + test('emits a final summary log line carrying the expected counters', async () => { + mockActiveRotationRows([10, 20]); + resolverSideEffect('positive', 4080); + resolverSideEffect('negative', null); + + await warmRotationTracksCache(); + + // Final summary line is the last call to console.log emitted by the + // walk. Match on the counter shape rather than exact text so a minor + // copy edit doesn't break the assertion. + const finalLog = consoleLogSpy.mock.calls + .map((args) => args[0]) + .filter((line): line is string => typeof line === 'string') + .reverse() + .find((line) => line.includes('done:')); + expect(finalLog).toBeDefined(); + expect(finalLog).toContain('scanned=2'); + expect(finalLog).toContain('lmlPositive=1'); + expect(finalLog).toContain('lmlNegative=1'); + expect(finalLog).toContain('errors=0'); + expect(finalLog).toMatch(/elapsedMs=\d+/); + }); + + test('zero active rows is a clean no-op (no resolver calls, no errors)', async () => { + mockActiveRotationRows([]); + + const counters = await warmRotationTracksCache(); + + expect(mockGetDiscogsReleaseIdByRotationId).not.toHaveBeenCalled(); + expect(counters.scanned).toBe(0); + expect(counters.errors).toBe(0); + }); + }); + + describe('startRotationTracksCacheWarm', () => { + test('returns synchronously and does not throw when the walk fails at the top level', async () => { + // A DB outage at the SELECT step would reject before the per-row loop + // even begins; the kick-off must catch it so a transient blip during + // app boot can't crash the listen callback or emit an unhandled + // rejection. + const fromFn = jest.fn(); + const whereFn = jest.fn<() => Promise>>().mockRejectedValue(new Error('db down')); + fromFn.mockReturnValue({ where: whereFn }); + selectMock.mockReturnValue({ from: fromFn }); + + expect(() => startRotationTracksCacheWarm()).not.toThrow(); + + // Let the background promise settle so we can assert the catch arm + // captured to Sentry. + await new Promise((resolve) => setImmediate(resolve)); + + expect(mockCaptureException).toHaveBeenCalledTimes(1); + const [capturedErr, captureContext] = mockCaptureException.mock.calls[0] as [ + Error, + { tags?: Record }, + ]; + expect(capturedErr.message).toBe('db down'); + expect(captureContext.tags?.subsystem).toBe('rotation-tracks-cache-warm'); + }); + }); +});