From 8a4e36e4e012532a306ad6aa692389c1fd88716f Mon Sep 17 00:00:00 2001 From: Simon Smallchua <40650011+simonsmallchua@users.noreply.github.com> Date: Wed, 27 May 2026 23:10:28 +1000 Subject: [PATCH 1/3] Drop jobs realtime, remove dead index --- ...527224118_drop_dead_domain_hosts_index.sql | 14 ++ .../20260527224119_disable_jobs_realtime.sql | 25 +++ web/static/app/pages/dashboard.js | 4 +- web/static/app/pages/webflow-jobs.js | 174 ++++-------------- 4 files changed, 73 insertions(+), 144 deletions(-) create mode 100644 supabase/migrations/20260527224118_drop_dead_domain_hosts_index.sql create mode 100644 supabase/migrations/20260527224119_disable_jobs_realtime.sql diff --git a/supabase/migrations/20260527224118_drop_dead_domain_hosts_index.sql b/supabase/migrations/20260527224118_drop_dead_domain_hosts_index.sql new file mode 100644 index 000000000..f7a603c27 --- /dev/null +++ b/supabase/migrations/20260527224118_drop_dead_domain_hosts_index.sql @@ -0,0 +1,14 @@ +-- Drop redundant index `idx_domain_hosts_domain_id`. +-- +-- `pg_stat_user_indexes` shows 0 scans against this index, while the composite +-- unique index `domain_hosts_domain_id_host_key (domain_id, host)` serves every +-- `domain_id`-prefix lookup (12.6M scans). The standalone `(domain_id)` index is +-- pure write overhead on every `domain_hosts` upsert. +-- +-- `CONCURRENTLY` is intentionally omitted — Supabase's branch migration runner +-- pipelines statements over the extended protocol, which Postgres forbids for +-- `DROP INDEX CONCURRENTLY`. Dropping an index is a fast metadata operation; +-- the brief lock on a 105k-row table is acceptable and there are no live +-- customers yet. Guarded with `IF EXISTS` to remain idempotent across replays. 
+ +DROP INDEX IF EXISTS public.idx_domain_hosts_domain_id; diff --git a/supabase/migrations/20260527224119_disable_jobs_realtime.sql b/supabase/migrations/20260527224119_disable_jobs_realtime.sql new file mode 100644 index 000000000..41b04e77d --- /dev/null +++ b/supabase/migrations/20260527224119_disable_jobs_realtime.sql @@ -0,0 +1,25 @@ +-- Remove `public.jobs` from the `supabase_realtime` publication. +-- +-- `jobs` is the highest-churn table in the queue (per-task `running_tasks` +-- bumps, `recalculate_job_stats`, the `update_job_queue_counters` trigger). The +-- Realtime WAL-decode poll (`realtime.list_changes`) was ~14% of total database +-- time, decoding every one of those changes regardless of how many dashboards +-- were subscribed. The frontend already has an adaptive polling fallback +-- (500ms while a job is active, 1-2s idle) reading the lane-isolated `/v1/jobs` +-- API, so dropping `jobs` from the publication trades a continuous tenant-wide +-- cost for cheap, viewer-scoped polling. +-- +-- `notifications` stays on the publication. Reverse with +-- `ALTER PUBLICATION supabase_realtime ADD TABLE public.jobs;` if needed. + +DO $$ +BEGIN + IF EXISTS ( + SELECT 1 FROM pg_publication_tables + WHERE pubname = 'supabase_realtime' + AND schemaname = 'public' + AND tablename = 'jobs' + ) THEN + ALTER PUBLICATION supabase_realtime DROP TABLE public.jobs; + END IF; +END $$; diff --git a/web/static/app/pages/dashboard.js b/web/static/app/pages/dashboard.js index 7760a68cd..7aa850754 100644 --- a/web/static/app/pages/dashboard.js +++ b/web/static/app/pages/dashboard.js @@ -100,8 +100,8 @@ async function init() { containerSelector: "#adminGroup", }); - // Subscribe to realtime job updates (falls back to 10 s polling when - // Supabase realtime is unavailable, e.g. on preview branches). + // Poll for job updates (500 ms while a job is active, 1 s when idle). + // `jobs` is no longer on the Supabase Realtime publication. 
let unsubscribe = null; function startSubscription() { if (unsubscribe) unsubscribe(); diff --git a/web/static/app/pages/webflow-jobs.js b/web/static/app/pages/webflow-jobs.js index b58166015..7d55a3d0a 100644 --- a/web/static/app/pages/webflow-jobs.js +++ b/web/static/app/pages/webflow-jobs.js @@ -25,20 +25,15 @@ import { formatRelativeTime, formatDuration, formatCount, - formatStatus, - statusCategory, } from "/app/lib/formatters.js"; import { createStatusPill } from "/app/components/hover-status-pill.js"; import { createDataTable } from "/app/components/hover-data-table.js"; // ── Constants ────────────────────────────────────────────────────────────────── -const REALTIME_DEBOUNCE_MS = 250; -const SUBSCRIBE_RETRY_INTERVAL_MS = 1000; -const MAX_SUBSCRIBE_RETRIES = 15; // Match legacy gnh-auth-extension.js: 500 ms when jobs are active, 1 s when idle. -const FALLBACK_POLLING_INTERVAL_ACTIVE_MS = 500; -const FALLBACK_POLLING_INTERVAL_IDLE_MS = 1000; +const POLLING_INTERVAL_ACTIVE_MS = 500; +const POLLING_INTERVAL_IDLE_MS = 1000; // ── Data fetching ────────────────────────────────────────────────────────────── @@ -154,152 +149,47 @@ export function renderErrorState(container, message = "Failed to load jobs.") { container.appendChild(div); } -// ── Realtime subscription ────────────────────────────────────────────────────── +// ── Adaptive polling ───────────────────────────────────────────────────────── /** - * Subscribe to job changes for an organisation via Supabase Realtime. - * Falls back to polling if realtime fails. + * Poll for job changes for an organisation. * - * @param {string} orgId + * `jobs` was removed from the Supabase Realtime publication — decoding the WAL + * for every job-counter tick was ~14% of total database time, billed whether + * or not anyone was watching. We poll the lane-isolated `/v1/jobs` API instead: + * 500 ms while a job is active, 1 s when idle. The interval is re-evaluated on + * every tick so it adapts as jobs start and finish. 
+ * + * @param {string} _orgId - retained for call-site compatibility * @param {() => void} onUpdate - called when jobs may have changed - * @returns {() => void} unsubscribe / cleanup function + * @returns {() => void} cleanup function */ -export function subscribeToJobUpdates(orgId, onUpdate) { - let channel = null; - let retryCount = 0; - let retryTimer = null; - let fallbackTimer = null; - let lastUpdate = 0; - let debounceTimer = null; - let unsubscribed = false; - - function throttledUpdate() { - const now = Date.now(); - if (now - lastUpdate >= REALTIME_DEBOUNCE_MS) { - lastUpdate = now; - clearFallback(); - onUpdate(); - return; - } - if (!debounceTimer) { - debounceTimer = setTimeout(() => { - debounceTimer = null; - if (unsubscribed) return; - lastUpdate = Date.now(); - clearFallback(); - onUpdate(); - }, REALTIME_DEBOUNCE_MS); - } - } +export function subscribeToJobUpdates(_orgId, onUpdate) { + let timer = null; + let stopped = false; - // Adaptive interval: 500 ms while jobs are active, 1 s when idle. - // Matches the legacy gnh-auth-extension.js dual-interval behaviour. - function getFallbackInterval() { + function intervalMs() { return window.dataBinder?.hasRealtimeActiveJobs - ? FALLBACK_POLLING_INTERVAL_ACTIVE_MS - : FALLBACK_POLLING_INTERVAL_IDLE_MS; - } - - let fallbackIntervalMs = null; - - function startFallback() { - const nextMs = getFallbackInterval(); - if (fallbackTimer && fallbackIntervalMs === nextMs) return; - if (fallbackTimer) { - clearInterval(fallbackTimer); - } - fallbackIntervalMs = nextMs; - fallbackTimer = setInterval(onUpdate, fallbackIntervalMs); - } - - function clearFallback() { - if (fallbackTimer) { - clearInterval(fallbackTimer); - fallbackTimer = null; - fallbackIntervalMs = null; - } + ? 
POLLING_INTERVAL_ACTIVE_MS + : POLLING_INTERVAL_IDLE_MS; } - function cleanup() { - unsubscribed = true; - if (retryTimer) { - clearTimeout(retryTimer); - retryTimer = null; - } - if (debounceTimer) { - clearTimeout(debounceTimer); - debounceTimer = null; - } - clearFallback(); - if (channel && window.supabase) { - window.supabase.removeChannel(channel).catch(() => {}); - channel = null; - } + function schedule() { + if (stopped) return; + timer = setTimeout(() => { + if (stopped) return; + onUpdate(); + schedule(); + }, intervalMs()); } - function subscribe() { - if (unsubscribed) return; - if (!orgId || !window.supabase?.channel) { - if (retryCount < MAX_SUBSCRIBE_RETRIES) { - retryCount++; - retryTimer = setTimeout(subscribe, SUBSCRIBE_RETRY_INTERVAL_MS); - } else { - startFallback(); - } - return; - } - - retryCount = 0; - - try { - channel = window.supabase - .channel(`hover-jobs:${orgId}`) - .on( - "postgres_changes", - { - event: "INSERT", - schema: "public", - table: "jobs", - filter: `organisation_id=eq.${orgId}`, - }, - throttledUpdate - ) - .on( - "postgres_changes", - { - event: "UPDATE", - schema: "public", - table: "jobs", - filter: `organisation_id=eq.${orgId}`, - }, - throttledUpdate - ) - .on( - "postgres_changes", - { - event: "DELETE", - schema: "public", - table: "jobs", - filter: `organisation_id=eq.${orgId}`, - }, - throttledUpdate - ) - .subscribe((status, err) => { - if ( - (status === "CHANNEL_ERROR" || status === "TIMED_OUT" || err) && - !unsubscribed - ) { - startFallback(); - } - }); + schedule(); - // Start fallback immediately; clearFallback() stops it on first real event - startFallback(); - } catch { - startFallback(); + return function cleanup() { + stopped = true; + if (timer) { + clearTimeout(timer); + timer = null; } - } - - subscribe(); - return cleanup; + }; } From 875ae4b374573fd9bede3d1a276e009a506ed349 Mon Sep 17 00:00:00 2001 From: Simon Smallchua <40650011+simonsmallchua@users.noreply.github.com> Date: Wed, 27 May 2026 
23:15:27 +1000 Subject: [PATCH 2/3] Add changelog entry --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index becf1cb23..2c97d59cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,19 @@ On merge, CI will: previously-crawled domain ramps up from a few concurrent requests and widens on success. Tunable via `GNH_PACER_START_FLOOR_DELAY_MS` (`0` restores the prior behaviour). +- Dashboard and Webflow job lists now refresh via adaptive polling (500 ms while + a job is active, 1 s when idle) instead of Supabase Realtime. `public.jobs` + was removed from the `supabase_realtime` publication because decoding its WAL + on every job-counter change was ~14% of total database time, billed + continuously regardless of how many dashboards were subscribed. + `notifications` realtime is unchanged. Reverse with + `ALTER PUBLICATION supabase_realtime ADD TABLE public.jobs;`. + +### Removed + +- Redundant `idx_domain_hosts_domain_id` index (0 recorded scans; the composite + `(domain_id, host)` unique index already serves every `domain_id`-prefix + lookup), removing write overhead on every `domain_hosts` upsert. ## Full changelog history From 4ed405d34c41a87bb2109203933079da1fdcfc2d Mon Sep 17 00:00:00 2001 From: Simon Smallchua <40650011+simonsmallchua@users.noreply.github.com> Date: Wed, 27 May 2026 23:22:24 +1000 Subject: [PATCH 3/3] Await poll update to prevent overlap --- web/static/app/pages/webflow-jobs.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/web/static/app/pages/webflow-jobs.js b/web/static/app/pages/webflow-jobs.js index 7d55a3d0a..3cb01bc6e 100644 --- a/web/static/app/pages/webflow-jobs.js +++ b/web/static/app/pages/webflow-jobs.js @@ -161,7 +161,7 @@ export function renderErrorState(container, message = "Failed to load jobs.") { * every tick so it adapts as jobs start and finish. 
* * @param {string} _orgId - retained for call-site compatibility - * @param {() => void} onUpdate - called when jobs may have changed + * @param {() => (void | Promise<void>)} onUpdate - called when jobs may have changed * @returns {() => void} cleanup function */ export function subscribeToJobUpdates(_orgId, onUpdate) { @@ -176,10 +176,15 @@ export function subscribeToJobUpdates(_orgId, onUpdate) { function schedule() { if (stopped) return; - timer = setTimeout(() => { + timer = setTimeout(async () => { if (stopped) return; - onUpdate(); - schedule(); + // Await so a slow refresh can't stack overlapping polls; reschedule in + // finally so a rejected update still keeps the loop alive. + try { + await onUpdate(); + } finally { + schedule(); + } }, intervalMs()); }