From de511ed2fa62429f8abda053a0ec0e3d3cee14c7 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 23 May 2026 13:41:41 +0000 Subject: [PATCH] feat(tracking): suppress Apple MPP opens and bot click prefetches (closes #31) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apple Mail Privacy Protection prefetches the open pixel, and security gateways like Mimecast / Proofpoint / Barracuda HEAD or GET the click URL seconds after delivery. Both inflate metrics and produce false opened_at / clicked_at timestamps on campaign targets. A new `lib/tracking/bot-detection.ts` provides: - `detectAppleMailPrivacyProtection({ userAgent, ip })` — matches Apple Mail UA strings and iCloud Private Relay egress ranges. - `detectBotClick({ userAgent, method, isBot, sentAt })` — matches HEAD requests, requests already flagged `isBot`, headless / scanner UAs, and clicks within 1.5s of sentAt (the gateway prefetch window). `recordTrackingEvent` now accepts a `suppressionDecision` callback that runs after the campaign target is loaded. When suppression fires we still insert the events row tagged with `unverified: true` or `bot: true` and the reason, but we skip the `campaignTargets.openedAt / clickedAt` write and the SIEM/SOAR push. The dashboards (which read those target timestamps) therefore stop counting bot traffic by default, while the underlying hits remain available for forensics. 
--- app/(tracking)/c/[token]/route.ts | 17 ++- app/(tracking)/p/[...token]/route.ts | 17 ++- lib/tracking/bot-detection.ts | 172 +++++++++++++++++++++++++++ lib/tracking/record-event.ts | 34 +++++- 4 files changed, 233 insertions(+), 7 deletions(-) create mode 100644 lib/tracking/bot-detection.ts diff --git a/app/(tracking)/c/[token]/route.ts b/app/(tracking)/c/[token]/route.ts index 314f0b7..7266a56 100644 --- a/app/(tracking)/c/[token]/route.ts +++ b/app/(tracking)/c/[token]/route.ts @@ -1,5 +1,6 @@ import { NextRequest, userAgent } from "next/server"; +import { detectBotClick } from "@/lib/tracking/bot-detection"; import { clickMetadata, clickSourceFromSearchParams } from "@/lib/tracking/click-metadata"; import { renderLandingPageForToken } from "@/lib/tracking/render-landing-page"; import { recordTrackingEvent } from "@/lib/tracking/record-event"; @@ -8,12 +9,13 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{ const { token } = await params; const source = clickSourceFromSearchParams(request.nextUrl.searchParams); const agent = userAgent(request); + const userAgentValue = request.headers.get("user-agent"); const target = await recordTrackingEvent({ token, eventType: "clicked", ipAddress: request.headers.get("x-forwarded-for"), - userAgent: request.headers.get("user-agent"), + userAgent: userAgentValue, metadata: clickMetadata({ source, headers: request.headers, @@ -30,6 +32,19 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{ isBot: agent.isBot, }, }), + suppressionDecision: (campaignTarget) => { + const bot = detectBotClick({ + userAgent: userAgentValue, + method: request.method, + isBot: agent.isBot, + sentAt: campaignTarget.sentAt, + }); + if (!bot.bot) return { suppress: false }; + return { + suppress: true, + metadataPatch: { bot: true, suppressionReason: bot.reason }, + }; + }, }); if (!target) { diff --git a/app/(tracking)/p/[...token]/route.ts b/app/(tracking)/p/[...token]/route.ts index 
ec4aa0d..17d5156 100644 --- a/app/(tracking)/p/[...token]/route.ts +++ b/app/(tracking)/p/[...token]/route.ts @@ -1,5 +1,6 @@ import { NextRequest } from "next/server"; +import { detectAppleMailPrivacyProtection, pickClientIp } from "@/lib/tracking/bot-detection"; import { recordTrackingEvent } from "@/lib/tracking/record-event"; const transparentGif = Uint8Array.from([ @@ -15,12 +16,24 @@ export async function GET( const trackingToken = token.join("/").replace(/\.gif$/, ""); const source = request.nextUrl.searchParams.get("source")?.slice(0, 64) ?? "email_pixel"; + const forwardedFor = request.headers.get("x-forwarded-for"); + const userAgentValue = request.headers.get("user-agent"); + const ip = pickClientIp(forwardedFor) ?? pickClientIp(request.headers.get("x-real-ip")); + const mpp = detectAppleMailPrivacyProtection({ userAgent: userAgentValue, ip }); + await recordTrackingEvent({ token: trackingToken, eventType: "opened", - ipAddress: request.headers.get("x-forwarded-for"), - userAgent: request.headers.get("user-agent"), + ipAddress: forwardedFor, + userAgent: userAgentValue, metadata: { source }, + suppressionDecision: () => + mpp.unverified + ? { + suppress: true, + metadataPatch: { unverified: true, suppressionReason: mpp.reason }, + } + : { suppress: false }, }); return new Response(transparentGif, { diff --git a/lib/tracking/bot-detection.ts b/lib/tracking/bot-detection.ts new file mode 100644 index 0000000..29d6e50 --- /dev/null +++ b/lib/tracking/bot-detection.ts @@ -0,0 +1,172 @@ +/** + * Heuristics for distinguishing real human opens/clicks from + * pre-fetchers like Apple Mail Privacy Protection (MPP), iCloud + * Private Relay, headless browsers, and security-gateway URL scanners. + * + * Both Apple's MPP and most SEG link rewriters fetch tracking pixels and + * click URLs before the recipient ever sees the message. Without + * suppression they pollute open/click metrics and create false + * "clicked_at" timestamps on campaign targets. 
We tag the underlying + * events with `unverified: true` / `bot: true` so they remain available + * for forensics, but we skip the `campaignTargets.openedAt` / + * `clickedAt` write so dashboards only reflect human action. + */ + +import { isIP } from "node:net"; + +const HEADLESS_PATTERNS = [ + /headlesschrome/i, + /phantomjs/i, + /slimerjs/i, + /htmlunit/i, + /puppeteer/i, + /playwright/i, + /selenium/i, +]; + +const SCANNER_AND_LIBRARY_PATTERNS = [ + /^curl\//i, + /\bwget\//i, + /python-requests/i, + /python-urllib/i, + /go-http-client/i, + /okhttp/i, + /java\/[0-9]/i, + /node-fetch/i, + /libwww-perl/i, + /apachehttpclient/i, + /^facebookexternalhit/i, + /\bbingpreview\b/i, + /\bslackbot\b/i, + /\blinkpreview\b/i, + /\burlresolver\b/i, + /\burldefense\b/i, + /proofpoint/i, + /mimecast/i, + /barracuda/i, + /symantec/i, + /\bbot\b/i, + /\bcrawler\b/i, + /\bspider\b/i, +]; + +const APPLE_MAIL_UA_PATTERN = + /\bAppleWebKit\/[0-9.]+\s+\(KHTML, like Gecko\)\s*$/i; + +/** + * Conservative list of iCloud Private Relay egress CIDRs published by + * Apple. The authoritative list lives at + * https://mask-api.icloud.com/egress-ip-ranges.csv and changes regularly; + * operators can refresh `APPLE_PRIVATE_RELAY_RANGES_V4` from that CSV. + * Until it is refreshed, the snapshot below covers the most common ranges and is + * better than the no-suppression baseline. + */ +const APPLE_PRIVATE_RELAY_RANGES_V4: ReadonlyArray = [ + parseCidrV4("172.224.224.0/24"), + parseCidrV4("172.225.225.0/24"), + parseCidrV4("104.28.0.0/14"), + parseCidrV4("17.0.0.0/8"), +]; + +function parseCidrV4(cidr: string): readonly [number, number] { + const [base, prefixStr] = cidr.split("/"); + const prefix = Number(prefixStr); + const octets = base.split(".").map((part) => Number(part)); + if (octets.length !== 4 || octets.some((value) => Number.isNaN(value))) { + return [0, 0]; + } + const ipInt = + ((octets[0] << 24) | (octets[1] << 16) | (octets[2] << 8) | octets[3]) >>> 0; + const mask = prefix === 0 ? 
0 : (0xffffffff << (32 - prefix)) >>> 0; + return [ipInt & mask, mask]; +} + +function ipv4ToInt(ip: string): number | null { + const parts = ip.split(".").map((part) => Number(part)); + if (parts.length !== 4 || parts.some((value) => Number.isNaN(value) || value < 0 || value > 255)) { + return null; + } + return (((parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]) >>> 0); +} + +export function pickClientIp(forwarded: string | null | undefined): string | null { + if (!forwarded) return null; + const first = forwarded.split(",")[0]?.trim(); + return first && isIP(first) ? first : null; +} + +export function isApplePrivacyRelayIp(ip: string | null | undefined): boolean { + if (!ip || isIP(ip) !== 4) return false; + const value = ipv4ToInt(ip); + if (value === null) return false; + for (const [base, mask] of APPLE_PRIVATE_RELAY_RANGES_V4) { + if ((value & mask) === base) return true; + } + return false; +} + +export type MppDetection = { + unverified: boolean; + reason: string | null; +}; + +export function detectAppleMailPrivacyProtection(input: { + userAgent: string | null | undefined; + ip: string | null | undefined; +}): MppDetection { + const ua = input.userAgent ?? ""; + if (isApplePrivacyRelayIp(input.ip)) { + return { unverified: true, reason: "apple_private_relay_ip" }; + } + if (APPLE_MAIL_UA_PATTERN.test(ua)) { + return { unverified: true, reason: "apple_mail_ua" }; + } + return { unverified: false, reason: null }; +} + +export type BotDetection = { + bot: boolean; + reason: string | null; +}; + +/** + * Scans from a security gateway frequently fire within hundreds of ms of + * the send completing. Any click that arrives faster than `prefetchMs` + * (default 1500 ms) after `sentAt` is almost certainly a scanner. 
+ */ +const DEFAULT_PREFETCH_MS = 1500; + +export function detectBotClick(input: { + userAgent: string | null | undefined; + method: string; + isBot?: boolean | undefined; + sentAt?: Date | string | null | undefined; + now?: Date; + prefetchMs?: number; +}): BotDetection { + const ua = input.userAgent ?? ""; + const method = (input.method ?? "GET").toUpperCase(); + + if (method === "HEAD") { + return { bot: true, reason: "head_request" }; + } + if (input.isBot) { + return { bot: true, reason: "ua_isbot" }; + } + for (const pattern of HEADLESS_PATTERNS) { + if (pattern.test(ua)) return { bot: true, reason: "headless_ua" }; + } + for (const pattern of SCANNER_AND_LIBRARY_PATTERNS) { + if (pattern.test(ua)) return { bot: true, reason: "scanner_ua" }; + } + if (input.sentAt) { + const sentAtDate = input.sentAt instanceof Date ? input.sentAt : new Date(input.sentAt); + const now = input.now ?? new Date(); + const delta = now.getTime() - sentAtDate.getTime(); + const prefetchMs = input.prefetchMs ?? DEFAULT_PREFETCH_MS; + if (Number.isFinite(delta) && delta >= 0 && delta < prefetchMs) { + return { bot: true, reason: "prefetch_window" }; + } + } + return { bot: false, reason: null }; +} diff --git a/lib/tracking/record-event.ts b/lib/tracking/record-event.ts index bb585be..d092101 100644 --- a/lib/tracking/record-event.ts +++ b/lib/tracking/record-event.ts @@ -12,12 +12,31 @@ function appUrl() { return (process.env.NEXT_PUBLIC_APP_URL || process.env.BETTER_AUTH_URL || "http://localhost:3000").replace(/\/$/, ""); } +type CampaignTargetRow = NonNullable< + Awaited> +>; + +export type EventSuppressionDecision = { + suppress: boolean; + metadataPatch?: Record; +}; + export async function recordTrackingEvent(input: { token: string; eventType: EventType; ipAddress?: string | null; userAgent?: string | null; metadata?: Record; + /** + * Called after the campaign target is loaded but before any writes. 
+ * When the returned `suppress` is true, the event row is still + * inserted (so the bot/MPP hit stays visible for forensics) but + * `campaignTargets.openedAt` / `clickedAt` / etc. are NOT updated and + * the SIEM/SOAR push is skipped. Used by the pixel and click routes + * to keep Apple MPP prefetches and security-gateway scanners out of + * dashboard counts. See `lib/tracking/bot-detection.ts`. + */ + suppressionDecision?: (target: CampaignTargetRow) => EventSuppressionDecision; }) { try { const now = new Date(); @@ -29,6 +48,12 @@ export async function recordTrackingEvent(input: { return null; } + const decision = input.suppressionDecision?.(target) ?? { suppress: false }; + const mergedMetadata = { + ...(input.metadata ?? {}), + ...(decision.metadataPatch ?? {}), + }; + const [event] = await db .insert(events) .values({ @@ -36,13 +61,14 @@ export async function recordTrackingEvent(input: { eventType: input.eventType, ipAddress: input.ipAddress, userAgent: input.userAgent, - metadata: input.metadata ?? {}, + metadata: mergedMetadata, createdAt: now, }) .returning({ id: events.id }); - const timestampUpdate = - input.eventType === "opened" + const timestampUpdate = decision.suppress + ? null + : input.eventType === "opened" ? { openedAt: target.openedAt ?? now } : input.eventType === "clicked" ? { clickedAt: target.clickedAt ?? now } @@ -83,7 +109,7 @@ export async function recordTrackingEvent(input: { } } - if (event) { + if (event && !decision.suppress) { try { await enqueueSimulationEventPush(event.id); } catch (error) {