Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion app/(tracking)/c/[token]/route.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { NextRequest, userAgent } from "next/server";

import { detectBotClick } from "@/lib/tracking/bot-detection";
import { clickMetadata, clickSourceFromSearchParams } from "@/lib/tracking/click-metadata";
import { renderLandingPageForToken } from "@/lib/tracking/render-landing-page";
import { recordTrackingEvent } from "@/lib/tracking/record-event";
Expand All @@ -8,12 +9,13 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
const { token } = await params;
const source = clickSourceFromSearchParams(request.nextUrl.searchParams);
const agent = userAgent(request);
const userAgentValue = request.headers.get("user-agent");

const target = await recordTrackingEvent({
token,
eventType: "clicked",
ipAddress: request.headers.get("x-forwarded-for"),
userAgent: request.headers.get("user-agent"),
userAgent: userAgentValue,
metadata: clickMetadata({
source,
headers: request.headers,
Expand All @@ -30,6 +32,19 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
isBot: agent.isBot,
},
}),
suppressionDecision: (campaignTarget) => {
const bot = detectBotClick({
userAgent: userAgentValue,
method: request.method,
isBot: agent.isBot,
sentAt: campaignTarget.sentAt,
});
if (!bot.bot) return { suppress: false };
return {
suppress: true,
metadataPatch: { bot: true, suppressionReason: bot.reason },
};
},
});

if (!target) {
Expand Down
17 changes: 15 additions & 2 deletions app/(tracking)/p/[...token]/route.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { NextRequest } from "next/server";

import { detectAppleMailPrivacyProtection, pickClientIp } from "@/lib/tracking/bot-detection";
import { recordTrackingEvent } from "@/lib/tracking/record-event";

const transparentGif = Uint8Array.from([
Expand All @@ -15,12 +16,24 @@ export async function GET(
const trackingToken = token.join("/").replace(/\.gif$/, "");
const source = request.nextUrl.searchParams.get("source")?.slice(0, 64) ?? "email_pixel";

const forwardedFor = request.headers.get("x-forwarded-for");
const userAgentValue = request.headers.get("user-agent");
const ip = pickClientIp(forwardedFor) ?? pickClientIp(request.headers.get("x-real-ip"));
const mpp = detectAppleMailPrivacyProtection({ userAgent: userAgentValue, ip });

await recordTrackingEvent({
token: trackingToken,
eventType: "opened",
ipAddress: request.headers.get("x-forwarded-for"),
userAgent: request.headers.get("user-agent"),
ipAddress: forwardedFor,
userAgent: userAgentValue,
metadata: { source },
suppressionDecision: () =>
mpp.unverified
? {
suppress: true,
metadataPatch: { unverified: true, suppressionReason: mpp.reason },
}
: { suppress: false },
});

return new Response(transparentGif, {
Expand Down
172 changes: 172 additions & 0 deletions lib/tracking/bot-detection.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/**
* Heuristics for distinguishing real human opens/clicks from
* pre-fetchers like Apple Mail Privacy Protection (MPP), iCloud
* Private Relay, headless browsers, and security-gateway URL scanners.
*
* Both Apple's MPP and most secure email gateway (SEG) link rewriters
* fetch tracking pixels and click URLs before the recipient ever sees
* the message. Without
* suppression they pollute open/click metrics and create false
* "clicked_at" timestamps on campaign targets. We tag the underlying
* events with `unverified: true` / `bot: true` so they remain available
* for forensics, but we skip the `campaignTargets.openedAt` /
* `clickedAt` write so dashboards only reflect human action.
*/

import { isIP } from "node:net";

/**
 * Case-insensitive user-agent fragments that identify headless browsers
 * and browser-automation tooling. `detectBotClick` reports a match against
 * any of these as `"headless_ua"`.
 */
const HEADLESS_PATTERNS = [
/headlesschrome/i,
/phantomjs/i,
/slimerjs/i,
/htmlunit/i,
/puppeteer/i,
/playwright/i,
/selenium/i,
];

/**
 * Case-insensitive user-agent patterns for non-browser HTTP clients,
 * link-preview fetchers, and URL scanners. `detectBotClick` reports a
 * match against any of these as `"scanner_ua"`.
 */
const SCANNER_AND_LIBRARY_PATTERNS = [
// Command-line tools and HTTP client libraries.
/^curl\//i,
/\bwget\//i,
/python-requests/i,
/python-urllib/i,
/go-http-client/i,
/okhttp/i,
/java\/[0-9]/i,
/node-fetch/i,
/libwww-perl/i,
/apachehttpclient/i,
// Link-preview / unfurling fetchers.
/^facebookexternalhit/i,
/\bbingpreview\b/i,
/\bslackbot\b/i,
/\blinkpreview\b/i,
// URL rewriting / scanning services and security vendors.
/\burlresolver\b/i,
/\burldefense\b/i,
/proofpoint/i,
/mimecast/i,
/barracuda/i,
/symantec/i,
// Generic self-identifying automation. The word boundaries mean only a
// standalone token matches: "bot" embedded in a longer word (e.g.
// "Googlebot") is NOT caught by `\bbot\b`.
/\bbot\b/i,
/\bcrawler\b/i,
/\bspider\b/i,
];

/**
 * Matches user-agent strings that END with
 * `AppleWebKit/<version> (KHTML, like Gecko)` — i.e. a WebKit UA with no
 * trailing browser token after it. `detectAppleMailPrivacyProtection`
 * reports a match as `"apple_mail_ua"`.
 * NOTE(review): presumably this is the UA shape Apple Mail sends when
 * fetching remote content — confirm against captured traffic.
 */
const APPLE_MAIL_UA_PATTERN =
/\bAppleWebKit\/[0-9.]+\s+\(KHTML, like Gecko\)\s*$/i;

/**
 * Snapshot of IPv4 CIDR ranges treated as iCloud Private Relay egress
 * addresses. Each entry is the `[network, mask]` pair produced by
 * `parseCidrV4`. The authoritative list lives at
 * https://mask-api.icloud.com/egress-ip-ranges.csv and changes regularly;
 * operators can refresh `APPLE_PRIVATE_RELAY_RANGES_V4` from that CSV.
 * Until then, the snapshot below covers the most common ranges and is
 * better than the no-suppression baseline.
 *
 * NOTE(review): `17.0.0.0/8` and `104.28.0.0/14` are very broad — they look
 * like whole provider allocations rather than relay-only egress ranges —
 * confirm this over-matching is intended. Only IPv4 ranges are listed here.
 */
const APPLE_PRIVATE_RELAY_RANGES_V4: ReadonlyArray<readonly [number, number]> = [
parseCidrV4("172.224.224.0/24"),
parseCidrV4("172.225.225.0/24"),
parseCidrV4("104.28.0.0/14"),
parseCidrV4("17.0.0.0/8"),
];

/**
 * Parses an IPv4 CIDR string (e.g. `"104.28.0.0/14"`) into a
 * `[network, mask]` pair for bitwise range checks of the form
 * `(ip & mask) === network`.
 *
 * - `network` is deliberately left as the raw (signed 32-bit) result of
 *   `ipInt & mask`, because callers compare it against another raw `&`
 *   result; normalising only one side with `>>> 0` would break matching
 *   for addresses >= 128.0.0.0.
 * - A bare address without `/prefix` is treated as a single host (/32).
 * - Malformed input (wrong octet count, non-decimal or out-of-range
 *   octets, empty/non-numeric prefix, prefix > 32) yields a pair that can
 *   never match any address, instead of a zero mask that matches all of
 *   them.
 *
 * This function is invoked while the module is still initialising, so it
 * must not depend on module-level `const`/`let` bindings declared below
 * its call sites.
 *
 * @param cidr IPv4 CIDR notation, surrounding whitespace tolerated.
 * @returns `[network, mask]`, or a never-matching pair for invalid input.
 */
function parseCidrV4(cidr: string): readonly [number, number] {
  // mask 0 forces `(ip & mask)` to 0, which can never equal a network of 1.
  const neverMatches: readonly [number, number] = [1, 0];

  const pieces = cidr.trim().split("/");
  if (pieces.length > 2) return neverMatches;

  const address = pieces[0] ?? "";
  const prefixText = pieces[1];

  let prefix = 32;
  if (prefixText !== undefined) {
    if (!/^\d{1,2}$/.test(prefixText)) return neverMatches;
    prefix = Number(prefixText);
    // Shift counts wrap modulo 32 in JS, so an unchecked /33 would silently
    // turn into a /1 mask.
    if (prefix > 32) return neverMatches;
  }

  const octetTexts = address.split(".");
  if (octetTexts.length !== 4) return neverMatches;

  let ipInt = 0;
  for (const text of octetTexts) {
    // Strict decimal check: Number("") is 0 and Number("1e1") is 10.
    if (!/^\d{1,3}$/.test(text)) return neverMatches;
    const octet = Number(text);
    if (octet > 255) return neverMatches;
    ipInt = ((ipInt << 8) | octet) >>> 0;
  }

  // `x << 32` is `x << 0` in JS, so /0 needs an explicit zero mask.
  const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0;
  return [ipInt & mask, mask];
}

/**
 * Converts a dotted-quad IPv4 string to its unsigned 32-bit integer value.
 *
 * Each of the four parts must be 1-3 plain decimal digits in the range
 * 0-255. Validation is done on the text rather than via `Number()`
 * coercion, which would otherwise accept empty parts (`"1..2.3"`),
 * exponent/hex notation (`"1e1"`, `"0x10"`) and padded whitespace.
 *
 * @param ip Candidate IPv4 address.
 * @returns The address as an unsigned 32-bit integer, or `null` when `ip`
 *   is not a well-formed dotted quad.
 */
function ipv4ToInt(ip: string): number | null {
  const parts = ip.split(".");
  if (parts.length !== 4) return null;

  let value = 0;
  for (const part of parts) {
    if (!/^\d{1,3}$/.test(part)) return null;
    const octet = Number(part);
    if (octet > 255) return null;
    // `>>> 0` keeps the accumulator unsigned once the top bit is set.
    value = ((value << 8) | octet) >>> 0;
  }
  return value;
}

/**
 * Extracts the first address from a comma-separated forwarding header
 * value (e.g. `X-Forwarded-For` or `X-Real-IP`).
 *
 * Besides bare IPv4/IPv6 literals, the first entry may carry a port, as
 * some proxies write it: `203.0.113.7:51234`, `[2001:db8::1]:443`, or a
 * bracketed IPv6 literal without a port. The port and brackets are
 * stripped and the bare address is returned.
 *
 * @param forwarded Raw header value; `null`/`undefined`/empty yields `null`.
 * @returns The first hop as a bare IP literal, or `null` when the first
 *   entry is missing or is not a valid IP address.
 */
export function pickClientIp(forwarded: string | null | undefined): string | null {
  if (!forwarded) return null;

  const first = forwarded.split(",")[0]?.trim();
  if (!first) return null;
  if (isIP(first)) return first;

  // "[v6]" or "[v6]:port"
  const bracketed = /^\[([^\]]+)\](?::\d+)?$/.exec(first)?.[1];
  if (bracketed && isIP(bracketed) === 6) return bracketed;

  // "v4:port" — a bare IPv6 literal never reaches here (handled above).
  const hostWithPort = /^([^:]+):\d+$/.exec(first)?.[1];
  if (hostWithPort && isIP(hostWithPort) === 4) return hostWithPort;

  return null;
}

/**
 * Reports whether `ip` falls inside one of the IPv4 ranges listed in
 * `APPLE_PRIVATE_RELAY_RANGES_V4`.
 *
 * Anything that is not an IPv4 literal (missing value, IPv6, garbage)
 * is reported as `false`.
 *
 * @param ip Candidate client address.
 * @returns `true` when the address lies in a listed range.
 */
export function isApplePrivacyRelayIp(ip: string | null | undefined): boolean {
  if (!ip) return false;
  if (isIP(ip) !== 4) return false;

  const numeric = ipv4ToInt(ip);
  if (numeric === null) return false;

  // Both sides are raw `&` results, so the comparison is sign-consistent.
  return APPLE_PRIVATE_RELAY_RANGES_V4.some(
    ([network, netmask]) => (numeric & netmask) === network,
  );
}

/** Result of `detectAppleMailPrivacyProtection`. */
export type MppDetection = {
/** `true` when the open should not be counted as a verified human open. */
unverified: boolean;
/** Short code naming the rule that fired; `null` when `unverified` is `false`. */
reason: string | null;
};

/**
 * Classifies a pixel fetch as an unverified open when it looks like an
 * Apple prefetch rather than a human opening the message.
 *
 * Rules, in priority order:
 * 1. client IP inside a listed relay range -> `"apple_private_relay_ip"`
 * 2. user agent matches `APPLE_MAIL_UA_PATTERN` -> `"apple_mail_ua"`
 *
 * @param input.userAgent Raw `User-Agent` header; missing is treated as empty.
 * @param input.ip Client IP literal, if one could be determined.
 * @returns Detection verdict; `reason` is `null` when nothing matched.
 */
export function detectAppleMailPrivacyProtection(input: {
  userAgent: string | null | undefined;
  ip: string | null | undefined;
}): MppDetection {
  let reason: string | null = null;

  if (isApplePrivacyRelayIp(input.ip)) {
    reason = "apple_private_relay_ip";
  } else if (APPLE_MAIL_UA_PATTERN.test(input.userAgent ?? "")) {
    reason = "apple_mail_ua";
  }

  return { unverified: reason !== null, reason };
}

/** Result of `detectBotClick`. */
export type BotDetection = {
/** `true` when the click is attributed to automation rather than a person. */
bot: boolean;
/** Short code naming the rule that fired; `null` when `bot` is `false`. */
reason: string | null;
};

/**
 * Default prefetch window, in milliseconds, used by `detectBotClick` when
 * the caller does not pass `prefetchMs`. Security-gateway scanners
 * frequently fetch links within hundreds of ms of the send completing, so
 * any click that arrives less than this long after `sentAt` is almost
 * certainly a scanner.
 */
const DEFAULT_PREFETCH_MS = 1500;

/**
 * Decides whether a click looks automated. The first matching rule wins:
 *
 * 1. HTTP method is `HEAD`                          -> `"head_request"`
 * 2. caller-supplied `isBot` flag is truthy         -> `"ua_isbot"`
 * 3. UA matches `HEADLESS_PATTERNS`                 -> `"headless_ua"`
 * 4. UA matches `SCANNER_AND_LIBRARY_PATTERNS`      -> `"scanner_ua"`
 * 5. click landed within the prefetch window after
 *    `sentAt` (0 <= elapsed < window)               -> `"prefetch_window"`
 *
 * @param input.userAgent Raw `User-Agent` header; missing is treated as empty.
 * @param input.method HTTP method of the request (case-insensitive).
 * @param input.isBot Pre-computed bot flag from the caller, if any.
 * @param input.sentAt When the message was sent; rule 5 is skipped when
 *   absent, and never fires when the value does not parse to a valid date.
 * @param input.now Clock override; defaults to the current time.
 * @param input.prefetchMs Window size override; defaults to `DEFAULT_PREFETCH_MS`.
 * @returns Detection verdict; `reason` is `null` when no rule matched.
 */
export function detectBotClick(input: {
  userAgent: string | null | undefined;
  method: string;
  isBot?: boolean | undefined;
  sentAt?: Date | string | null | undefined;
  now?: Date;
  prefetchMs?: number;
}): BotDetection {
  const flagged = (reason: string): BotDetection => ({ bot: true, reason });
  const agentString = input.userAgent ?? "";
  const matchesAny = (patterns: readonly RegExp[]): boolean =>
    patterns.some((candidate) => candidate.test(agentString));

  if ((input.method ?? "GET").toUpperCase() === "HEAD") return flagged("head_request");
  if (input.isBot) return flagged("ua_isbot");
  if (matchesAny(HEADLESS_PATTERNS)) return flagged("headless_ua");
  if (matchesAny(SCANNER_AND_LIBRARY_PATTERNS)) return flagged("scanner_ua");

  if (input.sentAt) {
    const sentMs = (input.sentAt instanceof Date ? input.sentAt : new Date(input.sentAt)).getTime();
    const nowMs = (input.now ?? new Date()).getTime();
    const elapsed = nowMs - sentMs;
    const threshold = input.prefetchMs ?? DEFAULT_PREFETCH_MS;
    // An unparseable `sentAt` makes `elapsed` NaN, which the finite check rejects.
    const insideWindow = Number.isFinite(elapsed) && elapsed >= 0 && elapsed < threshold;
    if (insideWindow) return flagged("prefetch_window");
  }

  return { bot: false, reason: null };
}
34 changes: 30 additions & 4 deletions lib/tracking/record-event.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,31 @@ function appUrl() {
return (process.env.NEXT_PUBLIC_APP_URL || process.env.BETTER_AUTH_URL || "http://localhost:3000").replace(/\/$/, "");
}

type CampaignTargetRow = NonNullable<
Awaited<ReturnType<typeof db.query.campaignTargets.findFirst>>
>;

export type EventSuppressionDecision = {
suppress: boolean;
metadataPatch?: Record<string, unknown>;
};

export async function recordTrackingEvent(input: {
token: string;
eventType: EventType;
ipAddress?: string | null;
userAgent?: string | null;
metadata?: Record<string, unknown>;
/**
* Called after the campaign target is loaded but before any writes.
* When the returned `suppress` is true, the event row is still
* inserted (so the bot/MPP hit stays visible for forensics) but
* `campaignTargets.openedAt` / `clickedAt` / etc. are NOT updated and
* the SIEM/SOAR push is skipped. Used by the pixel and click routes
* to keep Apple MPP prefetches and security-gateway scanners out of
* dashboard counts. See `lib/tracking/bot-detection.ts`.
*/
suppressionDecision?: (target: CampaignTargetRow) => EventSuppressionDecision;
}) {
try {
const now = new Date();
Expand All @@ -29,20 +48,27 @@ export async function recordTrackingEvent(input: {
return null;
}

const decision = input.suppressionDecision?.(target) ?? { suppress: false };
const mergedMetadata = {
...(input.metadata ?? {}),
...(decision.metadataPatch ?? {}),
};

const [event] = await db
.insert(events)
.values({
campaignTargetId: target.id,
eventType: input.eventType,
ipAddress: input.ipAddress,
userAgent: input.userAgent,
metadata: input.metadata ?? {},
metadata: mergedMetadata,
createdAt: now,
})
.returning({ id: events.id });

const timestampUpdate =
input.eventType === "opened"
const timestampUpdate = decision.suppress
? null
: input.eventType === "opened"
? { openedAt: target.openedAt ?? now }
: input.eventType === "clicked"
? { clickedAt: target.clickedAt ?? now }
Expand Down Expand Up @@ -83,7 +109,7 @@ export async function recordTrackingEvent(input: {
}
}

if (event) {
if (event && !decision.suppress) {
try {
await enqueueSimulationEventPush(event.id);
} catch (error) {
Expand Down