From 7787f794a86f33106bafefe71d36dc31bf503448 Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Thu, 14 May 2026 02:01:07 -0500 Subject: [PATCH 1/8] Filter moderation-blocked slugs from /api/index --- src/worker/index.ts | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/worker/index.ts b/src/worker/index.ts index 5326b23..628ed5b 100644 --- a/src/worker/index.ts +++ b/src/worker/index.ts @@ -128,6 +128,32 @@ async function backfillTotal(env: Env): Promise { return count; } +async function filterModeratedIndexItems( + db: D1Database, + items: { slug: string; title: string; generatedAt: number | null }[] +): Promise<{ slug: string; title: string; generatedAt: number | null }[]> { + if (items.length === 0) return items; + + const slugs = items.map((it) => it.slug); + const placeholders = slugs.map(() => "?").join(","); + try { + const { results } = await db + .prepare( + `SELECT slug FROM article_moderation + WHERE status IN ('banned', 'pending', 'checking') + AND slug IN (${placeholders})` + ) + .bind(...slugs) + .all<{ slug: string }>(); + if (!results || results.length === 0) return items; + const blocked = new Set(results.map((r) => r.slug)); + return items.filter((it) => !blocked.has(it.slug)); + } catch (e) { + console.error("index: moderation filter failed", e); + return items; + } +} + app.get("/api/index", async (c) => { const cursorRaw = c.req.query("cursor"); const cursor = cursorRaw && cursorRaw.length > 0 ? cursorRaw : undefined; @@ -148,6 +174,7 @@ app.get("/api/index", async (c) => { title: k.metadata?.title ?? slugToTitle(k.name), generatedAt: k.metadata?.generatedAt ?? null, })); + const filteredItems = await filterModeratedIndexItems(c.env.DB, items); // Total is only computed on the first page request — subsequent paginated // calls don't need it, and it costs an extra KV read (or full sweep). @@ -160,14 +187,14 @@ app.get("/api/index", async (c) => { total = await backfillTotal(c.env); } // If this first page is the entire dataset, opportunistically reconcile. - if (list.list_complete && total !== items.length) { - total = items.length; + if (list.list_complete && total !== filteredItems.length) { + total = filteredItems.length; try { await c.env.ARTICLES.put(TOTAL_KEY, String(total)); } catch {} } } return c.json({ - items, + items: filteredItems, cursor: list.list_complete ? null : (list as any).cursor ?? null, complete: list.list_complete, total, From 5a1259a1e616a479c2a0d7a820a078dc57388c32 Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Thu, 14 May 2026 03:10:04 -0500 Subject: [PATCH 2/8] Harden upvote endpoints against voting bursts --- src/worker/comments.ts | 111 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/src/worker/comments.ts b/src/worker/comments.ts index 91a89c9..5484e30 100644 --- a/src/worker/comments.ts +++ b/src/worker/comments.ts @@ -16,6 +16,10 @@ export interface CommentsEnv { OPENROUTER_MODEL: string; OPENROUTER_MODERATION_MODEL?: string; IDENT_PER_IP_PER_HOUR?: string; + ARTICLE_VOTE_PER_USER_PER_HOUR?: string; + ARTICLE_VOTE_PER_IP_PER_HOUR?: string; + ARTICLE_VOTE_PER_SUBNET_PER_HOUR?: string; + ARTICLE_VOTE_PER_USER_PER_MINUTE?: string; } const COOKIE_NAME = "hu_uid"; @@ -24,6 +28,12 @@ const COOKIE_NAME = "hu_uid"; // actually expire. const COOKIE_MAX_AGE = 60 * 60 * 24 * 400; const MAX_BODY_LEN = 2000; +const ARTICLE_VOTE_DEFAULTS = { + userPerHour: 120, + ipPerHour: 240, + subnetPerHour: 600, + userPerMinute: 30, +}; export interface UserRow { id: string; @@ -216,7 +226,7 @@ function rootOrderClause(sort: SortMode): string { return "ORDER BY score DESC, created_at DESC"; case "recommended": default: - return ( + return ( "ORDER BY (sqrt(CAST(score AS REAL)) / " + "pow(((? - created_at) / 3600000.0) + 2.0, 0.8)) DESC, " + "created_at DESC" @@ -224,6 +234,103 @@ function rootOrderClause(sort: SortMode): string { } } +function parsePositiveInt(raw: string | undefined, fallback: number): number { + const n = Number.parseInt(raw || "", 10); + return Number.isFinite(n) && n > 0 ? n : fallback; +} + +function ipToSubnet(ip: string): string { + if (!ip || ip === "unknown") return "unknown"; + if (ip.includes(".")) { + const parts = ip.split("."); + if (parts.length === 4 && parts.every((p) => /^\d+$/.test(p))) { + return `${parts[0]}.${parts[1]}.${parts[2]}.0/24`; + } + } + if (ip.includes(":")) { + const parts = ip.split(":").filter((p) => p.length > 0); + if (parts.length >= 4) { + return `${parts.slice(0, 4).join(":")}::/64`; + } + } + return ip; +} + +async function enforceArticleVoteRateLimits( + c: any, + env: CommentsEnv, + userId: string +): Promise { + const ip = clientIp(c); + const subnet = ipToSubnet(ip); + + const checks = [ + { + scope: "user/hour", + result: rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `user:${userId}`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_USER_PER_HOUR, + ARTICLE_VOTE_DEFAULTS.userPerHour + ), + windowSec: 3600, + }), + }, + { + scope: "ip/hour", + result: rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `ip:${ip}`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_IP_PER_HOUR, + ARTICLE_VOTE_DEFAULTS.ipPerHour + ), + windowSec: 3600, + }), + }, + { + scope: "subnet/hour", + result: rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `subnet:${subnet}`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_SUBNET_PER_HOUR, + ARTICLE_VOTE_DEFAULTS.subnetPerHour + ), + windowSec: 3600, + }), + }, + { + scope: "user/minute", + result: rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `user:${userId}:burst`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_USER_PER_MINUTE, + ARTICLE_VOTE_DEFAULTS.userPerMinute + ), + windowSec: 60, + }), + }, + ]; + + for (const check of checks) { + const result = await check.result; + if (result.ok) continue; + const err: any = new Error( + `vote limit exceeded (${check.scope}), max ${result.limit} per window` + ); + err.status = 429; + err.retryAfter = result.retryAfter; + throw err; + } +} + function compareDTO(sort: SortMode): (a: CommentDTO, b: CommentDTO) => number { if (sort === "newest") return (a, b) => b.created_at - a.created_at; if (sort === "top") @@ -434,6 +541,7 @@ export function createCommentsApp() { let user: UserRow; try { user = await ensureUser(c, c.env); + await enforceArticleVoteRateLimits(c, c.env, user.id); } catch (e: any) { if (e?.status === 429) { return c.json({ error: e.message }, 429, { @@ -498,6 +606,7 @@ export function createCommentsApp() { let user: UserRow; try { user = await ensureUser(c, c.env); + await enforceArticleVoteRateLimits(c, c.env, user.id); } catch (e: any) { if (e?.status === 429) { return c.json({ error: e.message }, 429, { From e4b146677dfaf311fb8cd6a72c97c68466115967 Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Thu, 14 May 2026 03:15:27 -0500 Subject: [PATCH 3/8] Fix remaining PR 15 CodeRabbit findings --- package.json | 1 + pnpm-lock.yaml | 9 +++++ src/worker/comments.ts | 34 ++++++++++++----- src/worker/index.ts | 83 +++++++++++++++++++++++++++++------------- 4 files changed, 92 insertions(+), 35 deletions(-) diff --git a/package.json b/package.json index 8f7920f..7b4aaa1 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ }, "dependencies": { "hono": "^4.6.14", + "ipaddr.js": "^2.4.0", "react": "^18.3.1", "react-dom": "^18.3.1" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bf6fbe6..98061c7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: hono: specifier: ^4.6.14 version: 4.12.14 + ipaddr.js: + specifier: ^2.4.0 + version: 2.4.0 react: specifier: ^18.3.1 version: 18.3.1 @@ -935,6 +938,10 @@ packages: resolution: {integrity: sha512-am5zfg3yu6sqn5yjKBNqhnTX7Cv+m00ox+7jbaKkrLMRJ4rAdldd1xPd/JzbBWspqaQv6RSTrgFN95EsfhC+7w==} engines: {node: '>=16.9.0'} + ipaddr.js@2.4.0: + resolution: {integrity: sha512-9VGk3HGanVE6JoZXHiCpnGy5X0jYDnN4EA4lntFPj+1vIWlFhIylq2CrrCOJH9EAhc5CYhq18F2Av2tgoAPsYQ==} + engines: {node: '>= 10'} + is-arrayish@0.3.4: resolution: {integrity: sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==} @@ -1938,6 +1945,8 @@ snapshots: hono@4.12.14: {} + ipaddr.js@2.4.0: {} + is-arrayish@0.3.4: optional: true diff --git a/src/worker/comments.ts b/src/worker/comments.ts index 5484e30..7277fb5 100644 --- a/src/worker/comments.ts +++ b/src/worker/comments.ts @@ -6,6 +6,7 @@ import { type Identity, } from "./identity"; import { slugify } from "./slug"; +import ipaddr from "ipaddr.js"; import { rateLimit, clientIp } from "./ratelimit"; import { moderateCommentNow } from "./moderation"; @@ -241,17 +242,31 @@ function parsePositiveInt(raw: string | undefined, fallback: number): number { function ipToSubnet(ip: string): string { if (!ip || ip === "unknown") return "unknown"; - if (ip.includes(".")) { - const parts = ip.split("."); - if (parts.length === 4 && parts.every((p) => /^\d+$/.test(p))) { - return `${parts[0]}.${parts[1]}.${parts[2]}.0/24`; + try { + const parsed = ipaddr.parse(ip); + if (parsed.kind() === "ipv4") { + const bytes = parsed.toByteArray(); + return `${bytes[0]}.${bytes[1]}.${bytes[2]}.0/24`; } - } - if (ip.includes(":")) { - const parts = ip.split(":").filter((p) => p.length > 0); - if (parts.length >= 4) { - return `${parts.slice(0, 4).join(":")}::/64`; + + if (parsed.kind() === "ipv6") { + if (parsed.isIPv4MappedAddress && parsed.isIPv4MappedAddress()) { + const ipv4 = parsed.toIPv4Address(); + const bytes = ipv4.toByteArray(); + return `${bytes[0]}.${bytes[1]}.${bytes[2]}.0/24`; + } + + const bytes = parsed.toByteArray(); + const parts = [ + (bytes[0] << 8) | bytes[1], + (bytes[2] << 8) | bytes[3], + (bytes[4] << 8) | bytes[5], + (bytes[6] << 8) | bytes[7], + ].map((n) => n.toString(16)); + return `${parts[0]}:${parts[1]}:${parts[2]}:${parts[3]}::/64`; } + } catch { + return ip; } return ip; } @@ -739,6 +754,7 @@ export function createCommentsApp() { let user: UserRow; try { user = await ensureUser(c, c.env); + await enforceArticleVoteRateLimits(c, c.env, user.id); } catch (e: any) { if (e?.status === 429) { return c.json({ error: e.message }, 429, { diff --git a/src/worker/index.ts b/src/worker/index.ts index 628ed5b..ea09afb 100644 --- a/src/worker/index.ts +++ b/src/worker/index.ts @@ -17,6 +17,8 @@ import { countRecentBansByIp, enqueueArticleForModeration, isSlugBanned, + isTitleModerationApproved, + topicRejectedMessage, runSweep, } from "./moderation"; @@ -134,7 +136,10 @@ async function filterModeratedIndexItems( ): Promise<{ slug: string; title: string; generatedAt: number | null }[]> { if (items.length === 0) return items; - const slugs = items.map((it) => it.slug); + const safeItems = items.filter((it) => !isPermanentlyBlockedSlug(it.slug)); + if (safeItems.length === 0) return []; + + const slugs = safeItems.map((it) => it.slug); const placeholders = slugs.map(() => "?").join(","); try { const { results } = await db @@ -147,10 +152,10 @@ async function filterModeratedIndexItems( .all<{ slug: string }>(); if (!results || results.length === 0) return items; const blocked = new Set(results.map((r) => r.slug)); - return items.filter((it) => !blocked.has(it.slug)); + return safeItems.filter((it) => !blocked.has(it.slug)); } catch (e) { console.error("index: moderation filter failed", e); - return items; + throw e; } } @@ -174,7 +179,21 @@ app.get("/api/index", async (c) => { title: k.metadata?.title ?? slugToTitle(k.name), generatedAt: k.metadata?.generatedAt ?? null, })); - const filteredItems = await filterModeratedIndexItems(c.env.DB, items); + let filteredItems = items; + try { + filteredItems = await filterModeratedIndexItems(c.env.DB, items); + } catch { + return c.json( + { + error: "index temporarily unavailable", + items: [], + cursor: list.list_complete ? null : ((list as any).cursor ?? null), + complete: false, + total: null, + }, + 503 + ); + } // Total is only computed on the first page request — subsequent paginated // calls don't need it, and it costs an extra KV read (or full sweep). @@ -511,23 +530,8 @@ app.get("/api/page/:slug", async (c) => { ); } - // 3. Per-IP rate limit on generation (defense against UA-spoofing scrapers). + // 3. Resolve caller identity before applying abuse controls. const ip = clientIp(c); - const perHour = parseInt(c.env.GEN_PER_IP_PER_HOUR || "30", 10); - const rl = await rateLimit({ - kv: c.env.ARTICLES, - bucket: "gen", - ip, - limit: perHour, - windowSec: 3600, - }); - if (!rl.ok) { - return c.json( - { error: `slow down — at most ${rl.limit} new entries per hour from one address` }, - 429, - { "retry-after": String(rl.retryAfter), "x-robots-tag": "noindex" } - ); - } // 3b. IP-strike block. If this IP has had too many articles banned // recently, refuse before any LLM call. This is what stops a botnet @@ -553,6 +557,39 @@ app.get("/api/page/:slug", async (c) => { ); } + if (!c.env.OPENROUTER_API_KEY) { + return c.json({ error: "OPENROUTER_API_KEY is not configured" }, 500); + } + + const title = slugToTitle(slug); + + // 4. Pre-generation content policy check via moderation model. + const approvedByPolicy = await isTitleModerationApproved(title, c.env); + if (!approvedByPolicy) { + return c.json( + { error: topicRejectedMessage(), banned: true }, + 403, + { "x-robots-tag": "noindex" } + ); + } + + // 5. Per-IP rate limit on generation (defense against UA-spoofing scrapers). + const perHour = parseInt(c.env.GEN_PER_IP_PER_HOUR || "30", 10); + const rl = await rateLimit({ + kv: c.env.ARTICLES, + bucket: "gen", + ip, + limit: perHour, + windowSec: 3600, + }); + if (!rl.ok) { + return c.json( + { error: `slow down — at most ${rl.limit} new entries per hour from one address` }, + 429, + { "retry-after": String(rl.retryAfter), "x-robots-tag": "noindex" } + ); + } + // 4. Daily soft cap (per-namespace counter). const today = new Date().toISOString().slice(0, 10); const counterKey = `__counter:${today}`; @@ -563,10 +600,6 @@ app.get("/api/page/:slug", async (c) => { return c.json({ error: "daily generation cap reached; try again tomorrow" }, 503); } - if (!c.env.OPENROUTER_API_KEY) { - return c.json({ error: "OPENROUTER_API_KEY is not configured" }, 500); - } - // 3. Fetch source context if `from` is present. let sourceContext: GenerateOptions["sourceContext"] = null; if (fromSlug) { @@ -579,8 +612,6 @@ app.get("/api/page/:slug", async (c) => { } } - const title = slugToTitle(slug); - // Pull every prior link-context blurb other articles have written about // this slug. These become CANON the LLM must respect. let priorHints: string[] = []; From 4397aa037305cf6e54515d4be5a99e53ba797d87 Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Thu, 14 May 2026 03:36:39 -0500 Subject: [PATCH 4/8] Return safeItems in /api/index empty moderation result path --- src/worker/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/worker/index.ts b/src/worker/index.ts index ea09afb..f699367 100644 --- a/src/worker/index.ts +++ b/src/worker/index.ts @@ -150,7 +150,7 @@ async function filterModeratedIndexItems( ) .bind(...slugs) .all<{ slug: string }>(); - if (!results || results.length === 0) return items; + if (!results || results.length === 0) return safeItems; const blocked = new Set(results.map((r) => r.slug)); return safeItems.filter((it) => !blocked.has(it.slug)); } catch (e) { From f123787f47e35c359d4b347464b41c0c50f87d09 Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Thu, 14 May 2026 04:25:41 -0500 Subject: [PATCH 5/8] fix: separate comment creation limiter and reorder moderation checks --- README.md | 2 + src/worker/comments.ts | 148 +++++++++++++++++++++++++++++------------ src/worker/index.ts | 22 +++--- wrangler.toml | 2 + 4 files changed, 119 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 2824c96..1ce5485 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,8 @@ Defined in `wrangler.toml` under `[vars]`: | `MAX_ARTICLES_PER_DAY` | `5000` | Global circuit breaker — soft cap per UTC day | | `GEN_PER_IP_PER_HOUR` | `100` | Per-IP article generation budget | | `IDENT_PER_IP_PER_HOUR` | `10` | Per-IP cap on minting new commenter identities | +| `COMMENT_PER_IP_PER_HOUR` | `90` | Per-IP comment creation budget | +| `COMMENT_PER_IP_PER_MINUTE` | `15` | Per-IP comment burst budget | Secrets (set via `wrangler secret put`): diff --git a/src/worker/comments.ts b/src/worker/comments.ts index 7277fb5..00dd715 100644 --- a/src/worker/comments.ts +++ b/src/worker/comments.ts @@ -21,6 +21,8 @@ export interface CommentsEnv { ARTICLE_VOTE_PER_IP_PER_HOUR?: string; ARTICLE_VOTE_PER_SUBNET_PER_HOUR?: string; ARTICLE_VOTE_PER_USER_PER_MINUTE?: string; + COMMENT_PER_IP_PER_HOUR?: string; + COMMENT_PER_IP_PER_MINUTE?: string; } const COOKIE_NAME = "hu_uid"; @@ -36,6 +38,11 @@ const ARTICLE_VOTE_DEFAULTS = { userPerMinute: 30, }; +const COMMENT_CREATION_DEFAULTS = { + ipPerHour: 90, + ipPerMinute: 15, +}; + export interface UserRow { id: string; name: string; @@ -282,63 +289,116 @@ async function enforceArticleVoteRateLimits( const checks = [ { scope: "user/hour", - result: rateLimit({ - kv: env.ARTICLES, - bucket: "article-vote", - ip: `user:${userId}`, - limit: parsePositiveInt( - env.ARTICLE_VOTE_PER_USER_PER_HOUR, - ARTICLE_VOTE_DEFAULTS.userPerHour - ), - windowSec: 3600, - }), + run: () => + rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `user:${userId}`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_USER_PER_HOUR, + ARTICLE_VOTE_DEFAULTS.userPerHour + ), + windowSec: 3600, + }), }, { scope: "ip/hour", - result: rateLimit({ - kv: env.ARTICLES, - bucket: "article-vote", - ip: `ip:${ip}`, - limit: parsePositiveInt( - env.ARTICLE_VOTE_PER_IP_PER_HOUR, - ARTICLE_VOTE_DEFAULTS.ipPerHour - ), - windowSec: 3600, - }), + run: () => + rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `ip:${ip}`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_IP_PER_HOUR, + ARTICLE_VOTE_DEFAULTS.ipPerHour + ), + windowSec: 3600, + }), }, { scope: "subnet/hour", - result: rateLimit({ - kv: env.ARTICLES, - bucket: "article-vote", - ip: `subnet:${subnet}`, - limit: parsePositiveInt( - env.ARTICLE_VOTE_PER_SUBNET_PER_HOUR, - ARTICLE_VOTE_DEFAULTS.subnetPerHour - ), - windowSec: 3600, - }), + run: () => + rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `subnet:${subnet}`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_SUBNET_PER_HOUR, + ARTICLE_VOTE_DEFAULTS.subnetPerHour + ), + windowSec: 3600, + }), }, { scope: "user/minute", - result: rateLimit({ - kv: env.ARTICLES, - bucket: "article-vote", - ip: `user:${userId}:burst`, - limit: parsePositiveInt( - env.ARTICLE_VOTE_PER_USER_PER_MINUTE, - ARTICLE_VOTE_DEFAULTS.userPerMinute - ), - windowSec: 60, - }), + run: () => + rateLimit({ + kv: env.ARTICLES, + bucket: "article-vote", + ip: `user:${userId}:burst`, + limit: parsePositiveInt( + env.ARTICLE_VOTE_PER_USER_PER_MINUTE, + ARTICLE_VOTE_DEFAULTS.userPerMinute + ), + windowSec: 60, + }), }, ]; - for (const check of checks) { - const result = await check.result; + for (const { scope, run } of checks) { + const result = await run(); if (result.ok) continue; const err: any = new Error( - `vote limit exceeded (${check.scope}), max ${result.limit} per window` + `vote limit exceeded (${scope}), max ${result.limit} per window` + ); + err.status = 429; + err.retryAfter = result.retryAfter; + throw err; + } +} + +async function enforceCommentCreationRateLimits( + c: any, + env: CommentsEnv +): Promise { + const ip = clientIp(c); + + const checks = [ + { + scope: "ip/hour", + run: () => + rateLimit({ + kv: env.ARTICLES, + bucket: "comment-create", + ip: `ip:${ip}`, + limit: parsePositiveInt( + env.COMMENT_PER_IP_PER_HOUR, + COMMENT_CREATION_DEFAULTS.ipPerHour + ), + windowSec: 3600, + }), + }, + { + scope: "ip/minute", + run: () => + rateLimit({ + kv: env.ARTICLES, + bucket: "comment-create", + ip: `ip:${ip}:burst`, + limit: parsePositiveInt( + env.COMMENT_PER_IP_PER_MINUTE, + COMMENT_CREATION_DEFAULTS.ipPerMinute + ), + windowSec: 60, + }), + }, + ]; + + for (const { scope, run } of checks) { + const result = await run(); + if (result.ok) continue; + const err: any = new Error( + `comment limit exceeded (${scope}), max ${result.limit} per window` ); err.status = 429; err.retryAfter = result.retryAfter; @@ -555,8 +615,8 @@ export function createCommentsApp() { let user: UserRow; try { + await enforceCommentCreationRateLimits(c, c.env); user = await ensureUser(c, c.env); - await enforceArticleVoteRateLimits(c, c.env, user.id); } catch (e: any) { if (e?.status === 429) { return c.json({ error: e.message }, 429, { diff --git a/src/worker/index.ts b/src/worker/index.ts index f699367..18a85c0 100644 --- a/src/worker/index.ts +++ b/src/worker/index.ts @@ -563,16 +563,6 @@ app.get("/api/page/:slug", async (c) => { const title = slugToTitle(slug); - // 4. Pre-generation content policy check via moderation model. - const approvedByPolicy = await isTitleModerationApproved(title, c.env); - if (!approvedByPolicy) { - return c.json( - { error: topicRejectedMessage(), banned: true }, - 403, - { "x-robots-tag": "noindex" } - ); - } - // 5. Per-IP rate limit on generation (defense against UA-spoofing scrapers). const perHour = parseInt(c.env.GEN_PER_IP_PER_HOUR || "30", 10); const rl = await rateLimit({ @@ -590,7 +580,17 @@ app.get("/api/page/:slug", async (c) => { ); } - // 4. Daily soft cap (per-namespace counter). + // 6. Pre-generation content policy check via moderation model. + const approvedByPolicy = await isTitleModerationApproved(title, c.env); + if (!approvedByPolicy) { + return c.json( + { error: topicRejectedMessage(), banned: true }, + 403, + { "x-robots-tag": "noindex" } + ); + } + + // 7. Daily soft cap (per-namespace counter). const today = new Date().toISOString().slice(0, 10); const counterKey = `__counter:${today}`; const countStr = await c.env.ARTICLES.get(counterKey); diff --git a/wrangler.toml b/wrangler.toml index 2bc1686..7c74f0b 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -43,6 +43,8 @@ MAX_ARTICLES_PER_DAY = "5000" GEN_PER_IP_PER_HOUR = "100" IDENT_PER_IP_PER_HOUR = "10" SEARCH_PER_IP_PER_HOUR = "15" +COMMENT_PER_IP_PER_HOUR = "90" +COMMENT_PER_IP_PER_MINUTE = "15" # OPENROUTER_API_KEY should be set as a secret: # pnpm wrangler secret put OPENROUTER_API_KEY From c8c851d566f7b0fa3548ee9a355b4d1f3b86b6da Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Thu, 14 May 2026 05:43:09 -0500 Subject: [PATCH 6/8] reorder comment creation limiter after identity check --- src/worker/comments.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/worker/comments.ts b/src/worker/comments.ts index 00dd715..da19e9c 100644 --- a/src/worker/comments.ts +++ b/src/worker/comments.ts @@ -615,8 +615,8 @@ export function createCommentsApp() { let user: UserRow; try { - await enforceCommentCreationRateLimits(c, c.env); user = await ensureUser(c, c.env); + await enforceCommentCreationRateLimits(c, c.env); } catch (e: any) { if (e?.status === 429) { return c.json({ error: e.message }, 429, { From c992702d1ead19b4a8ffa6f8bc6fa1125281b46a Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Sat, 16 May 2026 13:16:21 -0500 Subject: [PATCH 7/8] Add configurable LLM chat endpoint --- src/worker/comments.ts | 2 ++ src/worker/identity.ts | 10 +++++++++- src/worker/index.ts | 3 +++ src/worker/llm.ts | 19 ++++++++++++++----- src/worker/moderation.ts | 10 +++++++++- wrangler.toml | 4 ++++ 6 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/worker/comments.ts b/src/worker/comments.ts index a169e1d..6ba9ac3 100644 --- a/src/worker/comments.ts +++ b/src/worker/comments.ts @@ -15,6 +15,7 @@ export interface CommentsEnv { DB: D1Database; ARTICLES: KVNamespace; OPENROUTER_API_KEY: string; + LLM_API_URL?: string; OPENROUTER_MODEL: string; OPENROUTER_MODERATION_MODEL?: string; IDENT_PER_IP_PER_HOUR?: string; @@ -153,6 +154,7 @@ async function ensureUser( try { identity = await hallucinateIdentity( env.OPENROUTER_API_KEY, + env.LLM_API_URL, env.OPENROUTER_MODEL || "google/gemini-2.5-flash-lite" ); } catch { diff --git a/src/worker/identity.ts b/src/worker/identity.ts index b7748d3..ce694cf 100644 --- a/src/worker/identity.ts +++ b/src/worker/identity.ts @@ -20,6 +20,7 @@ export interface Identity { export async function hallucinateIdentity( apiKey: string, + apiUrl: string | undefined, model: string ): Promise { const body = { @@ -33,7 +34,7 @@ export async function hallucinateIdentity( ], }; - const res = await fetch("https://openrouter.ai/api/v1/chat/completions", { + const res = await fetch(chatCompletionsUrl(apiUrl), { method: "POST", headers: { "Content-Type": "application/json", @@ -50,6 +51,13 @@ export async function hallucinateIdentity( return parseIdentity(raw); } +function chatCompletionsUrl(apiUrl: string | undefined): string { + const trimmed = apiUrl?.trim(); + return trimmed && trimmed.length > 0 + ? trimmed + : "https://openrouter.ai/api/v1/chat/completions"; +} + export function parseIdentity(raw: string): Identity { const cleaned = raw .trim() diff --git a/src/worker/index.ts b/src/worker/index.ts index 63b854a..2c91fc3 100644 --- a/src/worker/index.ts +++ b/src/worker/index.ts @@ -34,6 +34,7 @@ export interface Env { DB: D1Database; ASSETS: Fetcher; OPENROUTER_API_KEY: string; + LLM_API_URL?: string; OPENROUTER_MODEL: string; OPENROUTER_MODERATION_MODEL?: string; MAX_ARTICLES_PER_DAY: string; @@ -426,6 +427,7 @@ app.get("/api/search", async (c) => { // leaves us with enough. const titles = await hallucinateSearchTitles( c.env.OPENROUTER_API_KEY, + c.env.LLM_API_URL, c.env.OPENROUTER_MODERATION_MODEL || c.env.OPENROUTER_MODEL || "google/gemini-2.5-flash-lite", @@ -681,6 +683,7 @@ app.get("/api/page/:slug", async (c) => { const genOpts: GenerateOptions = { apiKey: c.env.OPENROUTER_API_KEY, + apiUrl: c.env.LLM_API_URL, model: c.env.OPENROUTER_MODEL || "google/gemini-2.0-flash-001", title, slug, diff --git a/src/worker/llm.ts b/src/worker/llm.ts index a808468..d59eaa6 100644 --- a/src/worker/llm.ts +++ b/src/worker/llm.ts @@ -17,6 +17,7 @@ RULES: export interface GenerateOptions { apiKey: string; + apiUrl?: string; model: string; title: string; slug: string; @@ -74,7 +75,7 @@ export async function streamGeneration(opts: GenerateOptions): Promise ""); - throw new Error(`OpenRouter error ${res.status}: ${errText.slice(0, 300)}`); + throw new Error(`LLM provider error ${res.status}: ${errText.slice(0, 300)}`); } const decoder = new TextDecoder(); @@ -154,6 +155,7 @@ Reply with ONLY a JSON array of N strings. No prose, no code fences, no explanat * fewer if the model misbehaves. Never throws. */ export async function hallucinateSearchTitles( apiKey: string, + apiUrl: string | undefined, model: string, query: string, count: number @@ -161,7 +163,7 @@ export async function hallucinateSearchTitles( const userMsg = `Search query: "${query}"\n\nReturn a JSON array of exactly ${count} plausible Halupedia titles inspired by this query. No commentary.`; let raw = ""; try { - const res = await fetch("https://openrouter.ai/api/v1/chat/completions", { + const res = await fetch(chatCompletionsUrl(apiUrl), { method: "POST", headers: { "Content-Type": "application/json", @@ -205,7 +207,7 @@ export async function hallucinateSearchTitles( * Non-streaming fallback (used for retry on malformed output). */ export async function generateOnce(opts: GenerateOptions): Promise { - const res = await fetch("https://openrouter.ai/api/v1/chat/completions", { + const res = await fetch(chatCompletionsUrl(opts.apiUrl), { method: "POST", headers: { "Content-Type": "application/json", @@ -224,7 +226,14 @@ export async function generateOnce(opts: GenerateOptions): Promise { ], }), }); - if (!res.ok) throw new Error(`OpenRouter error ${res.status}`); + if (!res.ok) throw new Error(`LLM provider error ${res.status}`); const json: any = await res.json(); return json?.choices?.[0]?.message?.content ?? ""; } + +function chatCompletionsUrl(apiUrl: string | undefined): string { + const trimmed = apiUrl?.trim(); + return trimmed && trimmed.length > 0 + ? trimmed + : "https://openrouter.ai/api/v1/chat/completions"; +} diff --git a/src/worker/moderation.ts b/src/worker/moderation.ts index fc11f7b..1234553 100644 --- a/src/worker/moderation.ts +++ b/src/worker/moderation.ts @@ -19,6 +19,7 @@ export interface ModerationEnv { DB: D1Database; ARTICLES: KVNamespace; OPENROUTER_API_KEY: string; + LLM_API_URL?: string; OPENROUTER_MODEL: string; OPENROUTER_MODERATION_MODEL?: string; } @@ -184,7 +185,7 @@ async function judgeBatch( let raw = ""; try { - const res = await fetch("https://openrouter.ai/api/v1/chat/completions", { + const res = await fetch(chatCompletionsUrl(env.LLM_API_URL), { method: "POST", headers: { "Content-Type": "application/json", @@ -233,6 +234,13 @@ async function judgeBatch( return out; } +function chatCompletionsUrl(apiUrl: string | undefined): string { + const trimmed = apiUrl?.trim(); + return trimmed && trimmed.length > 0 + ? trimmed + : "https://openrouter.ai/api/v1/chat/completions"; +} + /* -------------------------------------------------------------------------- */ /* Per-write enqueue helpers */ /* -------------------------------------------------------------------------- */ diff --git a/wrangler.toml b/wrangler.toml index ed30520..acc16ee 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -37,6 +37,10 @@ migrations_dir = "migrations" [vars] OPENROUTER_MODEL = "google/gemini-2.5-flash-lite" +# Optional OpenAI-compatible chat completions endpoint override. Leave unset +# for OpenRouter; set locally in .dev.vars to use z.ai or another compatible +# provider without changing code. +# LLM_API_URL = "https://api.z.ai/api/coding/paas/v4/chat/completions" MAX_ARTICLES_PER_DAY = "5000" # Per-IP guards against UA-spoofing scrapers. Bypassable via IP rotation, # but the global daily cap above is the hard backstop. From cfb2a51a6b4bcb59c66f89ad064c3861ef39f8d6 Mon Sep 17 00:00:00 2001 From: lynncoleart Date: Sat, 16 May 2026 13:31:17 -0500 Subject: [PATCH 8/8] Add deterministic abuse moderation floor --- src/worker/abuse.ts | 171 +++++++++++++++++++++++++++++++++++++++ src/worker/blocklist.ts | 3 + src/worker/moderation.ts | 19 ++++- 3 files changed, 189 insertions(+), 4 deletions(-) create mode 100644 src/worker/abuse.ts diff --git a/src/worker/abuse.ts b/src/worker/abuse.ts new file mode 100644 index 0000000..27a04a5 --- /dev/null +++ b/src/worker/abuse.ts @@ -0,0 +1,171 @@ +/** + * Deterministic abuse floor for cases that should not depend on model + * judgment. The LLM moderation prompt handles nuance; this catches obvious + * slurs, protected-class hate, genocidal/Nazi promotion, and sexual-minor + * abuse even when the configured provider is too permissive. + */ + +const DIRECT_SLURS = [ + "nigger", + "nigga", + "kike", + "faggot", + "fag", + "tranny", + "chink", + "gook", + "spic", + "wetback", + "coon", + "raghead", + "towelhead", + "sandnigger", + "zipperhead", + "retard", + "mongoloid", +]; + +const PROTECTED_GROUPS = [ + "jew", + "jews", + "jewish", + "muslim", + "muslims", + "islam", + "islamic", + "islamist", + "islamists", + "christian", + "christians", + "black", + "blacks", + "asian", + "asians", + "arab", + "arabs", + "mexican", + "mexicans", + "immigrant", + "immigrants", + "gay", + "gays", + "lesbian", + "lesbians", + "trans", + "transgender", + "women", + "woman", + "disabled", +]; + +const HATE_ACTIONS = [ + "kill", + "murder", + "rape", + "hang", + "lynch", + "gas", + "exterminate", + "eradicate", + "enslave", + "sterilize", + "castrate", + "deport", + "banish", +]; + +const HATE_DESCRIPTORS = [ + "vermin", + "rats", + "parasites", + "cockroaches", + "subhuman", + "animals", + "rapists", + "pedophiles", + "terrorists", + "disease", + "plague", + "filth", + "scum", + "degenerate", + "degenerates", + "inferior", +]; + +const PROFANE_TARGETING = [ + "fuck", + "suck", + "sucks", + "hate", + "destroy", +]; + +const SEXUAL_MINOR_PATTERNS = [ + /(?:^|-)child(?:ren)?-(?:porn|sex|rape|rapist|molest|abuse)(?:-|$)/, + /(?:^|-)(?:porn|sex|rape|molest|abuse)-child(?:ren)?(?:-|$)/, + /(?:^|-)minor(?:s)?-(?:porn|sex|rape|molest|abuse)(?:-|$)/, + /(?:^|-)(?:cp|loli|lolicon)(?:-|$)/, +]; + +const GENOCIDE_PATTERNS = [ + /(?:^|-)heil-hitler(?:-|$)/, + /(?:^|-)white-power(?:-|$)/, + /(?:^|-)white-supremacy(?:-|$)/, + /(?:^|-)aryan-(?:race|nation|supremacy)(?:-|$)/, + /(?:^|-)gas-the-[a-z0-9-]+/, + /(?:^|-)holocaust-(?:hoax|fake|denial)(?:-|$)/, + /(?:^|-)hitler-did-nothing-wrong(?:-|$)/, + /(?:^|-)(?:kill|exterminate|eradicate|gas)-all-[a-z0-9-]+/, +]; + +export function containsDeterministicDisallowedAbuse(text: string): boolean { + const normalized = normalizeForAbuseScan(text); + if (!normalized) return false; + + for (const term of DIRECT_SLURS) { + if (hasToken(normalized, term)) return true; + } + + for (const pattern of SEXUAL_MINOR_PATTERNS) { + if (pattern.test(normalized)) return true; + } + + for (const pattern of GENOCIDE_PATTERNS) { + if (pattern.test(normalized)) return true; + } + + const targetsProtectedGroup = PROTECTED_GROUPS.some((term) => + hasToken(normalized, term) + ); + if (!targetsProtectedGroup) return false; + + return ( + HATE_ACTIONS.some((term) => hasToken(normalized, term)) || + HATE_DESCRIPTORS.some((term) => hasToken(normalized, term)) || + PROFANE_TARGETING.some((term) => hasToken(normalized, term)) + ); +} + +function normalizeForAbuseScan(text: string): string { + return text + .toLowerCase() + .normalize("NFKD") + .replace(/[\u0300-\u036f]/g, "") + .replace(/0/g, "o") + .replace(/1/g, "i") + .replace(/3/g, "e") + .replace(/4/g, "a") + .replace(/5/g, "s") + .replace(/7/g, "t") + .replace(/@/g, "a") + .replace(/\$/g, "s") + .replace(/[^a-z0-9]+/g, "-") + .replace(/-+/g, "-") + .replace(/^-|-$/g, ""); +} + +function hasToken(normalized: string, token: string): boolean { + const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + return new RegExp(`(?:^|-)${escaped}(?:s|es)?(?:-|$)`).test(normalized); +} diff --git a/src/worker/blocklist.ts b/src/worker/blocklist.ts index 76e6e7e..f2a8da0 100644 --- a/src/worker/blocklist.ts +++ b/src/worker/blocklist.ts @@ -1,3 +1,5 @@ +import { containsDeterministicDisallowedAbuse } from "./abuse"; + /** * Permanent slug-pattern blocklist. Slugs matching any rule below are * refused before generation, before cache lookup, before anything. They @@ -12,6 +14,7 @@ */ export function isPermanentlyBlockedSlug(slug: string): boolean { slug = slug.toLowerCase(); + if (containsDeterministicDisallowedAbuse(slug)) return true; if (slug.startsWith("0-0")) return true; if (slug.includes("0-0-0")) return true; if (slug.includes("strama")) return true; diff --git a/src/worker/moderation.ts b/src/worker/moderation.ts index 1234553..90a6bc8 100644 --- a/src/worker/moderation.ts +++ b/src/worker/moderation.ts @@ -1,3 +1,5 @@ +import { containsDeterministicDisallowedAbuse } from "./abuse"; + /** * Content moderation. Two trigger paths: * @@ -91,6 +93,7 @@ export async function isTitleModerationApproved( title: string, env: ModerationEnv ): Promise { + if (containsDeterministicDisallowedAbuse(title)) return false; const rejected = await judgeBatch( [{ index: 1, text: title }], "article title", @@ -139,6 +142,7 @@ const ENGAGEMENT_BAIT_PHRASES = [ "informative article", ]; export function isObviousCommentSpam(body: string): boolean { + if (containsDeterministicDisallowedAbuse(body)) return true; const trimmed = body.trim(); if (SPAM_FINGERPRINT.test(trimmed)) return true; // Strip emojis/punctuation and check if what remains is ONLY a bait phrase. @@ -172,11 +176,19 @@ async function judgeBatch( ): Promise> { if (items.length === 0) return new Set(); - const numbered = items + const out = new Set(); + for (const item of items) { + if (containsDeterministicDisallowedAbuse(item.text)) out.add(item.index); + } + + const modelItems = items.filter((item) => !out.has(item.index)); + if (modelItems.length === 0) return out; + + const numbered = modelItems .map((it) => `${it.index}. ${it.text.replace(/\s+/g, " ").slice(0, 500)}`) .join("\n"); - const userMsg = `Review the following ${items.length} ${kind}${items.length === 1 ? "" : "s"} and return the JSON array of 1-based indices to remove (or [] if all are acceptable):\n\n${numbered}`; + const userMsg = `Review the following ${modelItems.length} ${kind}${modelItems.length === 1 ? "" : "s"} and return the JSON array of 1-based indices to remove (or [] if all are acceptable):\n\n${numbered}`; const model = env.OPENROUTER_MODERATION_MODEL || @@ -225,8 +237,7 @@ async function judgeBatch( } if (!Array.isArray(arr)) return new Set(); - const valid = new Set(items.map((it) => it.index)); - const out = new Set(); + const valid = new Set(modelItems.map((it) => it.index)); for (const v of arr) { const n = typeof v === "number" ? v : parseInt(String(v), 10); if (Number.isFinite(n) && valid.has(n)) out.add(n);