From 356b51316a627bf23bd873eaa8d6a5f7ba8f0fcb Mon Sep 17 00:00:00 2001 From: lukesalamone Date: Fri, 8 May 2026 17:26:54 -0700 Subject: [PATCH] integrate openai moderation endpoint --- src/worker/comments.ts | 11 ++++++++- src/worker/index.ts | 22 +++++++++++++++++ src/worker/moderation.ts | 52 ++++++++++++++++++++++++++++++++++++++++ wrangler.toml | 3 +++ 4 files changed, 87 insertions(+), 1 deletion(-) diff --git a/src/worker/comments.ts b/src/worker/comments.ts index b80ddf6..74f3c86 100644 --- a/src/worker/comments.ts +++ b/src/worker/comments.ts @@ -7,7 +7,7 @@ import { } from "./identity"; import { slugify } from "./slug"; import { rateLimit, clientIp } from "./ratelimit"; -import { moderateCommentNow } from "./moderation"; +import { moderateCommentNow, openaiModerate } from "./moderation"; export interface CommentsEnv { DB: D1Database; @@ -16,6 +16,7 @@ export interface CommentsEnv { OPENROUTER_MODEL: string; OPENROUTER_MODERATION_MODEL?: string; IDENT_PER_IP_PER_HOUR?: string; + OPENAI_API_KEY?: string; } const COOKIE_NAME = "hu_uid"; @@ -421,6 +422,14 @@ export function createCommentsApp() { return c.json({ error: `comment exceeds ${MAX_BODY_LEN} chars` }, 400); } + // Pre-save moderation + if (c.env.OPENAI_API_KEY) { + const flagged = await openaiModerate(body, c.env.OPENAI_API_KEY); + if (flagged) { + return c.json({ error: "comment was flagged by moderation" }, 400); + } + } + if (parent_id) { const parent = await c.env.DB .prepare("SELECT id, slug FROM comments WHERE id = ?") diff --git a/src/worker/index.ts b/src/worker/index.ts index 42a362b..e267471 100644 --- a/src/worker/index.ts +++ b/src/worker/index.ts @@ -14,9 +14,11 @@ import { isLikelyVpn } from "./vpn"; import { isPermanentlyBlockedSlug } from "./blocklist"; import { loadHints, saveHints } from "./hints"; import { + banSlugNow, countRecentBansByIp, enqueueArticleForModeration, isSlugBanned, + openaiModerate, runSweep, } from "./moderation"; @@ -38,6 +40,9 @@ export interface Env { // Per-IP 
rate limit for /api/search LLM-backed suggestions. Over the // limit, search still returns DB matches but skips the hallucination call. SEARCH_PER_IP_PER_HOUR?: string; + // Optional: OpenAI API key for synchronous pre-generation moderation. + // Set via: pnpm wrangler secret put OPENAI_API_KEY + OPENAI_API_KEY?: string; } interface StoredArticle { @@ -552,6 +557,23 @@ app.get("/api/page/:slug", async (c) => { const title = slugToTitle(slug); + // Check title against OpenAI Moderation API before spending LLM tokens + if (c.env.OPENAI_API_KEY) { + const flagged = await openaiModerate(title, c.env.OPENAI_API_KEY); + if (flagged) { + c.executionCtx.waitUntil( + banSlugNow(slug, c.env).catch((e) => + console.error("banSlugNow failed", e) + ) + ); + return c.json( + { error: "this entry has been removed by moderation", banned: true }, + 404, + { "x-robots-tag": "noindex" } + ); + } + } + // Pull every prior link-context blurb other articles have written about // this slug. These become CANON the LLM must respect. let priorHints: string[] = []; diff --git a/src/worker/moderation.ts b/src/worker/moderation.ts index df77ae1..40368fe 100644 --- a/src/worker/moderation.ts +++ b/src/worker/moderation.ts @@ -21,6 +21,58 @@ export interface ModerationEnv { OPENROUTER_API_KEY: string; OPENROUTER_MODEL: string; OPENROUTER_MODERATION_MODEL?: string; + OPENAI_API_KEY?: string; +} + +/** + * Call the OpenAI Moderation API and return true if the text was flagged. + * If this fails, the async sweep can still catch stragglers. 
/**
 * Ask the OpenAI Moderation API whether `text` should be blocked.
 *
 * The check fails open: blank input, a non-2xx response, a timeout, a network
 * error, or a response body of an unexpected shape all resolve to `false`, so
 * a moderation outage never blocks the caller. Each failure is logged so that
 * a misconfigured key does not silently disable moderation. Per the original
 * note on this function, the async sweep can still catch stragglers.
 *
 * Call sites only invoke this when an API key is configured. The key is
 * provisioned as a Worker secret: `pnpm wrangler secret put OPENAI_API_KEY`.
 *
 * @param text - Content to classify (an article title or a comment body).
 * @param apiKey - OpenAI API key, sent as a bearer token.
 * @param timeoutMs - Upper bound for the whole request, in milliseconds.
 * @returns `true` only when the API reports at least one flagged result.
 */
export async function openaiModerate(
  text: string,
  apiKey: string,
  timeoutMs = 5000
): Promise<boolean> {
  // Nothing to classify: skip the network round trip entirely.
  if (text.trim() === "") return false;

  // Bound the call so a stalled upstream cannot hold the request open.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch("https://api.openai.com/v1/moderations", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({ model: "omni-moderation-latest", input: text }),
      signal: controller.signal,
    });
    if (!res.ok) {
      console.error("openaiModerate: unexpected status", res.status);
      return false;
    }
    const payload: unknown = await res.json();
    return hasFlaggedResult(payload);
  } catch (e) {
    console.error("openaiModerate: request failed", e);
    return false;
  } finally {
    clearTimeout(timer);
  }
}

/** Narrow an untyped moderation response and report whether any result is flagged. */
function hasFlaggedResult(payload: unknown): boolean {
  if (typeof payload !== "object" || payload === null) return false;
  const results = (payload as { results?: unknown }).results;
  if (!Array.isArray(results)) return false;
  return results.some(
    (r: unknown) =>
      typeof r === "object" &&
      r !== null &&
      (r as { flagged?: unknown }).flagged === true
  );
}

/**
 * Immediately mark `slug` as banned in the DB and delete it from
 * `env.ARTICLES`. Used when the pre-check flags a slug before generation
 * starts.
 *
 * Both steps are best-effort and independent: a failure in either one is
 * logged and swallowed, so the returned promise never rejects. On insert,
 * `enqueued_at` and `checked_at` are both set to the current time; when a row
 * for the slug already exists, only `status`, `reason` and `checked_at` are
 * overwritten.
 *
 * @param slug - Article slug to ban.
 * @param env - Bindings providing the `ARTICLES` store and the `DB` database.
 */
export async function banSlugNow(
  slug: string,
  env: ModerationEnv
): Promise<void> {
  const now = Date.now();
  try {
    await env.ARTICLES.delete(slug);
  } catch (e) {
    // Keep going: the ban row below must still be written.
    console.error("banSlugNow: KV delete failed", slug, e);
  }
  try {
    await env.DB
      .prepare(
        `INSERT INTO article_moderation (slug, status, reason, enqueued_at, checked_at)
         VALUES (?, 'banned', ?, ?, ?)
         ON CONFLICT(slug) DO UPDATE SET status='banned', reason=excluded.reason, checked_at=excluded.checked_at`
      )
      .bind(slug, "openai-moderation-precheck", now, now)
      .run();
  } catch (e) {
    console.error("banSlugNow: DB write failed", slug, e);
  }
}

// Pre-existing constant that follows these functions in moderation.ts (a
// context line of the patch); its use is outside this view.
const BATCH_SIZE = 30;