From 41eb05da82d9d3ac1d35eee1b85ff2d9b48dad9e Mon Sep 17 00:00:00 2001 From: vladzr Date: Mon, 20 Apr 2026 20:25:41 +0800 Subject: [PATCH 1/2] fix: vector-search function --- package.json | 2 - pages/api/vector-search.ts | 134 ++++++++++++------------------------- pnpm-lock.yaml | 26 ------- 3 files changed, 42 insertions(+), 120 deletions(-) diff --git a/package.json b/package.json index 5c5391a0..52feb771 100644 --- a/package.json +++ b/package.json @@ -42,12 +42,10 @@ "@vercel/analytics": "^1.0.2", "ai": "^2.2.14", "common-tags": "^1.8.2", - "gpt3-tokenizer": "^1.1.5", "next": "14.2.35", "nextra": "^3.2.3", "nextra-theme-docs": "^3.2.3", "openai": "^4.93.0", - "openai-edge": "^1.2.2", "react": "^19.1.0", "react-dom": "^19.1.0", "react-markdown": "^9.0.0", diff --git a/pages/api/vector-search.ts b/pages/api/vector-search.ts index 6024b73c..3ad6f937 100644 --- a/pages/api/vector-search.ts +++ b/pages/api/vector-search.ts @@ -1,42 +1,25 @@ import { createClient } from '@supabase/supabase-js'; -import { OpenAIStream, StreamingTextResponse } from 'ai'; import { codeBlock, oneLine } from 'common-tags'; -import GPT3Tokenizer from 'gpt3-tokenizer'; import type { NextRequest } from 'next/server'; -import { - ChatCompletionRequestMessage, - Configuration, - CreateEmbeddingResponse, - CreateModerationResponse, - OpenAIApi, -} from 'openai-edge'; +import OpenAI from 'openai'; import { ApplicationError, UserError } from '../../lib/errors'; -const openAiKey = process.env.OPENAI_KEY; -const supabaseUrl = process.env.SUPABASE_URL; -const supabaseServiceKey = process.env.SUPABASE_SERVICE_ROLE_KEY; - -const config = new Configuration({ - apiKey: openAiKey, -}); -const openai = new OpenAIApi(config); - export const runtime = 'edge'; export default async function handler(req: NextRequest) { try { + const openAiKey = process.env.OPENAI_KEY; + const supabaseUrl = process.env.SUPABASE_URL; + const supabaseServiceKey = process.env.SUPABASE_SERVICE_ROLE_KEY; + if (!openAiKey) { throw new ApplicationError('Missing environment variable OPENAI_KEY'); } - if (!supabaseUrl) { throw new ApplicationError('Missing environment variable SUPABASE_URL'); } - if (!supabaseServiceKey) { - throw new ApplicationError( - 'Missing environment variable SUPABASE_SERVICE_ROLE_KEY', - ); + throw new ApplicationError('Missing environment variable SUPABASE_SERVICE_ROLE_KEY'); } const requestData = await req.json(); @@ -51,39 +34,30 @@ export default async function handler(req: NextRequest) { throw new UserError('Missing query in request data'); } + const openai = new OpenAI({ apiKey: openAiKey }); const supabaseClient = createClient(supabaseUrl, supabaseServiceKey); - // Moderate the content to comply with OpenAI T&C const sanitizedQuery = query.trim(); - const moderationResponse: CreateModerationResponse = await openai - .createModeration({ input: sanitizedQuery }) - .then((res) => res.json()); - const [results] = moderationResponse.results; + // Moderate the content to comply with OpenAI T&C + const moderation = await openai.moderations.create({ input: sanitizedQuery }); + const [moderationResult] = moderation.results; - if (results.flagged) { + if (moderationResult.flagged) { throw new UserError('Flagged content', { flagged: true, - categories: results.categories, + categories: moderationResult.categories, }); } - // Create embedding from query - const embeddingResponse = await openai.createEmbedding({ - model: 'text-embedding-ada-002', + // Create embedding from query. + // Must use the same model as generate-embeddings.ts (text-embedding-3-small). + const embeddingResponse = await openai.embeddings.create({ + model: 'text-embedding-3-small', input: sanitizedQuery.replaceAll('\n', ' '), }); - if (embeddingResponse.status !== 200) { - throw new ApplicationError( - 'Failed to create embedding for question', - embeddingResponse, - ); - } - - const { - data: [{ embedding }], - }: CreateEmbeddingResponse = await embeddingResponse.json(); + const [{ embedding }] = embeddingResponse.data; const { error: matchError, data: pageSections } = await supabaseClient.rpc( 'match_page_sections', @@ -99,21 +73,12 @@ export default async function handler(req: NextRequest) { throw new ApplicationError('Failed to match page sections', matchError); } - const tokenizer = new GPT3Tokenizer({ type: 'gpt3' }); - let tokenCount = 0; + // Build context string, capped at ~6 000 chars (≈1 500 tokens). + // Avoids a tokenizer package that would exceed the edge function bundle limit. let contextText = ''; - - for (let i = 0; i < pageSections.length; i++) { - const pageSection = pageSections[i]; - const content = pageSection.content; - const encoded = tokenizer.encode(content); - tokenCount += encoded.text.length; - - if (tokenCount >= 1500) { - break; - } - - contextText += `${content.trim()}\n---\n`; + for (const section of pageSections) { + if (contextText.length + section.content.length > 6000) break; + contextText += `${section.content.trim()}\n---\n`; } const prompt = codeBlock` @@ -136,59 +101,44 @@ export default async function handler(req: NextRequest) { Answer as markdown (including related code snippets if available): `; - const chatMessage: ChatCompletionRequestMessage = { - role: 'user', - content: prompt, - }; - - const response = await openai.createChatCompletion({ + const completion = await openai.chat.completions.create({ model: 'gpt-3.5-turbo', - messages: [chatMessage], + messages: [{ role: 'user', content: prompt }], max_tokens: 512, temperature: 0, stream: true, }); - if (!response.ok) { - const error = await response.json(); - throw new ApplicationError('Failed to generate completion', error); - } - - // Transform the response into a readable stream - const stream = OpenAIStream(response); + // Stream plain text back. Compatible with useCompletion from the ai package. + const encoder = new TextEncoder(); + const stream = new ReadableStream({ + async start(controller) { + for await (const chunk of completion) { + const text = chunk.choices[0]?.delta?.content ?? ''; + if (text) controller.enqueue(encoder.encode(text)); + } + controller.close(); + }, + }); - // Return a StreamingTextResponse, which can be consumed by the client - return new StreamingTextResponse(stream); + return new Response(stream, { + headers: { 'Content-Type': 'text/plain; charset=utf-8' }, + }); } catch (err: unknown) { if (err instanceof UserError) { return new Response( - JSON.stringify({ - error: err.message, - data: err.data, - }), - { - status: 400, - headers: { 'Content-Type': 'application/json' }, - }, + JSON.stringify({ error: err.message, data: err.data }), + { status: 400, headers: { 'Content-Type': 'application/json' } }, ); } if (err instanceof ApplicationError) { - // Print out application errors with their additional data console.error(`${err.message}: ${JSON.stringify(err.data)}`); } else { - // Print out unexpected errors as is to help with debugging console.error(err); } - - // TODO: include more response info in debug environments return new Response( - JSON.stringify({ - error: 'There was an error processing your request', - }), - { - status: 500, - headers: { 'Content-Type': 'application/json' }, - }, + JSON.stringify({ error: 'There was an error processing your request' }), + { status: 500, headers: { 'Content-Type': 'application/json' } }, ); } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a7ee2815..52a51a40 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -41,9 +41,6 @@ importers: common-tags: specifier: ^1.8.2 version: 1.8.2 - gpt3-tokenizer: - specifier: ^1.1.5 - version: 1.1.5 next: specifier: 14.2.35 version: 14.2.35(react-dom@19.1.0(react@19.1.0))(react@19.1.0) @@ -56,9 +53,6 @@ importers: openai: specifier: ^4.93.0 version: 4.104.0(ws@8.18.2(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.64) - openai-edge: - specifier: ^1.2.2 - version: 1.2.3 react: specifier: ^19.1.0 version: 19.1.0 @@ -2135,9 +2129,6 @@ packages: array-iterate@2.0.1: resolution: {integrity: sha512-I1jXZMjAgCMmxT4qxXfPXa6SthSoE8h6gkSI9BGGNv8mP8G/v0blc+qFnZu6K42vTOiuME596QaLO0TP3Lk0xg==} - array-keyed-map@2.1.3: - resolution: {integrity: sha512-JIUwuFakO+jHjxyp4YgSiKXSZeC0U+R1jR94bXWBcVlFRBycqXlb+kH9JHxBGcxnVuSqx5bnn0Qz9xtSeKOjiA==} - array-union@1.0.2: resolution: {integrity: sha512-Dxr6QJj/RdU/hCaBjOfxW+q6lyuVE6JFWIrAUpuOOhoJJoQ99cUn3igRaHVB5P9WrgFVN0FfArM3x0cueOU8ng==} engines: {node: '>=0.10.0'} @@ -3335,10 +3326,6 @@ packages: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} - gpt3-tokenizer@1.1.5: - resolution: {integrity: sha512-O9iCL8MqGR0Oe9wTh0YftzIbysypNQmS5a5JG3cB3M4LMYjlAVvNnf8LUzVY9MrI7tj+YLY356uHtO2lLX2HpA==} - engines: {node: '>=12'} - graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} @@ -4484,11 +4471,6 @@ packages: oniguruma-to-es@2.3.0: resolution: {integrity: sha512-bwALDxriqfKGfUufKGGepCzu9x7nJQuoRoAFp4AnwehhC2crqrDIAP/uN2qdlsAvSMpeRC3+Yzhqc7hLmle5+g==} - openai-edge@1.2.3: - resolution: {integrity: sha512-CnEm2VbPSng1S9N/OFWmRmFC5Rm+pNID+7KYR6aIPJEnCXoH6CHzdBH6s+rLH+JGt0zuWmpC7NHMXED2dpZqPg==} - engines: {node: '>=18'} - deprecated: The official OpenAI SDK now supports edge runtimes, please use that instead. - openai@4.104.0: resolution: {integrity: sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==} hasBin: true @@ -9024,8 +9006,6 @@ snapshots: array-iterate@2.0.1: {} - array-keyed-map@2.1.3: {} - array-union@1.0.2: dependencies: array-uniq: 1.0.3 @@ -10323,10 +10303,6 @@ snapshots: gopd@1.2.0: {} - gpt3-tokenizer@1.1.5: - dependencies: - array-keyed-map: 2.1.3 - graceful-fs@4.2.11: {} gray-matter@4.0.3: @@ -11967,8 +11943,6 @@ snapshots: regex: 5.1.1 regex-recursion: 5.1.1 - openai-edge@1.2.3: {} - openai@4.104.0(ws@8.18.2(bufferutil@4.0.9)(utf-8-validate@5.0.10))(zod@3.25.64): dependencies: '@types/node': 18.19.112 From a0baf9da09e01212fd58366a1a5d11f54dd65f33 Mon Sep 17 00:00:00 2001 From: vladzr Date: Mon, 20 Apr 2026 20:25:47 +0800 Subject: [PATCH 2/2] fix: vector-search function --- pages/api/vector-search.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pages/api/vector-search.ts b/pages/api/vector-search.ts index 3ad6f937..d9042076 100644 --- a/pages/api/vector-search.ts +++ b/pages/api/vector-search.ts @@ -60,7 +60,7 @@ export default async function handler(req: NextRequest) { const [{ embedding }] = embeddingResponse.data; const { error: matchError, data: pageSections } = await supabaseClient.rpc( - 'match_page_sections', + 'match_docs_page_sections', { embedding, match_threshold: 0.78,