From 33e7da86bc902d695608073a8c034c4edc0eb26f Mon Sep 17 00:00:00 2001 From: Les Orchard Date: Mon, 15 Jun 2026 17:47:04 -0700 Subject: [PATCH] feat(search): add Exa Search API as a search provider Add exa-api alongside the existing parallel-api/google/bing/duckduckgo providers. ExaSearchProvider POSTs to api.exa.ai/search, opting into contents.highlights so results include snippets (Exa returns metadata only by default), and maps url/title/highlights through the shared markdown + security-wrapper path identical to the Parallel provider. Wiring: - config: add "exa-api" to SEARCH_PROVIDERS and an exa_api_key field (env EXA_API_KEY, --exa-api-key) - factory + webAgent: exa-api case and key-required validation guard - run.ts / taskRunner.ts: select the API key by provider, passing undefined for providers that don't use one (browser providers, none) so an unrelated key isn't threaded through the agent config Debug logging for both API providers (Exa and Parallel), gated on --debug via the [X:debug] console.warn convention: - request: the exact outbound body (query + options, API key omitted) - response: result count plus an abbreviated sample of the first result so all returned fields are visible (long strings truncated by the shared abbreviateForDebug helper) The debug flag threads through CreateSearchProviderOptions, which SearchService.create already forwards. Tests cover the factory, markdown formatting, empty/missing-title results, API error, the highlights opt-in, and debug request/response logging on and off for both API providers. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/commands/run.ts | 12 +- packages/core/src/config/defaults.ts | 19 +- packages/core/src/search/debugPreview.ts | 17 ++ .../core/src/search/providers/exaSearch.ts | 108 ++++++++ .../src/search/providers/parallelSearch.ts | 37 ++- packages/core/src/search/searchProvider.ts | 11 +- packages/core/src/webAgent.ts | 5 + packages/core/test/config.test.ts | 1 + .../core/test/search/searchProvider.test.ts | 250 ++++++++++++++++++ packages/server/src/taskRunner.ts | 26 +- 10 files changed, 472 insertions(+), 14 deletions(-) create mode 100644 packages/core/src/search/debugPreview.ts create mode 100644 packages/core/src/search/providers/exaSearch.ts diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index 1cbee13b..68a6fa53 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -331,6 +331,7 @@ async function executeRunCommand(task: string, options: any): Promise { } // Create WebAgent + const searchProvider = options.searchProvider ?? cfg.search_provider; const webAgent = new WebAgent(browser, { debug: debugMode, vision: options.vision ?? cfg.vision, @@ -341,8 +342,15 @@ async function executeRunCommand(task: string, options: any): Promise { initialNavigationRetries: options.initialNavigationRetries ?? cfg.initial_navigation_retries, maxConsecutiveErrors: options.maxConsecutiveErrors ?? cfg.max_consecutive_errors, maxTotalErrors: options.maxTotalErrors ?? cfg.max_total_errors, - searchProvider: options.searchProvider ?? cfg.search_provider, - searchApiKey: cfg.parallel_api_key, + searchProvider, + // Only pass a key for providers that use one; browser providers and + // "none" don't, so we avoid threading an unrelated key through config. + searchApiKey: + searchProvider === "exa-api" + ? cfg.exa_api_key + : searchProvider === "parallel-api" + ? cfg.parallel_api_key + : undefined, tabstackApiKey: options.tabstackApiKey ?? 
cfg.tabstack_api_key, tabstackApiUrl: options.tabstackApiUrl ?? cfg.tabstack_api_url, trustedHostnames: options.trustedHostnames ?? cfg.trusted_hostnames, diff --git a/packages/core/src/config/defaults.ts b/packages/core/src/config/defaults.ts index 02a552a0..ec3cc919 100644 --- a/packages/core/src/config/defaults.ts +++ b/packages/core/src/config/defaults.ts @@ -42,7 +42,14 @@ export type ReasoningLevel = (typeof REASONING_LEVELS)[number]; export const LOGGERS = ["console", "json"] as const; export type LoggerType = (typeof LOGGERS)[number]; -export const SEARCH_PROVIDERS = ["none", "duckduckgo", "google", "bing", "parallel-api"] as const; +export const SEARCH_PROVIDERS = [ + "none", + "duckduckgo", + "google", + "bing", + "parallel-api", + "exa-api", +] as const; export type SearchProviderName = (typeof SEARCH_PROVIDERS)[number]; export type ConfigFieldType = "string" | "string[]" | "number" | "boolean" | "enum"; @@ -136,6 +143,7 @@ export interface PiloConfig { // Search Configuration search_provider?: SearchProviderName; parallel_api_key?: string; + exa_api_key?: string; // Tabstack Configuration tabstack_api_key?: string; @@ -215,6 +223,7 @@ export interface PiloConfigResolved { // Search Configuration search_provider: SearchProviderName; parallel_api_key?: string; + exa_api_key?: string; // Tabstack Configuration tabstack_api_key?: string; @@ -733,6 +742,14 @@ export const FIELDS: Record = { description: "Parallel API key for search", category: "search", }, + exa_api_key: { + type: "string", + cli: "--exa-api-key", + placeholder: "key", + env: ["EXA_API_KEY"], + description: "Exa API key for search", + category: "search", + }, // Tabstack Configuration tabstack_api_key: { diff --git a/packages/core/src/search/debugPreview.ts b/packages/core/src/search/debugPreview.ts new file mode 100644 index 00000000..a21e3ba9 --- /dev/null +++ b/packages/core/src/search/debugPreview.ts @@ -0,0 +1,17 @@ +/** + * Debug helpers for search providers. 
+ */ + +const MAX_STRING_LEN = 120; + +/** + * Deep-clone a value for debug logging, truncating any long string so the + * "flavor" of a response (text, summaries, snippets, etc.) is visible without + * dumping the full payload. Non-string values pass through unchanged. + */ +export function abbreviateForDebug(value: unknown): unknown { + const json = JSON.stringify(value, (_key, v) => + typeof v === "string" && v.length > MAX_STRING_LEN ? `${v.slice(0, MAX_STRING_LEN)}…` : v, + ); + return json === undefined ? value : JSON.parse(json); +} diff --git a/packages/core/src/search/providers/exaSearch.ts b/packages/core/src/search/providers/exaSearch.ts new file mode 100644 index 00000000..8d6654a0 --- /dev/null +++ b/packages/core/src/search/providers/exaSearch.ts @@ -0,0 +1,108 @@ +/** + * Exa API Search Provider + * + * API-based search provider that uses the Exa API for search. + * Returns results formatted as markdown for consistency with browser providers. + */ + +import type { AriaBrowser } from "../../browser/ariaBrowser.js"; +import type { SearchProvider } from "../searchProvider.js"; +import { + wrapExternalContentWithWarning, + ExternalContentLabel, +} from "../../utils/promptSecurity.js"; +import { abbreviateForDebug } from "../debugPreview.js"; + +interface ExaSearchResult { + url: string; + title?: string; + highlights?: string[]; +} + +interface ExaApiResponse { + results?: ExaSearchResult[]; +} + +export class ExaSearchProvider implements SearchProvider { + readonly name = "exa-api"; + readonly requiresBrowser = false; + + constructor( + private apiKey: string, + private debug = false, + ) {} + + async search(query: string, _browser?: AriaBrowser): Promise { + const url = "https://api.exa.ai/search"; + const body = JSON.stringify({ + query, + // Opt into highlights, or Exa returns metadata only (no snippets). 
+ contents: { highlights: { maxCharacters: 1500 } }, + }); + + if (this.debug) { + // Log the exact outbound request body (sans API key) so the query and + // contents options are observable. Matches the [X:debug] console.warn convention. + console.warn(`[ExaSearch:debug] POST ${url}`, body); + } + + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-api-key": this.apiKey, + }, + body, + }); + + if (!response.ok) { + const errorText = await response.text().catch(() => "Unknown error"); + throw new Error(`Exa API error (${response.status}): ${errorText}`); + } + + const data = (await response.json()) as ExaApiResponse; + + if (this.debug) { + // Log the count plus an abbreviated sample of the first result so all + // returned fields (including ones we don't map, like summary/score/ + // publishedDate) are visible, with long strings truncated. + const results = data.results ?? []; + console.warn( + `[ExaSearch:debug] response: ${results.length} result(s), sample:`, + abbreviateForDebug(results[0]), + ); + } + + return this.formatAsMarkdown(query, data); + } + + private formatAsMarkdown(query: string, data: ExaApiResponse): string { + const header = `# Search Results for "${query}" (via ${this.name})`; + + let wrapped: string; + if (!data.results || data.results.length === 0) { + wrapped = wrapExternalContentWithWarning( + `${header}\n\nNo results found.`, + ExternalContentLabel.SearchResults, + ); + } else { + const lines: string[] = []; + + data.results.forEach((result, index) => { + const title = result.title || result.url; + lines.push(`${index + 1}. 
[${title}](${result.url})`); + if (result.highlights?.length) { + lines.push(result.highlights.join("\n")); + } + lines.push(""); + }); + + wrapped = wrapExternalContentWithWarning( + `${header}\n\n${lines.join("\n").trim()}`, + ExternalContentLabel.SearchResults, + ); + } + + return wrapped; + } +} diff --git a/packages/core/src/search/providers/parallelSearch.ts b/packages/core/src/search/providers/parallelSearch.ts index c8dd4443..5b36f6f8 100644 --- a/packages/core/src/search/providers/parallelSearch.ts +++ b/packages/core/src/search/providers/parallelSearch.ts @@ -11,6 +11,7 @@ import { wrapExternalContentWithWarning, ExternalContentLabel, } from "../../utils/promptSecurity.js"; +import { abbreviateForDebug } from "../debugPreview.js"; interface ParallelSearchResult { url: string; @@ -27,21 +28,33 @@ export class ParallelSearchProvider implements SearchProvider { readonly name = "parallel-api"; readonly requiresBrowser = false; - constructor(private apiKey: string) {} + constructor( + private apiKey: string, + private debug = false, + ) {} async search(query: string, _browser?: AriaBrowser): Promise { - const response = await fetch("https://api.parallel.ai/v1beta/search", { + const url = "https://api.parallel.ai/v1beta/search"; + const body = JSON.stringify({ + objective: query, + search_queries: [query], + excerpts: { max_chars_per_result: 1500 }, + }); + + if (this.debug) { + // Log the exact outbound request body (sans API key) so the query and + // options are observable. Matches the [X:debug] console.warn convention. 
+ console.warn(`[ParallelSearch:debug] POST ${url}`, body); + } + + const response = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json", "x-api-key": this.apiKey, "parallel-beta": "search-extract-2025-10-10", }, - body: JSON.stringify({ - objective: query, - search_queries: [query], - excerpts: { max_chars_per_result: 1500 }, - }), + body, }); if (!response.ok) { @@ -55,6 +68,16 @@ export class ParallelSearchProvider implements SearchProvider { throw new Error(`Parallel API error: ${data.error}`); } + if (this.debug) { + // Log the count plus an abbreviated sample of the first result so all + // returned fields are visible, with long strings truncated. + const results = data.results ?? []; + console.warn( + `[ParallelSearch:debug] response: ${results.length} result(s), sample:`, + abbreviateForDebug(results[0]), + ); + } + return this.formatAsMarkdown(query, data); } diff --git a/packages/core/src/search/searchProvider.ts b/packages/core/src/search/searchProvider.ts index 7e9feb4b..61a7e494 100644 --- a/packages/core/src/search/searchProvider.ts +++ b/packages/core/src/search/searchProvider.ts @@ -21,6 +21,8 @@ export interface SearchProvider { export interface CreateSearchProviderOptions { /** API key for providers that require authentication (e.g., Parallel) */ apiKey?: string; + /** When true, API providers log their outbound request and an abbreviated response sample at debug level */ + debug?: boolean; } /** @@ -49,7 +51,14 @@ export async function createSearchProvider( throw new Error("Parallel API key is required for parallel-api search provider"); } const { ParallelSearchProvider } = await import("./providers/parallelSearch.js"); - return new ParallelSearchProvider(options.apiKey); + return new ParallelSearchProvider(options.apiKey, options.debug); + } + case "exa-api": { + if (!options.apiKey) { + throw new Error("Exa API key is required for exa-api search provider"); + } + const { ExaSearchProvider } = await import("./providers/exaSearch.js"); + return new 
ExaSearchProvider(options.apiKey, options.debug); } default: throw new Error(`Unknown search provider: ${providerName}`); diff --git a/packages/core/src/webAgent.ts b/packages/core/src/webAgent.ts index 2e281a62..3d401e82 100644 --- a/packages/core/src/webAgent.ts +++ b/packages/core/src/webAgent.ts @@ -357,6 +357,10 @@ export class WebAgent { throw new Error("parallel_api_key is required when search_provider is 'parallel-api'"); } + if (this.searchProvider === "exa-api" && !this.searchApiKey) { + throw new Error("exa_api_key is required when search_provider is 'exa-api'"); + } + // Initialize services this.compressor = new SnapshotCompressor(); this.eventEmitter = options.eventEmitter ?? new WebAgentEventEmitter(); @@ -420,6 +424,7 @@ export class WebAgent { if (this.searchProvider !== "none") { this.searchService = await SearchService.create(this.searchProvider, this.browser, { apiKey: this.searchApiKey, + debug: this.debug, }); } diff --git a/packages/core/test/config.test.ts b/packages/core/test/config.test.ts index 63d349d8..1c1552cd 100644 --- a/packages/core/test/config.test.ts +++ b/packages/core/test/config.test.ts @@ -198,6 +198,7 @@ describe("ConfigManager", () => { "unsafe_mode", "search_provider", "parallel_api_key", + "exa_api_key", "tabstack_api_key", "tabstack_api_url", "upload_allowed_paths", diff --git a/packages/core/test/search/searchProvider.test.ts b/packages/core/test/search/searchProvider.test.ts index f54e158a..a54dd85d 100644 --- a/packages/core/test/search/searchProvider.test.ts +++ b/packages/core/test/search/searchProvider.test.ts @@ -5,6 +5,7 @@ import { DuckDuckGoSearchProvider } from "../../src/search/providers/duckduckgoS import { GoogleSearchProvider } from "../../src/search/providers/googleSearch.js"; import { BingSearchProvider } from "../../src/search/providers/bingSearch.js"; import { ParallelSearchProvider } from "../../src/search/providers/parallelSearch.js"; +import { ExaSearchProvider } from 
"../../src/search/providers/exaSearch.js"; import { BrowserSearchProvider } from "../../src/search/providers/browserSearch.js"; import type { AriaBrowser, TemporaryTab } from "../../src/browser/ariaBrowser.js"; import { LoadState } from "../../src/browser/ariaBrowser.js"; @@ -55,6 +56,22 @@ describe("Search Provider", () => { ); }); + it("should create Exa provider with API key", async () => { + const provider = await createSearchProvider("exa-api", { + apiKey: "test-api-key", + }); + + expect(provider).toBeInstanceOf(ExaSearchProvider); + expect(provider.name).toBe("exa-api"); + expect(provider.requiresBrowser).toBe(false); + }); + + it("should throw error for Exa provider without API key", async () => { + await expect(createSearchProvider("exa-api")).rejects.toThrow( + "Exa API key is required for exa-api search provider", + ); + }); + it("should throw error for unknown provider", async () => { // @ts-expect-error - testing invalid input await expect(createSearchProvider("unknown")).rejects.toThrow( @@ -254,5 +271,238 @@ describe("Search Provider", () => { }), ); }); + + it("should log the outbound request at debug level when debug is enabled", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const provider = new ParallelSearchProvider("test-api-key", true); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ results: [] }), + }); + + await provider.search("debug me"); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("[ParallelSearch:debug]"), + expect.stringContaining("debug me"), + ); + + warnSpy.mockRestore(); + }); + + it("should log an abbreviated response sample at debug level when debug is enabled", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const provider = new ParallelSearchProvider("test-api-key", true); + const longExcerpt = "y".repeat(500); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: 
() => + Promise.resolve({ + results: [{ url: "https://example.com", title: "Example", excerpts: [longExcerpt] }], + }), + }); + + await provider.search("debug me"); + + const responseCall = (warnSpy.mock.calls as unknown[][]).find((c) => + String(c[0]).includes("response"), + ); + expect(responseCall).toBeDefined(); + const logged = JSON.stringify(responseCall?.[1]); + expect(logged).toContain("https://example.com"); + expect(logged).not.toContain(longExcerpt); + expect(logged).toContain("…"); + + warnSpy.mockRestore(); + }); + + it("should not log when debug is disabled", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const provider = new ParallelSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ results: [] }), + }); + + await provider.search("quiet"); + + expect(warnSpy).not.toHaveBeenCalled(); + + warnSpy.mockRestore(); + }); + }); + + describe("ExaSearchProvider", () => { + it("should format results as markdown", async () => { + const provider = new ExaSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + results: [ + { + url: "https://example.com", + title: "Example", + highlights: ["This is an example"], + }, + { url: "https://test.com", title: "Test", highlights: ["This is a test"] }, + ], + }), + }); + + const result = await provider.search("test query"); + + expect(result).toContain('# Search Results for "test query"'); + expect(result).toContain("1. [Example](https://example.com)"); + expect(result).toContain("This is an example"); + expect(result).toContain("2. 
[Test](https://test.com)"); + expect(result).toContain("This is a test"); + }); + + it("should handle empty results", async () => { + const provider = new ExaSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ results: [] }), + }); + + const result = await provider.search("test query"); + + expect(result).toContain('# Search Results for "test query"'); + expect(result).toContain("No results found."); + }); + + it("should use URL as title when title is missing", async () => { + const provider = new ExaSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + results: [{ url: "https://example.com/page", highlights: ["Content"] }], + }), + }); + + const result = await provider.search("test"); + + expect(result).toContain("[https://example.com/page](https://example.com/page)"); + }); + + it("should throw error on API failure", async () => { + const provider = new ExaSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 401, + text: () => Promise.resolve("Unauthorized"), + }); + + await expect(provider.search("test")).rejects.toThrow("Exa API error (401): Unauthorized"); + }); + + it("should send correct request to API", async () => { + const provider = new ExaSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ results: [] }), + }); + + await provider.search("my search query"); + + expect(global.fetch).toHaveBeenCalledWith( + "https://api.exa.ai/search", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ + "Content-Type": "application/json", + "x-api-key": "test-api-key", + }), + body: expect.stringContaining("my search query"), + }), + ); + + // Must opt into highlights, or Exa returns metadata only. 
+ const body = JSON.parse((global.fetch as ReturnType).mock.calls[0][1].body); + expect(body.contents.highlights).toBeTruthy(); + }); + + it("should log the outbound request at debug level when debug is enabled", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const provider = new ExaSearchProvider("test-api-key", true); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ results: [] }), + }); + + await provider.search("debug me"); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("[ExaSearch:debug]"), + expect.stringContaining("debug me"), + ); + + warnSpy.mockRestore(); + }); + + it("should log an abbreviated response sample at debug level when debug is enabled", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const provider = new ExaSearchProvider("test-api-key", true); + const longText = "x".repeat(500); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => + Promise.resolve({ + results: [ + { + url: "https://example.com", + title: "Example", + summary: "A concise summary", + text: longText, + highlights: ["hi"], + }, + ], + }), + }); + + await provider.search("debug me"); + + const responseCall = (warnSpy.mock.calls as unknown[][]).find((c) => + String(c[0]).includes("response"), + ); + expect(responseCall).toBeDefined(); + const logged = JSON.stringify(responseCall?.[1]); + // Extra fields beyond what we map (summary) are surfaced... + expect(logged).toContain("https://example.com"); + expect(logged).toContain("A concise summary"); + // ...but long values are truncated rather than dumped in full. 
+ expect(logged).not.toContain(longText); + expect(logged).toContain("…"); + + warnSpy.mockRestore(); + }); + + it("should not log when debug is disabled", async () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const provider = new ExaSearchProvider("test-api-key"); + + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + json: () => Promise.resolve({ results: [] }), + }); + + await provider.search("quiet"); + + expect(warnSpy).not.toHaveBeenCalled(); + + warnSpy.mockRestore(); + }); }); }); diff --git a/packages/server/src/taskRunner.ts b/packages/server/src/taskRunner.ts index 7c845b3a..bdce5a4f 100644 --- a/packages/server/src/taskRunner.ts +++ b/packages/server/src/taskRunner.ts @@ -87,7 +87,7 @@ export interface PiloTaskRequest { logger?: "console" | "json"; // Search configuration overrides - searchProvider?: "none" | "duckduckgo" | "google" | "bing" | "parallel-api"; + searchProvider?: "none" | "duckduckgo" | "google" | "bing" | "parallel-api" | "exa-api"; // Tabstack configuration overrides tabstackApiKey?: string; @@ -277,6 +277,17 @@ export function validateTaskRequest( }), }; } + if (effectiveSearchProvider === "exa-api" && !serverConfig.exa_api_key) { + return { + status: 400, + response: createErrorResponse({ + message: "exa-api search provider requires EXA_API_KEY to be configured on the server", + code: "MISSING_SEARCH_API_KEY", + reason: "INVALID_REQUEST", + phase: "setup", + }), + }; + } try { getAIProviderInfo(); @@ -326,6 +337,8 @@ export async function runTask(options: TaskRunnerOptions): Promise 0 ? { allowedPaths: uploadAllowedPaths } : false; const advertisedUploadFiles = await resolveAdvertisedUploadFiles(allowFileUpload); + const searchProvider = body.searchProvider ?? serverConfig.search_provider; + const browserConfig = { browser: browserName as (typeof PLAYWRIGHT_BROWSERS)[number], channel: body.channel ?? 
serverConfig.channel, @@ -373,8 +386,15 @@ export async function runTask(options: TaskRunnerOptions): Promise