From f3a7e07bb2d3dc47571bb4ba8690b02448f055aa Mon Sep 17 00:00:00 2001 From: kleinlennart <31488915+kleinlennart@users.noreply.github.com> Date: Mon, 22 Jun 2026 22:07:49 +0200 Subject: [PATCH 1/3] setup openapi spec --- .../data/[locale]/[format]/[...path]/route.ts | 53 +- app/openapi/route.ts | 32 + lib/openapi/schemas.ts | 320 ++++ lib/openapi/spec.ts | 410 +++++ package.json | 3 +- proxy.ts | 5 +- public/openapi.json | 1611 +++++++++++++++++ scripts/generate-openapi.ts | 12 + 8 files changed, 2416 insertions(+), 30 deletions(-) create mode 100644 app/openapi/route.ts create mode 100644 lib/openapi/schemas.ts create mode 100644 lib/openapi/spec.ts create mode 100644 public/openapi.json create mode 100644 scripts/generate-openapi.ts diff --git a/app/api/data/[locale]/[format]/[...path]/route.ts b/app/api/data/[locale]/[format]/[...path]/route.ts index a30d41e..869245b 100644 --- a/app/api/data/[locale]/[format]/[...path]/route.ts +++ b/app/api/data/[locale]/[format]/[...path]/route.ts @@ -1,34 +1,34 @@ -import { NextRequest, NextResponse } from "next/server"; -import { - getVideoByAssetId, - getVideoByCitation, - getTranscriptByKalturaId, - queryVideos, - type VideoRecord, - type VideosQueryParams, -} from "@/lib/db"; +import { routing } from "@/i18n/routing"; import { - getCachedTranscriptedEntries, - getCachedTranscriptedEntriesByLanguage, + getCachedTranscriptedEntries, + getCachedTranscriptedEntriesByLanguage, } from "@/lib/cached-db"; -import { getVideoMetadata, recordToVideo } from "@/lib/un-api"; -import { - getSpeakerMapping, - SpeakerInfo, - formatSpeakerInfo, -} from "@/lib/speakers"; +import { compressedJson, compressedText } from "@/lib/compressed-json"; +import { TRANSCRIPT_DISCLAIMER } from "@/lib/config"; import { getCountryName } from "@/lib/country-lookup"; +import { + getTranscriptByKalturaId, + getVideoByAssetId, + getVideoByCitation, + queryVideos, + type VideoRecord, + type VideosQueryParams, +} from "@/lib/db"; import { symbolFromSlug } from "@/lib/meeting-slug"; -import { videoUrl } from "@/lib/video-url"; -import { TRANSCRIPT_DISCLAIMER } from "@/lib/config"; -import { routing } from "@/i18n/routing"; -import { compressedJson, compressedText } from "@/lib/compressed-json"; import { - buildSpeakerSegments, - formatTranscriptAsPlainText, - formatSpeakerText, - formatTimecode, + formatSpeakerInfo, + getSpeakerMapping, + SpeakerInfo, +} from "@/lib/speakers"; +import { + buildSpeakerSegments, + formatSpeakerText, + formatTimecode, + formatTranscriptAsPlainText, } from "@/lib/transcript-formatting"; +import { getVideoMetadata, recordToVideo } from "@/lib/un-api"; +import { videoUrl } from "@/lib/video-url"; +import { NextRequest, NextResponse } from "next/server"; // Unified data-API handler. The proxy (proxy.ts) rewrites // /{locale}/{slug}.json → /api/data/{locale}/json/{slug} @@ -167,8 +167,7 @@ async function handleMeeting( if (format === "text") { return textResponse( request, - buildHeader(locale, video, record, null) + - "No transcript available.\n", + buildHeader(locale, video, record, null) + "No transcript available.\n", ); } const metadata = await getVideoMetadata(record.asset_id); diff --git a/app/openapi/route.ts b/app/openapi/route.ts new file mode 100644 index 0000000..2954d93 --- /dev/null +++ b/app/openapi/route.ts @@ -0,0 +1,32 @@ +// Serves the Swagger UI API reference at /openapi. +// Using a plain route handler (not a React page) avoids React strict-mode +// warnings from swagger-ui-react's legacy class components. +export async function GET() { + const html = ` + + + + + API Reference — UN Transcripts + + + + +
+ + + +`; + + return new Response(html, { + headers: { "Content-Type": "text/html; charset=utf-8" }, + }); +} diff --git a/lib/openapi/schemas.ts b/lib/openapi/schemas.ts new file mode 100644 index 0000000..9306c65 --- /dev/null +++ b/lib/openapi/schemas.ts @@ -0,0 +1,320 @@ +import { z } from "zod"; + +// Zod schemas that are the single source of truth for the public data API's +// request parameters and response shapes. `scripts/generate-openapi.ts` feeds +// these through `z.toJSONSchema()` to assemble `public/openapi.json`. +// +// This module is intentionally dependency-free (no `next/server`, no DB +// imports) so it runs under `tsx` in the `prebuild` step without dragging in +// the server runtime. That's also why the locale list is duplicated as a +// literal here rather than imported from `i18n/routing` — keep this graph +// pure. + +// ── Shared ──────────────────────────────────────────────────────────────── + +/** The six official UN languages, in canonical order (see i18n/routing.ts). */ +export const LocaleSchema = z + .enum(["ar", "zh", "en", "fr", "ru", "es"]) + .describe("ISO 639-1 code of one of the six official UN languages."); + +// ── Request parameters ────────────────────────────────────────────────────── + +/** Query params for `GET /{locale}/meetings.{json,txt}`. */ +export const MeetingsQuerySchema = z.object({ + q: z + .string() + .min(2) + .optional() + .describe("Full-text search query (ignored if shorter than 2 chars)."), + category: z.string().optional().describe("Filter by WebTV category name."), + date: z + .string() + .regex(/^\d{4}-\d{2}-\d{2}$/) + .optional() + .describe("Filter to a single date (YYYY-MM-DD)."), + sort: z + .enum(["date_desc", "date_asc", "title_asc", "title_desc"]) + .optional() + .describe("Sort order (default: date_desc)."), + offset: z.coerce + .number() + .int() + .min(0) + .optional() + .describe("Pagination offset; page size is 250."), + text: z + .union([ + z.enum(["transcript", "pv", "sr"]), + z.array(z.enum(["transcript", "pv", "sr"])), + ]) + .optional() + .describe( + "Restrict to meetings that have the given document(s) available. " + + "Repeat the param to require multiple (e.g. text=transcript&text=pv).", + ), + xlang: z + .literal("1") + .optional() + .describe( + "Set to 1 to include meetings whose transcript exists only in other " + + "languages than the requested locale.", + ), +}); + +/** Query param for `GET /{locale}/{slug}.{json,txt}`. */ +export const MeetingQuerySchema = z.object({ + language: LocaleSchema.optional().describe( + "Transcript language to return. Defaults to the URL locale's transcript " + + "if one exists, otherwise the most recent transcript in any language.", + ), +}); + +/** Query param for `GET /api/languages`. */ +export const LanguagesQuerySchema = z.object({ + kalturaId: z + .string() + .min(1) + .describe("Kaltura player ID of the video (the `kaltura_id` field)."), +}); + +/** Query params for `GET /api/pv`. */ +export const PvQuerySchema = z.object({ + symbol: z + .string() + .min(1) + .describe("UN document symbol, e.g. S/PV.10175 or A/79/PV.21."), + lang: LocaleSchema.optional().describe("Document language (default: en)."), +}); + +// ── Response building blocks ──────────────────────────────────────────────── + +export const TopicSchema = z + .object({ + key: z.string(), + label: z.string(), + description: z.string(), + }) + .describe("A substantive policy topic identified across the meeting."); + +export const SpeakerSchema = z + .object({ + name: z.string().nullable(), + affiliation: z + .string() + .nullable() + .describe("ISO alpha-3 country code or short affiliation label."), + affiliation_full: z + .string() + .nullable() + .describe("Official UN country name resolved from the ISO code."), + group: z.string().nullable(), + function: z.string().nullable(), + }) + .describe("Resolved speaker for a statement."); + +export const WordSchema = z.object({ + text: z.string(), + start: z.number().describe("Start time in seconds."), + end: z.number().describe("End time in seconds."), +}); + +export const SentenceSchema = z.object({ + text: z.string(), + start: z.number().describe("Start time in seconds."), + end: z.number().describe("End time in seconds."), + topics: z.array(TopicSchema), + words: z + .array(WordSchema) + .optional() + .describe("Per-word timing; present only when word-level timing exists."), +}); + +export const ParagraphSchema = z.object({ + sentences: z.array(SentenceSchema), +}); + +export const StatementSchema = z.object({ + statement_number: z + .number() + .int() + .describe("1-based index of the statement."), + paragraphs: z.array(ParagraphSchema), + speaker: SpeakerSchema, +}); + +export const VideoInfoSchema = z.object({ + id: z.string().describe("UN Web TV asset ID (the DB primary key)."), + kaltura_id: z.string().nullable(), + title: z.string().nullable(), + clean_title: z.string().nullable(), + url: z.string().nullable(), + date: z.string().nullable(), + scheduled_time: z.string().nullable(), + status: z.string().nullable(), + duration: z.number().nullable(), + category: z.string().nullable(), + body: z.string().nullable().describe("UN organ / body name."), + event_code: z.string().nullable(), + event_type: z.string().nullable(), + session_number: z.string().nullable(), + pv_symbol: z.string().nullable().describe("Official UN document symbol."), + pv_part: z.string().nullable(), + slug: z.string().describe("Human-readable meeting slug, e.g. sc/10175."), +}); + +export const MetadataSchema = z + .object({ + summary: z.string().nullable(), + description: z.string().nullable(), + categories: z.array(z.string()), + geographic_subject: z.array(z.string()), + subject_topical: z.array(z.string()), + corporate_name: z.array(z.string()), + speaker_affiliation: z.array(z.string()), + related_documents: z.array(z.string()), + }) + .describe("Structured metadata harvested from the UN Web TV page."); + +// ── Meeting list response ─────────────────────────────────────────────────── + +export const MeetingListItemSchema = z.object({ + title: z.string().nullable(), + date: z.string().nullable(), + body: z.string().nullable(), + category: z.string().nullable(), + slug: z.string(), + duration: z.number().nullable(), + hasTranscript: z.boolean(), + pageUrl: z.string(), + jsonUrl: z.string(), + textUrl: z.string().nullable(), +}); + +export const MeetingsResponseSchema = z.object({ + meetings: z.array(MeetingListItemSchema), + total: z.number().int(), + totalIncludingOther: z + .number() + .int() + .describe("Total ignoring the locale filter (see the xlang param)."), + hasMore: z.boolean(), + offset: z.number().int(), + pageSize: z.number().int(), +}); + +// ── Meeting detail response (three variants) ──────────────────────────────── + +const CompletedTranscriptSchema = z.object({ + transcript_id: z.string(), + language: z.string(), + data: z.array(StatementSchema), + topics: z.array(TopicSchema), +}); + +/** Returned when the transcript is finished. */ +export const MeetingCompletedResponseSchema = z.object({ + disclaimer: z.string(), + video: VideoInfoSchema, + metadata: MetadataSchema, + transcript: CompletedTranscriptSchema, +}); + +/** Returned when no transcript exists for the meeting. */ +export const MeetingNoTranscriptResponseSchema = z.object({ + disclaimer: z.string(), + video: VideoInfoSchema, + metadata: MetadataSchema, + transcript: z.null(), + message: z.string(), +}); + +/** Returned while the transcription pipeline is still running. */ +export const MeetingInProgressResponseSchema = z.object({ + disclaimer: z.string(), + video: VideoInfoSchema, + metadata: MetadataSchema, + transcript: z.object({ + status: z.string().describe("Current pipeline stage."), + transcriptId: z.string(), + }), + message: z.string(), +}); + +export const MeetingResponseSchema = z + .union([ + MeetingCompletedResponseSchema, + MeetingNoTranscriptResponseSchema, + MeetingInProgressResponseSchema, + ]) + .describe( + "One of three shapes depending on transcript availability: completed " + + "(full transcript), in-progress (status only), or absent (null).", + ); + +// ── Other endpoints ───────────────────────────────────────────────────────── + +export const LanguagesResponseSchema = z.object({ + entryId: z.string().describe("Canonical Kaltura entry ID."), + languages: z.array( + z.object({ + code: LocaleSchema, + name: z.string(), + available: z + .boolean() + .describe("Whether an audio track exists for this language."), + transcriptStatus: z + .string() + .nullable() + .describe("Transcription status, or null if none exists."), + }), + ), +}); + +const PvTurnSchema = z.object({ + speaker: z.string(), + affiliation: z.string().optional(), + spokenLanguage: z.string().optional(), + onBehalfOf: z.string().optional(), + paragraphNumber: z.number().optional(), + paragraphs: z.array(z.string()), + type: z.enum(["speech", "procedural"]), + proceduralParagraphs: z.array(z.number()).optional(), +}); + +export const PvResponseSchema = z + .object({ + symbol: z.string(), + body: z.string(), + session: z.string(), + meetingNumber: z.string(), + date: z.string(), + location: z.string(), + language: z.string(), + status: z.enum(["provisional", "official"]), + president: z.object({ name: z.string(), country: z.string() }).nullable(), + members: z.array( + z.object({ country: z.string(), representative: z.string() }), + ), + agendaItems: z.array(z.string()), + turns: z.array(PvTurnSchema), + fullText: z.string(), + }) + .describe("Parsed UN verbatim (PV) or summary (SR) record."); + +export const HealthResponseSchema = z.object({ + status: z.enum(["ok", "error"]), +}); + +// ── Error shapes (two distinct conventions in the codebase) ───────────────── + +/** Shape returned by the data-API routes (`/{locale}/...`). */ +export const DataApiErrorSchema = z + .object({ error: z.string() }) + .describe("Error response from the meeting data API."); + +/** Shape returned by `apiError()` (`/api/languages`, `/api/pv`). */ +export const ApiErrorSchema = z + .object({ + error: z.object({ code: z.string(), message: z.string() }), + }) + .describe("Structured error response from /api/* routes."); diff --git a/lib/openapi/spec.ts b/lib/openapi/spec.ts new file mode 100644 index 0000000..732bc33 --- /dev/null +++ b/lib/openapi/spec.ts @@ -0,0 +1,410 @@ +import { z } from "zod"; +import { + MeetingsResponseSchema, + MeetingResponseSchema, + LanguagesResponseSchema, + PvResponseSchema, + HealthResponseSchema, + DataApiErrorSchema, + ApiErrorSchema, +} from "./schemas"; + +// Assembles the OpenAPI 3.0.3 document for the public data API. Response +// bodies come from the Zod schemas in ./schemas via `z.toJSONSchema` + +// `toOas30`; path/operation metadata is hand-written TypeScript. +// See scripts/generate-openapi.ts. +// +// We target OpenAPI 3.0.3 (not 3.1) because swagger-ui-react's 3.1 parsing +// layer (apidom) fails under Turbopack / certain bundlers. 3.0 uses swagger- +// ui's legacy parser which is battle-tested. The main schema difference: +// nullable fields use `nullable: true` instead of `anyOf: [{type:"null"},…]`. +// `toOas30` handles that conversion automatically. + +const OPENAPI_VERSION = "3.0.3"; + +type JsonObj = Record; + +/** + * Recursively convert a JSON Schema 2020-12 object (from `z.toJSONSchema`) to + * an OpenAPI 3.0-compatible schema object. + * + * The only structural difference we need to handle: nullable fields. + * - 2020-12: `{anyOf: [T, {type:"null"}]}` (possibly with sibling props) + * - OAS 3.0: spread T + `nullable: true` (+ sibling props) + * - Standalone `{type:"null"}`: `{nullable: true}` + */ +function toOas30(schema: unknown): unknown { + if (!schema || typeof schema !== "object" || Array.isArray(schema)) + return schema; + const s = schema as JsonObj; + + // Standalone null type (e.g. z.null() in a union variant) + if (s.type === "null") { + const { type: _t, ...rest } = s; + return { nullable: true, ...rest }; + } + + // anyOf containing exactly one {type:"null"} → collapse to nullable + if (Array.isArray(s.anyOf)) { + const nullIdx = (s.anyOf as unknown[]).findIndex( + (v) => + v && + typeof v === "object" && + !Array.isArray(v) && + (v as JsonObj).type === "null", + ); + if (nullIdx !== -1) { + const others = (s.anyOf as unknown[]).filter((_, i) => i !== nullIdx); + const { anyOf: _a, ...siblings } = s; + if (others.length === 1) { + return { ...(toOas30(others[0]) as JsonObj), nullable: true, ...siblings }; + } + return { anyOf: others.map(toOas30), nullable: true, ...siblings }; + } + } + + // Recurse into nested schema locations + const out: JsonObj = {}; + for (const [k, v] of Object.entries(s)) { + if (k === "properties" && v && typeof v === "object" && !Array.isArray(v)) { + out[k] = Object.fromEntries( + Object.entries(v as JsonObj).map(([pk, pv]) => [pk, toOas30(pv)]), + ); + } else if (k === "items") { + out[k] = toOas30(v); + } else if ( + (k === "anyOf" || k === "oneOf" || k === "allOf") && + Array.isArray(v) + ) { + out[k] = v.map(toOas30); + } else { + out[k] = v; + } + } + return out; +} + +/** + * Convert a Zod schema to an OpenAPI 3.0-compatible component schema. + * Strips `$schema` (dialect is declared at the document level in OAS) and + * runs `toOas30` to fix nullable encoding. + */ +function toComponent(schema: z.ZodType): Record { + const json = z.toJSONSchema(schema, { + target: "draft-2020-12", + io: "output", + }) as JsonObj; + delete json.$schema; + return toOas30(json) as JsonObj; +} + +const COMPONENT_SCHEMAS: Record = { + MeetingsResponse: MeetingsResponseSchema, + MeetingResponse: MeetingResponseSchema, + LanguagesResponse: LanguagesResponseSchema, + PvResponse: PvResponseSchema, + HealthResponse: HealthResponseSchema, + DataApiError: DataApiErrorSchema, + ApiError: ApiErrorSchema, +}; + +const ref = (name: string) => ({ $ref: `#/components/schemas/${name}` }); + +const jsonContent = (schemaName: string) => ({ + "application/json": { schema: ref(schemaName) }, +}); + +const localeParam = { + name: "locale", + in: "path" as const, + required: true, + description: "One of the six official UN languages.", + schema: { type: "string", enum: ["ar", "zh", "en", "fr", "ru", "es"] }, +}; + +export function buildSpec(): Record { + return { + openapi: OPENAPI_VERSION, + info: { + title: "UN Transcripts API", + version: "1.0.0", + description: + "Public, read-only API for UN Web TV meeting records and " + + "automatically generated transcripts.\n\n" + + "Meeting URLs use human-readable slugs derived from official UN " + + "document symbols (S/PV.10175 → `sc/10175`, A/79/PV.21 → " + + "`ga/79/21`). Append `.json` or `.txt` to any meeting page URL to " + + "get the same content as data. Videos without a document symbol are " + + "addressable at `/{locale}/asset/{asset_id}`.\n\n" + + "Transcripts are produced by automatic speech recognition over real " + + "meeting audio — not official records. The authoritative record is " + + "the UN verbatim (PV) or summary (SR) document; see `GET /api/pv`.", + }, + servers: [{ url: "/" }], + tags: [ + { name: "meetings", description: "Meeting records and transcripts." }, + { name: "discovery", description: "Audio tracks and source documents." }, + { name: "health", description: "Service status." }, + ], + paths: { + "/{locale}/meetings.json": { + get: { + tags: ["meetings"], + summary: "Browse or search meetings (JSON)", + description: + "Paginated list of meetings within the last 365 days, newest " + + "first. Supports full-text search and filtering. Page size is 250.", + operationId: "listMeetings", + parameters: [ + localeParam, + qp("q", "Full-text search query (min 2 chars).", { + type: "string", + }), + qp("category", "Filter by WebTV category name.", { + type: "string", + }), + qp("date", "Filter to a single date (YYYY-MM-DD).", { + type: "string", + pattern: "^\\d{4}-\\d{2}-\\d{2}$", + }), + qp("sort", "Sort order (default date_desc).", { + type: "string", + enum: ["date_desc", "date_asc", "title_asc", "title_desc"], + }), + qp("offset", "Pagination offset.", { + type: "integer", + minimum: 0, + }), + { + name: "text", + in: "query", + required: false, + description: + "Restrict to meetings that have the given document(s). " + + "Repeat to require multiple.", + schema: { + type: "array", + items: { type: "string", enum: ["transcript", "pv", "sr"] }, + }, + style: "form", + explode: true, + }, + qp( + "xlang", + "Set to 1 to include transcripts that exist only in other languages.", + { type: "string", enum: ["1"] }, + ), + ], + responses: { + "200": { + description: "A page of meetings.", + content: jsonContent("MeetingsResponse"), + }, + }, + }, + }, + "/{locale}/meetings.txt": { + get: { + tags: ["meetings"], + summary: "Browse or search meetings (plain text)", + description: + "Same data as `meetings.json` in a compact, LLM-friendly plain-text " + + "table. Accepts the same query parameters.", + operationId: "listMeetingsText", + parameters: [localeParam], + responses: { + "200": { + description: "Plain-text meeting list.", + content: { "text/plain": { schema: { type: "string" } } }, + }, + }, + }, + }, + "/{locale}/{slug}.json": { + get: { + tags: ["meetings"], + summary: "Get a single meeting and its transcript (JSON)", + description: + "Returns the meeting metadata and transcript for a slug derived " + + "from a UN document symbol. The response has one of three shapes " + + "depending on transcript availability (completed, in-progress, or " + + "absent — see the schema).\n\n" + + "`slug` may contain slashes (it is a multi-segment path). Examples:\n" + + "- `sc/10175` — Security Council (S/PV.10175)\n" + + "- `ga/79/21` — General Assembly plenary (A/79/PV.21)\n" + + "- `ga/c1/79/21` — GA First Committee\n" + + "- `hrc/55/12` — Human Rights Council\n" + + "- `ecosoc/2024/30` — ECOSOC\n" + + "- `asset/k1a2b3c4` — permalink for videos without a document symbol", + operationId: "getMeeting", + parameters: [ + localeParam, + slugParam, + qp( + "language", + "Transcript language to return (defaults to the URL locale).", + { type: "string", enum: ["ar", "zh", "en", "fr", "ru", "es"] }, + ), + ], + responses: { + "200": { + description: "The meeting and (if available) its transcript.", + content: jsonContent("MeetingResponse"), + }, + "404": { + description: "Unknown locale, invalid slug, or video not found.", + content: jsonContent("DataApiError"), + }, + }, + }, + }, + "/{locale}/{slug}.txt": { + get: { + tags: ["meetings"], + summary: "Get a single meeting transcript (plain text)", + description: + "Plain-text rendering of the transcript with speaker headers and " + + "timecodes. See the `.json` variant for the slug grammar.", + operationId: "getMeetingText", + parameters: [localeParam, slugParam], + responses: { + "200": { + description: "Plain-text transcript.", + content: { "text/plain": { schema: { type: "string" } } }, + }, + "404": { + description: "Unknown locale, invalid slug, or video not found.", + content: jsonContent("DataApiError"), + }, + }, + }, + }, + "/api/languages": { + get: { + tags: ["discovery"], + summary: "List audio language tracks for a video", + description: + "Returns all six UN languages with a flag for whether an audio " + + "track exists and the current transcript status for each.", + operationId: "getLanguages", + parameters: [ + { + name: "kalturaId", + in: "query", + required: true, + description: "Kaltura player ID (the `kaltura_id` field).", + schema: { type: "string" }, + }, + ], + responses: { + "200": { + description: "Available languages and transcript statuses.", + content: jsonContent("LanguagesResponse"), + }, + "400": { + description: "Missing kalturaId.", + content: jsonContent("ApiError"), + }, + }, + }, + }, + "/api/pv": { + get: { + tags: ["discovery"], + summary: "Fetch a UN verbatim/summary record", + description: + "Fetches the official UN verbatim (PV) or summary (SR) record PDF " + + "for a document symbol, parses it to structured JSON, and caches " + + "it. This is the authoritative record (transcripts are not).", + operationId: "getPvDocument", + parameters: [ + { + name: "symbol", + in: "query", + required: true, + description: "UN document symbol, e.g. S/PV.10175.", + schema: { type: "string" }, + }, + { + name: "lang", + in: "query", + required: false, + description: "Document language (default en).", + schema: { + type: "string", + enum: ["ar", "zh", "en", "fr", "ru", "es"], + }, + }, + ], + responses: { + "200": { + description: "Parsed document.", + content: jsonContent("PvResponse"), + }, + "400": { + description: "Missing symbol.", + content: jsonContent("ApiError"), + }, + "404": { + description: "Document not found or not available.", + content: jsonContent("ApiError"), + }, + }, + }, + }, + "/api/health": { + get: { + tags: ["health"], + summary: "Service health check", + description: "Pings the database. 200 when healthy, 503 otherwise.", + operationId: "getHealth", + responses: { + "200": { + description: "Healthy.", + content: jsonContent("HealthResponse"), + }, + "503": { + description: "Database unreachable.", + content: jsonContent("HealthResponse"), + }, + }, + }, + }, + }, + components: { + schemas: Object.fromEntries( + Object.entries(COMPONENT_SCHEMAS).map(([name, schema]) => [ + name, + toComponent(schema), + ]), + ), + }, + }; +} + +/** Build a simple query parameter object. */ +function qp( + name: string, + description: string, + schema: Record, +) { + return { name, in: "query", required: false, description, schema }; +} + +const slugParam = { + name: "slug", + in: "path" as const, + required: true, + description: + "Meeting slug derived from a UN document symbol (may contain slashes), " + + "or `asset/{asset_id}` for videos without a symbol.", + schema: { type: "string" }, + examples: { + securityCouncil: { value: "sc/10175", summary: "Security Council" }, + generalAssembly: { value: "ga/79/21", summary: "GA plenary" }, + gaCommittee: { value: "ga/c1/79/21", summary: "GA First Committee" }, + humanRights: { value: "hrc/55/12", summary: "Human Rights Council" }, + ecosoc: { value: "ecosoc/2024/30", summary: "ECOSOC" }, + assetPermalink: { value: "asset/k1a2b3c4", summary: "Asset permalink" }, + }, +}; diff --git a/package.json b/package.json index baed34e..6d468dd 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,8 @@ "private": true, "scripts": { "dev": "next dev --turbopack", - "build": "next build", + "build": "pnpm generate-openapi && next build", + "generate-openapi": "tsx scripts/generate-openapi.ts", "typecheck": "tsc --noEmit", "start": "next start", "test": "vitest run", diff --git a/proxy.ts b/proxy.ts index 68416dc..82c73b3 100644 --- a/proxy.ts +++ b/proxy.ts @@ -59,11 +59,12 @@ export default function middleware(req: NextRequest) { export const config = { // Two distinct matchers: // 1. The next-intl matcher: HTML pages without file extensions and - // excluding internal namespaces (api routes, _next, Sentry tunnel). + // excluding internal namespaces (api routes, _next, Sentry tunnel) and + // the locale-free /openapi API docs page. // 2. A second matcher for `.json` / `.txt` URLs under a locale prefix, // so this middleware can rewrite them to the data handler. matcher: [ - "/((?!api|_next|_vercel|monitoring|.*\\..*).*)", + "/((?!api|_next|_vercel|monitoring|openapi|.*\\..*).*)", "/(ar|zh|en|fr|ru|es)/(.*\\.(?:json|txt))", ], }; diff --git a/public/openapi.json b/public/openapi.json new file mode 100644 index 0000000..0a8acc2 --- /dev/null +++ b/public/openapi.json @@ -0,0 +1,1611 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "UN Transcripts API", + "version": "1.0.0", + "description": "Public, read-only API for UN Web TV meeting records and automatically generated transcripts.\n\nMeeting URLs use human-readable slugs derived from official UN document symbols (S/PV.10175 → `sc/10175`, A/79/PV.21 → `ga/79/21`). Append `.json` or `.txt` to any meeting page URL to get the same content as data. Videos without a document symbol are addressable at `/{locale}/asset/{asset_id}`.\n\nTranscripts are produced by automatic speech recognition over real meeting audio — not official records. The authoritative record is the UN verbatim (PV) or summary (SR) document; see `GET /api/pv`." + }, + "servers": [ + { + "url": "/" + } + ], + "tags": [ + { + "name": "meetings", + "description": "Meeting records and transcripts." + }, + { + "name": "discovery", + "description": "Audio tracks and source documents." + }, + { + "name": "health", + "description": "Service status." + } + ], + "paths": { + "/{locale}/meetings.json": { + "get": { + "tags": [ + "meetings" + ], + "summary": "Browse or search meetings (JSON)", + "description": "Paginated list of meetings within the last 365 days, newest first. Supports full-text search and filtering. Page size is 250.", + "operationId": "listMeetings", + "parameters": [ + { + "name": "locale", + "in": "path", + "required": true, + "description": "One of the six official UN languages.", + "schema": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ] + } + }, + { + "name": "q", + "in": "query", + "required": false, + "description": "Full-text search query (min 2 chars).", + "schema": { + "type": "string" + } + }, + { + "name": "category", + "in": "query", + "required": false, + "description": "Filter by WebTV category name.", + "schema": { + "type": "string" + } + }, + { + "name": "date", + "in": "query", + "required": false, + "description": "Filter to a single date (YYYY-MM-DD).", + "schema": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}$" + } + }, + { + "name": "sort", + "in": "query", + "required": false, + "description": "Sort order (default date_desc).", + "schema": { + "type": "string", + "enum": [ + "date_desc", + "date_asc", + "title_asc", + "title_desc" + ] + } + }, + { + "name": "offset", + "in": "query", + "required": false, + "description": "Pagination offset.", + "schema": { + "type": "integer", + "minimum": 0 + } + }, + { + "name": "text", + "in": "query", + "required": false, + "description": "Restrict to meetings that have the given document(s). Repeat to require multiple.", + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "transcript", + "pv", + "sr" + ] + } + }, + "style": "form", + "explode": true + }, + { + "name": "xlang", + "in": "query", + "required": false, + "description": "Set to 1 to include transcripts that exist only in other languages.", + "schema": { + "type": "string", + "enum": [ + "1" + ] + } + } + ], + "responses": { + "200": { + "description": "A page of meetings.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MeetingsResponse" + } + } + } + } + } + } + }, + "/{locale}/meetings.txt": { + "get": { + "tags": [ + "meetings" + ], + "summary": "Browse or search meetings (plain text)", + "description": "Same data as `meetings.json` in a compact, LLM-friendly plain-text table. Accepts the same query parameters.", + "operationId": "listMeetingsText", + "parameters": [ + { + "name": "locale", + "in": "path", + "required": true, + "description": "One of the six official UN languages.", + "schema": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ] + } + } + ], + "responses": { + "200": { + "description": "Plain-text meeting list.", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + } + } + } + }, + "/{locale}/{slug}.json": { + "get": { + "tags": [ + "meetings" + ], + "summary": "Get a single meeting and its transcript (JSON)", + "description": "Returns the meeting metadata and transcript for a slug derived from a UN document symbol. The response has one of three shapes depending on transcript availability (completed, in-progress, or absent — see the schema).\n\n`slug` may contain slashes (it is a multi-segment path). Examples:\n- `sc/10175` — Security Council (S/PV.10175)\n- `ga/79/21` — General Assembly plenary (A/79/PV.21)\n- `ga/c1/79/21` — GA First Committee\n- `hrc/55/12` — Human Rights Council\n- `ecosoc/2024/30` — ECOSOC\n- `asset/k1a2b3c4` — permalink for videos without a document symbol", + "operationId": "getMeeting", + "parameters": [ + { + "name": "locale", + "in": "path", + "required": true, + "description": "One of the six official UN languages.", + "schema": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ] + } + }, + { + "name": "slug", + "in": "path", + "required": true, + "description": "Meeting slug derived from a UN document symbol (may contain slashes), or `asset/{asset_id}` for videos without a symbol.", + "schema": { + "type": "string" + }, + "examples": { + "securityCouncil": { + "value": "sc/10175", + "summary": "Security Council" + }, + "generalAssembly": { + "value": "ga/79/21", + "summary": "GA plenary" + }, + "gaCommittee": { + "value": "ga/c1/79/21", + "summary": "GA First Committee" + }, + "humanRights": { + "value": "hrc/55/12", + "summary": "Human Rights Council" + }, + "ecosoc": { + "value": "ecosoc/2024/30", + "summary": "ECOSOC" + }, + "assetPermalink": { + "value": "asset/k1a2b3c4", + "summary": "Asset permalink" + } + } + }, + { + "name": "language", + "in": "query", + "required": false, + "description": "Transcript language to return (defaults to the URL locale).", + "schema": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ] + } + } + ], + "responses": { + "200": { + "description": "The meeting and (if available) its transcript.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MeetingResponse" + } + } + } + }, + "404": { + "description": "Unknown locale, invalid slug, or video not found.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DataApiError" + } + } + } + } + } + } + }, + "/{locale}/{slug}.txt": { + "get": { + "tags": [ + "meetings" + ], + "summary": "Get a single meeting transcript (plain text)", + "description": "Plain-text rendering of the transcript with speaker headers and timecodes. See the `.json` variant for the slug grammar.", + "operationId": "getMeetingText", + "parameters": [ + { + "name": "locale", + "in": "path", + "required": true, + "description": "One of the six official UN languages.", + "schema": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ] + } + }, + { + "name": "slug", + "in": "path", + "required": true, + "description": "Meeting slug derived from a UN document symbol (may contain slashes), or `asset/{asset_id}` for videos without a symbol.", + "schema": { + "type": "string" + }, + "examples": { + "securityCouncil": { + "value": "sc/10175", + "summary": "Security Council" + }, + "generalAssembly": { + "value": "ga/79/21", + "summary": "GA plenary" + }, + "gaCommittee": { + "value": "ga/c1/79/21", + "summary": "GA First Committee" + }, + "humanRights": { + "value": "hrc/55/12", + "summary": "Human Rights Council" + }, + "ecosoc": { + "value": "ecosoc/2024/30", + "summary": "ECOSOC" + }, + "assetPermalink": { + "value": "asset/k1a2b3c4", + "summary": "Asset permalink" + } + } + } + ], + "responses": { + "200": { + "description": "Plain-text transcript.", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + }, + "404": { + "description": "Unknown locale, invalid slug, or video not found.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DataApiError" + } + } + } + } + } + } + }, + "/api/languages": { + "get": { + "tags": [ + "discovery" + ], + "summary": "List audio language tracks for a video", + "description": "Returns all six UN languages with a flag for whether an audio track exists and the current transcript status for each.", + "operationId": "getLanguages", + "parameters": [ + { + "name": "kalturaId", + "in": "query", + "required": true, + "description": "Kaltura player ID (the `kaltura_id` field).", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Available languages and transcript statuses.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/LanguagesResponse" + } + } + } + }, + "400": { + "description": "Missing kalturaId.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + } + } + }, + "/api/pv": { + "get": { + "tags": [ + "discovery" + ], + "summary": "Fetch a UN verbatim/summary record", + "description": "Fetches the official UN verbatim (PV) or summary (SR) record PDF for a document symbol, parses it to structured JSON, and caches it. This is the authoritative record (transcripts are not).", + "operationId": "getPvDocument", + "parameters": [ + { + "name": "symbol", + "in": "query", + "required": true, + "description": "UN document symbol, e.g. S/PV.10175.", + "schema": { + "type": "string" + } + }, + { + "name": "lang", + "in": "query", + "required": false, + "description": "Document language (default en).", + "schema": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ] + } + } + ], + "responses": { + "200": { + "description": "Parsed document.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PvResponse" + } + } + } + }, + "400": { + "description": "Missing symbol.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + }, + "404": { + "description": "Document not found or not available.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ApiError" + } + } + } + } + } + } + }, + "/api/health": { + "get": { + "tags": [ + "health" + ], + "summary": "Service health check", + "description": "Pings the database. 200 when healthy, 503 otherwise.", + "operationId": "getHealth", + "responses": { + "200": { + "description": "Healthy.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HealthResponse" + } + } + } + }, + "503": { + "description": "Database unreachable.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HealthResponse" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "MeetingsResponse": { + "type": "object", + "properties": { + "meetings": { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": { + "type": "string", + "nullable": true + }, + "date": { + "type": "string", + "nullable": true + }, + "body": { + "type": "string", + "nullable": true + }, + "category": { + "type": "string", + "nullable": true + }, + "slug": { + "type": "string" + }, + "duration": { + "type": "number", + "nullable": true + }, + "hasTranscript": { + "type": "boolean" + }, + "pageUrl": { + "type": "string" + }, + "jsonUrl": { + "type": "string" + }, + "textUrl": { + "type": "string", + "nullable": true + } + }, + "required": [ + "title", + "date", + "body", + "category", + "slug", + "duration", + "hasTranscript", + "pageUrl", + "jsonUrl", + "textUrl" + ], + "additionalProperties": false + } + }, + "total": { + "type": "integer", + "minimum": -9007199254740991, + "maximum": 9007199254740991 + }, + "totalIncludingOther": { + "type": "integer", + "minimum": -9007199254740991, + "maximum": 9007199254740991, + "description": "Total ignoring the locale filter (see the xlang param)." + }, + "hasMore": { + "type": "boolean" + }, + "offset": { + "type": "integer", + "minimum": -9007199254740991, + "maximum": 9007199254740991 + }, + "pageSize": { + "type": "integer", + "minimum": -9007199254740991, + "maximum": 9007199254740991 + } + }, + "required": [ + "meetings", + "total", + "totalIncludingOther", + "hasMore", + "offset", + "pageSize" + ], + "additionalProperties": false + }, + "MeetingResponse": { + "anyOf": [ + { + "type": "object", + "properties": { + "disclaimer": { + "type": "string" + }, + "video": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "UN Web TV asset ID (the DB primary key)." + }, + "kaltura_id": { + "type": "string", + "nullable": true + }, + "title": { + "type": "string", + "nullable": true + }, + "clean_title": { + "type": "string", + "nullable": true + }, + "url": { + "type": "string", + "nullable": true + }, + "date": { + "type": "string", + "nullable": true + }, + "scheduled_time": { + "type": "string", + "nullable": true + }, + "status": { + "type": "string", + "nullable": true + }, + "duration": { + "type": "number", + "nullable": true + }, + "category": { + "type": "string", + "nullable": true + }, + "body": { + "type": "string", + "nullable": true, + "description": "UN organ / body name." + }, + "event_code": { + "type": "string", + "nullable": true + }, + "event_type": { + "type": "string", + "nullable": true + }, + "session_number": { + "type": "string", + "nullable": true + }, + "pv_symbol": { + "type": "string", + "nullable": true, + "description": "Official UN document symbol." + }, + "pv_part": { + "type": "string", + "nullable": true + }, + "slug": { + "type": "string", + "description": "Human-readable meeting slug, e.g. sc/10175." + } + }, + "required": [ + "id", + "kaltura_id", + "title", + "clean_title", + "url", + "date", + "scheduled_time", + "status", + "duration", + "category", + "body", + "event_code", + "event_type", + "session_number", + "pv_symbol", + "pv_part", + "slug" + ], + "additionalProperties": false + }, + "metadata": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "nullable": true + }, + "description": { + "type": "string", + "nullable": true + }, + "categories": { + "type": "array", + "items": { + "type": "string" + } + }, + "geographic_subject": { + "type": "array", + "items": { + "type": "string" + } + }, + "subject_topical": { + "type": "array", + "items": { + "type": "string" + } + }, + "corporate_name": { + "type": "array", + "items": { + "type": "string" + } + }, + "speaker_affiliation": { + "type": "array", + "items": { + "type": "string" + } + }, + "related_documents": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "summary", + "description", + "categories", + "geographic_subject", + "subject_topical", + "corporate_name", + "speaker_affiliation", + "related_documents" + ], + "additionalProperties": false, + "description": "Structured metadata harvested from the UN Web TV page." + }, + "transcript": { + "type": "object", + "properties": { + "transcript_id": { + "type": "string" + }, + "language": { + "type": "string" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "statement_number": { + "type": "integer", + "minimum": -9007199254740991, + "maximum": 9007199254740991, + "description": "1-based index of the statement." + }, + "paragraphs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "sentences": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "start": { + "type": "number", + "description": "Start time in seconds." + }, + "end": { + "type": "number", + "description": "End time in seconds." + }, + "topics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string" + }, + "label": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": [ + "key", + "label", + "description" + ], + "additionalProperties": false, + "description": "A substantive policy topic identified across the meeting." + } + }, + "words": { + "description": "Per-word timing; present only when word-level timing exists.", + "type": "array", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "start": { + "type": "number", + "description": "Start time in seconds." + }, + "end": { + "type": "number", + "description": "End time in seconds." + } + }, + "required": [ + "text", + "start", + "end" + ], + "additionalProperties": false + } + } + }, + "required": [ + "text", + "start", + "end", + "topics" + ], + "additionalProperties": false + } + } + }, + "required": [ + "sentences" + ], + "additionalProperties": false + } + }, + "speaker": { + "type": "object", + "properties": { + "name": { + "type": "string", + "nullable": true + }, + "affiliation": { + "type": "string", + "nullable": true, + "description": "ISO alpha-3 country code or short affiliation label." + }, + "affiliation_full": { + "type": "string", + "nullable": true, + "description": "Official UN country name resolved from the ISO code." + }, + "group": { + "type": "string", + "nullable": true + }, + "function": { + "type": "string", + "nullable": true + } + }, + "required": [ + "name", + "affiliation", + "affiliation_full", + "group", + "function" + ], + "additionalProperties": false, + "description": "Resolved speaker for a statement." + } + }, + "required": [ + "statement_number", + "paragraphs", + "speaker" + ], + "additionalProperties": false + } + }, + "topics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string" + }, + "label": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "required": [ + "key", + "label", + "description" + ], + "additionalProperties": false, + "description": "A substantive policy topic identified across the meeting." + } + } + }, + "required": [ + "transcript_id", + "language", + "data", + "topics" + ], + "additionalProperties": false + } + }, + "required": [ + "disclaimer", + "video", + "metadata", + "transcript" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "disclaimer": { + "type": "string" + }, + "video": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "UN Web TV asset ID (the DB primary key)." + }, + "kaltura_id": { + "type": "string", + "nullable": true + }, + "title": { + "type": "string", + "nullable": true + }, + "clean_title": { + "type": "string", + "nullable": true + }, + "url": { + "type": "string", + "nullable": true + }, + "date": { + "type": "string", + "nullable": true + }, + "scheduled_time": { + "type": "string", + "nullable": true + }, + "status": { + "type": "string", + "nullable": true + }, + "duration": { + "type": "number", + "nullable": true + }, + "category": { + "type": "string", + "nullable": true + }, + "body": { + "type": "string", + "nullable": true, + "description": "UN organ / body name." + }, + "event_code": { + "type": "string", + "nullable": true + }, + "event_type": { + "type": "string", + "nullable": true + }, + "session_number": { + "type": "string", + "nullable": true + }, + "pv_symbol": { + "type": "string", + "nullable": true, + "description": "Official UN document symbol." + }, + "pv_part": { + "type": "string", + "nullable": true + }, + "slug": { + "type": "string", + "description": "Human-readable meeting slug, e.g. sc/10175." + } + }, + "required": [ + "id", + "kaltura_id", + "title", + "clean_title", + "url", + "date", + "scheduled_time", + "status", + "duration", + "category", + "body", + "event_code", + "event_type", + "session_number", + "pv_symbol", + "pv_part", + "slug" + ], + "additionalProperties": false + }, + "metadata": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "nullable": true + }, + "description": { + "type": "string", + "nullable": true + }, + "categories": { + "type": "array", + "items": { + "type": "string" + } + }, + "geographic_subject": { + "type": "array", + "items": { + "type": "string" + } + }, + "subject_topical": { + "type": "array", + "items": { + "type": "string" + } + }, + "corporate_name": { + "type": "array", + "items": { + "type": "string" + } + }, + "speaker_affiliation": { + "type": "array", + "items": { + "type": "string" + } + }, + "related_documents": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "summary", + "description", + "categories", + "geographic_subject", + "subject_topical", + "corporate_name", + "speaker_affiliation", + "related_documents" + ], + "additionalProperties": false, + "description": "Structured metadata harvested from the UN Web TV page." + }, + "transcript": { + "nullable": true + }, + "message": { + "type": "string" + } + }, + "required": [ + "disclaimer", + "video", + "metadata", + "transcript", + "message" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "disclaimer": { + "type": "string" + }, + "video": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "UN Web TV asset ID (the DB primary key)." + }, + "kaltura_id": { + "type": "string", + "nullable": true + }, + "title": { + "type": "string", + "nullable": true + }, + "clean_title": { + "type": "string", + "nullable": true + }, + "url": { + "type": "string", + "nullable": true + }, + "date": { + "type": "string", + "nullable": true + }, + "scheduled_time": { + "type": "string", + "nullable": true + }, + "status": { + "type": "string", + "nullable": true + }, + "duration": { + "type": "number", + "nullable": true + }, + "category": { + "type": "string", + "nullable": true + }, + "body": { + "type": "string", + "nullable": true, + "description": "UN organ / body name." + }, + "event_code": { + "type": "string", + "nullable": true + }, + "event_type": { + "type": "string", + "nullable": true + }, + "session_number": { + "type": "string", + "nullable": true + }, + "pv_symbol": { + "type": "string", + "nullable": true, + "description": "Official UN document symbol." + }, + "pv_part": { + "type": "string", + "nullable": true + }, + "slug": { + "type": "string", + "description": "Human-readable meeting slug, e.g. sc/10175." + } + }, + "required": [ + "id", + "kaltura_id", + "title", + "clean_title", + "url", + "date", + "scheduled_time", + "status", + "duration", + "category", + "body", + "event_code", + "event_type", + "session_number", + "pv_symbol", + "pv_part", + "slug" + ], + "additionalProperties": false + }, + "metadata": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "nullable": true + }, + "description": { + "type": "string", + "nullable": true + }, + "categories": { + "type": "array", + "items": { + "type": "string" + } + }, + "geographic_subject": { + "type": "array", + "items": { + "type": "string" + } + }, + "subject_topical": { + "type": "array", + "items": { + "type": "string" + } + }, + "corporate_name": { + "type": "array", + "items": { + "type": "string" + } + }, + "speaker_affiliation": { + "type": "array", + "items": { + "type": "string" + } + }, + "related_documents": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "summary", + "description", + "categories", + "geographic_subject", + "subject_topical", + "corporate_name", + "speaker_affiliation", + "related_documents" + ], + "additionalProperties": false, + "description": "Structured metadata harvested from the UN Web TV page." + }, + "transcript": { + "type": "object", + "properties": { + "status": { + "type": "string", + "description": "Current pipeline stage." + }, + "transcriptId": { + "type": "string" + } + }, + "required": [ + "status", + "transcriptId" + ], + "additionalProperties": false + }, + "message": { + "type": "string" + } + }, + "required": [ + "disclaimer", + "video", + "metadata", + "transcript", + "message" + ], + "additionalProperties": false + } + ], + "description": "One of three shapes depending on transcript availability: completed (full transcript), in-progress (status only), or absent (null)." + }, + "LanguagesResponse": { + "type": "object", + "properties": { + "entryId": { + "type": "string", + "description": "Canonical Kaltura entry ID." + }, + "languages": { + "type": "array", + "items": { + "type": "object", + "properties": { + "code": { + "type": "string", + "enum": [ + "ar", + "zh", + "en", + "fr", + "ru", + "es" + ], + "description": "ISO 639-1 code of one of the six official UN languages." + }, + "name": { + "type": "string" + }, + "available": { + "type": "boolean", + "description": "Whether an audio track exists for this language." + }, + "transcriptStatus": { + "type": "string", + "nullable": true, + "description": "Transcription status, or null if none exists." + } + }, + "required": [ + "code", + "name", + "available", + "transcriptStatus" + ], + "additionalProperties": false + } + } + }, + "required": [ + "entryId", + "languages" + ], + "additionalProperties": false + }, + "PvResponse": { + "type": "object", + "properties": { + "symbol": { + "type": "string" + }, + "body": { + "type": "string" + }, + "session": { + "type": "string" + }, + "meetingNumber": { + "type": "string" + }, + "date": { + "type": "string" + }, + "location": { + "type": "string" + }, + "language": { + "type": "string" + }, + "status": { + "type": "string", + "enum": [ + "provisional", + "official" + ] + }, + "president": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "country": { + "type": "string" + } + }, + "required": [ + "name", + "country" + ], + "additionalProperties": false, + "nullable": true + }, + "members": { + "type": "array", + "items": { + "type": "object", + "properties": { + "country": { + "type": "string" + }, + "representative": { + "type": "string" + } + }, + "required": [ + "country", + "representative" + ], + "additionalProperties": false + } + }, + "agendaItems": { + "type": "array", + "items": { + "type": "string" + } + }, + "turns": { + "type": "array", + "items": { + "type": "object", + "properties": { + "speaker": { + "type": "string" + }, + "affiliation": { + "type": "string" + }, + "spokenLanguage": { + "type": "string" + }, + "onBehalfOf": { + "type": "string" + }, + "paragraphNumber": { + "type": "number" + }, + "paragraphs": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "speech", + "procedural" + ] + }, + "proceduralParagraphs": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "speaker", + "paragraphs", + "type" + ], + "additionalProperties": false + } + }, + "fullText": { + "type": "string" + } + }, + "required": [ + "symbol", + "body", + "session", + "meetingNumber", + "date", + "location", + "language", + "status", + "president", + "members", + "agendaItems", + "turns", + "fullText" + ], + "additionalProperties": false, + "description": "Parsed UN verbatim (PV) or summary (SR) record." + }, + "HealthResponse": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "status" + ], + "additionalProperties": false + }, + "DataApiError": { + "type": "object", + "properties": { + "error": { + "type": "string" + } + }, + "required": [ + "error" + ], + "additionalProperties": false, + "description": "Error response from the meeting data API." + }, + "ApiError": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "message": { + "type": "string" + } + }, + "required": [ + "code", + "message" + ], + "additionalProperties": false + } + }, + "required": [ + "error" + ], + "additionalProperties": false, + "description": "Structured error response from /api/* routes." + } + } + } +} diff --git a/scripts/generate-openapi.ts b/scripts/generate-openapi.ts new file mode 100644 index 0000000..bf6352a --- /dev/null +++ b/scripts/generate-openapi.ts @@ -0,0 +1,12 @@ +import { writeFileSync } from "fs"; +import { join } from "path"; +import { buildSpec } from "../lib/openapi/spec"; + +// Regenerates public/openapi.json from the Zod schemas. Wired into `prebuild` +// so the spec stays in sync on every `pnpm build`; run directly via +// `pnpm generate-openapi`. + +const spec = buildSpec(); +const out = join(process.cwd(), "public", "openapi.json"); +writeFileSync(out, JSON.stringify(spec, null, 2) + "\n"); +console.log(`Wrote ${out}`); From b868b88c5f9d0004b9ae304b741796dec3f31ea9 Mon Sep 17 00:00:00 2001 From: kleinlennart <31488915+kleinlennart@users.noreply.github.com> Date: Mon, 22 Jun 2026 22:13:47 +0200 Subject: [PATCH 2/3] add api docstrings --- app/api/auth/logout/route.ts | 1 + app/api/auth/me/route.ts | 1 + app/api/auth/request-link/route.ts | 1 + app/api/auth/verify/route.ts | 1 + app/api/cron/check-pv/route.ts | 1 + app/api/cron/liveness-sweep/route.ts | 1 + app/api/cron/process-scheduled/route.ts | 1 + app/api/cron/realign/route.ts | 1 + .../send-transcript-notifications/route.ts | 1 + app/api/cron/sync-videos/route.ts | 1 + .../data/[locale]/[format]/[...path]/route.ts | 31 +++---- app/api/health/route.ts | 1 + app/api/languages/route.ts | 1 + app/api/og/meeting/[...slug]/route.tsx | 89 +++++++++---------- app/api/pv/align/route.ts | 1 + app/api/pv/route.ts | 1 + app/api/speakers/statements/route.ts | 1 + app/api/subscriptions/feed/route.ts | 1 + app/api/subscriptions/route.ts | 1 + app/api/subscriptions/video/route.ts | 1 + app/api/transcripts/[id]/analysis/route.ts | 1 + app/api/transcripts/[id]/route.ts | 1 + app/api/transcripts/[id]/words/route.ts | 1 + app/api/transcripts/check/route.ts | 1 + app/api/transcripts/route.ts | 1 + app/api/videos/route.ts | 1 + app/api/waitlist/route.ts | 1 + 27 files changed, 85 insertions(+), 60 deletions(-) diff --git a/app/api/auth/logout/route.ts b/app/api/auth/logout/route.ts index 1415362..1ed1b1e 100644 --- a/app/api/auth/logout/route.ts +++ b/app/api/auth/logout/route.ts @@ -1,3 +1,4 @@ +// Clears the current user session (logout). import { NextResponse } from "next/server"; import { clearSession } from "@/lib/auth/service"; diff --git a/app/api/auth/me/route.ts b/app/api/auth/me/route.ts index 0529d78..5177d93 100644 --- a/app/api/auth/me/route.ts +++ b/app/api/auth/me/route.ts @@ -1,3 +1,4 @@ +// Returns the authenticated user's email and feature flags. import { NextResponse } from "next/server"; import { getCurrentUser } from "@/lib/auth/service"; diff --git a/app/api/auth/request-link/route.ts b/app/api/auth/request-link/route.ts index 54d6c37..c6a67d5 100644 --- a/app/api/auth/request-link/route.ts +++ b/app/api/auth/request-link/route.ts @@ -1,3 +1,4 @@ +// Sends a magic-link login email to the given address. import { NextResponse } from "next/server"; import { getTranslations } from "next-intl/server"; import { sendMagicLink } from "@/lib/auth/mail"; diff --git a/app/api/auth/verify/route.ts b/app/api/auth/verify/route.ts index 21be00d..8b2f09a 100644 --- a/app/api/auth/verify/route.ts +++ b/app/api/auth/verify/route.ts @@ -1,3 +1,4 @@ +// Verifies a magic-link token and creates an authenticated session. import { NextResponse } from "next/server"; import { revalidatePath } from "next/cache"; import { getTranslations } from "next-intl/server"; diff --git a/app/api/cron/check-pv/route.ts b/app/api/cron/check-pv/route.ts index f381f01..857f566 100644 --- a/app/api/cron/check-pv/route.ts +++ b/app/api/cron/check-pv/route.ts @@ -1,3 +1,4 @@ +// Cron: checks for newly available PV documents on recent meetings. import { NextRequest, NextResponse } from "next/server"; import { runCheckPv } from "@/lib/cron/check-pv"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/cron/liveness-sweep/route.ts b/app/api/cron/liveness-sweep/route.ts index 1a2fe2a..7e479f6 100644 --- a/app/api/cron/liveness-sweep/route.ts +++ b/app/api/cron/liveness-sweep/route.ts @@ -1,3 +1,4 @@ +// Cron: marks heartbeat-stale transcript rows as interrupted. import { NextRequest, NextResponse } from "next/server"; import { runLivenessSweep } from "@/lib/cron/liveness-sweep"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/cron/process-scheduled/route.ts b/app/api/cron/process-scheduled/route.ts index 253ab08..ce148fd 100644 --- a/app/api/cron/process-scheduled/route.ts +++ b/app/api/cron/process-scheduled/route.ts @@ -1,3 +1,4 @@ +// Cron: starts transcription for scheduled and interrupted transcript rows. import { NextRequest, NextResponse } from "next/server"; import { runProcessScheduled } from "@/lib/cron/process-scheduled"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/cron/realign/route.ts b/app/api/cron/realign/route.ts index c7174d8..bbac9d2 100644 --- a/app/api/cron/realign/route.ts +++ b/app/api/cron/realign/route.ts @@ -1,3 +1,4 @@ +// Cron: recalculates timestamp offsets for re-cut videos. import { NextRequest, NextResponse } from "next/server"; import { runRealign } from "@/lib/cron/realign"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/cron/send-transcript-notifications/route.ts b/app/api/cron/send-transcript-notifications/route.ts index b3e8b29..6f72af0 100644 --- a/app/api/cron/send-transcript-notifications/route.ts +++ b/app/api/cron/send-transcript-notifications/route.ts @@ -1,3 +1,4 @@ +// Cron: emails subscribers when a requested transcript is ready. import { NextRequest, NextResponse } from "next/server"; import { runSendTranscriptNotifications } from "@/lib/cron/send-transcript-notifications"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/cron/sync-videos/route.ts b/app/api/cron/sync-videos/route.ts index 10ff73c..122c181 100644 --- a/app/api/cron/sync-videos/route.ts +++ b/app/api/cron/sync-videos/route.ts @@ -1,3 +1,4 @@ +// Cron: scrapes UN Web TV and upserts meeting records. import { NextRequest, NextResponse } from "next/server"; import { runSyncVideos } from "@/lib/cron/sync-videos"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/data/[locale]/[format]/[...path]/route.ts b/app/api/data/[locale]/[format]/[...path]/route.ts index 869245b..fbdff6d 100644 --- a/app/api/data/[locale]/[format]/[...path]/route.ts +++ b/app/api/data/[locale]/[format]/[...path]/route.ts @@ -1,30 +1,31 @@ +// Public data API: meeting list and single-meeting detail as JSON or plain text. import { routing } from "@/i18n/routing"; import { - getCachedTranscriptedEntries, - getCachedTranscriptedEntriesByLanguage, + getCachedTranscriptedEntries, + getCachedTranscriptedEntriesByLanguage, } from "@/lib/cached-db"; import { compressedJson, compressedText } from "@/lib/compressed-json"; import { TRANSCRIPT_DISCLAIMER } from "@/lib/config"; import { getCountryName } from "@/lib/country-lookup"; import { - getTranscriptByKalturaId, - getVideoByAssetId, - getVideoByCitation, - queryVideos, - type VideoRecord, - type VideosQueryParams, + getTranscriptByKalturaId, + getVideoByAssetId, + getVideoByCitation, + queryVideos, + type VideoRecord, + type VideosQueryParams, } from "@/lib/db"; import { symbolFromSlug } from "@/lib/meeting-slug"; import { - formatSpeakerInfo, - getSpeakerMapping, - SpeakerInfo, + formatSpeakerInfo, + getSpeakerMapping, + SpeakerInfo, } from "@/lib/speakers"; import { - buildSpeakerSegments, - formatSpeakerText, - formatTimecode, - formatTranscriptAsPlainText, + buildSpeakerSegments, + formatSpeakerText, + formatTimecode, + formatTranscriptAsPlainText, } from "@/lib/transcript-formatting"; import { getVideoMetadata, recordToVideo } from "@/lib/un-api"; import { videoUrl } from "@/lib/video-url"; diff --git a/app/api/health/route.ts b/app/api/health/route.ts index 3a46731..0109fb4 100644 --- a/app/api/health/route.ts +++ b/app/api/health/route.ts @@ -1,3 +1,4 @@ +// Database ping; returns 200 ok or 503 error. import { pool } from "@/lib/db"; export async function GET() { diff --git a/app/api/languages/route.ts b/app/api/languages/route.ts index fded4bc..7216fb6 100644 --- a/app/api/languages/route.ts +++ b/app/api/languages/route.ts @@ -1,3 +1,4 @@ +// Lists available audio language tracks and transcript status for a video. import { NextRequest, NextResponse } from "next/server"; import { getAvailableAudioLanguages } from "@/lib/transcription"; import { getTranscriptLanguagesByKalturaId } from "@/lib/db"; diff --git a/app/api/og/meeting/[...slug]/route.tsx b/app/api/og/meeting/[...slug]/route.tsx index 85c0019..b70412a 100644 --- a/app/api/og/meeting/[...slug]/route.tsx +++ b/app/api/og/meeting/[...slug]/route.tsx @@ -1,9 +1,10 @@ -import { ImageResponse } from "next/og"; -import { getTranslations } from "next-intl/server"; +// Generates an Open Graph image for a meeting page. +import type { VideoRecord } from "@/lib/db"; import { getVideoByAssetId, getVideoByCitation } from "@/lib/db"; import { symbolFromSlug } from "@/lib/meeting-slug"; import { OgHeader, getOgFonts } from "@/lib/og"; -import type { VideoRecord } from "@/lib/db"; +import { getTranslations } from "next-intl/server"; +import { ImageResponse } from "next/og"; async function resolveVideo(slug: string): Promise { if (slug.startsWith("asset/")) { @@ -62,66 +63,64 @@ export async function GET( const metaParts = [category, date].filter(Boolean); return new ImageResponse( - ( +
+
-
+ {title} +
+ {metaParts.length > 0 && (
- {title} + {metaParts.join(" · ")}
- {metaParts.length > 0 && ( -
- {metaParts.join(" · ")} -
- )} -
+ )}
- ), + , { ...SIZE, fonts }, ); } diff --git a/app/api/pv/align/route.ts b/app/api/pv/align/route.ts index 5ddd4d7..2c6e85e 100644 --- a/app/api/pv/align/route.ts +++ b/app/api/pv/align/route.ts @@ -1,3 +1,4 @@ +// Aligns a parsed PV document with audio to produce per-turn timestamps. import { NextRequest, NextResponse } from "next/server"; import { getPVContent, savePVContent } from "@/lib/db"; import { getKalturaAudioUrl } from "@/lib/transcription"; diff --git a/app/api/pv/route.ts b/app/api/pv/route.ts index 9a6eb9e..dfca9b7 100644 --- a/app/api/pv/route.ts +++ b/app/api/pv/route.ts @@ -1,3 +1,4 @@ +// Fetches and parses a UN PV or SR document PDF to structured JSON. import { NextRequest, NextResponse } from "next/server"; import { getPVContent, savePVContent } from "@/lib/db"; import { fetchPVDocument } from "@/lib/pv-documents"; diff --git a/app/api/speakers/statements/route.ts b/app/api/speakers/statements/route.ts index 2a007e9..951de31 100644 --- a/app/api/speakers/statements/route.ts +++ b/app/api/speakers/statements/route.ts @@ -1,3 +1,4 @@ +// Returns paginated statements attributed to a speaker entity. import { NextRequest, NextResponse } from "next/server"; import { requireExperimental } from "@/lib/auth/require-experimental"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/subscriptions/feed/route.ts b/app/api/subscriptions/feed/route.ts index ff34093..cf9873f 100644 --- a/app/api/subscriptions/feed/route.ts +++ b/app/api/subscriptions/feed/route.ts @@ -1,3 +1,4 @@ +// Adds or removes a category feed subscription for the current user. import { NextRequest, NextResponse } from "next/server"; import { requireUser } from "@/lib/auth/require-user"; import { diff --git a/app/api/subscriptions/route.ts b/app/api/subscriptions/route.ts index 33fa9e2..1634f44 100644 --- a/app/api/subscriptions/route.ts +++ b/app/api/subscriptions/route.ts @@ -1,3 +1,4 @@ +// Returns the current user's feed and video subscription state. import { NextRequest, NextResponse } from "next/server"; import { getCurrentUser } from "@/lib/auth/service"; import { diff --git a/app/api/subscriptions/video/route.ts b/app/api/subscriptions/video/route.ts index ac2583a..f3d99c9 100644 --- a/app/api/subscriptions/video/route.ts +++ b/app/api/subscriptions/video/route.ts @@ -1,3 +1,4 @@ +// Adds or removes a per-video transcript subscription for the current user. import { NextRequest, NextResponse } from "next/server"; import { requireUser } from "@/lib/auth/require-user"; import { addVideoSubscription, removeVideoSubscription } from "@/lib/db"; diff --git a/app/api/transcripts/[id]/analysis/route.ts b/app/api/transcripts/[id]/analysis/route.ts index 30faa56..066dd27 100644 --- a/app/api/transcripts/[id]/analysis/route.ts +++ b/app/api/transcripts/[id]/analysis/route.ts @@ -1,3 +1,4 @@ +// Runs on-demand proposition analysis on a completed transcript. import { NextRequest, NextResponse } from "next/server"; import { AzureOpenAI } from "openai"; import { analyzePropositions } from "@/lib/pipeline"; diff --git a/app/api/transcripts/[id]/route.ts b/app/api/transcripts/[id]/route.ts index 5c557b3..fc616f6 100644 --- a/app/api/transcripts/[id]/route.ts +++ b/app/api/transcripts/[id]/route.ts @@ -1,3 +1,4 @@ +// Polls transcript pipeline status and returns the result when complete. import { NextRequest, NextResponse } from "next/server"; import { createHash } from "crypto"; import { pollTranscription } from "@/lib/transcription"; diff --git a/app/api/transcripts/[id]/words/route.ts b/app/api/transcripts/[id]/words/route.ts index fe4b9f1..b2e8595 100644 --- a/app/api/transcripts/[id]/words/route.ts +++ b/app/api/transcripts/[id]/words/route.ts @@ -1,3 +1,4 @@ +// Returns word-level timestamps for a completed transcript. import { NextRequest } from "next/server"; import { getTranscriptByIdForDisplay } from "@/lib/db"; import { apiError } from "@/lib/api-error"; diff --git a/app/api/transcripts/check/route.ts b/app/api/transcripts/check/route.ts index 3d2d79a..d9316ae 100644 --- a/app/api/transcripts/check/route.ts +++ b/app/api/transcripts/check/route.ts @@ -1,3 +1,4 @@ +// Checks for an existing transcript by Kaltura ID and language. import { NextRequest } from "next/server"; import { getActiveTranscriptByKalturaId, diff --git a/app/api/transcripts/route.ts b/app/api/transcripts/route.ts index 92597cc..77ed2f8 100644 --- a/app/api/transcripts/route.ts +++ b/app/api/transcripts/route.ts @@ -1,3 +1,4 @@ +// Starts or schedules a new transcription for a video. import { NextRequest, NextResponse } from "next/server"; import { getTranscriptByKalturaId, diff --git a/app/api/videos/route.ts b/app/api/videos/route.ts index c9a8516..a90a624 100644 --- a/app/api/videos/route.ts +++ b/app/api/videos/route.ts @@ -1,3 +1,4 @@ +// Paginated video list with transcript availability flags. import { NextRequest, NextResponse } from "next/server"; import { queryVideos, type VideosQueryParams } from "@/lib/db"; import { diff --git a/app/api/waitlist/route.ts b/app/api/waitlist/route.ts index f39b5d8..dc3a4d0 100644 --- a/app/api/waitlist/route.ts +++ b/app/api/waitlist/route.ts @@ -1,3 +1,4 @@ +// Records experimental-features waitlist interest for the current user. import { NextResponse } from "next/server"; import { requireUser } from "@/lib/auth/require-user"; import { setExperimentalWaitlist } from "@/lib/auth/service"; From e8406926e48a0d627eff4def0596fd1458b97be9 Mon Sep 17 00:00:00 2001 From: kleinlennart <31488915+kleinlennart@users.noreply.github.com> Date: Mon, 22 Jun 2026 22:41:23 +0200 Subject: [PATCH 3/3] improve api --- .../data/[locale]/[format]/[...path]/route.ts | 8 +++++++ app/llms-full.txt/route.ts | 13 ++++++++-- app/llms.txt/route.ts | 1 + lib/openapi/schemas.ts | 12 +++++++++- lib/openapi/spec.ts | 16 ++++++++++--- public/openapi.json | 24 +++++++++++++++++-- 6 files changed, 66 insertions(+), 8 deletions(-) diff --git a/app/api/data/[locale]/[format]/[...path]/route.ts b/app/api/data/[locale]/[format]/[...path]/route.ts index fbdff6d..ed1ed9d 100644 --- a/app/api/data/[locale]/[format]/[...path]/route.ts +++ b/app/api/data/[locale]/[format]/[...path]/route.ts @@ -377,6 +377,12 @@ async function handleList( const dateRaw = sp.get("date"); const date = dateRaw && /^\d{4}-\d{2}-\d{2}$/.test(dateRaw) ? dateRaw : undefined; + const fromRaw = sp.get("from"); + const dateFrom = + fromRaw && /^\d{4}-\d{2}-\d{2}$/.test(fromRaw) ? fromRaw : undefined; + const toRaw = sp.get("to"); + const dateTo = + toRaw && /^\d{4}-\d{2}-\d{2}$/.test(toRaw) ? toRaw : undefined; const docs = sp .getAll("text") .filter((d) => ["transcript", "pv", "sr"].includes(d)); @@ -394,6 +400,8 @@ async function handleList( q, daysBack: LIST_DAYS_BACK, date, + dateFrom, + dateTo, category: sp.get("category") || undefined, docs: docs.length ? docs : undefined, sort: { by, dir }, diff --git a/app/llms-full.txt/route.ts b/app/llms-full.txt/route.ts index bf9b764..d36cf1b 100644 --- a/app/llms-full.txt/route.ts +++ b/app/llms-full.txt/route.ts @@ -44,10 +44,12 @@ Returns a paginated list of UN meetings matching the given filters. Covers the l |-----------|------|-------------| | \`q\` | string | Search meeting titles and metadata (not transcript content). Min 2 characters. | | \`category\` | string | Filter by meeting category. | -| \`date\` | YYYY-MM-DD | Filter to a specific date. | +| \`date\` | YYYY-MM-DD | Filter to a specific date. Mutually exclusive with \`from\`/\`to\`. | +| \`from\` | YYYY-MM-DD | Inclusive start of a date range. | +| \`to\` | YYYY-MM-DD | Inclusive end of a date range. | | \`sort\` | enum | \`date_desc\` (default), \`date_asc\`, \`title_asc\`, \`title_desc\` | | \`offset\`| integer | Pagination offset. Results come in chunks of 250. | -| \`text\` | string (multi) | Filter by available documents: \`transcript\`, \`pv\` (verbatim record), \`sr\` (summary record). | +| \`text\` | string (multi) | Filter by document type: \`transcript\` = has automatic transcript; \`pv\` = has official verbatim record; \`sr\` = has official summary record. Use \`text=transcript\` to exclude meetings with no content. | | \`xlang\` | \`1\` | Include meetings not yet available in the URL locale (default: hide them). | ### Response shape @@ -233,6 +235,13 @@ Closed or confidential meetings are not covered (they are not recorded on Web TV - **Time window**: search and browse cover the last 365 days, matching the website homepage. - **Transcript accuracy**: these are automatic speech recognition outputs, not official records. Names, abbreviations, and document symbols may be misheard. Accuracy varies by speaker and microphone quality. - **Languages**: six UN languages are supported (en, fr, es, ar, zh, ru). Not every meeting has transcripts in all languages — it depends on which audio tracks are available. + +--- + +## Machine-readable spec + +OpenAPI 3.0 spec: \`GET /openapi.json\` +Interactive reference: \`/openapi\` `; export function GET() { diff --git a/app/llms.txt/route.ts b/app/llms.txt/route.ts index 3277001..a34472c 100644 --- a/app/llms.txt/route.ts +++ b/app/llms.txt/route.ts @@ -22,6 +22,7 @@ The locale prefix in the URL (\`/en\`, \`/fr\`, \`/ar\`, \`/zh\`, \`/ru\`, \`/es - Read transcript (text): \`GET /en/{slug}.txt\` — plain-text transcript with speaker labels, compact for LLM context. - Read transcript (JSON): \`GET /en/{slug}.json\` — structured JSON with timestamps, speakers, topics, and optional word-level timing. - [Full API reference](/llms-full.txt): detailed query parameters, response shapes, and known limitations. +- [OpenAPI spec](/openapi.json): machine-readable OpenAPI 3.0 spec (interactive UI at /openapi). ## Meeting URL scheme diff --git a/lib/openapi/schemas.ts b/lib/openapi/schemas.ts index 9306c65..c6e4d42 100644 --- a/lib/openapi/schemas.ts +++ b/lib/openapi/schemas.ts @@ -31,7 +31,17 @@ export const MeetingsQuerySchema = z.object({ .string() .regex(/^\d{4}-\d{2}-\d{2}$/) .optional() - .describe("Filter to a single date (YYYY-MM-DD)."), + .describe("Filter to a single date (YYYY-MM-DD). Mutually exclusive with from/to."), + from: z + .string() + .regex(/^\d{4}-\d{2}-\d{2}$/) + .optional() + .describe("Inclusive start of a date range (YYYY-MM-DD)."), + to: z + .string() + .regex(/^\d{4}-\d{2}-\d{2}$/) + .optional() + .describe("Inclusive end of a date range (YYYY-MM-DD)."), sort: z .enum(["date_desc", "date_asc", "title_asc", "title_desc"]) .optional() diff --git a/lib/openapi/spec.ts b/lib/openapi/spec.ts index 732bc33..8bd6bda 100644 --- a/lib/openapi/spec.ts +++ b/lib/openapi/spec.ts @@ -163,7 +163,15 @@ export function buildSpec(): Record { qp("category", "Filter by WebTV category name.", { type: "string", }), - qp("date", "Filter to a single date (YYYY-MM-DD).", { + qp("date", "Filter to a single date (YYYY-MM-DD). Mutually exclusive with from/to.", { + type: "string", + pattern: "^\\d{4}-\\d{2}-\\d{2}$", + }), + qp("from", "Inclusive start of a date range (YYYY-MM-DD).", { + type: "string", + pattern: "^\\d{4}-\\d{2}-\\d{2}$", + }), + qp("to", "Inclusive end of a date range (YYYY-MM-DD).", { type: "string", pattern: "^\\d{4}-\\d{2}-\\d{2}$", }), @@ -180,8 +188,10 @@ export function buildSpec(): Record { in: "query", required: false, description: - "Restrict to meetings that have the given document(s). " + - "Repeat to require multiple.", + "Filter by available document type. `transcript` = has an automatic transcript; " + + "`pv` = has an official verbatim record; `sr` = has an official summary record. " + + "Repeat to require multiple (e.g. `text=transcript&text=pv`). " + + "Use `text=transcript` to exclude meetings with no content to read.", schema: { type: "array", items: { type: "string", enum: ["transcript", "pv", "sr"] }, diff --git a/public/openapi.json b/public/openapi.json index 0a8acc2..6df039b 100644 --- a/public/openapi.json +++ b/public/openapi.json @@ -73,7 +73,27 @@ "name": "date", "in": "query", "required": false, - "description": "Filter to a single date (YYYY-MM-DD).", + "description": "Filter to a single date (YYYY-MM-DD). Mutually exclusive with from/to.", + "schema": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}$" + } + }, + { + "name": "from", + "in": "query", + "required": false, + "description": "Inclusive start of a date range (YYYY-MM-DD).", + "schema": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}$" + } + }, + { + "name": "to", + "in": "query", + "required": false, + "description": "Inclusive end of a date range (YYYY-MM-DD).", "schema": { "type": "string", "pattern": "^\\d{4}-\\d{2}-\\d{2}$" @@ -108,7 +128,7 @@ "name": "text", "in": "query", "required": false, - "description": "Restrict to meetings that have the given document(s). Repeat to require multiple.", + "description": "Filter by available document type. `transcript` = has an automatic transcript; `pv` = has an official verbatim record; `sr` = has an official summary record. Repeat to require multiple (e.g. `text=transcript&text=pv`). Use `text=transcript` to exclude meetings with no content to read.", "schema": { "type": "array", "items": {