diff --git a/components/ballotquestions/CommitteeHearing.test.tsx b/components/ballotquestions/CommitteeHearing.test.tsx
index 9777da7c5..ae41805e5 100644
--- a/components/ballotquestions/CommitteeHearing.test.tsx
+++ b/components/ballotquestions/CommitteeHearing.test.tsx
@@ -31,7 +31,11 @@ describe("CommitteeHearing", () => {
   })
 
   it("shows hearing context copy", () => {
-    render(<CommitteeHearing hearing={{ id: "1", startsAt: FUTURE_MS }} />)
+    render(
+      <CommitteeHearing
+        hearing={{ id: "1", startsAt: FUTURE_MS, videoURLs: [] }}
+      />
+    )
     expect(screen.getByText("Committee Hearing")).toBeInTheDocument()
     expect(
       screen.getByText("Committee hearings are public meetings.")
@@ -39,13 +43,19 @@ describe("CommitteeHearing", () => {
   })
 
   it("formats the hearing date", () => {
-    render(<CommitteeHearing hearing={{ id: "1", startsAt: PAST_MS }} />)
+    render(
+      <CommitteeHearing
+        hearing={{ id: "1", startsAt: PAST_MS, videoURLs: [] }}
+      />
+    )
     expect(screen.getByText(/December 14, 2025/)).toBeInTheDocument()
   })
 
   it("shows a hearing page link when an id is present", () => {
     render(
-      <CommitteeHearing hearing={{ id: "hearing-1", startsAt: PAST_MS }} />
+      <CommitteeHearing
+        hearing={{ id: "hearing-1", startsAt: PAST_MS, videoURLs: [] }}
+      />
     )
     expect(
       screen.getByRole("link", { name: /Open hearing page/i })
@@ -53,7 +63,11 @@ describe("CommitteeHearing", () => {
   })
 
   it("hides the hearing page link when no hearing id is available", () => {
-    render(<CommitteeHearing hearing={{ id: "", startsAt: PAST_MS }} />)
+    render(
+      <CommitteeHearing
+        hearing={{ id: "", startsAt: PAST_MS, videoURLs: [] }}
+      />
+    )
     expect(screen.queryByRole("link")).not.toBeInTheDocument()
   })
 })
diff --git a/components/ballotquestions/types.ts b/components/ballotquestions/types.ts
index cdc9e0b9f..b27cecb87 100644
--- a/components/ballotquestions/types.ts
+++ b/components/ballotquestions/types.ts
@@ -1,6 +1,6 @@
 export type Hearing = {
   id: string
-  videoURL?: string
+  videoURLs: string[] // required (replaces the optional videoURL); an empty array when the hearing has no video
   startsAt: number // milliseconds since epoch, converted from Firestore Timestamp server-side
 }
 
diff --git a/docs/ballot-questions-frontend.md b/docs/ballot-questions-frontend.md
index 5e9ee756a..ff0cede3d 100644
--- a/docs/ballot-questions-frontend.md
+++ b/docs/ballot-questions-frontend.md
@@ -166,15 +166,15 @@ For each relevant hearing, display:
 
 - **Status**: "Occurred" if `hearing.content.startsAt` is in the past, "Scheduled" if in the future
 - **Date**: formatted from `hearing.content.startsAt`
-- **Watch link**: "Watch the committee hearing here." linked to `hearing.videoURL` — hidden if no video
+- **Watch link**: "Watch the committee hearing here." linked to the URL(s) in `hearing.videoURLs` — hidden if the array is empty
 
 Since ballot questions are always under SJ42 and typically have one hearing, render a single hearing block. If there are multiple, render them in reverse chronological order (most recent first).
 
 **Hearing data model recap:**
 
 - `bill.hearingIds?: string[]` — event IDs; doc path is `/events/hearing-{id}`
-- `bill.nextHearingAt?: Timestamp` — convenience field for upcoming hearing only (not sufficient alone — we need date + videoURL from the full document)
-- `hearing.videoURL?: string` — link for the "Watch" CTA
+- `bill.nextHearingAt?: Timestamp` — convenience field for upcoming hearing only (not sufficient alone — we need date + videoURLs from the full document)
+- `hearing.videoURLs: string[]` — link(s) for the "Watch" CTA; an empty array when the hearing has no video
 - `hearing.content.startsAt` — determines "Occurred" vs. "Scheduled" status
 
 No new components are needed for hearing display — build a simple `CommitteeHearing` component local to `components/ballotquestions/`.
diff --git a/functions/src/bills/updateBillReferences.test.ts b/functions/src/bills/updateBillReferences.test.ts
index 04e7bb762..7e16873d5 100644
--- a/functions/src/bills/updateBillReferences.test.ts
+++ b/functions/src/bills/updateBillReferences.test.ts
@@ -13,6 +13,8 @@ function createHearing(
     type: "hearing",
     startsAt,
     fetchedAt: Timestamp.fromMillis(Date.now()),
+    videos: [],
+    transcriptionIds: [],
     content: {
       EventId: 1,
       EventDate: "2026-02-01T10:00:00",
diff --git a/functions/src/events/AssemblyAIHandler.ts b/functions/src/events/AssemblyAIHandler.ts
new file mode 100644
index 000000000..6e57318b5
--- /dev/null
+++ b/functions/src/events/AssemblyAIHandler.ts
@@ -0,0 +1,410 @@
+import {
+  AssemblyAI,
+  Transcript,
+  TranscriptParagraph,
+  TranscriptUtterance,
+  TranscriptWord
+} from "assemblyai"
+import { db, storage } from "../firebase"
+import { randomBytes } from "node:crypto"
+import { sha256 } from "js-sha256"
+import ffmpeg from "fluent-ffmpeg"
+import fs from "fs"
+
+/**
+ * Contract shared by the real AssemblyAI wrapper and its emulator stand-in.
+ */
+abstract class AssemblyAIHandlerBase {
+  /**
+   * Starts a transcription for one hearing video and resolves with the id of
+   * the resulting transcript.
+   */
+  abstract submitTranscription(args: {
+    EventId: number
+    videoUrl: string
+    bucketName?: string
+  }): Promise<string>
+
+  /** Resolves with the transcript identified by `transcript_id`. */
+  abstract getTranscript(transcript_id: string): Promise<Transcript>
+
+  /** Resolves with the paragraphs of the transcript `transcript_id`. */
+  abstract fetchParagraphs(
+    transcript_id: string
+  ): Promise<TranscriptParagraph[]>
+}
+
+/**
+ * Handler backed by the AssemblyAI SDK client.
+ */
+export class AssemblyAIHandler extends AssemblyAIHandlerBase {
+  assembly: AssemblyAI
+
+  constructor({ apiKey }: { apiKey: string }) {
+    super()
+    this.assembly = new AssemblyAI({ apiKey })
+  }
+
+  /**
+   * Extracts the audio of `videoUrl`, submits it to AssemblyAI together with
+   * a freshly generated webhook secret, and stores the SHA-256 hash of that
+   * secret in `events/hearing-{EventId}/private/{transcriptId}`.
+   *
+   * @returns the id of the submitted transcript
+   */
+  async submitTranscription({
+    EventId,
+    videoUrl,
+    bucketName
+  }: {
+    EventId: number
+    videoUrl: string
+    bucketName?: string
+  }): Promise<string> {
+    const webhookSecret = randomBytes(16).toString("hex")
+    // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac"
+    const audioUrl = await extractAudioFromVideo(EventId, videoUrl, bucketName)
+
+    // make sure process.env.FUNCTIONS_API_BASE equals
+    // https://us-central1-digital-testimony-prod.cloudfunctions.net
+    // on prod. test with:
+    // "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription"
+    const webhookUrl = `${process.env.FUNCTIONS_API_BASE}/transcription`
+
+    const { id: transcriptId } = await this.assembly.transcripts.submit({
+      audio: audioUrl,
+      webhook_url: webhookUrl,
+      speaker_labels: true,
+      webhook_auth_header_name: "x-maple-webhook",
+      webhook_auth_header_value: webhookSecret
+    })
+
+    // Only the hash of the secret is persisted.
+    const privateDoc = db
+      .collection("events")
+      .doc(`hearing-${String(EventId)}`)
+      .collection("private")
+      .doc(transcriptId)
+    await privateDoc.set({
+      videoAssemblyWebhookToken: sha256(webhookSecret)
+    })
+
+    return transcriptId
+  }
+
+  /** Fetches the transcript `transcript_id` from AssemblyAI. */
+  async getTranscript(transcript_id: string): Promise<Transcript> {
+    const transcript = await this.assembly.transcripts.get(transcript_id)
+    return transcript
+  }
+
+  /** Fetches the paragraphs of transcript `transcript_id` from AssemblyAI. */
+  async fetchParagraphs(transcript_id: string): Promise<TranscriptParagraph[]> {
+    const { paragraphs } = await this.assembly.transcripts.paragraphs(
+      transcript_id
+    )
+    return paragraphs
+  }
+}
+
+/**
+ * Stand-in for {@link AssemblyAIHandler} that never contacts AssemblyAI: it
+ * fabricates a transcript id, stores the hashed webhook token like the real
+ * handler does, and ten seconds later posts a generated transcript to the
+ * local emulator's `transcription` endpoint.
+ */
+export class AssemblyAIHandlerDummy extends AssemblyAIHandlerBase {
+  /**
+   * Registers a mock transcription for hearing `EventId`.
+   *
+   * `videoUrl` and `bucketName` are accepted for interface compatibility and
+   * are not used.
+   *
+   * @returns the generated `mock_…` transcript id
+   */
+  async submitTranscription({
+    EventId,
+    videoUrl,
+    bucketName
+  }: {
+    EventId: number
+    videoUrl: string
+    bucketName?: string
+  }): Promise<string> {
+    const token = randomBytes(16).toString("hex")
+    const transcriptionId = `mock_${Math.random().toString(36).slice(2)}`
+
+    // Persist the token hash before scheduling the webhook so that no
+    // webhook is ever sent for a token that failed to be stored.
+    await db
+      .collection("events")
+      .doc(`hearing-${String(EventId)}`)
+      .collection("private")
+      .doc(transcriptionId)
+      .set({
+        videoAssemblyWebhookToken: sha256(token)
+      })
+
+    setTimeout(() => {
+      void this.deliverMockWebhook(transcriptionId, token)
+    }, 10000)
+
+    return transcriptionId
+  }
+
+  /**
+   * Posts a generated transcript to the emulator webhook. Failures are logged
+   * instead of thrown: this runs from a timer, where a rejection would be
+   * unhandled.
+   */
+  private async deliverMockWebhook(transcriptionId: string, token: string) {
+    try {
+      const transcript: any = await this.getTranscript(transcriptionId)
+      transcript["transcript_id"] = transcript.id
+      await fetch("http://localhost:5001/demo-dtp/us-central1/transcription", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "x-maple-webhook": token
+        },
+        body: JSON.stringify(transcript)
+      })
+    } catch (err) {
+      console.error(
+        `Mock transcription webhook for ${transcriptionId} failed:`,
+        err
+      )
+    }
+  }
+
+  /** Returns a freshly generated mock transcript carrying `transcriptId`. */
+  async getTranscript(transcriptId: string): Promise<Transcript> {
+    return getTranscript(transcriptId).transcript
+  }
+
+  /** Returns the paragraphs of a freshly generated mock transcript. */
+  async fetchParagraphs(transcriptId: string): Promise<TranscriptParagraph[]> {
+    return getTranscript(transcriptId).paragraphs
+  }
+}
+
+/**
+ * Names of the audio objects written by `extractAudioFromVideo`:
+ * `hearing-{EventId}-{uploadMillis}.m4a` (older uploads) or
+ * `hearing-{EventId}-{randomHex}-{uploadMillis}.m4a`. Group 1 is the upload
+ * time in milliseconds since the epoch.
+ */
+const HEARING_AUDIO_NAME = /^hearing-\d+-(?:[0-9a-f]+-)?(\d+)\.m4a$/
+
+/**
+ * Copies the audio track out of `videoUrl` with ffmpeg, uploads it to the
+ * given (or default) storage bucket and returns a signed read URL valid for
+ * 24 hours. Audio objects older than one day are deleted afterwards on a
+ * best-effort basis.
+ *
+ * @param EventId hearing id, used in the temporary and uploaded file names
+ * @param videoUrl URL handed to ffmpeg as input
+ * @param bucketName bucket to upload to; the default bucket when omitted
+ * @returns signed URL of the uploaded audio file
+ * @throws when ffmpeg, the upload, or URL signing fails
+ */
+const extractAudioFromVideo = async (
+  EventId: number,
+  videoUrl: string,
+  bucketName?: string
+): Promise<string> => {
+  // Several videos of one hearing can be processed concurrently, so a
+  // timestamp alone does not make the file names unique.
+  const unique = randomBytes(4).toString("hex")
+  const tmpFilePath = `/tmp/hearing-${EventId}-${unique}-${Date.now()}.m4a`
+
+  const bucket = bucketName ? storage.bucket(bucketName) : storage.bucket()
+  let file
+
+  try {
+    // Stream directly from URL and copy audio codec
+    await new Promise<void>((resolve, reject) => {
+      ffmpeg(videoUrl)
+        .noVideo()
+        .audioCodec("copy")
+        .format("mp4")
+        .on("start", commandLine => {
+          console.log(`Spawned FFmpeg with command: ${commandLine}`)
+        })
+        .on("end", () => {
+          console.log("FFmpeg processing finished successfully")
+          resolve()
+        })
+        .on("error", err => {
+          console.error("FFmpeg error:", err)
+          reject(err)
+        })
+        .save(tmpFilePath)
+    })
+
+    // Upload the audio file. The timestamp stays the last dash-separated
+    // segment of the name, which is what the stale-file cleanup reads.
+    file = bucket.file(`hearing-${EventId}-${unique}-${Date.now()}.m4a`)
+    const fileContent = await fs.promises.readFile(tmpFilePath)
+    await file.save(fileContent, {
+      metadata: {
+        contentType: "audio/mp4"
+      }
+    })
+  } finally {
+    // Clean up the temporary file even when extraction or upload failed;
+    // `force` tolerates ffmpeg never having created it.
+    await fs.promises.rm(tmpFilePath, { force: true })
+  }
+
+  const [url] = await file.getSignedUrl({
+    action: "read",
+    expires: Date.now() + 24 * 60 * 60 * 1000
+  })
+
+  // Delete old files. Only objects whose name matches the upload pattern are
+  // considered, and a cleanup failure must not fail a successful upload.
+  try {
+    const [files] = await bucket.getFiles({
+      prefix: "hearing-",
+      maxResults: 1000
+    })
+    const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000
+    const oldFiles = files.filter(candidate => {
+      const match = HEARING_AUDIO_NAME.exec(candidate.name)
+      return match !== null && Number(match[1]) < oneDayAgo
+    })
+    await Promise.all(oldFiles.map(oldFile => oldFile.delete()))
+  } catch (err) {
+    console.warn("Failed to delete old hearing audio files:", err)
+  }
+
+  // Return the new audio url
+  return url
+}
+
+/**
+ * Free-function variant of transcription submission: builds its own
+ * AssemblyAI client from `ASSEMBLY_API_KEY`, submits the extracted audio of
+ * `maybeVideoUrl`, and stores the hashed webhook secret in
+ * `events/hearing-{EventId}/private/webhookAuth`.
+ *
+ * NOTE(review): the handler classes in this file store the hash under the
+ * transcript id instead of `webhookAuth` — confirm which document the
+ * webhook consumer reads.
+ *
+ * @returns the id of the submitted transcript
+ */
+export const submitTranscription = async ({
+  EventId,
+  maybeVideoUrl,
+  bucketName
+}: {
+  EventId: number
+  maybeVideoUrl: string
+  bucketName?: string
+}) => {
+  const apiKey = process.env.ASSEMBLY_API_KEY || ""
+  const client = new AssemblyAI({ apiKey })
+
+  const webhookSecret = randomBytes(16).toString("hex")
+  // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac"
+  const audioUrl = await extractAudioFromVideo(
+    EventId,
+    maybeVideoUrl,
+    bucketName
+  )
+
+  // make sure process.env.FUNCTIONS_API_BASE equals
+  // https://us-central1-digital-testimony-prod.cloudfunctions.net
+  // on prod. test with:
+  // "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription"
+  const webhookUrl = `${process.env.FUNCTIONS_API_BASE}/transcription`
+
+  const submitted = await client.transcripts.submit({
+    audio: audioUrl,
+    webhook_url: webhookUrl,
+    speaker_labels: true,
+    webhook_auth_header_name: "x-maple-webhook",
+    webhook_auth_header_value: webhookSecret
+  })
+
+  const authDoc = db
+    .collection("events")
+    .doc(`hearing-${String(EventId)}`)
+    .collection("private")
+    .doc("webhookAuth")
+  await authDoc.set({
+    videoAssemblyWebhookToken: sha256(webhookSecret)
+  })
+
+  return submitted.id
+}
+
+/** Vocabulary the mock transcript generator draws its words from. */
+const WORD_BANK = (
+  "lorem ipsum dolor sit amet consectetur adipiscing elit sed do " +
+  "eiusmod tempor incididunt ut labore et dolore magna aliqua"
+).split(" ")
+
+/** Speaker labels assigned to generated paragraphs. */
+const SPEAKERS = Array.from("ABC")
+
+/** Returns a random integer between `min` and `max`, both inclusive. */
+function randomInt(min: number, max: number) {
+  const span = max - min + 1
+  return min + Math.floor(Math.random() * span)
+}
+
+/**
+ * Returns a random number in `[min, max)` rounded to `precision` decimal
+ * places.
+ */
+function randomFloat(min: number, max: number, precision = 2) {
+  const raw = Math.random() * (max - min) + min
+  const rounded = raw.toFixed(precision)
+  return Number(rounded)
+}
+
+/** Arithmetic mean of `values`; `NaN` for an empty array (0 / 0). */
+function mean(values: number[]) {
+  let total = 0
+  for (const value of values) {
+    total += value
+  }
+  return total / values.length
+}
+
+/** Returns `length` words picked at random from `WORD_BANK`. */
+function loremSentence(length: number) {
+  const pickWord = () => WORD_BANK[randomInt(0, WORD_BANK.length - 1)]
+  return Array.from({ length }, pickWord)
+}
+
+/** Returns `length` random sentences of 3 to 10 words each. */
+function loremParagraph(length: number) {
+  const makeSentence = () => {
+    const wordCount = randomInt(3, 10)
+    return loremSentence(wordCount)
+  }
+  return Array.from({ length }, makeSentence)
+}
+
+/**
+ * Builds the skeleton of a mock transcript: 10 to 20 paragraphs of 3 to 8
+ * sentences each, nested as paragraphs -> sentences -> words.
+ */
+function loremTranscriptStructure() {
+  const paragraphCount = randomInt(10, 20)
+  return Array.from({ length: paragraphCount }, () => {
+    const sentenceCount = randomInt(3, 8)
+    return loremParagraph(sentenceCount)
+  })
+}
+
+/**
+ * Generates a random lorem-ipsum transcript carrying the given id, together
+ * with its paragraph breakdown. Nothing is fetched or stored: every call
+ * builds new random content, so two calls with the same id return different
+ * text.
+ *
+ * @param transcript_id id copied into the returned transcript
+ * @returns the mock transcript (status "completed") and its paragraphs
+ */
+export function getTranscript(transcript_id: string): {
+  transcript: Transcript
+  paragraphs: TranscriptParagraph[]
+} {
+  const structure = loremTranscriptStructure()
+
+  const utterances: TranscriptUtterance[] = []
+  const paragraphs: TranscriptParagraph[] = []
+  const allWords: TranscriptWord[] = []
+
+  // Running clock used for word start/end values
+  // (presumably milliseconds — TODO confirm).
+  let currentTime = 0
+
+  for (const paragraph of structure) {
+    // One speaker per paragraph.
+    const speaker = SPEAKERS[randomInt(0, SPEAKERS.length - 1)]
+
+    const paragraphWords: TranscriptWord[] = []
+
+    for (const sentence of paragraph) {
+      const sentenceWords: TranscriptWord[] = []
+
+      for (const token of sentence) {
+        const confidence = randomFloat(0.5, 0.99)
+
+        // Each word spans one clock unit; the clock then advances by 300.
+        const word: TranscriptWord = {
+          confidence,
+          start: Number(currentTime.toFixed(2)),
+          end: Number((currentTime + 1).toFixed(2)),
+          speaker,
+          text: token
+        }
+
+        sentenceWords.push(word)
+        paragraphWords.push(word)
+        allWords.push(word)
+
+        currentTime += 300
+      }
+
+      // Every sentence becomes one utterance.
+      const utterance: TranscriptUtterance = {
+        confidence: Number(
+          mean(sentenceWords.map(w => w.confidence)).toFixed(2)
+        ),
+        start: sentenceWords[0].start,
+        end: sentenceWords[sentenceWords.length - 1].end,
+        speaker,
+        text: sentenceWords.map(w => w.text).join(" "),
+        words: sentenceWords
+      }
+
+      utterances.push(utterance)
+
+      // Random pause between sentences.
+      currentTime += randomInt(100, 3000)
+    }
+
+    const transcriptParagraph: TranscriptParagraph = {
+      confidence: Number(
+        mean(paragraphWords.map(w => w.confidence)).toFixed(2)
+      ),
+      start: paragraphWords[0].start,
+      end: paragraphWords[paragraphWords.length - 1].end,
+      text: paragraphWords.map(w => w.text).join(" "),
+      words: paragraphWords
+    }
+
+    paragraphs.push(transcriptParagraph)
+
+    // Random pause between paragraphs.
+    currentTime += randomInt(500, 7000)
+  }
+
+  // Fixed placeholder metadata; only id, text, confidence, utterances and
+  // words are derived from the generated content.
+  const transcript: Transcript = {
+    acoustic_model: "no",
+    audio_url: "https://example.com/definitely-a-video",
+    auto_highlights: false,
+    id: transcript_id,
+    language_confidence: 0.95,
+    language_confidence_threshold: 0.03,
+    language_model: "no",
+    speech_model: null,
+    redact_pii: true,
+    status: "completed",
+    summarization: false,
+    webhook_auth: true,
+    webhook_auth_header_name: "x-maple-webhook",
+
+    text: utterances.map(u => u.text).join(". "),
+    confidence: Number(mean(allWords.map(w => w.confidence)).toFixed(2)),
+
+    utterances,
+    words: allWords
+  }
+
+  return {
+    transcript,
+    paragraphs
+  }
+}
+
+/** Handler created on the first `assemblyAI()` call and reused afterwards. */
+let assemblyInstance: AssemblyAIHandler | AssemblyAIHandlerDummy | undefined
+
+/**
+ * Returns the shared transcription handler. The real handler is used when
+ * `ASSEMBLY_API_KEY` is set to anything other than `"test-api-key"`;
+ * otherwise the dummy handler is used.
+ */
+export function assemblyAI(): AssemblyAIHandler | AssemblyAIHandlerDummy {
+  if (assemblyInstance) return assemblyInstance
+
+  const apiKey = process.env.ASSEMBLY_API_KEY
+  if (apiKey && apiKey !== "test-api-key") {
+    assemblyInstance = new AssemblyAIHandler({ apiKey })
+  } else {
+    console.log("AssemblyAI is faked for this emulator")
+    assemblyInstance = new AssemblyAIHandlerDummy()
+  }
+  return assemblyInstance
+}
diff --git a/functions/src/events/EventScraper.ts b/functions/src/events/EventScraper.ts
new file mode 100644
index 000000000..3f7e62850
--- /dev/null
+++ b/functions/src/events/EventScraper.ts
@@ -0,0 +1,222 @@
+import { RuntimeOptions, runWith } from "firebase-functions/v1"
+import { DateTime } from "luxon"
+import { logFetchError } from "../common"
+import { db, Timestamp } from "../firebase"
+import * as api from "../malegislature"
+import {
+  BaseEvent,
+  BaseEventContent,
+  Session,
+  SessionContent,
+  SpecialEvent,
+  SpecialEventContent
+} from "./types"
+import { currentGeneralCourt } from "../shared"
+
+/**
+ * Base class for scheduled scrapers that list events, fetch each one and
+ * merge it into `/events/{id}`.
+ */
+export abstract class EventScraper<ListItem, Event extends BaseEvent> {
+  private schedule
+  private timeout
+  private memory
+  private pastEventCutoff
+
+  /**
+   * @param schedule schedule expression passed to `pubsub.schedule`
+   * @param timeout function timeout in seconds
+   * @param options `memory` defaults to "256MB"; `pastEventCutoff` is how far
+   *   back from now an event may have started and still be written
+   *   (default 8 days)
+   */
+  constructor(
+    schedule: string,
+    timeout: number,
+    options: {
+      memory?: RuntimeOptions["memory"]
+      pastEventCutoff?: Duration
+    } = {}
+  ) {
+    const { memory = "256MB", pastEventCutoff = { days: 8 } } = options
+    this.schedule = schedule
+    this.timeout = timeout
+    this.memory = memory
+    this.pastEventCutoff = pastEventCutoff
+  }
+
+  /** The scheduled function that runs this scraper. */
+  get function() {
+    const runtime = runWith({
+      timeoutSeconds: this.timeout,
+      secrets: ["ASSEMBLY_API_KEY"],
+      memory: this.memory,
+      maxInstances: 1
+    })
+    return runtime.pubsub.schedule(this.schedule).onRun(() => this.run())
+  }
+
+  abstract listEvents(): Promise<ListItem[]>
+  abstract getEvent(item: ListItem): Promise<Event>
+
+  private async run() {
+    const list = await this.listEvents().catch(logFetchError("event list"))
+    if (!list) return
+
+    const writer = db.bulkWriter()
+    const cutoffMillis = DateTime.now().minus(this.pastEventCutoff).toMillis()
+
+    for (const item of list) {
+      const id = (item as any)?.EventId
+      const event = await this.getEvent(item).catch(logFetchError("event", id))
+
+      if (!event) continue
+      // Stops at the first event older than the cutoff
+      // (presumably the list is ordered newest first — confirm).
+      if (event.startsAt.toMillis() < cutoffMillis) break
+
+      writer.set(db.doc(`/events/${event.id}`), event, { merge: true })
+
+      console.log("event in run()", event)
+    }
+
+    await writer.close()
+  }
+
+  /** Parse the event start time in the time zone of the API. */
+  getEventStart(content: { EventDate: string; StartTime: string }) {
+    const zone = api.timeZone
+    const date = DateTime.fromISO(content.EventDate, { zone })
+    const time = DateTime.fromISO(content.StartTime, { zone })
+
+    // Date part comes from EventDate, time-of-day part from StartTime.
+    return DateTime.fromObject(
+      {
+        year: date.year,
+        month: date.month,
+        day: date.day,
+        hour: time.hour,
+        minute: time.minute,
+        second: time.second,
+        millisecond: time.millisecond
+      },
+      { zone }
+    )
+  }
+
+  /** Return timestamps shared between event types. */
+  timestamps(content: BaseEventContent) {
+    const startMillis = this.getEventStart(content).toMillis()
+    return {
+      fetchedAt: Timestamp.now(),
+      startsAt: Timestamp.fromMillis(startMillis)
+    }
+  }
+}
+
+/** Scrapes special events every 60 minutes (540 s timeout). */
+export class SpecialEventsScraper extends EventScraper<
+  SpecialEventContent,
+  SpecialEvent
+> {
+  constructor() {
+    super("every 60 minutes", 540)
+  }
+
+  /** Lists special events, keeping only entries that pass the type guard. */
+  async listEvents() {
+    const all = await api.getSpecialEvents()
+    return all.filter(SpecialEventContent.guard)
+  }
+
+  /** Wraps a list entry into a `specialEvent-{EventId}` document. */
+  getEvent(content: SpecialEventContent) {
+    const { fetchedAt, startsAt } = this.timestamps(content)
+    const event: SpecialEvent = {
+      id: `specialEvent-${content.EventId}`,
+      type: "specialEvent",
+      content,
+      fetchedAt,
+      startsAt
+    }
+    return Promise.resolve(event)
+  }
+}
+
+/** Scrapes sessions of the current general court every 60 minutes. */
+export class SessionScraper extends EventScraper<SessionContent, Session> {
+  private court = currentGeneralCourt
+
+  constructor() {
+    super("every 60 minutes", 120)
+  }
+
+  /** Lists the court's sessions, keeping entries that pass the type guard. */
+  async listEvents() {
+    const all = await api.getSessions(this.court)
+    return all.filter(SessionContent.guard)
+  }
+
+  /** Wraps a list entry into a `session-{court}-{EventId}` document. */
+  getEvent(content: SessionContent) {
+    const { fetchedAt, startsAt } = this.timestamps(content)
+    const event: Session = {
+      id: `session-${this.court}-${content.EventId}`,
+      type: "session",
+      content,
+      fetchedAt,
+      startsAt
+    }
+    return Promise.resolve(event)
+  }
+}
+
+/**
+ * Base class for scheduled jobs that revisit stored events of one type and
+ * update the documents that need further processing.
+ */
+export abstract class EventPostProcessor<ListItem> {
+  private schedule
+  private timeout
+  private eventType
+  private memory
+  private pastEventBeginProcessing
+  private pastEventCutoff
+
+  /**
+   * @param schedule schedule expression passed to `pubsub.schedule`
+   * @param timeout function timeout in seconds
+   * @param eventType value of the `type` field of the events to process
+   * @param options `memory` defaults to "256MB". Events are processed when
+   *   they started at least `pastEventBeginProcessing` ago (default: any
+   *   event that has started) and at most `pastEventCutoff` ago (default
+   *   8 days).
+   */
+  constructor(
+    schedule: string,
+    timeout: number,
+    eventType: string,
+    {
+      memory = "256MB",
+      pastEventBeginProcessing = {},
+      pastEventCutoff = { days: 8 }
+    }: {
+      memory?: RuntimeOptions["memory"]
+      pastEventBeginProcessing?: Duration
+      pastEventCutoff?: Duration
+    } = {}
+  ) {
+    this.schedule = schedule
+    this.timeout = timeout
+    this.eventType = eventType
+    this.memory = memory
+    this.pastEventBeginProcessing = pastEventBeginProcessing
+    this.pastEventCutoff = pastEventCutoff
+  }
+
+  /** The scheduled function that runs this post-processor. */
+  get function() {
+    return runWith({
+      timeoutSeconds: this.timeout,
+      secrets: ["ASSEMBLY_API_KEY"],
+      memory: this.memory,
+      maxInstances: 1
+    })
+      .pubsub.schedule(this.schedule)
+      .onRun(() => this.run())
+  }
+
+  /** Returns the work item for a document, or null to leave it untouched. */
+  abstract updateIf(data: FirebaseFirestore.DocumentData): null | ListItem
+  /** Computes the fields to update for a work item. */
+  abstract getUpdate(item: ListItem): any
+
+  /**
+   * Processes every matching event. A failure on one event is logged and
+   * does not stop the others; the writer is always closed so that updates
+   * already queued are flushed.
+   *
+   * @throws after all events were attempted, if any of them failed
+   */
+  private async run() {
+    const now = DateTime.now()
+    const begin = now.minus(this.pastEventBeginProcessing).toJSDate()
+    const cutoff = now.minus(this.pastEventCutoff).toJSDate()
+
+    const snapshot = await db
+      .collection("events")
+      .where("type", "==", this.eventType)
+      .where("startsAt", "<=", begin)
+      .where("startsAt", ">=", cutoff)
+      .get()
+
+    if (snapshot.empty) return
+
+    const writer = db.bulkWriter()
+    const failedIds: string[] = []
+
+    try {
+      for (const doc of snapshot.docs) {
+        const data = doc.data()
+        if (!data) continue
+
+        try {
+          const item = this.updateIf(data)
+          if (!item) continue
+
+          writer.update(doc.ref, await this.getUpdate(item))
+
+          console.log("event in run()", data)
+        } catch (err) {
+          // Keep going: the same broken event would otherwise block every
+          // event after it on each run.
+          failedIds.push(doc.id)
+          console.error(`Post-processing failed for event ${doc.id}:`, err)
+        }
+      }
+    } finally {
+      await writer.close()
+    }
+
+    if (failedIds.length > 0) {
+      throw new Error(
+        `Post-processing failed for ${failedIds.length} of ${
+          snapshot.docs.length
+        } events: ${failedIds.join(", ")}`
+      )
+    }
+  }
+}
diff --git a/functions/src/events/HearingScraper.ts b/functions/src/events/HearingScraper.ts
new file mode 100644
index 000000000..99383f187
--- /dev/null
+++ b/functions/src/events/HearingScraper.ts
@@ -0,0 +1,271 @@
+import { JSDOM } from "jsdom"
+import { db, Timestamp } from "../firebase"
+import * as api from "../malegislature"
+import { Hearing, HearingContent, HearingListItem, Video } from "./types"
+import { isValidVideoUrl } from "./helpers"
+import { Committee } from "../committees/types"
+import { EventPostProcessor, EventScraper } from "./EventScraper"
+import { assemblyAI } from "./AssemblyAIHandler"
+
+/**
+ * Looks up the names of the House and Senate chairs of a committee from
+ * `generalCourts/{generalCourtNumber}/committees/{committeeCode}`.
+ *
+ * Resolves with an empty array when the committee document does not exist or
+ * when loading or validating it fails (the failure is logged as a warning).
+ */
+const loadCommitteeChairNames = async (
+  generalCourtNumber: number,
+  committeeCode: string
+) => {
+  try {
+    const snapshot = await db
+      .collection(`generalCourts/${generalCourtNumber}/committees`)
+      .doc(committeeCode)
+      .get()
+
+    if (!snapshot.exists) return [] as string[]
+
+    const { members, content } = Committee.check(snapshot.data())
+
+    // Member codes of whichever chairs the committee has.
+    const chairCodes = new Set<string>()
+    const candidateCodes = [
+      content.HouseChairperson?.MemberCode,
+      content.SenateChairperson?.MemberCode
+    ]
+    for (const code of candidateCodes) {
+      if (code) chairCodes.add(code)
+    }
+
+    const chairs = (members ?? []).filter(member => chairCodes.has(member.id))
+    return chairs.map(member => member.name)
+  } catch (error) {
+    console.warn(
+      `Failed to load committee chairs for ${committeeCode} (${generalCourtNumber}):`,
+      error
+    )
+    return [] as string[]
+  }
+}
+
+/** Scrapes hearings every 60 minutes (480 s timeout). */
+export class HearingScraper extends EventScraper<HearingListItem, Hearing> {
+  constructor() {
+    super("every 60 minutes", 480)
+  }
+
+  /** Lists hearings, keeping only entries that pass the type guard. */
+  async listEvents() {
+    const events = await api.listHearings()
+    return events.filter(HearingListItem.guard)
+  }
+
+  /**
+   * Fetches one hearing and builds its `hearing-{EventId}` document.
+   *
+   * `videos` and `transcriptionIds` are carried over from the stored
+   * document and only default to empty arrays when it has none. The scraped
+   * event is written with `merge: true`, which replaces every field present
+   * in the payload; always sending `[]` would erase the videos recorded by
+   * the post-processor on each scrape and cause them to be transcribed
+   * again.
+   */
+  async getEvent({ EventId }: HearingListItem /* e.g. 4962 */) {
+    const data = await api.getHearing(EventId)
+    const content = HearingContent.check(data)
+
+    const host = content.HearingHost
+    const committeeChairs =
+      host?.CommitteeCode && host?.GeneralCourtNumber
+        ? await loadCommitteeChairNames(
+            host.GeneralCourtNumber,
+            host.CommitteeCode
+          )
+        : []
+
+    const id = `hearing-${EventId}`
+    const stored = (await db.doc(`/events/${id}`).get()).data()
+
+    return {
+      id,
+      type: "hearing",
+      content,
+      committeeChairs,
+      videos: stored?.videos ?? [],
+      transcriptionIds: stored?.transcriptionIds ?? [],
+      ...this.timestamps(content)
+    } as Hearing
+  }
+}
+
+/**
+ * Strips the leading and trailing words that all `strings` have in common
+ * (compared case-insensitively) and returns what is left of each string,
+ * with whitespace runs collapsed to single spaces.
+ */
+function removeCommonWords(strings: string[]) {
+  if (strings.length === 0) return []
+
+  // Normalize whitespace and split into words
+  const tokenized = strings.map(text =>
+    text.trim().replace(/\s+/g, " ").split(" ")
+  )
+  const reference = tokenized[0]
+
+  const allShareWordAt = (fromStart: number) =>
+    tokenized.every(
+      words =>
+        fromStart < words.length &&
+        words[fromStart].toLowerCase() === reference[fromStart].toLowerCase()
+    )
+
+  let prefixLen = 0
+  while (allShareWordAt(prefixLen)) {
+    prefixLen += 1
+  }
+
+  // The suffix never overlaps words already counted as prefix.
+  const allShareWordFromEnd = (fromEnd: number) =>
+    tokenized.every(
+      words =>
+        fromEnd < words.length - prefixLen &&
+        words[words.length - 1 - fromEnd].toLowerCase() ===
+          reference[reference.length - 1 - fromEnd].toLowerCase()
+    )
+
+  let suffixLen = 0
+  while (allShareWordFromEnd(suffixLen)) {
+    suffixLen += 1
+  }
+
+  return tokenized.map(words => {
+    const middle = words.slice(prefixLen, words.length - suffixLen)
+    return middle.join(" ")
+  })
+}
+
+/**
+ * Hourly job that finds stored hearings without videos, scrapes their videos
+ * from the hearing web page and submits each one for transcription.
+ */
+export class HearingPostProcessor extends EventPostProcessor<HearingListItem> {
+  constructor() {
+    super("every 60 minutes", 480, "hearing", { memory: "4GB" })
+  }
+
+  /**
+   * Scrapes the videos listed on the hearing detail page.
+   *
+   * Duplicate URLs are dropped. A single video is titled
+   * `hearing-{EventId}`. Several videos are ordered and titled "Part N" when
+   * every title carries a distinct part number ("1 of 3", "part 2",
+   * "pt. 3"); otherwise page order is kept and the titles are shortened to
+   * the words that differ between them.
+   *
+   * @returns the videos, or an empty array when the page lists none
+   * @throws when the page does not have the expected structure
+   */
+  async getHearingVideos(
+    EventId: number
+  ): Promise<Omit<Video, "transcriptionId">[]> {
+    const hearingErr = `An error collecting videos for hearing ${EventId} (webpage format changed?)`
+
+    const req = await fetch(
+      `https://malegislature.gov/Events/Hearings/Detail/${EventId}`
+    )
+    const res = await req.text()
+    if (!res) throw new Error(`${hearingErr}: No response for request`)
+    const dom = new JSDOM(res)
+    if (!dom)
+      throw new Error(`${hearingErr}: Could not create JSDOM of request`)
+
+    const videoElements = [].slice.call(
+      dom.window.document.querySelectorAll("#playWebcast")
+    ) as Element[]
+    if (videoElements.length === 0) return []
+
+    // The video URL is the first argument of the element's switchVideo(...)
+    // onclick handler.
+    const videoURLs = videoElements.map(elem => {
+      const onclick = elem.getAttribute("onclick")
+      if (!onclick) throw new Error(`${hearingErr}: No onclick in ${elem}`)
+      const match = onclick.match(/switchVideo\('([^']+)'/)
+      if (!match || match.length < 2)
+        throw new Error(`${hearingErr}: Could not match switchVideo in ${elem}`)
+      if (!isValidVideoUrl(match[1]))
+        throw new Error(`${hearingErr}: ${match[1]} is not a valid video url`)
+      return match[1]
+    })
+
+    // Titles come from the first cell of each row of the enclosing table.
+    const tbody = videoElements[0].closest("tbody")
+    if (!tbody)
+      throw new Error(
+        `${hearingErr}: Could not find parent tbody of #playWebcast`
+      )
+    const titles = Array.from(tbody.querySelectorAll("tr")).map(tr => {
+      const item = tr.querySelector("td")?.textContent?.trim()
+      if (!item)
+        throw new Error(`${hearingErr}: Could not locate title in ${tr}`)
+      return item
+    })
+    if (titles.length !== videoURLs.length)
+      throw new Error(
+        `${hearingErr}: Number of video table rows did not equal number of #playWebcast elements`
+      )
+
+    // Pair URLs with titles, keeping the first occurrence of each URL.
+    const seenUrls = new Set<string>()
+    let videos = videoURLs
+      .map((url, i) => ({ url, title: titles[i] }))
+      .filter(item => {
+        if (seenUrls.has(item.url)) return false
+        seenUrls.add(item.url)
+        return true
+      })
+
+    if (videos.length > 1) {
+      // Zero-based part index parsed from each title, -1 when absent.
+      const order = videos.map(item => {
+        const title = item.title.toLowerCase()
+        const match = title.match(
+          /\b(?:(\d+)\s+of\s+\d+|part\s+(\d+)|pt\.?\s+(\d+))\b/
+        )
+        if (!match) return -1
+        const part = parseInt(match[1] || match[2] || match[3], 10)
+        return part - 1
+      })
+
+      // The part indexes are usable only if they are exactly 0..n-1.
+      const seenParts = new Set<number>()
+      let validOrder = true
+      for (const n of order) {
+        if (n < 0 || n >= order.length || seenParts.has(n)) {
+          validOrder = false
+          break
+        }
+        seenParts.add(n)
+      }
+
+      if (validOrder) {
+        const reordered: typeof videos = new Array(videos.length)
+        for (let i = 0; i < order.length; i++) {
+          reordered[order[i]] = videos[i]
+        }
+        videos = reordered.map((item, index) => ({
+          ...item,
+          title: `Part ${index + 1}`
+        }))
+      } else {
+        // Shorten the titles of the de-duplicated videos, not of all table
+        // rows: after duplicates are dropped the two lists no longer line up
+        // by index.
+        let shortTitles = removeCommonWords(videos.map(item => item.title))
+        if (shortTitles.some(title => title.length === 0)) {
+          shortTitles = shortTitles.map((_, i) => `Part ${i + 1}`)
+        }
+        videos = videos.map((item, index) => ({
+          ...item,
+          title: shortTitles[index]
+        }))
+        console.log(
+          `Ordering not possible for hearing ${EventId} - fallback titles are ${JSON.stringify(
+            shortTitles
+          )}`
+        )
+      }
+    } else {
+      videos[0].title = `hearing-${EventId}`
+    }
+    return videos
+  }
+
+  /**
+   * Selects hearings that have no videos yet. Documents without a `videos`
+   * field are treated like documents with an empty list.
+   */
+  updateIf(data: FirebaseFirestore.DocumentData): null | HearingListItem {
+    if (data.videos?.length) return null
+    return { EventId: data.content.EventId }
+  }
+
+  /**
+   * Scrapes the hearing's videos and makes sure each has a transcription.
+   *
+   * @param existingVideos videos already stored for the hearing; a scraped
+   *   video whose URL appears here keeps its transcription id instead of
+   *   being submitted again
+   * @returns the fields to write to the hearing document
+   */
+  async getUpdate(
+    { EventId }: HearingListItem,
+    existingVideos?: Video[]
+  ): Promise<{
+    transcriptionIds: string[]
+    videos: Video[]
+    videosFetchedAt: Timestamp
+  }> {
+    const videos = await this.getHearingVideos(EventId)
+
+    const prevURLs = existingVideos
+      ? Object.fromEntries(
+          existingVideos.map(({ url, transcriptionId }) => [
+            url,
+            transcriptionId
+          ])
+        )
+      : {}
+
+    const transcriptionIds = await Promise.all(
+      videos.map(item => {
+        return prevURLs[item.url] !== undefined
+          ? prevURLs[item.url]
+          : assemblyAI().submitTranscription({
+              EventId,
+              videoUrl: item.url
+            })
+      })
+    )
+
+    const videosWithTranscriptions = videos.map((item, index) => {
+      return {
+        transcriptionId: transcriptionIds[index],
+        ...item
+      }
+    })
+
+    return {
+      transcriptionIds,
+      videos: videosWithTranscriptions,
+      videosFetchedAt: Timestamp.now()
+    }
+  }
+}
diff --git a/functions/src/events/index.ts b/functions/src/events/index.ts
index 96ff5307d..2a1f508ad 100644
--- a/functions/src/events/index.ts
+++ b/functions/src/events/index.ts
@@ -1,3 +1,5 @@
 export * from "./scrapeEvents"
 export { scrapeSingleHearing } from "./scrapeEvents"
 export { scrapeSingleHearingv2 } from "./scrapeEvents"
+export { assemblyAI } from "./AssemblyAIHandler"
+export { HearingScraper, HearingPostProcessor } from "./HearingScraper"
diff --git a/functions/src/events/scrapeEvents.ts b/functions/src/events/scrapeEvents.ts
index 419bd505b..ecbdbce49 100644
--- a/functions/src/events/scrapeEvents.ts
+++ b/functions/src/events/scrapeEvents.ts
@@ -1,418 +1,9 @@
 import * as functions from "firebase-functions/v1"
-import { RuntimeOptions, runWith } from "firebase-functions/v1"
 import { onCall, CallableRequest } from "firebase-functions/v2/https"
-import { DateTime } from "luxon"
-import { JSDOM } from "jsdom"
-import { AssemblyAI } from "assemblyai"
-import { checkAuth, checkAdmin, logFetchError } from "../common"
-import { db, storage, Timestamp } from "../firebase"
-import * as api from "../malegislature"
-import {
-  BaseEvent,
-  BaseEventContent,
-  Hearing,
-  HearingContent,
-  HearingListItem,
-  Session,
-  SessionContent,
-  SpecialEvent,
-  SpecialEventContent
-} from "./types"
-import { currentGeneralCourt } from "../shared"
-import { randomBytes } from "node:crypto"
-import { sha256 } from "js-sha256"
-import { isValidVideoUrl, withinCutoff } from "./helpers"
-import ffmpeg from "fluent-ffmpeg"
-import fs from "fs"
-import { Committee } from "../committees/types"
-abstract class EventScraper<ListItem, Event extends BaseEvent> {
-  private schedule
-  private timeout
-  private memory
-
-  constructor(
-    schedule: string,
-    timeout: number,
-    memory: RuntimeOptions["memory"] = "256MB"
-  ) {
-    this.schedule = schedule
-    this.timeout = timeout
-    this.memory = memory
-  }
-
-  get function() {
-    return runWith({
-      timeoutSeconds: this.timeout,
-      secrets: ["ASSEMBLY_API_KEY"],
-      memory: this.memory,
-      maxInstances: 1
-    })
-      .pubsub.schedule(this.schedule)
-      .onRun(() => this.run())
-  }
-
-  abstract listEvents(): Promise<ListItem[]>
-  abstract getEvent(item: ListItem): Promise<Event>
-
-  private async run() {
-    const list = await this.listEvents().catch(logFetchError("event list"))
-
-    if (!list) return
-
-    const writer = db.bulkWriter()
-    const upcomingOrRecentCutoff = DateTime.now().minus({ days: 8 })
-
-    for (let item of list) {
-      const id = (item as any)?.EventId,
-        event = await this.getEvent(item).catch(logFetchError("event", id))
-
-      if (!event) continue
-      if (event.startsAt.toMillis() < upcomingOrRecentCutoff.toMillis()) break
-
-      writer.set(db.doc(`/events/${event.id}`), event, { merge: true })
-
-      console.log("event in run()", event)
-    }
-
-    await writer.close()
-  }
-
-  /** Parse the event start time in the time zone of the API. */
-  getEventStart(content: { EventDate: string; StartTime: string }) {
-    const { year, month, day } = DateTime.fromISO(content.EventDate, {
-      zone: api.timeZone
-    })
-    const { hour, minute, second, millisecond } = DateTime.fromISO(
-      content.StartTime,
-      { zone: api.timeZone }
-    )
-    const startsAt = DateTime.fromObject(
-      { year, month, day, hour, minute, second, millisecond },
-      { zone: api.timeZone }
-    )
-    return startsAt
-  }
-
-  /** Return timestamps shared between event types. */
-  timestamps(content: BaseEventContent) {
-    const startsAt = this.getEventStart(content)
-    return {
-      fetchedAt: Timestamp.now(),
-      startsAt: Timestamp.fromMillis(startsAt.toMillis())
-    }
-  }
-}
-
-class SpecialEventsScraper extends EventScraper<
-  SpecialEventContent,
-  SpecialEvent
-> {
-  constructor() {
-    super("every 60 minutes", 540)
-  }
-
-  async listEvents() {
-    const events = await api.getSpecialEvents()
-    return events.filter(SpecialEventContent.guard)
-  }
-
-  getEvent(content: SpecialEventContent) {
-    const event: SpecialEvent = {
-      id: `specialEvent-${content.EventId}`,
-      type: "specialEvent",
-      content,
-      ...this.timestamps(content)
-    }
-    return Promise.resolve(event)
-  }
-}
-
-class SessionScraper extends EventScraper<SessionContent, Session> {
-  private court = currentGeneralCourt
-
-  constructor() {
-    super("every 60 minutes", 120)
-  }
-
-  async listEvents() {
-    const events = await api.getSessions(this.court)
-    return events.filter(SessionContent.guard)
-  }
-
-  getEvent(content: SessionContent) {
-    const event: Session = {
-      id: `session-${this.court}-${content.EventId}`,
-      type: "session",
-      content,
-      ...this.timestamps(content)
-    }
-    return Promise.resolve(event)
-  }
-}
-
-const extractAudioFromVideo = async (
-  EventId: number,
-  videoUrl: string,
-  bucketName?: string
-): Promise<string> => {
-  const tmpFilePath = `/tmp/hearing-${EventId}-${Date.now()}.m4a`
-
-  // Stream directly from URL and copy audio codec
-  await new Promise<void>((resolve, reject) => {
-    ffmpeg(videoUrl)
-      .noVideo()
-      .audioCodec("copy")
-      .format("mp4")
-      .on("start", commandLine => {
-        console.log(`Spawned FFmpeg with command: ${commandLine}`)
-      })
-      .on("end", () => {
-        console.log("FFmpeg processing finished successfully")
-        resolve()
-      })
-      .on("error", err => {
-        console.error("FFmpeg error:", err)
-        reject(err)
-      })
-      .save(tmpFilePath)
-  })
-
-  // Upload the audio file
-  const bucket = bucketName ? storage.bucket(bucketName) : storage.bucket()
-  const audioFileName = `hearing-${EventId}-${Date.now()}.m4a`
-  const file = bucket.file(audioFileName)
-
-  const fileContent = await fs.promises.readFile(tmpFilePath)
-  await file.save(fileContent, {
-    metadata: {
-      contentType: "audio/mp4"
-    }
-  })
-
-  // Clean up temporary file
-  await fs.promises.unlink(tmpFilePath)
-
-  const [url] = await file.getSignedUrl({
-    action: "read",
-    expires: Date.now() + 24 * 60 * 60 * 1000
-  })
-
-  // Delete old files
-  const [files] = await bucket.getFiles({
-    prefix: "hearing-",
-    maxResults: 1000
-  })
-  const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000
-  const oldFiles = files.filter(file => {
-    const timestamp = parseInt(file.name.split("-").pop()?.split(".")[0] || "0")
-    return timestamp < oneDayAgo
-  })
-  await Promise.all(oldFiles.map(file => file.delete()))
-
-  // Return the new audio url
-  return url
-}
-
-export const submitTranscription = async ({
-  EventId,
-  maybeVideoUrl,
-  bucketName
-}: {
-  EventId: number
-  maybeVideoUrl: string
-  bucketName?: string
-}) => {
-  const assembly = new AssemblyAI({
-    apiKey: process.env.ASSEMBLY_API_KEY ? process.env.ASSEMBLY_API_KEY : ""
-  })
-
-  const newToken = randomBytes(16).toString("hex")
-  const audioUrl = await extractAudioFromVideo(
-    EventId,
-    maybeVideoUrl,
-    bucketName
-  )
-
-  const transcript = await assembly.transcripts.submit({
-    audio:
-      // test with: "https://assemblyaiusercontent.com/playground/aKUqpEtmYmI.flac",
-      audioUrl,
-    webhook_url:
-      // make sure process.env.FUNCTIONS_API_BASE equals
-      // https://us-central1-digital-testimony-prod.cloudfunctions.net
-      // on prod. test with:
-      // "https://ngrokid.ngrok-free.app/demo-dtp/us-central1/transcription",
-      `${process.env.FUNCTIONS_API_BASE}/transcription`,
-    speaker_labels: true,
-    webhook_auth_header_name: "x-maple-webhook",
-    webhook_auth_header_value: newToken
-  })
-
-  await db
-    .collection("events")
-    .doc(`hearing-${String(EventId)}`)
-    .collection("private")
-    .doc("webhookAuth")
-    .set({
-      videoAssemblyWebhookToken: sha256(newToken)
-    })
-
-  return transcript.id
-}
-
-export const getHearingVideoUrl = async (EventId: number) => {
-  const req = await fetch(
-    `https://malegislature.gov/Events/Hearings/Detail/${EventId}`
-  )
-  const res = await req.text()
-  if (res) {
-    const dom = new JSDOM(res)
-    if (dom) {
-      const maybeVideoSource =
-        dom.window.document.querySelectorAll("video source")
-      if (maybeVideoSource.length && maybeVideoSource[0]) {
-        const firstVideoSource = maybeVideoSource[0] as HTMLSourceElement
-        const maybeVideoUrl = firstVideoSource.src
-
-        return isValidVideoUrl(maybeVideoUrl) ? maybeVideoUrl : null
-      }
-    }
-  }
-  return null
-}
-
-const shouldScrapeVideo = async (
-  EventId: number,
-  ignoreCutoff: boolean = false
-) => {
-  const eventInDb = await db
-    .collection("events")
-    .doc(`hearing-${String(EventId)}`)
-    .get()
-  const eventData = eventInDb.data()
-
-  console.log("eventData in shouldScrapeVideo()", eventData)
-
-  if (!eventData) {
-    return false
-  }
-  if (!eventData.videoURL) {
-    return (
-      ignoreCutoff ||
-      withinCutoff(new Date(Hearing.check(eventData).startsAt.toDate()))
-    )
-  }
-  return false
-}
-
-const loadCommitteeChairNames = async (
-  generalCourtNumber: number,
-  committeeCode: string
-) => {
-  try {
-    const committeeSnap = await db
-      .collection(`generalCourts/${generalCourtNumber}/committees`)
-      .doc(committeeCode)
-      .get()
-
-    if (!committeeSnap.exists) return [] as string[]
-
-    const { members, content } = Committee.check(committeeSnap.data())
-    const chairCodes = new Set<string>()
-    const maybeHouse = content.HouseChairperson?.MemberCode
-    const maybeSenate = content.SenateChairperson?.MemberCode
-
-    if (maybeHouse) chairCodes.add(maybeHouse)
-    if (maybeSenate) chairCodes.add(maybeSenate)
-    return (members ?? [])
-      .filter(member => chairCodes.has(member.id))
-      .map(member => member.name)
-  } catch (error) {
-    console.warn(
-      `Failed to load committee chairs for ${committeeCode} (${generalCourtNumber}):`,
-      error
-    )
-    return [] as string[]
-  }
-}
-
-class HearingScraper extends EventScraper<HearingListItem, Hearing> {
-  constructor() {
-    super("every 60 minutes", 480, "4GB")
-  }
-
-  async listEvents() {
-    const events = await api.listHearings()
-    return events.filter(HearingListItem.guard)
-  }
-
-  async getEvent(
-    { EventId }: HearingListItem /* e.g. 4962 */,
-    { ignoreCutoff = false }: { ignoreCutoff?: boolean } = {}
-  ) {
-    const data = await api.getHearing(EventId)
-    const content = HearingContent.check(data)
-
-    console.log("content in getEvent()", content)
-
-    const host = content.HearingHost
-    const committeeChairs =
-      host?.CommitteeCode && host?.GeneralCourtNumber
-        ? await loadCommitteeChairNames(
-            host.GeneralCourtNumber,
-            host.CommitteeCode
-          )
-        : []
-
-    if (await shouldScrapeVideo(EventId, ignoreCutoff)) {
-      try {
-        const maybeVideoUrl = await getHearingVideoUrl(EventId)
-        if (maybeVideoUrl) {
-          const transcriptId = await submitTranscription({
-            maybeVideoUrl,
-            EventId
-          })
-
-          // Immediately save video info to prevent reprocessing
-          // since the bulkWriter does not save the video properties
-          // returned from this method.
-          await db.collection("events").doc(`hearing-${EventId}`).update({
-            videoURL: maybeVideoUrl,
-            videoFetchedAt: Timestamp.now(),
-            videoTranscriptionId: transcriptId
-          })
-
-          return {
-            id: `hearing-${EventId}`,
-            type: "hearing",
-            content,
-            ...this.timestamps(content),
-            videoURL: maybeVideoUrl,
-            videoFetchedAt: Timestamp.now(),
-            committeeChairs,
-            videoTranscriptionId: transcriptId // using the assembly Id as our transcriptionId
-          } as Hearing
-        }
-      } catch (error) {
-        console.error(`Failed to process audio for hearing ${EventId}:`, error)
-        return {
-          id: `hearing-${EventId}`,
-          type: "hearing",
-          content,
-          committeeChairs,
-          ...this.timestamps(content)
-        } as Hearing
-      }
-    }
-    return {
-      id: `hearing-${EventId}`,
-      type: "hearing",
-      content,
-      committeeChairs,
-      ...this.timestamps(content)
-    } as Hearing
-  }
-}
+import { checkAuth, checkAdmin } from "../common"
+import { db } from "../firebase"
+import { SpecialEventsScraper, SessionScraper } from "./EventScraper"
+import { HearingScraper, HearingPostProcessor } from "./HearingScraper"
 
 /**
  * Callable cloud function to scrape a single hearing by EventId.
@@ -442,12 +33,10 @@ export const scrapeSingleHearing = functions
     }
 
     try {
-      // Create a temporary scraper instance to reuse the existing logic
-      const scraper = new HearingScraper()
-      const hearing = await scraper.getEvent(
-        { EventId: eventId },
-        { ignoreCutoff: true }
-      )
+      const hearing = {
+        ...(await new HearingScraper().getEvent({ EventId: eventId })),
+        ...(await new HearingPostProcessor().getUpdate({ EventId: eventId })) // Videos
+      }
 
       // Save the hearing to Firestore
       await db.doc(`/events/${hearing.id}`).set(hearing, { merge: true })
@@ -487,12 +76,10 @@ export const scrapeSingleHearingv2 = onCall(
     }
 
     try {
-      // Create a temporary scraper instance to reuse the existing logic
-      const scraper = new HearingScraper()
-      const hearing = await scraper.getEvent(
-        { EventId: eventId },
-        { ignoreCutoff: true }
-      )
+      const hearing = {
+        ...(await new HearingScraper().getEvent({ EventId: eventId })),
+        ...(await new HearingPostProcessor().getUpdate({ EventId: eventId }))
+      }
 
       // Save the hearing to Firestore
       await db.doc(`/events/${hearing.id}`).set(hearing, { merge: true })
@@ -518,3 +105,4 @@ export const scrapeSingleHearingv2 = onCall(
 export const scrapeSpecialEvents = new SpecialEventsScraper().function
 export const scrapeSessions = new SessionScraper().function
 export const scrapeHearings = new HearingScraper().function
+export const scrapeVideos = new HearingPostProcessor().function
diff --git a/functions/src/events/types.ts b/functions/src/events/types.ts
index 4101b41d1..9a00fe190 100644
--- a/functions/src/events/types.ts
+++ b/functions/src/events/types.ts
@@ -97,13 +97,20 @@ export const HearingContent = BaseEventContent.extend({
 export type HearingListItem = Static<typeof HearingListItem>
 export const HearingListItem = Record({ EventId: Number })
 
+export type Video = Static<typeof Video>
+export const Video = Record({
+  url: String,
+  title: String,
+  transcriptionId: String
+})
+
 export type Hearing = Static<typeof Hearing>
 export const Hearing = BaseEvent.extend({
   type: L("hearing"),
   content: HearingContent,
-  videoURL: Optional(String),
-  videoTranscriptionId: Optional(String),
-  videoFetchedAt: Optional(InstanceOf(Timestamp)),
+  videos: Array(Video),
+  transcriptionIds: Array(String),
+  videosFetchedAt: Optional(InstanceOf(Timestamp)),
   committeeChairs: Optional(Array(String))
 })
 
diff --git a/functions/src/hearings/search.ts b/functions/src/hearings/search.ts
index fe26d0385..ce2375042 100644
--- a/functions/src/hearings/search.ts
+++ b/functions/src/hearings/search.ts
@@ -33,7 +33,7 @@ export const {
   documentTrigger: "events/{eventId}",
   alias: "hearings",
   idField: "id",
-  filter: data => data.type === "hearing",
+  filter: data => data.type === "hearing" && "transcriptionIds" in data,
   schema: {
     fields: [
       { name: "eventId", type: "int32", facet: false },
@@ -57,7 +57,7 @@ export const {
   },
   convert: data => {
     const hearing = Hearing.check(data)
-    const { content, startsAt: startsAtTimestamp, id, videoURL } = hearing
+    const { content, startsAt: startsAtTimestamp, id, videos } = hearing
     const startsAt = startsAtTimestamp.toMillis()
     const schedule = DateTime.fromMillis(startsAt, { zone: timeZone })
 
@@ -115,7 +115,7 @@ export const {
         bill => bill.slug || `${courtNumber}/${bill.number}`
       ),
       court: courtNumber,
-      hasVideo: Boolean(videoURL)
+      hasVideo: videos.length > 0
     }
   }
 })
diff --git a/functions/src/index.ts b/functions/src/index.ts
index 641255bf4..970e31b59 100644
--- a/functions/src/index.ts
+++ b/functions/src/index.ts
@@ -17,6 +17,7 @@ export {
 } from "./committees"
 export {
   scrapeHearings,
+  scrapeVideos,
   scrapeSessions,
   scrapeSpecialEvents,
   scrapeSingleHearing,
diff --git a/functions/src/webhooks/transcription.ts b/functions/src/webhooks/transcription.ts
index 04a5ed1f8..767f015af 100644
--- a/functions/src/webhooks/transcription.ts
+++ b/functions/src/webhooks/transcription.ts
@@ -1,5 +1,5 @@
 import * as functions from "firebase-functions"
-import { AssemblyAI } from "assemblyai"
+import { assemblyAI } from "../events/AssemblyAIHandler"
 import { db, Timestamp } from "../firebase"
 import { sha256 } from "js-sha256"
 
@@ -10,13 +10,8 @@ export const transcription = functions
       if (req.body.status === "completed") {
         // If we get a request with the right header and status, get the
         // transcription from the assembly API.
-        const assembly = new AssemblyAI({
-          apiKey: process.env.ASSEMBLY_API_KEY
-            ? process.env.ASSEMBLY_API_KEY
-            : ""
-        })
 
-        const transcript = await assembly.transcripts.get(
+        const transcript = await assemblyAI().getTranscript(
           req.body.transcript_id
         )
 
@@ -25,7 +20,7 @@ export const transcription = functions
           // look for an event (aka Hearing) in the DB with a matching ID.
           const maybeEventsInDb = await db
             .collection("events")
-            .where("videoTranscriptionId", "==", transcript.id)
+            .where("transcriptionIds", "array-contains", transcript.id)
             .get()
 
           if (maybeEventsInDb.docs.length) {
@@ -43,7 +38,7 @@ export const transcription = functions
                 .collection("events")
                 .doc(doc.id)
                 .collection("private")
-                .doc("webhookAuth")
+                .doc(transcript.id)
                 .get()
 
               const tokenDataInDb =
@@ -69,12 +64,12 @@ export const transcription = functions
               // If there is one authenticated event, pull out the parts we want to
               // save and try to save them in the db.
 
-              const { paragraphs } = await assembly.transcripts.paragraphs(
+              const paragraphs = await assemblyAI().fetchParagraphs(
                 transcript.id
               )
               const { id, text, audio_url, utterances } = transcript
               try {
-                const transcriptionInDb = await db
+                const transcriptionInDb = db
                   .collection("transcriptions")
                   .doc(id)
 
diff --git a/pages/ballotQuestions/[id].tsx b/pages/ballotQuestions/[id].tsx
index 5bbbe1ff9..418628e3e 100644
--- a/pages/ballotQuestions/[id].tsx
+++ b/pages/ballotQuestions/[id].tsx
@@ -10,6 +10,7 @@ import {
   Hearing
 } from "../../components/ballotquestions/types"
 import { BallotQuestion, Bill } from "../../components/db"
+import { Video } from "../../components/hearing/hearing"
 import { createPage } from "../../components/page"
 import { usePublishService } from "../../components/publish/hooks"
 import { serverSideTranslations } from "next-i18next/serverSideTranslations"
@@ -22,7 +23,7 @@ async function getHearing(id: string): Promise<Hearing | null> {
   const data = snap.data()
   return {
     id,
-    videoURL: data.videoURL ?? undefined,
+    videoURLs: data.videos.map((item: Video) => item.url),
     startsAt: data.startsAt?.toMillis() ?? 0
   }
 }
diff --git a/scripts/firebase-admin/backfillHearingTranscription.ts b/scripts/firebase-admin/backfillHearingTranscription.ts
index a9c19daeb..7612cf9d7 100644
--- a/scripts/firebase-admin/backfillHearingTranscription.ts
+++ b/scripts/firebase-admin/backfillHearingTranscription.ts
@@ -1,15 +1,15 @@
-import { Timestamp } from "../../functions/src/firebase"
-import { Record, Number, String } from "runtypes"
+import { Record, Number, String, Boolean } from "runtypes"
 import { Script } from "./types"
-import { getHearingVideoUrl, submitTranscription } from "functions/src/events"
+import { HearingPostProcessor } from "functions/src/events"
 
 const Args = Record({
   eventId: Number.optional(),
-  bucketName: String.optional()
+  bucketName: String.optional(),
+  recreateTranscripts: Boolean.optional()
 })
 
 export const script: Script = async ({ db, args }) => {
-  const { eventId, bucketName } = Args.check(args)
+  const { eventId, bucketName, recreateTranscripts } = Args.check(args)
 
   // Process a single event by eventId
   if (eventId) {
@@ -20,30 +20,22 @@ export const script: Script = async ({ db, args }) => {
       return
     }
     const data = doc.data()
-    if (data?.videoTranscriptionId) {
-      console.log(`Hearing ${eventId} already has a transcription.`)
-      return
-    }
+    if (!data) return
     try {
-      const maybeVideoUrl = await getHearingVideoUrl(eventId)
-      if (maybeVideoUrl) {
-        const transcriptId = await submitTranscription({
-          maybeVideoUrl,
-          EventId: eventId,
-          bucketName
-        })
-
-        await docRef.update({
-          videoURL: maybeVideoUrl,
-          videoFetchedAt: Timestamp.now(),
-          videoTranscriptionId: transcriptId
-        })
+      const update = recreateTranscripts
+        ? await new HearingPostProcessor().getUpdate({ EventId: eventId })
+        : await new HearingPostProcessor().getUpdate(
+            { EventId: eventId },
+            data.videos
+          )
+      if (update !== null) {
+        await docRef.update(update)
 
         console.log(
-          `Transcription submitted for hearing ${eventId}: ${transcriptId}`
+          `Transcriptions submitted for hearing ${eventId}: ${update.transcriptionIds}`
         )
       } else {
-        console.log(`No valid video URL found for hearing ${eventId}`)
+        console.log(`No additional videos to be processed for ${eventId}`)
       }
     } catch (error) {
       console.error(`Failed to process hearing ${eventId}:`, error)
@@ -60,40 +52,29 @@ export const script: Script = async ({ db, args }) => {
       if (count >= 100) {
         break // Limit to 100 operations for this run
       }
+      const EventId = parseInt(doc.id.replace("hearing-", ""))
+      console.log(`Processing hearing ${EventId}...`)
       const data = doc.data()
-      if (!data.videoTranscriptionId) {
-        const EventId = parseInt(doc.id.replace("hearing-", ""))
-        console.log(`Processing hearing ${EventId}...`)
-
-        try {
-          const maybeVideoUrl = await getHearingVideoUrl(EventId)
-          if (maybeVideoUrl) {
-            const transcriptId = await submitTranscription({
-              maybeVideoUrl,
-              EventId,
-              bucketName
-            })
+      if (data.empty) continue
 
-            await doc.ref.update({
-              videoURL: maybeVideoUrl,
-              videoFetchedAt: Timestamp.now(),
-              videoTranscriptionId: transcriptId
-            })
+      try {
+        const update = recreateTranscripts
+          ? await new HearingPostProcessor().getUpdate({ EventId })
+          : await new HearingPostProcessor().getUpdate({ EventId }, data.videos)
+        if (update.videos.length > data.videos.length) {
+          await doc.ref.update(update)
 
-            console.log(
-              `Transcription submitted for hearing ${EventId}: ${transcriptId}`
-            )
-            count++
-          } else {
-            console.log(`No valid video URL found for hearing ${EventId}`)
-          }
-        } catch (error) {
-          console.error(`Failed to process hearing ${EventId}:`, error)
+          console.log(
+            `Transcriptions submitted for hearing ${EventId}: ${update.transcriptionIds}`
+          )
+          count++
+        } else {
+          console.log(
+            `No additional videos to be processed for hearing ${EventId}`
+          )
         }
-      } else {
-        console.log(
-          `Skipping hearing ${data.EventId}, already has transcription.`
-        )
+      } catch (error) {
+        console.error(`Failed to process hearing ${EventId}:`, error)
       }
     }
     console.log("Done processing hearings without transcriptions.")
diff --git a/scripts/firebase-admin/backfillHearingVideoFormat.ts b/scripts/firebase-admin/backfillHearingVideoFormat.ts
new file mode 100644
index 000000000..0596cb484
--- /dev/null
+++ b/scripts/firebase-admin/backfillHearingVideoFormat.ts
@@ -0,0 +1,116 @@
+import { FieldValue, Timestamp } from "../../functions/src/firebase"
+import { Record, Number } from "runtypes"
+import { Script } from "./types"
+
+const Args = Record({
+  eventId: Number.optional()
+})
+
+/**
+ * Builds the Firestore update that converts a hearing document from the
+ * legacy single-video fields (`videoURL`, `videoTranscriptionId`,
+ * `videoFetchedAt`) to `videos`, `transcriptionIds` and `videosFetchedAt`.
+ *
+ * @param data The current data of the hearing document.
+ * @returns The update to apply, or null when `videos` is already present.
+ */
+function migrateVideo(
+  data: FirebaseFirestore.DocumentData
+): FirebaseFirestore.DocumentData | null {
+  if ("videos" in data) {
+    return null
+  }
+
+  if (!("videoURL" in data)) {
+    return {
+      videos: [],
+      transcriptionIds: [],
+      videoTranscriptionId: FieldValue.delete(),
+      videoFetchedAt: FieldValue.delete(),
+      videoURL: FieldValue.delete()
+    }
+  }
+
+  const url = data.videoURL
+  const fetchedAt = data?.videoFetchedAt
+  const transcriptionId = data?.videoTranscriptionId
+
+  const transcriptionIds = transcriptionId ? [transcriptionId] : []
+
+  const videos = [
+    {
+      // Default; not shown
+      title: data.id,
+      url,
+      // Omit the key when the hearing was never transcribed; an explicit
+      // `undefined` is rejected by Firestore unless it is configured to
+      // ignore undefined properties.
+      ...(transcriptionId ? { transcriptionId } : {})
+    }
+  ]
+
+  return {
+    videos,
+    transcriptionIds,
+    // Fall back to the migration time when no fetch time was recorded.
+    videosFetchedAt: fetchedAt || Timestamp.now(),
+    videoTranscriptionId: FieldValue.delete(),
+    videoFetchedAt: FieldValue.delete(),
+    videoURL: FieldValue.delete()
+  }
+}
+
+/**
+ * Migrates hearing documents in the `events` collection to the multi-video
+ * format. With `eventId`, only the hearing whose `id` field is
+ * `hearing-<eventId>` is migrated; otherwise every document of type
+ * "hearing" is processed through a bulk writer.
+ */
+export const script: Script = async ({ db, args }) => {
+  const { eventId } = Args.check(args)
+
+  // Process a single event by eventId
+  if (eventId) {
+    const snapshot = await db
+      .collection("events")
+      .where("type", "==", "hearing")
+      // Hearing ids are stored as the string `hearing-<EventId>`.
+      .where("id", "==", `hearing-${eventId}`)
+      .get()
+
+    if (snapshot.docs.length !== 1) {
+      throw new Error(
+        `The number of documents matching the event id ${eventId} must be exactly one`
+      )
+    }
+
+    const doc = snapshot.docs[0]
+    const modify = migrateVideo(doc.data())
+    if (modify) {
+      // Await the write so a failure rejects before completion is logged.
+      await doc.ref.update(modify)
+    }
+  } else {
+    const snapshot = await db
+      .collection("events")
+      .where("type", "==", "hearing")
+      .get()
+
+    if (snapshot.empty) {
+      throw new Error("Hearing backfill failed; no documents were found")
+    }
+
+    const bulkWriter = db.bulkWriter()
+
+    for (const doc of snapshot.docs) {
+      console.log(doc.data().id)
+      const modify = migrateVideo(doc.data())
+      if (modify) {
+        bulkWriter.update(doc.ref, modify)
+      }
+    }
+    await bulkWriter.close()
+  }
+
+  console.log("Video backfill complete")
+}
diff --git a/scripts/firebase-admin/migrateHearingTranscription.ts b/scripts/firebase-admin/migrateHearingTranscription.ts
index 910ba3943..03dce0f1c 100644
--- a/scripts/firebase-admin/migrateHearingTranscription.ts
+++ b/scripts/firebase-admin/migrateHearingTranscription.ts
@@ -39,6 +39,119 @@ function convertTimestamps(obj: any): any {
   return obj
 }
 
+/**
+ * Copies a transcription document and its subcollections from the source
+ * project (`devDb`) into the target project (`db`). Writes are queued on
+ * `bulkWriter` when one is supplied and awaited directly otherwise.
+ *
+ * @throws Error when the transcription does not exist in the source project.
+ */
+async function migrateTranscription(
+  db: admin.firestore.Firestore,
+  devDb: admin.firestore.Firestore,
+  transcriptionId: string,
+  bulkWriter?: FirebaseFirestore.BulkWriter
+) {
+  const targetRef = db.collection("transcriptions").doc(transcriptionId)
+  const devTranscriptionDoc = await devDb
+    .collection("transcriptions")
+    .doc(transcriptionId)
+    .get()
+
+  const devTranscriptionData = devTranscriptionDoc.exists
+    ? devTranscriptionDoc.data()
+    : null
+
+  if (!devTranscriptionData) {
+    throw new Error(
+      `Transcription ${transcriptionId} not found in dev project.`
+    )
+  }
+
+  // Create transcription in target project instead of setting, in case it already exists, which will throw an error
+  const convertedData = convertTimestamps(devTranscriptionData)
+  console.log(`Creating transcription ${transcriptionId}...`)
+  if (bulkWriter) {
+    bulkWriter.create(targetRef, convertedData)
+  } else {
+    await targetRef.create(convertedData)
+  }
+
+  const subcollections = await devTranscriptionDoc.ref.listCollections()
+  for (const subcol of subcollections) {
+    const docs = await subcol.get()
+    for (const doc of docs.docs) {
+      const ref = targetRef.collection(subcol.id).doc(doc.id)
+      // Write each document once: queue it or await it, never both.
+      if (bulkWriter) {
+        bulkWriter.set(ref, doc.data())
+      } else {
+        await ref.set(doc.data())
+      }
+    }
+  }
+}
+
+/**
+ * Copies the transcriptions of one source-project hearing that the target
+ * hearing lacks, then syncs the target's video fields from the source.
+ * Returns "skip" when there is nothing to copy and "fail" on a copy error.
+ */
+async function migrateHearing(
+  db: admin.firestore.Firestore,
+  devDb: admin.firestore.Firestore,
+  devDoc:
+    | admin.firestore.DocumentSnapshot<admin.firestore.DocumentData>
+    | admin.firestore.QueryDocumentSnapshot<admin.firestore.DocumentData>,
+  bulkWriter?: FirebaseFirestore.BulkWriter
+): Promise<"migrate" | "skip" | "fail"> {
+  const devData = devDoc.data()
+
+  if (!devData || !devData?.transcriptionIds?.length) {
+    console.log(`Hearing ${devDoc.id} has no transcription to migrate.`)
+    return "skip"
+  }
+  const targetRef = db.collection("events").doc(devDoc.id)
+  const targetDoc = await targetRef.get()
+  const targetData = targetDoc.exists ? targetDoc.data() : null
+
+  if (!targetData) {
+    console.log(`${devDoc.id} not found in target project.`)
+    return "skip"
+  }
+
+  // A target hearing without `transcriptionIds` has nothing migrated yet.
+  const existingIds: string[] = targetData.transcriptionIds ?? []
+  let found = false
+  for (const transcriptionId of devData.transcriptionIds) {
+    if (existingIds.includes(transcriptionId)) continue
+    found = true
+    try {
+      await migrateTranscription(db, devDb, transcriptionId, bulkWriter)
+    } catch (err) {
+      console.error(`Error creating transcription ${transcriptionId}:`, err)
+      return "fail"
+    }
+  }
+  if (!found) {
+    console.log(`${devDoc.id} has no new transcriptions.`)
+    return "skip"
+  }
+
+  console.log(`Updating hearing ${devDoc.id}...`)
+  const update = {
+    videos: devData.videos,
+    videosFetchedAt: convertTimestamps(devData.videosFetchedAt),
+    transcriptionIds: devData.transcriptionIds
+  }
+  if (bulkWriter) {
+    bulkWriter.update(targetRef, update)
+  } else {
+    await targetRef.update(update)
+  }
+
+  return "migrate"
+}
+
 const Args = Record({
   sourceProject: String,
   hearing: Number.optional()
@@ -66,78 +179,15 @@ export const script: Script = async ({ db, args }) => {
   if (hearing) {
     const hearingId = "hearing-" + hearing
     console.log(`Processing single hearing: ${hearingId}`)
-    const devHearingsSnapshot = await devDb
-      .collection("events")
-      .doc(hearingId)
-      .get()
+    const devDoc = await devDb.collection("events").doc(hearingId).get()
 
-    if (!devHearingsSnapshot.exists) {
+    if (!devDoc.exists) {
       console.error(`Hearing ${hearingId} not found in dev project.`)
       return
     }
-    const devData = devHearingsSnapshot.data()
-
-    if (!devData?.videoTranscriptionId) {
-      console.log(`Hearing ${hearingId} has no transcription to migrate.`)
-      return
-    }
-    const targetDoc = await db.collection("events").doc(hearingId).get()
-    const targetData = targetDoc.exists ? targetDoc.data() : null
-
-    // Only migrate if hearing in target environment does not have a transcription yet
-    if (!targetData?.videoTranscriptionId) {
-      const transcriptionId = devData.videoTranscriptionId
-      const devTranscriptionDoc = await devDb
-        .collection("transcriptions")
-        .doc(transcriptionId)
-        .get()
-
-      const devTranscriptionData = devTranscriptionDoc.exists
-        ? devTranscriptionDoc.data()
-        : null
-
-      if (devTranscriptionData) {
-        // Create transcription in target project instead of setting, in case it already exists, which will throw an error
-        const convertedData = convertTimestamps(devTranscriptionData)
-        try {
-          console.log(`Creating transcription ${transcriptionId}...`)
-          await db
-            .collection("transcriptions")
-            .doc(transcriptionId)
-            .create(convertedData)
-        } catch (err) {
-          console.error(`Error creating transcription ${transcriptionId}:`, err)
-          return
-        }
-
-        const subcollections = await devTranscriptionDoc.ref.listCollections()
-        for (const subcol of subcollections) {
-          const docs = await subcol.get()
-          for (const doc of docs.docs) {
-            await db
-              .collection("transcriptions")
-              .doc(transcriptionId)
-              .collection(subcol.id)
-              .doc(doc.id)
-              .set(doc.data())
-          }
-        }
-      } else {
-        console.error(
-          `Transcription ${transcriptionId} not found in dev project.`
-        )
-      }
 
-      await db
-        .collection("events")
-        .doc(hearingId)
-        .update({
-          videoURL: devData.videoURL,
-          videoFetchedAt: convertTimestamps(devData.videoFetchedAt),
-          videoTranscriptionId: devData.videoTranscriptionId
-        })
-      console.log(`Migration complete for hearing ${hearingId}.`)
-    }
+    await migrateHearing(db, devDb, devDoc)
+    console.log(`Migration complete for hearing ${hearingId}.`)
   } else {
     // For full migration
     const devHearingsSnapshot = await devDb
@@ -157,83 +207,14 @@ export const script: Script = async ({ db, args }) => {
         console.log(`Migration limit of ${limit} reached. Stopping.`)
         break
       }
-      const devData = devDoc.data()
-      if (!devData.videoTranscriptionId) {
-        skipped++
-        console.log(`${devDoc.id} has no transcription to migrate.`)
-        continue
-      }
-
-      const targetDoc = await db.collection("events").doc(devDoc.id).get()
-      const targetData = targetDoc.exists ? targetDoc.data() : null
-
-      if (!targetData) {
-        skipped++
-        console.log(`${devDoc.id} not found in target project.`)
-        continue
-      }
 
-      // Only migrate if hearing in target environment does not have a transcription yet
-      if (!targetData?.videoTranscriptionId) {
-        console.log(`Migrating ${devDoc.id}...`)
-        const transcriptionId = devData.videoTranscriptionId
-        const devTranscriptionDoc = await devDb
-          .collection("transcriptions")
-          .doc(transcriptionId)
-          .get()
-
-        const devTranscriptionData = devTranscriptionDoc.exists
-          ? devTranscriptionDoc.data()
-          : null
-
-        if (devTranscriptionData) {
-          // Create transcription in target project instead of setting, in case it already exists, which will throw an error
-          const convertedData = convertTimestamps(devTranscriptionData)
-          try {
-            console.log(`Creating transcription ${transcriptionId}...`)
-            bulkWriter.create(
-              db.collection("transcriptions").doc(transcriptionId),
-              convertedData
-            )
-          } catch (err) {
-            failed++
-            console.error(
-              `Error creating transcription ${transcriptionId}:`,
-              err
-            )
-            continue
-          }
-
-          const subcollections = await devTranscriptionDoc.ref.listCollections()
-          for (const subcol of subcollections) {
-            const docs = await subcol.get()
-            for (const doc of docs.docs) {
-              await db
-                .collection("transcriptions")
-                .doc(transcriptionId)
-                .collection(subcol.id)
-                .doc(doc.id)
-                .set(doc.data())
-            }
-          }
-        } else {
-          failed++
-          console.error(
-            `Transcription ${transcriptionId} not found in dev project.`
-          )
-          continue
-        }
-
-        console.log(`Updating ${devDoc.id}...`)
-        bulkWriter.update(db.collection("events").doc(devDoc.id), {
-          videoURL: devData.videoURL,
-          videoFetchedAt: convertTimestamps(devData.videoFetchedAt),
-          videoTranscriptionId: devData.videoTranscriptionId
-        })
-        migrated++
+      const result = await migrateHearing(db, devDb, devDoc, bulkWriter)
+      if (result === "migrate") {
+        migrated += 1
+      } else if (result === "skip") {
+        skipped += 1
       } else {
-        console.log(`${devDoc.id} already has a transcription, skipping.`)
-        skipped++
+        failed += 1
       }
     }
 
diff --git a/stories/organisms/ballotquestions/BallotQuestionDetails.stories.tsx b/stories/organisms/ballotquestions/BallotQuestionDetails.stories.tsx
index 40ec1154c..c26d83150 100644
--- a/stories/organisms/ballotquestions/BallotQuestionDetails.stories.tsx
+++ b/stories/organisms/ballotquestions/BallotQuestionDetails.stories.tsx
@@ -142,7 +142,10 @@ const sampleBill: Bill = {
 const sampleHearing = {
   id: "hearing-101",
   startsAt: new Date("2026-03-12T10:00:00-05:00").getTime(),
-  videoURL: "https://malegislature.gov/"
+  videoURLs: [
+    "https://prodarchivevideo.blob.core.windows.net/video/2022/Hearings/Joint/April/12.mp4",
+    "https://prodarchivevideo.blob.core.windows.net/video/2022/Hearings/Joint/April/12_1.mp4"
+  ]
 }
 
 const emptyTestimonyListing: UsePublishedTestimonyListing = {