From f121f31d88d2b005c5d30df3684ba6bc1a0a5b06 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:03 -0400
Subject: [PATCH 01/35] =?UTF-8?q?refactor(desktop):=20hub=20always-on=20?=
 =?UTF-8?q?=E2=80=94=20drop=20enable=20toggle,=20derive=20provider=20from?=
 =?UTF-8?q?=20Voice=20Model?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

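RealtimeHubSettings no longer stores an enable flag or its own provider; the
hub is the default voice path and its provider follows the existing Advanced
'Voice Model' picker (RealtimeOmniSettings). Drops the now-unused subtitle and
CaseIterable conformance.

Also removes the floating bar's VoiceActivityIndicator and the VoiceActivity /
thinking / speaking / level state that drove it; the bar now shows only the
listening view during a voice turn.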

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBarState.swift             |  46 ----
 .../FloatingControlBarView.swift              |  79 ++++---
 .../RealtimeHubSettings.swift                 |  63 ++----
 .../VoiceActivityIndicator.swift              | 199 ------------------
 4 files changed, 54 insertions(+), 333 deletions(-)
 delete mode 100644 desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift
index f39cf6618f3..8608cb771ec 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift
@@ -51,21 +51,6 @@ struct FloatingBarNotification: Identifiable, Equatable {
     }
 }
 
-/// The high-level voice activity the floating bar is reflecting right now. Derived
-/// from the lower-level PTT/hub flags so the status indicator has a single, ordered
-/// source of truth (each state has exactly one visual treatment).
-enum VoiceActivity: Equatable {
-    /// Nothing happening — the bar rests as a calm, barely-breathing sliver.
-    case idle
-    /// User is holding push-to-talk; we're capturing their voice (red, "you").
-    case listening
-    /// Turn committed, waiting on the model's reply — the model may answer late,
-    /// so this MUST read as "working, wait" rather than "done" (cool autonomous swirl).
-    case thinking
-    /// The model is speaking its reply (warm, audio-reactive waveform — "it").
-    case speaking
-}
-
 /// Observable object holding the state for the floating control bar.
 @MainActor
 class FloatingControlBarState: NSObject, ObservableObject {
@@ -108,35 +93,6 @@ class FloatingControlBarState: NSObject, ObservableObject {
     @Published var isVoiceListening: Bool = false
     @Published var isVoiceLocked: Bool = false
     @Published var voiceTranscript: String = ""
-    /// True after a voice turn is committed and we're waiting on the model's reply
-    /// (vs. still recording) — drives the "Thinking…/Responding…" indicator so the user
-    /// knows to wait rather than re-pressing (which would interrupt a slow reply).
-    @Published var isVoiceThinking: Bool = false
-    /// True while the model is actually speaking its reply (native audio playing or the
-    /// AVSpeech fallback talking). Distinct from `isVoiceThinking` so the indicator can
-    /// show a clearly different "it's talking" treatment vs. "it's working".
-    @Published var isVoiceSpeaking: Bool = false
-    /// Smoothed 0…1 output amplitude of the model's spoken reply, sampled from the
-    /// playback engine. Drives the speaking waveform so it reacts to the actual voice
-    /// (premium feel) rather than animating blindly. 0 when not speaking.
-    @Published var voiceLevel: CGFloat = 0
-
-    /// Single ordered source of truth for the status indicator. Listening wins (the user
-    /// is actively talking), then speaking, then thinking, else idle — by construction the
-    /// hub sets these mutually exclusively, the ordering just makes barge-in race-safe.
-    var voiceActivity: VoiceActivity {
-        if isVoiceListening { return .listening }
-        if isVoiceSpeaking { return .speaking }
-        if isVoiceThinking { return .thinking }
-        return .idle
-    }
-
-    /// Whether any voice turn is in flight — keeps the bar expanded across the whole
-    /// listening → thinking → speaking arc so the indicator stays visible (one expand,
-    /// one collapse per turn — no resize churn mid-turn).
-    var isVoiceActive: Bool {
-        isVoiceListening || isVoiceThinking || isVoiceSpeaking
-    }
 
     // Voice follow-up state (PTT while AI conversation is active)
     @Published var isVoiceFollowUp: Bool = false
@@ -180,8 +136,6 @@ class FloatingControlBarState: NSObject, ObservableObject {
         isVoiceFollowUp = false
         voiceFollowUpTranscript = ""
         currentQueryFromVoice = false
-        isVoiceSpeaking = false
-        voiceLevel = 0
         lastConversationActivityAt = nil
     }
 }
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
index 9188f5d11b3..763fd6f3494 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
@@ -33,11 +33,9 @@ struct FloatingControlBarView: View {
         .animation(.spring(response: 0.35, dampingFraction: 0.82), value: state.currentNotification?.id)
     }
 
-    /// Whether the bar chrome should stretch to fill the window width. Stays full-width
-    /// for the whole voice turn (listening → thinking → speaking) so the status indicator
-    /// has room and the bar resizes exactly once per turn.
+    /// Whether the bar chrome should stretch to fill the window width
     private var barNeedsFullWidth: Bool {
-        isHovering || state.showingAIConversation || state.isVoiceActive
+        isHovering || state.showingAIConversation || state.isVoiceListening
     }
 
     private var barChrome: some View {
@@ -85,7 +83,7 @@ struct FloatingControlBarView: View {
             }
         }
         .overlay(alignment: .topTrailing) {
-            if isHovering && !state.isVoiceActive {
+            if isHovering && !state.isVoiceListening {
                 Button {
                     openFloatingBarSettings()
                 } label: {
@@ -281,8 +279,8 @@ struct FloatingControlBarView: View {
 
     private var controlBarView: some View {
         Group {
-            if state.isVoiceActive && !state.isVoiceFollowUp {
-                voiceActiveView
+            if state.isVoiceListening && !state.isVoiceFollowUp {
+                voiceListeningView
                     .padding(.horizontal, 6)
                     .padding(.vertical, 3)
                     .frame(height: 50)
@@ -308,11 +306,11 @@ struct FloatingControlBarView: View {
         }
     }
 
-    /// Minimal resting indicator shown when not hovering and no voice turn is active —
-    /// a calm, slowly breathing sliver. (Active turns render `voiceActiveView` instead.)
+    /// Minimal thin bar shown when not hovering
     private var compactCircleView: some View {
-        VoiceActivityIndicator(activity: state.voiceActivity, level: state.voiceLevel)
-            .frame(width: 28, height: 14)
+        RoundedRectangle(cornerRadius: 3)
+            .fill(Color.white.opacity(0.5))
+            .frame(width: 28, height: 6)
     }
 
     private func compactToggle(_ title: String, isOn: Binding<Bool>) -> some View {
@@ -360,15 +358,20 @@ struct FloatingControlBarView: View {
         }
     }
 
-    /// Unified expanded voice view for the whole turn. The status indicator carries the
-    /// state (listening / thinking / speaking) visually; the text is just the helpful
-    /// detail (transcript, "Release to send", "Thinking…"). One element, no jarring swaps.
-    private var voiceActiveView: some View {
+    private var voiceListeningView: some View {
         HStack(spacing: 8) {
-            VoiceActivityIndicator(activity: state.voiceActivity, level: state.voiceLevel)
-                .frame(width: 34, height: 18)
+            // Red recording dot (pulse animation), followed by the mic icon
+            Circle()
+                .fill(Color.red)
+                .frame(width: 10, height: 10)
+                .scaleEffect(state.isVoiceListening ? 1.2 : 1.0)
+                .animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: state.isVoiceListening)
+
+            Image(systemName: "mic.fill")
+                .scaledFont(size: 14, weight: .semibold)
+                .foregroundColor(.white)
 
-            if state.isVoiceLocked && state.isVoiceListening {
+            if state.isVoiceLocked {
                 Text("LOCKED")
                     .scaledFont(size: 10, weight: .bold)
                     .foregroundColor(.orange)
@@ -378,31 +381,21 @@ struct FloatingControlBarView: View {
                     .cornerRadius(4)
             }
 
-            // Dim only the "Release to send" hint; live transcript / status reads brighter.
-            let isHint = state.voiceActivity == .listening && state.voiceTranscript.isEmpty
-            Text(voiceStatusText)
-                .scaledFont(size: 13)
-                .foregroundColor(.white.opacity(isHint ? 0.5 : 0.85))
-                .lineLimit(1)
-                .truncationMode(.head)
-        }
-    }
-
-    /// The detail text beside the indicator for the current voice state. The indicator
-    /// itself carries the state visually; this is just the helpful detail.
-    private var voiceStatusText: String {
-        switch state.voiceActivity {
-        case .listening:
-            if !state.voiceTranscript.isEmpty { return state.voiceTranscript }
-            return state.isVoiceLocked
-                ? "Tap \(shortcutSettings.pttShortcut.displayLabel) to send"
-                : "Release \(shortcutSettings.pttShortcut.displayLabel) to send"
-        case .thinking:
-            return "Thinking…"
-        case .speaking:
-            return "Speaking…"
-        case .idle:
-            return ""
+            if !state.voiceTranscript.isEmpty {
+                Text(state.voiceTranscript)
+                    .scaledFont(size: 13)
+                    .foregroundColor(.white.opacity(0.8))
+                    .lineLimit(1)
+                    .truncationMode(.head)
+            } else {
+                Text(
+                    state.isVoiceLocked
+                        ? "Tap \(shortcutSettings.pttShortcut.displayLabel) to send"
+                        : "Release \(shortcutSettings.pttShortcut.displayLabel) to send"
+                )
+                    .scaledFont(size: 13)
+                    .foregroundColor(.white.opacity(0.5))
+            }
         }
     }
 
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
index 4f4b5952e44..e06f27d0ec0 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
@@ -1,18 +1,20 @@
 import Foundation
 
-// MARK: - Realtime Hub (Phase 1)
+// MARK: - Realtime Hub
 //
 // "Realtime-as-hub": instead of the cascade (STT → router → Claude → TTS), one
 // realtime model is the single hub. It does in-session STT, reasoning, routing
 // (as tool choice), and speaks the answer. Its tools call the EXISTING backend
 // endpoints / app code — no new backend routes.
 //
-// Phase 1 is CLIENT-DIRECT + dev/test only: the realtime WS connects straight to
-// the provider with the user's own BYOK key (see APIKeyService). It is gated so
-// it never runs for managed (non-BYOK) users. Phase 2 will replace the BYOK key
-// with a server-minted ephemeral token to make it shippable.
+// The hub is the default voice path — there is no opt-in toggle. Every PTT turn
+// routes through it whenever it can connect: BYOK users connect client-direct with
+// their own key (see APIKeyService); managed users connect with a server-minted
+// ephemeral token. When neither is available (no key, mint fails / not entitled) the
+// turn falls back to the legacy STT cascade. The provider follows the user's "Voice
+// Model" choice in Advanced settings (RealtimeOmniSettings) — no separate picker.
 
-enum RealtimeHubProvider: String, CaseIterable, Sendable {
+enum RealtimeHubProvider: String, Sendable {
   case openai
   case gemini
 
@@ -23,13 +25,6 @@ enum RealtimeHubProvider: String, CaseIterable, Sendable {
     }
   }
 
-  var subtitle: String {
-    switch self {
-    case .openai: return "gpt-realtime-2 · native spoken audio"
-    case .gemini: return "gemini native-audio Live · spoken audio + tools"
-    }
-  }
-
   /// Concrete model identifier sent to the provider.
   var modelID: String {
     switch self {
@@ -58,46 +53,24 @@ enum RealtimeHubProvider: String, CaseIterable, Sendable {
 final class RealtimeHubSettings {
   static let shared = RealtimeHubSettings()
 
-  private let enabledKey = "realtimeHubEnabled"
-  private let providerKey = "realtimeHubProvider"
-
-  private init() {
-    UserDefaults.standard.register(defaults: [
-      enabledKey: false,
-      providerKey: RealtimeHubProvider.openai.rawValue,
-    ])
-  }
-
-  /// Master switch. When off, the floating bar uses the legacy STT → router →
-  /// Claude → TTS cascade. Ships behind this flag.
-  var isEnabled: Bool {
-    get { UserDefaults.standard.bool(forKey: enabledKey) }
-    set {
-      UserDefaults.standard.set(newValue, forKey: enabledKey)
-      NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
-    }
-  }
+  private init() {}
 
+  /// The hub provider follows the user's "Voice Model" choice in Advanced settings —
+  /// there is no separate hub picker. The two map 1:1 (same underlying models), and
+  /// `.auto` is already resolved to a concrete provider by `effectiveProvider`.
   var provider: RealtimeHubProvider {
-    get {
-      let raw = UserDefaults.standard.string(forKey: providerKey)
-      return raw.flatMap(RealtimeHubProvider.init(rawValue:)) ?? .openai
-    }
-    set {
-      UserDefaults.standard.set(newValue.rawValue, forKey: providerKey)
-      NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
+    switch RealtimeOmniSettings.shared.effectiveProvider {
+    case .gptRealtime2: return .openai
+    case .geminiFlashLive, .auto: return .gemini
     }
   }
 
-  /// The hub may only run client-direct when the user has supplied the selected
-  /// provider's own key (BYOK / dev key). This is the managed-user gate: managed
-  /// users have no BYOK key, so the hub stays off and the cascade is used.
+  /// True when the hub can connect client-direct with the user's own provider key
+  /// (BYOK / dev key). Managed users without a key connect via a minted ephemeral
+  /// token instead (see RealtimeHubController.ensureWarm); both reach the hub.
   var canConnect: Bool {
     APIKeyService.byokKey(provider.byokProvider) != nil
   }
-
-  /// True when the hub should drive this PTT turn (enabled + a usable key).
-  var isActive: Bool { isEnabled && canConnect }
 }
 
 extension Notification.Name {
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift
deleted file mode 100644
index 46f0eb99883..00000000000
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift
+++ /dev/null
@@ -1,199 +0,0 @@
-import SwiftUI
-
-/// The floating bar's single status element. One coherent shape that changes its
-/// motion law, color, and energy per state — never a hard icon swap — so the user
-/// always knows, at a glance and without labels, whether the assistant is:
-///
-///   • idle      — a calm, barely-breathing sliver (nearly still, muted)
-///   • listening — a red waveform reacting to "you" (red is reserved for recording)
-///   • thinking  — a cool blue→violet gradient sweeping on its own fixed clock; the
-///                 self-driven motion (no audio) reads as "working, wait" — critical
-///                 so a late reply never looks like "done / idle"
-///   • speaking  — a green waveform driven by the model's actual output amplitude
-///                 ("it's talking", clearly distinct from the red "you" waveform)
-///
-/// Performance: idle (the long-lived resting state) uses a single Core Animation
-/// property animation — no per-frame redraw. The active states use one
-/// `TimelineView(.animation)` + `Canvas` (a single GPU-friendly draw pass, no
-/// view-graph diffing per frame), and only run for the few seconds a turn is live.
-/// No blur/shadow/material (those force offscreen passes) — glow is faked with
-/// translucent gradient fills.
-struct VoiceActivityIndicator: View {
-    let activity: VoiceActivity
-    /// Smoothed 0…1 amplitude of the model's spoken reply (drives the speaking waveform).
-    var level: CGFloat = 0
-
-    var body: some View {
-        ZStack {
-            switch activity {
-            case .idle:
-                IdleBreath()
-            case .listening:
-                WaveformBars(palette: .listening, level: 0, reactive: false)
-            case .thinking:
-                ThinkingSweep()
-            case .speaking:
-                WaveformBars(palette: .speaking, level: level, reactive: true)
-            }
-        }
-        // Cross-fade + gentle scale between states so energy "ramps" rather than snaps.
-        .transition(.opacity.combined(with: .scale(scale: 0.7)))
-        .animation(.spring(response: 0.4, dampingFraction: 0.86), value: activity)
-    }
-}
-
-// MARK: - Idle
-
-/// A short muted capsule that breathes very slowly. Intentionally low-energy so the
-/// resting bar never pulls the eye. Pure Core Animation — no redraw loop.
-private struct IdleBreath: View {
-    @State private var breathing = false
-
-    var body: some View {
-        Capsule()
-            .fill(Color.white.opacity(breathing ? 0.55 : 0.26))
-            .frame(width: 26, height: 5)
-            .scaleEffect(x: 1, y: breathing ? 1.0 : 0.7, anchor: .center)
-            .onAppear {
-                withAnimation(.easeInOut(duration: 2.8).repeatForever(autoreverses: true)) {
-                    breathing = true
-                }
-            }
-    }
-}
-
-// MARK: - Thinking
-
-/// A cool blue→violet gradient that pans continuously across a capsule at a fixed,
-/// self-driven rate. The autonomous (non-audio) motion is the cue that the model is
-/// working — so a slow reply reads as "wait", never as "done".
-private struct ThinkingSweep: View {
-    // Hoisted: the colors are state-constant, so only the gradient positions change
-    // per frame — no point rebuilding these Gradient values 60–120×/s.
-    private static let sweepGradient = Gradient(colors: [
-        Color(red: 0.70, green: 0.49, blue: 1.0),  // violet
-        Color(red: 0.43, green: 0.55, blue: 1.0),  // blue
-        Color(red: 0.70, green: 0.49, blue: 1.0),
-        Color(red: 0.43, green: 0.55, blue: 1.0),
-        Color(red: 0.70, green: 0.49, blue: 1.0),
-    ])
-    private static let glowGradient = Gradient(colors: [
-        Color.white.opacity(0.45), Color.white.opacity(0),
-    ])
-
-    var body: some View {
-        TimelineView(.animation) { timeline in
-            Canvas { context, size in
-                let t = timeline.date.timeIntervalSinceReferenceDate
-                let rect = CGRect(origin: .zero, size: size)
-                let capsule = Capsule().path(in: rect)
-
-                // Dim base track so the capsule reads even at the low point of the sweep.
-                context.fill(capsule, with: .color(.white.opacity(0.10)))
-
-                context.drawLayer { layer in
-                    layer.clip(to: capsule)
-
-                    // Pan a symmetric violet→blue→violet gradient horizontally. Symmetric
-                    // stops + a span twice the width mean the loop has no visible seam.
-                    let period = 2.2  // seconds per full pan
-                    let phase = (t.truncatingRemainder(dividingBy: period)) / period
-                    let span = size.width * 2
-                    let shift = CGFloat(phase) * span
-                    layer.fill(
-                        Rectangle().path(in: rect),
-                        with: .linearGradient(
-                            Self.sweepGradient,
-                            startPoint: CGPoint(x: -span + shift, y: 0),
-                            endPoint: CGPoint(x: shift, y: 0)))
-
-                    // Soft moving highlight (faked glow) gliding with an eased ping-pong
-                    // so it slows at the ends instead of snapping back.
-                    let eased = 0.5 - 0.5 * cos(phase * 2 * .pi)
-                    let cx = size.width * CGFloat(eased)
-                    let glowR = max(size.height, size.width * 0.32)
-                    layer.fill(
-                        Rectangle().path(in: rect),
-                        with: .radialGradient(
-                            Self.glowGradient,
-                            center: CGPoint(x: cx, y: size.height / 2),
-                            startRadius: 0, endRadius: glowR))
-                }
-            }
-        }
-        .frame(width: 34, height: 8)
-    }
-}
-
-// MARK: - Waveform (listening + speaking)
-
-/// Color treatment for a waveform state — a precomputed top→bottom gradient (constant
-/// per state, so it's built once here, not per-bar per-frame inside the Canvas).
-private struct WaveformPalette {
-    let gradient: Gradient
-
-    /// Red — reserved exclusively for recording the user ("you").
-    static let listening = WaveformPalette(gradient: Gradient(colors: [
-        Color(red: 1.0, green: 0.42, blue: 0.42),
-        Color(red: 1.0, green: 0.18, blue: 0.33),
-    ]))
-
-    /// Green/mint — the assistant speaking ("it"); clearly not the red "you" or blue "thinking".
-    static let speaking = WaveformPalette(gradient: Gradient(colors: [
-        Color(red: 0.46, green: 0.93, blue: 0.74),
-        Color(red: 0.20, green: 0.83, blue: 0.60),
-    ]))
-}
-
-/// A small centered equalizer. `reactive` bars track the live `level` (speaking);
-/// non-reactive bars animate on a lively synthetic clock (listening). A per-bar phase
-/// + center weighting gives an organic "voice blob" rather than a marching pattern.
-private struct WaveformBars: View {
-    let palette: WaveformPalette
-    var level: CGFloat
-    var reactive: Bool
-
-    private let barCount = 5
-
-    var body: some View {
-        TimelineView(.animation) { timeline in
-            Canvas { context, size in
-                let t = timeline.date.timeIntervalSinceReferenceDate
-                // Equal bars and gaps: n bars, n-1 gaps, all one unit wide.
-                let unit = size.width / CGFloat(barCount * 2 - 1)
-                let radius = unit / 2
-                let minH = size.height * 0.28
-
-                for i in 0..<barCount {
-                    // Center bars are weighted taller so it reads as a rounded voice shape.
-                    let distFromCenter = abs(CGFloat(i) - CGFloat(barCount - 1) / 2)
-                    let centerWeight = 1.0 - distFromCenter / CGFloat(barCount)
-
-                    let wobble = (sin(t * 7.5 + Double(i) * 1.1) + 1) / 2  // 0…1
-                    let drive: CGFloat
-                    if reactive {
-                        // Audio-reactive: height follows the smoothed amplitude, with a
-                        // little per-bar wobble so quiet passages still feel alive.
-                        drive = min(1, level * (0.55 + 0.9 * centerWeight) + CGFloat(wobble) * 0.18)
-                    } else {
-                        // Listening: purely synthetic but lively equalizer motion.
-                        drive = CGFloat(wobble) * (0.45 + 0.55 * centerWeight)
-                    }
-
-                    let h = max(minH, minH + (size.height - minH) * drive)
-                    let x = CGFloat(i) * unit * 2
-                    let y = (size.height - h) / 2
-                    let barRect = CGRect(x: x, y: y, width: unit, height: h)
-                    let bar = Capsule().path(in: barRect)
-                    context.fill(
-                        bar,
-                        with: .linearGradient(
-                            palette.gradient,
-                            startPoint: CGPoint(x: x, y: y),
-                            endPoint: CGPoint(x: x, y: y + h)))
-                }
-            }
-        }
-        .frame(width: 34, height: 16)
-    }
-}

From 73bebbd56cf922aa8bdb6ca269b2fda6b3377cc8 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:03 -0400
Subject: [PATCH 02/35] refactor(desktop): remove hub enable gates; unwire pill
 flags; single BYOK predicate

isActive/ensureWarm/reconnect no longer gate on the removed isEnabled toggle.
isActive now consults RealtimeHubSettings.canConnect (single source). Removes
the floating-bar pill flag writes (isVoiceThinking/Speaking/voiceLevel) and the
orphaned AVSpeech delegate; native-audio playback is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 106 ++++--------------
 1 file changed, 19 insertions(+), 87 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index cf8e7356e4c..0fe8156e067 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -59,23 +59,19 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
 
   private override init() {
     super.init()
-    // Clear "speaking" when the AVSpeech fallback finishes (native audio uses the
-    // player's drain callback instead).
-    speech.delegate = self
   }
 
   /// In-flight ephemeral mint guard (managed users).
   private var minting = false
 
   /// True when the hub should drive this PTT turn. Read by PushToTalkManager at PTT
-  /// start. BYOK users are ready immediately (own key); managed users are ready only
-  /// once a warm session exists (token minted + connecting) — otherwise PTT falls
-  /// back to the legacy cascade for that turn.
+  /// start. The hub is the default voice path (no opt-in toggle): BYOK users are ready
+  /// immediately (own key); managed users are ready only once a warm session exists
+  /// (token minted + connecting) — otherwise PTT falls back to the legacy cascade for
+  /// that turn.
   var isActive: Bool {
-    guard RealtimeHubSettings.shared.isEnabled else { return false }
-    let provider = RealtimeHubSettings.shared.provider
-    if APIKeyService.byokKey(provider.byokProvider) != nil { return true }
-    return session != nil && sessionProvider == provider
+    if RealtimeHubSettings.shared.canConnect { return true }
+    return session != nil && sessionProvider == RealtimeHubSettings.shared.provider
   }
 
   func setup(barState: FloatingControlBarState) {
@@ -93,9 +89,8 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   }
 
   @objc private func settingsChanged() {
-    // Only reconnect if enabled and the provider actually changed — avoids
-    // redundant teardown/recreate races on unrelated notifications.
-    if !RealtimeHubSettings.shared.isEnabled { teardownSession(); return }
+    // Skip the reconnect when a session already exists for the current provider —
+    // avoids redundant teardown/recreate races on unrelated notifications.
     if session != nil, sessionProvider == RealtimeHubSettings.shared.provider { return }
     teardownSession()
     ensureWarm()
@@ -103,11 +98,10 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
 
   // MARK: - Warm session lifecycle (kept open between turns)
 
-  /// Open the WS now if it isn't already (no-op if disabled or already warm).
-  /// BYOK → connect client-direct with the user's key (Phase 1). Otherwise, if
-  /// signed in → mint a server-side ephemeral token (Phase 2) and connect with it.
+  /// Open the WS now if it isn't already (no-op if already warm). BYOK → connect
+  /// client-direct with the user's key. Otherwise, if signed in → mint a server-side
+  /// ephemeral token and connect with it.
   func ensureWarm() {
-    guard RealtimeHubSettings.shared.isEnabled else { return }
     let provider = RealtimeHubSettings.shared.provider
     if session != nil, sessionProvider == provider { return }
     if session != nil { teardownSession() }
@@ -117,7 +111,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     } else if AuthService.shared.isSignedIn {
       mintAndConnect(provider: provider)
     } else {
-      log("RealtimeHub: enabled but no BYOK key and not signed in — hub unavailable (cascade).")
+      log("RealtimeHub: no BYOK key and not signed in — hub unavailable (cascade).")
     }
   }
 
@@ -137,9 +131,8 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
         log("⚠️ RealtimeHub: ephemeral mint failed / not entitled — staying on cascade")
         return
       }
-      // Provider/enable may have changed while minting; only connect if still wanted.
-      guard RealtimeHubSettings.shared.isEnabled,
-        RealtimeHubSettings.shared.provider == provider, self.session == nil
+      // Provider may have changed while minting; only connect if still wanted.
+      guard RealtimeHubSettings.shared.provider == provider, self.session == nil
       else { return }
       self.startSession(provider: provider, auth: .ephemeral(token))
     }
@@ -152,24 +145,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     // Both providers stream native spoken audio (24k PCM) → StreamingPCMPlayer;
     // AVSpeech is only a no-audio fallback.
     if pcmPlayer == nil {
-      let p = StreamingPCMPlayer(sampleRate: 24000)
-      // Feed the live output amplitude to the speaking waveform — but only while we're
-      // actually in the speaking state, so publishing `voiceLevel` never re-renders the
-      // bar outside that window.
-      p.onLevel = { [weak self] level in
-        guard let self, self.barState?.isVoiceSpeaking == true else { return }
-        self.barState?.voiceLevel = CGFloat(level)
-      }
-      // The reply isn't truly over until the buffered audio finishes draining — only
-      // then do we drop "speaking" and let the bar collapse back to idle.
-      p.onPlayingChanged = { [weak self] playing in
-        guard let self, let barState = self.barState else { return }
-        if !playing {
-          barState.isVoiceSpeaking = false
-          barState.voiceLevel = 0
-        }
-      }
-      pcmPlayer = p
+      pcmPlayer = StreamingPCMPlayer(sampleRate: 24000)
     }
     s.start()
     log(
@@ -202,9 +178,6 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     audioReceivedThisTurn = false
     turnRecorded = false
     lastTurnAt = Date()
-    barState?.isVoiceThinking = false  // new turn → we're recording again, not waiting
-    barState?.isVoiceSpeaking = false  // any prior reply is being cut off below
-    barState?.voiceLevel = 0
     pcmPlayer?.stop()  // stop any prior reply locally
     if speech.isSpeaking { speech.stopSpeaking(at: .immediate) }
     if bargeIn {
@@ -246,11 +219,6 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// PTT-up: end the turn; the model now responds (and may call tools).
   func commitTurn() {
     responding = true
-    // Show a distinct "waiting on the model" state (not the red recording dot, which
-    // reads as "still listening") so the user knows to wait rather than re-press. Setting
-    // this keeps the bar's `isVoiceActive` true across the PTT-up → thinking handoff, so
-    // the window stays expanded (the window observes the flags and resizes itself).
-    barState?.isVoiceThinking = true
     // (The screen frame is sent at turn START — see beginTurn — so it has time to
     // upload/decode before the model answers. Nothing to attach here.)
     session?.commitInputTurn()
@@ -297,11 +265,6 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   }
 
   func hubDidReceiveAudio(_ pcm24k: Data) {
-    if !audioReceivedThisTurn {
-      // First audio of the turn: it's no longer thinking, it's speaking.
-      barState?.isVoiceThinking = false
-      barState?.isVoiceSpeaking = true
-    }
     audioReceivedThisTurn = true
     pcmPlayer?.enqueue(pcm24k)  // native spoken audio (OpenAI + Gemini)
   }
@@ -412,41 +375,34 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     // land here.
     responding = false
     logError("RealtimeHub: session error — \(message)")
-    // The reply is dead — stop any buffered audio and drop the speaking state before
-    // collapsing (the drain callback won't fire for a torn-down engine).
+    // The reply is dead — stop any buffered audio before collapsing.
     pcmPlayer?.stop()
     if speech.isSpeaking { speech.stopSpeaking(at: .immediate) }
-    barState?.isVoiceSpeaking = false
-    barState?.voiceLevel = 0
     exitVoiceUI()
     let aliveFor = lastWarmAt.map { Date().timeIntervalSince($0) } ?? 0
     teardownSession()
     // Re-warm so the NEXT PTT uses the hub, not the STT cascade. Gemini idle-closes
     // the socket (~2.5 min, close 1008) even before the first turn; managed users have
     // no BYOK key, so once `session` is nil `isActive` is false and PTT silently falls
-    // back to omni STT. So gate on isEnabled (NOT isActive, which needs a live session).
+    // back to omni STT. So always try to re-warm (the hub is the default voice path).
     // A socket that survived past the idle window was a normal idle-close → reset the
     // strike budget and keep re-warming forever; one that died fast is likely a config/
     // auth failure → let the strikes cap stop the churn.
     if aliveFor > 60 { hubReconnectStrikes = 0 }
-    guard RealtimeHubSettings.shared.isEnabled, !reconnectPending, hubReconnectStrikes < 5 else { return }
+    guard !reconnectPending, hubReconnectStrikes < 5 else { return }
     hubReconnectStrikes += 1
     reconnectPending = true
     DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) { [weak self] in
       guard let self else { return }
       self.reconnectPending = false
-      if RealtimeHubSettings.shared.isEnabled, self.session == nil { self.ensureWarm() }
+      if self.session == nil { self.ensureWarm() }
     }
   }
 
   /// Return the floating bar from its PTT voice state to compact after a hub turn.
-  /// Leaves `isVoiceSpeaking` alone — the turn can finish generating while the buffered
-  /// reply is still playing; the player's drain callback drops speaking when it ends. The
-  /// window observes these flags and collapses itself once `isVoiceActive` goes false.
   private func exitVoiceUI() {
     guard let barState else { return }
     barState.voiceTranscript = ""
-    barState.isVoiceThinking = false
     barState.isVoiceListening = false
     barState.isVoiceLocked = false
     barState.isVoiceFollowUp = false
@@ -523,17 +479,9 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     utterance.voice =
       AVSpeechSynthesisVoice(language: AVSpeechSynthesisVoice.currentLanguageCode())
       ?? AVSpeechSynthesisVoice(language: "en-US")
-    barState?.isVoiceThinking = false
-    barState?.isVoiceSpeaking = true
     speech.speak(utterance)
   }
 
-  /// Drop the speaking state once the AVSpeech fallback stops talking.
-  private func finishedSpeaking() {
-    barState?.isVoiceSpeaking = false
-    barState?.voiceLevel = 0
-  }
-
   /// Local synthetic mouse click (point_click tool).
   @discardableResult
   static func click(at point: CGPoint) -> Bool {
@@ -549,19 +497,3 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     return true
   }
 }
-
-// MARK: - AVSpeech fallback completion
-
-extension RealtimeHubController: AVSpeechSynthesizerDelegate {
-  nonisolated func speechSynthesizer(
-    _ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance
-  ) {
-    Task { @MainActor [weak self] in self?.finishedSpeaking() }
-  }
-
-  nonisolated func speechSynthesizer(
-    _ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance
-  ) {
-    Task { @MainActor [weak self] in self?.finishedSpeaking() }
-  }
-}

From ce7c34e809e9ad27edf4d2720a0ab3f67d12fd8d Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:03 -0400
Subject: [PATCH 03/35] refactor(desktop): remove floating-bar Realtime Hub
 toggle + provider picker

The hub is always-on and follows the existing Voice Model picker, so the
duplicate enable+provider cards are removed. The default Voice Model is now
OpenAI.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../MainWindow/Pages/SettingsPage.swift       | 76 +------------------
 1 file changed, 1 insertion(+), 75 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift b/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
index fdb8bc462e7..488b1ab852a 100644
--- a/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
+++ b/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
@@ -271,12 +271,7 @@ struct SettingsContentView: View {
 
   // AI Chat settings
   @AppStorage("chatBridgeMode") private var chatBridgeMode: String = "piMono"
-  @AppStorage("realtimeOmniProvider") private var realtimeOmniProvider: String = RealtimeOmniProvider.auto.rawValue
-  // Realtime-as-hub (Phase 1, dev/BYOK only): the realtime model is the single
-  // tool-dispatching voice hub. Provider toggle persisted here; RealtimeHubSession
-  // reads it at connect.
-  @AppStorage("realtimeHubEnabled") private var realtimeHubEnabled = false
-  @AppStorage("realtimeHubProvider") private var realtimeHubProvider: String = RealtimeHubProvider.openai.rawValue
+  @AppStorage("realtimeOmniProvider") private var realtimeOmniProvider: String = RealtimeOmniProvider.gptRealtime2.rawValue
   @AppStorage("askModeEnabled") private var askModeEnabled = false
   @AppStorage("claudeMdEnabled") private var claudeMdEnabled = true
   @AppStorage("projectClaudeMdEnabled") private var projectClaudeMdEnabled = true
@@ -2534,75 +2529,6 @@ struct SettingsContentView: View {
       voiceSpeedSlider(settingId: "floatingbar.voicespeed")
         .opacity(shortcutSettings.hasAnyFloatingBarVoiceAnswersEnabled ? 1 : 0.55)
         .disabled(!shortcutSettings.hasAnyFloatingBarVoiceAnswersEnabled)
-
-      realtimeHubCard
-      realtimeHubProviderCard
-        .opacity(realtimeHubEnabled ? 1 : 0.55)
-        .disabled(!realtimeHubEnabled)
-    }
-  }
-
-  // MARK: Realtime-as-hub (Phase 1, dev/BYOK only)
-
-  /// The realtime model becomes the single voice hub: in-session STT + reasoning
-  /// + tool-choice routing + spoken reply, bypassing the STT→Haiku router→Claude
-  /// cascade. Client-direct using the user's own BYOK key (dev/test only).
-  private var realtimeHubCard: some View {
-    settingsCard(settingId: "floatingbar.realtimehub") {
-      HStack(spacing: 16) {
-        VStack(alignment: .leading, spacing: 4) {
-          Text("Realtime Voice Hub (experimental)")
-            .scaledFont(size: 16, weight: .semibold)
-            .foregroundColor(OmiColors.textPrimary)
-          Text(
-            "Let the realtime model run the whole voice turn — listen, decide, and speak — "
-              + "instead of the slower transcribe→route→answer pipeline. Uses your own provider key."
-          )
-          .scaledFont(size: 13)
-          .foregroundColor(OmiColors.textSecondary)
-        }
-        Spacer()
-        Toggle("", isOn: $realtimeHubEnabled)
-          .toggleStyle(.switch)
-          .tint(OmiColors.purplePrimary)
-          .onChange(of: realtimeHubEnabled) { _ in
-            NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
-          }
-      }
-    }
-  }
-
-  private var realtimeHubProviderCard: some View {
-    let provider = RealtimeHubProvider(rawValue: realtimeHubProvider) ?? .openai
-    let hasKey = APIKeyService.byokKey(provider.byokProvider) != nil
-    return settingsCard(settingId: "floatingbar.realtimehub.provider") {
-      HStack(spacing: 16) {
-        VStack(alignment: .leading, spacing: 4) {
-          Text("Hub Provider")
-            .scaledFont(size: 16, weight: .semibold)
-            .foregroundColor(OmiColors.textPrimary)
-          Text(
-            hasKey
-              ? provider.subtitle
-              : "⚠️ No \(provider.byokProvider.displayName) key set — add one in Developer settings to use this provider."
-          )
-          .scaledFont(size: 13)
-          .foregroundColor(hasKey ? OmiColors.textSecondary : OmiColors.purplePrimary)
-        }
-        Spacer()
-        Picker("", selection: $realtimeHubProvider) {
-          ForEach(RealtimeHubProvider.allCases, id: \.rawValue) { p in
-            Text(p.displayName).tag(p.rawValue)
-          }
-        }
-        .pickerStyle(.menu)
-        .labelsHidden()
-        .frame(width: 180)
-        .tint(OmiColors.purplePrimary)
-        .onChange(of: realtimeHubProvider) { _ in
-          NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
-        }
-      }
     }
   }
 

From 884fd41f599bfa8c1c9760b475f903ced252c661 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:03 -0400
Subject: [PATCH 04/35] feat(desktop): default Voice Model to OpenAI (GPT
 Realtime 2)

Users can still switch to Gemini or Auto in Advanced settings; this default
also drives the realtime hub provider.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift   | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift b/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift
index 393feeaa853..5edc05c7094 100644
--- a/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift
+++ b/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift
@@ -59,7 +59,9 @@ final class RealtimeOmniSettings {
 
     private init() {
         UserDefaults.standard.register(defaults: [
-            providerKey: RealtimeOmniProvider.auto.rawValue,
+            // Default to OpenAI (GPT Realtime 2); the user can switch to Gemini or Auto
+            // in Advanced → Voice Model. This default also drives the realtime hub provider.
+            providerKey: RealtimeOmniProvider.gptRealtime2.rawValue,
             enabledKey: false,
         ])
     }

From f858824888acdf53ce8a891157d84f303a52e988 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:52 -0400
Subject: [PATCH 05/35] revert(desktop): restore imperative PTT resize; drop
 reactive voice-activity observer

Keeps the QueryTracer wiring, router-skip/screenshot heuristics, and recordVoiceTurn.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBarWindow.swift            | 76 ++++---------------
 1 file changed, 13 insertions(+), 63 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
index 7dc37d7d622..d97aa6cb7e0 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
@@ -41,8 +41,6 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
     private var suppressHoverResize = false
     private var inputHeightCancellable: AnyCancellable?
     private var responseHeightCancellable: AnyCancellable?
-    private var voiceActivityCancellable: AnyCancellable?
-    private var collapseWorkItem: DispatchWorkItem?
     private var resizeWorkItem: DispatchWorkItem?
     /// Saved center point from before chat opened, used to restore position on close.
     private var preChatCenter: NSPoint?
@@ -87,7 +85,6 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         self.maxSize = FloatingControlBarWindow.maxBarSize
 
         setupViews()
-        setupVoiceActivityObserver()
 
         if ShortcutSettings.shared.draggableBarEnabled,
            let savedPosition = UserDefaults.standard.string(forKey: FloatingControlBarWindow.positionKey) {
@@ -522,54 +519,6 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         inputHeightCancellable = nil
     }
 
-    /// Single owner of the voice-turn expand/collapse. The bar is wide whenever a voice
-    /// turn is active (`isVoiceActive` = listening || thinking || speaking) and collapses
-    /// to the resting sliver when it ends — derived reactively from the published flags
-    /// instead of imperative resize calls scattered across the PTT/hub code (which had to
-    /// coordinate via a `skipResize` flag).
-    private func setupVoiceActivityObserver() {
-        voiceActivityCancellable = state.$isVoiceListening
-            .combineLatest(state.$isVoiceThinking, state.$isVoiceSpeaking)
-            .map { $0 || $1 || $2 }
-            .removeDuplicates()
-            .receive(on: DispatchQueue.main)
-            .sink { [weak self] active in
-                self?.onVoiceActiveChanged(active)
-            }
-    }
-
-    /// Expand immediately so the window is already wide when the indicator + text render
-    /// (a delayed expand flashes the content cramped in the sliver first). Defer the
-    /// collapse a beat so the transient listening→thinking dip on PTT-up — `isVoiceActive`
-    /// momentarily clears before commitTurn sets thinking — doesn't blink the bar shut.
-    private func onVoiceActiveChanged(_ active: Bool) {
-        collapseWorkItem?.cancel()
-        collapseWorkItem = nil
-        if active {
-            applyVoiceExpansion(true)
-        } else {
-            let work = DispatchWorkItem { [weak self] in self?.applyVoiceExpansion(false) }
-            collapseWorkItem = work
-            DispatchQueue.main.asyncAfter(deadline: .now() + 0.12, execute: work)
-        }
-    }
-
-    private func applyVoiceExpansion(_ active: Bool) {
-        // Onboarding shows no separate bar; follow-up and the AI conversation own their
-        // own layout, so the voice indicator never drives the window size in those modes.
-        guard UserDefaults.standard.bool(forKey: "hasCompletedOnboarding"),
-              !state.isVoiceFollowUp else { return }
-        if active {
-            guard !state.showingAIConversation else { return }
-            resizeForPTTState(expanded: true, animated: false)  // snap — content is ready now
-        } else {
-            // Collapse only when nothing else needs the window expanded.
-            guard !state.showingAIConversation, !state.showingAIResponse,
-                  state.currentNotification == nil, !state.isHoveringBar else { return }
-            resizeForPTTState(expanded: false, animated: true)
-        }
-    }
-
     func updateAIResponse(type: String, text: String) {
         guard state.showingAIConversation else { return }
 
@@ -670,7 +619,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
 
     /// Resize for hover expand/collapse — anchored from center so the circle grows outward.
     func resizeForHover(expanded: Bool) {
-        guard !state.showingAIConversation, !state.isVoiceActive, !state.isShowingNotification, !suppressHoverResize else { return }
+        guard !state.showingAIConversation, !state.isVoiceListening, !state.isShowingNotification, !suppressHoverResize else { return }
         resizeWorkItem?.cancel()
         resizeWorkItem = nil
 
@@ -679,7 +628,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         let doResize: () -> Void = { [weak self] in
             guard let self = self else { return }
             guard !self.state.showingAIConversation,
-                  !self.state.isVoiceActive,
+                  !self.state.isVoiceListening,
                   !self.state.isShowingNotification,
                   !self.suppressHoverResize
             else { return }
@@ -709,16 +658,12 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         }
     }
 
-    /// Resize window for PTT state (expanded when listening, compact circle when idle).
-    /// Expand snaps (animated:false) so the indicator + text never flash cramped while the
-    /// window grows; collapse animates for a smooth shrink back to the resting sliver.
-    func resizeForPTTState(expanded: Bool, animated: Bool = true) {
+    /// Resize window for PTT state (expanded when listening, compact circle when idle)
+    func resizeForPTTState(expanded: Bool) {
         let size = expanded
             ? NSSize(width: FloatingControlBarWindow.expandedWidth, height: FloatingControlBarWindow.expandedBarSize.height)
             : FloatingControlBarWindow.minBarSize
-        // Idempotent: skip when already at the target size (avoids a no-op resize).
-        if abs(frame.width - size.width) < 1, abs(frame.height - size.height) < 1 { return }
-        resizeAnchored(to: size, makeResizable: false, animated: animated)
+        resizeAnchored(to: size, makeResizable: false, animated: true)
     }
 
     func showNotification(_ notification: FloatingBarNotification, animated: Bool = true) {
@@ -737,7 +682,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         state.currentNotification = nil
 
         let targetSize: NSSize
-        if state.isVoiceActive {
+        if state.isVoiceListening {
             targetSize = NSSize(width: Self.expandedWidth, height: Self.expandedBarSize.height)
         } else {
             targetSize = state.isHoveringBar ? Self.expandedBarSize : Self.minBarSize
@@ -748,7 +693,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
     /// Restore the compact pill size when we temporarily surface the bar outside
     /// of an active hover, notification, voice session, or AI conversation.
     func normalizeForTemporaryShow() {
-        guard !state.showingAIConversation, !state.isVoiceActive, state.currentNotification == nil else { return }
+        guard !state.showingAIConversation, !state.isVoiceListening, state.currentNotification == nil else { return }
         resizeAnchored(to: Self.minBarSize, makeResizable: false, animated: false, anchorTop: true)
     }
 
@@ -892,7 +837,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
             minimumWidth = FloatingControlBarWindow.expandedWidth
         } else if state.currentNotification != nil {
             minimumWidth = FloatingControlBarWindow.notificationWidth
-        } else if state.isVoiceActive {
+        } else if state.isVoiceListening {
             minimumWidth = FloatingControlBarWindow.expandedWidth
         } else if state.isHoveringBar {
             minimumWidth = FloatingControlBarWindow.expandedBarSize.width
@@ -1737,6 +1682,11 @@ class FloatingControlBarManager {
         return window?.state
     }
 
+    /// Resize the floating bar for PTT state changes.
+    func resizeForPTT(expanded: Bool) {
+        window?.resizeForPTTState(expanded: expanded)
+    }
+
     // MARK: - AI Query
 
     private func prepareVisibleQueryState(_ message: String, in barWindow: FloatingControlBarWindow, fromVoice: Bool) {

From bb26c29b9d911718699d04f2b149a41947683c7d Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:52 -0400
Subject: [PATCH 06/35] revert(desktop): restore updateBarState(skipResize:)
 imperative resize

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../PushToTalkManager.swift                   | 34 +++++++++++--------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
index 3eca7fcdd98..8daa4225417 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
@@ -523,10 +523,6 @@ class PushToTalkManager: ObservableObject {
     state = .finalizing
     finalizeWorkItem?.cancel()
     finalizeWorkItem = nil
-    // Flags only — the window keeps the bar expanded into "thinking" because commitTurn
-    // sets isVoiceThinking before the reactive resize observer settles (so isVoiceActive
-    // never dips), which is why there's no flicker and no skip-resize coordination here.
-    updateBarState()
 
     // Stop mic immediately — no more audio capture
     audioCaptureService?.stopCapture()
@@ -563,10 +559,10 @@ class PushToTalkManager: ObservableObject {
       // Real speech — instant local ack + commit. The hub speaks the reply and
       // dispatches tools itself; no transcript/router/LLM hop here.
       if ShortcutSettings.shared.pttSoundsEnabled { ackSound?.play() }
-      barState?.voiceTranscript = "…"
       RealtimeHubController.shared.commitTurn()
-      // Leave the bar showing "…"; the hub controller exits the voice UI on turn
-      // completion (so we skip the clearing updateBarState()).
+      // Collapse the bar on release — the hub speaks its reply as audio (no inline
+      // status UI), the same as the legacy voice path.
+      updateBarState()
       AnalyticsManager.shared.floatingBarPTTEnded(
         mode: finalizedMode, hadTranscript: true, transcriptLength: 0)
       log("PushToTalkManager: hub turn committed (instant ack)")
@@ -729,14 +725,14 @@ class PushToTalkManager: ObservableObject {
 
     isCurrentSessionFollowUp = false
 
-    // Reset state. The reactive resize observer won't collapse the bar when a query is in
-    // flight or a conversation is open — it guards on showingAIConversation/showingAIResponse,
-    // which openAIInputWithQuery sets (to the correct response size) right after this.
+    // Reset state — skip PTT collapse resize when we have a query,
+    // because openAIInputWithQuery will resize to the correct size.
+    // Also skip resize when in follow-up mode (panel is already at response size).
     state = .idle
     transcriptSegments = []
     lastInterimText = ""
     currentContextSnapshot = nil
-    updateBarState()
+    updateBarState(skipResize: hasQuery || wasFollowUp)
 
     guard hasQuery else {
       log("PushToTalkManager: no transcript to send")
@@ -1037,8 +1033,9 @@ class PushToTalkManager: ObservableObject {
 
   // MARK: - Bar State Sync
 
-  private func updateBarState() {
+  private func updateBarState(skipResize: Bool = false) {
     guard let barState = barState else { return }
+    let wasListening = barState.isVoiceListening
     let isShowingVoiceUI = (state == .listening || state == .lockedListening)
     barState.isVoiceListening = isShowingVoiceUI
     barState.isVoiceLocked = (state == .lockedListening)
@@ -1047,9 +1044,16 @@ class PushToTalkManager: ObservableObject {
       barState.voiceTranscript = ""
       barState.voiceFollowUpTranscript = ""
     }
-    // The bar's expand/collapse is derived reactively from these flags by the window
-    // (FloatingControlBarWindow.setupVoiceActivityObserver) — one resize per turn, no
-    // imperative calls or skip-flags to keep in sync here.
+
+    // Skip the resize when the caller passes skipResize, in follow-up mode, while the AI
+    // conversation is showing, or during onboarding (the bar shouldn't appear as a separate window).
+    let isOnboarding = !UserDefaults.standard.bool(forKey: "hasCompletedOnboarding")
+    guard !skipResize && !barState.isVoiceFollowUp && !barState.showingAIConversation && !isOnboarding else { return }
+    if barState.isVoiceListening && !wasListening {
+      FloatingControlBarManager.shared.resizeForPTT(expanded: true)
+    } else if !barState.isVoiceListening && wasListening {
+      FloatingControlBarManager.shared.resizeForPTT(expanded: false)
+    }
   }
 }
 

From 155c171ac74e58009791ea0d851976d8fa4245a5 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:52 -0400
Subject: [PATCH 07/35] refactor(desktop): drop orphaned audio level-tap from
 StreamingPCMPlayer

The mixer RMS tap + pending-buffer tracking only fed the reverted speaking
waveform; playback (enqueue/stop/engine-restart/config-rebuild) is unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../StreamingPCMPlayer.swift                  | 85 +------------------
 1 file changed, 1 insertion(+), 84 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift
index 28f4ab4d60e..0836dea0aa2 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift
@@ -14,27 +14,6 @@ final class StreamingPCMPlayer {
   private let format: AVAudioFormat
   private var configObserver: NSObjectProtocol?
 
-  /// Smoothed 0…1 output amplitude, delivered on the main thread (~40×/s) while the
-  /// engine runs. Driven by a tap on the mixer so it tracks what's *actually audible*,
-  /// not what's been buffered ahead. Used to make the speaking waveform audio-reactive.
-  var onLevel: ((Float) -> Void)?
-  /// Fires on the main thread when playback starts (false→true) and when the queue
-  /// fully drains (true→false). Lets the caller mark "speaking" precisely — including
-  /// the silent tail after the last chunk arrives but before it finishes playing.
-  var onPlayingChanged: ((Bool) -> Void)?
-
-  /// Outstanding scheduled buffers (incremented on enqueue, decremented when each
-  /// finishes). Guarded by `bufferLock` because completion handlers run off-main.
-  private var pendingBuffers = 0
-  private let bufferLock = NSLock()
-  private var isPlayingState = false
-  // Exponential moving average of the output RMS (smoothed so the waveform never jitters).
-  private var smoothedLevel: Float = 0
-  // Last value handed to `onLevel`, so we skip main-thread hops while the level is flat
-  // (e.g. the silent tail of a reply) instead of publishing the same number ~40×/s.
-  private var lastDispatchedLevel: Float = -1
-  private var levelTapInstalled = false
-
   init(sampleRate: Double = 24000) {
     // Float32 mono at the source rate; the mixer resamples to the device rate.
     format = AVAudioFormat(
@@ -55,8 +34,6 @@ final class StreamingPCMPlayer {
       log("StreamingPCMPlayer: audio config changed — rebuilding engine")
       self.player.stop()
       self.engine.stop()
-      // The rebuilt graph loses the old tap; let ensureRunning() reinstall it.
-      self.removeLevelTap()
       self.engine.disconnectNodeOutput(self.player)
       self.engine.connect(self.player, to: self.engine.mainMixerNode, format: self.format)
       self.ensureRunning()
@@ -69,42 +46,6 @@ final class StreamingPCMPlayer {
     }
   }
 
-  /// Tap the mixer output once the engine is live so `onLevel` reflects the audio the
-  /// user actually hears. Cheap: one RMS pass per ~1024-frame buffer, EMA-smoothed.
-  private func installLevelTapIfNeeded() {
-    guard !levelTapInstalled, engine.isRunning else { return }
-    levelTapInstalled = true
-    engine.mainMixerNode.installTap(onBus: 0, bufferSize: 1024, format: nil) {
-      [weak self] buffer, _ in
-      guard let self, self.onLevel != nil, let data = buffer.floatChannelData else { return }
-      let frames = Int(buffer.frameLength)
-      guard frames > 0 else { return }
-      let samples = data[0]
-      var sumSquares: Float = 0
-      for i in 0..<frames { sumSquares += samples[i] * samples[i] }
-      let rms = (sumSquares / Float(frames)).squareRoot()
-      // Normalize: speech RMS is small, so apply gain and clamp. Attack fast, release
-      // slow so the bars rise crisply with the voice but settle smoothly between words.
-      let target = min(1.0, rms * 3.2)
-      let alpha: Float = target > self.smoothedLevel ? 0.35 : 0.12
-      self.smoothedLevel += (target - self.smoothedLevel) * alpha
-      let out = self.smoothedLevel
-      // Only hop to main when the level actually moved — flat/silent stretches stay quiet.
-      guard abs(out - self.lastDispatchedLevel) > 0.01 else { return }
-      self.lastDispatchedLevel = out
-      DispatchQueue.main.async { self.onLevel?(out) }
-    }
-  }
-
-  /// Detach the level tap (call when playback stops; reinstalled on the next play).
-  private func removeLevelTap() {
-    guard levelTapInstalled else { return }
-    engine.mainMixerNode.removeTap(onBus: 0)
-    levelTapInstalled = false
-    smoothedLevel = 0
-    lastDispatchedLevel = -1
-  }
-
   /// Ensure the engine + player are actually running before scheduling. Checking
   /// the real `isRunning`/`isPlaying` state (not a one-shot flag) is what makes
   /// playback survive past the first turn: AVAudioEngine auto-suspends when idle
@@ -125,19 +66,6 @@ final class StreamingPCMPlayer {
     if !player.isPlaying {
       player.play()
     }
-    installLevelTapIfNeeded()
-  }
-
-  /// Adjust the outstanding-buffer count and emit `onPlayingChanged` on the edges.
-  private func adjustPending(by delta: Int) {
-    bufferLock.lock()
-    pendingBuffers = max(0, pendingBuffers + delta)
-    let nowPlaying = pendingBuffers > 0
-    let changed = nowPlaying != isPlayingState
-    if changed { isPlayingState = nowPlaying }
-    bufferLock.unlock()
-    guard changed else { return }
-    DispatchQueue.main.async { [weak self] in self?.onPlayingChanged?(nowPlaying) }
   }
 
   /// `data` = little-endian Int16 PCM, mono, at the configured sample rate.
@@ -156,22 +84,11 @@ final class StreamingPCMPlayer {
         channel[i] = max(-1.0, min(1.0, Float(src[i]) / 32768.0))
       }
     }
-    adjustPending(by: 1)
-    player.scheduleBuffer(buffer, completionHandler: { [weak self] in self?.adjustPending(by: -1) })
+    player.scheduleBuffer(buffer)
   }
 
   func stop() {
-    removeLevelTap()  // no playback → no reason to keep tapping (reinstalled on next play)
     player.stop()
     engine.stop()
-    bufferLock.lock()
-    pendingBuffers = 0
-    let wasPlaying = isPlayingState
-    isPlayingState = false
-    bufferLock.unlock()
-    smoothedLevel = 0
-    if wasPlaying {
-      DispatchQueue.main.async { [weak self] in self?.onPlayingChanged?(false) }
-    }
   }
 }

From 4b047bc3545502f918f94ef66f522cb9ecfa3095 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:52 -0400
Subject: [PATCH 08/35] docs(desktop): update RealtimeHubSession header (BYOK +
 managed, not dev-only)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBar/RealtimeHubSession.swift        | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
index 7dc991138c0..2e009f35140 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
@@ -1,12 +1,12 @@
 import Foundation
 import Network
 
-// MARK: - Realtime Hub Session (Phase 1, CLIENT-DIRECT)
+// MARK: - Realtime Hub Session
 //
-// One persistent WebSocket to a realtime provider, opened with the user's own
-// BYOK key (dev/test only — gated by RealtimeHubSettings.canConnect). The model
-// is the hub: it does in-session STT + reasoning + routing (via tool calls) and
-// speaks the answer.
+// One persistent WebSocket to a realtime provider, opened either with the user's
+// own BYOK key (client-direct, gated by RealtimeHubSettings.canConnect) or with a
+// server-minted ephemeral token (managed users). The model is the hub: it does
+// in-session STT + reasoning + routing (via tool calls) and speaks the answer.
 //
 // Two providers, normalized to ONE internal stream surface
 // (RealtimeHubSessionDelegate):

From 26c445b7c23b798d4f35c645116fe5e514de84c7 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 17:59:52 -0400
Subject: [PATCH 09/35] =?UTF-8?q?chore(desktop):=20changelog=20=E2=80=94?=
 =?UTF-8?q?=20hub=20as=20default=20voice=20path;=20drop=20reverted=20pill?=
 =?UTF-8?q?=20entry?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 desktop/macos/CHANGELOG.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/desktop/macos/CHANGELOG.json b/desktop/macos/CHANGELOG.json
index 2c0d04f0735..278ff53fa00 100644
--- a/desktop/macos/CHANGELOG.json
+++ b/desktop/macos/CHANGELOG.json
@@ -1,8 +1,7 @@
 {
   "unreleased": [
     "Faster, cheaper assistant responses via Anthropic prompt caching of the system+tools prefix and conversation history",
-    "Added an experimental Realtime Voice Hub (Settings \u2192 Floating Bar): the realtime model handles your whole voice turn \u2014 listening, deciding, and speaking \u2014 for noticeably faster replies",
-    "Redesigned the floating bar voice indicator with smooth, distinct idle, listening, thinking, and speaking states so you always know whether the assistant is working or done",
+    "Faster voice replies (experimental): the realtime model now handles your whole voice turn \u2014 listening, deciding, and speaking \u2014 instead of the slower transcribe\u2192route\u2192answer pipeline",
     "Voice (push-to-talk) conversations now appear in your chat history",
     "Fixed older chat messages failing to load in long chats"
   ],

From 6666c64fe737f6307799f870e688ace1b6d7a2b7 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 18:42:27 -0400
Subject: [PATCH 10/35] fix(desktop): re-warm hub on Voice Model change;
 collapse bar on mid-turn hub exit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Observe .realtimeOmniSettingsDidChange (the live 'voice model changed' signal)
  instead of the dead .realtimeHubSettingsDidChange, so switching the Voice Model
  re-warms the hub on the newly selected provider for BYOK users.
- exitVoiceUI() now collapses the bar when it clears isVoiceListening (mid-turn
  error / silent-tap cancel) — the transition-based updateBarState() would otherwise
  see no change and leave the bar expanded. Guarded against conversation/response/
  notification/hover/onboarding.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 24 +++++++++++++++----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index 0fe8156e067..5c28412c4fa 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -76,14 +76,16 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
 
   func setup(barState: FloatingControlBarState) {
     self.barState = barState
-    // Register the observer exactly once — duplicate registrations (re-entrant
-    // setup) fired settingsChanged N times, each tearing down + recreating the
-    // socket, which orphaned a connecting session (Gemini 1001/1008 closes).
+    // The hub provider follows the "Voice Model" picker, so re-warm when it changes —
+    // observe the live settings notification (posted by the picker, RealtimeOmniSettings
+    // setters, and AutoModelSelector). Register exactly once — duplicate registrations
+    // (re-entrant setup) fired settingsChanged N times, each tearing down + recreating
+    // the socket, which orphaned a connecting session (Gemini 1001/1008 closes).
     NotificationCenter.default.removeObserver(
-      self, name: .realtimeHubSettingsDidChange, object: nil)
+      self, name: .realtimeOmniSettingsDidChange, object: nil)
     NotificationCenter.default.addObserver(
       self, selector: #selector(settingsChanged),
-      name: .realtimeHubSettingsDidChange, object: nil)
+      name: .realtimeOmniSettingsDidChange, object: nil)
     // Expose the headless E2E action (omi-ctl action hub_test_turn pcm=… provider=…).
     RealtimeHubTestHarness.registerAutomationAction()
   }
@@ -402,10 +404,22 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// Return the floating bar from its PTT voice state to compact after a hub turn.
   private func exitVoiceUI() {
     guard let barState else { return }
+    // Capture before clearing: a mid-turn error or silent-tap cancel clears the
+    // listening flag here, so PushToTalkManager.updateBarState() (which resizes only
+    // on a wasListening→false transition) would see no change and leave the bar wide.
+    let wasExpandedForVoice = barState.isVoiceListening
     barState.voiceTranscript = ""
     barState.isVoiceListening = false
     barState.isVoiceLocked = false
     barState.isVoiceFollowUp = false
+    // Collapse the bar ourselves in that case — guarded so we never shrink the bar out
+    // from under an open conversation, response, notification, hover, or onboarding.
+    guard wasExpandedForVoice,
+      !barState.showingAIConversation, !barState.showingAIResponse,
+      barState.currentNotification == nil, !barState.isHoveringBar,
+      UserDefaults.standard.bool(forKey: "hasCompletedOnboarding")
+    else { return }
+    FloatingControlBarManager.shared.resizeForPTT(expanded: false)
   }
 
   // MARK: - Tools

From ff7bcc0e4191540f19c2795682dfb13dd1b5b750 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 18:42:27 -0400
Subject: [PATCH 11/35] fix(desktop): remove dead .realtimeHubSettingsDidChange
 notification

Nothing posts it after the hub toggle was removed; the controller now listens to
.realtimeOmniSettingsDidChange instead.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Sources/FloatingControlBar/RealtimeHubSettings.swift      | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
index e06f27d0ec0..3ae0e0ec18e 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
@@ -72,7 +72,3 @@ final class RealtimeHubSettings {
     APIKeyService.byokKey(provider.byokProvider) != nil
   }
 }
-
-extension Notification.Name {
-  static let realtimeHubSettingsDidChange = Notification.Name("realtimeHubSettingsDidChange")
-}

From 1793120ba1b4607aa3c573ac4ef24c9751dbd688 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 18:42:28 -0400
Subject: [PATCH 12/35] fix(desktop): Voice Model picker posts
 .realtimeOmniSettingsDidChange
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The picker writes @AppStorage directly (bypassing the setter), so post the change
ourselves — this re-warms the realtime hub on the newly selected provider.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift b/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
index 488b1ab852a..79fa7458530 100644
--- a/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
+++ b/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
@@ -3420,6 +3420,10 @@ struct SettingsContentView: View {
               if newValue == RealtimeOmniProvider.auto.rawValue {
                 AutoModelSelector.shared.refreshIfStale()
               }
+              // The picker writes @AppStorage directly (bypassing the RealtimeOmniSettings
+              // setter), so post the change ourselves — this is what re-warms the realtime
+              // hub on the newly selected provider (and is a no-op for unchanged providers).
+              NotificationCenter.default.post(name: .realtimeOmniSettingsDidChange, object: nil)
             }
           }
 

From 713f5fff64b0bbc2a1ba9169257bd7a3924e9ee9 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 18:57:21 -0400
Subject: [PATCH 13/35] fix(desktop): hub takes a turn only when actually
 connected (graceful cascade fallback)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

isActive returned true whenever a BYOK key existed, so a stale/revoked OpenAI or
Gemini key kept entering hub mode on every PTT and lost the turn to a failing
realtime session (while managed-token failures already fell back).

Now isActive requires a live, authenticated session for the selected provider
(new hubConnected flag, set on the post-auth connect, cleared on teardown). A
key/token that can't connect never routes a turn to the hub — PTT transparently
uses the legacy cascade, so a broken hub never costs the user a turn. The hub
re-warms in the background and resumes taking turns the moment it connects (also
covers provider switches and idle-close reconnect windows). Reconnect churn is
still capped via a named maxReconnectStrikes constant.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index 5c28412c4fa..776cbdee28f 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -46,6 +46,15 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// Consecutive failed (re)connects with no surviving session — caps churn on a hard
   /// failure. Reset when a socket survives past the idle window or a turn completes.
   private var hubReconnectStrikes = 0
+  /// After this many consecutive fast failures (e.g. a stale/revoked key failing auth),
+  /// the hub stops re-warming so it doesn't hammer a dead endpoint.
+  private static let maxReconnectStrikes = 5
+  /// True only while a session is connected + authenticated for `sessionProvider`. This is
+  /// what gates `isActive`: a PTT turn enters hub mode only when the hub is genuinely
+  /// connected right now; otherwise it transparently uses the legacy cascade. Set in
+  /// hubDidConnect (fires post-auth, on "ready") and cleared on teardown/error, so a
+  /// stale/revoked key — which never connects — never costs the user a turn.
+  private var hubConnected = false
   /// True between commit and turn-done — used to detect barge-in (a new PTT while
   /// the previous reply is still in flight).
   private var responding = false
@@ -65,13 +74,14 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   private var minting = false
 
   /// True when the hub should drive this PTT turn. Read by PushToTalkManager at PTT
-  /// start. The hub is the default voice path (no opt-in toggle): BYOK users are ready
-  /// immediately (own key); managed users are ready only once a warm session exists
-  /// (token minted + connecting) — otherwise PTT falls back to the legacy cascade for
-  /// that turn.
+  /// start. The hub is the default voice path (no opt-in toggle).
   var isActive: Bool {
-    if RealtimeHubSettings.shared.canConnect { return true }
-    return session != nil && sessionProvider == RealtimeHubSettings.shared.provider
+    // Drive a turn only when the hub is actually CONNECTED + authenticated for the
+    // currently-selected provider. A turn never enters hub mode on a key/token that can't
+    // connect (stale/revoked key, failed mint, mid-reconnect, or a just-switched provider):
+    // PTT transparently uses the legacy cascade instead, so a broken hub never costs the
+    // user a turn. The hub re-warms in the background and flips this true once it connects.
+    hubConnected && sessionProvider == RealtimeHubSettings.shared.provider
   }
 
   func setup(barState: FloatingControlBarState) {
@@ -162,6 +172,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     session?.stop()
     session = nil
     sessionProvider = nil
+    hubConnected = false  // no live session → PTT falls back to the cascade until re-warm
   }
 
   // MARK: - PTT integration
@@ -243,6 +254,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
 
   func hubDidConnect() {
     lastWarmAt = Date()
+    hubConnected = true  // authenticated + ready — PTT may now route turns to the hub
     log("RealtimeHub: connected (\(sessionProvider?.displayName ?? "?"))")
   }
 
@@ -391,7 +403,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     // strike budget and keep re-warming forever; one that died fast is likely a config/
     // auth failure → let the strikes cap stop the churn.
     if aliveFor > 60 { hubReconnectStrikes = 0 }
-    guard !reconnectPending, hubReconnectStrikes < 5 else { return }
+    guard !reconnectPending, hubReconnectStrikes < Self.maxReconnectStrikes else { return }
     hubReconnectStrikes += 1
     reconnectPending = true
     DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) { [weak self] in

From 1b67d9c5db4f8068682cc9e068d20b115820a577 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 23:29:20 -0400
Subject: [PATCH 14/35] feat(desktop): give the realtime voice agent
 personal-data tools

Add get_memories, search_memories, search_conversations, get_conversations,
create_action_item and update_action_item to the floating-bar voice agent so it
answers "who am I / what do you know about me" and "most recent conversation"
instead of refusing. Reads are synchronous and spoken; the system prompt no
longer claims it can't see personal data and routes recency vs semantic vs
spawn_agent. Tool dicts are static `let` (built once, not per reconnect).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBar/RealtimeHubTools.swift | 179 +++++++++++++++---
 1 file changed, 148 insertions(+), 31 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index dc526d867cf..d3ba37d796d 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -3,9 +3,12 @@ import Foundation
 // MARK: - Realtime Hub tool surface
 //
 // The realtime model IS the router: instead of a separate Haiku classify() call,
-// the model decides what to do by choosing a tool. The same four tools are
+// the model decides what to do by choosing a tool. The same tool surface is
 // declared to both providers (OpenAI Realtime `tools`, Gemini `functionDeclarations`);
 // `RealtimeHubController` executes them by calling EXISTING app code / endpoints.
+// Reads (get_tasks, get_memories, search_memories, search_conversations,
+// get_conversations) and simple writes (create_action_item, update_action_item) run
+// synchronously and speak their result; multi-step / other-app work still goes to spawn_agent.
 
 enum HubTool: String {
   /// Escalate a hard / knowledge-heavy question to the smarter Claude model via
@@ -17,6 +20,21 @@ enum HubTool: String {
   /// Read the user's tasks locally (TasksStore) and return them inline to speak — a
   /// fast synchronous READ, NOT a background agent.
   case getTasks = "get_tasks"
+  /// Read what Omi knows about the user (memories / facts) and return it inline to speak.
+  /// Fast synchronous READ — the answer to "who am I" / "what do you know about me".
+  case getMemories = "get_memories"
+  /// Semantically search the user's memories / facts for something specific. Fast READ.
+  case searchMemories = "search_memories"
+  /// Semantically search the user's past conversations (titles + summaries, no transcripts).
+  /// Fast synchronous READ.
+  case searchConversations = "search_conversations"
+  /// List the user's MOST RECENT conversations, newest first (titles + summaries, no
+  /// transcripts). Fast READ — the answer to "most recent / latest / last conversation".
+  case getConversations = "get_conversations"
+  /// Create a new task / to-do / reminder for the user. Fast synchronous WRITE.
+  case createActionItem = "create_action_item"
+  /// Update an existing task (mark done, change text/due). Needs the task id from get_tasks.
+  case updateActionItem = "update_action_item"
   /// Capture the user's screen so the model can see what they're looking at.
   case screenshot = "screenshot"
   /// Click at on-screen coordinates (local).
@@ -33,34 +51,51 @@ enum RealtimeHubTools {
     give the full answer yourself — don't shorten it and don't offload it. \
     Always reply in English.
 
-    IMPORTANT: You have NO direct access to the user's personal data or their apps. \
-    You cannot see their tasks, to-dos, calendar, notes, emails, messages, past \
-    conversations, memories, files, or reminders on your own. The spawn_agent tool \
-    CAN — it hands the request to a background agent that has all of those tools and \
-    can act in the user's apps and browser.
+    IMPORTANT: You CAN read the user's Omi data directly with fast tools — their tasks \
+    (get_tasks), what Omi knows about them / their memories & facts (get_memories, \
+    search_memories), and their past conversations (search_conversations) — and you can \
+    make simple task changes (create_action_item, update_action_item). For anything in \
+    their OTHER apps (calendar, notes, emails, messages, files, reminders, browser) or any \
+    multi-step "do X for me" work, use spawn_agent — it hands the request to a background \
+    agent that has those tools and can act in the user's apps.
 
     Using tools: the moment a request needs a tool, briefly acknowledge it OUT LOUD in your \
     own natural, varied words (keep it short, and don't include any answer or data you don't \
-    have yet), then immediately call the tool. For a data tool (get_tasks, ask_higher_model), \
-    speak its result after it returns. NEVER put an answer — real or guessed — in that \
-    acknowledgment, NEVER skip the tool call, and never read tool JSON aloud. You cannot see \
-    tasks, data, or the screen without calling a tool.
+    have yet), then immediately call the tool. For a READ tool (get_tasks, get_memories, \
+    search_memories, search_conversations, ask_higher_model), speak its result after it \
+    returns. NEVER put an answer — real or guessed — in that acknowledgment, NEVER skip the \
+    tool call, and never read tool JSON or ids aloud. You cannot see the user's data or \
+    screen without calling a tool.
 
     Decide what to do with each request:
     - The user's TASKS / to-dos / what's due — a READ ("what are my tasks", "what's due \
     today", "what's on my list", "do I have anything today"): you MUST call get_tasks and \
-    speak ONLY what it returns. You CANNOT see their tasks any other way — never guess, \
-    summarize from memory, or make up tasks. Always call get_tasks; do NOT use an agent.
-    - DOING something for the user, or their OTHER personal data (calendar, notes, emails, \
-    messages, conversations, memories, files, reminders) — create/send/open/edit/search/ \
-    schedule/automate/"do X for me"/any multi-step work: you CANNOT do these yourself. You \
-    MUST actually EMIT the spawn_agent function call (with a clear, self-contained `brief`). \
-    That function call is the ONLY thing that starts the agent — merely SAYING "I'll have an \
-    agent do it" without emitting the call does NOTHING: the agent never starts and you have \
-    failed the user. So always emit the spawn_agent call. You may add one short natural \
-    sentence as you call it, but never instead of it. Do NOT ask clarifying questions before \
-    spawning — spawn with what you have. Do NOT wait for it, narrate its steps, refuse, or \
-    claim you can't.
+    speak ONLY what it returns. Never guess, summarize from memory, or make up tasks.
+    - WHO the user is / what you know about them / their memories or facts ("who am I", \
+    "what do you know about me", "what are my preferences"): you MUST call get_memories (no \
+    query) and speak what it returns. For a SPECIFIC fact ("what's my dog's name", "where do \
+    I work"), call search_memories with a focused query. NEVER answer "I don't know" or guess \
+    — always call the tool first; this data is the whole point.
+    - The user's MOST RECENT / latest / last conversation ("what was my most recent \
+    conversation", "what did we just talk about", "my recent conversations"): call \
+    get_conversations (newest first) — NOT search_conversations, which is semantic and does \
+    NOT sort by time. Speak the latest one.
+    - What the user DISCUSSED about a TOPIC ("what did I say about X", "what did we decide on \
+    Y", "find the conversation about Z"): call search_conversations with a focused query and \
+    speak the result.
+    - ADD a task / to-do / reminder ("remind me to…", "add … to my list", "I need to…"): \
+    call create_action_item with a clear `description` (and `due_at` if a time was given), \
+    then confirm out loud. CHANGE an existing task (mark done, edit, reschedule): first \
+    call get_tasks to get the matching task's id, then call update_action_item with that id.
+    - DOING something for the user in their OTHER apps (calendar, notes, emails, messages, \
+    files, browser) or any multi-step work — create/send/open/edit/search/schedule/automate/ \
+    "do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
+    function call (with a clear, self-contained `brief`). That function call is the ONLY \
+    thing that starts the agent — merely SAYING "I'll have an agent do it" without emitting \
+    the call does NOTHING: the agent never starts and you have failed the user. So always \
+    emit the spawn_agent call. You may add one short natural sentence as you call it, but \
+    never instead of it. Do NOT ask clarifying questions before spawning — spawn with what \
+    you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
     - Everything else — general questions, facts, chit-chat, explanations, advice, jokes, \
     and creative or long-form requests (stories, brainstorming, drafts): ANSWER YOURSELF. \
     You are fully capable; do it directly, even when the ask is long or open-ended. Do \
@@ -75,9 +110,9 @@ enum RealtimeHubTools {
     Keep latency low: prefer answering directly when you can.
     """
 
-  /// OpenAI Realtime GA `session.tools` entries.
-  static var openAITools: [[String: Any]] {
-    [
+  /// OpenAI Realtime GA `session.tools` entries. Static `let` — built once, not rebuilt on
+  /// every session (re)connect that reads it.
+  static let openAITools: [[String: Any]] = [
       [
         "type": "function",
         "name": HubTool.askHigherModel.rawValue,
@@ -103,6 +138,90 @@ enum RealtimeHubTools {
           + "my list'. Do NOT use spawn_agent for reading tasks.",
         "parameters": ["type": "object", "properties": [:]],
       ],
+      [
+        "type": "function",
+        "name": HubTool.getMemories.rawValue,
+        "description":
+          "Read what Omi knows about the user — their memories and facts (preferences, "
+          + "background, people, habits). Fast synchronous read with NO query. Use this for "
+          + "'who am I', 'what do you know about me', 'what are my preferences'. Speak what it returns.",
+        "parameters": ["type": "object", "properties": [:]],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.searchMemories.rawValue,
+        "description":
+          "Search the user's memories / facts for a SPECIFIC thing ('what's my dog's name', "
+          + "'where do I work', 'what's my partner's name'). Fast synchronous read. Speak the result.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "query": ["type": "string", "description": "What to look up about the user."]
+          ],
+          "required": ["query"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.searchConversations.rawValue,
+        "description":
+          "Search the user's past conversations for what they discussed ('what did I say about X', "
+          + "'what did we decide', 'summarize my last meeting'). Returns titles + summaries only "
+          + "(no full transcripts). Fast synchronous read. Speak the result.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "query": ["type": "string", "description": "What topic / conversation to find."]
+          ],
+          "required": ["query"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.getConversations.rawValue,
+        "description":
+          "List the user's MOST RECENT conversations, newest first (titles + summaries, no full "
+          + "transcripts). Use this — NOT search_conversations — for 'what was my most recent / "
+          + "latest / last conversation', 'what did we just talk about', or 'my recent conversations'. "
+          + "search_conversations is semantic and does NOT order by time, so it's wrong for 'recent'. "
+          + "Fast synchronous read. Speak the result.",
+        "parameters": ["type": "object", "properties": [:]],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.createActionItem.rawValue,
+        "description":
+          "Create a new task / to-do / reminder for the user ('remind me to…', 'add … to my "
+          + "list', 'I need to…'). Fast synchronous write. Confirm out loud after it returns.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "description": ["type": "string", "description": "The task text."],
+            "due_at": [
+              "type": "string",
+              "description": "Optional ISO-8601 due date/time, only if the user gave one.",
+            ],
+          ],
+          "required": ["description"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.updateActionItem.rawValue,
+        "description":
+          "Update an existing task: mark it done, edit its text, or reschedule it. You MUST first "
+          + "call get_tasks to get the matching task's id, then pass that id here. Fast synchronous write.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "id": ["type": "string", "description": "The task id from get_tasks."],
+            "completed": ["type": "boolean", "description": "Set true to mark the task done."],
+            "description": ["type": "string", "description": "New task text, if changing it."],
+            "due_at": ["type": "string", "description": "New ISO-8601 due date/time, if rescheduling."],
+          ],
+          "required": ["id"],
+        ],
+      ],
       [
         "type": "function",
         "name": HubTool.spawnAgent.rawValue,
@@ -140,12 +259,11 @@ enum RealtimeHubTools {
           "required": ["x", "y"],
         ],
       ],
-    ]
-  }
+  ]
 
-  /// Gemini Live `setup.tools[0].functionDeclarations` entries (same surface).
-  static var geminiFunctionDeclarations: [[String: Any]] {
-    openAITools.map { tool in
+  /// Gemini Live `setup.tools[0].functionDeclarations` entries (same surface). Derived once
+  /// from `openAITools`.
+  static let geminiFunctionDeclarations: [[String: Any]] = openAITools.map { tool in
       // Gemini wants {name, description, parameters} without the OpenAI "type" wrapper.
       var decl: [String: Any] = [
         "name": tool["name"] as? String ?? "",
@@ -159,7 +277,6 @@ enum RealtimeHubTools {
       }
       return decl
     }
-  }
 
   /// Recursively uppercase every `type` value in a JSON-schema dict so it matches Gemini's
   /// Schema enum (object → OBJECT, string → STRING, …).

From 9c419b1beea71267b41e77e941b2269522dbd266 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 23:29:27 -0400
Subject: [PATCH 15/35] feat(desktop): execute the voice-agent data tools via
 APIClient
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire the new HubTool cases in hubDidRequestTool to the existing APIClient.tool*
endpoints (capped for voice: top 3/5, no transcripts). Reads/writes share one
runToolAndSpeak helper (Task / do-catch / empty-fallback / log / sendToolResult)
and a small arg() accessor; get_tasks output carries [id:…] so update_action_item
can target a task.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 103 +++++++++++++++++-
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index 776cbdee28f..894159abf50 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -295,6 +295,25 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     }
   }
 
+  /// Run an async tool `body`, then speak its result: on throw → `errorText`, on an
+  /// empty/whitespace result → `emptyText`. Shared by the data read/write tool cases so the
+  /// Task / do-catch / blank-check / log / sendToolResult tail lives in exactly one place.
+  private func runToolAndSpeak(
+    callId: String, name: String, detail: String = "",
+    emptyText: String, errorText: String,
+    _ body: @escaping () async throws -> String
+  ) {
+    Task { [weak self] in
+      guard let self else { return }
+      var out: String
+      do { out = try await body() } catch { out = errorText }
+      if out.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { out = emptyText }
+      let suffix = detail.isEmpty ? "" : " \(detail)"
+      log("RealtimeHub[\(self.providerTag)]: tool \(name)\(suffix) → \(out.prefix(60))")
+      self.session?.sendToolResult(callId: callId, name: name, output: out)
+    }
+  }
+
   func hubDidRequestTool(name: String, callId: String, argumentsJSON: String) {
     let arguments =
       (try? JSONSerialization.jsonObject(with: Data(argumentsJSON.utf8)) as? [String: Any]) ?? [:]
@@ -303,9 +322,10 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
       session?.sendToolResult(callId: callId, name: name, output: "Unknown tool.")
       return
     }
+    func arg(_ key: String) -> String { (arguments[key] as? String) ?? turnTranscript }
     switch tool {
     case .askHigherModel:
-      let query = (arguments["query"] as? String) ?? turnTranscript
+      let query = arg("query")
       log("RealtimeHub[\(providerTag)]: tool ask_higher_model → POST /v2/chat/completions (claude-sonnet-4-6) query=\"\(query.prefix(80))\"")
       Task { [weak self] in
         guard let self else { return }
@@ -320,8 +340,9 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
         await TasksStore.shared.loadDashboardTasks()
         let overdue = TasksStore.shared.overdueTasks
         let today = TasksStore.shared.todaysTasks
+        // Include the task id (for update_action_item) — the model is told never to speak ids.
         func list(_ items: [TaskActionItem]) -> String {
-          items.prefix(15).map { "- \($0.description)" }.joined(separator: "\n")
+          items.prefix(15).map { "- \($0.description) [id:\($0.id)]" }.joined(separator: "\n")
         }
         var out = ""
         if !overdue.isEmpty { out += "Overdue (\(overdue.count)):\n\(list(overdue))\n" }
@@ -330,8 +351,84 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
         log("RealtimeHub[\(self.providerTag)]: tool get_tasks → \(overdue.count) overdue, \(today.count) today")
         self.session?.sendToolResult(callId: callId, name: name, output: out)
       }
+    case .getMemories:
+      // Fast READ — "who am I" / "what do you know about me". Backend memories+facts.
+      runToolAndSpeak(
+        callId: callId, name: name,
+        emptyText: "I don't have any memories saved about you yet.",
+        errorText: "Could not read your memories right now."
+      ) { try await APIClient.shared.toolGetMemories(limit: 15).resultText }
+    case .searchMemories:
+      let query = arg("query")
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "q=\"\(query.prefix(60))\"",
+        emptyText: "I couldn't find anything about that.",
+        errorText: "Could not search your memories right now."
+      ) { try await APIClient.shared.toolSearchMemories(query: query, limit: 5).resultText }
+    case .searchConversations:
+      // Capped for voice: top 5, summaries only (no full transcripts).
+      let query = arg("query")
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "q=\"\(query.prefix(60))\"",
+        emptyText: "I couldn't find a conversation about that.",
+        errorText: "Could not search your conversations right now."
+      ) {
+        try await APIClient.shared.toolSearchConversations(
+          query: query, limit: 5, includeTranscript: false
+        ).resultText
+      }
+    case .getConversations:
+      // Fast READ — most recent conversations, newest first (backend orders created_at DESC).
+      // Capped for voice: top 3, summaries only. This is the recency path; search_conversations
+      // is semantic and must NOT be used for "most recent".
+      runToolAndSpeak(
+        callId: callId, name: name,
+        emptyText: "I don't see any recent conversations.",
+        errorText: "Could not read your recent conversations right now."
+      ) {
+        try await APIClient.shared.toolGetConversations(
+          limit: 3, includeTranscript: false
+        ).resultText
+      }
+    case .createActionItem:
+      let description = (arguments["description"] as? String)?
+        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+      let dueAt = arguments["due_at"] as? String
+      guard !description.isEmpty else {
+        session?.sendToolResult(
+          callId: callId, name: name, output: "No task description was given.")
+        return
+      }
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "\"\(description.prefix(60))\"",
+        emptyText: "Task created.",
+        errorText: "Could not create the task right now."
+      ) {
+        try await APIClient.shared.toolCreateActionItem(
+          description: description, dueAt: dueAt
+        ).resultText
+      }
+    case .updateActionItem:
+      guard let id = (arguments["id"] as? String), !id.isEmpty else {
+        session?.sendToolResult(
+          callId: callId, name: name,
+          output: "Missing the task id — call get_tasks first to find it.")
+        return
+      }
+      let completed = arguments["completed"] as? Bool
+      let newDescription = arguments["description"] as? String
+      let dueAt = arguments["due_at"] as? String
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "id=\(id.prefix(8))",
+        emptyText: "Task updated.",
+        errorText: "Could not update the task right now."
+      ) {
+        try await APIClient.shared.toolUpdateActionItem(
+          id: id, completed: completed, description: newDescription, dueAt: dueAt
+        ).resultText
+      }
     case .spawnAgent:
-      let brief = (arguments["brief"] as? String) ?? turnTranscript
+      let brief = arg("brief")
       let model = ShortcutSettings.shared.selectedModel.isEmpty
         ? "claude-sonnet-4-6" : ShortcutSettings.shared.selectedModel
       // Non-blocking: spawn renders its own pill ("text bubble") and runs on its

From 8e98a2b1b10f34cc01985daf1137c62f80ca021d Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Wed, 17 Jun 2026 23:29:35 -0400
Subject: [PATCH 16/35] test(desktop): stub the new voice-agent data tools in
 the hub harness

Keep the harness tool-result switch exhaustive for the six new HubTool cases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBar/RealtimeHubTestHarness.swift       | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
index 8585d3033b3..0a01753181d 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
@@ -118,7 +118,13 @@ final class RealtimeHubTestHarness: NSObject, RealtimeHubSessionDelegate {
     let stub: String
     switch HubTool(rawValue: name) {
     case .askHigherModel: stub = "Paris is the capital of France."
-    case .getTasks: stub = "Due today (1):\n- Example task"
+    case .getTasks: stub = "Due today (1):\n- Example task [id:task_123]"
+    case .getMemories: stub = "You live in San Francisco and prefer concise answers."
+    case .searchMemories: stub = "Your dog's name is Rex."
+    case .searchConversations: stub = "On Monday you discussed the launch timeline."
+    case .getConversations: stub = "Most recent: today, 'Standup notes'. Before that: yesterday, 'Design review'."
+    case .createActionItem: stub = "Created task: Example task."
+    case .updateActionItem: stub = "Updated the task."
     case .spawnAgent: stub = "Started a background agent."
     case .screenshot: stub = "Screen captured."
     case .pointClick: stub = "Clicked."

From 6945db1bbffbe490e50e62b5aab06f9d79d7d764 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 15:25:00 -0400
Subject: [PATCH 17/35] feat(desktop): give the realtime hub activity, screen,
 and full-task tools + varied heads-ups

Add get_daily_recap, search_screen_history, and get_action_items to the hub tool
surface (enum + OpenAI/Gemini schemas) so voice can answer "what did I do
yesterday", screen-history lookups, and filtered/completed task queries. Route
productivity/workflow questions to pull get_daily_recap (+get_action_items) instead
of answering generically, and rework the tool-use rule so the spoken heads-up before
a tool call is specific to the request and varied each turn (no repeated filler).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBar/RealtimeHubTools.swift | 120 ++++++++++++++++--
 1 file changed, 109 insertions(+), 11 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index d3ba37d796d..f00d5f71311 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -18,8 +18,12 @@ enum HubTool: String {
   /// Non-blocking: the model acknowledges and moves on.
   case spawnAgent = "spawn_agent"
   /// Read the user's tasks locally (TasksStore) and return them inline to speak — a
-  /// fast synchronous READ, NOT a background agent.
+  /// fast synchronous READ, NOT a background agent. Overdue + due-today only.
   case getTasks = "get_tasks"
+  /// Read the user's full action-item list from the backend with filters (completed,
+  /// due-date range). Fast READ — use for completed tasks, date ranges, or the whole list
+  /// (get_tasks only covers overdue + due-today).
+  case getActionItems = "get_action_items"
   /// Read what Omi knows about the user (memories / facts) and return it inline to speak.
   /// Fast synchronous READ — the answer to "who am I" / "what do you know about me".
   case getMemories = "get_memories"
@@ -31,6 +35,13 @@ enum HubTool: String {
   /// List the user's MOST RECENT conversations, newest first (titles + summaries, no
   /// transcripts). Fast READ — the answer to "most recent / latest / last conversation".
   case getConversations = "get_conversations"
+  /// Formatted recap of what the user actually DID on their Mac — apps used (with minutes),
+  /// conversations, tasks, focus, screen activity. Fast LOCAL READ — the answer to "what did I
+  /// do yesterday / today", "which apps did I use the most", "how did I spend my time".
+  case getDailyRecap = "get_daily_recap"
+  /// Semantically search the user's on-screen history (what they saw / read / worked on).
+  /// Fast LOCAL READ — "when was I looking at X", "find where I read about Y".
+  case searchScreenHistory = "search_screen_history"
   /// Create a new task / to-do / reminder for the user. Fast synchronous WRITE.
   case createActionItem = "create_action_item"
   /// Update an existing task (mark done, change text/due). Needs the task id from get_tasks.
@@ -53,24 +64,34 @@ enum RealtimeHubTools {
 
     IMPORTANT: You CAN read the user's Omi data directly with fast tools — their tasks \
     (get_tasks), what Omi knows about them / their memories & facts (get_memories, \
-    search_memories), and their past conversations (search_conversations) — and you can \
-    make simple task changes (create_action_item, update_action_item). For anything in \
+    search_memories), their past conversations (search_conversations), what they DID on \
+    their Mac (get_daily_recap), and their on-screen history (search_screen_history) — and \
+    you can make simple task changes (create_action_item, update_action_item). For anything in \
     their OTHER apps (calendar, notes, emails, messages, files, reminders, browser) or any \
     multi-step "do X for me" work, use spawn_agent — it hands the request to a background \
     agent that has those tools and can act in the user's apps.
 
-    Using tools: the moment a request needs a tool, briefly acknowledge it OUT LOUD in your \
-    own natural, varied words (keep it short, and don't include any answer or data you don't \
-    have yet), then immediately call the tool. For a READ tool (get_tasks, get_memories, \
-    search_memories, search_conversations, ask_higher_model), speak its result after it \
-    returns. NEVER put an answer — real or guessed — in that acknowledgment, NEVER skip the \
-    tool call, and never read tool JSON or ids aloud. You cannot see the user's data or \
-    screen without calling a tool.
+    Using tools: when a request needs a tool, ALWAYS give a short spoken heads-up first so the \
+    user knows you're on it and that it won't be instant — then call the tool and speak the \
+    result when it returns. Never go silent during a tool call; the user can't see what you're \
+    doing, so a quiet gap feels broken. The catch is variety: that heads-up must be SPECIFIC to \
+    what they actually asked and DIFFERENT every time. Name the real thing you're fetching — \
+    "Pulling up yesterday's activity…", "Scanning your task list…", "Digging through your notes \
+    on the launch…", "Checking your memories for that…", "Getting the latest on that, one \
+    sec…". The thing to avoid is repetition: do NOT reach for the same generic opener ("let me \
+    check", "let me look that up") turn after turn — it's what makes you sound robotic. Keep it \
+    to a few words, vary the wording each turn, and don't include any answer or data you don't \
+    have yet. For a slower step (ask_higher_model, spawn_agent) it's fine to signal it'll take a \
+    moment. NEVER speak an answer — real or guessed — before the tool returns, NEVER skip the \
+    tool call, and never read tool JSON or ids aloud. You cannot see the user's data or screen \
+    without calling a tool.
 
     Decide what to do with each request:
     - The user's TASKS / to-dos / what's due — a READ ("what are my tasks", "what's due \
     today", "what's on my list", "do I have anything today"): you MUST call get_tasks and \
-    speak ONLY what it returns. Never guess, summarize from memory, or make up tasks.
+    speak ONLY what it returns. Never guess, summarize from memory, or make up tasks. For \
+    COMPLETED tasks ("what did I finish"), a SPECIFIC due-date range ("what's due next week"), \
+    or the FULL list ("all my tasks"), call get_action_items instead (it supports filters).
     - WHO the user is / what you know about them / their memories or facts ("who am I", \
     "what do you know about me", "what are my preferences"): you MUST call get_memories (no \
     query) and speak what it returns. For a SPECIFIC fact ("what's my dog's name", "where do \
@@ -83,6 +104,21 @@ enum RealtimeHubTools {
     - What the user DISCUSSED about a TOPIC ("what did I say about X", "what did we decide on \
     Y", "find the conversation about Z"): call search_conversations with a focused query and \
     speak the result.
+    - The user's own ACTIVITY / what they DID / how they spent their time ("what did I do \
+    yesterday", "what did I do today", "which apps did I use the most", "how did I spend my \
+    morning", "summarize my day"): you MUST call get_daily_recap (days_ago: 0 = today, 1 = \
+    yesterday) and speak a SHORT spoken summary of the highlights it returns — top apps, key \
+    conversations, tasks. Do NOT use search_conversations or spawn_agent for this, and never \
+    guess; this is exactly what get_daily_recap is for.
+    - What the user SAW / read / worked on ON SCREEN ("when was I looking at X", "find where I \
+    read about Y", "what was I doing in app Z"): call search_screen_history with a focused \
+    query and speak the result.
+    - ADVICE about the user's OWN productivity / workflow / habits / focus ("how can I improve \
+    my workflow", "how can I be more productive", "what should I change", "how am I doing", \
+    "where am I wasting time"): do NOT answer generically. FIRST call get_daily_recap (days_ago: \
+    0 for today, 7 for the week) — and get_action_items when tasks matter — then base EVERY \
+    suggestion on what they ACTUALLY did: their apps, distracted vs focused sessions, and \
+    overdue / duplicate tasks. Generic advice with no tool call is a failure here.
     - ADD a task / to-do / reminder ("remind me to…", "add … to my list", "I need to…"): \
     call create_action_item with a clear `description` (and `due_at` if a time was given), \
     then confirm out loud. CHANGE an existing task (mark done, edit, reschedule): first \
@@ -187,6 +223,68 @@ enum RealtimeHubTools {
           + "Fast synchronous read. Speak the result.",
         "parameters": ["type": "object", "properties": [:]],
       ],
+      [
+        "type": "function",
+        "name": HubTool.getDailyRecap.rawValue,
+        "description":
+          "Get a recap of what the user actually DID on their Mac — apps used (with minutes), "
+          + "conversations, tasks, focus sessions, and screen activity — for a day. THIS is the tool "
+          + "for 'what did I do yesterday', 'what did I do today', 'which apps did I use the most', "
+          + "'how did I spend my time'. Do NOT use search_conversations or spawn_agent for these. "
+          + "Fast synchronous read — speak a short summary of what it returns.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "days_ago": [
+              "type": "number",
+              "description": "0 = today, 1 = yesterday (default), 7 = the past week.",
+            ]
+          ],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.searchScreenHistory.rawValue,
+        "description":
+          "Search the user's on-screen history — what they saw, read, or worked on — by meaning. "
+          + "Use for 'when was I looking at X', 'find where I read about Y', 'what was I doing in "
+          + "app Z'. Returns matching moments with the app and context. Fast synchronous read. "
+          + "Speak the result.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "query": [
+              "type": "string", "description": "What the user was looking at / reading / doing.",
+            ],
+            "days": ["type": "number", "description": "How many days back to search; default 7."],
+          ],
+          "required": ["query"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.getActionItems.rawValue,
+        "description":
+          "Read the user's tasks / to-dos from the backend, with optional filters. Use for "
+          + "COMPLETED tasks ('what did I finish'), a DATE RANGE ('what's due next week'), or the "
+          + "FULL list ('all my tasks') — for plain 'what's due today / overdue', prefer get_tasks. "
+          + "Fast synchronous read. Speak a short summary of what it returns.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "completed": [
+              "type": "boolean",
+              "description": "true = only done tasks, false = only open tasks. Omit for both.",
+            ],
+            "due_start_date": [
+              "type": "string", "description": "Optional ISO-8601 start of the due-date range.",
+            ],
+            "due_end_date": [
+              "type": "string", "description": "Optional ISO-8601 end of the due-date range.",
+            ],
+          ],
+        ],
+      ],
       [
         "type": "function",
         "name": HubTool.createActionItem.rawValue,

From dd9e9cbce159fb8b5a9acab9ac7730035c041e4c Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 15:25:12 -0400
Subject: [PATCH 18/35] feat(desktop): dispatch the hub's get_daily_recap,
 search_screen_history, get_action_items

Wire the three new hub tools in hubDidRequestTool: get_daily_recap and
search_screen_history reuse the local ChatToolExecutor (on-device activity DB, the
same path the desktop chat uses); get_action_items reads the backend via
APIClient.toolGetActionItems with completed/due-date filters. Add a small argInt
helper to dedupe Int argument parsing across the read cases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index 894159abf50..d6d0534dddc 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -323,6 +323,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
       return
     }
     func arg(_ key: String) -> String { (arguments[key] as? String) ?? turnTranscript }
+    func argInt(_ key: String) -> Int? { (arguments[key] as? Int) ?? (arguments[key] as? NSNumber)?.intValue }
     switch tool {
     case .askHigherModel:
       let query = arg("query")
@@ -390,6 +391,47 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
           limit: 3, includeTranscript: false
         ).resultText
       }
+    case .getDailyRecap:
+      // Fast LOCAL read of the on-device activity DB — apps/minutes, conversations, tasks,
+      // focus, screen context. Reuses the SAME executor the desktop chat uses, so voice and
+      // chat answer "what did I do yesterday" from one code path.
+      let daysAgo = argInt("days_ago") ?? 1
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "days_ago=\(daysAgo)",
+        emptyText: "I don't have any activity recorded for then.",
+        errorText: "Could not pull up your activity right now."
+      ) {
+        await ChatToolExecutor.execute(
+          ToolCall(name: "get_daily_recap", arguments: ["days_ago": daysAgo], thoughtSignature: nil))
+      }
+    case .getActionItems:
+      // Backend READ of the full task list with filters (completed / due-date range) — the
+      // capable sibling of the local get_tasks. Same APIClient path the chat agent uses.
+      let completed = arguments["completed"] as? Bool
+      let dueStart = arguments["due_start_date"] as? String
+      let dueEnd = arguments["due_end_date"] as? String
+      runToolAndSpeak(
+        callId: callId, name: name, detail: completed.map { "completed=\($0)" } ?? "",
+        emptyText: "I couldn't find any matching tasks.",
+        errorText: "Could not read your tasks right now."
+      ) {
+        try await APIClient.shared.toolGetActionItems(
+          limit: 25, completed: completed, dueStartDate: dueStart, dueEndDate: dueEnd
+        ).resultText
+      }
+    case .searchScreenHistory:
+      // Fast LOCAL semantic search over screen history (same executor as chat).
+      let query = arg("query")
+      var toolArgs: [String: Any] = ["query": query]
+      if let days = argInt("days") { toolArgs["days"] = days }
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "q=\"\(query.prefix(60))\"",
+        emptyText: "I couldn't find anything on your screen about that.",
+        errorText: "Could not search your screen history right now."
+      ) {
+        await ChatToolExecutor.execute(
+          ToolCall(name: "search_screen_history", arguments: toolArgs, thoughtSignature: nil))
+      }
     case .createActionItem:
       let description = (arguments["description"] as? String)?
         .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""

From cc5a8e2715047dfb03454ff1a4e31c6ceb261e63 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 15:25:18 -0400
Subject: [PATCH 19/35] test(desktop): stub the new hub data tools in the test
 harness

Add stub results for get_daily_recap, search_screen_history, and get_action_items so
the RealtimeHubTestHarness switch stays exhaustive and hub_test_turn can exercise the
full turn loop for these tools.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Sources/FloatingControlBar/RealtimeHubTestHarness.swift    | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
index 0a01753181d..ed9af5c461a 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
@@ -123,6 +123,9 @@ final class RealtimeHubTestHarness: NSObject, RealtimeHubSessionDelegate {
     case .searchMemories: stub = "Your dog's name is Rex."
     case .searchConversations: stub = "On Monday you discussed the launch timeline."
     case .getConversations: stub = "Most recent: today, 'Standup notes'. Before that: yesterday, 'Design review'."
+    case .getActionItems: stub = "Open: Buy milk (due tomorrow). Completed: Ship the PR."
+    case .getDailyRecap: stub = "Yesterday: 3 hrs in Xcode, 1 hr in Safari; 2 conversations; 1 task created."
+    case .searchScreenHistory: stub = "Found it: yesterday afternoon you were reading the launch doc in Safari."
     case .createActionItem: stub = "Created task: Example task."
     case .updateActionItem: stub = "Updated the task."
     case .spawnAgent: stub = "Started a background agent."

From 59996555a11278b7ea207d456b3751b2987beee4 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 16:02:11 -0400
Subject: [PATCH 20/35] feat(desktop): local <about_user> identity-card builder
 for the realtime hub

---
 .../FloatingControlBar/AboutUserCard.swift    | 49 +++++++++++++++++++
 .../Desktop/Tests/AboutUserCardTests.swift    | 28 +++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
 create mode 100644 desktop/macos/Desktop/Tests/AboutUserCardTests.swift

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
new file mode 100644
index 00000000000..1e17bfa7614
--- /dev/null
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
@@ -0,0 +1,49 @@
+import Foundation
+
+/// Builds the compact, local-only `<about_user>` block injected into the hub's
+/// system instruction at warm time. Identity + rough situation only; exact/current
+/// lists stay behind the read tools (the card hedges this). No network calls.
+enum AboutUserCard {
+  /// Pure formatter — kept separate from `build()` so it is unit-testable.
+  static func render(name: String, facts: [String], overdue: Int, dueToday: Int) -> String {
+    var lines: [String] = ["<about_user>"]
+    if !name.isEmpty { lines.append("Name: \(name)") }
+    lines.append("What Omi knows about them:")
+    if facts.isEmpty {
+      lines.append("- Nothing saved yet.")
+    } else {
+      lines.append(contentsOf: facts.map { "- \($0)" })
+    }
+    if overdue == 0 && dueToday == 0 {
+      lines.append("Right now: nothing overdue or due today.")
+    } else {
+      lines.append("Right now: \(overdue) overdue, \(dueToday) due today.")
+    }
+    lines.append(
+      "(This is a quick snapshot — for the exact or current list, call get_tasks / get_action_items.)")
+    lines.append("</about_user>")
+    return lines.joined(separator: "\n")
+  }
+
+  /// Gathers local data (auth name, top memories, task counts) and renders the card.
+  /// Best-effort: any failure degrades to a smaller card, never throws.
+  @MainActor
+  static func build() async -> String {
+    let name = AuthService.shared.givenName.trimmingCharacters(in: .whitespacesAndNewlines)
+
+    var facts: [String] = []
+    if let memories = try? await MemoryStorage.shared.getLocalMemories(limit: 8) {
+      facts = memories.prefix(8).compactMap { mem in
+        let t = mem.content.trimmingCharacters(in: .whitespacesAndNewlines)
+        guard !t.isEmpty else { return nil }
+        return t.count > 120 ? String(t.prefix(117)) + "…" : t
+      }
+    }
+
+    await TasksStore.shared.loadDashboardTasks()
+    let overdue = TasksStore.shared.overdueTasks.count
+    let dueToday = TasksStore.shared.todaysTasks.count
+
+    return render(name: name, facts: facts, overdue: overdue, dueToday: dueToday)
+  }
+}
diff --git a/desktop/macos/Desktop/Tests/AboutUserCardTests.swift b/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
new file mode 100644
index 00000000000..389ab2f6bb6
--- /dev/null
+++ b/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
@@ -0,0 +1,28 @@
+import XCTest
+@testable import Omi_Computer
+
+final class AboutUserCardTests: XCTestCase {
+    func testRenderIncludesNameFactsCountsAndHedge() {
+        let card = AboutUserCard.render(
+            name: "Sam",
+            facts: ["Lives in San Francisco", "Prefers concise answers"],
+            overdue: 2,
+            dueToday: 3
+        )
+        XCTAssertTrue(card.contains("<about_user>"))
+        XCTAssertTrue(card.contains("</about_user>"))
+        XCTAssertTrue(card.contains("Name: Sam"))
+        XCTAssertTrue(card.contains("- Lives in San Francisco"))
+        XCTAssertTrue(card.contains("- Prefers concise answers"))
+        XCTAssertTrue(card.contains("2 overdue"))
+        XCTAssertTrue(card.contains("3 due today"))
+        XCTAssertTrue(card.contains("snapshot"))
+    }
+
+    func testRenderEmptyState() {
+        let card = AboutUserCard.render(name: "", facts: [], overdue: 0, dueToday: 0)
+        XCTAssertFalse(card.contains("Name:"))                 // no name line when empty
+        XCTAssertTrue(card.contains("Nothing saved"))          // facts empty-state
+        XCTAssertTrue(card.contains("nothing overdue or due today"))
+    }
+}

From 47800765f429cc4081c83f0d3fbae1e9d1c26197 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 16:05:25 -0400
Subject: [PATCH 21/35] fix(desktop): AboutUserCard name falls back to
 displayName; tighten hedge assertion

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../Desktop/Sources/FloatingControlBar/AboutUserCard.swift    | 4 +++-
 desktop/macos/Desktop/Tests/AboutUserCardTests.swift          | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
index 1e17bfa7614..6c9eefe9c35 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
@@ -29,7 +29,9 @@ enum AboutUserCard {
   /// Best-effort: any failure degrades to a smaller card, never throws.
   @MainActor
   static func build() async -> String {
-    let name = AuthService.shared.givenName.trimmingCharacters(in: .whitespacesAndNewlines)
+    let auth = AuthService.shared
+    let rawName = auth.givenName.isEmpty ? auth.displayName : auth.givenName
+    let name = rawName.trimmingCharacters(in: .whitespacesAndNewlines)
 
     var facts: [String] = []
     if let memories = try? await MemoryStorage.shared.getLocalMemories(limit: 8) {
diff --git a/desktop/macos/Desktop/Tests/AboutUserCardTests.swift b/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
index 389ab2f6bb6..df06db6bdbd 100644
--- a/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
+++ b/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
@@ -16,7 +16,7 @@ final class AboutUserCardTests: XCTestCase {
         XCTAssertTrue(card.contains("- Prefers concise answers"))
         XCTAssertTrue(card.contains("2 overdue"))
         XCTAssertTrue(card.contains("3 due today"))
-        XCTAssertTrue(card.contains("snapshot"))
+        XCTAssertTrue(card.contains("quick snapshot"))
     }
 
     func testRenderEmptyState() {

From d09705bb40ecaf78957ed7ebe007138e04f10503 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 16:10:30 -0400
Subject: [PATCH 22/35] feat(desktop): inject local <about_user> card +
 user-language reply into the hub system prompt (#1)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 15 ++++-
 .../RealtimeHubSession.swift                  |  8 ++-
 .../RealtimeHubTestHarness.swift              |  5 +-
 .../FloatingControlBar/RealtimeHubTools.swift | 65 ++++++++++++-------
 .../Tests/HubSystemInstructionTests.swift     | 14 ++++
 5 files changed, 78 insertions(+), 29 deletions(-)
 create mode 100644 desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index d6d0534dddc..a48dc4e0757 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -62,6 +62,16 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// Log tag for the currently-connected provider.
   private var providerTag: String { sessionProvider == .gemini ? "gemini" : "openai" }
 
+  /// Latest local identity card, injected into each new session's system instruction ("" = no card yet).
+  private var aboutUserCard: String = ""
+
+  /// Refreshes the card off the hot path: a fire-and-forget main-actor Task, so callers never wait.
+  private func refreshAboutUserCard() {
+    Task { @MainActor [weak self] in
+      self?.aboutUserCard = await AboutUserCard.build()
+    }
+  }
+
   /// Held warm so spawn_agent's pi-mono bridge boot is off the hot path. The pill
   /// spawn creates its own provider; warming this one primes node/auth caches.
   private var warmProvider: ChatProvider?
@@ -98,6 +108,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
       name: .realtimeOmniSettingsDidChange, object: nil)
     // Expose the headless E2E action (omi-ctl action hub_test_turn pcm=… provider=…).
     RealtimeHubTestHarness.registerAutomationAction()
+    refreshAboutUserCard()
   }
 
   @objc private func settingsChanged() {
@@ -105,6 +116,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     // teardown/recreate races on unrelated notifications.
     if session != nil, sessionProvider == RealtimeHubSettings.shared.provider { return }
     teardownSession()
+    refreshAboutUserCard()
     ensureWarm()
   }
 
@@ -151,7 +163,8 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   }
 
   private func startSession(provider: RealtimeHubProvider, auth: HubAuth) {
-    let s = RealtimeHubSession(provider: provider, auth: auth, delegate: self)
+    let instructions = RealtimeHubTools.systemInstruction(aboutUser: aboutUserCard)
+    let s = RealtimeHubSession(provider: provider, auth: auth, instructions: instructions, delegate: self)
     session = s
     sessionProvider = provider
     // Both providers stream native spoken audio (24k PCM) → StreamingPCMPlayer;
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
index 2e009f35140..86fe07fdc0b 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
@@ -62,6 +62,7 @@ enum HubAuth {
 final class RealtimeHubSession: NSObject {
   private let provider: RealtimeHubProvider
   private let auth: HubAuth
+  private let instructions: String
   private weak var delegate: RealtimeHubSessionDelegate?
 
   /// Mic PCM input rate per provider (Gemini 16k native, OpenAI GA needs 24k).
@@ -118,9 +119,10 @@ final class RealtimeHubSession: NSObject {
   /// clear which model produced which event.
   private var tag: String { "RealtimeHub[\(provider == .openai ? "openai" : "gemini"):\(provider.modelID)]" }
 
-  init(provider: RealtimeHubProvider, auth: HubAuth, delegate: RealtimeHubSessionDelegate) {
+  init(provider: RealtimeHubProvider, auth: HubAuth, instructions: String, delegate: RealtimeHubSessionDelegate) {
     self.provider = provider
     self.auth = auth
+    self.instructions = instructions
     self.delegate = delegate
     super.init()
   }
@@ -402,7 +404,7 @@ final class RealtimeHubSession: NSObject {
         "type": "session.update",
         "session": [
           "type": "realtime",
-          "instructions": RealtimeHubTools.systemInstruction,
+          "instructions": instructions,
           "output_modalities": ["audio"],
           "audio": [
             "input": [
@@ -431,7 +433,7 @@ final class RealtimeHubSession: NSObject {
             "responseModalities": ["AUDIO"], "temperature": 0.3,
             "mediaResolution": "MEDIA_RESOLUTION_HIGH",
           ],
-          "systemInstruction": ["parts": [["text": RealtimeHubTools.systemInstruction]]],
+          "systemInstruction": ["parts": [["text": instructions]]],
           "tools": [["functionDeclarations": RealtimeHubTools.geminiFunctionDeclarations]],
           "inputAudioTranscription": [:],
           "outputAudioTranscription": [:],
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
index ed9af5c461a..46384dd9caf 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
@@ -44,7 +44,10 @@ final class RealtimeHubTestHarness: NSObject, RealtimeHubSessionDelegate {
   }
 
   func run(timeoutSeconds: Double) async -> [String: String] {
-    let s = RealtimeHubSession(provider: provider, auth: auth, delegate: self)
+    let s = RealtimeHubSession(
+      provider: provider, auth: auth,
+      instructions: RealtimeHubTools.systemInstruction(aboutUser: ""),
+      delegate: self)
     session = s
     let rate = s.requiredInputSampleRate
     let audio = rate == 16000 ? pcm16k : PushToTalkManager.resamplePCM16(pcm16k, from: 16000, to: rate)
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index f00d5f71311..d4354579401 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -54,13 +54,16 @@ enum HubTool: String {
 
 enum RealtimeHubTools {
 
-  static let systemInstruction = """
+  static func systemInstruction(aboutUser: String) -> String {
+    """
     You are Omi, a fast spoken-voice assistant on the user's Mac and the single hub \
     for their voice requests. You hear the user's microphone; reply by speaking, \
     conversationally. Default to one or two sentences, but when the user asks for \
     something longer or creative (a story, a detailed explanation, brainstorming), \
     give the full answer yourself — don't shorten it and don't offload it. \
-    Always reply in English.
+    Reply in the same language the user is speaking.
+
+    \(aboutUser)
 
     IMPORTANT: You CAN read the user's Omi data directly with fast tools — their tasks \
     (get_tasks), what Omi knows about them / their memories & facts (get_memories, \
@@ -87,16 +90,19 @@ enum RealtimeHubTools {
     without calling a tool.
 
     Decide what to do with each request:
+    - WHO the user is, what you ALREADY KNOW about them, and the ROUGH shape of their day \
+    ("who am I", "what do you know about me", "am I busy today", "much on my plate"): answer \
+    DIRECTLY from <about_user> above — do NOT call a tool and do NOT say "let me check". Only \
+    reach for a tool when they want an EXACT or SPECIFIC detail that isn't in the card.
     - The user's TASKS / to-dos / what's due — a READ ("what are my tasks", "what's due \
     today", "what's on my list", "do I have anything today"): you MUST call get_tasks and \
-    speak ONLY what it returns. Never guess, summarize from memory, or make up tasks. For \
-    COMPLETED tasks ("what did I finish"), a SPECIFIC due-date range ("what's due next week"), \
-    or the FULL list ("all my tasks"), call get_action_items instead (it supports filters).
-    - WHO the user is / what you know about them / their memories or facts ("who am I", \
-    "what do you know about me", "what are my preferences"): you MUST call get_memories (no \
-    query) and speak what it returns. For a SPECIFIC fact ("what's my dog's name", "where do \
-    I work"), call search_memories with a focused query. NEVER answer "I don't know" or guess \
-    — always call the tool first; this data is the whole point.
+    speak ONLY what it returns (the card's counts are a rough snapshot, not the list). Never \
+    guess or make up tasks. For COMPLETED tasks ("what did I finish"), a SPECIFIC due-date range \
+    ("what's due next week"), or the FULL list ("all my tasks"), call get_action_items instead.
+    - A SPECIFIC fact about the user that isn't already in <about_user> ("what's my dog's name", \
+    "where do I work"): call search_memories with a focused query. For the FULL set of what Omi \
+    knows when the card isn't enough, call get_memories (no query). NEVER answer "I don't know" \
+    or guess about the user without checking first.
     - The user's MOST RECENT / latest / last conversation ("what was my most recent \
     conversation", "what did we just talk about", "my recent conversations"): call \
     get_conversations (newest first) — NOT search_conversations, which is semantic and does \
@@ -126,25 +132,36 @@ enum RealtimeHubTools {
     - DOING something for the user in their OTHER apps (calendar, notes, emails, messages, \
     files, browser) or any multi-step work — create/send/open/edit/search/schedule/automate/ \
     "do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
-    function call (with a clear, self-contained `brief`). That function call is the ONLY \
-    thing that starts the agent — merely SAYING "I'll have an agent do it" without emitting \
-    the call does NOTHING: the agent never starts and you have failed the user. So always \
-    emit the spawn_agent call. You may add one short natural sentence as you call it, but \
-    never instead of it. Do NOT ask clarifying questions before spawning — spawn with what \
-    you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
-    - Everything else — general questions, facts, chit-chat, explanations, advice, jokes, \
-    and creative or long-form requests (stories, brainstorming, drafts): ANSWER YOURSELF. \
-    You are fully capable; do it directly, even when the ask is long or open-ended. Do \
-    NOT escalate just because a request seems long or hard.
-    - Call ask_higher_model in ONLY two cases: (1) the user is unhappy with your previous \
-    answer — they push back, rephrase, say you're wrong, or ask for a better/deeper/more \
-    thorough answer; or (2) you genuinely need precise, up-to-date facts (current events, \
-    specific numbers) you don't reliably know. Pass a clear `query`, then speak the result.
+    function call (with a clear, self-contained `brief` and a short `title`). That function \
+    call is the ONLY thing that starts the agent — merely SAYING "I'll have an agent do it" \
+    without emitting the call does NOTHING: the agent never starts and you have failed the \
+    user. So always emit the spawn_agent call. You may add one short natural sentence as you \
+    call it, but never instead of it. Do NOT ask clarifying questions before spawning — spawn \
+    with what you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
+    - Everything else — general questions, single facts, chit-chat, explanations, advice, \
+    jokes, and creative or long-form requests (stories, brainstorming, drafts): ANSWER \
+    YOURSELF. You are fully capable; do it directly, even when the ask is long, open-ended, \
+    or mentions a specific name, date, number, or fact — a request is NOT hard just because \
+    it contains one. Do NOT escalate based on how unsure you feel about your own knowledge: \
+    you are a poor judge of that, so escalate only on the explicit, observable signals below, \
+    never on a gut feeling.
+    - Call ask_higher_model ONLY on these explicit, observable signals — judged from what the \
+    user SAYS and the SHAPE of the request, never from how confident you feel: (1) the user is \
+    unhappy with your previous answer — they push back, rephrase, say you're wrong, or ask for \
+    a better / deeper / more thorough answer; (2) the user EXPLICITLY asks you to look it up, \
+    research it, double-check, be sure, or think hard about it; or (3) the request genuinely \
+    needs heavy multi-step reasoning or careful technical work — non-trivial math, code, or \
+    synthesizing several constraints into one answer — that a quick spoken reply would get \
+    wrong. Do NOT escalate for ordinary questions, single facts, or anything you can answer in \
+    a sentence or two. Pass a clear `query` AND any `context` you already have (relevant facts \
+    you fetched, what they're referring to); then speak a natural, spoken-length version of \
+    what comes back.
     - When you need to see what's on screen, call screenshot first. Use point_click only \
     when the user clearly asks you to click something.
 
     Keep latency low: prefer answering directly when you can.
     """
+  }
 
   /// OpenAI Realtime GA `session.tools` entries. Static `let` — built once, not rebuilt on
   /// every session (re)connect that reads it.
diff --git a/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift b/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
new file mode 100644
index 00000000000..a9cd92c1f69
--- /dev/null
+++ b/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
@@ -0,0 +1,14 @@
+import XCTest
+@testable import Omi_Computer
+
+final class HubSystemInstructionTests: XCTestCase {
+    func testInstructionInjectsCardAndUsesUserLanguage() {
+        let card = "<about_user>\nName: Sam\n</about_user>"
+        let instr = RealtimeHubTools.systemInstruction(aboutUser: card)
+        XCTAssertTrue(instr.contains(card))                                   // card injected
+        XCTAssertTrue(instr.lowercased().contains("language the user"))        // reply-in-user-language
+        XCTAssertFalse(instr.contains("Always reply in English"))             // old rule gone
+        XCTAssertTrue(instr.contains("spawn_agent"))                          // guardrails preserved
+        XCTAssertTrue(instr.contains("get_daily_recap"))
+    }
+}

From 7f84b72e2a9e56674ba058cbb6a5142bd36468bb Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 16:16:06 -0400
Subject: [PATCH 23/35] fix(desktop): restore ask_higher_model +
 everything-else lanes to spec; tighten prompt test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../FloatingControlBar/RealtimeHubTools.swift | 27 +++++++------------
 .../Tests/HubSystemInstructionTests.swift     |  2 ++
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index d4354579401..ea12b4171b3 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -138,24 +138,15 @@ enum RealtimeHubTools {
     user. So always emit the spawn_agent call. You may add one short natural sentence as you \
     call it, but never instead of it. Do NOT ask clarifying questions before spawning — spawn \
     with what you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
-    - Everything else — general questions, single facts, chit-chat, explanations, advice, \
-    jokes, and creative or long-form requests (stories, brainstorming, drafts): ANSWER \
-    YOURSELF. You are fully capable; do it directly, even when the ask is long, open-ended, \
-    or mentions a specific name, date, number, or fact — a request is NOT hard just because \
-    it contains one. Do NOT escalate based on how unsure you feel about your own knowledge: \
-    you are a poor judge of that, so escalate only on the explicit, observable signals below, \
-    never on a gut feeling.
-    - Call ask_higher_model ONLY on these explicit, observable signals — judged from what the \
-    user SAYS and the SHAPE of the request, never from how confident you feel: (1) the user is \
-    unhappy with your previous answer — they push back, rephrase, say you're wrong, or ask for \
-    a better / deeper / more thorough answer; (2) the user EXPLICITLY asks you to look it up, \
-    research it, double-check, be sure, or think hard about it; or (3) the request genuinely \
-    needs heavy multi-step reasoning or careful technical work — non-trivial math, code, or \
-    synthesizing several constraints into one answer — that a quick spoken reply would get \
-    wrong. Do NOT escalate for ordinary questions, single facts, or anything you can answer in \
-    a sentence or two. Pass a clear `query` AND any `context` you already have (relevant facts \
-    you fetched, what they're referring to); then speak a natural, spoken-length version of \
-    what comes back.
+    - Everything else — general questions, facts, chit-chat, explanations, advice, jokes, \
+    and creative or long-form requests (stories, brainstorming, drafts): ANSWER YOURSELF. \
+    You are fully capable; do it directly, even when the ask is long or open-ended. Do \
+    NOT escalate just because a request seems long or hard.
+    - Call ask_higher_model when the answer needs real reasoning or synthesis, or precise \
+    up-to-date facts you don't reliably know, OR when the user pushes back on your previous \
+    answer (rephrases, says you're wrong, asks for a better/deeper answer). Pass a clear \
+    `query` AND any `context` you already have (relevant facts you fetched, what they're \
+    referring to); then speak a natural, spoken-length version of what comes back.
     - When you need to see what's on screen, call screenshot first. Use point_click only \
     when the user clearly asks you to click something.
 
diff --git a/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift b/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
index a9cd92c1f69..52e2ff52e29 100644
--- a/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
+++ b/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
@@ -10,5 +10,7 @@ final class HubSystemInstructionTests: XCTestCase {
         XCTAssertFalse(instr.contains("Always reply in English"))             // old rule gone
         XCTAssertTrue(instr.contains("spawn_agent"))                          // guardrails preserved
         XCTAssertTrue(instr.contains("get_daily_recap"))
+        XCTAssertTrue(instr.contains("ask_higher_model"))
+        XCTAssertTrue(instr.contains("ANSWER YOURSELF"))
     }
 }

From 8367bed64d8435d21d1483c5552475752ecf5a8e Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 16:21:33 -0400
Subject: [PATCH 24/35] feat(desktop): ask_higher_model carries context +
 persona/card system prompt (#2)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../RealtimeHubController.swift               | 26 +++---
 .../FloatingControlBar/RealtimeHubTools.swift | 82 +++++++++++++++----
 .../Desktop/Tests/HubEscalationTests.swift    | 27 ++++++
 3 files changed, 106 insertions(+), 29 deletions(-)
 create mode 100644 desktop/macos/Desktop/Tests/HubEscalationTests.swift

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index a48dc4e0757..2f6e06c80cc 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -340,10 +340,14 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     switch tool {
     case .askHigherModel:
       let query = arg("query")
-      log("RealtimeHub[\(providerTag)]: tool ask_higher_model → POST /v2/chat/completions (claude-sonnet-4-6) query=\"\(query.prefix(80))\"")
+      let context = (arguments["context"] as? String) ?? ""
+      log(
+        "RealtimeHub[\(providerTag)]: tool ask_higher_model → POST /v2/chat/completions (claude-sonnet-4-6) query=\"\(query.prefix(80))\""
+      )
       Task { [weak self] in
         guard let self else { return }
-        let answer = await self.escalateToHigherModel(query)
+        let answer = await self.escalateToHigherModel(
+          query, context: context, aboutUser: self.aboutUserCard)
         self.session?.sendToolResult(callId: callId, name: name, output: answer)
       }
     case .getTasks:
@@ -590,7 +594,9 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
 
   /// ask_higher_model — reuse the EXISTING prompt-cached /v2/chat/completions
   /// (no new backend route). Returns the assistant text for the model to speak.
-  private func escalateToHigherModel(_ query: String) async -> String {
+  private func escalateToHigherModel(_ query: String, context: String, aboutUser: String)
+    async -> String
+  {
     let baseURL = await APIClient.shared.rustBackendURL
     guard !baseURL.isEmpty else { return "I couldn't reach the model right now." }
     let normalized = baseURL.hasSuffix("/") ? baseURL : baseURL + "/"
@@ -606,18 +612,8 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     } catch {
       return "I couldn't authenticate to the model."
     }
-    let body: [String: Any] = [
-      "model": "claude-sonnet-4-6",
-      "max_tokens": 1024,
-      "messages": [
-        [
-          "role": "user",
-          "content":
-            "Answer concisely for a spoken reply (a few sentences max):\n\n\(query)",
-        ]
-      ],
-      "stream": false,
-    ]
+    let body = RealtimeHubTools.escalationBody(
+      query: query, context: context, aboutUser: aboutUser)
     let t0 = Date()
     do {
       request.httpBody = try JSONSerialization.data(withJSONObject: body)
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index ea12b4171b3..89ef6f92038 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -130,21 +130,35 @@ enum RealtimeHubTools {
     then confirm out loud. CHANGE an existing task (mark done, edit, reschedule): first \
     call get_tasks to get the matching task's id, then call update_action_item with that id.
     - DOING something for the user in their OTHER apps (calendar, notes, emails, messages, \
-    files, browser) or any multi-step work — create/send/open/edit/search/schedule/automate/ \
-    "do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
+    files, browser), any multi-step work, OR anything needing a real look-up / current info \
+    from the web (research something online, find the latest on X) — create/send/open/edit/ \
+    search/schedule/automate/research/"do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
     function call (with a clear, self-contained `brief` and a short `title`). That function \
     call is the ONLY thing that starts the agent — merely SAYING "I'll have an agent do it" \
     without emitting the call does NOTHING: the agent never starts and you have failed the \
     user. So always emit the spawn_agent call. You may add one short natural sentence as you \
     call it, but never instead of it. Do NOT ask clarifying questions before spawning — spawn \
     with what you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
-    - Everything else — general questions, facts, chit-chat, explanations, advice, jokes, \
-    and creative or long-form requests (stories, brainstorming, drafts): ANSWER YOURSELF. \
-    You are fully capable; do it directly, even when the ask is long or open-ended. Do \
-    NOT escalate just because a request seems long or hard.
-    - Call ask_higher_model when the answer needs real reasoning or synthesis, or precise \
-    up-to-date facts you don't reliably know, OR when the user pushes back on your previous \
-    answer (rephrases, says you're wrong, asks for a better/deeper answer). Pass a clear \
+    - Everything else — general questions, single facts, simple look-ups you know, chit-chat, \
+    explanations, opinions, advice, jokes, and creative or long-form requests (stories, \
+    brainstorming, drafts): ANSWER YOURSELF. You are fully capable; do it directly, even when \
+    the ask is long, open-ended, or mentions a specific name, date, number, or fact — a \
+    request is NOT hard just because it contains one, and a simple look-up is NEVER a reason \
+    to escalate. Do NOT escalate based on how unsure you feel about your own knowledge: you \
+    are a poor judge of that, so escalate only on the explicit, observable signals below.
+    - There are TWO escalation paths — do not confuse them. ask_higher_model buys more \
+    INTELLIGENCE on something you could already reason about: it returns a smarter spoken \
+    answer but it does NOT browse, search, or fetch live data. spawn_agent is for DOING \
+    multi-step work and for anything needing a real look-up / current web info (see above).
+    - Call ask_higher_model ONLY on these explicit signals — judged from what the user SAYS \
+    and the SHAPE of the request, never from how unsure you feel: (1) the user is unhappy with \
+    your previous answer — pushes back, rephrases, says you're wrong, or asks for a better / \
+    deeper / more thorough answer; (2) the user explicitly asks you to think harder, be more \
+    careful, or reason it through; or (3) the request genuinely needs heavy multi-step \
+    reasoning or careful technical work — non-trivial math, complex code, or weighing several \
+    constraints into one answer — that a quick spoken reply would get wrong. Do NOT use it for \
+    simple look-ups, single facts, current events, or anything you can answer in a sentence or \
+    two — answer those yourself, or use spawn_agent if it truly needs live data. Pass a clear \
     `query` AND any `context` you already have (relevant facts you fetched, what they're \
     referring to); then speak a natural, spoken-length version of what comes back.
     - When you need to see what's on screen, call screenshot first. Use point_click only \
@@ -161,14 +175,25 @@ enum RealtimeHubTools {
         "type": "function",
         "name": HubTool.askHigherModel.rawValue,
         "description":
-          "Get a second opinion from a smarter model and receive text to speak. Use ONLY when the user "
-          + "is dissatisfied with your previous answer (pushes back, rephrases, says you're wrong, or asks "
-          + "for a better/deeper answer), OR when you genuinely need precise up-to-date facts you don't "
-          + "know. Do NOT use it for general, creative, or long-form requests — answer those yourself.",
+          "A smarter model for MORE INTELLIGENCE on something you could already reason about — it returns "
+          + "text to speak but does NOT browse, search, or fetch live data. Use ONLY when (1) the user is "
+          + "dissatisfied with your previous answer (pushes back, rephrases, says you're wrong, asks for a "
+          + "better/deeper answer), (2) the user explicitly asks you to think harder or reason it through, OR "
+          + "(3) the request needs heavy multi-step reasoning or careful technical work (non-trivial math, "
+          + "complex code, multi-constraint synthesis). Do NOT use it for simple look-ups, single facts, "
+          + "current events, or general/creative/long-form requests — answer those yourself, or use spawn_agent "
+          + "if it truly needs live data.",
         "parameters": [
           "type": "object",
           "properties": [
-            "query": ["type": "string", "description": "The full question to escalate."]
+            "query": ["type": "string", "description": "The full question to escalate."],
+            "context": [
+              "type": "string",
+              "description":
+                "Relevant context you already have that helps answer well — facts you fetched, "
+                + "what the user is referring to, or the previous answer they pushed back on. "
+                + "Include only what's relevant; omit if there's nothing useful.",
+            ],
           ],
           "required": ["query"],
         ],
@@ -399,4 +424,33 @@ enum RealtimeHubTools {
     if let items = schema["items"] as? [String: Any] { out["items"] = upcasedSchemaTypes(items) }
     return out
   }
+
+  /// System prompt for an escalated (ask_higher_model) answer: a fixed persona, followed by the
+  /// `aboutUser` card when it has content (trimmed, so a whitespace-only card adds nothing). The
+  /// realtime model voices a spoken-length version, so the higher model is told not to pre-shorten.
+  static func escalationSystemPrompt(aboutUser: String) -> String {
+    let card = aboutUser.trimmingCharacters(in: .whitespacesAndNewlines)
+    let persona = """
+      You are Omi, a knowledgeable assistant. Answer the user's question accurately and \
+      usefully. A voice assistant will relay your answer aloud and adapt the phrasing for \
+      speech, so be clear and well-structured; you don't need to pre-shorten it.
+      """
+    return card.isEmpty ? persona : persona + "\n\n" + card
+  }
+
+  /// Non-streaming chat-completions body for ask_higher_model: system prompt + query (+ context).
+  static func escalationBody(query: String, context: String, aboutUser: String) -> [String: Any] {
+    let extra = context.trimmingCharacters(in: .whitespacesAndNewlines)
+    let userContent = extra.isEmpty ? query : "\(query)\n\nContext I already have:\n\(extra)"
+    let messages: [[String: String]] = [
+      ["role": "system", "content": escalationSystemPrompt(aboutUser: aboutUser)],
+      ["role": "user", "content": userContent],
+    ]
+    return [
+      "model": "claude-sonnet-4-6",
+      "max_tokens": 1024,
+      "messages": messages,
+      "stream": false,
+    ]
+  }
 }
diff --git a/desktop/macos/Desktop/Tests/HubEscalationTests.swift b/desktop/macos/Desktop/Tests/HubEscalationTests.swift
new file mode 100644
index 00000000000..f7d185da3ea
--- /dev/null
+++ b/desktop/macos/Desktop/Tests/HubEscalationTests.swift
@@ -0,0 +1,27 @@
+import XCTest
+
+@testable import Omi_Computer
+
+final class HubEscalationTests: XCTestCase {
+  func testBodyHasSystemPromptAndAppendsContext() throws {
+    let body = RealtimeHubTools.escalationBody(
+      query: "What's the best plan?", context: "User is comparing the M3 and M4 MacBook.",
+      aboutUser: "<about_user>\nName: Sam\n</about_user>")
+    XCTAssertEqual(body["model"] as? String, "claude-sonnet-4-6")
+    let messages = try XCTUnwrap(body["messages"] as? [[String: String]])  // fail, don't crash
+    XCTAssertEqual(messages.map { $0["role"] }, ["system", "user"])
+    let system = try XCTUnwrap(messages.first?["content"])
+    let user = try XCTUnwrap(messages.last?["content"])
+    XCTAssertTrue(system.contains("<about_user>"))
+    XCTAssertTrue(user.contains("What's the best plan?"))
+    XCTAssertTrue(user.contains("M3 and M4"))  // context appended
+  }
+
+  func testBodyOmitsContextSectionWhenEmpty() throws {
+    let body = RealtimeHubTools.escalationBody(query: "Capital of France?", context: "", aboutUser: "")
+    let messages = try XCTUnwrap(body["messages"] as? [[String: String]])
+    let userContent = try XCTUnwrap(messages.last?["content"])
+    XCTAssertFalse(userContent.contains("Context"))
+    XCTAssertFalse(userContent.contains("Answer concisely for a spoken reply"))
+  }
+}

From 7781422d56b0edad1ec0784e39dc475605e5d467 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 16:29:56 -0400
Subject: [PATCH 25/35] feat(desktop): spawn_agent supplies its own title,
 skipping the redundant Haiku title call (#4)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../FloatingControlBar/RealtimeHubController.swift        | 7 +++++--
 .../Sources/FloatingControlBar/RealtimeHubTools.swift     | 8 +++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index 2f6e06c80cc..5d8a4ddafdd 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -488,14 +488,17 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
       }
     case .spawnAgent:
       let brief = arg("brief")
+      let title = (arguments["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
       let model = ShortcutSettings.shared.selectedModel.isEmpty
         ? "claude-sonnet-4-6" : ShortcutSettings.shared.selectedModel
       // Non-blocking: spawn renders its own pill ("text bubble") and runs on its
       // own ChatProvider/AgentBridge. We don't await it on the voice loop.
       // fromVoice:false — the hub model speaks its own natural acknowledgment, so the pill
       // must NOT also speak its canned randomAck ("on it") or we double up.
-      let pill = AgentPillsManager.shared.spawnFromUserQuery(brief, model: model, fromVoice: false)
-      log("RealtimeHub[\(providerTag)]: tool spawn_agent → AgentBridge pill=\"\(pill.title)\" model=\(model)")
+      let pill = AgentPillsManager.shared.spawnFromUserQuery(
+        brief, model: model, fromVoice: false,
+        preFetchedTitle: (title?.isEmpty == false) ? title : nil)
+      log("RealtimeHub[\(providerTag)]: tool spawn_agent → AgentBridge pill=\"\(pill.title)\" model=\(model) titled=\(title?.isEmpty == false)")
       // Terse directive (not speakable content): the model already said its one-line ack
       // BEFORE calling, so it should NOT generate a slow second utterance after this.
       session?.sendToolResult(
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index 89ef6f92038..c205f2cf90e 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -366,7 +366,13 @@ enum RealtimeHubTools {
           "properties": [
             "brief": [
               "type": "string", "description": "A clear, self-contained brief of the task.",
-            ]
+            ],
+            "title": [
+              "type": "string",
+              "description":
+                "A short Title Case label for the task pill (≤ ~5 words, no trailing "
+                + "punctuation), e.g. 'Draft Launch Email'.",
+            ],
           ],
           "required": ["brief"],
         ],

From 030f9ad533e59cbfb585372774ead3196177f4ad Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 17:00:42 -0400
Subject: [PATCH 26/35] revert(desktop): roll back escalation-policy prompt
 prose to lean spec (keep ask_higher_model context mechanism)

---
 .../FloatingControlBar/RealtimeHubTools.swift | 44 ++++++-------------
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index c205f2cf90e..98850b0be19 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -130,35 +130,21 @@ enum RealtimeHubTools {
     then confirm out loud. CHANGE an existing task (mark done, edit, reschedule): first \
     call get_tasks to get the matching task's id, then call update_action_item with that id.
     - DOING something for the user in their OTHER apps (calendar, notes, emails, messages, \
-    files, browser), any multi-step work, OR anything needing a real look-up / current info \
-    from the web (research something online, find the latest on X) — create/send/open/edit/ \
-    search/schedule/automate/research/"do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
+    files, browser) or any multi-step work — create/send/open/edit/search/schedule/automate/ \
+    "do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
     function call (with a clear, self-contained `brief` and a short `title`). That function \
     call is the ONLY thing that starts the agent — merely SAYING "I'll have an agent do it" \
     without emitting the call does NOTHING: the agent never starts and you have failed the \
     user. So always emit the spawn_agent call. You may add one short natural sentence as you \
     call it, but never instead of it. Do NOT ask clarifying questions before spawning — spawn \
     with what you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
-    - Everything else — general questions, single facts, simple look-ups you know, chit-chat, \
-    explanations, opinions, advice, jokes, and creative or long-form requests (stories, \
-    brainstorming, drafts): ANSWER YOURSELF. You are fully capable; do it directly, even when \
-    the ask is long, open-ended, or mentions a specific name, date, number, or fact — a \
-    request is NOT hard just because it contains one, and a simple look-up is NEVER a reason \
-    to escalate. Do NOT escalate based on how unsure you feel about your own knowledge: you \
-    are a poor judge of that, so escalate only on the explicit, observable signals below.
-    - There are TWO escalation paths — do not confuse them. ask_higher_model buys more \
-    INTELLIGENCE on something you could already reason about: it returns a smarter spoken \
-    answer but it does NOT browse, search, or fetch live data. spawn_agent is for DOING \
-    multi-step work and for anything needing a real look-up / current web info (see above).
-    - Call ask_higher_model ONLY on these explicit signals — judged from what the user SAYS \
-    and the SHAPE of the request, never from how unsure you feel: (1) the user is unhappy with \
-    your previous answer — pushes back, rephrases, says you're wrong, or asks for a better / \
-    deeper / more thorough answer; (2) the user explicitly asks you to think harder, be more \
-    careful, or reason it through; or (3) the request genuinely needs heavy multi-step \
-    reasoning or careful technical work — non-trivial math, complex code, or weighing several \
-    constraints into one answer — that a quick spoken reply would get wrong. Do NOT use it for \
-    simple look-ups, single facts, current events, or anything you can answer in a sentence or \
-    two — answer those yourself, or use spawn_agent if it truly needs live data. Pass a clear \
+    - Everything else — general questions, facts, chit-chat, explanations, advice, jokes, \
+    and creative or long-form requests (stories, brainstorming, drafts): ANSWER YOURSELF. \
+    You are fully capable; do it directly, even when the ask is long or open-ended. Do \
+    NOT escalate just because a request seems long or hard.
+    - Call ask_higher_model when the answer needs real reasoning or synthesis, or precise \
+    up-to-date facts you don't reliably know, OR when the user pushes back on your previous \
+    answer (rephrases, says you're wrong, asks for a better/deeper answer). Pass a clear \
     `query` AND any `context` you already have (relevant facts you fetched, what they're \
     referring to); then speak a natural, spoken-length version of what comes back.
     - When you need to see what's on screen, call screenshot first. Use point_click only \
@@ -175,14 +161,10 @@ enum RealtimeHubTools {
         "type": "function",
         "name": HubTool.askHigherModel.rawValue,
         "description":
-          "A smarter model for MORE INTELLIGENCE on something you could already reason about — it returns "
-          + "text to speak but does NOT browse, search, or fetch live data. Use ONLY when (1) the user is "
-          + "dissatisfied with your previous answer (pushes back, rephrases, says you're wrong, asks for a "
-          + "better/deeper answer), (2) the user explicitly asks you to think harder or reason it through, OR "
-          + "(3) the request needs heavy multi-step reasoning or careful technical work (non-trivial math, "
-          + "complex code, multi-constraint synthesis). Do NOT use it for simple look-ups, single facts, "
-          + "current events, or general/creative/long-form requests — answer those yourself, or use spawn_agent "
-          + "if it truly needs live data.",
+          "Get a second opinion from a smarter model and receive text to speak. Use ONLY when the user "
+          + "is dissatisfied with your previous answer (pushes back, rephrases, says you're wrong, or asks "
+          + "for a better/deeper answer), OR when you genuinely need precise up-to-date facts you don't "
+          + "know. Do NOT use it for general, creative, or long-form requests — answer those yourself.",
         "parameters": [
           "type": "object",
           "properties": [

From d4f9d1b142ee0af49df2ce1ff2ec8339b1b14667 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 17:18:27 -0400
Subject: [PATCH 27/35] =?UTF-8?q?feat(desktop):=20mandatory=20spoken=20hea?=
 =?UTF-8?q?ds-up=20before=20slow=20tools=20(ask=5Fhigher=5Fmodel/spawn=5Fa?=
 =?UTF-8?q?gent)=20=E2=80=94=20no=20more=20dead=20silence?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Sources/FloatingControlBar/RealtimeHubTools.swift       | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index 98850b0be19..53fc3db14f1 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -84,8 +84,10 @@ enum RealtimeHubTools {
     sec…". The thing to avoid is repetition: do NOT reach for the same generic opener ("let me \
     check", "let me look that up") turn after turn — it's what makes you sound robotic. Keep it \
     to a few words, vary the wording each turn, and don't include any answer or data you don't \
-    have yet. For a slower step (ask_higher_model, spawn_agent) it's fine to signal it'll take a \
-    moment. NEVER speak an answer — real or guessed — before the tool returns, NEVER skip the \
+    have yet. This matters MOST for the slow steps: BEFORE you call ask_higher_model or spawn_agent you \
+    MUST first say a brief, varied heads-up that you're thinking it through (e.g. "let me work \
+    that through…", "give me a second on that…") — these take several seconds and silence feels \
+    broken. NEVER speak an answer — real or guessed — before the tool returns, NEVER skip the \
     tool call, and never read tool JSON or ids aloud. You cannot see the user's data or screen \
     without calling a tool.
 

From 60802eaad3afabf10d08bd9d2e85b3c99a12b2b9 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:13 -0400
Subject: [PATCH 28/35] feat(desktop): playful 5-bar voice-reactive PTT mic
 waveform
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the floating-bar pulsing red dot with VoiceWaveformBars — 5 chunky
bars that bounce to the live mic level (auto-gain + underdamped spring), drawn
via TimelineView+Canvas reading AudioLevelMonitor.shared.microphoneLevel.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../VoiceWaveformBars.swift                   | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
new file mode 100644
index 00000000000..8b038aaaaf6
--- /dev/null
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
@@ -0,0 +1,134 @@
+import SwiftUI
+
+/// Playful, compact mic visualizer shown in the floating control bar while
+/// push-to-talk is active — a few chunky bars that bounce to the user's voice
+/// (HeyClicky-style), replacing the old pulsing red dot.
+///
+/// Animation notes (this is what makes it actually move):
+/// - `TimelineView(.animation)` is the clock. The Canvas closure **uses
+///   `timeline.date`** every frame (via `model.advance(to:)`) so SwiftUI treats
+///   the drawing as changed each tick and redraws — without referencing the
+///   per-frame date the Canvas is cached and freezes (the original bug).
+/// - We read `AudioLevelMonitor.shared.microphoneLevel` (one RMS scalar, ~5 Hz)
+///   each frame and spring the bars toward it at 60fps, so 5 Hz data still looks
+///   smooth. Per-bar phase + a center arch make it feel alive, not mechanical.
+/// - `paused: !isActive` stops the loop when PTT isn't listening; the bars are a
+///   live `@State` model (no retained history), so each session starts fresh and
+///   never shows a frozen "last word."
+struct VoiceWaveformBars: View {
+    let isActive: Bool
+
+    private static let barCount = 5
+    private static let barWidth: CGFloat = 4
+    private static let barSpacing: CGFloat = 3
+    private static let barHeight: CGFloat = 18
+    private static let fillGradient = Gradient(colors: [OmiColors.purpleAccent, OmiColors.purplePrimary])
+
+    @State private var model: WaveBarsModel
+
+    init(isActive: Bool) {
+        self.isActive = isActive
+        _model = State(initialValue: WaveBarsModel(barCount: Self.barCount))
+    }
+
+    private var width: CGFloat {
+        let n = CGFloat(Self.barCount)
+        return n * Self.barWidth + (n - 1) * Self.barSpacing
+    }
+
+    var body: some View {
+        TimelineView(.animation(paused: !isActive)) { timeline in
+            Canvas { context, size in
+                let level = isActive ? CGFloat(AudioLevelMonitor.shared.microphoneLevel) : 0
+                model.advance(to: timeline.date, level: level, active: isActive)
+                draw(into: &context, size: size)
+            }
+        }
+        .frame(width: width, height: Self.barHeight)
+        .accessibilityHidden(true)
+    }
+
+    private func draw(into context: inout GraphicsContext, size: CGSize) {
+        let minH: CGFloat = 2
+        let maxH = size.height
+        let step = Self.barWidth + Self.barSpacing
+        let centerY = size.height / 2
+
+        for i in 0..<Self.barCount {
+            let x = CGFloat(i) * step
+            let h = max(minH, minH + (maxH - minH) * model.values[i])
+            let path = Path(
+                roundedRect: CGRect(x: x, y: centerY - h / 2, width: Self.barWidth, height: h),
+                cornerRadius: Self.barWidth / 2
+            )
+            context.fill(
+                path,
+                with: .linearGradient(
+                    Self.fillGradient,
+                    startPoint: CGPoint(x: x, y: centerY - h / 2),
+                    endPoint: CGPoint(x: x, y: centerY + h / 2)
+                )
+            )
+        }
+    }
+}
+
+/// Per-bar bounce state for `VoiceWaveformBars`. Advanced once per frame from a
+/// single mic level. Reference type so it persists across the Canvas redraws.
+@MainActor
+final class WaveBarsModel {
+    let barCount: Int
+    private(set) var values: [CGFloat]
+    private var velocities: [Double]
+
+    private let phases: [Double]
+    private let speeds: [Double]
+    private let weights: [Double]
+    private var lastTime: CFTimeInterval?
+    private var envelope: Double = 0 // decaying recent-peak follower for auto-gain
+
+    // Underdamped spring -> visible bounce/overshoot (ζ ≈ 0.35).
+    private let stiffness: Double = 200
+    private let damping: Double = 10
+
+    init(barCount: Int) {
+        self.barCount = barCount
+        values = Array(repeating: 0, count: barCount)
+        velocities = Array(repeating: 0, count: barCount)
+        phases = (0..<barCount).map { Double($0) * 1.9 }
+        speeds = (0..<barCount).map { 6.0 + 2.5 * sin(Double($0) * 1.3) }
+        // Center bars taller -> a friendly arch.
+        let mid = Double(barCount - 1) / 2
+        weights = (0..<barCount).map { i in
+            0.72 + 0.45 * (1.0 - abs(Double(i) - mid) / max(mid, 1.0))
+        }
+    }
+
+    func advance(to date: Date, level: CGFloat, active: Bool) {
+        let now = date.timeIntervalSinceReferenceDate
+        // Clamp dt small so the spring integration stays stable.
+        let dt: Double = lastTime.map { min(0.032, max(0.0, now - $0)) } ?? (1.0 / 60.0)
+        lastTime = now
+
+        let lvl = Double(max(0, level))
+        // Auto-gain: normalize against a decaying recent peak so the bars use the
+        // full height no matter how loud the mic actually is (fixes "barely moving").
+        envelope = max(lvl, envelope - 0.7 * dt)
+        let norm = envelope > 0.04 ? min(1.0, lvl / envelope) : 0.0
+        let gained = pow(norm, 0.75)
+
+        for i in 0..<barCount {
+            // Lively idle bounce so it always feels alive while listening.
+            let idle = active ? (0.14 + 0.12 * (0.5 + 0.5 * sin(now * speeds[i] + phases[i]))) : 0.0
+            let wobble = 0.55 + 0.45 * sin(now * speeds[i] + phases[i])
+            let target = max(idle, min(1.0, gained * weights[i] * wobble))
+
+            // Underdamped spring (semi-implicit Euler) -> bouncy overshoot.
+            let x = Double(values[i])
+            let accel = stiffness * (target - x) - damping * velocities[i]
+            velocities[i] += accel * dt
+            let nx = x + velocities[i] * dt
+            values[i] = CGFloat(max(0.0, min(1.0, nx)))
+        }
+    }
+}

From e611b62eb064115630915be42160aa3078289966 Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:31 -0400
Subject: [PATCH 29/35] refactor(desktop): use VoiceWaveformBars in
 floating-bar listening view

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBar/FloatingControlBarView.swift       | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
index 763fd6f3494..9e06e25dbb9 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
@@ -360,12 +360,8 @@ struct FloatingControlBarView: View {
 
     private var voiceListeningView: some View {
         HStack(spacing: 8) {
-            // Pulsing mic icon
-            Circle()
-                .fill(Color.red)
-                .frame(width: 10, height: 10)
-                .scaleEffect(state.isVoiceListening ? 1.2 : 1.0)
-                .animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: state.isVoiceListening)
+            // Playful realtime mic waveform (replaces the old pulsing red dot)
+            VoiceWaveformBars(isActive: state.isVoiceListening)
 
             Image(systemName: "mic.fill")
                 .scaledFont(size: 14, weight: .semibold)

From 46d1b0c4eae317e1ffe9bb3ce56e4a93b0370f5b Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:31 -0400
Subject: [PATCH 30/35] refactor(desktop): use VoiceWaveformBars in voice
 follow-up view; purple bg

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Sources/FloatingControlBar/AIResponseView.swift      | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift
index 42e9c5ba788..bb11e6cc18e 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift
@@ -349,11 +349,8 @@ struct AIResponseView: View {
 
     private var voiceFollowUpView: some View {
         HStack(spacing: 8) {
-            Circle()
-                .fill(Color.red)
-                .frame(width: 10, height: 10)
-                .scaleEffect(1.2)
-                .animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: isVoiceFollowUp)
+            // Playful realtime mic waveform (replaces the old pulsing red dot)
+            VoiceWaveformBars(isActive: isVoiceFollowUp)
 
             Image(systemName: "mic.fill")
                 .scaledFont(size: 14, weight: .semibold)
@@ -375,7 +372,7 @@ struct AIResponseView: View {
         }
         .padding(.horizontal, 10)
         .padding(.vertical, 8)
-        .background(Color.red.opacity(0.15))
+        .background(OmiColors.purplePrimary.opacity(0.12))
         .cornerRadius(8)
     }
 

From f221dc3d13a8722139de775db3b6a0cb187447ba Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:31 -0400
Subject: [PATCH 31/35] feat(desktop): feed live mic level to the PTT waveform
 via AudioLevelMonitor

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Sources/FloatingControlBar/PushToTalkManager.swift      | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
index 8daa4225417..0e7a283bd02 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
@@ -974,7 +974,11 @@ class PushToTalkManager: ObservableObject {
               self.transcriptionService?.sendAudio(audioData)
             }
           },
-          onAudioLevel: { _ in }
+          onAudioLevel: { level in
+            // Feed the floating-bar mic waveform (VoiceWaveformBars). Throttled to ~5 Hz
+            // inside the monitor; used only for visualization.
+            AudioLevelMonitor.shared.updateMicrophoneLevel(level)
+          }
         )
         log("PushToTalkManager: mic capture started (batch=\(batchMode))")
       } catch {

From aa4e1d8224dc7fa6b4a1f0feafad15c8e21d978c Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:31 -0400
Subject: [PATCH 32/35] fix(desktop): typed follow-up after a voice turn is no
 longer spoken (force fromVoice:false)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../FloatingControlBar/FloatingControlBarWindow.swift  | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
index d97aa6cb7e0..bd6bdf2c1ec 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
@@ -1325,12 +1325,16 @@ class FloatingControlBarManager {
         guard let provider = activeFloatingProvider() else { return }
 
         // Re-wire the onSendQuery to use the isolated floating-bar provider.
-        // Subsequent typed messages also go through the AI router.
+        // Subsequent typed messages also go through the AI router. A message arriving
+        // through onSendQuery was always TYPED (PTT/voice bypass this closure and call
+        // routeQuery directly), so force fromVoice:false — otherwise a typed follow-up
+        // after a voice turn inherits the stale currentQueryFromVoice=true and gets
+        // spoken aloud.
         window.onSendQuery = { [weak self, weak window, weak provider] message in
             guard let self = self, let window = window, let provider = provider else { return }
             Task { @MainActor in
-                await self.withQueryTracer(query: message, fromVoice: window.state.currentQueryFromVoice) {
-                    await self.routeQuery(message, barWindow: window, provider: provider, fromVoice: window.state.currentQueryFromVoice)
+                await self.withQueryTracer(query: message, fromVoice: false) {
+                    await self.routeQuery(message, barWindow: window, provider: provider, fromVoice: false)
                 }
             }
         }

From c48c434fe490946a0dce6757d44687a608ef068f Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:54 -0400
Subject: [PATCH 33/35] refactor(desktop): soften the slow-tool heads-up prompt
 wording

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Sources/FloatingControlBar/RealtimeHubTools.swift       | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index 53fc3db14f1..98850b0be19 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -84,10 +84,8 @@ enum RealtimeHubTools {
     sec…". The thing to avoid is repetition: do NOT reach for the same generic opener ("let me \
     check", "let me look that up") turn after turn — it's what makes you sound robotic. Keep it \
     to a few words, vary the wording each turn, and don't include any answer or data you don't \
-    have yet. This matters MOST for the slow steps: BEFORE you call ask_higher_model or spawn_agent you \
-    MUST first say a brief, varied heads-up that you're thinking it through (e.g. "let me work \
-    that through…", "give me a second on that…") — these take several seconds and silence feels \
-    broken. NEVER speak an answer — real or guessed — before the tool returns, NEVER skip the \
+    have yet. For a slower step (ask_higher_model, spawn_agent) it's fine to signal it'll take a \
+    moment. NEVER speak an answer — real or guessed — before the tool returns, NEVER skip the \
     tool call, and never read tool JSON or ids aloud. You cannot see the user's data or screen \
     without calling a tool.
 

From 3b9d18ef060bd612704cfa7782adf28494047e1c Mon Sep 17 00:00:00 2001
From: vendz <vasavandit@gmail.com>
Date: Thu, 18 Jun 2026 19:01:54 -0400
Subject: [PATCH 34/35] chore(desktop): changelog entry for the PTT mic
 waveform

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 desktop/macos/CHANGELOG.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/desktop/macos/CHANGELOG.json b/desktop/macos/CHANGELOG.json
index 278ff53fa00..ac8958e9476 100644
--- a/desktop/macos/CHANGELOG.json
+++ b/desktop/macos/CHANGELOG.json
@@ -3,7 +3,8 @@
     "Faster, cheaper assistant responses via Anthropic prompt caching of the system+tools prefix and conversation history",
     "Faster voice replies (experimental): the realtime model now handles your whole voice turn \u2014 listening, deciding, and speaking \u2014 instead of the slower transcribe\u2192route\u2192answer pipeline",
     "Voice (push-to-talk) conversations now appear in your chat history",
-    "Fixed older chat messages failing to load in long chats"
+    "Fixed older chat messages failing to load in long chats",
+    "Replaced the push-to-talk red dot in the floating bar with a realtime audio equalizer"
   ],
   "releases": [
     {

From f8ac95d7c3d6407b8f0ffc243f2bec9b7f221408 Mon Sep 17 00:00:00 2001
From: Nik Shevchenko <kodjima33@gmail.com>
Date: Thu, 18 Jun 2026 20:42:38 -0400
Subject: [PATCH 35/35] fix(desktop): break up VoiceWaveformBars weight
 expression so it type-checks

The single-line bar-weight expression hit Swift's 'unable to type-check in
reasonable time' error (Double/CGFloat inference). Split into typed
sub-expressions; identical math.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../Sources/FloatingControlBar/VoiceWaveformBars.swift        | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
index 8b038aaaaf6..76737568c91 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
@@ -100,7 +100,9 @@ final class WaveBarsModel {
         // Center bars taller -> a friendly arch.
         let mid = Double(barCount - 1) / 2
         weights = (0..<barCount).map { i in
-            0.72 + 0.45 * (1.0 - abs(Double(i) - mid) / max(mid, 1.0))
+            let dist: Double = abs(Double(i) - mid) / max(mid, 1.0)
+            let arch: Double = 1.0 - dist
+            return 0.72 + 0.45 * arch
         }
     }