diff --git a/app/lib/services/devices/plaud_connection.dart b/app/lib/services/devices/plaud_connection.dart
index 92a33ffeaa6..b13978cab19 100644
--- a/app/lib/services/devices/plaud_connection.dart
+++ b/app/lib/services/devices/plaud_connection.dart
@@ -288,8 +288,7 @@ class PlaudDeviceConnection extends DeviceConnection {
   @override
   Future<StreamSubscription?> performGetImageListener({
     required void Function(OrientedImage orientedImage) onImageReceived,
-  }) async =>
-      null;
+  }) async => null;
 
   @override
   Future<StreamSubscription<List<int>>?> performGetAccelListener({void Function(int)? onAccelChange}) async => null;
@@ -321,15 +320,15 @@ class PlaudDeviceConnection extends DeviceConnection {
   List<int> _toBytes32(int v) => [v & 0xFF, (v >> 8) & 0xFF, (v >> 16) & 0xFF, (v >> 24) & 0xFF];
 
   List<int> _toBytes64(int v) => [
-        v & 0xFF,
-        (v >> 8) & 0xFF,
-        (v >> 16) & 0xFF,
-        (v >> 24) & 0xFF,
-        (v >> 32) & 0xFF,
-        (v >> 40) & 0xFF,
-        (v >> 48) & 0xFF,
-        (v >> 56) & 0xFF,
-      ];
+    v & 0xFF,
+    (v >> 8) & 0xFF,
+    (v >> 16) & 0xFF,
+    (v >> 24) & 0xFF,
+    (v >> 32) & 0xFF,
+    (v >> 40) & 0xFF,
+    (v >> 48) & 0xFF,
+    (v >> 56) & 0xFF,
+  ];
 
   int _toInt32(List<int> b) => b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
 }
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift
index 42e9c5ba788..bb11e6cc18e 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/AIResponseView.swift
@@ -349,11 +349,8 @@ struct AIResponseView: View {
 
     private var voiceFollowUpView: some View {
         HStack(spacing: 8) {
-            Circle()
-                .fill(Color.red)
-                .frame(width: 10, height: 10)
-                .scaleEffect(1.2)
-                .animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: isVoiceFollowUp)
+            // Playful realtime mic waveform (replaces the old pulsing red dot)
+            VoiceWaveformBars(isActive: isVoiceFollowUp)
 
             Image(systemName: "mic.fill")
                 .scaledFont(size: 14, weight: .semibold)
@@ -375,7 +372,7 @@ struct AIResponseView: View {
         }
         .padding(.horizontal, 10)
         .padding(.vertical, 8)
-        .background(Color.red.opacity(0.15))
+        .background(OmiColors.purplePrimary.opacity(0.12))
         .cornerRadius(8)
     }
 
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
new file mode 100644
index 00000000000..6c9eefe9c35
--- /dev/null
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/AboutUserCard.swift
@@ -0,0 +1,78 @@
+import Foundation
+
+/// Builds the compact, local-only `<about_user>` block injected into the hub's
+/// system instruction at warm time. Identity + rough situation only; exact/current
+/// lists stay behind the read tools (the card hedges this). No network calls.
+enum AboutUserCard {
+  /// Maximum number of memory facts listed on the card.
+  private static let maxFacts = 8
+  /// Facts longer than this many characters are shortened and end in "…".
+  private static let maxFactLength = 120
+
+  /// Collapses each run of whitespace (newlines included) into one space and drops
+  /// leading/trailing whitespace, so a value can never spill onto a second card line.
+  private static func singleLine(_ text: String) -> String {
+    text.split(whereSeparator: \.isWhitespace).joined(separator: " ")
+  }
+
+  /// Pure formatter — kept separate from `build()` so it is unit-testable.
+  ///
+  /// - Parameters:
+  ///   - name: Display name; the `Name:` line is omitted when it is blank.
+  ///   - facts: One bullet per entry; blank entries are dropped.
+  ///   - overdue: Count shown as "overdue" on the "Right now" line.
+  ///   - dueToday: Count shown as "due today" on the "Right now" line.
+  /// - Returns: The newline-joined card, wrapped in `<about_user>` tags.
+  static func render(name: String, facts: [String], overdue: Int, dueToday: Int) -> String {
+    // Normalize here as well as in `build()` so the one-item-per-line layout holds
+    // for every caller, not only for input that was cleaned beforehand.
+    let cleanName = singleLine(name)
+    let cleanFacts = facts.map(singleLine).filter { !$0.isEmpty }
+
+    var lines: [String] = ["<about_user>"]
+    if !cleanName.isEmpty { lines.append("Name: \(cleanName)") }
+    lines.append("What Omi knows about them:")
+    if cleanFacts.isEmpty {
+      lines.append("- Nothing saved yet.")
+    } else {
+      lines.append(contentsOf: cleanFacts.map { "- \($0)" })
+    }
+    if overdue == 0 && dueToday == 0 {
+      lines.append("Right now: nothing overdue or due today.")
+    } else {
+      lines.append("Right now: \(overdue) overdue, \(dueToday) due today.")
+    }
+    lines.append(
+      "(This is a quick snapshot — for the exact or current list, call get_tasks / get_action_items.)")
+    lines.append("</about_user>")
+    return lines.joined(separator: "\n")
+  }
+
+  /// Gathers local data (auth name, top memories, task counts) and renders the card.
+  /// Best-effort: any failure degrades to a smaller card, never throws.
+  @MainActor
+  static func build() async -> String {
+    let auth = AuthService.shared
+    // Fall back to the display name when the given name is empty or only whitespace.
+    let givenName = singleLine(auth.givenName)
+    let name = givenName.isEmpty ? singleLine(auth.displayName) : givenName
+
+    var facts: [String] = []
+    // `try?` is deliberate: a failed memory read just yields a card without facts.
+    if let memories = try? await MemoryStorage.shared.getLocalMemories(limit: maxFacts) {
+      facts = memories
+        .map { singleLine($0.content) }
+        .filter { !$0.isEmpty }
+        .prefix(maxFacts)
+        .map { $0.count > maxFactLength ? String($0.prefix(maxFactLength - 3)) + "…" : $0 }
+    }
+
+    // NOTE(review): presumably loadDashboardTasks() only reads local data, in line
+    // with the "no network calls" promise in the type comment — confirm.
+    await TasksStore.shared.loadDashboardTasks()
+    let overdue = TasksStore.shared.overdueTasks.count
+    let dueToday = TasksStore.shared.todaysTasks.count
+
+    return render(name: name, facts: facts, overdue: overdue, dueToday: dueToday)
+  }
+}
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift
index f39cf6618f3..8608cb771ec 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarState.swift
@@ -51,21 +51,6 @@ struct FloatingBarNotification: Identifiable, Equatable {
     }
 }
 
-/// The high-level voice activity the floating bar is reflecting right now. Derived
-/// from the lower-level PTT/hub flags so the status indicator has a single, ordered
-/// source of truth (each state has exactly one visual treatment).
-enum VoiceActivity: Equatable {
-    /// Nothing happening — the bar rests as a calm, barely-breathing sliver.
-    case idle
-    /// User is holding push-to-talk; we're capturing their voice (red, "you").
-    case listening
-    /// Turn committed, waiting on the model's reply — the model may answer late,
-    /// so this MUST read as "working, wait" rather than "done" (cool autonomous swirl).
-    case thinking
-    /// The model is speaking its reply (warm, audio-reactive waveform — "it").
-    case speaking
-}
-
 /// Observable object holding the state for the floating control bar.
 @MainActor
 class FloatingControlBarState: NSObject, ObservableObject {
@@ -108,35 +93,6 @@ class FloatingControlBarState: NSObject, ObservableObject {
     @Published var isVoiceListening: Bool = false
     @Published var isVoiceLocked: Bool = false
     @Published var voiceTranscript: String = ""
-    /// True after a voice turn is committed and we're waiting on the model's reply
-    /// (vs. still recording) — drives the "Thinking…/Responding…" indicator so the user
-    /// knows to wait rather than re-pressing (which would interrupt a slow reply).
-    @Published var isVoiceThinking: Bool = false
-    /// True while the model is actually speaking its reply (native audio playing or the
-    /// AVSpeech fallback talking). Distinct from `isVoiceThinking` so the indicator can
-    /// show a clearly different "it's talking" treatment vs. "it's working".
-    @Published var isVoiceSpeaking: Bool = false
-    /// Smoothed 0…1 output amplitude of the model's spoken reply, sampled from the
-    /// playback engine. Drives the speaking waveform so it reacts to the actual voice
-    /// (premium feel) rather than animating blindly. 0 when not speaking.
-    @Published var voiceLevel: CGFloat = 0
-
-    /// Single ordered source of truth for the status indicator. Listening wins (the user
-    /// is actively talking), then speaking, then thinking, else idle — by construction the
-    /// hub sets these mutually exclusively, the ordering just makes barge-in race-safe.
-    var voiceActivity: VoiceActivity {
-        if isVoiceListening { return .listening }
-        if isVoiceSpeaking { return .speaking }
-        if isVoiceThinking { return .thinking }
-        return .idle
-    }
-
-    /// Whether any voice turn is in flight — keeps the bar expanded across the whole
-    /// listening → thinking → speaking arc so the indicator stays visible (one expand,
-    /// one collapse per turn — no resize churn mid-turn).
-    var isVoiceActive: Bool {
-        isVoiceListening || isVoiceThinking || isVoiceSpeaking
-    }
 
     // Voice follow-up state (PTT while AI conversation is active)
     @Published var isVoiceFollowUp: Bool = false
@@ -180,8 +136,6 @@ class FloatingControlBarState: NSObject, ObservableObject {
         isVoiceFollowUp = false
         voiceFollowUpTranscript = ""
         currentQueryFromVoice = false
-        isVoiceSpeaking = false
-        voiceLevel = 0
         lastConversationActivityAt = nil
     }
 }
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
index 9188f5d11b3..9e06e25dbb9 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarView.swift
@@ -33,11 +33,9 @@ struct FloatingControlBarView: View {
         .animation(.spring(response: 0.35, dampingFraction: 0.82), value: state.currentNotification?.id)
     }
 
-    /// Whether the bar chrome should stretch to fill the window width. Stays full-width
-    /// for the whole voice turn (listening → thinking → speaking) so the status indicator
-    /// has room and the bar resizes exactly once per turn.
+    /// Whether the bar chrome should stretch to fill the window width
     private var barNeedsFullWidth: Bool {
-        isHovering || state.showingAIConversation || state.isVoiceActive
+        isHovering || state.showingAIConversation || state.isVoiceListening
     }
 
     private var barChrome: some View {
@@ -85,7 +83,7 @@ struct FloatingControlBarView: View {
             }
         }
         .overlay(alignment: .topTrailing) {
-            if isHovering && !state.isVoiceActive {
+            if isHovering && !state.isVoiceListening {
                 Button {
                     openFloatingBarSettings()
                 } label: {
@@ -281,8 +279,8 @@ struct FloatingControlBarView: View {
 
     private var controlBarView: some View {
         Group {
-            if state.isVoiceActive && !state.isVoiceFollowUp {
-                voiceActiveView
+            if state.isVoiceListening && !state.isVoiceFollowUp {
+                voiceListeningView
                     .padding(.horizontal, 6)
                     .padding(.vertical, 3)
                     .frame(height: 50)
@@ -308,11 +306,11 @@ struct FloatingControlBarView: View {
         }
     }
 
-    /// Minimal resting indicator shown when not hovering and no voice turn is active —
-    /// a calm, slowly breathing sliver. (Active turns render `voiceActiveView` instead.)
+    /// Minimal thin bar shown when not hovering
     private var compactCircleView: some View {
-        VoiceActivityIndicator(activity: state.voiceActivity, level: state.voiceLevel)
-            .frame(width: 28, height: 14)
+        RoundedRectangle(cornerRadius: 3)
+            .fill(Color.white.opacity(0.5))
+            .frame(width: 28, height: 6)
     }
 
     private func compactToggle(_ title: String, isOn: Binding<Bool>) -> some View {
@@ -360,15 +358,16 @@ struct FloatingControlBarView: View {
         }
     }
 
-    /// Unified expanded voice view for the whole turn. The status indicator carries the
-    /// state (listening / thinking / speaking) visually; the text is just the helpful
-    /// detail (transcript, "Release to send", "Thinking…"). One element, no jarring swaps.
-    private var voiceActiveView: some View {
+    private var voiceListeningView: some View {
         HStack(spacing: 8) {
-            VoiceActivityIndicator(activity: state.voiceActivity, level: state.voiceLevel)
-                .frame(width: 34, height: 18)
+            // Playful realtime mic waveform (replaces the old pulsing red dot)
+            VoiceWaveformBars(isActive: state.isVoiceListening)
 
-            if state.isVoiceLocked && state.isVoiceListening {
+            Image(systemName: "mic.fill")
+                .scaledFont(size: 14, weight: .semibold)
+                .foregroundColor(.white)
+
+            if state.isVoiceLocked {
                 Text("LOCKED")
                     .scaledFont(size: 10, weight: .bold)
                     .foregroundColor(.orange)
@@ -378,31 +377,21 @@ struct FloatingControlBarView: View {
                     .cornerRadius(4)
             }
 
-            // Dim only the "Release to send" hint; live transcript / status reads brighter.
-            let isHint = state.voiceActivity == .listening && state.voiceTranscript.isEmpty
-            Text(voiceStatusText)
-                .scaledFont(size: 13)
-                .foregroundColor(.white.opacity(isHint ? 0.5 : 0.85))
-                .lineLimit(1)
-                .truncationMode(.head)
-        }
-    }
-
-    /// The detail text beside the indicator for the current voice state. The indicator
-    /// itself carries the state visually; this is just the helpful detail.
-    private var voiceStatusText: String {
-        switch state.voiceActivity {
-        case .listening:
-            if !state.voiceTranscript.isEmpty { return state.voiceTranscript }
-            return state.isVoiceLocked
-                ? "Tap \(shortcutSettings.pttShortcut.displayLabel) to send"
-                : "Release \(shortcutSettings.pttShortcut.displayLabel) to send"
-        case .thinking:
-            return "Thinking…"
-        case .speaking:
-            return "Speaking…"
-        case .idle:
-            return ""
+            if !state.voiceTranscript.isEmpty {
+                Text(state.voiceTranscript)
+                    .scaledFont(size: 13)
+                    .foregroundColor(.white.opacity(0.8))
+                    .lineLimit(1)
+                    .truncationMode(.head)
+            } else {
+                Text(
+                    state.isVoiceLocked
+                        ? "Tap \(shortcutSettings.pttShortcut.displayLabel) to send"
+                        : "Release \(shortcutSettings.pttShortcut.displayLabel) to send"
+                )
+                    .scaledFont(size: 13)
+                    .foregroundColor(.white.opacity(0.5))
+            }
         }
     }
 
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
index 7dc37d7d622..bd6bdf2c1ec 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/FloatingControlBarWindow.swift
@@ -41,8 +41,6 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
     private var suppressHoverResize = false
     private var inputHeightCancellable: AnyCancellable?
     private var responseHeightCancellable: AnyCancellable?
-    private var voiceActivityCancellable: AnyCancellable?
-    private var collapseWorkItem: DispatchWorkItem?
     private var resizeWorkItem: DispatchWorkItem?
     /// Saved center point from before chat opened, used to restore position on close.
     private var preChatCenter: NSPoint?
@@ -87,7 +85,6 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         self.maxSize = FloatingControlBarWindow.maxBarSize
 
         setupViews()
-        setupVoiceActivityObserver()
 
         if ShortcutSettings.shared.draggableBarEnabled,
            let savedPosition = UserDefaults.standard.string(forKey: FloatingControlBarWindow.positionKey) {
@@ -522,54 +519,6 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         inputHeightCancellable = nil
     }
 
-    /// Single owner of the voice-turn expand/collapse. The bar is wide whenever a voice
-    /// turn is active (`isVoiceActive` = listening || thinking || speaking) and collapses
-    /// to the resting sliver when it ends — derived reactively from the published flags
-    /// instead of imperative resize calls scattered across the PTT/hub code (which had to
-    /// coordinate via a `skipResize` flag).
-    private func setupVoiceActivityObserver() {
-        voiceActivityCancellable = state.$isVoiceListening
-            .combineLatest(state.$isVoiceThinking, state.$isVoiceSpeaking)
-            .map { $0 || $1 || $2 }
-            .removeDuplicates()
-            .receive(on: DispatchQueue.main)
-            .sink { [weak self] active in
-                self?.onVoiceActiveChanged(active)
-            }
-    }
-
-    /// Expand immediately so the window is already wide when the indicator + text render
-    /// (a delayed expand flashes the content cramped in the sliver first). Defer the
-    /// collapse a beat so the transient listening→thinking dip on PTT-up — `isVoiceActive`
-    /// momentarily clears before commitTurn sets thinking — doesn't blink the bar shut.
-    private func onVoiceActiveChanged(_ active: Bool) {
-        collapseWorkItem?.cancel()
-        collapseWorkItem = nil
-        if active {
-            applyVoiceExpansion(true)
-        } else {
-            let work = DispatchWorkItem { [weak self] in self?.applyVoiceExpansion(false) }
-            collapseWorkItem = work
-            DispatchQueue.main.asyncAfter(deadline: .now() + 0.12, execute: work)
-        }
-    }
-
-    private func applyVoiceExpansion(_ active: Bool) {
-        // Onboarding shows no separate bar; follow-up and the AI conversation own their
-        // own layout, so the voice indicator never drives the window size in those modes.
-        guard UserDefaults.standard.bool(forKey: "hasCompletedOnboarding"),
-              !state.isVoiceFollowUp else { return }
-        if active {
-            guard !state.showingAIConversation else { return }
-            resizeForPTTState(expanded: true, animated: false)  // snap — content is ready now
-        } else {
-            // Collapse only when nothing else needs the window expanded.
-            guard !state.showingAIConversation, !state.showingAIResponse,
-                  state.currentNotification == nil, !state.isHoveringBar else { return }
-            resizeForPTTState(expanded: false, animated: true)
-        }
-    }
-
     func updateAIResponse(type: String, text: String) {
         guard state.showingAIConversation else { return }
 
@@ -670,7 +619,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
 
     /// Resize for hover expand/collapse — anchored from center so the circle grows outward.
     func resizeForHover(expanded: Bool) {
-        guard !state.showingAIConversation, !state.isVoiceActive, !state.isShowingNotification, !suppressHoverResize else { return }
+        guard !state.showingAIConversation, !state.isVoiceListening, !state.isShowingNotification, !suppressHoverResize else { return }
         resizeWorkItem?.cancel()
         resizeWorkItem = nil
 
@@ -679,7 +628,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         let doResize: () -> Void = { [weak self] in
             guard let self = self else { return }
             guard !self.state.showingAIConversation,
-                  !self.state.isVoiceActive,
+                  !self.state.isVoiceListening,
                   !self.state.isShowingNotification,
                   !self.suppressHoverResize
             else { return }
@@ -709,16 +658,12 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         }
     }
 
-    /// Resize window for PTT state (expanded when listening, compact circle when idle).
-    /// Expand snaps (animated:false) so the indicator + text never flash cramped while the
-    /// window grows; collapse animates for a smooth shrink back to the resting sliver.
-    func resizeForPTTState(expanded: Bool, animated: Bool = true) {
+    /// Resize window for PTT state (expanded when listening, compact circle when idle)
+    func resizeForPTTState(expanded: Bool) {
         let size = expanded
             ? NSSize(width: FloatingControlBarWindow.expandedWidth, height: FloatingControlBarWindow.expandedBarSize.height)
             : FloatingControlBarWindow.minBarSize
-        // Idempotent: skip when already at the target size (avoids a no-op resize).
-        if abs(frame.width - size.width) < 1, abs(frame.height - size.height) < 1 { return }
-        resizeAnchored(to: size, makeResizable: false, animated: animated)
+        resizeAnchored(to: size, makeResizable: false, animated: true)
     }
 
     func showNotification(_ notification: FloatingBarNotification, animated: Bool = true) {
@@ -737,7 +682,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
         state.currentNotification = nil
 
         let targetSize: NSSize
-        if state.isVoiceActive {
+        if state.isVoiceListening {
             targetSize = NSSize(width: Self.expandedWidth, height: Self.expandedBarSize.height)
         } else {
             targetSize = state.isHoveringBar ? Self.expandedBarSize : Self.minBarSize
@@ -748,7 +693,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
     /// Restore the compact pill size when we temporarily surface the bar outside
     /// of an active hover, notification, voice session, or AI conversation.
     func normalizeForTemporaryShow() {
-        guard !state.showingAIConversation, !state.isVoiceActive, state.currentNotification == nil else { return }
+        guard !state.showingAIConversation, !state.isVoiceListening, state.currentNotification == nil else { return }
         resizeAnchored(to: Self.minBarSize, makeResizable: false, animated: false, anchorTop: true)
     }
 
@@ -892,7 +837,7 @@ class FloatingControlBarWindow: NSPanel, NSWindowDelegate {
             minimumWidth = FloatingControlBarWindow.expandedWidth
         } else if state.currentNotification != nil {
             minimumWidth = FloatingControlBarWindow.notificationWidth
-        } else if state.isVoiceActive {
+        } else if state.isVoiceListening {
             minimumWidth = FloatingControlBarWindow.expandedWidth
         } else if state.isHoveringBar {
             minimumWidth = FloatingControlBarWindow.expandedBarSize.width
@@ -1380,12 +1325,16 @@ class FloatingControlBarManager {
         guard let provider = activeFloatingProvider() else { return }
 
         // Re-wire the onSendQuery to use the isolated floating-bar provider.
-        // Subsequent typed messages also go through the AI router.
+        // Subsequent typed messages also go through the AI router. A message arriving
+        // through onSendQuery was always TYPED (PTT/voice bypass this closure and call
+        // routeQuery directly), so force fromVoice:false — otherwise a typed follow-up
+        // after a voice turn inherits the stale currentQueryFromVoice=true and gets
+        // spoken aloud.
         window.onSendQuery = { [weak self, weak window, weak provider] message in
             guard let self = self, let window = window, let provider = provider else { return }
             Task { @MainActor in
-                await self.withQueryTracer(query: message, fromVoice: window.state.currentQueryFromVoice) {
-                    await self.routeQuery(message, barWindow: window, provider: provider, fromVoice: window.state.currentQueryFromVoice)
+                await self.withQueryTracer(query: message, fromVoice: false) {
+                    await self.routeQuery(message, barWindow: window, provider: provider, fromVoice: false)
                 }
             }
         }
@@ -1737,6 +1686,11 @@ class FloatingControlBarManager {
         return window?.state
     }
 
+    /// Forwards a PTT expand/collapse request to the floating bar window; a no-op when no window exists.
+    func resizeForPTT(expanded: Bool) {
+        window?.resizeForPTTState(expanded: expanded)
+    }
+
     // MARK: - AI Query
 
     private func prepareVisibleQueryState(_ message: String, in barWindow: FloatingControlBarWindow, fromVoice: Bool) {
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
index 3eca7fcdd98..0e7a283bd02 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/PushToTalkManager.swift
@@ -523,10 +523,6 @@ class PushToTalkManager: ObservableObject {
     state = .finalizing
     finalizeWorkItem?.cancel()
     finalizeWorkItem = nil
-    // Flags only — the window keeps the bar expanded into "thinking" because commitTurn
-    // sets isVoiceThinking before the reactive resize observer settles (so isVoiceActive
-    // never dips), which is why there's no flicker and no skip-resize coordination here.
-    updateBarState()
 
     // Stop mic immediately — no more audio capture
     audioCaptureService?.stopCapture()
@@ -563,10 +559,10 @@ class PushToTalkManager: ObservableObject {
       // Real speech — instant local ack + commit. The hub speaks the reply and
       // dispatches tools itself; no transcript/router/LLM hop here.
       if ShortcutSettings.shared.pttSoundsEnabled { ackSound?.play() }
-      barState?.voiceTranscript = "…"
       RealtimeHubController.shared.commitTurn()
-      // Leave the bar showing "…"; the hub controller exits the voice UI on turn
-      // completion (so we skip the clearing updateBarState()).
+      // Collapse the bar on release — the hub speaks its reply as audio (no inline
+      // status UI), the same as the legacy voice path.
+      updateBarState()
       AnalyticsManager.shared.floatingBarPTTEnded(
         mode: finalizedMode, hadTranscript: true, transcriptLength: 0)
       log("PushToTalkManager: hub turn committed (instant ack)")
@@ -729,14 +725,14 @@ class PushToTalkManager: ObservableObject {
 
     isCurrentSessionFollowUp = false
 
-    // Reset state. The reactive resize observer won't collapse the bar when a query is in
-    // flight or a conversation is open — it guards on showingAIConversation/showingAIResponse,
-    // which openAIInputWithQuery sets (to the correct response size) right after this.
+    // Reset state — skip PTT collapse resize when we have a query,
+    // because openAIInputWithQuery will resize to the correct size.
+    // Also skip resize when in follow-up mode (panel is already at response size).
     state = .idle
     transcriptSegments = []
     lastInterimText = ""
     currentContextSnapshot = nil
-    updateBarState()
+    updateBarState(skipResize: hasQuery || wasFollowUp)
 
     guard hasQuery else {
       log("PushToTalkManager: no transcript to send")
@@ -978,7 +974,11 @@ class PushToTalkManager: ObservableObject {
               self.transcriptionService?.sendAudio(audioData)
             }
           },
-          onAudioLevel: { _ in }
+          onAudioLevel: { level in
+            // Feed the floating-bar mic waveform (VoiceWaveformBars). Throttled to ~5 Hz
+            // inside the monitor; used only for visualization.
+            AudioLevelMonitor.shared.updateMicrophoneLevel(level)
+          }
         )
         log("PushToTalkManager: mic capture started (batch=\(batchMode))")
       } catch {
@@ -1037,8 +1037,9 @@ class PushToTalkManager: ObservableObject {
 
   // MARK: - Bar State Sync
 
-  private func updateBarState() {
+  private func updateBarState(skipResize: Bool = false) {
     guard let barState = barState else { return }
+    let wasListening = barState.isVoiceListening
     let isShowingVoiceUI = (state == .listening || state == .lockedListening)
     barState.isVoiceListening = isShowingVoiceUI
     barState.isVoiceLocked = (state == .lockedListening)
@@ -1047,9 +1048,16 @@ class PushToTalkManager: ObservableObject {
       barState.voiceTranscript = ""
       barState.voiceFollowUpTranscript = ""
     }
-    // The bar's expand/collapse is derived reactively from these flags by the window
-    // (FloatingControlBarWindow.setupVoiceActivityObserver) — one resize per turn, no
-    // imperative calls or skip-flags to keep in sync here.
+
+    // Skip resize when in follow-up mode, expanded AI conversation, or during onboarding
+    // (during onboarding the floating bar shouldn't appear as a separate window)
+    let isOnboarding = !UserDefaults.standard.bool(forKey: "hasCompletedOnboarding")
+    guard !skipResize && !barState.isVoiceFollowUp && !barState.showingAIConversation && !isOnboarding else { return }
+    if barState.isVoiceListening && !wasListening {
+      FloatingControlBarManager.shared.resizeForPTT(expanded: true)
+    } else if !barState.isVoiceListening && wasListening {
+      FloatingControlBarManager.shared.resizeForPTT(expanded: false)
+    }
   }
 }
 
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
index cf8e7356e4c..5d8a4ddafdd 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubController.swift
@@ -46,6 +46,15 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// Consecutive failed (re)connects with no surviving session — caps churn on a hard
   /// failure. Reset when a socket survives past the idle window or a turn completes.
   private var hubReconnectStrikes = 0
+  /// After this many consecutive fast failures (e.g. a stale/revoked key failing auth),
+  /// the hub stops re-warming so it doesn't hammer a dead endpoint.
+  private static let maxReconnectStrikes = 5
+  /// True only while a session is connected + authenticated for `sessionProvider`. This is
+  /// what gates `isActive`: a PTT turn enters hub mode only when the hub is genuinely
+  /// connected right now; otherwise it transparently uses the legacy cascade. Set in
+  /// hubDidConnect (fires post-auth, on "ready") and cleared on teardown/error, so a
+  /// stale/revoked key — which never connects — never costs the user a turn.
+  private var hubConnected = false
   /// True between commit and turn-done — used to detect barge-in (a new PTT while
   /// the previous reply is still in flight).
   private var responding = false
@@ -53,61 +62,70 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// Log tag for the currently-connected provider.
   private var providerTag: String { sessionProvider == .gemini ? "gemini" : "openai" }
 
+  /// Latest local identity card, injected into each new session's system instruction.
+  /// Refreshed off the hot path; an empty string just means "no card yet" (graceful).
+  private var aboutUserCard: String = ""
+
+  private func refreshAboutUserCard() {
+    Task { @MainActor [weak self] in
+      self?.aboutUserCard = await AboutUserCard.build()
+    }
+  }
+
   /// Held warm so spawn_agent's pi-mono bridge boot is off the hot path. The pill
   /// spawn creates its own provider; warming this one primes node/auth caches.
   private var warmProvider: ChatProvider?
 
   private override init() {
     super.init()
-    // Clear "speaking" when the AVSpeech fallback finishes (native audio uses the
-    // player's drain callback instead).
-    speech.delegate = self
   }
 
   /// In-flight ephemeral mint guard (managed users).
   private var minting = false
 
   /// True when the hub should drive this PTT turn. Read by PushToTalkManager at PTT
-  /// start. BYOK users are ready immediately (own key); managed users are ready only
-  /// once a warm session exists (token minted + connecting) — otherwise PTT falls
-  /// back to the legacy cascade for that turn.
+  /// start. The hub is the default voice path (no opt-in toggle).
   var isActive: Bool {
-    guard RealtimeHubSettings.shared.isEnabled else { return false }
-    let provider = RealtimeHubSettings.shared.provider
-    if APIKeyService.byokKey(provider.byokProvider) != nil { return true }
-    return session != nil && sessionProvider == provider
+    // Drive a turn only when the hub is actually CONNECTED + authenticated for the
+    // currently-selected provider. A turn never enters hub mode on a key/token that can't
+    // connect (stale/revoked key, failed mint, mid-reconnect, or a just-switched provider):
+    // PTT transparently uses the legacy cascade instead, so a broken hub never costs the
+    // user a turn. The hub re-warms in the background and flips this true once it connects.
+    hubConnected && sessionProvider == RealtimeHubSettings.shared.provider
   }
 
   func setup(barState: FloatingControlBarState) {
     self.barState = barState
-    // Register the observer exactly once — duplicate registrations (re-entrant
-    // setup) fired settingsChanged N times, each tearing down + recreating the
-    // socket, which orphaned a connecting session (Gemini 1001/1008 closes).
+    // The hub provider follows the "Voice Model" picker, so re-warm when it changes —
+    // observe the live settings notification (posted by the picker, RealtimeOmniSettings
+    // setters, and AutoModelSelector). Register exactly once — duplicate registrations
+    // (re-entrant setup) fired settingsChanged N times, each tearing down + recreating
+    // the socket, which orphaned a connecting session (Gemini 1001/1008 closes).
     NotificationCenter.default.removeObserver(
-      self, name: .realtimeHubSettingsDidChange, object: nil)
+      self, name: .realtimeOmniSettingsDidChange, object: nil)
     NotificationCenter.default.addObserver(
       self, selector: #selector(settingsChanged),
-      name: .realtimeHubSettingsDidChange, object: nil)
+      name: .realtimeOmniSettingsDidChange, object: nil)
     // Expose the headless E2E action (omi-ctl action hub_test_turn pcm=… provider=…).
     RealtimeHubTestHarness.registerAutomationAction()
+    refreshAboutUserCard()
   }
 
   @objc private func settingsChanged() {
-    // Only reconnect if enabled and the provider actually changed — avoids
-    // redundant teardown/recreate races on unrelated notifications.
-    if !RealtimeHubSettings.shared.isEnabled { teardownSession(); return }
+    // Only reconnect if the provider actually changed — avoids redundant
+    // teardown/recreate races on unrelated notifications.
     if session != nil, sessionProvider == RealtimeHubSettings.shared.provider { return }
     teardownSession()
+    refreshAboutUserCard()
     ensureWarm()
   }
 
   // MARK: - Warm session lifecycle (kept open between turns)
 
-  /// Open the WS now if it isn't already (no-op if disabled or already warm).
-  /// BYOK → connect client-direct with the user's key (Phase 1). Otherwise, if
-  /// signed in → mint a server-side ephemeral token (Phase 2) and connect with it.
+  /// Open the WS now if it isn't already (no-op if already warm). BYOK → connect
+  /// client-direct with the user's key. Otherwise, if signed in → mint a server-side
+  /// ephemeral token and connect with it.
   func ensureWarm() {
-    guard RealtimeHubSettings.shared.isEnabled else { return }
     let provider = RealtimeHubSettings.shared.provider
     if session != nil, sessionProvider == provider { return }
     if session != nil { teardownSession() }
@@ -117,7 +135,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     } else if AuthService.shared.isSignedIn {
       mintAndConnect(provider: provider)
     } else {
-      log("RealtimeHub: enabled but no BYOK key and not signed in — hub unavailable (cascade).")
+      log("RealtimeHub: no BYOK key and not signed in — hub unavailable (cascade).")
     }
   }
 
@@ -137,39 +155,22 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
         log("⚠️ RealtimeHub: ephemeral mint failed / not entitled — staying on cascade")
         return
       }
-      // Provider/enable may have changed while minting; only connect if still wanted.
-      guard RealtimeHubSettings.shared.isEnabled,
-        RealtimeHubSettings.shared.provider == provider, self.session == nil
+      // Provider may have changed while minting; only connect if still wanted.
+      guard RealtimeHubSettings.shared.provider == provider, self.session == nil
       else { return }
       self.startSession(provider: provider, auth: .ephemeral(token))
     }
   }
 
   private func startSession(provider: RealtimeHubProvider, auth: HubAuth) {
-    let s = RealtimeHubSession(provider: provider, auth: auth, delegate: self)
+    let instructions = RealtimeHubTools.systemInstruction(aboutUser: aboutUserCard)
+    let s = RealtimeHubSession(provider: provider, auth: auth, instructions: instructions, delegate: self)
     session = s
     sessionProvider = provider
     // Both providers stream native spoken audio (24k PCM) → StreamingPCMPlayer;
     // AVSpeech is only a no-audio fallback.
     if pcmPlayer == nil {
-      let p = StreamingPCMPlayer(sampleRate: 24000)
-      // Feed the live output amplitude to the speaking waveform — but only while we're
-      // actually in the speaking state, so publishing `voiceLevel` never re-renders the
-      // bar outside that window.
-      p.onLevel = { [weak self] level in
-        guard let self, self.barState?.isVoiceSpeaking == true else { return }
-        self.barState?.voiceLevel = CGFloat(level)
-      }
-      // The reply isn't truly over until the buffered audio finishes draining — only
-      // then do we drop "speaking" and let the bar collapse back to idle.
-      p.onPlayingChanged = { [weak self] playing in
-        guard let self, let barState = self.barState else { return }
-        if !playing {
-          barState.isVoiceSpeaking = false
-          barState.voiceLevel = 0
-        }
-      }
-      pcmPlayer = p
+      pcmPlayer = StreamingPCMPlayer(sampleRate: 24000)
     }
     s.start()
     log(
@@ -184,6 +185,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     session?.stop()
     session = nil
     sessionProvider = nil
+    hubConnected = false  // no live session → PTT falls back to the cascade until re-warm
   }
 
   // MARK: - PTT integration
@@ -202,9 +204,6 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     audioReceivedThisTurn = false
     turnRecorded = false
     lastTurnAt = Date()
-    barState?.isVoiceThinking = false  // new turn → we're recording again, not waiting
-    barState?.isVoiceSpeaking = false  // any prior reply is being cut off below
-    barState?.voiceLevel = 0
     pcmPlayer?.stop()  // stop any prior reply locally
     if speech.isSpeaking { speech.stopSpeaking(at: .immediate) }
     if bargeIn {
@@ -246,11 +245,6 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   /// PTT-up: end the turn; the model now responds (and may call tools).
   func commitTurn() {
     responding = true
-    // Show a distinct "waiting on the model" state (not the red recording dot, which
-    // reads as "still listening") so the user knows to wait rather than re-press. Setting
-    // this keeps the bar's `isVoiceActive` true across the PTT-up → thinking handoff, so
-    // the window stays expanded (the window observes the flags and resizes itself).
-    barState?.isVoiceThinking = true
     // (The screen frame is sent at turn START — see beginTurn — so it has time to
     // upload/decode before the model answers. Nothing to attach here.)
     session?.commitInputTurn()
@@ -273,6 +267,7 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
 
   func hubDidConnect() {
     lastWarmAt = Date()
+    hubConnected = true  // authenticated + ready — PTT may now route turns to the hub
     log("RealtimeHub: connected (\(sessionProvider?.displayName ?? "?"))")
   }
 
@@ -297,11 +292,6 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
   }
 
   func hubDidReceiveAudio(_ pcm24k: Data) {
-    if !audioReceivedThisTurn {
-      // First audio of the turn: it's no longer thinking, it's speaking.
-      barState?.isVoiceThinking = false
-      barState?.isVoiceSpeaking = true
-    }
     audioReceivedThisTurn = true
     pcmPlayer?.enqueue(pcm24k)  // native spoken audio (OpenAI + Gemini)
   }
@@ -318,6 +308,25 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     }
   }
 
+  /// Runs the async tool closure `body` in a new Task and replies to the model with a
+  /// speakable string: `errorText` if `body` throws, `emptyText` if the text is blank
+  /// (whitespace only), else the text itself. Logs a preview of at most 60 characters.
+  private func runToolAndSpeak(
+    callId: String, name: String, detail: String = "",
+    emptyText: String, errorText: String,
+    _ body: @escaping () async throws -> String
+  ) {
+    let detailSuffix = detail.isEmpty ? "" : " \(detail)"
+    Task { [weak self] in
+      guard let self else { return }  // controller gone → nothing to answer
+      let raw = (try? await body()) ?? errorText
+      let isBlank = raw.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
+      let spoken = isBlank ? emptyText : raw
+      log("RealtimeHub[\(self.providerTag)]: tool \(name)\(detailSuffix) → \(spoken.prefix(60))")
+      self.session?.sendToolResult(callId: callId, name: name, output: spoken)
+    }
+  }
+
   func hubDidRequestTool(name: String, callId: String, argumentsJSON: String) {
     let arguments =
       (try? JSONSerialization.jsonObject(with: Data(argumentsJSON.utf8)) as? [String: Any]) ?? [:]
@@ -326,13 +335,19 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
       session?.sendToolResult(callId: callId, name: name, output: "Unknown tool.")
       return
     }
+    func arg(_ key: String) -> String { (arguments[key] as? String) ?? turnTranscript }
+    func argInt(_ key: String) -> Int? { (arguments[key] as? Int) ?? (arguments[key] as? NSNumber)?.intValue }
     switch tool {
     case .askHigherModel:
-      let query = (arguments["query"] as? String) ?? turnTranscript
-      log("RealtimeHub[\(providerTag)]: tool ask_higher_model → POST /v2/chat/completions (claude-sonnet-4-6) query=\"\(query.prefix(80))\"")
+      let query = arg("query")
+      let context = (arguments["context"] as? String) ?? ""
+      log(
+        "RealtimeHub[\(providerTag)]: tool ask_higher_model → POST /v2/chat/completions (claude-sonnet-4-6) query=\"\(query.prefix(80))\""
+      )
       Task { [weak self] in
         guard let self else { return }
-        let answer = await self.escalateToHigherModel(query)
+        let answer = await self.escalateToHigherModel(
+          query, context: context, aboutUser: self.aboutUserCard)
         self.session?.sendToolResult(callId: callId, name: name, output: answer)
       }
     case .getTasks:
@@ -343,8 +358,9 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
         await TasksStore.shared.loadDashboardTasks()
         let overdue = TasksStore.shared.overdueTasks
         let today = TasksStore.shared.todaysTasks
+        // Include the task id (for update_action_item) — the model is told never to speak ids.
         func list(_ items: [TaskActionItem]) -> String {
-          items.prefix(15).map { "- \($0.description)" }.joined(separator: "\n")
+          items.prefix(15).map { "- \($0.description) [id:\($0.id)]" }.joined(separator: "\n")
         }
         var out = ""
         if !overdue.isEmpty { out += "Overdue (\(overdue.count)):\n\(list(overdue))\n" }
@@ -353,16 +369,136 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
         log("RealtimeHub[\(self.providerTag)]: tool get_tasks → \(overdue.count) overdue, \(today.count) today")
         self.session?.sendToolResult(callId: callId, name: name, output: out)
       }
+    case .getMemories:
+      // Fast READ — "who am I" / "what do you know about me". Backend memories+facts.
+      runToolAndSpeak(
+        callId: callId, name: name,
+        emptyText: "I don't have any memories saved about you yet.",
+        errorText: "Could not read your memories right now."
+      ) { try await APIClient.shared.toolGetMemories(limit: 15).resultText }
+    case .searchMemories:
+      let query = arg("query")
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "q=\"\(query.prefix(60))\"",
+        emptyText: "I couldn't find anything about that.",
+        errorText: "Could not search your memories right now."
+      ) { try await APIClient.shared.toolSearchMemories(query: query, limit: 5).resultText }
+    case .searchConversations:
+      // Capped for voice: top 5, summaries only (no full transcripts).
+      let query = arg("query")
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "q=\"\(query.prefix(60))\"",
+        emptyText: "I couldn't find a conversation about that.",
+        errorText: "Could not search your conversations right now."
+      ) {
+        try await APIClient.shared.toolSearchConversations(
+          query: query, limit: 5, includeTranscript: false
+        ).resultText
+      }
+    case .getConversations:
+      // Fast READ — most recent conversations, newest first (backend orders created_at DESC).
+      // Capped for voice: top 3, summaries only. This is the recency path; search_conversations
+      // is semantic and must NOT be used for "most recent".
+      runToolAndSpeak(
+        callId: callId, name: name,
+        emptyText: "I don't see any recent conversations.",
+        errorText: "Could not read your recent conversations right now."
+      ) {
+        try await APIClient.shared.toolGetConversations(
+          limit: 3, includeTranscript: false
+        ).resultText
+      }
+    case .getDailyRecap:
+      // Fast LOCAL read of the on-device activity DB — apps/minutes, conversations, tasks,
+      // focus, screen context. Reuses the SAME executor the desktop chat uses, so voice and
+      // chat answer "what did I do yesterday" from one code path.
+      let daysAgo = argInt("days_ago") ?? 1
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "days_ago=\(daysAgo)",
+        emptyText: "I don't have any activity recorded for then.",
+        errorText: "Could not pull up your activity right now."
+      ) {
+        await ChatToolExecutor.execute(
+          ToolCall(name: "get_daily_recap", arguments: ["days_ago": daysAgo], thoughtSignature: nil))
+      }
+    case .getActionItems:
+      // Backend READ of the full task list with filters (completed / due-date range) — the
+      // capable sibling of the local get_tasks. Same APIClient path the chat agent uses.
+      let completed = arguments["completed"] as? Bool
+      let dueStart = arguments["due_start_date"] as? String
+      let dueEnd = arguments["due_end_date"] as? String
+      runToolAndSpeak(
+        callId: callId, name: name, detail: completed.map { "completed=\($0)" } ?? "",
+        emptyText: "I couldn't find any matching tasks.",
+        errorText: "Could not read your tasks right now."
+      ) {
+        try await APIClient.shared.toolGetActionItems(
+          limit: 25, completed: completed, dueStartDate: dueStart, dueEndDate: dueEnd
+        ).resultText
+      }
+    case .searchScreenHistory:
+      // Fast LOCAL semantic search over screen history (same executor as chat).
+      let query = arg("query")
+      var toolArgs: [String: Any] = ["query": query]
+      if let days = argInt("days") { toolArgs["days"] = days }
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "q=\"\(query.prefix(60))\"",
+        emptyText: "I couldn't find anything on your screen about that.",
+        errorText: "Could not search your screen history right now."
+      ) {
+        await ChatToolExecutor.execute(
+          ToolCall(name: "search_screen_history", arguments: toolArgs, thoughtSignature: nil))
+      }
+    case .createActionItem:
+      let description = (arguments["description"] as? String)?
+        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+      let dueAt = arguments["due_at"] as? String
+      guard !description.isEmpty else {
+        session?.sendToolResult(
+          callId: callId, name: name, output: "No task description was given.")
+        return
+      }
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "\"\(description.prefix(60))\"",
+        emptyText: "Task created.",
+        errorText: "Could not create the task right now."
+      ) {
+        try await APIClient.shared.toolCreateActionItem(
+          description: description, dueAt: dueAt
+        ).resultText
+      }
+    case .updateActionItem:
+      guard let id = (arguments["id"] as? String), !id.isEmpty else {
+        session?.sendToolResult(
+          callId: callId, name: name,
+          output: "Missing the task id — call get_tasks first to find it.")
+        return
+      }
+      let completed = arguments["completed"] as? Bool
+      let newDescription = arguments["description"] as? String
+      let dueAt = arguments["due_at"] as? String
+      runToolAndSpeak(
+        callId: callId, name: name, detail: "id=\(id.prefix(8))",
+        emptyText: "Task updated.",
+        errorText: "Could not update the task right now."
+      ) {
+        try await APIClient.shared.toolUpdateActionItem(
+          id: id, completed: completed, description: newDescription, dueAt: dueAt
+        ).resultText
+      }
     case .spawnAgent:
-      let brief = (arguments["brief"] as? String) ?? turnTranscript
+      let brief = arg("brief")
+      let title = (arguments["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
       let model = ShortcutSettings.shared.selectedModel.isEmpty
         ? "claude-sonnet-4-6" : ShortcutSettings.shared.selectedModel
       // Non-blocking: spawn renders its own pill ("text bubble") and runs on its
       // own ChatProvider/AgentBridge. We don't await it on the voice loop.
       // fromVoice:false — the hub model speaks its own natural acknowledgment, so the pill
       // must NOT also speak its canned randomAck ("on it") or we double up.
-      let pill = AgentPillsManager.shared.spawnFromUserQuery(brief, model: model, fromVoice: false)
-      log("RealtimeHub[\(providerTag)]: tool spawn_agent → AgentBridge pill=\"\(pill.title)\" model=\(model)")
+      let pill = AgentPillsManager.shared.spawnFromUserQuery(
+        brief, model: model, fromVoice: false,
+        preFetchedTitle: (title?.isEmpty == false) ? title : nil)
+      log("RealtimeHub[\(providerTag)]: tool spawn_agent → AgentBridge pill=\"\(pill.title)\" model=\(model) titled=\(title?.isEmpty == false)")
       // Terse directive (not speakable content): the model already said its one-line ack
       // BEFORE calling, so it should NOT generate a slow second utterance after this.
       session?.sendToolResult(
@@ -412,51 +548,58 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     // land here.
     responding = false
     logError("RealtimeHub: session error — \(message)")
-    // The reply is dead — stop any buffered audio and drop the speaking state before
-    // collapsing (the drain callback won't fire for a torn-down engine).
+    // The reply is dead — stop any buffered audio before collapsing.
     pcmPlayer?.stop()
     if speech.isSpeaking { speech.stopSpeaking(at: .immediate) }
-    barState?.isVoiceSpeaking = false
-    barState?.voiceLevel = 0
     exitVoiceUI()
     let aliveFor = lastWarmAt.map { Date().timeIntervalSince($0) } ?? 0
     teardownSession()
     // Re-warm so the NEXT PTT uses the hub, not the STT cascade. Gemini idle-closes
     // the socket (~2.5 min, close 1008) even before the first turn; managed users have
     // no BYOK key, so once `session` is nil `isActive` is false and PTT silently falls
-    // back to omni STT. So gate on isEnabled (NOT isActive, which needs a live session).
+    // back to omni STT. So always try to re-warm (the hub is the default voice path).
     // A socket that survived past the idle window was a normal idle-close → reset the
     // strike budget and keep re-warming forever; one that died fast is likely a config/
     // auth failure → let the strikes cap stop the churn.
     if aliveFor > 60 { hubReconnectStrikes = 0 }
-    guard RealtimeHubSettings.shared.isEnabled, !reconnectPending, hubReconnectStrikes < 5 else { return }
+    guard !reconnectPending, hubReconnectStrikes < Self.maxReconnectStrikes else { return }
     hubReconnectStrikes += 1
     reconnectPending = true
     DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) { [weak self] in
       guard let self else { return }
       self.reconnectPending = false
-      if RealtimeHubSettings.shared.isEnabled, self.session == nil { self.ensureWarm() }
+      if self.session == nil { self.ensureWarm() }
     }
   }
 
   /// Return the floating bar from its PTT voice state to compact after a hub turn.
-  /// Leaves `isVoiceSpeaking` alone — the turn can finish generating while the buffered
-  /// reply is still playing; the player's drain callback drops speaking when it ends. The
-  /// window observes these flags and collapses itself once `isVoiceActive` goes false.
   private func exitVoiceUI() {
     guard let barState else { return }
+    // Snapshot the listening flag first: it is reset just below, and
+    // PushToTalkManager.updateBarState() only resizes on a wasListening→false transition,
+    // so after a mid-turn error or silent-tap cancel it would see no change (bar stays wide).
+    let wasExpandedForVoice = barState.isVoiceListening
     barState.voiceTranscript = ""
-    barState.isVoiceThinking = false
     barState.isVoiceListening = false
     barState.isVoiceLocked = false
     barState.isVoiceFollowUp = false
+    // In that case shrink the bar here — but never out from under an open conversation,
+    // response, notification, hover, or onboarding.
+    let onboarded = UserDefaults.standard.bool(forKey: "hasCompletedOnboarding")
+    let surfaceInUse = barState.showingAIConversation || barState.showingAIResponse
+      || barState.currentNotification != nil || barState.isHoveringBar
+    if wasExpandedForVoice && !surfaceInUse && onboarded {
+      FloatingControlBarManager.shared.resizeForPTT(expanded: false)
+    }
   }
 
   // MARK: - Tools
 
   /// ask_higher_model — reuse the EXISTING prompt-cached /v2/chat/completions
   /// (no new backend route). Returns the assistant text for the model to speak.
-  private func escalateToHigherModel(_ query: String) async -> String {
+  private func escalateToHigherModel(_ query: String, context: String, aboutUser: String)
+    async -> String
+  {
     let baseURL = await APIClient.shared.rustBackendURL
     guard !baseURL.isEmpty else { return "I couldn't reach the model right now." }
     let normalized = baseURL.hasSuffix("/") ? baseURL : baseURL + "/"
@@ -472,18 +615,8 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     } catch {
       return "I couldn't authenticate to the model."
     }
-    let body: [String: Any] = [
-      "model": "claude-sonnet-4-6",
-      "max_tokens": 1024,
-      "messages": [
-        [
-          "role": "user",
-          "content":
-            "Answer concisely for a spoken reply (a few sentences max):\n\n\(query)",
-        ]
-      ],
-      "stream": false,
-    ]
+    let body = RealtimeHubTools.escalationBody(
+      query: query, context: context, aboutUser: aboutUser)
     let t0 = Date()
     do {
       request.httpBody = try JSONSerialization.data(withJSONObject: body)
@@ -523,17 +656,9 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     utterance.voice =
       AVSpeechSynthesisVoice(language: AVSpeechSynthesisVoice.currentLanguageCode())
       ?? AVSpeechSynthesisVoice(language: "en-US")
-    barState?.isVoiceThinking = false
-    barState?.isVoiceSpeaking = true
     speech.speak(utterance)
   }
 
-  /// Drop the speaking state once the AVSpeech fallback stops talking.
-  private func finishedSpeaking() {
-    barState?.isVoiceSpeaking = false
-    barState?.voiceLevel = 0
-  }
-
   /// Local synthetic mouse click (point_click tool).
   @discardableResult
   static func click(at point: CGPoint) -> Bool {
@@ -549,19 +674,3 @@ final class RealtimeHubController: NSObject, RealtimeHubSessionDelegate {
     return true
   }
 }
-
-// MARK: - AVSpeech fallback completion
-
-extension RealtimeHubController: AVSpeechSynthesizerDelegate {
-  nonisolated func speechSynthesizer(
-    _ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance
-  ) {
-    Task { @MainActor [weak self] in self?.finishedSpeaking() }
-  }
-
-  nonisolated func speechSynthesizer(
-    _ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance
-  ) {
-    Task { @MainActor [weak self] in self?.finishedSpeaking() }
-  }
-}
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
index 7dc991138c0..86fe07fdc0b 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSession.swift
@@ -1,12 +1,12 @@
 import Foundation
 import Network
 
-// MARK: - Realtime Hub Session (Phase 1, CLIENT-DIRECT)
+// MARK: - Realtime Hub Session
 //
-// One persistent WebSocket to a realtime provider, opened with the user's own
-// BYOK key (dev/test only — gated by RealtimeHubSettings.canConnect). The model
-// is the hub: it does in-session STT + reasoning + routing (via tool calls) and
-// speaks the answer.
+// One persistent WebSocket to a realtime provider, opened either with the user's
+// own BYOK key (client-direct, gated by RealtimeHubSettings.canConnect) or with a
+// server-minted ephemeral token (managed users). The model is the hub: it does
+// in-session STT + reasoning + routing (via tool calls) and speaks the answer.
 //
 // Two providers, normalized to ONE internal stream surface
 // (RealtimeHubSessionDelegate):
@@ -62,6 +62,7 @@ enum HubAuth {
 final class RealtimeHubSession: NSObject {
   private let provider: RealtimeHubProvider
   private let auth: HubAuth
+  private let instructions: String
   private weak var delegate: RealtimeHubSessionDelegate?
 
   /// Mic PCM input rate per provider (Gemini 16k native, OpenAI GA needs 24k).
@@ -118,9 +119,10 @@ final class RealtimeHubSession: NSObject {
   /// clear which model produced which event.
   private var tag: String { "RealtimeHub[\(provider == .openai ? "openai" : "gemini"):\(provider.modelID)]" }
 
-  init(provider: RealtimeHubProvider, auth: HubAuth, delegate: RealtimeHubSessionDelegate) {
+  init(provider: RealtimeHubProvider, auth: HubAuth, instructions: String, delegate: RealtimeHubSessionDelegate) {
     self.provider = provider
     self.auth = auth
+    self.instructions = instructions
     self.delegate = delegate
     super.init()
   }
@@ -402,7 +404,7 @@ final class RealtimeHubSession: NSObject {
         "type": "session.update",
         "session": [
           "type": "realtime",
-          "instructions": RealtimeHubTools.systemInstruction,
+          "instructions": instructions,
           "output_modalities": ["audio"],
           "audio": [
             "input": [
@@ -431,7 +433,7 @@ final class RealtimeHubSession: NSObject {
             "responseModalities": ["AUDIO"], "temperature": 0.3,
             "mediaResolution": "MEDIA_RESOLUTION_HIGH",
           ],
-          "systemInstruction": ["parts": [["text": RealtimeHubTools.systemInstruction]]],
+          "systemInstruction": ["parts": [["text": instructions]]],
           "tools": [["functionDeclarations": RealtimeHubTools.geminiFunctionDeclarations]],
           "inputAudioTranscription": [:],
           "outputAudioTranscription": [:],
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
index 4f4b5952e44..3ae0e0ec18e 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubSettings.swift
@@ -1,18 +1,20 @@
 import Foundation
 
-// MARK: - Realtime Hub (Phase 1)
+// MARK: - Realtime Hub
 //
 // "Realtime-as-hub": instead of the cascade (STT → router → Claude → TTS), one
 // realtime model is the single hub. It does in-session STT, reasoning, routing
 // (as tool choice), and speaks the answer. Its tools call the EXISTING backend
 // endpoints / app code — no new backend routes.
 //
-// Phase 1 is CLIENT-DIRECT + dev/test only: the realtime WS connects straight to
-// the provider with the user's own BYOK key (see APIKeyService). It is gated so
-// it never runs for managed (non-BYOK) users. Phase 2 will replace the BYOK key
-// with a server-minted ephemeral token to make it shippable.
+// The hub is the default voice path — there is no opt-in toggle. Every PTT turn
+// routes through it whenever it can connect: BYOK users connect client-direct with
+// their own key (see APIKeyService); managed users connect with a server-minted
+// ephemeral token. When neither is available (no key, mint fails / not entitled) the
+// turn falls back to the legacy STT cascade. The provider follows the user's "Voice
+// Model" choice in Advanced settings (RealtimeOmniSettings) — no separate picker.
 
-enum RealtimeHubProvider: String, CaseIterable, Sendable {
+enum RealtimeHubProvider: String, Sendable {
   case openai
   case gemini
 
@@ -23,13 +25,6 @@ enum RealtimeHubProvider: String, CaseIterable, Sendable {
     }
   }
 
-  var subtitle: String {
-    switch self {
-    case .openai: return "gpt-realtime-2 · native spoken audio"
-    case .gemini: return "gemini native-audio Live · spoken audio + tools"
-    }
-  }
-
   /// Concrete model identifier sent to the provider.
   var modelID: String {
     switch self {
@@ -58,48 +53,22 @@ enum RealtimeHubProvider: String, CaseIterable, Sendable {
 final class RealtimeHubSettings {
   static let shared = RealtimeHubSettings()
 
-  private let enabledKey = "realtimeHubEnabled"
-  private let providerKey = "realtimeHubProvider"
-
-  private init() {
-    UserDefaults.standard.register(defaults: [
-      enabledKey: false,
-      providerKey: RealtimeHubProvider.openai.rawValue,
-    ])
-  }
-
-  /// Master switch. When off, the floating bar uses the legacy STT → router →
-  /// Claude → TTS cascade. Ships behind this flag.
-  var isEnabled: Bool {
-    get { UserDefaults.standard.bool(forKey: enabledKey) }
-    set {
-      UserDefaults.standard.set(newValue, forKey: enabledKey)
-      NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
-    }
-  }
+  private init() {}
 
+  /// The hub provider follows the user's "Voice Model" choice in Advanced settings —
+  /// there is no separate hub picker. The two map 1:1 (same underlying models), and
+  /// `.auto` is already resolved to a concrete provider by `effectiveProvider`.
   var provider: RealtimeHubProvider {
-    get {
-      let raw = UserDefaults.standard.string(forKey: providerKey)
-      return raw.flatMap(RealtimeHubProvider.init(rawValue:)) ?? .openai
-    }
-    set {
-      UserDefaults.standard.set(newValue.rawValue, forKey: providerKey)
-      NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
+    switch RealtimeOmniSettings.shared.effectiveProvider {
+    case .gptRealtime2: return .openai
+    case .geminiFlashLive, .auto: return .gemini
     }
   }
 
-  /// The hub may only run client-direct when the user has supplied the selected
-  /// provider's own key (BYOK / dev key). This is the managed-user gate: managed
-  /// users have no BYOK key, so the hub stays off and the cascade is used.
+  /// True when the hub can connect client-direct with the user's own provider key
+  /// (BYOK / dev key). Managed users without a key connect via a minted ephemeral
+  /// token instead (see RealtimeHubController.ensureWarm); both reach the hub.
   var canConnect: Bool {
     APIKeyService.byokKey(provider.byokProvider) != nil
   }
-
-  /// True when the hub should drive this PTT turn (enabled + a usable key).
-  var isActive: Bool { isEnabled && canConnect }
-}
-
-extension Notification.Name {
-  static let realtimeHubSettingsDidChange = Notification.Name("realtimeHubSettingsDidChange")
 }
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
index 8585d3033b3..46384dd9caf 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTestHarness.swift
@@ -44,7 +44,10 @@ final class RealtimeHubTestHarness: NSObject, RealtimeHubSessionDelegate {
   }
 
   func run(timeoutSeconds: Double) async -> [String: String] {
-    let s = RealtimeHubSession(provider: provider, auth: auth, delegate: self)
+    let s = RealtimeHubSession(
+      provider: provider, auth: auth,
+      instructions: RealtimeHubTools.systemInstruction(aboutUser: ""),
+      delegate: self)
     session = s
     let rate = s.requiredInputSampleRate
     let audio = rate == 16000 ? pcm16k : PushToTalkManager.resamplePCM16(pcm16k, from: 16000, to: rate)
@@ -118,7 +121,16 @@ final class RealtimeHubTestHarness: NSObject, RealtimeHubSessionDelegate {
     let stub: String
     switch HubTool(rawValue: name) {
     case .askHigherModel: stub = "Paris is the capital of France."
-    case .getTasks: stub = "Due today (1):\n- Example task"
+    case .getTasks: stub = "Due today (1):\n- Example task [id:task_123]"
+    case .getMemories: stub = "You live in San Francisco and prefer concise answers."
+    case .searchMemories: stub = "Your dog's name is Rex."
+    case .searchConversations: stub = "On Monday you discussed the launch timeline."
+    case .getConversations: stub = "Most recent: today, 'Standup notes'. Before that: yesterday, 'Design review'."
+    case .getActionItems: stub = "Open: Buy milk (due tomorrow). Completed: Ship the PR."
+    case .getDailyRecap: stub = "Yesterday: 3 hrs in Xcode, 1 hr in Safari; 2 conversations; 1 task created."
+    case .searchScreenHistory: stub = "Found it: yesterday afternoon you were reading the launch doc in Safari."
+    case .createActionItem: stub = "Created task: Example task."
+    case .updateActionItem: stub = "Updated the task."
     case .spawnAgent: stub = "Started a background agent."
     case .screenshot: stub = "Screen captured."
     case .pointClick: stub = "Clicked."
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
index dc526d867cf..98850b0be19 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/RealtimeHubTools.swift
@@ -3,9 +3,12 @@ import Foundation
 // MARK: - Realtime Hub tool surface
 //
 // The realtime model IS the router: instead of a separate Haiku classify() call,
-// the model decides what to do by choosing a tool. The same four tools are
+// the model decides what to do by choosing a tool. The same tool surface is
 // declared to both providers (OpenAI Realtime `tools`, Gemini `functionDeclarations`);
 // `RealtimeHubController` executes them by calling EXISTING app code / endpoints.
+// Reads (get_tasks, get_action_items, get_memories, get_conversations, get_daily_recap, and the
+// search_* tools) and simple writes (create_action_item, update_action_item) run synchronously
+// and speak their result; multi-step / other-app work still goes to spawn_agent.
 
 enum HubTool: String {
   /// Escalate a hard / knowledge-heavy question to the smarter Claude model via
@@ -15,8 +18,34 @@ enum HubTool: String {
   /// Non-blocking: the model acknowledges and moves on.
   case spawnAgent = "spawn_agent"
   /// Read the user's tasks locally (TasksStore) and return them inline to speak — a
-  /// fast synchronous READ, NOT a background agent.
+  /// fast synchronous READ, NOT a background agent. Overdue + due-today only.
   case getTasks = "get_tasks"
+  /// Read the user's full action-item list from the backend with filters (completed,
+  /// due-date range). Fast READ — use for completed tasks, date ranges, or the whole list
+  /// (get_tasks only covers overdue + due-today).
+  case getActionItems = "get_action_items"
+  /// Read what Omi knows about the user (memories / facts) and return it inline to speak.
+  /// Fast synchronous READ — the answer to "who am I" / "what do you know about me".
+  case getMemories = "get_memories"
+  /// Semantically search the user's memories / facts for something specific. Fast READ.
+  case searchMemories = "search_memories"
+  /// Semantically search the user's past conversations (titles + summaries, no transcripts).
+  /// Fast synchronous READ.
+  case searchConversations = "search_conversations"
+  /// List the user's MOST RECENT conversations, newest first (titles + summaries, no
+  /// transcripts). Fast READ — the answer to "most recent / latest / last conversation".
+  case getConversations = "get_conversations"
+  /// Formatted recap of what the user actually DID on their Mac — apps used (with minutes),
+  /// conversations, tasks, focus, screen activity. Fast LOCAL READ — the answer to "what did I
+  /// do yesterday / today", "which apps did I use the most", "how did I spend my time".
+  case getDailyRecap = "get_daily_recap"
+  /// Semantically search the user's on-screen history (what they saw / read / worked on).
+  /// Fast LOCAL READ — "when was I looking at X", "find where I read about Y".
+  case searchScreenHistory = "search_screen_history"
+  /// Create a new task / to-do / reminder for the user. Fast synchronous WRITE.
+  case createActionItem = "create_action_item"
+  /// Update an existing task (mark done, change text/due). Needs the task id from get_tasks.
+  case updateActionItem = "update_action_item"
   /// Capture the user's screen so the model can see what they're looking at.
   case screenshot = "screenshot"
   /// Click at on-screen coordinates (local).
@@ -25,59 +54,109 @@ enum HubTool: String {
 
 enum RealtimeHubTools {
 
-  static let systemInstruction = """
+  static func systemInstruction(aboutUser: String) -> String {
+    """
     You are Omi, a fast spoken-voice assistant on the user's Mac and the single hub \
     for their voice requests. You hear the user's microphone; reply by speaking, \
     conversationally. Default to one or two sentences, but when the user asks for \
     something longer or creative (a story, a detailed explanation, brainstorming), \
     give the full answer yourself — don't shorten it and don't offload it. \
-    Always reply in English.
+    Reply in the same language the user is speaking.
+
+    \(aboutUser)
 
-    IMPORTANT: You have NO direct access to the user's personal data or their apps. \
-    You cannot see their tasks, to-dos, calendar, notes, emails, messages, past \
-    conversations, memories, files, or reminders on your own. The spawn_agent tool \
-    CAN — it hands the request to a background agent that has all of those tools and \
-    can act in the user's apps and browser.
+    IMPORTANT: You CAN read the user's Omi data directly with fast tools — their tasks \
+    (get_tasks), what Omi knows about them / their memories & facts (get_memories, \
+    search_memories), their past conversations (search_conversations), what they DID on \
+    their Mac (get_daily_recap), and their on-screen history (search_screen_history) — and \
+    you can make simple task changes (create_action_item, update_action_item). For anything in \
+    their OTHER apps (calendar, notes, emails, messages, files, reminders, browser) or any \
+    multi-step "do X for me" work, use spawn_agent — it hands the request to a background \
+    agent that has those tools and can act in the user's apps.
 
-    Using tools: the moment a request needs a tool, briefly acknowledge it OUT LOUD in your \
-    own natural, varied words (keep it short, and don't include any answer or data you don't \
-    have yet), then immediately call the tool. For a data tool (get_tasks, ask_higher_model), \
-    speak its result after it returns. NEVER put an answer — real or guessed — in that \
-    acknowledgment, NEVER skip the tool call, and never read tool JSON aloud. You cannot see \
-    tasks, data, or the screen without calling a tool.
+    Using tools: when a request needs a tool, ALWAYS give a short spoken heads-up first so the \
+    user knows you're on it and that it won't be instant — then call the tool and speak the \
+    result when it returns. Never go silent during a tool call; the user can't see what you're \
+    doing, so a quiet gap feels broken. The catch is variety: that heads-up must be SPECIFIC to \
+    what they actually asked and DIFFERENT every time. Name the real thing you're fetching — \
+    "Pulling up yesterday's activity…", "Scanning your task list…", "Digging through your notes \
+    on the launch…", "Checking your memories for that…", "Getting the latest on that, one \
+    sec…". The thing to avoid is repetition: do NOT reach for the same generic opener ("let me \
+    check", "let me look that up") turn after turn — it's what makes you sound robotic. Keep it \
+    to a few words, vary the wording each turn, and don't include any answer or data you don't \
+    have yet. For a slower step (ask_higher_model, spawn_agent) it's fine to signal it'll take a \
+    moment. NEVER speak an answer — real or guessed — before the tool returns, NEVER skip the \
+    tool call, and never read tool JSON or ids aloud. You cannot see the user's data or screen \
+    without calling a tool.
 
     Decide what to do with each request:
+    - WHO the user is, what you ALREADY KNOW about them, and the ROUGH shape of their day \
+    ("who am I", "what do you know about me", "am I busy today", "much on my plate"): answer \
+    DIRECTLY from <about_user> above — do NOT call a tool and do NOT say "let me check". Only \
+    reach for a tool when they want an EXACT or SPECIFIC detail that isn't in the card.
     - The user's TASKS / to-dos / what's due — a READ ("what are my tasks", "what's due \
     today", "what's on my list", "do I have anything today"): you MUST call get_tasks and \
-    speak ONLY what it returns. You CANNOT see their tasks any other way — never guess, \
-    summarize from memory, or make up tasks. Always call get_tasks; do NOT use an agent.
-    - DOING something for the user, or their OTHER personal data (calendar, notes, emails, \
-    messages, conversations, memories, files, reminders) — create/send/open/edit/search/ \
-    schedule/automate/"do X for me"/any multi-step work: you CANNOT do these yourself. You \
-    MUST actually EMIT the spawn_agent function call (with a clear, self-contained `brief`). \
-    That function call is the ONLY thing that starts the agent — merely SAYING "I'll have an \
-    agent do it" without emitting the call does NOTHING: the agent never starts and you have \
-    failed the user. So always emit the spawn_agent call. You may add one short natural \
-    sentence as you call it, but never instead of it. Do NOT ask clarifying questions before \
-    spawning — spawn with what you have. Do NOT wait for it, narrate its steps, refuse, or \
-    claim you can't.
+    speak ONLY what it returns (the card's counts are a rough snapshot, not the list). Never \
+    guess or make up tasks. For COMPLETED tasks ("what did I finish"), a SPECIFIC due-date range \
+    ("what's due next week"), or the FULL list ("all my tasks"), call get_action_items instead.
+    - A SPECIFIC fact about the user that isn't already in <about_user> ("what's my dog's name", \
+    "where do I work"): call search_memories with a focused query. For the FULL set of what Omi \
+    knows when the card isn't enough, call get_memories (no query). NEVER answer "I don't know" \
+    or guess about the user without checking first.
+    - The user's MOST RECENT / latest / last conversation ("what was my most recent \
+    conversation", "what did we just talk about", "my recent conversations"): call \
+    get_conversations (newest first) — NOT search_conversations, which is semantic and does \
+    NOT sort by time. Speak the latest one.
+    - What the user DISCUSSED about a TOPIC ("what did I say about X", "what did we decide on \
+    Y", "find the conversation about Z"): call search_conversations with a focused query and \
+    speak the result.
+    - The user's own ACTIVITY / what they DID / how they spent their time ("what did I do \
+    yesterday", "what did I do today", "which apps did I use the most", "how did I spend my \
+    morning", "summarize my day"): you MUST call get_daily_recap (days_ago: 0 = today, 1 = \
+    yesterday) and speak a SHORT spoken summary of the highlights it returns — top apps, key \
+    conversations, tasks. Do NOT use search_conversations or spawn_agent for this, and never \
+    guess; this is exactly what get_daily_recap is for.
+    - What the user SAW / read / worked on ON SCREEN ("when was I looking at X", "find where I \
+    read about Y", "what was I doing in app Z"): call search_screen_history with a focused \
+    query and speak the result.
+    - ADVICE about the user's OWN productivity / workflow / habits / focus ("how can I improve \
+    my workflow", "how can I be more productive", "what should I change", "how am I doing", \
+    "where am I wasting time"): do NOT answer generically. FIRST call get_daily_recap (days_ago: \
+    0 for today, 7 for the week) — and get_action_items when tasks matter — then base EVERY \
+    suggestion on what they ACTUALLY did: their apps, distracted vs focused sessions, and \
+    overdue / duplicate tasks. Generic advice with no tool call is a failure here.
+    - ADD a task / to-do / reminder ("remind me to…", "add … to my list", "I need to…"): \
+    call create_action_item with a clear `description` (and `due_at` if a time was given), \
+    then confirm out loud. CHANGE an existing task (mark done, edit, reschedule): first \
+    call get_tasks to get the matching task's id, then call update_action_item with that id.
+    - DOING something for the user in their OTHER apps (calendar, notes, emails, messages, \
+    files, browser) or any multi-step work — create/send/open/edit/search/schedule/automate/ \
+    "do X for me": you CANNOT do these yourself. You MUST actually EMIT the spawn_agent \
+    function call (with a clear, self-contained `brief` and a short `title`). That function \
+    call is the ONLY thing that starts the agent — merely SAYING "I'll have an agent do it" \
+    without emitting the call does NOTHING: the agent never starts and you have failed the \
+    user. So always emit the spawn_agent call. You may add one short natural sentence as you \
+    call it, but never instead of it. Do NOT ask clarifying questions before spawning — spawn \
+    with what you have. Do NOT wait for it, narrate its steps, refuse, or claim you can't.
     - Everything else — general questions, facts, chit-chat, explanations, advice, jokes, \
     and creative or long-form requests (stories, brainstorming, drafts): ANSWER YOURSELF. \
     You are fully capable; do it directly, even when the ask is long or open-ended. Do \
     NOT escalate just because a request seems long or hard.
-    - Call ask_higher_model in ONLY two cases: (1) the user is unhappy with your previous \
-    answer — they push back, rephrase, say you're wrong, or ask for a better/deeper/more \
-    thorough answer; or (2) you genuinely need precise, up-to-date facts (current events, \
-    specific numbers) you don't reliably know. Pass a clear `query`, then speak the result.
+    - Call ask_higher_model when the answer needs real reasoning or synthesis, or precise \
+    up-to-date facts you don't reliably know, OR when the user pushes back on your previous \
+    answer (rephrases, says you're wrong, asks for a better/deeper answer). Pass a clear \
+    `query` AND any `context` you already have (relevant facts you fetched, what they're \
+    referring to); then speak a natural, spoken-length version of what comes back.
     - When you need to see what's on screen, call screenshot first. Use point_click only \
     when the user clearly asks you to click something.
 
     Keep latency low: prefer answering directly when you can.
     """
+  }
 
-  /// OpenAI Realtime GA `session.tools` entries.
-  static var openAITools: [[String: Any]] {
-    [
+  /// OpenAI Realtime GA `session.tools` entries. Static `let` — built once, not rebuilt on
+  /// every session (re)connect that reads it.
+  static let openAITools: [[String: Any]] = [
       [
         "type": "function",
         "name": HubTool.askHigherModel.rawValue,
@@ -89,7 +168,14 @@ enum RealtimeHubTools {
         "parameters": [
           "type": "object",
           "properties": [
-            "query": ["type": "string", "description": "The full question to escalate."]
+            "query": ["type": "string", "description": "The full question to escalate."],
+            "context": [
+              "type": "string",
+              "description":
+                "Relevant context you already have that helps answer well — facts you fetched, "
+                + "what the user is referring to, or the previous answer they pushed back on. "
+                + "Include only what's relevant; omit if there's nothing useful.",
+            ],
           ],
           "required": ["query"],
         ],
@@ -103,6 +189,152 @@ enum RealtimeHubTools {
           + "my list'. Do NOT use spawn_agent for reading tasks.",
         "parameters": ["type": "object", "properties": [:]],
       ],
+      [
+        "type": "function",
+        "name": HubTool.getMemories.rawValue,
+        "description":
+          "Read what Omi knows about the user — their memories and facts (preferences, "
+          + "background, people, habits). Fast synchronous read with NO query. Use this for "
+          + "'who am I', 'what do you know about me', 'what are my preferences'. Speak what it returns.",
+        "parameters": ["type": "object", "properties": [:]],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.searchMemories.rawValue,
+        "description":
+          "Search the user's memories / facts for a SPECIFIC thing ('what's my dog's name', "
+          + "'where do I work', 'what's my partner's name'). Fast synchronous read. Speak the result.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "query": ["type": "string", "description": "What to look up about the user."]
+          ],
+          "required": ["query"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.searchConversations.rawValue,
+        "description":
+          "Search the user's past conversations for what they discussed ('what did I say about X', "
+          + "'what did we decide on Y', 'find the conversation about Z'). Returns titles + summaries only "
+          + "(no full transcripts). Fast synchronous read. Speak the result.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "query": ["type": "string", "description": "What topic / conversation to find."]
+          ],
+          "required": ["query"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.getConversations.rawValue,
+        "description":
+          "List the user's MOST RECENT conversations, newest first (titles + summaries, no full "
+          + "transcripts). Use this — NOT search_conversations — for 'what was my most recent / "
+          + "latest / last conversation', 'what did we just talk about', or 'my recent conversations'. "
+          + "search_conversations is semantic and does NOT order by time, so it's wrong for 'recent'. "
+          + "Fast synchronous read. Speak the result.",
+        "parameters": ["type": "object", "properties": [:]],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.getDailyRecap.rawValue,
+        "description":
+          "Get a recap of what the user actually DID on their Mac — apps used (with minutes), "
+          + "conversations, tasks, focus sessions, and screen activity — for a day. THIS is the tool "
+          + "for 'what did I do yesterday', 'what did I do today', 'which apps did I use the most', "
+          + "'how did I spend my time'. Do NOT use search_conversations or spawn_agent for these. "
+          + "Fast synchronous read — speak a short summary of what it returns.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "days_ago": [
+              "type": "number",
+              "description": "0 = today, 1 = yesterday (default), 7 = the past week.",
+            ]
+          ],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.searchScreenHistory.rawValue,
+        "description":
+          "Search the user's on-screen history — what they saw, read, or worked on — by meaning. "
+          + "Use for 'when was I looking at X', 'find where I read about Y', 'what was I doing in "
+          + "app Z'. Returns matching moments with the app and context. Fast synchronous read. "
+          + "Speak the result.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "query": [
+              "type": "string", "description": "What the user was looking at / reading / doing.",
+            ],
+            "days": ["type": "number", "description": "How many days back to search; default 7."],
+          ],
+          "required": ["query"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.getActionItems.rawValue,
+        "description":
+          "Read the user's tasks / to-dos from the backend, with optional filters. Use for "
+          + "COMPLETED tasks ('what did I finish'), a DATE RANGE ('what's due next week'), or the "
+          + "FULL list ('all my tasks') — for plain 'what's due today / overdue', prefer get_tasks. "
+          + "Fast synchronous read. Speak a short summary of what it returns.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "completed": [
+              "type": "boolean",
+              "description": "true = only done tasks, false = only open tasks. Omit for both.",
+            ],
+            "due_start_date": [
+              "type": "string", "description": "Optional ISO-8601 start of the due-date range.",
+            ],
+            "due_end_date": [
+              "type": "string", "description": "Optional ISO-8601 end of the due-date range.",
+            ],
+          ],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.createActionItem.rawValue,
+        "description":
+          "Create a new task / to-do / reminder for the user ('remind me to…', 'add … to my "
+          + "list', 'I need to…'). Fast synchronous write. Confirm out loud after it returns.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "description": ["type": "string", "description": "The task text."],
+            "due_at": [
+              "type": "string",
+              "description": "Optional ISO-8601 due date/time, only if the user gave one.",
+            ],
+          ],
+          "required": ["description"],
+        ],
+      ],
+      [
+        "type": "function",
+        "name": HubTool.updateActionItem.rawValue,
+        "description":
+          "Update an existing task: mark it done, edit its text, or reschedule it. You MUST first "
+          + "call get_tasks to get the matching task's id, then pass that id here. Fast synchronous write.",
+        "parameters": [
+          "type": "object",
+          "properties": [
+            "id": ["type": "string", "description": "The task id from get_tasks."],
+            "completed": ["type": "boolean", "description": "Set true to mark the task done."],
+            "description": ["type": "string", "description": "New task text, if changing it."],
+            "due_at": ["type": "string", "description": "New ISO-8601 due date/time, if rescheduling."],
+          ],
+          "required": ["id"],
+        ],
+      ],
       [
         "type": "function",
         "name": HubTool.spawnAgent.rawValue,
@@ -116,7 +348,13 @@ enum RealtimeHubTools {
           "properties": [
             "brief": [
               "type": "string", "description": "A clear, self-contained brief of the task.",
-            ]
+            ],
+            "title": [
+              "type": "string",
+              "description":
+                "A short Title Case label for the task pill (≤ ~5 words, no trailing "
+                + "punctuation), e.g. 'Draft Launch Email'.",
+            ],
           ],
           "required": ["brief"],
         ],
@@ -140,12 +378,11 @@ enum RealtimeHubTools {
           "required": ["x", "y"],
         ],
       ],
-    ]
-  }
+  ]
 
-  /// Gemini Live `setup.tools[0].functionDeclarations` entries (same surface).
-  static var geminiFunctionDeclarations: [[String: Any]] {
-    openAITools.map { tool in
+  /// Gemini Live `setup.tools[0].functionDeclarations` entries (same surface). Derived once
+  /// from `openAITools`.
+  static let geminiFunctionDeclarations: [[String: Any]] = openAITools.map { tool in
       // Gemini wants {name, description, parameters} without the OpenAI "type" wrapper.
       var decl: [String: Any] = [
         "name": tool["name"] as? String ?? "",
@@ -159,7 +396,6 @@ enum RealtimeHubTools {
       }
       return decl
     }
-  }
 
   /// Recursively uppercase every `type` value in a JSON-schema dict so it matches Gemini's
   /// Schema enum (object → OBJECT, string → STRING, …).
@@ -176,4 +412,33 @@ enum RealtimeHubTools {
     if let items = schema["items"] as? [String: Any] { out["items"] = upcasedSchemaTypes(items) }
     return out
   }
+
+  /// System prompt for an escalated (ask_higher_model) answer. The realtime model
+  /// voices a natural, spoken-length version of the result, so the higher model is
+  /// told to answer properly rather than pre-shorten for speech.
+  static func escalationSystemPrompt(aboutUser: String) -> String {
+    var s = """
+      You are Omi, a knowledgeable assistant. Answer the user's question accurately and \
+      usefully. A voice assistant will relay your answer aloud and adapt the phrasing for \
+      speech, so be clear and well-structured; you don't need to pre-shorten it.
+      """
+    if !aboutUser.isEmpty { s += "\n\n" + aboutUser }
+    return s
+  }
+
+  static func escalationBody(query: String, context: String, aboutUser: String) -> [String: Any] {
+    let trimmedContext = context.trimmingCharacters(in: .whitespacesAndNewlines)
+    let userContent =
+      trimmedContext.isEmpty ? query : query + "\n\nContext I already have:\n" + trimmedContext
+    let messages: [[String: String]] = [
+      ["role": "system", "content": escalationSystemPrompt(aboutUser: aboutUser)],
+      ["role": "user", "content": userContent],
+    ]
+    return [
+      "model": "claude-sonnet-4-6",
+      "max_tokens": 1024,
+      "messages": messages,
+      "stream": false,
+    ]
+  }
 }
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift
index 28f4ab4d60e..0836dea0aa2 100644
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/StreamingPCMPlayer.swift
@@ -14,27 +14,6 @@ final class StreamingPCMPlayer {
   private let format: AVAudioFormat
   private var configObserver: NSObjectProtocol?
 
-  /// Smoothed 0…1 output amplitude, delivered on the main thread (~40×/s) while the
-  /// engine runs. Driven by a tap on the mixer so it tracks what's *actually audible*,
-  /// not what's been buffered ahead. Used to make the speaking waveform audio-reactive.
-  var onLevel: ((Float) -> Void)?
-  /// Fires on the main thread when playback starts (false→true) and when the queue
-  /// fully drains (true→false). Lets the caller mark "speaking" precisely — including
-  /// the silent tail after the last chunk arrives but before it finishes playing.
-  var onPlayingChanged: ((Bool) -> Void)?
-
-  /// Outstanding scheduled buffers (incremented on enqueue, decremented when each
-  /// finishes). Guarded by `bufferLock` because completion handlers run off-main.
-  private var pendingBuffers = 0
-  private let bufferLock = NSLock()
-  private var isPlayingState = false
-  // Exponential moving average of the output RMS (smoothed so the waveform never jitters).
-  private var smoothedLevel: Float = 0
-  // Last value handed to `onLevel`, so we skip main-thread hops while the level is flat
-  // (e.g. the silent tail of a reply) instead of publishing the same number ~40×/s.
-  private var lastDispatchedLevel: Float = -1
-  private var levelTapInstalled = false
-
   init(sampleRate: Double = 24000) {
     // Float32 mono at the source rate; the mixer resamples to the device rate.
     format = AVAudioFormat(
@@ -55,8 +34,6 @@ final class StreamingPCMPlayer {
       log("StreamingPCMPlayer: audio config changed — rebuilding engine")
       self.player.stop()
       self.engine.stop()
-      // The rebuilt graph loses the old tap; let ensureRunning() reinstall it.
-      self.removeLevelTap()
       self.engine.disconnectNodeOutput(self.player)
       self.engine.connect(self.player, to: self.engine.mainMixerNode, format: self.format)
       self.ensureRunning()
@@ -69,42 +46,6 @@ final class StreamingPCMPlayer {
     }
   }
 
-  /// Tap the mixer output once the engine is live so `onLevel` reflects the audio the
-  /// user actually hears. Cheap: one RMS pass per ~1024-frame buffer, EMA-smoothed.
-  private func installLevelTapIfNeeded() {
-    guard !levelTapInstalled, engine.isRunning else { return }
-    levelTapInstalled = true
-    engine.mainMixerNode.installTap(onBus: 0, bufferSize: 1024, format: nil) {
-      [weak self] buffer, _ in
-      guard let self, self.onLevel != nil, let data = buffer.floatChannelData else { return }
-      let frames = Int(buffer.frameLength)
-      guard frames > 0 else { return }
-      let samples = data[0]
-      var sumSquares: Float = 0
-      for i in 0..<frames { sumSquares += samples[i] * samples[i] }
-      let rms = (sumSquares / Float(frames)).squareRoot()
-      // Normalize: speech RMS is small, so apply gain and clamp. Attack fast, release
-      // slow so the bars rise crisply with the voice but settle smoothly between words.
-      let target = min(1.0, rms * 3.2)
-      let alpha: Float = target > self.smoothedLevel ? 0.35 : 0.12
-      self.smoothedLevel += (target - self.smoothedLevel) * alpha
-      let out = self.smoothedLevel
-      // Only hop to main when the level actually moved — flat/silent stretches stay quiet.
-      guard abs(out - self.lastDispatchedLevel) > 0.01 else { return }
-      self.lastDispatchedLevel = out
-      DispatchQueue.main.async { self.onLevel?(out) }
-    }
-  }
-
-  /// Detach the level tap (call when playback stops; reinstalled on the next play).
-  private func removeLevelTap() {
-    guard levelTapInstalled else { return }
-    engine.mainMixerNode.removeTap(onBus: 0)
-    levelTapInstalled = false
-    smoothedLevel = 0
-    lastDispatchedLevel = -1
-  }
-
   /// Ensure the engine + player are actually running before scheduling. Checking
   /// the real `isRunning`/`isPlaying` state (not a one-shot flag) is what makes
   /// playback survive past the first turn: AVAudioEngine auto-suspends when idle
@@ -125,19 +66,6 @@ final class StreamingPCMPlayer {
     if !player.isPlaying {
       player.play()
     }
-    installLevelTapIfNeeded()
-  }
-
-  /// Adjust the outstanding-buffer count and emit `onPlayingChanged` on the edges.
-  private func adjustPending(by delta: Int) {
-    bufferLock.lock()
-    pendingBuffers = max(0, pendingBuffers + delta)
-    let nowPlaying = pendingBuffers > 0
-    let changed = nowPlaying != isPlayingState
-    if changed { isPlayingState = nowPlaying }
-    bufferLock.unlock()
-    guard changed else { return }
-    DispatchQueue.main.async { [weak self] in self?.onPlayingChanged?(nowPlaying) }
   }
 
   /// `data` = little-endian Int16 PCM, mono, at the configured sample rate.
@@ -156,22 +84,11 @@ final class StreamingPCMPlayer {
         channel[i] = max(-1.0, min(1.0, Float(src[i]) / 32768.0))
       }
     }
-    adjustPending(by: 1)
-    player.scheduleBuffer(buffer, completionHandler: { [weak self] in self?.adjustPending(by: -1) })
+    player.scheduleBuffer(buffer)
   }
 
   func stop() {
-    removeLevelTap()  // no playback → no reason to keep tapping (reinstalled on next play)
     player.stop()
     engine.stop()
-    bufferLock.lock()
-    pendingBuffers = 0
-    let wasPlaying = isPlayingState
-    isPlayingState = false
-    bufferLock.unlock()
-    smoothedLevel = 0
-    if wasPlaying {
-      DispatchQueue.main.async { [weak self] in self?.onPlayingChanged?(false) }
-    }
   }
 }
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift
deleted file mode 100644
index 46f0eb99883..00000000000
--- a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceActivityIndicator.swift
+++ /dev/null
@@ -1,199 +0,0 @@
-import SwiftUI
-
-/// The floating bar's single status element. One coherent shape that changes its
-/// motion law, color, and energy per state — never a hard icon swap — so the user
-/// always knows, at a glance and without labels, whether the assistant is:
-///
-///   • idle      — a calm, barely-breathing sliver (nearly still, muted)
-///   • listening — a red waveform reacting to "you" (red is reserved for recording)
-///   • thinking  — a cool blue→violet gradient sweeping on its own fixed clock; the
-///                 self-driven motion (no audio) reads as "working, wait" — critical
-///                 so a late reply never looks like "done / idle"
-///   • speaking  — a green waveform driven by the model's actual output amplitude
-///                 ("it's talking", clearly distinct from the red "you" waveform)
-///
-/// Performance: idle (the long-lived resting state) uses a single Core Animation
-/// property animation — no per-frame redraw. The active states use one
-/// `TimelineView(.animation)` + `Canvas` (a single GPU-friendly draw pass, no
-/// view-graph diffing per frame), and only run for the few seconds a turn is live.
-/// No blur/shadow/material (those force offscreen passes) — glow is faked with
-/// translucent gradient fills.
-struct VoiceActivityIndicator: View {
-    let activity: VoiceActivity
-    /// Smoothed 0…1 amplitude of the model's spoken reply (drives the speaking waveform).
-    var level: CGFloat = 0
-
-    var body: some View {
-        ZStack {
-            switch activity {
-            case .idle:
-                IdleBreath()
-            case .listening:
-                WaveformBars(palette: .listening, level: 0, reactive: false)
-            case .thinking:
-                ThinkingSweep()
-            case .speaking:
-                WaveformBars(palette: .speaking, level: level, reactive: true)
-            }
-        }
-        // Cross-fade + gentle scale between states so energy "ramps" rather than snaps.
-        .transition(.opacity.combined(with: .scale(scale: 0.7)))
-        .animation(.spring(response: 0.4, dampingFraction: 0.86), value: activity)
-    }
-}
-
-// MARK: - Idle
-
-/// A short muted capsule that breathes very slowly. Intentionally low-energy so the
-/// resting bar never pulls the eye. Pure Core Animation — no redraw loop.
-private struct IdleBreath: View {
-    @State private var breathing = false
-
-    var body: some View {
-        Capsule()
-            .fill(Color.white.opacity(breathing ? 0.55 : 0.26))
-            .frame(width: 26, height: 5)
-            .scaleEffect(x: 1, y: breathing ? 1.0 : 0.7, anchor: .center)
-            .onAppear {
-                withAnimation(.easeInOut(duration: 2.8).repeatForever(autoreverses: true)) {
-                    breathing = true
-                }
-            }
-    }
-}
-
-// MARK: - Thinking
-
-/// A cool blue→violet gradient that pans continuously across a capsule at a fixed,
-/// self-driven rate. The autonomous (non-audio) motion is the cue that the model is
-/// working — so a slow reply reads as "wait", never as "done".
-private struct ThinkingSweep: View {
-    // Hoisted: the colors are state-constant, so only the gradient positions change
-    // per frame — no point rebuilding these Gradient values 60–120×/s.
-    private static let sweepGradient = Gradient(colors: [
-        Color(red: 0.70, green: 0.49, blue: 1.0),  // violet
-        Color(red: 0.43, green: 0.55, blue: 1.0),  // blue
-        Color(red: 0.70, green: 0.49, blue: 1.0),
-        Color(red: 0.43, green: 0.55, blue: 1.0),
-        Color(red: 0.70, green: 0.49, blue: 1.0),
-    ])
-    private static let glowGradient = Gradient(colors: [
-        Color.white.opacity(0.45), Color.white.opacity(0),
-    ])
-
-    var body: some View {
-        TimelineView(.animation) { timeline in
-            Canvas { context, size in
-                let t = timeline.date.timeIntervalSinceReferenceDate
-                let rect = CGRect(origin: .zero, size: size)
-                let capsule = Capsule().path(in: rect)
-
-                // Dim base track so the capsule reads even at the low point of the sweep.
-                context.fill(capsule, with: .color(.white.opacity(0.10)))
-
-                context.drawLayer { layer in
-                    layer.clip(to: capsule)
-
-                    // Pan a symmetric violet→blue→violet gradient horizontally. Symmetric
-                    // stops + a span twice the width mean the loop has no visible seam.
-                    let period = 2.2  // seconds per full pan
-                    let phase = (t.truncatingRemainder(dividingBy: period)) / period
-                    let span = size.width * 2
-                    let shift = CGFloat(phase) * span
-                    layer.fill(
-                        Rectangle().path(in: rect),
-                        with: .linearGradient(
-                            Self.sweepGradient,
-                            startPoint: CGPoint(x: -span + shift, y: 0),
-                            endPoint: CGPoint(x: shift, y: 0)))
-
-                    // Soft moving highlight (faked glow) gliding with an eased ping-pong
-                    // so it slows at the ends instead of snapping back.
-                    let eased = 0.5 - 0.5 * cos(phase * 2 * .pi)
-                    let cx = size.width * CGFloat(eased)
-                    let glowR = max(size.height, size.width * 0.32)
-                    layer.fill(
-                        Rectangle().path(in: rect),
-                        with: .radialGradient(
-                            Self.glowGradient,
-                            center: CGPoint(x: cx, y: size.height / 2),
-                            startRadius: 0, endRadius: glowR))
-                }
-            }
-        }
-        .frame(width: 34, height: 8)
-    }
-}
-
-// MARK: - Waveform (listening + speaking)
-
-/// Color treatment for a waveform state — a precomputed top→bottom gradient (constant
-/// per state, so it's built once here, not per-bar per-frame inside the Canvas).
-private struct WaveformPalette {
-    let gradient: Gradient
-
-    /// Red — reserved exclusively for recording the user ("you").
-    static let listening = WaveformPalette(gradient: Gradient(colors: [
-        Color(red: 1.0, green: 0.42, blue: 0.42),
-        Color(red: 1.0, green: 0.18, blue: 0.33),
-    ]))
-
-    /// Green/mint — the assistant speaking ("it"); clearly not the red "you" or blue "thinking".
-    static let speaking = WaveformPalette(gradient: Gradient(colors: [
-        Color(red: 0.46, green: 0.93, blue: 0.74),
-        Color(red: 0.20, green: 0.83, blue: 0.60),
-    ]))
-}
-
-/// A small centered equalizer. `reactive` bars track the live `level` (speaking);
-/// non-reactive bars animate on a lively synthetic clock (listening). A per-bar phase
-/// + center weighting gives an organic "voice blob" rather than a marching pattern.
-private struct WaveformBars: View {
-    let palette: WaveformPalette
-    var level: CGFloat
-    var reactive: Bool
-
-    private let barCount = 5
-
-    var body: some View {
-        TimelineView(.animation) { timeline in
-            Canvas { context, size in
-                let t = timeline.date.timeIntervalSinceReferenceDate
-                // Equal bars and gaps: n bars, n-1 gaps, all one unit wide.
-                let unit = size.width / CGFloat(barCount * 2 - 1)
-                let radius = unit / 2
-                let minH = size.height * 0.28
-
-                for i in 0..<barCount {
-                    // Center bars are weighted taller so it reads as a rounded voice shape.
-                    let distFromCenter = abs(CGFloat(i) - CGFloat(barCount - 1) / 2)
-                    let centerWeight = 1.0 - distFromCenter / CGFloat(barCount)
-
-                    let wobble = (sin(t * 7.5 + Double(i) * 1.1) + 1) / 2  // 0…1
-                    let drive: CGFloat
-                    if reactive {
-                        // Audio-reactive: height follows the smoothed amplitude, with a
-                        // little per-bar wobble so quiet passages still feel alive.
-                        drive = min(1, level * (0.55 + 0.9 * centerWeight) + CGFloat(wobble) * 0.18)
-                    } else {
-                        // Listening: purely synthetic but lively equalizer motion.
-                        drive = CGFloat(wobble) * (0.45 + 0.55 * centerWeight)
-                    }
-
-                    let h = max(minH, minH + (size.height - minH) * drive)
-                    let x = CGFloat(i) * unit * 2
-                    let y = (size.height - h) / 2
-                    let barRect = CGRect(x: x, y: y, width: unit, height: h)
-                    let bar = Capsule().path(in: barRect)
-                    context.fill(
-                        bar,
-                        with: .linearGradient(
-                            palette.gradient,
-                            startPoint: CGPoint(x: x, y: y),
-                            endPoint: CGPoint(x: x, y: y + h)))
-                }
-            }
-        }
-        .frame(width: 34, height: 16)
-    }
-}
diff --git a/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
new file mode 100644
index 00000000000..76737568c91
--- /dev/null
+++ b/desktop/macos/Desktop/Sources/FloatingControlBar/VoiceWaveformBars.swift
@@ -0,0 +1,136 @@
+import SwiftUI
+
+/// Playful, compact mic visualizer shown in the floating control bar while
+/// push-to-talk is active — a few chunky bars that bounce to the user's voice
+/// (HeyClicky-style), replacing the old pulsing red dot.
+///
+/// Animation notes (this is what makes it actually move):
+/// - `TimelineView(.animation)` is the clock. The Canvas closure **uses
+///   `timeline.date`** every frame (via `model.advance(to:)`) so SwiftUI treats
+///   the drawing as changed each tick and redraws — without referencing the
+///   per-frame date the Canvas is cached and freezes (the original bug).
+/// - We read `AudioLevelMonitor.shared.microphoneLevel` (one RMS scalar, ~5 Hz)
+///   each frame and spring the bars toward it at 60fps, so 5 Hz data still looks
+///   smooth. Per-bar phase + a center arch make it feel alive, not mechanical.
+/// - `paused: !isActive` stops the loop when PTT isn't listening. The bars are a live
+///   `@State` model (no sample history is kept). NOTE(review): nothing in this file resets
+///   it, so a fresh session (no frozen "last word") presumably relies on the view being re-created.
+struct VoiceWaveformBars: View {
+    let isActive: Bool  // false pauses the timeline and feeds a zero level to the model
+
+    private static let barCount = 5  // number of bars drawn
+    private static let barWidth: CGFloat = 4  // width of each bar
+    private static let barSpacing: CGFloat = 3  // gap between adjacent bars
+    private static let barHeight: CGFloat = 18  // height of the view's frame
+    private static let fillGradient = Gradient(colors: [OmiColors.purpleAccent, OmiColors.purplePrimary])
+
+    @State private var model: WaveBarsModel  // reference type; advanced in place from the Canvas closure
+
+    init(isActive: Bool) {
+        self.isActive = isActive
+        _model = State(initialValue: WaveBarsModel(barCount: Self.barCount))
+    }
+
+    private var width: CGFloat {  // total width: n bars plus n-1 gaps
+        let n = CGFloat(Self.barCount)
+        return n * Self.barWidth + (n - 1) * Self.barSpacing
+    }
+
+    var body: some View {
+        TimelineView(.animation(paused: !isActive)) { timeline in
+            Canvas { context, size in
+                let level = isActive ? CGFloat(AudioLevelMonitor.shared.microphoneLevel) : 0
+                model.advance(to: timeline.date, level: level, active: isActive)  // step the springs, then draw
+                draw(into: &context, size: size)
+            }
+        }
+        .frame(width: width, height: Self.barHeight)
+        .accessibilityHidden(true)
+    }
+
+    private func draw(into context: inout GraphicsContext, size: CGSize) {
+        let minH: CGFloat = 2  // floor so a zero-valued bar still draws
+        let maxH = size.height
+        let step = Self.barWidth + Self.barSpacing
+        let centerY = size.height / 2
+
+        for i in 0..<Self.barCount {
+            let x = CGFloat(i) * step
+            let h = max(minH, minH + (maxH - minH) * model.values[i])  // map the 0…1 value onto minH…maxH
+            let path = Path(
+                roundedRect: CGRect(x: x, y: centerY - h / 2, width: Self.barWidth, height: h),
+                cornerRadius: Self.barWidth / 2  // half the width -> fully rounded ends
+            )
+            context.fill(
+                path,
+                with: .linearGradient(
+                    Self.fillGradient,
+                    startPoint: CGPoint(x: x, y: centerY - h / 2),
+                    endPoint: CGPoint(x: x, y: centerY + h / 2)
+                )
+            )
+        }
+    }
+}
+
+/// Per-bar bounce state for `VoiceWaveformBars`. Advanced once per frame from a
+/// single mic level. Reference type so it persists across the Canvas redraws.
+@MainActor
+final class WaveBarsModel {
+    let barCount: Int
+    private(set) var values: [CGFloat]  // per-bar height fraction; clamped to 0…1 in advance()
+    private var velocities: [Double]  // per-bar spring velocity
+
+    private let phases: [Double]  // per-bar sine phase offset
+    private let speeds: [Double]  // per-bar sine rate (multiplies the timestamp)
+    private let weights: [Double]  // per-bar gain; largest at the center bar
+    private var lastTime: CFTimeInterval?  // timestamp of the previous advance(); nil before the first
+    private var envelope: Double = 0 // decaying recent-peak follower for auto-gain
+
+    // Underdamped spring, unit mass: ζ = damping / (2·√stiffness) ≈ 0.35 -> visible bounce/overshoot.
+    private let stiffness: Double = 200
+    private let damping: Double = 10
+
+    init(barCount: Int) {
+        self.barCount = barCount
+        values = Array(repeating: 0, count: barCount)
+        velocities = Array(repeating: 0, count: barCount)
+        phases = (0..<barCount).map { Double($0) * 1.9 }
+        speeds = (0..<barCount).map { 6.0 + 2.5 * sin(Double($0) * 1.3) }
+        // Center bars taller -> a friendly arch (weights run from 0.72 at the edges to 1.17 mid).
+        let mid = Double(barCount - 1) / 2
+        weights = (0..<barCount).map { i in
+            let dist: Double = abs(Double(i) - mid) / max(mid, 1.0)  // max(…, 1) avoids 0/0 for one bar
+            let arch: Double = 1.0 - dist
+            return 0.72 + 0.45 * arch
+        }
+    }
+
+    func advance(to date: Date, level: CGFloat, active: Bool) {  // one spring step per bar
+        let now = date.timeIntervalSinceReferenceDate
+        // Clamp dt to 0…32 ms (the first call assumes 1/60 s) so the spring integration stays stable.
+        let dt: Double = lastTime.map { min(0.032, max(0.0, now - $0)) } ?? (1.0 / 60.0)
+        lastTime = now
+
+        let lvl = Double(max(0, level))
+        // Auto-gain: normalize against a decaying recent peak so the bars use the
+        // full height no matter how loud the mic actually is (fixes "barely moving").
+        envelope = max(lvl, envelope - 0.7 * dt)  // jumps up to a new peak, decays by 0.7 per second
+        let norm = envelope > 0.04 ? min(1.0, lvl / envelope) : 0.0  // a peak of 0.04 or less counts as silence
+        let gained = pow(norm, 0.75)  // exponent < 1 lifts quieter levels
+
+        for i in 0..<barCount {
+            // Lively idle bounce so it always feels alive while listening.
+            let idle = active ? (0.14 + 0.12 * (0.5 + 0.5 * sin(now * speeds[i] + phases[i]))) : 0.0
+            let wobble = 0.55 + 0.45 * sin(now * speeds[i] + phases[i])
+            let target = max(idle, min(1.0, gained * weights[i] * wobble))
+
+            // Underdamped spring (semi-implicit Euler: velocity first, then position) -> bouncy overshoot.
+            let x = Double(values[i])
+            let accel = stiffness * (target - x) - damping * velocities[i]
+            velocities[i] += accel * dt
+            let nx = x + velocities[i] * dt
+            values[i] = CGFloat(max(0.0, min(1.0, nx)))
+        }
+    }
+}
diff --git a/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift b/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
index fdb8bc462e7..79fa7458530 100644
--- a/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
+++ b/desktop/macos/Desktop/Sources/MainWindow/Pages/SettingsPage.swift
@@ -271,12 +271,7 @@ struct SettingsContentView: View {
 
   // AI Chat settings
   @AppStorage("chatBridgeMode") private var chatBridgeMode: String = "piMono"
-  @AppStorage("realtimeOmniProvider") private var realtimeOmniProvider: String = RealtimeOmniProvider.auto.rawValue
-  // Realtime-as-hub (Phase 1, dev/BYOK only): the realtime model is the single
-  // tool-dispatching voice hub. Provider toggle persisted here; RealtimeHubSession
-  // reads it at connect.
-  @AppStorage("realtimeHubEnabled") private var realtimeHubEnabled = false
-  @AppStorage("realtimeHubProvider") private var realtimeHubProvider: String = RealtimeHubProvider.openai.rawValue
+  @AppStorage("realtimeOmniProvider") private var realtimeOmniProvider: String = RealtimeOmniProvider.gptRealtime2.rawValue
   @AppStorage("askModeEnabled") private var askModeEnabled = false
   @AppStorage("claudeMdEnabled") private var claudeMdEnabled = true
   @AppStorage("projectClaudeMdEnabled") private var projectClaudeMdEnabled = true
@@ -2534,75 +2529,6 @@ struct SettingsContentView: View {
       voiceSpeedSlider(settingId: "floatingbar.voicespeed")
         .opacity(shortcutSettings.hasAnyFloatingBarVoiceAnswersEnabled ? 1 : 0.55)
         .disabled(!shortcutSettings.hasAnyFloatingBarVoiceAnswersEnabled)
-
-      realtimeHubCard
-      realtimeHubProviderCard
-        .opacity(realtimeHubEnabled ? 1 : 0.55)
-        .disabled(!realtimeHubEnabled)
-    }
-  }
-
-  // MARK: Realtime-as-hub (Phase 1, dev/BYOK only)
-
-  /// The realtime model becomes the single voice hub: in-session STT + reasoning
-  /// + tool-choice routing + spoken reply, bypassing the STT→Haiku router→Claude
-  /// cascade. Client-direct using the user's own BYOK key (dev/test only).
-  private var realtimeHubCard: some View {
-    settingsCard(settingId: "floatingbar.realtimehub") {
-      HStack(spacing: 16) {
-        VStack(alignment: .leading, spacing: 4) {
-          Text("Realtime Voice Hub (experimental)")
-            .scaledFont(size: 16, weight: .semibold)
-            .foregroundColor(OmiColors.textPrimary)
-          Text(
-            "Let the realtime model run the whole voice turn — listen, decide, and speak — "
-              + "instead of the slower transcribe→route→answer pipeline. Uses your own provider key."
-          )
-          .scaledFont(size: 13)
-          .foregroundColor(OmiColors.textSecondary)
-        }
-        Spacer()
-        Toggle("", isOn: $realtimeHubEnabled)
-          .toggleStyle(.switch)
-          .tint(OmiColors.purplePrimary)
-          .onChange(of: realtimeHubEnabled) { _ in
-            NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
-          }
-      }
-    }
-  }
-
-  private var realtimeHubProviderCard: some View {
-    let provider = RealtimeHubProvider(rawValue: realtimeHubProvider) ?? .openai
-    let hasKey = APIKeyService.byokKey(provider.byokProvider) != nil
-    return settingsCard(settingId: "floatingbar.realtimehub.provider") {
-      HStack(spacing: 16) {
-        VStack(alignment: .leading, spacing: 4) {
-          Text("Hub Provider")
-            .scaledFont(size: 16, weight: .semibold)
-            .foregroundColor(OmiColors.textPrimary)
-          Text(
-            hasKey
-              ? provider.subtitle
-              : "⚠️ No \(provider.byokProvider.displayName) key set — add one in Developer settings to use this provider."
-          )
-          .scaledFont(size: 13)
-          .foregroundColor(hasKey ? OmiColors.textSecondary : OmiColors.purplePrimary)
-        }
-        Spacer()
-        Picker("", selection: $realtimeHubProvider) {
-          ForEach(RealtimeHubProvider.allCases, id: \.rawValue) { p in
-            Text(p.displayName).tag(p.rawValue)
-          }
-        }
-        .pickerStyle(.menu)
-        .labelsHidden()
-        .frame(width: 180)
-        .tint(OmiColors.purplePrimary)
-        .onChange(of: realtimeHubProvider) { _ in
-          NotificationCenter.default.post(name: .realtimeHubSettingsDidChange, object: nil)
-        }
-      }
     }
   }
 
@@ -3494,6 +3420,10 @@ struct SettingsContentView: View {
               if newValue == RealtimeOmniProvider.auto.rawValue {
                 AutoModelSelector.shared.refreshIfStale()
               }
+              // The picker writes @AppStorage directly (bypassing the RealtimeOmniSettings
+              // setter), so post the change ourselves — this is what re-warms the realtime
+              // hub on the newly selected provider (and is a no-op for unchanged providers).
+              NotificationCenter.default.post(name: .realtimeOmniSettingsDidChange, object: nil)
             }
           }
 
diff --git a/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift b/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift
index 393feeaa853..5edc05c7094 100644
--- a/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift
+++ b/desktop/macos/Desktop/Sources/RealtimeOmni/RealtimeOmniSettings.swift
@@ -59,7 +59,9 @@ final class RealtimeOmniSettings {
 
     private init() {
         UserDefaults.standard.register(defaults: [
-            providerKey: RealtimeOmniProvider.auto.rawValue,
+            // Default to OpenAI (GPT Realtime 2); the user can switch to Gemini or Auto
+            // in Advanced → Voice Model. This default also drives the realtime hub provider.
+            providerKey: RealtimeOmniProvider.gptRealtime2.rawValue,
             enabledKey: false,
         ])
     }
diff --git a/desktop/macos/Desktop/Tests/AboutUserCardTests.swift b/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
new file mode 100644
index 00000000000..df06db6bdbd
--- /dev/null
+++ b/desktop/macos/Desktop/Tests/AboutUserCardTests.swift
@@ -0,0 +1,28 @@
+import XCTest
+@testable import Omi_Computer
+
+final class AboutUserCardTests: XCTestCase {
+    func testRenderIncludesNameFactsCountsAndHedge() {
+        let card = AboutUserCard.render(
+            name: "Sam",
+            facts: ["Lives in San Francisco", "Prefers concise answers"],
+            overdue: 2,
+            dueToday: 3
+        )
+        XCTAssertTrue(card.contains("<about_user>"))   // opening wrapper tag
+        XCTAssertTrue(card.contains("</about_user>"))  // closing wrapper tag
+        XCTAssertTrue(card.contains("Name: Sam"))
+        XCTAssertTrue(card.contains("- Lives in San Francisco"))   // each fact rendered with a "- " prefix
+        XCTAssertTrue(card.contains("- Prefers concise answers"))
+        XCTAssertTrue(card.contains("2 overdue"))
+        XCTAssertTrue(card.contains("3 due today"))
+        XCTAssertTrue(card.contains("quick snapshot"))  // presumably the "hedge" wording in the test name
+    }
+
+    func testRenderEmptyState() {
+        let card = AboutUserCard.render(name: "", facts: [], overdue: 0, dueToday: 0)
+        XCTAssertFalse(card.contains("Name:"))                 // no name line when empty
+        XCTAssertTrue(card.contains("Nothing saved"))          // facts empty-state
+        XCTAssertTrue(card.contains("nothing overdue or due today"))  // zero-counts wording
+    }
+}
diff --git a/desktop/macos/Desktop/Tests/HubEscalationTests.swift b/desktop/macos/Desktop/Tests/HubEscalationTests.swift
new file mode 100644
index 00000000000..f7d185da3ea
--- /dev/null
+++ b/desktop/macos/Desktop/Tests/HubEscalationTests.swift
@@ -0,0 +1,27 @@
+import XCTest
+
+@testable import Omi_Computer
+
+final class HubEscalationTests: XCTestCase {
+  func testBodyHasSystemPromptAndAppendsContext() throws {
+    let body = RealtimeHubTools.escalationBody(
+      query: "What's the best plan?", context: "User is comparing the M3 and M4 MacBook.",
+      aboutUser: "<about_user>\nName: Sam\n</about_user>")
+    XCTAssertEqual(body["model"] as? String, "claude-sonnet-4-6")
+    let messages = try XCTUnwrap(body["messages"] as? [[String: String]])  // fail, don't crash
+    guard messages.count >= 2 else { return XCTFail("expected a system and a user message") }
+    XCTAssertEqual(messages[0]["role"], "system")
+    XCTAssertTrue(try XCTUnwrap(messages[0]["content"]).contains("<about_user>"))
+    XCTAssertEqual(messages[1]["role"], "user")
+    XCTAssertTrue(try XCTUnwrap(messages[1]["content"]).contains("What's the best plan?"))
+    XCTAssertTrue(try XCTUnwrap(messages[1]["content"]).contains("M3 and M4"))  // context appended
+  }
+
+  func testBodyOmitsContextSectionWhenEmpty() throws {
+    let body = RealtimeHubTools.escalationBody(query: "Capital of France?", context: "", aboutUser: "")
+    let messages = try XCTUnwrap(body["messages"] as? [[String: String]])
+    let userContent = try XCTUnwrap(messages.dropFirst().first?["content"])  // second message's text
+    XCTAssertFalse(userContent.contains("Context"))
+    XCTAssertFalse(userContent.contains("Answer concisely for a spoken reply"))
+  }
+}
diff --git a/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift b/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
new file mode 100644
index 00000000000..52e2ff52e29
--- /dev/null
+++ b/desktop/macos/Desktop/Tests/HubSystemInstructionTests.swift
@@ -0,0 +1,16 @@
+import XCTest
+@testable import Omi_Computer
+
+final class HubSystemInstructionTests: XCTestCase {
+    func testInstructionInjectsCardAndUsesUserLanguage() {
+        let card = "<about_user>\nName: Sam\n</about_user>"  // stand-in card; must appear verbatim below
+        let instr = RealtimeHubTools.systemInstruction(aboutUser: card)
+        XCTAssertTrue(instr.contains(card))                                   // card injected
+        XCTAssertTrue(instr.lowercased().contains("language the user"))        // reply-in-user-language
+        XCTAssertFalse(instr.contains("Always reply in English"))             // old rule gone
+        XCTAssertTrue(instr.contains("spawn_agent"))                          // guardrails preserved
+        XCTAssertTrue(instr.contains("get_daily_recap"))                      // presumably a tool name — confirm
+        XCTAssertTrue(instr.contains("ask_higher_model"))                     // presumably a tool name — confirm
+        XCTAssertTrue(instr.contains("ANSWER YOURSELF"))                      // case-sensitive match
+    }
+}