Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f121f31
refactor(desktop): hub always-on — drop enable toggle, derive provide…
vendz Jun 17, 2026
73bebbd
refactor(desktop): remove hub enable gates; unwire pill flags; single…
vendz Jun 17, 2026
ce7c34e
refactor(desktop): remove floating-bar Realtime Hub toggle + provider…
vendz Jun 17, 2026
884fd41
feat(desktop): default Voice Model to OpenAI (GPT Realtime 2)
vendz Jun 17, 2026
f858824
revert(desktop): restore imperative PTT resize; drop reactive voice-a…
vendz Jun 17, 2026
bb26c29
revert(desktop): restore updateBarState(skipResize:) imperative resize
vendz Jun 17, 2026
155c171
refactor(desktop): drop orphaned audio level-tap from StreamingPCMPlayer
vendz Jun 17, 2026
4b047bc
docs(desktop): update RealtimeHubSession header (BYOK + managed, not …
vendz Jun 17, 2026
26c445b
chore(desktop): changelog — hub as default voice path; drop reverted …
vendz Jun 17, 2026
6666c64
fix(desktop): re-warm hub on Voice Model change; collapse bar on mid-…
vendz Jun 17, 2026
ff7bcc0
fix(desktop): remove dead .realtimeHubSettingsDidChange notification
vendz Jun 17, 2026
1793120
fix(desktop): Voice Model picker posts .realtimeOmniSettingsDidChange
vendz Jun 17, 2026
713f5ff
fix(desktop): hub takes a turn only when actually connected (graceful…
vendz Jun 17, 2026
1b67d9c
feat(desktop): give the realtime voice agent personal-data tools
vendz Jun 18, 2026
9c419b1
feat(desktop): execute the voice-agent data tools via APIClient
vendz Jun 18, 2026
8e98a2b
test(desktop): stub the new voice-agent data tools in the hub harness
vendz Jun 18, 2026
6945db1
feat(desktop): give the realtime hub activity, screen, and full-task …
vendz Jun 18, 2026
dd9e9cb
feat(desktop): dispatch the hub's get_daily_recap, search_screen_hist…
vendz Jun 18, 2026
cc5a8e2
test(desktop): stub the new hub data tools in the test harness
vendz Jun 18, 2026
5999655
feat(desktop): local <about_user> identity-card builder for the realt…
vendz Jun 18, 2026
4780076
fix(desktop): AboutUserCard name falls back to displayName; tighten h…
vendz Jun 18, 2026
d09705b
feat(desktop): inject local <about_user> card + user-language reply i…
vendz Jun 18, 2026
7f84b72
fix(desktop): restore ask_higher_model + everything-else lanes to spe…
vendz Jun 18, 2026
8367bed
feat(desktop): ask_higher_model carries context + persona/card system…
vendz Jun 18, 2026
7781422
feat(desktop): spawn_agent supplies its own title, skipping the redun…
vendz Jun 18, 2026
030f9ad
revert(desktop): roll back escalation-policy prompt prose to lean spe…
vendz Jun 18, 2026
d4f9d1b
feat(desktop): mandatory spoken heads-up before slow tools (ask_highe…
vendz Jun 18, 2026
60802ea
feat(desktop): playful 5-bar voice-reactive PTT mic waveform
vendz Jun 18, 2026
e611b62
refactor(desktop): use VoiceWaveformBars in floating-bar listening view
vendz Jun 18, 2026
46d1b0c
refactor(desktop): use VoiceWaveformBars in voice follow-up view; pur…
vendz Jun 18, 2026
f221dc3
feat(desktop): feed live mic level to the PTT waveform via AudioLevel…
vendz Jun 18, 2026
aa4e1d8
fix(desktop): typed follow-up after a voice turn is no longer spoken …
vendz Jun 18, 2026
c48c434
refactor(desktop): soften the slow-tool heads-up prompt wording
vendz Jun 18, 2026
3b9d18e
chore(desktop): changelog entry for the PTT mic waveform
vendz Jun 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions desktop/macos/CHANGELOG.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"unreleased": [
"Faster, cheaper assistant responses via Anthropic prompt caching of the system+tools prefix and conversation history",
"Added an experimental Realtime Voice Hub (Settings \u2192 Floating Bar): the realtime model handles your whole voice turn \u2014 listening, deciding, and speaking \u2014 for noticeably faster replies",
"Redesigned the floating bar voice indicator with smooth, distinct idle, listening, thinking, and speaking states so you always know whether the assistant is working or done",
"Faster voice replies (experimental): the realtime model now handles your whole voice turn \u2014 listening, deciding, and speaking \u2014 instead of the slower transcribe\u2192route\u2192answer pipeline",
"Voice (push-to-talk) conversations now appear in your chat history",
"Fixed older chat messages failing to load in long chats"
"Fixed older chat messages failing to load in long chats",
"Replaced the push-to-talk red dot in the floating bar with a realtime audio equalizer"
],
"releases": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,11 +349,8 @@ struct AIResponseView: View {

private var voiceFollowUpView: some View {
HStack(spacing: 8) {
Circle()
.fill(Color.red)
.frame(width: 10, height: 10)
.scaleEffect(1.2)
.animation(.easeInOut(duration: 0.6).repeatForever(autoreverses: true), value: isVoiceFollowUp)
// Playful realtime mic waveform (replaces the old pulsing red dot)
VoiceWaveformBars(isActive: isVoiceFollowUp)

Image(systemName: "mic.fill")
.scaledFont(size: 14, weight: .semibold)
Expand All @@ -375,7 +372,7 @@ struct AIResponseView: View {
}
.padding(.horizontal, 10)
.padding(.vertical, 8)
.background(Color.red.opacity(0.15))
.background(OmiColors.purplePrimary.opacity(0.12))
.cornerRadius(8)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import Foundation

/// Builds the compact, local-only `<about_user>` block injected into the hub's
/// system instruction at warm time. It carries identity plus a rough snapshot only; exact or
/// current lists stay behind the read tools, and the card's closing hint line says so. No network calls.
enum AboutUserCard {
/// Pure formatter — kept separate from `build()` so it is unit-testable.
///
/// Produces the newline-joined `<about_user>` card: an opening tag, an optional
/// `Name:` line, a bulleted list of facts (or a "Nothing saved yet." placeholder),
/// a one-line task summary, a hint pointing at the read tools, and the closing tag.
///
/// `name` and `facts` are user-derived text and are treated as untrusted: each value
/// is flattened to a single line and its angle brackets are neutralized, so a memory
/// containing newlines or a literal `</about_user>` cannot close the section early or
/// smuggle extra lines/markup into the surrounding prompt.
///
/// - Parameters:
///   - name: The user's name; the `Name:` line is omitted when it is empty after sanitizing.
///   - facts: Short statements about the user, rendered one bullet each. Entries that
///     are empty after sanitizing are dropped.
///   - overdue: Number of overdue tasks.
///   - dueToday: Number of tasks due today.
/// - Returns: The complete card text, starting with `<about_user>` and ending with
///   `</about_user>`.
static func render(name: String, facts: [String], overdue: Int, dueToday: Int) -> String {
  let safeName = sanitized(name)
  let safeFacts = facts.map { sanitized($0) }.filter { !$0.isEmpty }

  var lines: [String] = ["<about_user>"]
  if !safeName.isEmpty { lines.append("Name: \(safeName)") }
  lines.append("What Omi knows about them:")
  if safeFacts.isEmpty {
    lines.append("- Nothing saved yet.")
  } else {
    lines.append(contentsOf: safeFacts.map { "- \($0)" })
  }
  if overdue == 0 && dueToday == 0 {
    lines.append("Right now: nothing overdue or due today.")
  } else {
    lines.append("Right now: \(overdue) overdue, \(dueToday) due today.")
  }
  lines.append(
    "(This is a quick snapshot — for the exact or current list, call get_tasks / get_action_items.)")
  lines.append("</about_user>")
  return lines.joined(separator: "\n")
}

/// Makes untrusted text safe to embed as a single card line.
///
/// Collapses every run of whitespace, newlines, and control characters into one space
/// (which also trims both ends), then swaps `<` / `>` for look-alike quotation marks so
/// the text can no longer form a tag.
private static func sanitized(_ text: String) -> String {
  let separators = CharacterSet.whitespacesAndNewlines.union(.controlCharacters)
  return text
    .components(separatedBy: separators)
    .filter { !$0.isEmpty }
    .joined(separator: " ")
    .replacingOccurrences(of: "<", with: "‹")
    .replacingOccurrences(of: ">", with: "›")
}

/// Gathers local data (auth name, top memories, task counts) and renders the card.
/// Best-effort: any failure degrades to a smaller card, never throws.
///
/// - Returns: The card text produced by `render(name:facts:overdue:dueToday:)`.
@MainActor
static func build() async -> String {
  // Prefer the given name, falling back to the display name. The choice is made on the
  // *trimmed* given name so a whitespace-only value cannot suppress the fallback.
  let auth = AuthService.shared
  let givenName = auth.givenName.trimmingCharacters(in: .whitespacesAndNewlines)
  let name =
    givenName.isEmpty
    ? auth.displayName.trimmingCharacters(in: .whitespacesAndNewlines)
    : givenName

  // A failed memory read is deliberately swallowed: the card then renders without facts.
  var facts: [String] = []
  if let memories = try? await MemoryStorage.shared.getLocalMemories(limit: 8) {
    facts = memories.prefix(8).compactMap { memory in
      let text = memory.content.trimmingCharacters(in: .whitespacesAndNewlines)
      guard !text.isEmpty else { return nil }
      // Keep each fact short: anything over 120 characters is cut to 117 plus an ellipsis.
      return text.count > 120 ? String(text.prefix(117)) + "…" : text
    }
  }

  // Refresh the task store first so the counts below reflect its latest load.
  await TasksStore.shared.loadDashboardTasks()
  let overdue = TasksStore.shared.overdueTasks.count
  let dueToday = TasksStore.shared.todaysTasks.count

  return render(name: name, facts: facts, overdue: overdue, dueToday: dueToday)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,6 @@ struct FloatingBarNotification: Identifiable, Equatable {
}
}

/// The high-level voice activity the floating bar is reflecting right now. Derived
/// from the lower-level PTT/hub flags (see `FloatingControlBarState.voiceActivity`) so the
/// status indicator has a single, ordered source of truth (each state has exactly one
/// visual treatment).
enum VoiceActivity: Equatable {
/// Nothing happening — the bar rests as a calm, barely-breathing sliver.
case idle
/// User is holding push-to-talk; we're capturing their voice (red, "you").
case listening
/// Turn committed, waiting on the model's reply — the model may answer late,
/// so this MUST read as "working, wait" rather than "done" (cool autonomous swirl).
case thinking
/// The model is speaking its reply (warm, audio-reactive waveform — "it").
case speaking
}

/// Observable object holding the state for the floating control bar.
@MainActor
class FloatingControlBarState: NSObject, ObservableObject {
Expand Down Expand Up @@ -108,35 +93,6 @@ class FloatingControlBarState: NSObject, ObservableObject {
@Published var isVoiceListening: Bool = false
@Published var isVoiceLocked: Bool = false
@Published var voiceTranscript: String = ""
/// True after a voice turn is committed and we're waiting on the model's reply
/// (vs. still recording) — drives the "Thinking…/Responding…" indicator so the user
/// knows to wait rather than re-pressing (which would interrupt a slow reply).
@Published var isVoiceThinking: Bool = false
/// True while the model is actually speaking its reply (native audio playing or the
/// AVSpeech fallback talking). Distinct from `isVoiceThinking` so the indicator can
/// show a clearly different "it's talking" treatment vs. "it's working".
@Published var isVoiceSpeaking: Bool = false
/// Smoothed 0…1 output amplitude of the model's spoken reply, sampled from the
/// playback engine. Drives the speaking waveform so it reacts to the actual voice
/// (premium feel) rather than animating blindly. 0 when not speaking.
@Published var voiceLevel: CGFloat = 0

/// Collapses the three voice flags into one `VoiceActivity` for the status indicator.
/// Priority is fixed: listening first (the user is talking), then speaking, then
/// thinking, otherwise idle. The flags are expected to be mutually exclusive; the
/// fixed order just keeps the result well-defined if more than one is set at once.
var voiceActivity: VoiceActivity {
  switch (isVoiceListening, isVoiceSpeaking, isVoiceThinking) {
  case (true, _, _):
    return .listening
  case (false, true, _):
    return .speaking
  case (false, false, true):
    return .thinking
  case (false, false, false):
    return .idle
  }
}

/// True while a voice turn is in flight, i.e. at least one of the listening,
/// thinking, or speaking flags is set. Intended to stay true across the whole
/// listening → thinking → speaking arc so the bar expands and collapses once per turn.
var isVoiceActive: Bool {
  let turnFlags = [isVoiceListening, isVoiceThinking, isVoiceSpeaking]
  return turnFlags.contains(true)
}

// Voice follow-up state (PTT while AI conversation is active)
@Published var isVoiceFollowUp: Bool = false
Expand Down Expand Up @@ -180,8 +136,6 @@ class FloatingControlBarState: NSObject, ObservableObject {
isVoiceFollowUp = false
voiceFollowUpTranscript = ""
currentQueryFromVoice = false
isVoiceSpeaking = false
voiceLevel = 0
lastConversationActivityAt = nil
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@ struct FloatingControlBarView: View {
.animation(.spring(response: 0.35, dampingFraction: 0.82), value: state.currentNotification?.id)
}

/// Whether the bar chrome should stretch to fill the window width. Stays full-width
/// for the whole voice turn (listening → thinking → speaking) so the status indicator
/// has room and the bar resizes exactly once per turn.
/// Whether the bar chrome should stretch to fill the window width
private var barNeedsFullWidth: Bool {
isHovering || state.showingAIConversation || state.isVoiceActive
isHovering || state.showingAIConversation || state.isVoiceListening
}

private var barChrome: some View {
Expand Down Expand Up @@ -85,7 +83,7 @@ struct FloatingControlBarView: View {
}
}
.overlay(alignment: .topTrailing) {
if isHovering && !state.isVoiceActive {
if isHovering && !state.isVoiceListening {
Button {
openFloatingBarSettings()
} label: {
Expand Down Expand Up @@ -281,8 +279,8 @@ struct FloatingControlBarView: View {

private var controlBarView: some View {
Group {
if state.isVoiceActive && !state.isVoiceFollowUp {
voiceActiveView
if state.isVoiceListening && !state.isVoiceFollowUp {
voiceListeningView
.padding(.horizontal, 6)
.padding(.vertical, 3)
.frame(height: 50)
Expand All @@ -308,11 +306,11 @@ struct FloatingControlBarView: View {
}
}

/// Minimal resting indicator shown when not hovering and no voice turn is active —
/// a calm, slowly breathing sliver. (Active turns render `voiceActiveView` instead.)
/// Minimal thin bar shown when not hovering
private var compactCircleView: some View {
VoiceActivityIndicator(activity: state.voiceActivity, level: state.voiceLevel)
.frame(width: 28, height: 14)
RoundedRectangle(cornerRadius: 3)
.fill(Color.white.opacity(0.5))
.frame(width: 28, height: 6)
}

private func compactToggle(_ title: String, isOn: Binding<Bool>) -> some View {
Expand Down Expand Up @@ -360,15 +358,16 @@ struct FloatingControlBarView: View {
}
}

/// Unified expanded voice view for the whole turn. The status indicator carries the
/// state (listening / thinking / speaking) visually; the text is just the helpful
/// detail (transcript, "Release to send", "Thinking…"). One element, no jarring swaps.
private var voiceActiveView: some View {
private var voiceListeningView: some View {
HStack(spacing: 8) {
VoiceActivityIndicator(activity: state.voiceActivity, level: state.voiceLevel)
.frame(width: 34, height: 18)
// Playful realtime mic waveform (replaces the old pulsing red dot)
VoiceWaveformBars(isActive: state.isVoiceListening)

if state.isVoiceLocked && state.isVoiceListening {
Image(systemName: "mic.fill")
.scaledFont(size: 14, weight: .semibold)
.foregroundColor(.white)

if state.isVoiceLocked {
Text("LOCKED")
.scaledFont(size: 10, weight: .bold)
.foregroundColor(.orange)
Expand All @@ -378,31 +377,21 @@ struct FloatingControlBarView: View {
.cornerRadius(4)
}

// Dim only the "Release to send" hint; live transcript / status reads brighter.
let isHint = state.voiceActivity == .listening && state.voiceTranscript.isEmpty
Text(voiceStatusText)
.scaledFont(size: 13)
.foregroundColor(.white.opacity(isHint ? 0.5 : 0.85))
.lineLimit(1)
.truncationMode(.head)
}
}

/// The detail text beside the indicator for the current voice state. The indicator
/// itself carries the state visually; this is just the helpful detail.
private var voiceStatusText: String {
switch state.voiceActivity {
case .listening:
if !state.voiceTranscript.isEmpty { return state.voiceTranscript }
return state.isVoiceLocked
? "Tap \(shortcutSettings.pttShortcut.displayLabel) to send"
: "Release \(shortcutSettings.pttShortcut.displayLabel) to send"
case .thinking:
return "Thinking…"
case .speaking:
return "Speaking…"
case .idle:
return ""
if !state.voiceTranscript.isEmpty {
Text(state.voiceTranscript)
.scaledFont(size: 13)
.foregroundColor(.white.opacity(0.8))
.lineLimit(1)
.truncationMode(.head)
} else {
Text(
state.isVoiceLocked
? "Tap \(shortcutSettings.pttShortcut.displayLabel) to send"
: "Release \(shortcutSettings.pttShortcut.displayLabel) to send"
)
.scaledFont(size: 13)
.foregroundColor(.white.opacity(0.5))
}
}
}

Expand Down
Loading