Skip to content
29 changes: 29 additions & 0 deletions app/src/main/java/com/ryoncook/glassesai/ConfirmationParser.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.ryoncook.glassesai

/** Outcome of interpreting a spoken reply to a yes/no confirmation prompt. */
enum class ConfirmationResult {
    /** The reply was understood as agreement. */
    AFFIRM,

    /** The reply was a refusal, or was not understood as agreement. */
    DENY
}

/**
 * Interprets a short spoken reply to a yes/no confirmation prompt and builds the
 * prompt text for actions that are confirmed verbally before they run.
 *
 * Parsing is fail-safe: only a reply that clearly begins with an affirmative phrase
 * and contains no refusal is an [ConfirmationResult.AFFIRM]; everything else —
 * including empty or unintelligible input — is a [ConfirmationResult.DENY].
 */
object ConfirmationParser {

    private val AFFIRMATIVES = setOf(
        "yes", "yeah", "yep", "yup", "sure", "okay", "ok",
        "do it", "go ahead", "go for it", "confirm", "absolutely", "definitely"
    )

    private val NEGATIVES = setOf(
        "no", "nope", "nah", "cancel", "stop", "never mind", "nevermind",
        "don't", "dont", "abort", "forget it", "negative"
    )

    // Compiled once rather than on every utterance.
    // Anything that is not a letter, digit, apostrophe or whitespace separates words.
    private val SEPARATORS = Regex("""[^\p{L}\p{N}'\s]+""")
    private val WHITESPACE_RUN = Regex("""\s+""")

    /**
     * Classifies [transcript] as an affirmation or a denial.
     *
     * The transcript is lower-cased, stripped of punctuation and collapsed to single
     * spaces before matching, so "Yes; do it!" and "go,  ahead" are both recognised.
     *
     * A refusal word or phrase anywhere in the reply (matched on whole words, so
     * "know" does not count as "no") vetoes a leading affirmative: "yes, no wait"
     * and "ok cancel" are denials. The cost is that a reply such as
     * "sure, no problem" is also denied and has to be repeated; that is the safer
     * failure mode for an action the user has to approve.
     *
     * @param transcript raw speech-to-text output; may be blank.
     * @return [ConfirmationResult.AFFIRM] only for an unambiguous yes.
     */
    fun parse(transcript: String): ConfirmationResult {
        val normalized = normalize(transcript)
        if (normalized.isEmpty()) return ConfirmationResult.DENY

        // Pad with spaces so phrase lookups only ever match on word boundaries.
        val padded = " $normalized "
        val refused = NEGATIVES.any { padded.contains(" $it ") }
        val affirmed = AFFIRMATIVES.any { normalized == it || normalized.startsWith("$it ") }

        return if (affirmed && !refused) ConfirmationResult.AFFIRM else ConfirmationResult.DENY
    }

    /**
     * Builds the spoken question asking the user to confirm [action].
     *
     * @return the prompt for call and SMS actions; an empty string for any other action.
     */
    fun questionFor(action: Action): String = when (action) {
        is Action.Call -> "Call ${action.contact}?"
        is Action.Sms -> "Text ${action.contact}?"
        else -> ""
    }

    /** Lower-cases [raw], unifies apostrophes, and reduces it to single-space-separated words. */
    private fun normalize(raw: String): String =
        raw.lowercase()
            .replace('\u2019', '\'') // typographic apostrophe, so "don’t" matches "don't"
            .replace(SEPARATORS, " ")
            .replace(WHITESPACE_RUN, " ")
            .trim()
}
17 changes: 17 additions & 0 deletions app/src/main/java/com/ryoncook/glassesai/ConversationContext.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.ryoncook.glassesai

/**
 * Bounded in-memory history of recent conversation turns.
 *
 * Each turn is a `(userText, assistantText)` pair. Once [maxTurns] turns are held,
 * adding another evicts the oldest one. A non-positive [maxTurns] disables history
 * entirely: [add] then stores nothing.
 *
 * This class performs no synchronization of its own.
 *
 * @property maxTurns maximum number of turns retained.
 */
class ConversationContext(val maxTurns: Int = 3) {

    private val _turns = ArrayDeque<Pair<String, String>>()

    /** Snapshot copy of the retained turns, oldest first. */
    val turns: List<Pair<String, String>> get() = _turns.toList()

    /** `true` when no turns are retained. */
    val isEmpty: Boolean get() = _turns.isEmpty()

    /**
     * Appends a turn, evicting the oldest turn(s) so that at most [maxTurns] remain.
     *
     * @param userText what the user said.
     * @param assistantText what the assistant replied.
     */
    fun add(userText: String, assistantText: String) {
        // With a zero or negative capacity there is nothing to evict and nothing may be
        // kept; without this guard removeFirst() would throw on the empty deque.
        if (maxTurns <= 0) return
        while (_turns.size >= maxTurns) _turns.removeFirst()
        _turns.addLast(userText to assistantText)
    }

    /** Discards all retained turns. */
    fun clear() = _turns.clear()
}
203 changes: 191 additions & 12 deletions app/src/main/java/com/ryoncook/glassesai/GlassesAIService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,12 @@ class GlassesAIService : Service() {
/**
 * States the service moves through.
 *
 * [AWAITING_CONFIRMATION] and [FOLLOW_UP_LISTENING] are the two re-listen states
 * entered by `startReListenRecording` after a spoken response.
 */
private enum class State {
    IDLE, MODEL_LOADING, WAKE_LISTENING, CALIBRATING, SCO_CONNECTING,
    STREAMING, RESPONDING,
    TRANSCRIBING, INFERRING, SYNTHESIZING,
    AWAITING_CONFIRMATION, FOLLOW_UP_LISTENING
}

/** What kind of listening window, if any, should open once the current response has been spoken. */
private enum class ReListenMode {
    /** No re-listen; the turn ends normally. */
    NONE,

    /** Listen for a yes/no answer to a confirmation question. */
    CONFIRMATION,

    /** Listen for a follow-up utterance. */
    FOLLOW_UP
}

@Volatile private var serviceState = State.IDLE
@Volatile private var isDestroyed = false
@Volatile private var appInForeground = false
Expand Down Expand Up @@ -122,6 +125,11 @@ class GlassesAIService : Service() {
@Volatile private var lastPrompt = ""
@Volatile private var ttsVolumeGain = 1.0f

// Action held back until the user confirms it verbally; consumed (and reset to null)
// by onConfirmationResponse, and dropped by onReListenTimeout.
private var pendingAction: Action? = null
// Which re-listen window to open after the current response; read and reset to NONE
// in afterOnDeviceTurnComplete.
private var reListenMode = ReListenMode.NONE
// Recent (user, assistant) turns passed to inference; keeps at most 3 turns.
private val conversationContext = ConversationContext(maxTurns = 3)
// Posted on mainHandler when a re-listen window opens; removed again in onUtteranceComplete.
private val reListenTimeoutRunnable = Runnable { onReListenTimeout() }

private var voskModel: Model? = null
private var wakeRecognizer: Recognizer? = null
@Volatile private var serverUrl = Config.DEFAULT_SERVER_URL
Expand All @@ -141,7 +149,8 @@ class GlassesAIService : Service() {
AudioManager.SCO_AUDIO_STATE_DISCONNECTED -> {
when (serviceState) {
State.STREAMING, State.RESPONDING,
State.TRANSCRIBING, State.INFERRING, State.SYNTHESIZING -> {
State.TRANSCRIBING, State.INFERRING, State.SYNTHESIZING,
State.AWAITING_CONFIRMATION, State.FOLLOW_UP_LISTENING -> {
// Unexpected drop during conversation — reconnect
Log.w(TAG, "SCO dropped mid-conversation, reconnecting")
audioManager.startBluetoothSco()
Expand Down Expand Up @@ -493,6 +502,7 @@ class GlassesAIService : Service() {
Log.d(TAG, "Wake word detected — starting SCO")
serviceState = State.SCO_CONNECTING
updateNotif("Activating...")
conversationContext.clear()
pauseMedia()
stopOpenWakeWord()
stopPhoneMicRecording()
Expand Down Expand Up @@ -692,8 +702,24 @@ class GlassesAIService : Service() {
}

private fun onUtteranceComplete(transcript: String) {
if (serviceState != State.TRANSCRIBING) return
mainHandler.removeCallbacks(reListenTimeoutRunnable)
Log.d(TAG, "Transcript: $transcript")

when (serviceState) {
State.AWAITING_CONFIRMATION -> {
glassesRecording = false
onConfirmationResponse(transcript)
return
}
State.FOLLOW_UP_LISTENING -> {
glassesRecording = false
onFollowUpUtterance(transcript)
return
}
State.TRANSCRIBING -> { /* normal path below */ }
else -> return
}

if (TranscriptValidator.isHallucination(transcript)) {
Log.w(TAG, "Whisper hallucination detected — discarding transcript")
glassesRecording = false
Expand All @@ -705,12 +731,63 @@ class GlassesAIService : Service() {
serviceState = State.INFERRING
updateNotif("Thinking...")

inferenceManager?.infer(transcript) { response ->
inferenceManager?.infer(transcript, conversationContext.turns) { response ->
Handler(Looper.getMainLooper()).post { speakResponse(response) }
} ?: afterOnDeviceTurnComplete()
}

/**
 * Handles the user's spoken answer to a confirmation question.
 *
 * If no action is pending the turn is simply finished. Otherwise the pending action
 * is consumed: on an affirmative answer it is executed and its result (or the
 * action's own confirmation text) is spoken; on a denial the conversation context
 * is cleared and "Cancelled." is spoken. Neither reply opens a follow-up window.
 */
private fun onConfirmationResponse(transcript: String) {
    val action = pendingAction
    if (action == null) {
        afterOnDeviceTurnComplete()
        return
    }
    // Consume the pending action before acting on the answer.
    pendingAction = null

    val verdict = ConfirmationParser.parse(transcript)
    when (verdict) {
        ConfirmationResult.DENY -> {
            Log.d(TAG, "Confirmation: denied")
            conversationContext.clear()
            speakResponse("Cancelled.", allowFollowUp = false)
        }
        ConfirmationResult.AFFIRM -> {
            Log.d(TAG, "Confirmation: affirmed — executing $action")
            // A non-null result from executeAction replaces the action's own confirmation text.
            val spoken = executeAction(action) ?: action.confirm
            speakResponse(spoken, allowFollowUp = false)
        }
    }
}

/**
 * Handles an utterance captured during a follow-up listening window.
 *
 * A transcript flagged by [TranscriptValidator.isHallucination] is discarded and the
 * turn is finished. Otherwise the transcript becomes the current prompt and is sent
 * to inference together with the stored conversation turns; the reply is spoken on
 * the main looper. If there is no inference manager the turn is finished instead.
 */
private fun onFollowUpUtterance(transcript: String) {
    val hallucinated = TranscriptValidator.isHallucination(transcript)
    if (!hallucinated) {
        lastPrompt = transcript
        serviceState = State.INFERRING
        updateNotif("Thinking...")
        inferenceManager?.infer(transcript, conversationContext.turns) { reply ->
            Handler(Looper.getMainLooper()).post { speakResponse(reply) }
        } ?: afterOnDeviceTurnComplete()
        return
    }

    Log.w(TAG, "Follow-up hallucination — discarding")
    afterOnDeviceTurnComplete()
}

private fun speakResponse(response: String) {
/**
 * Runs when the re-listen timeout fires without an utterance having been handled.
 *
 * While awaiting confirmation, the pending action is dropped, the conversation
 * context is cleared and "Cancelled." is spoken. While listening for a follow-up,
 * the context is cleared and the turn is finished. In any other state this is a no-op.
 */
private fun onReListenTimeout() {
    val stateAtTimeout = serviceState

    if (stateAtTimeout == State.AWAITING_CONFIRMATION) {
        Log.d(TAG, "Confirmation timeout — cancelling")
        pendingAction = null
        reListenMode = ReListenMode.NONE
        conversationContext.clear()
        glassesRecording = false
        speakResponse("Cancelled.", allowFollowUp = false)
        return
    }

    if (stateAtTimeout == State.FOLLOW_UP_LISTENING) {
        Log.d(TAG, "Follow-up timeout — returning to wake listening")
        conversationContext.clear()
        glassesRecording = false
        afterOnDeviceTurnComplete()
    }
}

private fun speakResponse(response: String, allowFollowUp: Boolean = true) {
serviceState = State.SYNTHESIZING

val cleaned = ResponseParser.cleanResponse(response)
Expand All @@ -720,12 +797,23 @@ class GlassesAIService : Service() {
if (json != null && json.has("action")) {
val action = ActionParser.parse(jsonStr!!)
?.let { ActionOverride.apply(it, lastPrompt) }
val override = if (action != null) executeAction(action) else null
when {
override != null -> override // feature blocked or error
else -> action?.confirm?.ifBlank { "" } ?: ""
action is Action.Call || action is Action.Sms -> {
// Hold execution — ask for verbal confirmation first
pendingAction = action
reListenMode = ReListenMode.CONFIRMATION
ConfirmationParser.questionFor(action)
}
else -> {
val override = if (action != null) executeAction(action) else null
when {
override != null -> override
else -> action?.confirm?.ifBlank { "" } ?: ""
}
}
}
} else {
if (allowFollowUp) reListenMode = ReListenMode.FOLLOW_UP
cleaned
}
} catch (e: Exception) {
Expand Down Expand Up @@ -765,34 +853,55 @@ class GlassesAIService : Service() {
}

updateNotif("Responding...")
val responseToStore = textToSpeak
val capturedLastPrompt = lastPrompt
Thread {
audioTrack?.play() // resume if paused from a previous re-listen window
try {
val pcm = tm.synthesize(textToSpeak)
if (pcm != null) {
audioTrack?.write(pcm, 0, pcm.size)
val durationMs = pcm.size.toLong() * 1000 / 2 / tm.sampleRate()
Thread.sleep(durationMs + 200)
}
val beep = generateDescendingBeepPcm(tm.sampleRate())
audioTrack?.write(beep, 0, beep.size)
Thread.sleep(beep.size.toLong() * 1000 / 2 / tm.sampleRate() + 50)
} catch (e: Exception) {
Log.e(TAG, "TTS synthesis error: ${e.message}")
}
Handler(Looper.getMainLooper()).post { afterOnDeviceTurnComplete() }
Handler(Looper.getMainLooper()).post {
if (reListenMode == ReListenMode.FOLLOW_UP && capturedLastPrompt.isNotBlank()) {
conversationContext.add(capturedLastPrompt, responseToStore)
}
afterOnDeviceTurnComplete()
}
}.start()
}

private fun afterOnDeviceTurnComplete() {
glassesRecording = false
glassesMicThread?.join(500)
glassesMicThread = null

val mode = reListenMode
reListenMode = ReListenMode.NONE

if (mode == ReListenMode.CONFIRMATION || mode == ReListenMode.FOLLOW_UP) {
// Reuse glassesMicRecord without stop/restart — stopping and recreating
// the SCO mic between turns causes audible hardware artifacts on the glasses.
// Pause the audioTrack so an empty buffer in PLAY state doesn't produce
// underrun noise on the SCO line during the listening window.
audioTrack?.pause()
startReListenRecording(mode)
return
}

glassesMicRecord?.stop()
glassesMicRecord?.release()
glassesMicRecord = null

val drainTrack = audioTrack
audioTrack = null
val endBeep = generateDescendingBeepPcm(ttsManager?.sampleRate() ?: Config.TTS_SAMPLE_RATE)
drainTrack?.write(endBeep, 0, endBeep.size)
drainTrack?.stop()

serviceState = State.IDLE
Expand All @@ -809,6 +918,76 @@ class GlassesAIService : Service() {
}, 500)
}

/**
 * Opens a listening window after a spoken response and transcribes what the user says.
 *
 * Sets the service state for [mode] (confirmation, otherwise follow-up), arms the
 * re-listen timeout, makes sure a recorder exists, and starts a capture thread. The
 * thread records until the user stops speaking (a run of silent chunks after speech),
 * the chunk budget is exhausted, recording is stopped from elsewhere, or the recorder
 * fails. A non-blank transcript is handed to [onUtteranceComplete] on [mainHandler];
 * when nothing usable is captured, the timeout runnable is what ends the window.
 *
 * @param mode which listening window to open.
 */
private fun startReListenRecording(mode: ReListenMode) {
    val awaitingConfirmation = mode == ReListenMode.CONFIRMATION

    serviceState = if (awaitingConfirmation) State.AWAITING_CONFIRMATION
    else State.FOLLOW_UP_LISTENING
    updateNotif(if (awaitingConfirmation) "Waiting for confirmation..." else "Listening...")

    val timeoutMs = if (awaitingConfirmation) 8000L else 5000L
    // Clear any timeout still queued from an earlier window so at most one is pending.
    mainHandler.removeCallbacks(reListenTimeoutRunnable)
    mainHandler.postDelayed(reListenTimeoutRunnable, timeoutMs)

    // glassesMicRecord is reused from the previous turn — no stop/restart.
    // If it was released (e.g. after a timeout teardown), create a fresh one.
    if (glassesMicRecord == null) {
        val recBuf = AudioRecord.getMinBufferSize(
            Config.INPUT_SAMPLE_RATE,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT
        ).coerceAtLeast(6400)
        @Suppress("MissingPermission")
        val freshRecord = AudioRecord(
            MediaRecorder.AudioSource.VOICE_COMMUNICATION,
            Config.INPUT_SAMPLE_RATE,
            AudioFormat.CHANNEL_IN_MONO,
            AudioFormat.ENCODING_PCM_16BIT,
            recBuf
        )
        if (freshRecord.state == AudioRecord.STATE_INITIALIZED) {
            freshRecord.startRecording()
            glassesMicRecord = freshRecord
        } else {
            // startRecording() on an uninitialized recorder throws. Leave the field
            // null instead: the capture thread exits at once and the timeout runnable
            // closes the window.
            Log.e(TAG, "Re-listen AudioRecord failed to initialize")
            freshRecord.release()
        }
    }
    glassesRecording = true

    val silenceEndChunks = 9
    val maxChunks = if (awaitingConfirmation) 80 else 50

    glassesMicThread = Thread({
        // Discard audio buffered during TTS playback so TTS echo doesn't
        // register as speech and trigger a false DENY before the user speaks.
        val discardBuf = ByteArray(3200)
        repeat(4) { if (glassesRecording) glassesMicRecord?.read(discardBuf, 0, discardBuf.size) }

        val chunk = ByteArray(3200)
        val buffer = java.io.ByteArrayOutputStream()
        var silentChunks = 0
        var speechStarted = false
        var totalChunks = 0

        while (glassesRecording &&
            (serviceState == State.AWAITING_CONFIRMATION || serviceState == State.FOLLOW_UP_LISTENING)) {
            val read = glassesMicRecord?.read(chunk, 0, chunk.size) ?: break
            if (read < 0) {
                // Negative values are AudioRecord error codes. Retrying would spin this
                // thread until the timeout, since failed reads never count toward maxChunks.
                Log.w(TAG, "Re-listen mic read failed ($read) — ending capture")
                break
            }
            if (read == 0) continue
            buffer.write(chunk, 0, read)
            totalChunks++
            if (SilenceDetector.isSilence(chunk.copyOf(read))) {
                // Silence only counts toward end-of-utterance once speech has begun.
                if (speechStarted) silentChunks++
            } else {
                speechStarted = true
                silentChunks = 0
            }
            if ((speechStarted && silentChunks >= silenceEndChunks) || totalChunks >= maxChunks) {
                glassesRecording = false
            }
        }

        val audio = buffer.toByteArray()
        // Skip transcription if the window was already closed (e.g. by the timeout).
        if (audio.isNotEmpty() &&
            (serviceState == State.AWAITING_CONFIRMATION || serviceState == State.FOLLOW_UP_LISTENING)) {
            val text = whisperTranscriber?.transcribe(audio) ?: ""
            Log.d(TAG, "Re-listen transcript: \"$text\"")
            if (text.isNotBlank()) mainHandler.post { onUtteranceComplete(text) }
        }
    }, "glasses-mic-relisten").also { it.start() }
}

private fun startPlaybackMonitor() {
val track = audioTrack ?: return
lastPlaybackHeadPosition = track.playbackHeadPosition
Expand Down
Loading
Loading