Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions PulseLoop/Coach/Attachments/CoachAttachmentStore.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import Foundation
import UIKit

/// Lightweight, persistable pointer to an image attached to a `CoachMessage`.
///
/// Only this small value is stored with the message (serialized as JSON in
/// `CoachMessage.attachmentsJSON`); the image bytes themselves live on disk under
/// `Documents/coach_attachments/<file>`. This follows the same `*JSON` ref
/// convention used for `PendingAction` / `CoachTurnError` — no SwiftData blob and
/// no `@Attribute(.externalStorage)` — which keeps the store small and fast.
struct CoachAttachmentRef: Codable, Equatable, Hashable {
    /// Name of the file inside `coach_attachments/` (e.g. `<uuid>.jpg`).
    let file: String
    /// MIME type of the bytes on disk (always `image/jpeg` in v1).
    let mime: String
    /// Width recorded for the stored image when the ref was created.
    let width: Int
    /// Height recorded for the stored image when the ref was created.
    let height: Int

    /// Creates a ref; `mime` defaults to JPEG because that is the only stored format.
    init(file: String, mime: String = "image/jpeg", width: Int, height: Int) {
        self.file = file
        self.mime = mime
        self.width = width
        self.height = height
    }

    /// Serializes `refs` to the JSON string kept in `CoachMessage.attachmentsJSON`.
    ///
    /// - Returns: The JSON array as text, or `nil` when `refs` is empty or the
    ///   encoding fails.
    static func encode(_ refs: [CoachAttachmentRef]) -> String? {
        if refs.isEmpty {
            return nil
        }
        do {
            let encoded = try JSONEncoder().encode(refs)
            return String(data: encoded, encoding: .utf8)
        } catch {
            return nil
        }
    }

    /// Parses the JSON string produced by `encode(_:)`.
    ///
    /// - Returns: The decoded refs, or an empty array when `json` is `nil` or
    ///   cannot be decoded as an array of refs.
    static func decode(fromJSON json: String?) -> [CoachAttachmentRef] {
        guard let json else { return [] }
        guard let bytes = json.data(using: .utf8) else { return [] }
        do {
            return try JSONDecoder().decode([CoachAttachmentRef].self, from: bytes)
        } catch {
            return []
        }
    }
}

/// The wire-ready forms of one image, built once from a `CoachAttachmentRef`'s
/// bytes and handed to the request builders. Each provider picks the shape it
/// needs: OpenAI/OpenRouter take the `data:` URL; Gemini takes the raw base64 +
/// `mimeType`. Sendable so it can cross the orchestrator's concurrency boundary.
///
/// All three fields describe the same bytes: `dataURL` is
/// `data:<mimeType>;base64,<rawBase64>`.
struct CoachImagePayload: Sendable, Equatable {
/// `data:image/jpeg;base64,<…>` — used by OpenAI `input_image` and OpenRouter `image_url`.
let dataURL: String
/// Bare base64 (no `data:` prefix) — intended for Gemini `inlineData.data`.
/// NOTE(review): the Gemini adapter may instead re-derive the base64 by parsing
/// `dataURL`; confirm whether any caller still reads this field directly.
let rawBase64: String
/// MIME type of the encoded bytes (e.g. `image/jpeg`).
let mimeType: String
}

/// On-device store for coach image attachments: compresses + writes incoming
/// images, loads them back for the chat bubble, and produces the base64 payloads
/// the model clients send. Uses `FileManager` + the app Documents directory (the
/// same primitive `DiagnosticsExporter` already relies on).
///
/// Every operation is best-effort: failures surface as `nil` (or a no-op for
/// `delete`) rather than as thrown errors.
enum CoachAttachmentStore {
    /// Longest-edge cap, in **pixels**, applied before JPEG-encoding. Keeps request
    /// payloads small (all three providers bill by image size / cap total request
    /// bytes) while staying sharp enough for the model to read charts and labels.
    private static let maxDimension: CGFloat = 1024
    /// Quality passed to `UIImage.jpegData(compressionQuality:)`.
    private static let jpegQuality: CGFloat = 0.7
    /// MIME type recorded on every ref produced by `save(_:)`.
    static let mimeType = "image/jpeg"

    // MARK: - Locations

    /// `Documents/coach_attachments/`, created on demand.
    ///
    /// - Returns: The directory URL, or `nil` when the Documents directory cannot
    ///   be resolved or the attachments directory cannot be created (for example
    ///   because a regular file already occupies that path).
    private static func directory() -> URL? {
        let fileManager = FileManager.default
        guard let docs = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first else {
            return nil
        }
        let dir = docs.appendingPathComponent("coach_attachments", isDirectory: true)
        do {
            // With intermediate directories enabled this succeeds when the
            // directory already exists, so no separate existence check (and no
            // check-then-create race) is needed.
            try fileManager.createDirectory(at: dir, withIntermediateDirectories: true)
        } catch {
            return nil
        }
        return dir
    }

    /// Whether `name` is a single, plain path component that can safely be
    /// appended to the attachments directory.
    ///
    /// Refs are decoded from persisted JSON, so `file` is treated as untrusted:
    /// an empty name would resolve to the directory itself (and `delete` would
    /// then remove every attachment), while `/` or `..` would escape it.
    private static func isSafeFileName(_ name: String) -> Bool {
        !name.isEmpty && name != "." && name != ".." && !name.contains("/")
    }

    /// On-disk location for `ref`, or `nil` when the directory is unavailable or
    /// `ref.file` is not a plain file name.
    private static func url(for ref: CoachAttachmentRef) -> URL? {
        guard isSafeFileName(ref.file) else { return nil }
        return directory()?.appendingPathComponent(ref.file, isDirectory: false)
    }

    // MARK: - Save

    /// Downscales + JPEG-compresses `image`, writes it to a new `<uuid>.jpg`, and
    /// returns the ref.
    ///
    /// - Parameter image: The picked or captured image.
    /// - Returns: A ref carrying the stored file name, MIME type and pixel
    ///   dimensions, or `nil` if the bytes can't be produced or written.
    static func save(_ image: UIImage) -> CoachAttachmentRef? {
        let scaled = downscaled(image)
        guard let data = scaled.jpegData(compressionQuality: jpegQuality),
              let dir = directory() else { return nil }
        let file = "\(UUID().uuidString).jpg"
        let dest = dir.appendingPathComponent(file, isDirectory: false)
        do {
            try data.write(to: dest, options: .atomic)
        } catch {
            return nil
        }
        // `size` is in points; multiply by `scale` for pixels. Round rather than
        // truncate so floating-point error (e.g. 999.9999) cannot drop a pixel.
        return CoachAttachmentRef(
            file: file,
            mime: mimeType,
            width: Int((scaled.size.width * scaled.scale).rounded()),
            height: Int((scaled.size.height * scaled.scale).rounded())
        )
    }

    /// Returns `image` unchanged when its longest edge is within `maxDimension`
    /// pixels; otherwise redraws it, aspect ratio preserved, so the longest edge
    /// is exactly `maxDimension` pixels.
    ///
    /// The comparison is done in pixels (`size * scale`), not points, so a
    /// high-scale image (e.g. @3x) cannot slip past the cap.
    private static func downscaled(_ image: UIImage) -> UIImage {
        let pixelWidth = image.size.width * image.scale
        let pixelHeight = image.size.height * image.scale
        let longEdge = max(pixelWidth, pixelHeight)
        guard longEdge > maxDimension else { return image }
        let ratio = maxDimension / longEdge
        // Whole-pixel target, never below 1 px on the short edge of extreme
        // aspect ratios.
        let target = CGSize(
            width: max(1, (pixelWidth * ratio).rounded()),
            height: max(1, (pixelHeight * ratio).rounded())
        )
        let format = UIGraphicsImageRendererFormat.default()
        // Scale 1 makes the renderer's point size equal its pixel size.
        format.scale = 1
        return UIGraphicsImageRenderer(size: target, format: format).image { _ in
            image.draw(in: CGRect(origin: .zero, size: target))
        }
    }

    // MARK: - Load

    /// Raw stored bytes for `ref`, or `nil` if the file is missing or unreadable.
    static func data(for ref: CoachAttachmentRef) -> Data? {
        guard let url = url(for: ref) else { return nil }
        return try? Data(contentsOf: url)
    }

    /// Decodes the stored bytes for `ref` into a `UIImage`, or `nil` on failure.
    static func loadImage(_ ref: CoachAttachmentRef) -> UIImage? {
        guard let data = data(for: ref) else { return nil }
        return UIImage(data: data)
    }

    /// Removes the stored file for `ref`. Best-effort: a missing file or a
    /// failed removal is ignored.
    static func delete(_ ref: CoachAttachmentRef) {
        guard let url = url(for: ref) else { return }
        try? FileManager.default.removeItem(at: url)
    }

    // MARK: - Wire payloads

    /// Builds the model-ready payload (data URL + raw base64) for a stored ref.
    ///
    /// - Returns: The payload, or `nil` when the stored bytes cannot be read.
    static func payload(for ref: CoachAttachmentRef) -> CoachImagePayload? {
        guard let data = data(for: ref) else { return nil }
        let base64 = data.base64EncodedString()
        return CoachImagePayload(
            dataURL: "data:\(ref.mime);base64,\(base64)",
            rawBase64: base64,
            mimeType: ref.mime
        )
    }

    /// Payloads for every ref whose bytes can be read; unreadable refs are
    /// skipped, so the result may be shorter than `refs`.
    static func payloads(for refs: [CoachAttachmentRef]) -> [CoachImagePayload] {
        refs.compactMap { payload(for: $0) }
    }
}
1 change: 1 addition & 0 deletions PulseLoop/Coach/Config/CoachFeatureFlags.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct CoachFeatureFlags {
/// Mirrors `settings.enableWebSearch`.
var webSearchEnabled: Bool { settings.enableWebSearch }
/// Mirrors `settings.enableWriteTools`.
var writeToolsEnabled: Bool { settings.enableWriteTools }
/// Mirrors `settings.enableLiveMeasurements`.
var liveMeasurementsEnabled: Bool { settings.enableLiveMeasurements }
/// Mirrors `settings.enableImageInput`.
var imageInputEnabled: Bool { settings.enableImageInput }

/// `settings.maxToolCalls`, clamped so it is never below 1.
var maxToolCalls: Int { max(1, settings.maxToolCalls) }
/// `settings.maxRounds`, clamped so it is never below 1.
var maxRounds: Int { max(1, settings.maxRounds) }
Expand Down
4 changes: 4 additions & 0 deletions PulseLoop/Coach/Config/CoachSettings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ struct CoachSettings: Codable, Equatable {
/// until Milestone B wires confirmation gates.
var enableWriteTools: Bool = false
/// Live-measurements toggle. Off by default.
var enableLiveMeasurements: Bool = false
/// When true, the coach composer shows a camera/photo button so the user can
/// attach an image to a message (multimodal input). Off by default.
var enableImageInput: Bool = false
/// Defaults to 8. NOTE(review): presumably the per-turn cap on tool calls —
/// confirm against the orchestrator.
var maxToolCalls: Int = 8
/// Defaults to 4. NOTE(review): presumably the per-turn cap on model
/// round-trips — confirm against the orchestrator.
var maxRounds: Int = 4
// Milestone D — automated daily check-in notifications.
Expand Down Expand Up @@ -158,6 +161,7 @@ struct CoachSettings: Codable, Equatable {
orProviderSort = try c.decodeIfPresent(String.self, forKey: .orProviderSort)
enableWriteTools = try c.decodeIfPresent(Bool.self, forKey: .enableWriteTools) ?? d.enableWriteTools
enableLiveMeasurements = try c.decodeIfPresent(Bool.self, forKey: .enableLiveMeasurements) ?? d.enableLiveMeasurements
enableImageInput = try c.decodeIfPresent(Bool.self, forKey: .enableImageInput) ?? d.enableImageInput
maxToolCalls = try c.decodeIfPresent(Int.self, forKey: .maxToolCalls) ?? d.maxToolCalls
maxRounds = try c.decodeIfPresent(Int.self, forKey: .maxRounds) ?? d.maxRounds
notificationsEnabled = try c.decodeIfPresent(Bool.self, forKey: .notificationsEnabled) ?? d.notificationsEnabled
Expand Down
4 changes: 4 additions & 0 deletions PulseLoop/Coach/Config/CoachSettingsSection.swift
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ struct CoachSettingsSection: View {

toggleRow("AI actions (set goals, log, edit)", isOn: writeToolsBinding)
toggleRow("Live ring measurements", isOn: liveMeasurementsBinding)
toggleRow("Image input (attach photos)", isOn: imageInputBinding)

if !memories.isEmpty {
SectionHeader(title: "Coach memory", action: nil)
Expand Down Expand Up @@ -398,6 +399,9 @@ struct CoachSettingsSection: View {
/// Two-way binding onto `store.settings.enableLiveMeasurements` for the toggle row.
private var liveMeasurementsBinding: Binding<Bool> {
    Binding(
        get: { store.settings.enableLiveMeasurements },
        set: { isOn in store.settings.enableLiveMeasurements = isOn }
    )
}
/// Two-way binding onto `store.settings.enableImageInput` for the toggle row.
private var imageInputBinding: Binding<Bool> {
    Binding(
        get: { store.settings.enableImageInput },
        set: { isOn in store.settings.enableImageInput = isOn }
    )
}

// MARK: - Key actions

Expand Down
53 changes: 47 additions & 6 deletions PulseLoop/Coach/Gemini/GeminiClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,52 @@ final class GeminiClient: ResponsesClient, @unchecked Sendable {
systemText = systemParts.joined(separator: "\n\n")

for item in conversationItems {
guard let role = item["role"] as? String,
let content = item["content"] as? String else { continue }
guard let role = item["role"] as? String else { continue }
let parts = geminiParts(from: item)
guard !parts.isEmpty else { continue }
let geminiRole = role == "assistant" ? "model" : "user"
contents.append(["role": geminiRole, "parts": [["text": content]]])
contents.append(["role": geminiRole, "parts": parts])
}
}

/// Translates the `content` of a Responses-API message item into Gemini `parts`.
///
/// - A plain String `content` becomes a single `{"text": …}` part.
/// - An array `content` (the OpenAI content-part form) is mapped part by part:
///   `input_text` / `text` → `{"text": …}`, `input_image` →
///   `{"inlineData": {mimeType, data}}`. Parts of any other type, text parts
///   without a String `text`, and images whose URL cannot be converted are dropped.
/// - Any other `content` (including a missing one) yields no parts.
private func geminiParts(from item: [String: Any]) -> [[String: Any]] {
    let rawContent = item["content"]
    if let plain = rawContent as? String {
        return [["text": plain]]
    }
    guard let sourceParts = rawContent as? [[String: Any]] else { return [] }
    return sourceParts.compactMap { part -> [String: Any]? in
        guard let kind = part["type"] as? String else { return nil }
        switch kind {
        case "input_text", "text":
            guard let text = part["text"] as? String else { return nil }
            return ["text": text]
        case "input_image":
            guard let inline = inlineData(fromImageURL: part["image_url"] as? String) else {
                return nil
            }
            return ["inlineData": inline]
        default:
            return nil
        }
    }
}

/// Splits an `input_image` `data:<mime>[;param…];base64,<data>` URL into Gemini's
/// `inlineData` object (`mimeType` + bare base64 `data`).
///
/// - Parameter url: The `image_url` value of an `input_image` content part.
/// - Returns: `["mimeType": …, "data": …]`, or `nil` when `url` is `nil`, is not a
///   `data:` URL, has an empty MIME type, is not marked `;base64`, or carries
///   no payload. Rejecting these avoids sending Gemini an `inlineData` whose
///   `data` is not actually base64 or whose `mimeType` is blank.
private func inlineData(fromImageURL url: String?) -> [String: String]? {
    let scheme = "data:"
    guard let url, url.hasPrefix(scheme),
          let comma = url.firstIndex(of: ",") else { return nil }

    // Header is everything between "data:" and the first comma, e.g.
    // "image/jpeg;base64". "data:" contains no comma, so `comma` lies at or
    // after the end of the scheme and the range below is always valid.
    let headerStart = url.index(url.startIndex, offsetBy: scheme.count)
    let fields = url[headerStart..<comma].split(separator: ";", omittingEmptySubsequences: false)

    // First field is the MIME type; the last must be the base64 marker. Any
    // parameters in between (e.g. `charset=…`) are ignored.
    guard fields.count >= 2,
          let mime = fields.first, !mime.isEmpty,
          fields.last?.lowercased() == "base64" else { return nil }

    let payload = url[url.index(after: comma)...]
    guard !payload.isEmpty else { return nil }
    return ["mimeType": String(mime), "data": String(payload)]
}

/// Subsequent turns (tool results or repair messages): append the stored
/// model response then the new user content.
private func appendContinuation(previousId: String, input: [[String: Any]]) {
Expand All @@ -144,15 +183,17 @@ final class GeminiClient: ResponsesClient, @unchecked Sendable {
for item in input {
if let toolPart = convertToolResult(item) {
userParts.append(toolPart)
} else if let role = item["role"] as? String, let content = item["content"] as? String {
} else if let role = item["role"] as? String, item["content"] != nil {
if role == "assistant" {
if !userParts.isEmpty {
contents.append(["role": "user", "parts": userParts])
userParts = []
}
contents.append(["role": "model", "parts": [["text": content]]])
// Assistant replays are always text, but route through the same
// converter so a String content still yields `[{"text": …}]`.
contents.append(["role": "model", "parts": geminiParts(from: item)])
} else {
userParts.append(["text": content])
userParts.append(contentsOf: geminiParts(from: item))
}
}
}
Expand Down
14 changes: 11 additions & 3 deletions PulseLoop/Coach/OpenAI/ResponsesTypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,17 @@ struct OpenAIResponse: Sendable {
/// it. Kept dictionary-based because tool specs and the strict output schema are
/// naturally arbitrary JSON.
enum OpenAIRequestBuilder {
/// One input message item.
static func message(role: String, content: String) -> [String: Any] {
["role": role, "content": content]
/// Builds one input message item.
///
/// Without images the item's `content` stays a plain String, so adapter code
/// that reads `content as? String` keeps working unchanged. Images are purely
/// additive: only when `images` is non-empty does `content` become the
/// Responses-API content-part array — one `input_text` part carrying `content`,
/// followed by one `input_image` part per image (using its `data:` URL).
static func message(role: String, content: String, images: [CoachImagePayload] = []) -> [String: Any] {
    if images.isEmpty {
        return ["role": role, "content": content]
    }
    let textPart: [String: Any] = ["type": "input_text", "text": content]
    let imageParts: [[String: Any]] = images.map { image in
        ["type": "input_image", "image_url": image.dataURL]
    }
    return ["role": role, "content": [textPart] + imageParts]
}

/// A function-call result item to feed back into the next turn.
Expand Down
34 changes: 28 additions & 6 deletions PulseLoop/Coach/OpenRouter/OpenRouterClient.swift
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,8 @@ final class OpenRouterClient: ResponsesClient, @unchecked Sendable {
messages = []
storedAssistantMessage = [:]
for item in input {
guard let role = item["role"] as? String,
let content = item["content"] as? String else { continue }
messages.append(["role": chatRole(role), "content": content])
guard let role = item["role"] as? String, item["content"] != nil else { continue }
messages.append(["role": chatRole(role), "content": chatContent(from: item)])
}
// Unlike the native OpenAI/Gemini clients, OpenRouter sends no enforced
// `response_format` (several catalog models reject this app's schema), so
Expand All @@ -132,9 +131,8 @@ final class OpenRouterClient: ResponsesClient, @unchecked Sendable {
let callId = item["call_id"] as? String,
let output = item["output"] as? String {
messages.append(["role": "tool", "tool_call_id": callId, "content": output])
} else if let role = item["role"] as? String,
let content = item["content"] as? String {
messages.append(["role": chatRole(role), "content": content])
} else if let role = item["role"] as? String, item["content"] != nil {
messages.append(["role": chatRole(role), "content": chatContent(from: item)])
}
}
}
Expand All @@ -143,6 +141,30 @@ final class OpenRouterClient: ResponsesClient, @unchecked Sendable {
responsesRole == "developer" ? "system" : responsesRole
}

/// Translates the `content` of a Responses-API message item into Chat
/// Completions `content`.
///
/// - A plain String `content` is returned as-is (unchanged path, so the
///   cache-control rewrite still applies).
/// - An array `content` (the OpenAI content-part form) is mapped part by part:
///   `input_text` / `text` → `{type: text, text}`, `input_image` →
///   `{type: image_url, image_url: {url}}`. Parts of any other type, or parts
///   missing their String payload, are dropped.
/// - Any other `content` (including a missing one) yields the empty string.
private func chatContent(from item: [String: Any]) -> Any {
    let rawContent = item["content"]
    if let plain = rawContent as? String {
        return plain
    }
    guard let sourceParts = rawContent as? [[String: Any]] else { return "" }
    let converted: [[String: Any]] = sourceParts.compactMap { part -> [String: Any]? in
        guard let kind = part["type"] as? String else { return nil }
        switch kind {
        case "input_text", "text":
            guard let text = part["text"] as? String else { return nil }
            return ["type": "text", "text": text]
        case "input_image":
            guard let url = part["image_url"] as? String else { return nil }
            return ["type": "image_url", "image_url": ["url": url]]
        default:
            return nil
        }
    }
    return converted
}

// MARK: - Tool conversion (Responses flat → Chat Completions nested)

/// Converts the app's flat Responses function specs
Expand Down
20 changes: 16 additions & 4 deletions PulseLoop/Coach/Orchestration/CoachOrchestrator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,24 @@ struct CoachOrchestrator {
var error: CoachTurnError? = nil
}

struct PriorMessage { let role: String; let text: String }
/// One earlier conversation turn replayed to the model: its `role`, its `text`,
/// and any image payloads attached to it (`images` defaults to empty).
struct PriorMessage { let role: String; let text: String; var images: [CoachImagePayload] = [] }

/// Substituted as the user prompt when an image is sent with no text, so the
/// schema/tool loop still has a non-empty user turn to anchor on.
private static let imageOnlyPrompt = "Please look at the attached image."

func runTurn(
userText: String,
packet: CoachContextPacket,
recentMessages: [PriorMessage],
userImages: [CoachImagePayload] = [],
onTrace: @escaping (CoachTraceEvent) -> Void = { _ in }
) async -> TurnResult {
guard flags.coachEnabled else {
return TurnResult(assistant: CoachFallbacks.scripted(packet: packet), trace: [])
}
do {
return try await runOpenAI(userText: userText, packet: packet, recentMessages: recentMessages, onTrace: onTrace)
return try await runOpenAI(userText: userText, packet: packet, recentMessages: recentMessages, userImages: userImages, onTrace: onTrace)
} catch {
onTrace(CoachTraceEvent(label: "Something went wrong", status: .failedTool))
return TurnResult(assistant: CoachFallbacks.fallback(), trace: [], error: CoachTurnError(error))
Expand All @@ -46,20 +51,27 @@ struct CoachOrchestrator {
userText: String,
packet: CoachContextPacket,
recentMessages: [PriorMessage],
userImages: [CoachImagePayload],
onTrace: @escaping (CoachTraceEvent) -> Void
) async throws -> TurnResult {
let toolSpecs = registry.toolSpecs
let textFormat = CoachResponseSchema.textFormat

// Initial input: system + developer + recent turns + the new user message.
// Images only ever ride on user turns (system/developer/assistant stay text).
var input: [[String: Any]] = [
OpenAIRequestBuilder.message(role: "system", content: CoachPromptBuilder.systemPrompt),
OpenAIRequestBuilder.message(role: "developer", content: CoachPromptBuilder.developerMessage(packet: packet)),
]
for m in recentMessages {
input.append(OpenAIRequestBuilder.message(role: m.role == "user" ? "user" : "assistant", content: m.text))
let isUser = m.role == "user"
input.append(OpenAIRequestBuilder.message(
role: isUser ? "user" : "assistant",
content: m.text,
images: isUser ? m.images : []))
}
input.append(OpenAIRequestBuilder.message(role: "user", content: userText))
let userContent = userText.isEmpty && !userImages.isEmpty ? Self.imageOnlyPrompt : userText
input.append(OpenAIRequestBuilder.message(role: "user", content: userContent, images: userImages))

onTrace(CoachTraceEvent(label: "Thinking about your question…", status: .thinking))

Expand Down
Loading
Loading