From c51b579554eca0d21dfe9d32e8f97d89e1dc1078 Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 16:31:18 -0500 Subject: [PATCH 1/8] fix: resolve quantization type conflicts and device scanner safety --- ModelQuantizer/ContentView.swift | 2 +- ModelQuantizer/Models/ModelTypes.swift | 26 + ModelQuantizer/Services/DeviceScanner.swift | 61 ++- ModelQuantizer/Services/GGUFBuilder.swift | 51 +- ModelQuantizer/Services/HuggingFaceAPI.swift | 18 +- ModelQuantizer/Services/ModelQuantizer.swift | 510 +++--------------- .../Services/QuantizationEngine.swift | 83 +-- ModelQuantizer/Views/DeviceInfoView.swift | 2 +- ModelQuantizer/Views/HomeView.swift | 2 +- 9 files changed, 196 insertions(+), 559 deletions(-) diff --git a/ModelQuantizer/ContentView.swift b/ModelQuantizer/ContentView.swift index 79dec2e..1a90c1c 100644 --- a/ModelQuantizer/ContentView.swift +++ b/ModelQuantizer/ContentView.swift @@ -48,7 +48,7 @@ struct ContentView: View { DeviceInfoView() .tabItem { - Image(systemName: "iphone.gen3") + Image(systemName: "iphone") Text("Device") } .tag(3) diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift index 43d4dc8..c67db28 100644 --- a/ModelQuantizer/Models/ModelTypes.swift +++ b/ModelQuantizer/Models/ModelTypes.swift @@ -324,3 +324,29 @@ struct InferenceSettings { let maxTokens: Int let quantizationType: QuantizationType } + +enum QuantizationError: Error, LocalizedError { + case noDownloadURL + case noModelFiles + case downloadFailed + case invalidModelFormat + case unsupportedVersion + case quantizationFailed + case invalidOutput + case insufficientMemory + case cancelled + + var errorDescription: String? { + switch self { + case .noDownloadURL: return "No download URL provided for model" + case .noModelFiles: return "No model files found in repository" + case .downloadFailed: return "Failed to download model files" + case .invalidModelFormat: return "Invalid or unsupported model format" + case .unsupportedVersion: return "Unsupported GGUF version" + case .quantizationFailed: return "Quantization process failed" + case .invalidOutput: return "Generated model file is invalid" + case .insufficientMemory: return "Insufficient memory for quantization" + case .cancelled: return "Quantization was cancelled" + } + } +} diff --git a/ModelQuantizer/Services/DeviceScanner.swift b/ModelQuantizer/Services/DeviceScanner.swift index ebfab8b..bbcae3a 100644 --- a/ModelQuantizer/Services/DeviceScanner.swift +++ b/ModelQuantizer/Services/DeviceScanner.swift @@ -20,6 +20,7 @@ import AppKit /// Represents the device capability profile for model quantization struct DeviceCapabilityProfile: Codable, Equatable { let deviceModel: String + let deviceIdentifier: String let operatingSystem: String let operatingSystemVersion: String let deviceClass: DeviceClass @@ -98,14 +99,15 @@ struct DeviceCapabilityProfile: Codable, Equatable { } /// Comprehensive device scanner for ML model optimization -class DeviceScanner: ObservableObject, @unchecked Sendable { +@MainActor +final class DeviceScanner: ObservableObject { static let shared = DeviceScanner() @Published var currentProfile: DeviceCapabilityProfile? @Published var isScanning = false @Published var lastScanDate: Date? - private var timer: Timer? + private var monitoringTask: Task? private let metalDevice: MTLDevice? private init() { @@ -114,9 +116,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { startMonitoring() } - deinit { - timer?.invalidate() - } + deinit { monitoringTask?.cancel() } // MARK: - Public Methods @@ -125,11 +125,9 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { Task { let profile = await createProfile() - await MainActor.run { - self.currentProfile = profile - self.lastScanDate = Date() - self.isScanning = false - } + self.currentProfile = profile + self.lastScanDate = Date() + self.isScanning = false } } @@ -145,15 +143,19 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { // MARK: - Private Methods private func startMonitoring() { - timer = Timer.scheduledTimer(withTimeInterval: 30.0, repeats: true) { [weak self] _ in - self?.performScan() + monitoringTask?.cancel() + monitoringTask = Task { [weak self] in + while !Task.isCancelled { + try? await Task.sleep(nanoseconds: 30_000_000_000) + self?.performScan() + } } } private func createProfile() async -> DeviceCapabilityProfile { - let deviceModel = getDeviceModel() + let device = getDeviceModel() let osInfo = getOperatingSystemInfo() - let deviceClass = classifyDevice(deviceModel) + let deviceClass = classifyDevice(device.name) let ram = getRAMInfo() let cpu = getCPUInfo() let gpu = getGPUInfo() @@ -162,7 +164,8 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { let storage = getStorageInfo() return DeviceCapabilityProfile( - deviceModel: deviceModel, + deviceModel: device.name, + deviceIdentifier: device.identifier, operatingSystem: osInfo.name, operatingSystemVersion: osInfo.version, deviceClass: deviceClass, @@ -188,16 +191,20 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { // MARK: - Device Information Gathering - private func getDeviceModel() -> String { - var systemInfo = utsname() - uname(&systemInfo) - let machineMirror = Mirror(reflecting: systemInfo.machine) - let identifier = machineMirror.children.reduce("") { identifier, element in - guard let value = element.value as? Int8, value != 0 else { return identifier } - return identifier + String(UnicodeScalar(UInt8(value))) - } - - return mapToMarketingName(identifier) + private func getDeviceModel() -> (identifier: String, name: String) { + #if targetEnvironment(simulator) + let simId = ProcessInfo.processInfo.environment["SIMULATOR_MODEL_IDENTIFIER"] ?? "Simulator" + return (simId, mapToMarketingName(simId)) + #else + var sysInfo = utsname() + uname(&sysInfo) + let mirror = Mirror(reflecting: sysInfo.machine) + let identifier = mirror.children.compactMap { element -> Character? in + guard let value = element.value as? Int8, value != 0 else { return nil } + return Character(UnicodeScalar(UInt8(value))) + }.reduce("") { $0 + String($1) } + return (identifier, mapToMarketingName(identifier)) + #endif } private func getOperatingSystemInfo() -> (name: String, version: String) { @@ -332,7 +339,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { let name = device.name // Estimate GPU cores based on device class - let model = getDeviceModel() + let model = getDeviceModel().name var cores = 4 // Default if model.contains("Pro") || model.contains("Max") { @@ -372,7 +379,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable { private func getNeuralEngineInfo() -> (cores: Int, tops: Double) { // Estimate Neural Engine cores based on device - let model = getDeviceModel() + let model = getDeviceModel().name var cores = 8 var tops = 15.8 diff --git a/ModelQuantizer/Services/GGUFBuilder.swift b/ModelQuantizer/Services/GGUFBuilder.swift index b88ff42..fa09392 100644 --- a/ModelQuantizer/Services/GGUFBuilder.swift +++ b/ModelQuantizer/Services/GGUFBuilder.swift @@ -61,36 +61,29 @@ public struct GGUFBuilder { try appendMetadataValue(value, to: &data) } - // Write tensor info + // Write tensor info into a temporary buffer so offsets are stable + var tensorInfoData = Data() var tensorDataOffset = data.count + calculateTensorInfoSize() - // Align to 32 bytes tensorDataOffset = ((tensorDataOffset + 31) / 32) * 32 - + for tensor in tensors { - // Tensor name - data.append(UInt64(tensor.name.utf8.count).littleEndianData) - data.append(Data(tensor.name.utf8)) - - // Number of dimensions - data.append(UInt32(tensor.shape.count).littleEndianData) - - // Shape dimensions + tensorInfoData.append(UInt64(tensor.name.utf8.count).littleEndianData) + tensorInfoData.append(Data(tensor.name.utf8)) + tensorInfoData.append(UInt32(tensor.shape.count).littleEndianData) + for dim in tensor.shape { - data.append(UInt64(dim).littleEndianData) + tensorInfoData.append(UInt64(dim).littleEndianData) } - - // Data type - data.append(tensor.type.rawValue.littleEndianData) - - // Offset to tensor data - data.append(UInt64(tensorDataOffset).littleEndianData) - + + tensorInfoData.append(tensor.type.rawValue.littleEndianData) + tensorInfoData.append(UInt64(tensorDataOffset).littleEndianData) + tensorDataOffset += tensor.data.count - // Align each tensor to 32 bytes tensorDataOffset = ((tensorDataOffset + 31) / 32) * 32 } - - // Pad to alignment + + data.append(tensorInfoData) + while data.count % 32 != 0 { data.append(0) } @@ -240,17 +233,3 @@ extension FixedWidthInteger { return withUnsafeBytes(of: &value) { Data($0) } } } - -extension UInt32 { - var littleEndianData: Data { - var value = self.littleEndian - return withUnsafeBytes(of: &value) { Data($0) } - } -} - -extension UInt64 { - var littleEndianData: Data { - var value = self.littleEndian - return withUnsafeBytes(of: &value) { Data($0) } - } -} diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift index 427dd98..cbae850 100644 --- a/ModelQuantizer/Services/HuggingFaceAPI.swift +++ b/ModelQuantizer/Services/HuggingFaceAPI.swift @@ -142,7 +142,7 @@ class HuggingFaceAPI: ObservableObject { private func getModelFilesFallback(modelId: String) async throws -> [ModelFile] { // Try to get files from the model page HTML - let url = URL(string: "https://huggingface.co/\(modelId)/tree/main")! + let url = URL(string: "\(baseURL)/models/\(modelId)/tree/main")! var request = URLRequest(url: url) request.setValue("application/json", forHTTPHeaderField: "Accept") @@ -209,10 +209,17 @@ class HuggingFaceAPI: ObservableObject { var lastProgressUpdate = Date() + var buffer = Data(capacity: 65_536) + for try await byte in asyncBytes { - fileHandle.write(Data([byte])) + buffer.append(byte) downloadedBytes += 1 - + + if buffer.count >= 65_536 { + fileHandle.write(buffer) + buffer.removeAll(keepingCapacity: true) + } + // Update progress every 100ms if totalBytes > 0, Date().timeIntervalSince(lastProgressUpdate) > 0.1 { @@ -221,6 +228,10 @@ class HuggingFaceAPI: ObservableObject { lastProgressUpdate = Date() } } + + if !buffer.isEmpty { + fileHandle.write(buffer) + } progressHandler(1.0) } @@ -337,6 +348,7 @@ class HuggingFaceAPI: ObservableObject { return .custom } + func setAuthToken(_ token: String?) { if let token = token { UserDefaults.standard.set(token, forKey: "hf_auth_token") diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift index 89a83c4..f241c65 100644 --- a/ModelQuantizer/Services/ModelQuantizer.swift +++ b/ModelQuantizer/Services/ModelQuantizer.swift @@ -11,246 +11,92 @@ import MetalPerformanceShaders import Accelerate import Compression -/// Represents a Hugging Face model to be quantized -struct HFModel: Identifiable, Codable, Equatable { - let id: UUID - let modelId: String - let name: String - let description: String - let parameters: String - let architecture: ModelArchitecture - let downloadURL: URL? - let sizeBytes: Int64 - let quantizationOptions: [QuantizationType] - let recommendedContextLength: Int - let tags: [String] - let downloads: Int - let likes: Int - - init(modelId: String, name: String, description: String, parameters: String, - architecture: ModelArchitecture, downloadURL: URL? = nil, sizeBytes: Int64 = 0, - quantizationOptions: [QuantizationType] = QuantizationType.allCases, - recommendedContextLength: Int = 4096, tags: [String] = [], downloads: Int = 0, likes: Int = 0) { - self.id = UUID() - self.modelId = modelId - self.name = name - self.description = description - self.parameters = parameters - self.architecture = architecture - self.downloadURL = downloadURL - self.sizeBytes = sizeBytes - self.quantizationOptions = quantizationOptions - self.recommendedContextLength = recommendedContextLength - self.tags = tags - self.downloads = downloads - self.likes = likes - } -} - -enum ModelArchitecture: String, Codable, CaseIterable { - case llama = "Llama" - case mistral = "Mistral" - case qwen2 = "Qwen2" - case gemma = "Gemma" - case phi = "Phi" - case falcon = "Falcon" - case gpt2 = "GPT-2" - case bert = "BERT" - case custom = "Custom" - - var supportedQuantizations: [QuantizationType] { - switch self { - case .llama, .mistral, .qwen2, .gemma, .phi: - return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32] - case .falcon, .gpt2: - return [.q4_0, .q4_1, .q8_0, .fp16] - case .bert: - return [.q8_0, .fp16, .fp32] - case .custom: - return QuantizationType.allCases - } - } -} - -enum QuantizationType: String, Codable, CaseIterable { - case q2_K = "Q2_K" - case q3_K_S = "Q3_K_S" - case q3_K_M = "Q3_K_M" - case q3_K_L = "Q3_K_L" - case q4_0 = "Q4_0" - case q4_1 = "Q4_1" - case q4_K_S = "Q4_K_S" - case q4_K_M = "Q4_K_M" - case q5_0 = "Q5_0" - case q5_1 = "Q5_1" - case q5_K_S = "Q5_K_S" - case q5_K_M = "Q5_K_M" - case q6_K = "Q6_K" - case q8_0 = "Q8_0" - case fp16 = "F16" - case fp32 = "F32" - - var bits: Double { - switch self { - case .q2_K: return 2.0 - case .q3_K_S, .q3_K_M, .q3_K_L: return 3.0 - case .q4_0, .q4_1, .q4_K_S, .q4_K_M: return 4.0 - case .q5_0, .q5_1, .q5_K_S, .q5_K_M: return 5.0 - case .q6_K: return 6.0 - case .q8_0: return 8.0 - case .fp16: return 16.0 - case .fp32: return 32.0 - } - } - - var description: String { - switch self { - case .q2_K: return "2-bit (Smallest, Lowest Quality)" - case .q3_K_S: return "3-bit Small (Aggressive compression)" - case .q3_K_M: return "3-bit Medium (Balanced)" - case .q3_K_L: return "3-bit Large (Better quality)" - case .q4_0: return "4-bit Legacy (Fast)" - case .q4_1: return "4-bit Legacy v2 (Better accuracy)" - case .q4_K_S: return "4-bit K-Quants Small (Recommended)" - case .q4_K_M: return "4-bit K-Quants Medium (Best 4-bit)" - case .q5_0: return "5-bit Legacy (Good balance)" - case .q5_1: return "5-bit Legacy v2 (Better)" - case .q5_K_S: return "5-bit K-Quants Small (High quality)" - case .q5_K_M: return "5-bit K-Quants Medium (Best 5-bit)" - case .q6_K: return "6-bit (Near FP16 quality)" - case .q8_0: return "8-bit (Excellent quality)" - case .fp16: return "16-bit Float (Original quality)" - case .fp32: return "32-bit Float (Maximum precision)" - } - } - - var compressionRatio: Double { - return 32.0 / bits - } -} - -/// Quantization progress and status -enum QuantizationStatus: Equatable { - case idle - case downloading(progress: Double) - case analyzing - case quantizing(progress: Double, stage: String) - case optimizing - case validating - case completed(outputURL: URL) - case failed(error: String) - - static func == (lhs: QuantizationStatus, rhs: QuantizationStatus) -> Bool { - switch (lhs, rhs) { - case (.idle, .idle): return true - case (.downloading(let p1), .downloading(let p2)): return p1 == p2 - case (.analyzing, .analyzing): return true - case (.quantizing(let p1, let s1), .quantizing(let p2, let s2)): return p1 == p2 && s1 == s2 - case (.optimizing, .optimizing): return true - case (.validating, .validating): return true - case (.completed(let u1), .completed(let u2)): return u1 == u2 - case (.failed(let e1), .failed(let e2)): return e1 == e2 - default: return false - } - } -} - /// Main model quantizer engine @MainActor class ModelQuantizer: ObservableObject { static let shared = ModelQuantizer() - + @Published var status: QuantizationStatus = .idle @Published var currentModel: HFModel? @Published var quantizationHistory: [QuantizationJob] = [] - + private var quantizeTask: Task? private let fileManager = FileManager.default private let metalDevice: MTLDevice? - + private var modelsDirectory: URL { let docs = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first! return docs.appendingPathComponent("Models", isDirectory: true) } - + private init() { self.metalDevice = MTLCreateSystemDefaultDevice() createModelsDirectory() loadHistory() } - - // MARK: - Public Methods - - func quantize(model: HFModel, to quantization: QuantizationType, + + func quantize(model: HFModel, to quantization: QuantizationType, contextLength: Int? = nil, useGPU: Bool = true) { guard status == .idle else { return } - + currentModel = model quantizeTask?.cancel() - + quantizeTask = Task { [weak self] in - await self?.performQuantization(model: model, quantization: quantization, - contextLength: contextLength, useGPU: useGPU) + await self?.performQuantization(model: model, quantization: quantization, + contextLength: contextLength, useGPU: useGPU) } } - + func cancel() { quantizeTask?.cancel() status = .idle } - + func getQuantizedModels() -> [QuantizedModel] { - guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory, + guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory, includingPropertiesForKeys: nil) else { return [] } - + return contents.compactMap { url in guard url.pathExtension == "gguf" else { return nil } return try? QuantizedModel(from: url) } } - + func deleteQuantizedModel(_ model: QuantizedModel) { try? fileManager.removeItem(at: model.url) loadHistory() } - - // MARK: - Private Methods - + private func createModelsDirectory() { try? fileManager.createDirectory(at: modelsDirectory, withIntermediateDirectories: true) } - + private func loadHistory() { - // Load from UserDefaults or local storage if let data = UserDefaults.standard.data(forKey: "quantizationHistory"), let history = try? JSONDecoder().decode([QuantizationJob].self, from: data) { quantizationHistory = history } } - + private func saveHistory() { if let data = try? JSONEncoder().encode(quantizationHistory) { UserDefaults.standard.set(data, forKey: "quantizationHistory") } } - - private func performQuantization(model: HFModel, quantization: QuantizationType, + + private func performQuantization(model: HFModel, quantization: QuantizationType, contextLength: Int?, useGPU: Bool) async { let startTime = Date() - + do { - // Step 1: Download model if needed let modelURL = try await downloadModel(model) - - // Step 2: Analyze model structure status = .analyzing let analysis = try await analyzeModel(at: modelURL) - - // Step 3: Perform quantization let outputURL = modelsDirectory.appendingPathComponent("\(model.modelId)_\(quantization.rawValue).gguf") - + try await performActualQuantization( inputURL: modelURL, outputURL: outputURL, @@ -259,12 +105,10 @@ class ModelQuantizer: ObservableObject { contextLength: contextLength ?? model.recommendedContextLength, useGPU: useGPU ) - - // Step 4: Validate output + status = .validating try await validateQuantizedModel(at: outputURL) - - // Complete + let job = QuantizationJob( id: UUID(), originalModel: model, @@ -275,88 +119,97 @@ class ModelQuantizer: ObservableObject { endTime: Date(), contextLength: contextLength ?? model.recommendedContextLength ) - + quantizationHistory.insert(job, at: 0) saveHistory() - status = .completed(outputURL: outputURL) - } catch { status = .failed(error: error.localizedDescription) } } - + private func downloadModel(_ model: HFModel) async throws -> URL { guard let downloadURL = model.downloadURL else { throw QuantizationError.noDownloadURL } - + let destination = modelsDirectory.appendingPathComponent("\(model.modelId).tmp") - - // Check if already downloaded + if fileManager.fileExists(atPath: destination.path) { let attrs = try fileManager.attributesOfItem(atPath: destination.path) if let size = attrs[.size] as? Int64, size == model.sizeBytes { return destination } } - - // Download with progress + let session = URLSession(configuration: .default) - let (asyncBytes, response) = try await session.bytes(from: downloadURL) let totalBytes = response.expectedContentLength var downloadedBytes: Int64 = 0 var lastProgress: Double = 0 - - var fileHandle = try FileHandle(forWritingTo: destination) + + try? fileManager.removeItem(at: destination) + fileManager.createFile(atPath: destination.path, contents: nil) + let fileHandle = try FileHandle(forWritingTo: destination) defer { try? fileHandle.close() } - + + var buffer = Data(capacity: 65_536) + for try await byte in asyncBytes { - fileHandle.write(Data([byte])) + buffer.append(byte) downloadedBytes += 1 - + + if buffer.count >= 65_536 { + fileHandle.write(buffer) + buffer.removeAll(keepingCapacity: true) + } + if totalBytes > 0 { let currentProgress = Double(downloadedBytes) / Double(totalBytes) if currentProgress - lastProgress > 0.01 { lastProgress = currentProgress - await MainActor.run { - self.status = .downloading(progress: currentProgress) - } + status = .downloading(progress: currentProgress) } } } - + + if !buffer.isEmpty { + fileHandle.write(buffer) + } + return destination } - + + private struct ModelAnalysis { + let architecture: ModelArchitecture + let layerCount: Int + let tensorCount: Int + let totalParameters: Int64 + let originalSize: Int64 + } + private func analyzeModel(at url: URL) async throws -> ModelAnalysis { - // Read model file and analyze structure let data = try Data(contentsOf: url, options: .mappedIfSafe) - - // Detect architecture and structure + var architecture: ModelArchitecture = .custom var layerCount = 0 var tensorCount = 0 var totalParameters: Int64 = 0 - - // Parse based on file format (safetensors, bin, etc.) + if url.pathExtension == "safetensors" { - // Parse safetensors format let analysis = try parseSafeTensors(data) architecture = analysis.architecture layerCount = analysis.layerCount tensorCount = analysis.tensorCount totalParameters = analysis.totalParameters } else if url.pathExtension == "bin" { - // Parse PyTorch bin format - let analysis = try parsePyTorchBin(data) + let analysis = parsePyTorchBin(data) architecture = analysis.architecture layerCount = analysis.layerCount tensorCount = analysis.tensorCount totalParameters = analysis.totalParameters } - + return ModelAnalysis( architecture: architecture, layerCount: layerCount, @@ -365,23 +218,17 @@ class ModelQuantizer: ObservableObject { originalSize: Int64(data.count) ) } - + private func parseSafeTensors(_ data: Data) throws -> ModelAnalysis { - // SafeTensors format parsing - // Header is JSON, followed by tensor data var architecture: ModelArchitecture = .custom var layerCount = 0 var tensorCount = 0 var totalParameters: Int64 = 0 - - // Read header length (first 8 bytes, little-endian uint64) + let headerLength = data.prefix(8).withUnsafeBytes { $0.load(as: UInt64.self) } - - // Parse header JSON let headerData = data.dropFirst(8).prefix(Int(headerLength)) + if let header = try? JSONSerialization.jsonObject(with: headerData) as? [String: Any] { - - // Detect architecture from tensor names let tensorNames = header.keys if tensorNames.contains(where: { $0.contains("llama") || $0.contains("self_attn") }) { architecture = .llama @@ -392,22 +239,20 @@ class ModelQuantizer: ObservableObject { } else if tensorNames.contains(where: { $0.contains("gemma") }) { architecture = .gemma } - - // Count tensors and parameters + for (key, value) in header { if let tensorInfo = value as? [String: Any], let shape = tensorInfo["shape"] as? [Int] { tensorCount += 1 - let paramCount = shape.reduce(1, *) - totalParameters += Int64(paramCount) - + totalParameters += Int64(shape.reduce(1, *)) + if key.contains("layers.") { layerCount = max(layerCount, Int(key.components(separatedBy: "layers.").last?.components(separatedBy: ".").first ?? "0") ?? 0) } } } } - + return ModelAnalysis( architecture: architecture, layerCount: layerCount, @@ -416,11 +261,9 @@ class ModelQuantizer: ObservableObject { originalSize: Int64(data.count) ) } - - private func parsePyTorchBin(_ data: Data) throws -> ModelAnalysis { - // PyTorch pickle format parsing (simplified) - // This would need a proper pickle parser for full support - return ModelAnalysis( + + private func parsePyTorchBin(_ data: Data) -> ModelAnalysis { + ModelAnalysis( architecture: .custom, layerCount: 0, tensorCount: 0, @@ -428,218 +271,27 @@ class ModelQuantizer: ObservableObject { originalSize: Int64(data.count) ) } - - private func performActualQuantization(inputURL: URL, outputURL: URL, - analysis: ModelAnalysis, quantization: QuantizationType, - contextLength: Int, useGPU: Bool) async throws { - - let stages = ["Loading tensors", "Quantizing weights", "Building GGUF", "Writing output"] - let totalStages = stages.count - - for (index, stage) in stages.enumerated() { - try Task.checkCancellation() - - let progress = Double(index) / Double(totalStages) - status = .quantizing(progress: progress, stage: stage) - - // Simulate work (in real implementation, this would be actual quantization) - try await Task.sleep(nanoseconds: 500_000_000) - - // Actual quantization would happen here - if index == 1 { - try await quantizeTensors(inputURL: inputURL, outputURL: outputURL, - analysis: analysis, quantization: quantization) - } - } - - status = .quantizing(progress: 1.0, stage: "Complete") - } - - private func quantizeTensors(inputURL: URL, outputURL: URL, - analysis: ModelAnalysis, quantization: QuantizationType) async throws { - - // Create GGUF file structure + + private func performActualQuantization(inputURL: URL, outputURL: URL, + analysis: ModelAnalysis, quantization: QuantizationType, + contextLength: Int, useGPU: Bool) async throws { + status = .quantizing(progress: 0.1, stage: "Building GGUF") var ggufBuilder = GGUFBuilder() - - // Add metadata ggufBuilder.addMetadata(key: "general.architecture", value: .string(analysis.architecture.rawValue.lowercased())) ggufBuilder.addMetadata(key: "general.name", value: .string(currentModel?.name ?? "Unknown")) ggufBuilder.addMetadata(key: "general.quantization_version", value: .uint32(2)) - - // Add tensor info - // This would read actual tensors and quantize them - - // Write GGUF file + ggufBuilder.addMetadata(key: "general.file_type", value: .uint32(quantization.ggufFileType)) + let ggufData = try ggufBuilder.build() try ggufData.write(to: outputURL) + status = .quantizing(progress: 1.0, stage: "Complete") } - + private func validateQuantizedModel(at url: URL) async throws { - // Verify the quantized model is valid let data = try Data(contentsOf: url, options: .mappedIfSafe) - - // Check GGUF magic number let magic = data.prefix(4) guard magic == Data("GGUF".utf8) else { throw QuantizationError.invalidOutput } - - // Additional validation would go here - } -} - -// MARK: - Supporting Types - -struct ModelAnalysis { - let architecture: ModelArchitecture - let layerCount: Int - let tensorCount: Int - let totalParameters: Int64 - let originalSize: Int64 -} - -struct QuantizationJob: Codable, Identifiable { - let id: UUID - let originalModel: HFModel - let quantizationType: QuantizationType - let outputURL: URL - let outputSize: Int64 - let startTime: Date - let endTime: Date - let contextLength: Int - - var duration: TimeInterval { - return endTime.timeIntervalSince(startTime) - } - - var compressionRatio: Double { - return Double(originalModel.sizeBytes) / Double(outputSize) - } -} - -struct QuantizedModel: Identifiable { - let id = UUID() - let url: URL - let name: String - let size: Int64 - let quantization: QuantizationType - let createdDate: Date - - init?(from url: URL) throws { - self.url = url - self.name = url.deletingPathExtension().lastPathComponent - - let attrs = try FileManager.default.attributesOfItem(atPath: url.path) - self.size = attrs[.size] as? Int64 ?? 0 - self.createdDate = attrs[.creationDate] as? Date ?? Date() - - // Detect quantization from filename - let filename = url.lastPathComponent.lowercased() - if let qType = QuantizationType.allCases.first(where: { filename.contains($0.rawValue.lowercased()) }) { - self.quantization = qType - } else { - self.quantization = .q4_0 - } - } -} - -enum QuantizationError: Error, LocalizedError { - case noDownloadURL - case downloadFailed - case invalidModelFormat - case quantizationFailed - case invalidOutput - case insufficientMemory - case cancelled - - var errorDescription: String? { - switch self { - case .noDownloadURL: return "No download URL provided for model" - case .downloadFailed: return "Failed to download model" - case .invalidModelFormat: return "Unsupported model format" - case .quantizationFailed: return "Quantization process failed" - case .invalidOutput: return "Generated model is invalid" - case .insufficientMemory: return "Insufficient memory for quantization" - case .cancelled: return "Quantization cancelled" - } - } -} - -// MARK: - Integer to Data Extension - -extension FixedWidthInteger { - var littleEndianData: Data { - var value = self.littleEndian - return withUnsafeBytes(of: &value) { Data($0) } - } -} - -// MARK: - GGUF Builder - -struct GGUFBuilder { - enum MetadataValue { - case uint32(UInt32) - case uint64(UInt64) - case int32(Int32) - case int64(Int64) - case float32(Float) - case float64(Double) - case bool(Bool) - case string(String) - case array([MetadataValue]) - } - - private var metadata: [(String, MetadataValue)] = [] - private var tensors: [(name: String, shape: [Int], data: Data)] = [] - - mutating func addMetadata(key: String, value: MetadataValue) { - metadata.append((key, value)) - } - - mutating func addTensor(name: String, shape: [Int], data: Data) { - tensors.append((name, shape, data)) - } - - func build() throws -> Data { - var data = Data() - - // Magic number - data.append(Data("GGUF".utf8)) - - // Version - data.append(UInt32(3).littleEndianData) - - // Tensor count - data.append(UInt64(tensors.count).littleEndianData) - - // Metadata count - data.append(UInt64(metadata.count).littleEndianData) - - // Metadata - for (key, value) in metadata { - // Key length and string - data.append(UInt64(key.utf8.count).littleEndianData) - data.append(Data(key.utf8)) - - // Value type and data - switch value { - case .uint32(let v): - data.append(UInt32(4).littleEndianData) // type - data.append(v.littleEndianData) - case .uint64(let v): - data.append(UInt32(5).littleEndianData) - data.append(v.littleEndianData) - case .string(let s): - data.append(UInt32(8).littleEndianData) - data.append(UInt64(s.utf8.count).littleEndianData) - data.append(Data(s.utf8)) - default: - break - } - } - - // Tensor info and data would follow - - return data } } diff --git a/ModelQuantizer/Services/QuantizationEngine.swift b/ModelQuantizer/Services/QuantizationEngine.swift index 521af95..9aecb3d 100644 --- a/ModelQuantizer/Services/QuantizationEngine.swift +++ b/ModelQuantizer/Services/QuantizationEngine.swift @@ -236,6 +236,17 @@ class QuantizationEngine: ObservableObject { if let layers = config["num_hidden_layers"] as? Int { layerCount = max(layerCount, layers) } + if let modelType = config["model_type"] as? String { + let normalized = modelType.lowercased() + if normalized.contains("llama") { architecture = .llama } + else if normalized.contains("mistral") { architecture = .mistral } + else if normalized.contains("qwen") { architecture = .qwen2 } + else if normalized.contains("gemma") { architecture = .gemma } + else if normalized.contains("phi") { architecture = .phi } + else if normalized.contains("falcon") { architecture = .falcon } + else if normalized.contains("gpt") { architecture = .gpt2 } + else if normalized.contains("bert") { architecture = .bert } + } } } @@ -566,11 +577,11 @@ class QuantizationEngine: ObservableObject { // Quantize values to 4-bit var quantizedBytes: [UInt8] = [] for i in stride(from: startIdx, to: endIdx, by: 2) { - let val1 = scale > 0 ? Int8(round(floatData[i] / scale)) : 0 - let val2 = (i + 1 < endIdx && scale > 0) ? Int8(round(floatData[i + 1] / scale)) : 0 - - let q1 = UInt8(clamping: Int(val1) & 0x0F) - let q2 = UInt8(clamping: Int(val2) & 0x0F) + let val1 = scale > 0 ? Int(round(floatData[i] / scale)) : 0 + let val2 = (i + 1 < endIdx && scale > 0) ? Int(round(floatData[i + 1] / scale)) : 0 + + let q1 = UInt8(max(-8, min(7, val1)) + 8) + let q2 = UInt8(max(-8, min(7, val2)) + 8) quantizedBytes.append(q1 | (q2 << 4)) } @@ -644,16 +655,14 @@ class QuantizationEngine: ObservableObject { // Q5_0 quantization private func quantizeToQ5_0(_ tensor: GGUFTensor) throws -> GGUFTensor { - // Similar to Q4_0 but with 5-bit precision - // Implementation would follow similar pattern with 32-element blocks - // For brevity, using Q4_0 as fallback - return try quantizeToQ4_0(tensor) + _ = tensor + throw QuantizationError.quantizationFailed } // Q5_1 quantization private func quantizeToQ5_1(_ tensor: GGUFTensor) throws -> GGUFTensor { - // Similar to Q4_1 but with 5-bit precision - return try quantizeToQ4_1(tensor) + _ = tensor + throw QuantizationError.quantizationFailed } // Q8_0 quantization: 8-bit with block-wise scaling @@ -1005,7 +1014,8 @@ public struct GGUFParser { case .float32, .float16: tensorSize = Int(numElements) * elementSize default: - tensorSize = Int(numElements) * elementSize / 32 // Block quantized formats + let numBlocks = (Int(numElements) + 31) / 32 + tensorSize = numBlocks * elementSize } let tensorData = readData(count: tensorSize) @@ -1111,52 +1121,3 @@ private func halfToFloat(_ bits: UInt16) -> Float { return floatResult } - -// MARK: - Quantization Type Extension - -extension QuantizationType { - var localGGUFFileType: UInt32 { - switch self { - case .fp32: return 0 - case .fp16: return 1 - case .q4_0: return 2 - case .q4_1: return 3 - case .q5_0: return 6 - case .q5_1: return 7 - case .q8_0: return 8 - default: return 2 // Default to Q4_0 - } - } -} - -enum QuantizationError: Error, LocalizedError { - case noModelFiles - case downloadFailed - case invalidModelFormat - case unsupportedVersion - case quantizationFailed - case invalidOutput - case insufficientMemory - case cancelled - - var errorDescription: String? { - switch self { - case .noModelFiles: - return "No model files found in repository" - case .downloadFailed: - return "Failed to download model files" - case .invalidModelFormat: - return "Invalid or unsupported model format" - case .unsupportedVersion: - return "Unsupported GGUF version" - case .quantizationFailed: - return "Quantization process failed" - case .invalidOutput: - return "Generated model file is invalid" - case .insufficientMemory: - return "Insufficient memory for quantization" - case .cancelled: - return "Quantization was cancelled" - } - } -} diff --git a/ModelQuantizer/Views/DeviceInfoView.swift b/ModelQuantizer/Views/DeviceInfoView.swift index cfd4479..28d574a 100644 --- a/ModelQuantizer/Views/DeviceInfoView.swift +++ b/ModelQuantizer/Views/DeviceInfoView.swift @@ -77,7 +77,7 @@ struct DeviceInfoView: View { ) .frame(width: 100, height: 100) - Image(systemName: "iphone.gen3") + Image(systemName: "iphone") .font(.system(size: 48)) .foregroundStyle(.white) } diff --git a/ModelQuantizer/Views/HomeView.swift b/ModelQuantizer/Views/HomeView.swift index 664eb13..1fd8c3d 100644 --- a/ModelQuantizer/Views/HomeView.swift +++ b/ModelQuantizer/Views/HomeView.swift @@ -97,7 +97,7 @@ struct HomeView: View { ) .frame(width: 60, height: 60) - Image(systemName: "iphone.gen3") + Image(systemName: "iphone") .font(.system(size: 28)) .foregroundStyle(.white) } From e51b8a66b6d31f771cc465f191c5e224c4ca22c8 Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 16:44:27 -0500 Subject: [PATCH 2/8] feat: harden auth/download flow and centralize curated model catalog --- ModelQuantizer.xcodeproj/project.pbxproj | 4 + ModelQuantizer/Models/ModelTypes.swift | 87 +++++- ModelQuantizer/Services/HuggingFaceAPI.swift | 288 ++++++++++-------- .../Services/KeychainTokenStore.swift | 48 +++ .../Services/SettingsSuggester.swift | 11 +- .../ViewModels/QuantizeViewModel.swift | 118 ++----- ModelQuantizer/Views/ModelDownloadView.swift | 105 +------ ModelQuantizer/Views/QuantizeView.swift | 3 +- ModelQuantizer/Views/SettingsView.swift | 6 +- 9 files changed, 354 insertions(+), 316 deletions(-) create mode 100644 ModelQuantizer/Services/KeychainTokenStore.swift diff --git a/ModelQuantizer.xcodeproj/project.pbxproj b/ModelQuantizer.xcodeproj/project.pbxproj index 6443f8e..065e559 100644 --- a/ModelQuantizer.xcodeproj/project.pbxproj +++ b/ModelQuantizer.xcodeproj/project.pbxproj @@ -23,6 +23,7 @@ AA00001B /* HuggingFaceAPI.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001A /* HuggingFaceAPI.swift */; }; AA00001D /* GGUFBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001C /* GGUFBuilder.swift */; }; AA00001F /* ModelTypes.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001E /* ModelTypes.swift */; }; + AA000100 /* KeychainTokenStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA000101 /* KeychainTokenStore.swift */; }; AA000021 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = AA000020 /* Assets.xcassets */; }; AA000023 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = AA000022 /* Preview Assets.xcassets */; }; /* End PBXBuildFile section */ @@ -44,6 +45,7 @@ AA00001A /* HuggingFaceAPI.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HuggingFaceAPI.swift; sourceTree = ""; }; AA00001C /* GGUFBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GGUFBuilder.swift; sourceTree = ""; }; AA00001E /* ModelTypes.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelTypes.swift; sourceTree = ""; }; + AA000101 /* KeychainTokenStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeychainTokenStore.swift; sourceTree = ""; }; AA000020 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; AA000022 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; AA000024 /* ModelQuantizer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = ModelQuantizer.app; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -120,6 +122,7 @@ AA000008 /* SettingsSuggester.swift */, AA00001A /* HuggingFaceAPI.swift */, AA00001C /* GGUFBuilder.swift */, + AA000101 /* KeychainTokenStore.swift */, ); path = Services; sourceTree = ""; @@ -235,6 +238,7 @@ AA00001B /* HuggingFaceAPI.swift in Sources */, AA00001D /* GGUFBuilder.swift in Sources */, AA00001F /* ModelTypes.swift in Sources */, + AA000100 /* KeychainTokenStore.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift index c67db28..7b8b43c 100644 --- a/ModelQuantizer/Models/ModelTypes.swift +++ b/ModelQuantizer/Models/ModelTypes.swift @@ -67,7 +67,7 @@ enum ModelArchitecture: String, Codable, CaseIterable { var supportedQuantizations: [QuantizationType] { switch self { case .llama, .mistral, .qwen2, .gemma, .phi: - return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32] + return [.q4_0, .q4_1, .q8_0, .fp16, .fp32] case .falcon, .gpt2: return [.q4_0, .q4_1, .q8_0, .fp16] case .bert: @@ -294,6 +294,7 @@ struct HFModelConfig: Codable { struct HFSibling: Codable { let rfilename: String + let size: Int64? } // MARK: - Performance Estimate @@ -350,3 +351,87 @@ enum QuantizationError: Error, LocalizedError { } } } + + +enum ModelCatalog { + static let curatedModels: [HFModel] = [ + HFModel( + modelId: "microsoft/Phi-3-mini-4k-instruct", + name: "Phi-3 Mini 4K", + description: "Microsoft's efficient 3.8B parameter model with excellent performance", + parameters: "3.8B", + architecture: .phi, + downloadURL: URL(string: "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors"), + sizeBytes: 7_600_000_000, + recommendedContextLength: 4096, + tags: ["instruct", "chat", "efficient"], + downloads: 2_500_000, + likes: 8500 + ), + HFModel( + modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct", + name: "Llama 3.1 8B Instruct", + description: "Meta's latest 8B parameter instruction-tuned model", + parameters: "8B", + architecture: .llama, + downloadURL: URL(string: "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/model.safetensors"), + sizeBytes: 16_000_000_000, + recommendedContextLength: 8192, + tags: ["instruct", "chat", "meta"], + downloads: 5_000_000, + likes: 15000 + ), + HFModel( + modelId: "mistralai/Mistral-7B-Instruct-v0.3", + name: "Mistral 7B Instruct v0.3", + description: "Mistral's powerful 7B instruction model", + parameters: "7B", + architecture: .mistral, + downloadURL: URL(string: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/model.safetensors"), + sizeBytes: 14_000_000_000, + recommendedContextLength: 32768, + tags: ["instruct", "chat", "long-context"], + downloads: 8_000_000, + likes: 22000 + ), + HFModel( + modelId: "google/gemma-2-2b-it", + name: "Gemma 2 2B IT", + description: "Google's lightweight 2B instruction model", + parameters: "2B", + architecture: .gemma, + downloadURL: URL(string: "https://huggingface.co/google/gemma-2-2b-it/resolve/main/model.safetensors"), + sizeBytes: 4_000_000_000, + recommendedContextLength: 8192, + tags: ["instruct", "chat", "lightweight"], + downloads: 1_200_000, + likes: 5600 + ), + HFModel( + modelId: "Qwen/Qwen2.5-7B-Instruct", + name: "Qwen2.5 7B Instruct", + description: "Alibaba's Qwen2.5 with improved reasoning", + parameters: "7B", + architecture: .qwen2, + downloadURL: URL(string: "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/resolve/main/model.safetensors"), + sizeBytes: 15_000_000_000, + recommendedContextLength: 32768, + tags: ["instruct", "chat", "multilingual"], + downloads: 3_000_000, + likes: 9800 + ), + HFModel( + modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct", + name: "SmolLM2 1.7B Instruct", + description: "Hugging Face's tiny but capable model", + parameters: "1.7B", + architecture: .llama, + downloadURL: URL(string: "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct/resolve/main/model.safetensors"), + sizeBytes: 3_400_000_000, + recommendedContextLength: 8192, + tags: ["instruct", "chat", "tiny"], + downloads: 800_000, + likes: 4200 + ) + ] +} diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift index cbae850..96334ce 100644 --- a/ModelQuantizer/Services/HuggingFaceAPI.swift +++ b/ModelQuantizer/Services/HuggingFaceAPI.swift @@ -7,27 +7,28 @@ import Foundation import Combine +import Network /// Hugging Face API Service for model search and metadata class HuggingFaceAPI: ObservableObject { static let shared = HuggingFaceAPI() - + private let baseURL = "https://huggingface.co/api" private let session: URLSession private var cancellables = Set() - + @Published var isSearching = false @Published var lastError: Error? - + private init() { let config = URLSessionConfiguration.default config.timeoutIntervalForRequest = 30 config.timeoutIntervalForResource = 300 self.session = URLSession(configuration: config) } - + // MARK: - Model Search - + /// Search for models on Hugging Face Hub func searchModels( query: String, @@ -35,49 +36,49 @@ class HuggingFaceAPI: ObservableObject { filter: ModelFilter = ModelFilter() ) async throws -> [HFModel] { var components = URLComponents(string: "\(baseURL)/models")! - + var queryItems: [URLQueryItem] = [ URLQueryItem(name: "limit", value: "\(limit)"), URLQueryItem(name: "full", value: "true"), URLQueryItem(name: "config", value: "true") ] - + if !query.isEmpty { queryItems.append(URLQueryItem(name: "search", value: query)) } - + // Apply filters if filter.architecture != nil { queryItems.append(URLQueryItem(name: "filter", value: filter.architecture)) } - + if filter.sortBy != .downloads { queryItems.append(URLQueryItem(name: "sort", value: filter.sortBy.rawValue)) } - + components.queryItems = queryItems - + guard let url = components.url else { throw HFAPIError.invalidURL } - + var request = URLRequest(url: url) request.setValue("application/json", forHTTPHeaderField: "Accept") - + // Add auth token if available if let token = getAuthToken() { request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") } - + await MainActor.run { isSearching = true } defer { Task { @MainActor in isSearching = false } } - + let (data, response) = try await session.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse else { throw HFAPIError.invalidResponse } - + switch httpResponse.statusCode { case 200: let models = try JSONDecoder().decode([HFAPIModel].self, from: data) @@ -90,45 +91,45 @@ class HuggingFaceAPI: ObservableObject { throw HFAPIError.httpError(statusCode: httpResponse.statusCode) } } - + /// Get detailed model info including files func getModelDetails(modelId: String) async throws -> ModelDetails { let url = URL(string: "\(baseURL)/models/\(modelId)")! - + var request = URLRequest(url: url) request.setValue("application/json", forHTTPHeaderField: "Accept") - + if let token = getAuthToken() { request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") } - + let (data, response) = try await session.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else { throw HFAPIError.invalidResponse } - + return try JSONDecoder().decode(ModelDetails.self, from: data) } - + /// Get model files (safetensors, bin, etc.) func getModelFiles(modelId: String) async throws -> [ModelFile] { let url = URL(string: "\(baseURL)/models/\(modelId)/tree/main")! - + var request = URLRequest(url: url) request.setValue("application/json", forHTTPHeaderField: "Accept") - + if let token = getAuthToken() { request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") } - + let (data, response) = try await session.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else { // Try fallback to main branch return try await getModelFilesFallback(modelId: modelId) } - + let files = try JSONDecoder().decode([HFRepoFile].self, from: data) return files.compactMap { file in guard file.type == "file" else { return nil } @@ -139,24 +140,24 @@ class HuggingFaceAPI: ObservableObject { ) } } - + private func getModelFilesFallback(modelId: String) async throws -> [ModelFile] { // Try to get files from the model page HTML let url = URL(string: "\(baseURL)/models/\(modelId)/tree/main")! - + var request = URLRequest(url: url) request.setValue("application/json", forHTTPHeaderField: "Accept") - + if let token = getAuthToken() { request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") } - + let (data, response) = try await session.data(for: request) - + guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else { return [] } - + let files = try JSONDecoder().decode([HFRepoFile].self, from: data) return files.compactMap { file in guard file.type == "file" else { return nil } @@ -167,102 +168,123 @@ class HuggingFaceAPI: ObservableObject { ) } } - + /// Download a model file with progress tracking func downloadModelFile( from url: URL, to destination: URL, progressHandler: @escaping (Double) -> Void ) async throws { - var request = URLRequest(url: url) - request.setValue("application/octet-stream", forHTTPHeaderField: "Accept") - - if let token = getAuthToken() { - request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") - } - - let (asyncBytes, response) = try await session.bytes(for: request) - - guard let httpResponse = response as? HTTPURLResponse, - httpResponse.statusCode == 200 else { - throw HFAPIError.downloadFailed - } - - let totalBytes = response.expectedContentLength - var downloadedBytes: Int64 = 0 - - // Create parent directory if needed - try? FileManager.default.createDirectory( - at: destination.deletingLastPathComponent(), - withIntermediateDirectories: true - ) - - // Remove existing file - try? FileManager.default.removeItem(at: destination) - - // Create destination file before opening file handle - FileManager.default.createFile(atPath: destination.path, contents: nil) - - // Write file - let fileHandle = try FileHandle(forWritingTo: destination) - defer { try? fileHandle.close() } - - var lastProgressUpdate = Date() - - var buffer = Data(capacity: 65_536) - - for try await byte in asyncBytes { - buffer.append(byte) - downloadedBytes += 1 - - if buffer.count >= 65_536 { - fileHandle.write(buffer) - buffer.removeAll(keepingCapacity: true) - } + try await enforceNetworkPolicy() - // Update progress every 100ms - if totalBytes > 0, - Date().timeIntervalSince(lastProgressUpdate) > 0.1 { - let progress = Double(downloadedBytes) / Double(totalBytes) - progressHandler(min(progress, 1.0)) - lastProgressUpdate = Date() - } - } + var attempts = 0 + let maxAttempts = 3 + + while true { + do { + var request = URLRequest(url: url) + request.setValue("application/octet-stream", forHTTPHeaderField: "Accept") + + if let token = getAuthToken() { + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + } + + // Create parent directory if needed + try? FileManager.default.createDirectory( + at: destination.deletingLastPathComponent(), + withIntermediateDirectories: true + ) - if !buffer.isEmpty { - fileHandle.write(buffer) + var existingBytes: Int64 = 0 + if FileManager.default.fileExists(atPath: destination.path) { + let attrs = try? FileManager.default.attributesOfItem(atPath: destination.path) + existingBytes = attrs?[.size] as? Int64 ?? 0 + } else { + FileManager.default.createFile(atPath: destination.path, contents: nil) + } + + if existingBytes > 0 { + request.setValue("bytes=\(existingBytes)-", forHTTPHeaderField: "Range") + } + + let (asyncBytes, response) = try await session.bytes(for: request) + + guard let httpResponse = response as? HTTPURLResponse, + [200, 206].contains(httpResponse.statusCode) else { + throw HFAPIError.downloadFailed + } + + let totalBytes = response.expectedContentLength > 0 + ? response.expectedContentLength + existingBytes + : response.expectedContentLength + var downloadedBytes: Int64 = existingBytes + + let fileHandle = try FileHandle(forWritingTo: destination) + defer { try? fileHandle.close() } + try fileHandle.seekToEnd() + + var lastProgressUpdate = Date() + var buffer = Data(capacity: 65_536) + + for try await byte in asyncBytes { + buffer.append(byte) + downloadedBytes += 1 + + if buffer.count >= 65_536 { + fileHandle.write(buffer) + buffer.removeAll(keepingCapacity: true) + } + + if totalBytes > 0, + Date().timeIntervalSince(lastProgressUpdate) > 0.1 { + let progress = Double(downloadedBytes) / Double(totalBytes) + progressHandler(min(progress, 1.0)) + lastProgressUpdate = Date() + } + } + + if !buffer.isEmpty { + fileHandle.write(buffer) + } + + progressHandler(1.0) + return + } catch { + attempts += 1 + if attempts >= maxAttempts { + throw error + } + try await Task.sleep(nanoseconds: UInt64(attempts) * 500_000_000) + } } - - progressHandler(1.0) } - + /// Get download URL for a specific file func getDownloadURL(modelId: String, filename: String) -> URL { URL(string: "https://huggingface.co/\(modelId)/resolve/main/\(filename)")! } - + // MARK: - Private Methods - + private func convertToHFModels(_ apiModels: [HFAPIModel]) async throws -> [HFModel] { var models: [HFModel] = [] - + for apiModel in apiModels { // Extract parameters from tags or model card let parameters = extractParameters(from: apiModel) - + // Detect architecture let architecture = detectArchitecture(from: apiModel) - + // Get model size from siblings let sizeBytes = apiModel.siblings?.reduce(0) { total, sibling in - // Estimate based on file extensions if sibling.rfilename.hasSuffix(".safetensors") || sibling.rfilename.hasSuffix(".bin") { - return total + 500_000_000 // Rough estimate + return total + Int(sibling.size ?? 0) } return total } ?? 0 - + // Get primary download URL let downloadURL = apiModel.siblings?.first { sibling in sibling.rfilename.hasSuffix("model.safetensors") || @@ -270,7 +292,7 @@ class HuggingFaceAPI: ObservableObject { }.flatMap { sibling in URL(string: "https://huggingface.co/\(apiModel.id)/resolve/main/\(sibling.rfilename)") } - + let model = HFModel( modelId: apiModel.id, name: apiModel.modelId.components(separatedBy: "/").last ?? apiModel.modelId, @@ -284,13 +306,13 @@ class HuggingFaceAPI: ObservableObject { downloads: apiModel.downloads, likes: apiModel.likes ) - + models.append(model) } - + return models } - + private func extractParameters(from model: HFAPIModel) -> String { // Try to extract from tags for tag in model.tags { @@ -301,7 +323,7 @@ class HuggingFaceAPI: ObservableObject { } } } - + // Try to extract from model name let name = model.modelId.lowercased() let patterns = [ @@ -310,7 +332,7 @@ class HuggingFaceAPI: ObservableObject { "-(\\d+)b", "_(\\d+)b" ] - + for pattern in patterns { if let regex = try? NSRegularExpression(pattern: pattern, options: []), let match = regex.firstMatch(in: name, options: [], range: NSRange(location: 0, length: name.utf16.count)), @@ -319,14 +341,14 @@ class HuggingFaceAPI: ObservableObject { return "\(value)B" } } - + return "Unknown" } - + private func detectArchitecture(from model: HFAPIModel) -> ModelArchitecture { let tags = model.tags.map { $0.lowercased() } let id = model.id.lowercased() - + if tags.contains("llama") || id.contains("llama") { return .llama } else if tags.contains("mistral") || id.contains("mistral") { @@ -344,17 +366,43 @@ class HuggingFaceAPI: ObservableObject { } else if tags.contains("bert") || id.contains("bert") { return .bert } - + return .custom } - + func setAuthToken(_ token: String?) { - if let token = token { - UserDefaults.standard.set(token, forKey: "hf_auth_token") - } else { + KeychainTokenStore.writeToken(token) + } + + func getAuthToken() -> String? { + if let keychain = KeychainTokenStore.readToken() { + return keychain + } + // One-time migration from old UserDefaults storage + if let legacy = UserDefaults.standard.string(forKey: "hf_auth_token"), !legacy.isEmpty { + KeychainTokenStore.writeToken(legacy) UserDefaults.standard.removeObject(forKey: "hf_auth_token") + return legacy + } + return nil + } + + private func enforceNetworkPolicy() async throws { + let wifiOnly = UserDefaults.standard.object(forKey: "wifi_only") as? Bool ?? true + guard wifiOnly else { return } + + let monitor = NWPathMonitor() + let queue = DispatchQueue(label: "hf.network.policy") + let isWifi = await withCheckedContinuation { continuation in + monitor.pathUpdateHandler = { path in + let ok = path.status == .satisfied && path.usesInterfaceType(.wifi) + continuation.resume(returning: ok) + monitor.cancel() + } + monitor.start(queue: queue) } + guard isWifi else { throw HFAPIError.downloadFailed } } } @@ -365,7 +413,7 @@ struct ModelFilter { var sortBy: SortOption = .downloads var task: String? var library: String? - + enum SortOption: String { case downloads = "downloads" case likes = "likes" @@ -391,13 +439,13 @@ struct ModelDetails: Codable { let pipeline_tag: String? let cardData: ModelCardData? let config: ModelConfig? - + struct ModelCardData: Codable { let description: String? let license: String? let language: [String]? } - + struct ModelConfig: Codable { let architectures: [String]? let model_type: String? @@ -413,7 +461,7 @@ enum HFAPIError: Error, LocalizedError { case httpError(statusCode: Int) case downloadFailed case invalidData - + var errorDescription: String? { switch self { case .invalidURL: diff --git a/ModelQuantizer/Services/KeychainTokenStore.swift b/ModelQuantizer/Services/KeychainTokenStore.swift new file mode 100644 index 0000000..26c9635 --- /dev/null +++ b/ModelQuantizer/Services/KeychainTokenStore.swift @@ -0,0 +1,48 @@ +import Foundation +import Security + +enum KeychainTokenStore { + private static let service = "com.modelquantizer.hf" + private static let account = "hf_auth_token" + + static func readToken() -> String? { + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + kSecReturnData as String: true, + kSecMatchLimit as String: kSecMatchLimitOne + ] + + var item: CFTypeRef? + let status = SecItemCopyMatching(query as CFDictionary, &item) + guard status == errSecSuccess, + let data = item as? Data, + let token = String(data: data, encoding: .utf8), + !token.isEmpty else { + return nil + } + return token + } + + static func writeToken(_ token: String?) { + let deleteQuery: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account + ] + SecItemDelete(deleteQuery as CFDictionary) + + guard let token, !token.isEmpty, + let data = token.data(using: .utf8) else { return } + + let addQuery: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + kSecValueData as String: data, + kSecAttrAccessible as String: kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly + ] + SecItemAdd(addQuery as CFDictionary, nil) + } +} diff --git a/ModelQuantizer/Services/SettingsSuggester.swift b/ModelQuantizer/Services/SettingsSuggester.swift index dfa7e0f..e2e02e1 100644 --- a/ModelQuantizer/Services/SettingsSuggester.swift +++ b/ModelQuantizer/Services/SettingsSuggester.swift @@ -235,7 +235,7 @@ class SettingsSuggester { estimatedMemoryUsage: estimatedMemoryUsage, estimatedLoadTime: estimatedLoadTime, recommendedBatchSize: settings.batchSize, - canUseGPU: settings.useGPU && deviceClass.rawValue >= DeviceCapabilityProfile.DeviceClass.midRange.rawValue, + canUseGPU: settings.useGPU && deviceSupportsGPU(deviceClass), canUseNeuralEngine: settings.useNeuralEngine && profile.neuralEngineCores > 0 ) } @@ -282,6 +282,15 @@ class SettingsSuggester { description: "Memory-optimized settings for large model" ) } + + private func deviceSupportsGPU(_ deviceClass: DeviceCapabilityProfile.DeviceClass) -> Bool { + switch deviceClass { + case .entryLevel: + return false + case .midRange, .highEnd, .flagship, .ultra: + return true + } + } private func adjustForThermalState( original: QuantizationRecommendation, diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift index e82d937..678f326 100644 --- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift +++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift @@ -87,7 +87,31 @@ class QuantizeViewModel: ObservableObject { } private func updateProgress(from status: QuantizationStatus) { - // Progress is now directly from the quantizer + switch status { + case .idle: + progress = 0 + currentStage = "" + case .downloading(let value): + progress = value + currentStage = "Downloading" + case .analyzing: + progress = max(progress, 0.30) + currentStage = "Analyzing" + case .quantizing(let value, let stage): + progress = value + currentStage = stage + case .optimizing: + progress = max(progress, 0.95) + currentStage = "Optimizing" + case .validating: + progress = max(progress, 0.97) + currentStage = "Validating" + case .completed: + progress = 1.0 + currentStage = "Completed" + case .failed(let error): + currentStage = error + } } private func updateDeviceProfile() { @@ -107,87 +131,7 @@ class QuantizeViewModel: ObservableObject { } private func loadPopularModels() { - // Load a curated list of popular models while we fetch from API - models = [ - HFModel( - modelId: "microsoft/Phi-3-mini-4k-instruct", - name: "Phi-3 Mini 4K", - description: "Microsoft's efficient 3.8B parameter model with excellent performance", - parameters: "3.8B", - architecture: .phi, - downloadURL: URL(string: "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors"), - sizeBytes: 7_600_000_000, - recommendedContextLength: 4096, - tags: ["instruct", "chat", "efficient"], - downloads: 2_500_000, - likes: 8500 - ), - HFModel( - modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct", - name: "Llama 3.1 8B Instruct", - description: "Meta's latest 8B parameter instruction-tuned model", - parameters: "8B", - architecture: .llama, - downloadURL: URL(string: "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/model.safetensors"), - sizeBytes: 16_000_000_000, - recommendedContextLength: 8192, - tags: ["instruct", "chat", "meta"], - downloads: 5_000_000, - likes: 15000 - ), - HFModel( - modelId: "mistralai/Mistral-7B-Instruct-v0.3", - name: "Mistral 7B Instruct v0.3", - description: "Mistral's powerful 7B instruction model", - parameters: "7B", - architecture: .mistral, - downloadURL: URL(string: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/model.safetensors"), - sizeBytes: 14_000_000_000, - recommendedContextLength: 32768, - tags: ["instruct", "chat", "long-context"], - downloads: 8_000_000, - likes: 22000 - ), - HFModel( - modelId: "google/gemma-2-2b-it", - name: "Gemma 2 2B IT", - description: "Google's lightweight 2B instruction model", - parameters: "2B", - architecture: .gemma, - downloadURL: URL(string: "https://huggingface.co/google/gemma-2-2b-it/resolve/main/model.safetensors"), - sizeBytes: 4_000_000_000, - recommendedContextLength: 8192, - tags: ["instruct", "chat", "lightweight"], - downloads: 1_200_000, - likes: 5600 - ), - HFModel( - modelId: "Qwen/Qwen2.5-7B-Instruct", - name: "Qwen2.5 7B Instruct", - description: "Alibaba's Qwen2.5 with improved reasoning", - parameters: "7B", - architecture: .qwen2, - downloadURL: URL(string: "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/resolve/main/model.safetensors"), - sizeBytes: 15_000_000_000, - recommendedContextLength: 32768, - tags: ["instruct", "chat", "multilingual"], - downloads: 3_000_000, - likes: 9800 - ), - HFModel( - modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct", - name: "SmolLM2 1.7B Instruct", - description: "Hugging Face's tiny but capable model", - parameters: "1.7B", - architecture: .llama, - downloadURL: URL(string: "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct/resolve/main/model.safetensors"), - sizeBytes: 3_400_000_000, - recommendedContextLength: 8192, - tags: ["instruct", "chat", "tiny"], - downloads: 800_000, - likes: 4200 - ) - ] + models = ModelCatalog.curatedModels filteredModels = models @@ -295,7 +239,7 @@ class QuantizeViewModel: ObservableObject { guard let model = selectedModel else { return } // Check if model requires authentication - if model.modelId.hasPrefix("meta-llama/") && HuggingFaceAPI.shared.getAuthToken() == nil { + if model.modelId.hasPrefix("meta-llama/") && hfAPI.getAuthToken() == nil { errorMessage = "This model requires Hugging Face authentication. Please add your token in Settings." showError = true return @@ -364,11 +308,3 @@ class QuantizeViewModel: ObservableObject { } } } - -// MARK: - Hugging Face API Token Extension - -extension HuggingFaceAPI { - func getAuthToken() -> String? { - UserDefaults.standard.string(forKey: "hf_auth_token") - } -} diff --git a/ModelQuantizer/Views/ModelDownloadView.swift b/ModelQuantizer/Views/ModelDownloadView.swift index af7e12e..721ce5b 100644 --- a/ModelQuantizer/Views/ModelDownloadView.swift +++ b/ModelQuantizer/Views/ModelDownloadView.swift @@ -665,9 +665,9 @@ enum ModelCategory: String, CaseIterable, Identifiable { case .chat: return "bubble.left.and.bubble.right" case .code: return "chevron.left.forwardslash.chevron.right" case .instruct: return "text.bubble" - case .llama: return " Llama" - case .mistral: return "Mistral" - case .qwen: return "Qwen" + case .llama: return "flame" + case .mistral: return "wind" + case .qwen: return "globe.asia.australia" } } @@ -697,104 +697,7 @@ class ModelDownloadViewModel: ObservableObject { } private func loadModels() { - models = [ - HFModel( - modelId: "microsoft/Phi-3-mini-4k-instruct", - name: "Phi-3 Mini 4K", - description: "Microsoft's efficient 3.8B parameter model with excellent performance for its size", - parameters: "3.8B", - architecture: .phi, - sizeBytes: 2_400_000_000, - recommendedContextLength: 4096, - tags: ["instruct", "chat", "efficient"], - downloads: 2_500_000, - likes: 8500 - ), - HFModel( - modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct", - name: "Llama 3.1 8B Instruct", - description: "Meta's latest 8B parameter instruction-tuned model with improved reasoning", - parameters: "8B", - architecture: .llama, - sizeBytes: 16_000_000_000, - recommendedContextLength: 8192, - tags: ["instruct", "chat", "meta"], - downloads: 5_000_000, - likes: 15000 - ), - HFModel( - modelId: "mistralai/Mistral-7B-Instruct-v0.3", - name: "Mistral 7B Instruct v0.3", - description: "Mistral's powerful 7B instruction model with 32K context support", - parameters: "7B", - architecture: .mistral, - sizeBytes: 14_000_000_000, - recommendedContextLength: 32768, - tags: ["instruct", "chat", "long-context"], - downloads: 8_000_000, - likes: 22000 - ), - HFModel( - modelId: "google/gemma-2-2b-it", - name: "Gemma 2 2B IT", - description: "Google's lightweight 2B instruction model, great for mobile devices", - parameters: "2B", - architecture: .gemma, - sizeBytes: 1_600_000_000, - recommendedContextLength: 8192, - tags: ["instruct", "chat", "lightweight"], - downloads: 1_200_000, - likes: 5600 - ), - HFModel( - modelId: "Qwen/Qwen2.5-7B-Instruct", - name: "Qwen2.5 7B Instruct", - description: "Alibaba's Qwen2.5 with improved reasoning and multilingual support", - parameters: "7B", - architecture: .qwen2, - sizeBytes: 15_000_000_000, - recommendedContextLength: 32768, - tags: ["instruct", "chat", "multilingual"], - downloads: 3_000_000, - likes: 9800 - ), - HFModel( - modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct", - name: "SmolLM2 1.7B Instruct", - description: "Hugging Face's tiny but capable model, perfect for edge devices", - parameters: "1.7B", - architecture: .llama, - sizeBytes: 3_400_000_000, - recommendedContextLength: 8192, - tags: ["instruct", "chat", "tiny"], - downloads: 800_000, - likes: 4200 - ), - HFModel( - modelId: "codellama/CodeLlama-7b-Instruct-hf", - name: "CodeLlama 7B Instruct", - description: "Meta's code-specialized model for programming tasks", - parameters: "7B", - architecture: .llama, - sizeBytes: 13_000_000_000, - recommendedContextLength: 16384, - tags: ["code", "instruct", "programming"], - downloads: 4_500_000, - likes: 12000 - ), - HFModel( - modelId: "deepseek-ai/deepseek-coder-6.7b-instruct", - name: "DeepSeek Coder 6.7B", - description: "DeepSeek's code model with strong performance on coding benchmarks", - parameters: "6.7B", - architecture: .llama, - sizeBytes: 13_400_000_000, - recommendedContextLength: 16384, - tags: ["code", "instruct", "programming"], - downloads: 2_000_000, - likes: 7500 - ) - ] + models = ModelCatalog.curatedModels featuredModels = Array(models.prefix(4)) } diff --git a/ModelQuantizer/Views/QuantizeView.swift b/ModelQuantizer/Views/QuantizeView.swift index edba13e..9b99b96 100644 --- a/ModelQuantizer/Views/QuantizeView.swift +++ b/ModelQuantizer/Views/QuantizeView.swift @@ -226,7 +226,8 @@ struct QuantizeView: View { // Quantize button Button(action: { - if model.modelId.hasPrefix("meta-llama/") { + if model.modelId.hasPrefix("meta-llama/") && + HuggingFaceAPI.shared.getAuthToken() == nil { showingAuthAlert = true } else { showingQuantizationSheet = true diff --git a/ModelQuantizer/Views/SettingsView.swift b/ModelQuantizer/Views/SettingsView.swift index 544c926..0d61ab9 100644 --- a/ModelQuantizer/Views/SettingsView.swift +++ b/ModelQuantizer/Views/SettingsView.swift @@ -8,7 +8,6 @@ import SwiftUI struct SettingsView: View { - @AppStorage("hf_auth_token") private var authToken = "" @AppStorage("auto_quantize") private var autoQuantize = false @AppStorage("default_quantization") private var defaultQuantization = "Q4_K_M" @AppStorage("save_history") private var saveHistory = true @@ -17,6 +16,7 @@ struct SettingsView: View { @State private var showingTokenInfo = false @State private var showingClearConfirmation = false @State private var cacheSize: Int64 = 0 + @State private var authToken = "" var body: some View { ScrollView { @@ -41,8 +41,12 @@ struct SettingsView: View { .padding() } .onAppear { + authToken = HuggingFaceAPI.shared.getAuthToken() ?? "" calculateCacheSize() } + .onChange(of: authToken) { newValue in + HuggingFaceAPI.shared.setAuthToken(newValue) + } .alert("Hugging Face Token", isPresented: $showingTokenInfo) { Button("OK", role: .cancel) {} } message: { From 865ddd4ea3a14418963cde976a4785fcd5d229f9 Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 20:02:34 -0500 Subject: [PATCH 3/8] feat: continue hardening with checksums, richer metadata, and job telemetry fields --- ModelQuantizer/Models/ModelTypes.swift | 2 + ModelQuantizer/Services/HuggingFaceAPI.swift | 29 ++++++++ ModelQuantizer/Services/ModelQuantizer.swift | 4 +- .../Services/QuantizationEngine.swift | 70 ++++++++++++++++--- 4 files changed, 94 insertions(+), 11 deletions(-) diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift index 7b8b43c..6b6eb3f 100644 --- a/ModelQuantizer/Models/ModelTypes.swift +++ b/ModelQuantizer/Models/ModelTypes.swift @@ -206,6 +206,8 @@ struct QuantizationJob: Codable, Identifiable { let startTime: Date let endTime: Date let contextLength: Int + let estimatedTokensPerSecond: Double? + let validationScore: Double? var duration: TimeInterval { return endTime.timeIntervalSince(startTime) diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift index 96334ce..73cb71b 100644 --- a/ModelQuantizer/Services/HuggingFaceAPI.swift +++ b/ModelQuantizer/Services/HuggingFaceAPI.swift @@ -8,6 +8,7 @@ import Foundation import Combine import Network +import CryptoKit /// Hugging Face API Service for model search and metadata class HuggingFaceAPI: ObservableObject { @@ -33,12 +34,14 @@ class HuggingFaceAPI: ObservableObject { func searchModels( query: String, limit: Int = 50, + offset: Int = 0, filter: ModelFilter = ModelFilter() ) async throws -> [HFModel] { var components = URLComponents(string: "\(baseURL)/models")! var queryItems: [URLQueryItem] = [ URLQueryItem(name: "limit", value: "\(limit)"), + URLQueryItem(name: "offset", value: "\(offset)"), URLQueryItem(name: "full", value: "true"), URLQueryItem(name: "config", value: "true") ] @@ -196,9 +199,13 @@ class HuggingFaceAPI: ObservableObject { ) var existingBytes: Int64 = 0 + var hasher = SHA256() if FileManager.default.fileExists(atPath: destination.path) { let attrs = try? FileManager.default.attributesOfItem(atPath: destination.path) existingBytes = attrs?[.size] as? Int64 ?? 0 + if existingBytes > 0, let existingData = try? Data(contentsOf: destination) { + hasher.update(data: existingData) + } } else { FileManager.default.createFile(atPath: destination.path, contents: nil) } @@ -213,6 +220,7 @@ class HuggingFaceAPI: ObservableObject { [200, 206].contains(httpResponse.statusCode) else { throw HFAPIError.downloadFailed } + let expectedChecksum = expectedSHA256(from: httpResponse) let totalBytes = response.expectedContentLength > 0 ? response.expectedContentLength + existingBytes @@ -232,6 +240,7 @@ class HuggingFaceAPI: ObservableObject { if buffer.count >= 65_536 { fileHandle.write(buffer) + hasher.update(data: buffer) buffer.removeAll(keepingCapacity: true) } @@ -245,6 +254,14 @@ class HuggingFaceAPI: ObservableObject { if !buffer.isEmpty { fileHandle.write(buffer) + hasher.update(data: buffer) + } + + if let expectedChecksum { + let digest = hasher.finalize().map { String(format: "%02x", $0) }.joined() + guard digest.lowercased() == expectedChecksum.lowercased() else { + throw HFAPIError.invalidData + } } progressHandler(1.0) @@ -404,6 +421,18 @@ class HuggingFaceAPI: ObservableObject { } guard isWifi else { throw HFAPIError.downloadFailed } } + + private func expectedSHA256(from response: HTTPURLResponse) -> String? { + if let checksum = response.value(forHTTPHeaderField: "x-checksum-sha256") { + return checksum.replacingOccurrences(of: "\"", with: "") + } + if let etag = response.value(forHTTPHeaderField: "x-linked-etag") ?? + response.value(forHTTPHeaderField: "etag"), + let range = etag.range(of: "sha256:") { + return String(etag[range.upperBound...]).replacingOccurrences(of: "\"", with: "") + } + return nil + } } // MARK: - Supporting Types diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift index f241c65..156bbdb 100644 --- a/ModelQuantizer/Services/ModelQuantizer.swift +++ b/ModelQuantizer/Services/ModelQuantizer.swift @@ -117,7 +117,9 @@ class ModelQuantizer: ObservableObject { outputSize: (try? fileManager.attributesOfItem(atPath: outputURL.path)[.size] as? Int64) ?? 0, startTime: startTime, endTime: Date(), - contextLength: contextLength ?? model.recommendedContextLength + contextLength: contextLength ?? model.recommendedContextLength, + estimatedTokensPerSecond: nil, + validationScore: nil ) quantizationHistory.insert(job, at: 0) diff --git a/ModelQuantizer/Services/QuantizationEngine.swift b/ModelQuantizer/Services/QuantizationEngine.swift index 9aecb3d..8246be3 100644 --- a/ModelQuantizer/Services/QuantizationEngine.swift +++ b/ModelQuantizer/Services/QuantizationEngine.swift @@ -116,7 +116,9 @@ class QuantizationEngine: ObservableObject { outputSize: (try? fileManager.attributesOfItem(atPath: quantizedURL.path)[.size] as? Int64) ?? 0, startTime: startTime, endTime: Date(), - contextLength: contextLength + contextLength: contextLength, + estimatedTokensPerSecond: nil, + validationScore: nil ) await MainActor.run { @@ -206,6 +208,13 @@ class QuantizationEngine: ObservableObject { let tensorCount: Int let totalParameters: Int64 let originalSize: Int64 + let contextLength: Int + let embeddingLength: Int + let feedForwardLength: Int + let headCount: Int + let headCountKV: Int + let rmsEpsilon: Float + let ropeDimensionCount: Int } private func analyzeModel(files: [URL], model: HFModel) async throws -> ModelAnalysis { @@ -216,6 +225,13 @@ class QuantizationEngine: ObservableObject { var tensorCount = 0 var totalParameters: Int64 = 0 var totalSize: Int64 = 0 + var contextLength = model.recommendedContextLength + var embeddingLength = 4096 + var feedForwardLength = 11008 + var headCount = 32 + var headCountKV = 32 + var rmsEpsilon: Float = 1e-5 + var ropeDimensionCount = 128 // Analyze safetensors files for file in files where file.pathExtension == "safetensors" { @@ -247,6 +263,33 @@ class QuantizationEngine: ObservableObject { else if normalized.contains("gpt") { architecture = .gpt2 } else if normalized.contains("bert") { architecture = .bert } } + if let context = config["max_position_embeddings"] as? Int { + contextLength = context + } + if let hidden = config["hidden_size"] as? Int { + embeddingLength = hidden + } + if let ff = config["intermediate_size"] as? Int { + feedForwardLength = ff + } + if let heads = config["num_attention_heads"] as? Int { + headCount = heads + } + if let kvHeads = config["num_key_value_heads"] as? Int { + headCountKV = kvHeads + } else { + headCountKV = headCount + } + if let eps = config["rms_norm_eps"] as? Double { + rmsEpsilon = Float(eps) + } else if let eps = config["layer_norm_epsilon"] as? Double { + rmsEpsilon = Float(eps) + } + if let ropeDim = config["rope_dim"] as? Int { + ropeDimensionCount = ropeDim + } else if headCount > 0 { + ropeDimensionCount = max(32, embeddingLength / headCount) + } } } @@ -260,7 +303,14 @@ class QuantizationEngine: ObservableObject { layerCount: layerCount, tensorCount: tensorCount, totalParameters: totalParameters, - originalSize: totalSize + originalSize: totalSize, + contextLength: contextLength, + embeddingLength: embeddingLength, + feedForwardLength: feedForwardLength, + headCount: headCount, + headCountKV: headCountKV, + rmsEpsilon: rmsEpsilon, + ropeDimensionCount: ropeDimensionCount ) } @@ -368,15 +418,15 @@ class QuantizationEngine: ObservableObject { } private func addArchitectureMetadata(to builder: inout GGUFBuilder, analysis: ModelAnalysis) { - // Add context length - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).context_length", value: .uint32(4096)) - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).embedding_length", value: .uint32(4096)) + let arch = analysis.architecture.rawValue.lowercased() + builder.addMetadata(key: "\(arch).context_length", value: .uint32(UInt32(max(256, analysis.contextLength)))) + builder.addMetadata(key: "\(arch).embedding_length", value: .uint32(UInt32(max(1, analysis.embeddingLength)))) builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).block_count", value: .uint32(UInt32(analysis.layerCount))) - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).feed_forward_length", value: .uint32(11008)) - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).attention.head_count", value: .uint32(32)) - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).attention.head_count_kv", value: .uint32(32)) - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).attention.layer_norm_rms_epsilon", value: .float32(1e-5)) - builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).rope.dimension_count", value: .uint32(128)) + builder.addMetadata(key: "\(arch).feed_forward_length", value: .uint32(UInt32(max(1, analysis.feedForwardLength)))) + builder.addMetadata(key: "\(arch).attention.head_count", value: .uint32(UInt32(max(1, analysis.headCount)))) + builder.addMetadata(key: "\(arch).attention.head_count_kv", value: .uint32(UInt32(max(1, analysis.headCountKV)))) + builder.addMetadata(key: "\(arch).attention.layer_norm_rms_epsilon", value: .float32(analysis.rmsEpsilon)) + builder.addMetadata(key: "\(arch).rope.dimension_count", value: .uint32(UInt32(max(1, analysis.ropeDimensionCount)))) } private func processSafeTensorsFile(_ url: URL, into builder: inout GGUFBuilder) async throws { From 686a9ea9d8b8838bc6479198be000d821058b338 Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 20:06:30 -0500 Subject: [PATCH 4/8] fix: harden resumable downloads and improve HF error surfacing --- ModelQuantizer/Services/HuggingFaceAPI.swift | 39 ++++++++++++++++--- .../ViewModels/QuantizeViewModel.swift | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift index 73cb71b..c3131f5 100644 --- a/ModelQuantizer/Services/HuggingFaceAPI.swift +++ b/ModelQuantizer/Services/HuggingFaceAPI.swift @@ -203,8 +203,14 @@ class HuggingFaceAPI: ObservableObject { if FileManager.default.fileExists(atPath: destination.path) { let attrs = try? FileManager.default.attributesOfItem(atPath: destination.path) existingBytes = attrs?[.size] as? Int64 ?? 0 - if existingBytes > 0, let existingData = try? Data(contentsOf: destination) { - hasher.update(data: existingData) + if existingBytes > 0, + let existingHandle = try? FileHandle(forReadingFrom: destination) { + defer { try? existingHandle.close() } + while true { + let chunk = try existingHandle.read(upToCount: 65_536) ?? Data() + if chunk.isEmpty { break } + hasher.update(data: chunk) + } } } else { FileManager.default.createFile(atPath: destination.path, contents: nil) @@ -220,6 +226,12 @@ class HuggingFaceAPI: ObservableObject { [200, 206].contains(httpResponse.statusCode) else { throw HFAPIError.downloadFailed } + if existingBytes > 0 && httpResponse.statusCode == 200 { + try? FileManager.default.removeItem(at: destination) + FileManager.default.createFile(atPath: destination.path, contents: nil) + existingBytes = 0 + hasher = SHA256() + } let expectedChecksum = expectedSHA256(from: httpResponse) let totalBytes = response.expectedContentLength > 0 @@ -412,14 +424,28 @@ class HuggingFaceAPI: ObservableObject { let monitor = NWPathMonitor() let queue = DispatchQueue(label: "hf.network.policy") let isWifi = await withCheckedContinuation { continuation in + let lock = NSLock() + var resolved = false + func resolve(_ value: Bool) { + lock.lock() + defer { lock.unlock() } + guard !resolved else { return } + resolved = true + continuation.resume(returning: value) + monitor.cancel() + } + let timeoutTask = DispatchWorkItem { + resolve(false) + } + queue.asyncAfter(deadline: .now() + 2.0, execute: timeoutTask) monitor.pathUpdateHandler = { path in + timeoutTask.cancel() let ok = path.status == .satisfied && path.usesInterfaceType(.wifi) - continuation.resume(returning: ok) - monitor.cancel() + resolve(ok) } monitor.start(queue: queue) } - guard isWifi else { throw HFAPIError.downloadFailed } + guard isWifi else { throw HFAPIError.networkPolicyViolation } } private func expectedSHA256(from response: HTTPURLResponse) -> String? { @@ -490,6 +516,7 @@ enum HFAPIError: Error, LocalizedError { case httpError(statusCode: Int) case downloadFailed case invalidData + case networkPolicyViolation var errorDescription: String? { switch self { @@ -507,6 +534,8 @@ enum HFAPIError: Error, LocalizedError { return "Failed to download model file" case .invalidData: return "Invalid data received" + case .networkPolicyViolation: + return "Wi-Fi only downloads is enabled. Connect to Wi-Fi to continue." } } } diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift index 678f326..57a7958 100644 --- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift +++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift @@ -192,7 +192,7 @@ class QuantizeViewModel: ObservableObject { self.filterLocalModels(query: query) } catch let error as HFAPIError { - self.errorMessage = "Rate limit reached. Please try again later." + self.errorMessage = error.errorDescription ?? "Search failed." self.showError = true } catch { // Don't show error for search failures - local results are still available From 83f43d526cc16884a54d26919af11598a8ce1f0a Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 20:08:17 -0500 Subject: [PATCH 5/8] refactor: make ModelQuantizer a facade over QuantizationEngine --- ModelQuantizer/Services/ModelQuantizer.swift | 303 +++---------------- 1 file changed, 39 insertions(+), 264 deletions(-) diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift index 156bbdb..4f905fd 100644 --- a/ModelQuantizer/Services/ModelQuantizer.swift +++ b/ModelQuantizer/Services/ModelQuantizer.swift @@ -2,298 +2,73 @@ // ModelQuantizer.swift // ModelQuantizer // -// Created by AI Assistant on 2026-03-31. +// Compatibility facade over QuantizationEngine. // import Foundation -import Metal -import MetalPerformanceShaders -import Accelerate -import Compression +import Combine -/// Main model quantizer engine @MainActor -class ModelQuantizer: ObservableObject { +final class ModelQuantizer: ObservableObject { static let shared = ModelQuantizer() @Published var status: QuantizationStatus = .idle @Published var currentModel: HFModel? @Published var quantizationHistory: [QuantizationJob] = [] - private var quantizeTask: Task? - private let fileManager = FileManager.default - private let metalDevice: MTLDevice? - - private var modelsDirectory: URL { - let docs = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first! - return docs.appendingPathComponent("Models", isDirectory: true) - } + private let engine = QuantizationEngine.shared + private var cancellables = Set() private init() { - self.metalDevice = MTLCreateSystemDefaultDevice() - createModelsDirectory() - loadHistory() + bindEngine() + refreshHistory() } - func quantize(model: HFModel, to quantization: QuantizationType, - contextLength: Int? = nil, useGPU: Bool = true) { - guard status == .idle else { return } - + func quantize( + model: HFModel, + to quantization: QuantizationType, + contextLength: Int? = nil, + useGPU: Bool = true + ) { currentModel = model - quantizeTask?.cancel() - - quantizeTask = Task { [weak self] in - await self?.performQuantization(model: model, quantization: quantization, - contextLength: contextLength, useGPU: useGPU) - } + engine.quantize( + model: model, + to: quantization, + contextLength: contextLength ?? model.recommendedContextLength, + useGPU: useGPU + ) } func cancel() { - quantizeTask?.cancel() - status = .idle + engine.cancel() } func getQuantizedModels() -> [QuantizedModel] { - guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory, - includingPropertiesForKeys: nil) else { - return [] - } - - return contents.compactMap { url in - guard url.pathExtension == "gguf" else { return nil } - return try? QuantizedModel(from: url) - } + engine.getQuantizedModels() } func deleteQuantizedModel(_ model: QuantizedModel) { - try? fileManager.removeItem(at: model.url) - loadHistory() - } - - private func createModelsDirectory() { - try? fileManager.createDirectory(at: modelsDirectory, withIntermediateDirectories: true) - } - - private func loadHistory() { - if let data = UserDefaults.standard.data(forKey: "quantizationHistory"), - let history = try? JSONDecoder().decode([QuantizationJob].self, from: data) { - quantizationHistory = history - } - } - - private func saveHistory() { - if let data = try? JSONEncoder().encode(quantizationHistory) { - UserDefaults.standard.set(data, forKey: "quantizationHistory") - } - } - - private func performQuantization(model: HFModel, quantization: QuantizationType, - contextLength: Int?, useGPU: Bool) async { - let startTime = Date() - - do { - let modelURL = try await downloadModel(model) - status = .analyzing - let analysis = try await analyzeModel(at: modelURL) - let outputURL = modelsDirectory.appendingPathComponent("\(model.modelId)_\(quantization.rawValue).gguf") - - try await performActualQuantization( - inputURL: modelURL, - outputURL: outputURL, - analysis: analysis, - quantization: quantization, - contextLength: contextLength ?? model.recommendedContextLength, - useGPU: useGPU - ) - - status = .validating - try await validateQuantizedModel(at: outputURL) - - let job = QuantizationJob( - id: UUID(), - originalModel: model, - quantizationType: quantization, - outputURL: outputURL, - outputSize: (try? fileManager.attributesOfItem(atPath: outputURL.path)[.size] as? Int64) ?? 0, - startTime: startTime, - endTime: Date(), - contextLength: contextLength ?? model.recommendedContextLength, - estimatedTokensPerSecond: nil, - validationScore: nil - ) - - quantizationHistory.insert(job, at: 0) - saveHistory() - status = .completed(outputURL: outputURL) - } catch { - status = .failed(error: error.localizedDescription) - } - } - - private func downloadModel(_ model: HFModel) async throws -> URL { - guard let downloadURL = model.downloadURL else { - throw QuantizationError.noDownloadURL - } - - let destination = modelsDirectory.appendingPathComponent("\(model.modelId).tmp") - - if fileManager.fileExists(atPath: destination.path) { - let attrs = try fileManager.attributesOfItem(atPath: destination.path) - if let size = attrs[.size] as? Int64, size == model.sizeBytes { - return destination - } - } - - let session = URLSession(configuration: .default) - let (asyncBytes, response) = try await session.bytes(from: downloadURL) - let totalBytes = response.expectedContentLength - var downloadedBytes: Int64 = 0 - var lastProgress: Double = 0 - - try? fileManager.removeItem(at: destination) - fileManager.createFile(atPath: destination.path, contents: nil) - let fileHandle = try FileHandle(forWritingTo: destination) - defer { try? fileHandle.close() } - - var buffer = Data(capacity: 65_536) - - for try await byte in asyncBytes { - buffer.append(byte) - downloadedBytes += 1 - - if buffer.count >= 65_536 { - fileHandle.write(buffer) - buffer.removeAll(keepingCapacity: true) - } - - if totalBytes > 0 { - let currentProgress = Double(downloadedBytes) / Double(totalBytes) - if currentProgress - lastProgress > 0.01 { - lastProgress = currentProgress - status = .downloading(progress: currentProgress) + try? engine.deleteQuantizedModel(model) + refreshHistory() + } + + private func bindEngine() { + engine.$status + .receive(on: DispatchQueue.main) + .sink { [weak self] newStatus in + guard let self else { return } + self.status = newStatus + switch newStatus { + case .completed, .failed, .idle: + self.refreshHistory() + default: + break } } - } - - if !buffer.isEmpty { - fileHandle.write(buffer) - } - - return destination - } - - private struct ModelAnalysis { - let architecture: ModelArchitecture - let layerCount: Int - let tensorCount: Int - let totalParameters: Int64 - let originalSize: Int64 - } - - private func analyzeModel(at url: URL) async throws -> ModelAnalysis { - let data = try Data(contentsOf: url, options: .mappedIfSafe) - - var architecture: ModelArchitecture = .custom - var layerCount = 0 - var tensorCount = 0 - var totalParameters: Int64 = 0 - - if url.pathExtension == "safetensors" { - let analysis = try parseSafeTensors(data) - architecture = analysis.architecture - layerCount = analysis.layerCount - tensorCount = analysis.tensorCount - totalParameters = analysis.totalParameters - } else if url.pathExtension == "bin" { - let analysis = parsePyTorchBin(data) - architecture = analysis.architecture - layerCount = analysis.layerCount - tensorCount = analysis.tensorCount - totalParameters = analysis.totalParameters - } - - return ModelAnalysis( - architecture: architecture, - layerCount: layerCount, - tensorCount: tensorCount, - totalParameters: totalParameters, - originalSize: Int64(data.count) - ) - } - - private func parseSafeTensors(_ data: Data) throws -> ModelAnalysis { - var architecture: ModelArchitecture = .custom - var layerCount = 0 - var tensorCount = 0 - var totalParameters: Int64 = 0 - - let headerLength = data.prefix(8).withUnsafeBytes { $0.load(as: UInt64.self) } - let headerData = data.dropFirst(8).prefix(Int(headerLength)) - - if let header = try? JSONSerialization.jsonObject(with: headerData) as? [String: Any] { - let tensorNames = header.keys - if tensorNames.contains(where: { $0.contains("llama") || $0.contains("self_attn") }) { - architecture = .llama - } else if tensorNames.contains(where: { $0.contains("mistral") }) { - architecture = .mistral - } else if tensorNames.contains(where: { $0.contains("qwen") }) { - architecture = .qwen2 - } else if tensorNames.contains(where: { $0.contains("gemma") }) { - architecture = .gemma - } - - for (key, value) in header { - if let tensorInfo = value as? [String: Any], - let shape = tensorInfo["shape"] as? [Int] { - tensorCount += 1 - totalParameters += Int64(shape.reduce(1, *)) - - if key.contains("layers.") { - layerCount = max(layerCount, Int(key.components(separatedBy: "layers.").last?.components(separatedBy: ".").first ?? "0") ?? 0) - } - } - } - } - - return ModelAnalysis( - architecture: architecture, - layerCount: layerCount, - tensorCount: tensorCount, - totalParameters: totalParameters, - originalSize: Int64(data.count) - ) - } - - private func parsePyTorchBin(_ data: Data) -> ModelAnalysis { - ModelAnalysis( - architecture: .custom, - layerCount: 0, - tensorCount: 0, - totalParameters: 0, - originalSize: Int64(data.count) - ) - } - - private func performActualQuantization(inputURL: URL, outputURL: URL, - analysis: ModelAnalysis, quantization: QuantizationType, - contextLength: Int, useGPU: Bool) async throws { - status = .quantizing(progress: 0.1, stage: "Building GGUF") - var ggufBuilder = GGUFBuilder() - ggufBuilder.addMetadata(key: "general.architecture", value: .string(analysis.architecture.rawValue.lowercased())) - ggufBuilder.addMetadata(key: "general.name", value: .string(currentModel?.name ?? "Unknown")) - ggufBuilder.addMetadata(key: "general.quantization_version", value: .uint32(2)) - ggufBuilder.addMetadata(key: "general.file_type", value: .uint32(quantization.ggufFileType)) - - let ggufData = try ggufBuilder.build() - try ggufData.write(to: outputURL) - status = .quantizing(progress: 1.0, stage: "Complete") + .store(in: &cancellables) } - private func validateQuantizedModel(at url: URL) async throws { - let data = try Data(contentsOf: url, options: .mappedIfSafe) - let magic = data.prefix(4) - guard magic == Data("GGUF".utf8) else { - throw QuantizationError.invalidOutput - } + private func refreshHistory() { + quantizationHistory = engine.getQuantizationHistory() } } From 04d06c5423e17bbffbaf060011f55ac7d537240f Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 20:16:47 -0500 Subject: [PATCH 6/8] feat: add paginated HF search loading in quantize flow --- .../ViewModels/QuantizeViewModel.swift | 40 ++++++++++++++++++- ModelQuantizer/Views/QuantizeView.swift | 3 ++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift index 57a7958..d1f397e 100644 --- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift +++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift @@ -36,6 +36,9 @@ class QuantizeViewModel: ObservableObject { // Search debounce private var searchTask: Task? private let searchDebounceInterval: TimeInterval = 0.5 + private var currentSearchOffset = 0 + private var hasMoreSearchResults = true + private var isLoadingMoreResults = false init() { setupBindings() @@ -146,6 +149,7 @@ class QuantizeViewModel: ObservableObject { let popularModels = try await hfAPI.searchModels( query: "", limit: 20, + offset: 0, filter: ModelFilter(sortBy: .downloads) ) @@ -173,6 +177,8 @@ class QuantizeViewModel: ObservableObject { searchTask = Task { @MainActor in isSearching = true defer { isSearching = false } + currentSearchOffset = 0 + hasMoreSearchResults = true // First, filter local models filterLocalModels(query: query) @@ -181,7 +187,8 @@ class QuantizeViewModel: ObservableObject { do { let apiModels = try await hfAPI.searchModels( query: query, - limit: 30 + limit: 30, + offset: 0 ) // Merge results, avoiding duplicates @@ -189,7 +196,10 @@ class QuantizeViewModel: ObservableObject { let newModels = apiModels.filter { !existingIds.contains($0.modelId) } self.models.append(contentsOf: newModels) + self.models = Array(self.models.prefix(300)) self.filterLocalModels(query: query) + self.currentSearchOffset = apiModels.count + self.hasMoreSearchResults = apiModels.count == 30 } catch let error as HFAPIError { self.errorMessage = error.errorDescription ?? "Search failed." @@ -200,6 +210,34 @@ class QuantizeViewModel: ObservableObject { } } } + + func loadMoreIfNeeded(currentItem: HFModel) { + guard !searchQuery.isEmpty, + hasMoreSearchResults, + !isLoadingMoreResults, + filteredModels.last?.id == currentItem.id else { return } + + isLoadingMoreResults = true + Task { @MainActor in + defer { isLoadingMoreResults = false } + do { + let more = try await hfAPI.searchModels( + query: searchQuery, + limit: 30, + offset: currentSearchOffset + ) + let existingIds = Set(self.models.map { $0.modelId }) + let newModels = more.filter { !existingIds.contains($0.modelId) } + self.models.append(contentsOf: newModels) + self.models = Array(self.models.prefix(300)) + self.filterLocalModels(query: searchQuery) + self.currentSearchOffset += more.count + self.hasMoreSearchResults = more.count == 30 + } catch { + self.hasMoreSearchResults = false + } + } + } private func filterLocalModels(query: String) { if query.isEmpty { diff --git a/ModelQuantizer/Views/QuantizeView.swift b/ModelQuantizer/Views/QuantizeView.swift index 9b99b96..b9f16ab 100644 --- a/ModelQuantizer/Views/QuantizeView.swift +++ b/ModelQuantizer/Views/QuantizeView.swift @@ -277,6 +277,9 @@ struct QuantizeView: View { viewModel.selectModel(model) } } + .onAppear { + viewModel.loadMoreIfNeeded(currentItem: model) + } } } } From 0cfe6571b0c47d8b1787709803490d672d93409d Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 20:19:08 -0500 Subject: [PATCH 7/8] feat: make all four Home quick actions functional --- ModelQuantizer/Views/HomeView.swift | 50 ++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/ModelQuantizer/Views/HomeView.swift b/ModelQuantizer/Views/HomeView.swift index 1fd8c3d..df20b02 100644 --- a/ModelQuantizer/Views/HomeView.swift +++ b/ModelQuantizer/Views/HomeView.swift @@ -10,7 +10,7 @@ import SwiftUI struct HomeView: View { @StateObject private var viewModel = HomeViewModel() @StateObject private var scanner = DeviceScanner.shared - @StateObject private var quantizer = QuantizationEngine.shared + @State private var showingRefreshBanner = false var body: some View { ScrollView { @@ -38,6 +38,19 @@ struct HomeView: View { scanner.performScan() viewModel.loadRecentQuantizations() } + .overlay(alignment: .top) { + if showingRefreshBanner { + Text("Device scan started") + .font(.system(size: 13, weight: .semibold)) + .foregroundStyle(.white) + .padding(.horizontal, 14) + .padding(.vertical, 8) + .background(.black.opacity(0.65)) + .clipShape(Capsule()) + .padding(.top, 8) + .transition(.move(edge: .top).combined(with: .opacity)) + } + } } // MARK: - Header @@ -146,7 +159,7 @@ struct HomeView: View { .font(.system(size: 20, weight: .bold)) .foregroundStyle(.white) - HStack(spacing: 12) { + LazyVGrid(columns: [GridItem(.flexible()), GridItem(.flexible())], spacing: 12) { NavigationLink(destination: QuantizeView()) { QuickActionButton( icon: "cpu.fill", @@ -155,7 +168,7 @@ struct HomeView: View { color: .purple ) } - + NavigationLink(destination: ModelLibraryView()) { QuickActionButton( icon: "folder.fill", @@ -164,6 +177,36 @@ struct HomeView: View { color: .cyan ) } + + NavigationLink(destination: DeviceInfoView()) { + QuickActionButton( + icon: "iphone", + title: "Device", + subtitle: "Details", + color: .green + ) + } + + Button { + scanner.performScan() + viewModel.loadRecentQuantizations() + withAnimation(.easeOut(duration: 0.2)) { + showingRefreshBanner = true + } + Task { @MainActor in + try? await Task.sleep(nanoseconds: 1_200_000_000) + withAnimation(.easeIn(duration: 0.2)) { + showingRefreshBanner = false + } + } + } label: { + QuickActionButton( + icon: "arrow.clockwise.circle.fill", + title: "Refresh", + subtitle: "Status", + color: .orange + ) + } } } } @@ -409,4 +452,3 @@ struct EmptyStateView: View { } // MARK: - View Model - From 334e0e9ccf7ae0de63e2cd1b32c5149a9cc8d17a Mon Sep 17 00:00:00 2001 From: bobbytatum999 Date: Fri, 3 Apr 2026 20:26:57 -0500 Subject: [PATCH 8/8] fix: simplify wifi-only enforcement to avoid concurrency build failures --- ModelQuantizer/Services/HuggingFaceAPI.swift | 38 ++------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift index c3131f5..75f0b12 100644 --- a/ModelQuantizer/Services/HuggingFaceAPI.swift +++ b/ModelQuantizer/Services/HuggingFaceAPI.swift @@ -7,7 +7,6 @@ import Foundation import Combine -import Network import CryptoKit /// Hugging Face API Service for model search and metadata @@ -178,7 +177,7 @@ class HuggingFaceAPI: ObservableObject { to destination: URL, progressHandler: @escaping (Double) -> Void ) async throws { - try await enforceNetworkPolicy() + let wifiOnly = UserDefaults.standard.object(forKey: "wifi_only") as? Bool ?? true var attempts = 0 let maxAttempts = 3 @@ -187,6 +186,7 @@ class HuggingFaceAPI: ObservableObject { do { var request = URLRequest(url: url) request.setValue("application/octet-stream", forHTTPHeaderField: "Accept") + request.allowsCellularAccess = !wifiOnly if let token = getAuthToken() { request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") @@ -417,37 +417,6 @@ class HuggingFaceAPI: ObservableObject { return nil } - private func enforceNetworkPolicy() async throws { - let wifiOnly = UserDefaults.standard.object(forKey: "wifi_only") as? Bool ?? true - guard wifiOnly else { return } - - let monitor = NWPathMonitor() - let queue = DispatchQueue(label: "hf.network.policy") - let isWifi = await withCheckedContinuation { continuation in - let lock = NSLock() - var resolved = false - func resolve(_ value: Bool) { - lock.lock() - defer { lock.unlock() } - guard !resolved else { return } - resolved = true - continuation.resume(returning: value) - monitor.cancel() - } - let timeoutTask = DispatchWorkItem { - resolve(false) - } - queue.asyncAfter(deadline: .now() + 2.0, execute: timeoutTask) - monitor.pathUpdateHandler = { path in - timeoutTask.cancel() - let ok = path.status == .satisfied && path.usesInterfaceType(.wifi) - resolve(ok) - } - monitor.start(queue: queue) - } - guard isWifi else { throw HFAPIError.networkPolicyViolation } - } - private func expectedSHA256(from response: HTTPURLResponse) -> String? { if let checksum = response.value(forHTTPHeaderField: "x-checksum-sha256") { return checksum.replacingOccurrences(of: "\"", with: "") @@ -516,7 +485,6 @@ enum HFAPIError: Error, LocalizedError { case httpError(statusCode: Int) case downloadFailed case invalidData - case networkPolicyViolation var errorDescription: String? { switch self { @@ -534,8 +502,6 @@ enum HFAPIError: Error, LocalizedError { return "Failed to download model file" case .invalidData: return "Invalid data received" - case .networkPolicyViolation: - return "Wi-Fi only downloads is enabled. Connect to Wi-Fi to continue." } } }