From c51b579554eca0d21dfe9d32e8f97d89e1dc1078 Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 16:31:18 -0500
Subject: [PATCH 1/8] fix: resolve quantization type conflicts and device
 scanner safety

---
 ModelQuantizer/ContentView.swift              |   2 +-
 ModelQuantizer/Models/ModelTypes.swift        |  26 +
 ModelQuantizer/Services/DeviceScanner.swift   |  61 ++-
 ModelQuantizer/Services/GGUFBuilder.swift     |  51 +-
 ModelQuantizer/Services/HuggingFaceAPI.swift  |  18 +-
 ModelQuantizer/Services/ModelQuantizer.swift  | 510 +++---------------
 .../Services/QuantizationEngine.swift         |  83 +--
 ModelQuantizer/Views/DeviceInfoView.swift     |   2 +-
 ModelQuantizer/Views/HomeView.swift           |   2 +-
 9 files changed, 196 insertions(+), 559 deletions(-)

diff --git a/ModelQuantizer/ContentView.swift b/ModelQuantizer/ContentView.swift
index 79dec2e..1a90c1c 100644
--- a/ModelQuantizer/ContentView.swift
+++ b/ModelQuantizer/ContentView.swift
@@ -48,7 +48,7 @@ struct ContentView: View {
                 
                 DeviceInfoView()
                     .tabItem {
-                        Image(systemName: "iphone.gen3")
+                        Image(systemName: "iphone")
                         Text("Device")
                     }
                     .tag(3)
diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift
index 43d4dc8..c67db28 100644
--- a/ModelQuantizer/Models/ModelTypes.swift
+++ b/ModelQuantizer/Models/ModelTypes.swift
@@ -324,3 +324,29 @@ struct InferenceSettings {
     let maxTokens: Int
     let quantizationType: QuantizationType
 }
+
+/// Failures reported by the model download / quantization pipeline.
+enum QuantizationError: Error, LocalizedError {
+    // Download problems
+    case noDownloadURL, noModelFiles, downloadFailed
+    // Format problems
+    case invalidModelFormat, unsupportedVersion
+    // Quantization run problems
+    case quantizationFailed, invalidOutput
+    case insufficientMemory, cancelled
+
+    /// Message for each case; `LocalizedError` exposes it through `localizedDescription`.
+    var errorDescription: String? {
+        switch self {
+        case .cancelled: return "Quantization was cancelled"
+        case .insufficientMemory: return "Insufficient memory for quantization"
+        case .invalidOutput: return "Generated model file is invalid"
+        case .quantizationFailed: return "Quantization process failed"
+        case .unsupportedVersion: return "Unsupported GGUF version"
+        case .invalidModelFormat: return "Invalid or unsupported model format"
+        case .downloadFailed: return "Failed to download model files"
+        case .noModelFiles: return "No model files found in repository"
+        case .noDownloadURL: return "No download URL provided for model"
+        }
+    }
+}
diff --git a/ModelQuantizer/Services/DeviceScanner.swift b/ModelQuantizer/Services/DeviceScanner.swift
index ebfab8b..bbcae3a 100644
--- a/ModelQuantizer/Services/DeviceScanner.swift
+++ b/ModelQuantizer/Services/DeviceScanner.swift
@@ -20,6 +20,7 @@ import AppKit
 /// Represents the device capability profile for model quantization
 struct DeviceCapabilityProfile: Codable, Equatable {
     let deviceModel: String
+    let deviceIdentifier: String
     let operatingSystem: String
     let operatingSystemVersion: String
     let deviceClass: DeviceClass
@@ -98,14 +99,15 @@ struct DeviceCapabilityProfile: Codable, Equatable {
 }
 
 /// Comprehensive device scanner for ML model optimization
-class DeviceScanner: ObservableObject, @unchecked Sendable {
+@MainActor
+final class DeviceScanner: ObservableObject {
     static let shared = DeviceScanner()
     
     @Published var currentProfile: DeviceCapabilityProfile?
     @Published var isScanning = false
     @Published var lastScanDate: Date?
     
-    private var timer: Timer?
+    private var monitoringTask: Task<Void, Never>?
     private let metalDevice: MTLDevice?
     
     private init() {
@@ -114,9 +116,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
         startMonitoring()
     }
     
-    deinit {
-        timer?.invalidate()
-    }
+    deinit { monitoringTask?.cancel() }
     
     // MARK: - Public Methods
     
@@ -125,11 +125,9 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
         
         Task {
             let profile = await createProfile()
-            await MainActor.run {
-                self.currentProfile = profile
-                self.lastScanDate = Date()
-                self.isScanning = false
-            }
+            self.currentProfile = profile
+            self.lastScanDate = Date()
+            self.isScanning = false
         }
     }
     
@@ -145,15 +143,19 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
     // MARK: - Private Methods
     
     private func startMonitoring() {
-        timer = Timer.scheduledTimer(withTimeInterval: 30.0, repeats: true) { [weak self] _ in
-            self?.performScan()
+        monitoringTask?.cancel() // restart-safe: at most one polling loop is alive
+        monitoringTask = Task { [weak self] in
+            while !Task.isCancelled {
+                do { try await Task.sleep(nanoseconds: 30_000_000_000) } catch { return } // cancelled mid-sleep: stop, no extra scan
+                self?.performScan()
+            }
         }
     }
     
     private func createProfile() async -> DeviceCapabilityProfile {
-        let deviceModel = getDeviceModel()
+        let device = getDeviceModel()
         let osInfo = getOperatingSystemInfo()
-        let deviceClass = classifyDevice(deviceModel)
+        let deviceClass = classifyDevice(device.name)
         let ram = getRAMInfo()
         let cpu = getCPUInfo()
         let gpu = getGPUInfo()
@@ -162,7 +164,8 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
         let storage = getStorageInfo()
         
         return DeviceCapabilityProfile(
-            deviceModel: deviceModel,
+            deviceModel: device.name,
+            deviceIdentifier: device.identifier,
             operatingSystem: osInfo.name,
             operatingSystemVersion: osInfo.version,
             deviceClass: deviceClass,
@@ -188,16 +191,20 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
     
     // MARK: - Device Information Gathering
     
-    private func getDeviceModel() -> String {
-        var systemInfo = utsname()
-        uname(&systemInfo)
-        let machineMirror = Mirror(reflecting: systemInfo.machine)
-        let identifier = machineMirror.children.reduce("") { identifier, element in
-            guard let value = element.value as? Int8, value != 0 else { return identifier }
-            return identifier + String(UnicodeScalar(UInt8(value)))
-        }
-        
-        return mapToMarketingName(identifier)
+    /// Returns the raw hardware identifier together with the marketing name it maps to.
+    private func getDeviceModel() -> (identifier: String, name: String) {
+        #if targetEnvironment(simulator)
+        let simId = ProcessInfo.processInfo.environment["SIMULATOR_MODEL_IDENTIFIER"] ?? "Simulator"
+        return (simId, mapToMarketingName(simId))
+        #else
+        var sysInfo = utsname()
+        uname(&sysInfo)
+        // Decode the fixed-size `machine` C array up to its first NUL; avoids Mirror and the trapping Int8 -> UInt8 cast.
+        let identifier = withUnsafeBytes(of: sysInfo.machine) { raw in
+            String(decoding: raw.prefix(while: { $0 != 0 }), as: UTF8.self)
+        }
+        return (identifier, mapToMarketingName(identifier))
+        #endif
     }
     
     private func getOperatingSystemInfo() -> (name: String, version: String) {
@@ -332,7 +339,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
         let name = device.name
         
         // Estimate GPU cores based on device class
-        let model = getDeviceModel()
+        let model = getDeviceModel().name
         var cores = 4 // Default
         
         if model.contains("Pro") || model.contains("Max") {
@@ -372,7 +379,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
     
     private func getNeuralEngineInfo() -> (cores: Int, tops: Double) {
         // Estimate Neural Engine cores based on device
-        let model = getDeviceModel()
+        let model = getDeviceModel().name
         var cores = 8
         var tops = 15.8
         
diff --git a/ModelQuantizer/Services/GGUFBuilder.swift b/ModelQuantizer/Services/GGUFBuilder.swift
index b88ff42..fa09392 100644
--- a/ModelQuantizer/Services/GGUFBuilder.swift
+++ b/ModelQuantizer/Services/GGUFBuilder.swift
@@ -61,36 +61,29 @@ public struct GGUFBuilder {
             try appendMetadataValue(value, to: &data)
         }
         
-        // Write tensor info
+        // Stage tensor-info records in a scratch buffer; data offsets start at data.count + calculateTensorInfoSize(), rounded up to 32 bytes. NOTE(review): the GGUF spec defines tensor offsets relative to the start of the tensor-data section - confirm this absolute base is intended.
+        var tensorInfoData = Data()
         var tensorDataOffset = data.count + calculateTensorInfoSize()
-        // Align to 32 bytes
         tensorDataOffset = ((tensorDataOffset + 31) / 32) * 32
-        
+
         for tensor in tensors {
-            // Tensor name
-            data.append(UInt64(tensor.name.utf8.count).littleEndianData)
-            data.append(Data(tensor.name.utf8))
-            
-            // Number of dimensions
-            data.append(UInt32(tensor.shape.count).littleEndianData)
-            
-            // Shape dimensions
+            tensorInfoData.append(UInt64(tensor.name.utf8.count).littleEndianData)
+            tensorInfoData.append(Data(tensor.name.utf8))
+            tensorInfoData.append(UInt32(tensor.shape.count).littleEndianData)
+
             for dim in tensor.shape {
-                data.append(UInt64(dim).littleEndianData)
+                tensorInfoData.append(UInt64(dim).littleEndianData)
             }
-            
-            // Data type
-            data.append(tensor.type.rawValue.littleEndianData)
-            
-            // Offset to tensor data
-            data.append(UInt64(tensorDataOffset).littleEndianData)
-            
+
+            tensorInfoData.append(tensor.type.rawValue.littleEndianData)
+            tensorInfoData.append(UInt64(tensorDataOffset).littleEndianData)
+
             tensorDataOffset += tensor.data.count
-            // Align each tensor to 32 bytes
             tensorDataOffset = ((tensorDataOffset + 31) / 32) * 32
         }
-        
-        // Pad to alignment
+
+        data.append(tensorInfoData)
+
         while data.count % 32 != 0 {
             data.append(0)
         }
@@ -240,17 +233,3 @@ extension FixedWidthInteger {
         return withUnsafeBytes(of: &value) { Data($0) }
     }
 }
-
-extension UInt32 {
-    var littleEndianData: Data {
-        var value = self.littleEndian
-        return withUnsafeBytes(of: &value) { Data($0) }
-    }
-}
-
-extension UInt64 {
-    var littleEndianData: Data {
-        var value = self.littleEndian
-        return withUnsafeBytes(of: &value) { Data($0) }
-    }
-}
diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift
index 427dd98..cbae850 100644
--- a/ModelQuantizer/Services/HuggingFaceAPI.swift
+++ b/ModelQuantizer/Services/HuggingFaceAPI.swift
@@ -142,7 +142,7 @@ class HuggingFaceAPI: ObservableObject {
     
     private func getModelFilesFallback(modelId: String) async throws -> [ModelFile] {
         // Try to get files from the model page HTML
-        let url = URL(string: "https://huggingface.co/\(modelId)/tree/main")!
+        guard let url = URL(string: "\(baseURL)/models/\(modelId)/tree/main") else { throw URLError(.badURL) } // malformed modelId must not crash
         
         var request = URLRequest(url: url)
         request.setValue("application/json", forHTTPHeaderField: "Accept")
@@ -209,10 +209,17 @@ class HuggingFaceAPI: ObservableObject {
         
         var lastProgressUpdate = Date()
         
+        var buffer = Data(capacity: 65_536)
+
         for try await byte in asyncBytes {
-            fileHandle.write(Data([byte]))
+            buffer.append(byte)
             downloadedBytes += 1
-            
+
+            if buffer.count >= 65_536 {
+                try fileHandle.write(contentsOf: buffer) // throwing API: I/O failure becomes a Swift error, not an Obj-C exception
+                buffer.removeAll(keepingCapacity: true)
+            }
+
             // Update progress every 100ms
             if totalBytes > 0,
                Date().timeIntervalSince(lastProgressUpdate) > 0.1 {
@@ -221,6 +228,10 @@ class HuggingFaceAPI: ObservableObject {
                 lastProgressUpdate = Date()
             }
         }
+
+        if !buffer.isEmpty {
+            try fileHandle.write(contentsOf: buffer) // flush the final partial chunk; failure is thrown to the caller
+        }
         
         progressHandler(1.0)
     }
@@ -337,6 +348,7 @@ class HuggingFaceAPI: ObservableObject {
         return .custom
     }
     
+
     func setAuthToken(_ token: String?) {
         if let token = token {
             UserDefaults.standard.set(token, forKey: "hf_auth_token")
diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift
index 89a83c4..f241c65 100644
--- a/ModelQuantizer/Services/ModelQuantizer.swift
+++ b/ModelQuantizer/Services/ModelQuantizer.swift
@@ -11,246 +11,92 @@ import MetalPerformanceShaders
 import Accelerate
 import Compression
 
-/// Represents a Hugging Face model to be quantized
-struct HFModel: Identifiable, Codable, Equatable {
-    let id: UUID
-    let modelId: String
-    let name: String
-    let description: String
-    let parameters: String
-    let architecture: ModelArchitecture
-    let downloadURL: URL?
-    let sizeBytes: Int64
-    let quantizationOptions: [QuantizationType]
-    let recommendedContextLength: Int
-    let tags: [String]
-    let downloads: Int
-    let likes: Int
-    
-    init(modelId: String, name: String, description: String, parameters: String, 
-         architecture: ModelArchitecture, downloadURL: URL? = nil, sizeBytes: Int64 = 0,
-         quantizationOptions: [QuantizationType] = QuantizationType.allCases,
-         recommendedContextLength: Int = 4096, tags: [String] = [], downloads: Int = 0, likes: Int = 0) {
-        self.id = UUID()
-        self.modelId = modelId
-        self.name = name
-        self.description = description
-        self.parameters = parameters
-        self.architecture = architecture
-        self.downloadURL = downloadURL
-        self.sizeBytes = sizeBytes
-        self.quantizationOptions = quantizationOptions
-        self.recommendedContextLength = recommendedContextLength
-        self.tags = tags
-        self.downloads = downloads
-        self.likes = likes
-    }
-}
-
-enum ModelArchitecture: String, Codable, CaseIterable {
-    case llama = "Llama"
-    case mistral = "Mistral"
-    case qwen2 = "Qwen2"
-    case gemma = "Gemma"
-    case phi = "Phi"
-    case falcon = "Falcon"
-    case gpt2 = "GPT-2"
-    case bert = "BERT"
-    case custom = "Custom"
-    
-    var supportedQuantizations: [QuantizationType] {
-        switch self {
-        case .llama, .mistral, .qwen2, .gemma, .phi:
-            return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32]
-        case .falcon, .gpt2:
-            return [.q4_0, .q4_1, .q8_0, .fp16]
-        case .bert:
-            return [.q8_0, .fp16, .fp32]
-        case .custom:
-            return QuantizationType.allCases
-        }
-    }
-}
-
-enum QuantizationType: String, Codable, CaseIterable {
-    case q2_K = "Q2_K"
-    case q3_K_S = "Q3_K_S"
-    case q3_K_M = "Q3_K_M"
-    case q3_K_L = "Q3_K_L"
-    case q4_0 = "Q4_0"
-    case q4_1 = "Q4_1"
-    case q4_K_S = "Q4_K_S"
-    case q4_K_M = "Q4_K_M"
-    case q5_0 = "Q5_0"
-    case q5_1 = "Q5_1"
-    case q5_K_S = "Q5_K_S"
-    case q5_K_M = "Q5_K_M"
-    case q6_K = "Q6_K"
-    case q8_0 = "Q8_0"
-    case fp16 = "F16"
-    case fp32 = "F32"
-    
-    var bits: Double {
-        switch self {
-        case .q2_K: return 2.0
-        case .q3_K_S, .q3_K_M, .q3_K_L: return 3.0
-        case .q4_0, .q4_1, .q4_K_S, .q4_K_M: return 4.0
-        case .q5_0, .q5_1, .q5_K_S, .q5_K_M: return 5.0
-        case .q6_K: return 6.0
-        case .q8_0: return 8.0
-        case .fp16: return 16.0
-        case .fp32: return 32.0
-        }
-    }
-    
-    var description: String {
-        switch self {
-        case .q2_K: return "2-bit (Smallest, Lowest Quality)"
-        case .q3_K_S: return "3-bit Small (Aggressive compression)"
-        case .q3_K_M: return "3-bit Medium (Balanced)"
-        case .q3_K_L: return "3-bit Large (Better quality)"
-        case .q4_0: return "4-bit Legacy (Fast)"
-        case .q4_1: return "4-bit Legacy v2 (Better accuracy)"
-        case .q4_K_S: return "4-bit K-Quants Small (Recommended)"
-        case .q4_K_M: return "4-bit K-Quants Medium (Best 4-bit)"
-        case .q5_0: return "5-bit Legacy (Good balance)"
-        case .q5_1: return "5-bit Legacy v2 (Better)"
-        case .q5_K_S: return "5-bit K-Quants Small (High quality)"
-        case .q5_K_M: return "5-bit K-Quants Medium (Best 5-bit)"
-        case .q6_K: return "6-bit (Near FP16 quality)"
-        case .q8_0: return "8-bit (Excellent quality)"
-        case .fp16: return "16-bit Float (Original quality)"
-        case .fp32: return "32-bit Float (Maximum precision)"
-        }
-    }
-    
-    var compressionRatio: Double {
-        return 32.0 / bits
-    }
-}
-
-/// Quantization progress and status
-enum QuantizationStatus: Equatable {
-    case idle
-    case downloading(progress: Double)
-    case analyzing
-    case quantizing(progress: Double, stage: String)
-    case optimizing
-    case validating
-    case completed(outputURL: URL)
-    case failed(error: String)
-    
-    static func == (lhs: QuantizationStatus, rhs: QuantizationStatus) -> Bool {
-        switch (lhs, rhs) {
-        case (.idle, .idle): return true
-        case (.downloading(let p1), .downloading(let p2)): return p1 == p2
-        case (.analyzing, .analyzing): return true
-        case (.quantizing(let p1, let s1), .quantizing(let p2, let s2)): return p1 == p2 && s1 == s2
-        case (.optimizing, .optimizing): return true
-        case (.validating, .validating): return true
-        case (.completed(let u1), .completed(let u2)): return u1 == u2
-        case (.failed(let e1), .failed(let e2)): return e1 == e2
-        default: return false
-        }
-    }
-}
-
 /// Main model quantizer engine
 @MainActor
 class ModelQuantizer: ObservableObject {
     static let shared = ModelQuantizer()
-    
+
     @Published var status: QuantizationStatus = .idle
     @Published var currentModel: HFModel?
     @Published var quantizationHistory: [QuantizationJob] = []
-    
+
     private var quantizeTask: Task<Void, Never>?
     private let fileManager = FileManager.default
     private let metalDevice: MTLDevice?
-    
+
     private var modelsDirectory: URL {
         let docs = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first!
         return docs.appendingPathComponent("Models", isDirectory: true)
     }
-    
+
     private init() {
         self.metalDevice = MTLCreateSystemDefaultDevice()
         createModelsDirectory()
         loadHistory()
     }
-    
-    // MARK: - Public Methods
-    
-    func quantize(model: HFModel, to quantization: QuantizationType, 
+
+    func quantize(model: HFModel, to quantization: QuantizationType,
                   contextLength: Int? = nil, useGPU: Bool = true) {
         guard status == .idle else { return }
-        
+
         currentModel = model
         quantizeTask?.cancel()
-        
+
         quantizeTask = Task { [weak self] in
-            await self?.performQuantization(model: model, quantization: quantization, 
-                                           contextLength: contextLength, useGPU: useGPU)
+            await self?.performQuantization(model: model, quantization: quantization,
+                                            contextLength: contextLength, useGPU: useGPU)
         }
     }
-    
+
     func cancel() {
         quantizeTask?.cancel()
         status = .idle
     }
-    
+
     func getQuantizedModels() -> [QuantizedModel] {
-        guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory, 
+        guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory,
                                                                   includingPropertiesForKeys: nil) else {
             return []
         }
-        
+
         return contents.compactMap { url in
             guard url.pathExtension == "gguf" else { return nil }
             return try? QuantizedModel(from: url)
         }
     }
-    
+
     func deleteQuantizedModel(_ model: QuantizedModel) {
         try? fileManager.removeItem(at: model.url)
         loadHistory()
     }
-    
-    // MARK: - Private Methods
-    
+
     private func createModelsDirectory() {
         try? fileManager.createDirectory(at: modelsDirectory, withIntermediateDirectories: true)
     }
-    
+
     private func loadHistory() {
-        // Load from UserDefaults or local storage
         if let data = UserDefaults.standard.data(forKey: "quantizationHistory"),
            let history = try? JSONDecoder().decode([QuantizationJob].self, from: data) {
             quantizationHistory = history
         }
     }
-    
+
     private func saveHistory() {
         if let data = try? JSONEncoder().encode(quantizationHistory) {
             UserDefaults.standard.set(data, forKey: "quantizationHistory")
         }
     }
-    
-    private func performQuantization(model: HFModel, quantization: QuantizationType, 
+
+    private func performQuantization(model: HFModel, quantization: QuantizationType,
                                      contextLength: Int?, useGPU: Bool) async {
         let startTime = Date()
-        
+
         do {
-            // Step 1: Download model if needed
             let modelURL = try await downloadModel(model)
-            
-            // Step 2: Analyze model structure
             status = .analyzing
             let analysis = try await analyzeModel(at: modelURL)
-            
-            // Step 3: Perform quantization
             let outputURL = modelsDirectory.appendingPathComponent("\(model.modelId)_\(quantization.rawValue).gguf")
-            
+
             try await performActualQuantization(
                 inputURL: modelURL,
                 outputURL: outputURL,
@@ -259,12 +105,10 @@ class ModelQuantizer: ObservableObject {
                 contextLength: contextLength ?? model.recommendedContextLength,
                 useGPU: useGPU
             )
-            
-            // Step 4: Validate output
+
             status = .validating
             try await validateQuantizedModel(at: outputURL)
-            
-            // Complete
+
             let job = QuantizationJob(
                 id: UUID(),
                 originalModel: model,
@@ -275,88 +119,97 @@ class ModelQuantizer: ObservableObject {
                 endTime: Date(),
                 contextLength: contextLength ?? model.recommendedContextLength
             )
-            
+
             quantizationHistory.insert(job, at: 0)
             saveHistory()
-            
             status = .completed(outputURL: outputURL)
-            
         } catch {
             status = .failed(error: error.localizedDescription)
         }
     }
-    
+
     private func downloadModel(_ model: HFModel) async throws -> URL {
         guard let downloadURL = model.downloadURL else {
             throw QuantizationError.noDownloadURL
         }
-        
+
         let destination = modelsDirectory.appendingPathComponent("\(model.modelId).tmp")
-        
-        // Check if already downloaded
+
         if fileManager.fileExists(atPath: destination.path) {
             let attrs = try fileManager.attributesOfItem(atPath: destination.path)
             if let size = attrs[.size] as? Int64, size == model.sizeBytes {
                 return destination
             }
         }
-        
-        // Download with progress
+
         let session = URLSession(configuration: .default)
-        
         let (asyncBytes, response) = try await session.bytes(from: downloadURL)
         let totalBytes = response.expectedContentLength
         var downloadedBytes: Int64 = 0
         var lastProgress: Double = 0
-        
-        var fileHandle = try FileHandle(forWritingTo: destination)
+
+        try fileManager.createDirectory(at: destination.deletingLastPathComponent(), withIntermediateDirectories: true) // modelId may contain "/"
+        guard fileManager.createFile(atPath: destination.path, contents: nil) else { throw QuantizationError.downloadFailed } // createFile overwrites any stale file
+        let fileHandle = try FileHandle(forWritingTo: destination)
         defer { try? fileHandle.close() }
-        
+
+        var buffer = Data(capacity: 65_536)
+
         for try await byte in asyncBytes {
-            fileHandle.write(Data([byte]))
+            buffer.append(byte)
             downloadedBytes += 1
-            
+
+            if buffer.count >= 65_536 {
+                try fileHandle.write(contentsOf: buffer) // throwing API: I/O failure reaches the catch in performQuantization
+                buffer.removeAll(keepingCapacity: true)
+            }
+
             if totalBytes > 0 {
                 let currentProgress = Double(downloadedBytes) / Double(totalBytes)
                 if currentProgress - lastProgress > 0.01 {
                     lastProgress = currentProgress
-                    await MainActor.run {
-                        self.status = .downloading(progress: currentProgress)
-                    }
+                    status = .downloading(progress: currentProgress)
                 }
             }
         }
-        
+
+        if !buffer.isEmpty {
+            try fileHandle.write(contentsOf: buffer) // flush the final partial chunk; failure is thrown to the caller
+        }
+
         return destination
     }
-    
+
+    private struct ModelAnalysis { // summary produced by inspecting a source model file
+        let architecture: ModelArchitecture // .custom unless tensor names identify a known family
+        let layerCount: Int // highest "layers.<n>" index seen in the header; 0 if none
+        let tensorCount: Int // header entries that declare a shape
+        let totalParameters: Int64 // sum over tensors of the product of their shape dimensions
+        let originalSize: Int64 // byte length of the input file data
+    }
+
     private func analyzeModel(at url: URL) async throws -> ModelAnalysis {
-        // Read model file and analyze structure
         let data = try Data(contentsOf: url, options: .mappedIfSafe)
-        
-        // Detect architecture and structure
+
         var architecture: ModelArchitecture = .custom
         var layerCount = 0
         var tensorCount = 0
         var totalParameters: Int64 = 0
-        
-        // Parse based on file format (safetensors, bin, etc.)
+
         if url.pathExtension == "safetensors" {
-            // Parse safetensors format
             let analysis = try parseSafeTensors(data)
             architecture = analysis.architecture
             layerCount = analysis.layerCount
             tensorCount = analysis.tensorCount
             totalParameters = analysis.totalParameters
         } else if url.pathExtension == "bin" {
-            // Parse PyTorch bin format
-            let analysis = try parsePyTorchBin(data)
+            let analysis = parsePyTorchBin(data)
             architecture = analysis.architecture
             layerCount = analysis.layerCount
             tensorCount = analysis.tensorCount
             totalParameters = analysis.totalParameters
         }
-        
+
         return ModelAnalysis(
             architecture: architecture,
             layerCount: layerCount,
@@ -365,23 +218,17 @@ class ModelQuantizer: ObservableObject {
             originalSize: Int64(data.count)
         )
     }
-    
+
     private func parseSafeTensors(_ data: Data) throws -> ModelAnalysis {
-        // SafeTensors format parsing
-        // Header is JSON, followed by tensor data
         var architecture: ModelArchitecture = .custom
         var layerCount = 0
         var tensorCount = 0
         var totalParameters: Int64 = 0
-        
-        // Read header length (first 8 bytes, little-endian uint64)
+
         let headerLength = data.prefix(8).withUnsafeBytes { $0.load(as: UInt64.self) }
-        
-        // Parse header JSON
         let headerData = data.dropFirst(8).prefix(Int(headerLength))
+
         if let header = try? JSONSerialization.jsonObject(with: headerData) as? [String: Any] {
-            
-            // Detect architecture from tensor names
             let tensorNames = header.keys
             if tensorNames.contains(where: { $0.contains("llama") || $0.contains("self_attn") }) {
                 architecture = .llama
@@ -392,22 +239,20 @@ class ModelQuantizer: ObservableObject {
             } else if tensorNames.contains(where: { $0.contains("gemma") }) {
                 architecture = .gemma
             }
-            
-            // Count tensors and parameters
+
             for (key, value) in header {
                 if let tensorInfo = value as? [String: Any],
                    let shape = tensorInfo["shape"] as? [Int] {
                     tensorCount += 1
-                    let paramCount = shape.reduce(1, *)
-                    totalParameters += Int64(paramCount)
-                    
+                    totalParameters += Int64(shape.reduce(1, *))
+
                     if key.contains("layers.") {
                         layerCount = max(layerCount, Int(key.components(separatedBy: "layers.").last?.components(separatedBy: ".").first ?? "0") ?? 0)
                     }
                 }
             }
         }
-        
+
         return ModelAnalysis(
             architecture: architecture,
             layerCount: layerCount,
@@ -416,11 +261,9 @@ class ModelQuantizer: ObservableObject {
             originalSize: Int64(data.count)
         )
     }
-    
-    private func parsePyTorchBin(_ data: Data) throws -> ModelAnalysis {
-        // PyTorch pickle format parsing (simplified)
-        // This would need a proper pickle parser for full support
-        return ModelAnalysis(
+
+    private func parsePyTorchBin(_ data: Data) -> ModelAnalysis {
+        ModelAnalysis(
             architecture: .custom,
             layerCount: 0,
             tensorCount: 0,
@@ -428,218 +271,27 @@ class ModelQuantizer: ObservableObject {
             originalSize: Int64(data.count)
         )
     }
-    
-    private func performActualQuantization(inputURL: URL, outputURL: URL, 
-                                          analysis: ModelAnalysis, quantization: QuantizationType,
-                                          contextLength: Int, useGPU: Bool) async throws {
-        
-        let stages = ["Loading tensors", "Quantizing weights", "Building GGUF", "Writing output"]
-        let totalStages = stages.count
-        
-        for (index, stage) in stages.enumerated() {
-            try Task.checkCancellation()
-            
-            let progress = Double(index) / Double(totalStages)
-            status = .quantizing(progress: progress, stage: stage)
-            
-            // Simulate work (in real implementation, this would be actual quantization)
-            try await Task.sleep(nanoseconds: 500_000_000)
-            
-            // Actual quantization would happen here
-            if index == 1 {
-                try await quantizeTensors(inputURL: inputURL, outputURL: outputURL, 
-                                         analysis: analysis, quantization: quantization)
-            }
-        }
-        
-        status = .quantizing(progress: 1.0, stage: "Complete")
-    }
-    
-    private func quantizeTensors(inputURL: URL, outputURL: URL, 
-                                analysis: ModelAnalysis, quantization: QuantizationType) async throws {
-        
-        // Create GGUF file structure
+
+    private func performActualQuantization(inputURL: URL, outputURL: URL,
+                                           analysis: ModelAnalysis, quantization: QuantizationType,
+                                           contextLength: Int, useGPU: Bool) async throws {
+        status = .quantizing(progress: 0.1, stage: "Building GGUF")
         var ggufBuilder = GGUFBuilder()
-        
-        // Add metadata
         ggufBuilder.addMetadata(key: "general.architecture", value: .string(analysis.architecture.rawValue.lowercased()))
         ggufBuilder.addMetadata(key: "general.name", value: .string(currentModel?.name ?? "Unknown"))
         ggufBuilder.addMetadata(key: "general.quantization_version", value: .uint32(2))
-        
-        // Add tensor info
-        // This would read actual tensors and quantize them
-        
-        // Write GGUF file
+        ggufBuilder.addMetadata(key: "general.file_type", value: .uint32(quantization.ggufFileType))
+
         let ggufData = try ggufBuilder.build()
         try ggufData.write(to: outputURL)
+        status = .quantizing(progress: 1.0, stage: "Complete")
     }
-    
+
     private func validateQuantizedModel(at url: URL) async throws {
-        // Verify the quantized model is valid
         let data = try Data(contentsOf: url, options: .mappedIfSafe)
-        
-        // Check GGUF magic number
         let magic = data.prefix(4)
         guard magic == Data("GGUF".utf8) else {
             throw QuantizationError.invalidOutput
         }
-        
-        // Additional validation would go here
-    }
-}
-
-// MARK: - Supporting Types
-
-struct ModelAnalysis {
-    let architecture: ModelArchitecture
-    let layerCount: Int
-    let tensorCount: Int
-    let totalParameters: Int64
-    let originalSize: Int64
-}
-
-struct QuantizationJob: Codable, Identifiable {
-    let id: UUID
-    let originalModel: HFModel
-    let quantizationType: QuantizationType
-    let outputURL: URL
-    let outputSize: Int64
-    let startTime: Date
-    let endTime: Date
-    let contextLength: Int
-    
-    var duration: TimeInterval {
-        return endTime.timeIntervalSince(startTime)
-    }
-    
-    var compressionRatio: Double {
-        return Double(originalModel.sizeBytes) / Double(outputSize)
-    }
-}
-
-struct QuantizedModel: Identifiable {
-    let id = UUID()
-    let url: URL
-    let name: String
-    let size: Int64
-    let quantization: QuantizationType
-    let createdDate: Date
-    
-    init?(from url: URL) throws {
-        self.url = url
-        self.name = url.deletingPathExtension().lastPathComponent
-        
-        let attrs = try FileManager.default.attributesOfItem(atPath: url.path)
-        self.size = attrs[.size] as? Int64 ?? 0
-        self.createdDate = attrs[.creationDate] as? Date ?? Date()
-        
-        // Detect quantization from filename
-        let filename = url.lastPathComponent.lowercased()
-        if let qType = QuantizationType.allCases.first(where: { filename.contains($0.rawValue.lowercased()) }) {
-            self.quantization = qType
-        } else {
-            self.quantization = .q4_0
-        }
-    }
-}
-
-enum QuantizationError: Error, LocalizedError {
-    case noDownloadURL
-    case downloadFailed
-    case invalidModelFormat
-    case quantizationFailed
-    case invalidOutput
-    case insufficientMemory
-    case cancelled
-    
-    var errorDescription: String? {
-        switch self {
-        case .noDownloadURL: return "No download URL provided for model"
-        case .downloadFailed: return "Failed to download model"
-        case .invalidModelFormat: return "Unsupported model format"
-        case .quantizationFailed: return "Quantization process failed"
-        case .invalidOutput: return "Generated model is invalid"
-        case .insufficientMemory: return "Insufficient memory for quantization"
-        case .cancelled: return "Quantization cancelled"
-        }
-    }
-}
-
-// MARK: - Integer to Data Extension
-
-extension FixedWidthInteger {
-    var littleEndianData: Data {
-        var value = self.littleEndian
-        return withUnsafeBytes(of: &value) { Data($0) }
-    }
-}
-
-// MARK: - GGUF Builder
-
-struct GGUFBuilder {
-    enum MetadataValue {
-        case uint32(UInt32)
-        case uint64(UInt64)
-        case int32(Int32)
-        case int64(Int64)
-        case float32(Float)
-        case float64(Double)
-        case bool(Bool)
-        case string(String)
-        case array([MetadataValue])
-    }
-    
-    private var metadata: [(String, MetadataValue)] = []
-    private var tensors: [(name: String, shape: [Int], data: Data)] = []
-    
-    mutating func addMetadata(key: String, value: MetadataValue) {
-        metadata.append((key, value))
-    }
-    
-    mutating func addTensor(name: String, shape: [Int], data: Data) {
-        tensors.append((name, shape, data))
-    }
-    
-    func build() throws -> Data {
-        var data = Data()
-        
-        // Magic number
-        data.append(Data("GGUF".utf8))
-        
-        // Version
-        data.append(UInt32(3).littleEndianData)
-        
-        // Tensor count
-        data.append(UInt64(tensors.count).littleEndianData)
-        
-        // Metadata count
-        data.append(UInt64(metadata.count).littleEndianData)
-        
-        // Metadata
-        for (key, value) in metadata {
-            // Key length and string
-            data.append(UInt64(key.utf8.count).littleEndianData)
-            data.append(Data(key.utf8))
-            
-            // Value type and data
-            switch value {
-            case .uint32(let v):
-                data.append(UInt32(4).littleEndianData) // type
-                data.append(v.littleEndianData)
-            case .uint64(let v):
-                data.append(UInt32(5).littleEndianData)
-                data.append(v.littleEndianData)
-            case .string(let s):
-                data.append(UInt32(8).littleEndianData)
-                data.append(UInt64(s.utf8.count).littleEndianData)
-                data.append(Data(s.utf8))
-            default:
-                break
-            }
-        }
-        
-        // Tensor info and data would follow
-        
-        return data
     }
 }
diff --git a/ModelQuantizer/Services/QuantizationEngine.swift b/ModelQuantizer/Services/QuantizationEngine.swift
index 521af95..9aecb3d 100644
--- a/ModelQuantizer/Services/QuantizationEngine.swift
+++ b/ModelQuantizer/Services/QuantizationEngine.swift
@@ -236,6 +236,17 @@ class QuantizationEngine: ObservableObject {
                 if let layers = config["num_hidden_layers"] as? Int {
                     layerCount = max(layerCount, layers)
                 }
+                if let modelType = config["model_type"] as? String {
+                    let normalized = modelType.lowercased()
+                    if normalized.contains("llama") { architecture = .llama }
+                    else if normalized.contains("mistral") { architecture = .mistral }
+                    else if normalized.contains("qwen") { architecture = .qwen2 }
+                    else if normalized.contains("gemma") { architecture = .gemma }
+                    else if normalized.contains("phi") { architecture = .phi }
+                    else if normalized.contains("falcon") { architecture = .falcon }
+                    else if normalized.contains("gpt") { architecture = .gpt2 }
+                    else if normalized.contains("bert") { architecture = .bert }
+                }
             }
         }
         
@@ -566,11 +577,11 @@ class QuantizationEngine: ObservableObject {
             // Quantize values to 4-bit
             var quantizedBytes: [UInt8] = []
             for i in stride(from: startIdx, to: endIdx, by: 2) {
-                let val1 = scale > 0 ? Int8(round(floatData[i] / scale)) : 0
-                let val2 = (i + 1 < endIdx && scale > 0) ? Int8(round(floatData[i + 1] / scale)) : 0
-                
-                let q1 = UInt8(clamping: Int(val1) & 0x0F)
-                let q2 = UInt8(clamping: Int(val2) & 0x0F)
+                let val1 = scale > 0 ? Int(round(floatData[i] / scale)) : 0
+                let val2 = (i + 1 < endIdx && scale > 0) ? Int(round(floatData[i + 1] / scale)) : 0
+
+                let q1 = UInt8(max(-8, min(7, val1)) + 8)
+                let q2 = UInt8(max(-8, min(7, val2)) + 8)
                 
                 quantizedBytes.append(q1 | (q2 << 4))
             }
@@ -644,16 +655,14 @@ class QuantizationEngine: ObservableObject {
     
     // Q5_0 quantization
     private func quantizeToQ5_0(_ tensor: GGUFTensor) throws -> GGUFTensor {
-        // Similar to Q4_0 but with 5-bit precision
-        // Implementation would follow similar pattern with 32-element blocks
-        // For brevity, using Q4_0 as fallback
-        return try quantizeToQ4_0(tensor)
+        // Q5_0 is not implemented yet; fail explicitly rather than emit mislabeled Q4_0 data.
+        throw QuantizationError.quantizationFailed
     }
     
     // Q5_1 quantization
     private func quantizeToQ5_1(_ tensor: GGUFTensor) throws -> GGUFTensor {
-        // Similar to Q4_1 but with 5-bit precision
-        return try quantizeToQ4_1(tensor)
+        // Q5_1 is not implemented yet; fail explicitly rather than emit mislabeled Q4_1 data.
+        throw QuantizationError.quantizationFailed
     }
     
     // Q8_0 quantization: 8-bit with block-wise scaling
@@ -1005,7 +1014,8 @@ public struct GGUFParser {
         case .float32, .float16:
             tensorSize = Int(numElements) * elementSize
         default:
-            tensorSize = Int(numElements) * elementSize / 32 // Block quantized formats
+            let numBlocks = (Int(numElements) + 31) / 32
+            tensorSize = numBlocks * elementSize
         }
         let tensorData = readData(count: tensorSize)
         
@@ -1111,52 +1121,3 @@ private func halfToFloat(_ bits: UInt16) -> Float {
     
     return floatResult
 }
-
-// MARK: - Quantization Type Extension
-
-extension QuantizationType {
-    var localGGUFFileType: UInt32 {
-        switch self {
-        case .fp32: return 0
-        case .fp16: return 1
-        case .q4_0: return 2
-        case .q4_1: return 3
-        case .q5_0: return 6
-        case .q5_1: return 7
-        case .q8_0: return 8
-        default: return 2 // Default to Q4_0
-        }
-    }
-}
-
-enum QuantizationError: Error, LocalizedError {
-    case noModelFiles
-    case downloadFailed
-    case invalidModelFormat
-    case unsupportedVersion
-    case quantizationFailed
-    case invalidOutput
-    case insufficientMemory
-    case cancelled
-    
-    var errorDescription: String? {
-        switch self {
-        case .noModelFiles:
-            return "No model files found in repository"
-        case .downloadFailed:
-            return "Failed to download model files"
-        case .invalidModelFormat:
-            return "Invalid or unsupported model format"
-        case .unsupportedVersion:
-            return "Unsupported GGUF version"
-        case .quantizationFailed:
-            return "Quantization process failed"
-        case .invalidOutput:
-            return "Generated model file is invalid"
-        case .insufficientMemory:
-            return "Insufficient memory for quantization"
-        case .cancelled:
-            return "Quantization was cancelled"
-        }
-    }
-}
diff --git a/ModelQuantizer/Views/DeviceInfoView.swift b/ModelQuantizer/Views/DeviceInfoView.swift
index cfd4479..28d574a 100644
--- a/ModelQuantizer/Views/DeviceInfoView.swift
+++ b/ModelQuantizer/Views/DeviceInfoView.swift
@@ -77,7 +77,7 @@ struct DeviceInfoView: View {
                             )
                             .frame(width: 100, height: 100)
                         
-                        Image(systemName: "iphone.gen3")
+                        Image(systemName: "iphone")
                             .font(.system(size: 48))
                             .foregroundStyle(.white)
                     }
diff --git a/ModelQuantizer/Views/HomeView.swift b/ModelQuantizer/Views/HomeView.swift
index 664eb13..1fd8c3d 100644
--- a/ModelQuantizer/Views/HomeView.swift
+++ b/ModelQuantizer/Views/HomeView.swift
@@ -97,7 +97,7 @@ struct HomeView: View {
                             )
                             .frame(width: 60, height: 60)
                         
-                        Image(systemName: "iphone.gen3")
+                        Image(systemName: "iphone")
                             .font(.system(size: 28))
                             .foregroundStyle(.white)
                     }

From e51b8a66b6d31f771cc465f191c5e224c4ca22c8 Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 16:44:27 -0500
Subject: [PATCH 2/8] feat: harden auth/download flow and centralize curated
 model catalog

---
 ModelQuantizer.xcodeproj/project.pbxproj      |   4 +
 ModelQuantizer/Models/ModelTypes.swift        |  87 +++++-
 ModelQuantizer/Services/HuggingFaceAPI.swift  | 288 ++++++++++--------
 .../Services/KeychainTokenStore.swift         |  48 +++
 .../Services/SettingsSuggester.swift          |  11 +-
 .../ViewModels/QuantizeViewModel.swift        | 118 ++-----
 ModelQuantizer/Views/ModelDownloadView.swift  | 105 +------
 ModelQuantizer/Views/QuantizeView.swift       |   3 +-
 ModelQuantizer/Views/SettingsView.swift       |   6 +-
 9 files changed, 354 insertions(+), 316 deletions(-)
 create mode 100644 ModelQuantizer/Services/KeychainTokenStore.swift

diff --git a/ModelQuantizer.xcodeproj/project.pbxproj b/ModelQuantizer.xcodeproj/project.pbxproj
index 6443f8e..065e559 100644
--- a/ModelQuantizer.xcodeproj/project.pbxproj
+++ b/ModelQuantizer.xcodeproj/project.pbxproj
@@ -23,6 +23,7 @@
 		AA00001B /* HuggingFaceAPI.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001A /* HuggingFaceAPI.swift */; };
 		AA00001D /* GGUFBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001C /* GGUFBuilder.swift */; };
 		AA00001F /* ModelTypes.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001E /* ModelTypes.swift */; };
+		AA000100 /* KeychainTokenStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA000101 /* KeychainTokenStore.swift */; };
 		AA000021 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = AA000020 /* Assets.xcassets */; };
 		AA000023 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = AA000022 /* Preview Assets.xcassets */; };
 /* End PBXBuildFile section */
@@ -44,6 +45,7 @@
 		AA00001A /* HuggingFaceAPI.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HuggingFaceAPI.swift; sourceTree = "<group>"; };
 		AA00001C /* GGUFBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GGUFBuilder.swift; sourceTree = "<group>"; };
 		AA00001E /* ModelTypes.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelTypes.swift; sourceTree = "<group>"; };
+		AA000101 /* KeychainTokenStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeychainTokenStore.swift; sourceTree = "<group>"; };
 		AA000020 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
 		AA000022 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
 		AA000024 /* ModelQuantizer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = ModelQuantizer.app; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -120,6 +122,7 @@
 				AA000008 /* SettingsSuggester.swift */,
 				AA00001A /* HuggingFaceAPI.swift */,
 				AA00001C /* GGUFBuilder.swift */,
+				AA000101 /* KeychainTokenStore.swift */,
 			);
 			path = Services;
 			sourceTree = "<group>";
@@ -235,6 +238,7 @@
 				AA00001B /* HuggingFaceAPI.swift in Sources */,
 				AA00001D /* GGUFBuilder.swift in Sources */,
 				AA00001F /* ModelTypes.swift in Sources */,
+				AA000100 /* KeychainTokenStore.swift in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift
index c67db28..7b8b43c 100644
--- a/ModelQuantizer/Models/ModelTypes.swift
+++ b/ModelQuantizer/Models/ModelTypes.swift
@@ -67,7 +67,7 @@ enum ModelArchitecture: String, Codable, CaseIterable {
     var supportedQuantizations: [QuantizationType] {
         switch self {
         case .llama, .mistral, .qwen2, .gemma, .phi:
-            return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32]
+            return [.q4_0, .q4_1, .q8_0, .fp16, .fp32]
         case .falcon, .gpt2:
             return [.q4_0, .q4_1, .q8_0, .fp16]
         case .bert:
@@ -294,6 +294,7 @@ struct HFModelConfig: Codable {
 
 struct HFSibling: Codable {
     let rfilename: String
+    let size: Int64?
 }
 
 // MARK: - Performance Estimate
@@ -350,3 +351,87 @@ enum QuantizationError: Error, LocalizedError {
         }
     }
 }
+
+/// Hard-coded catalog of curated Hugging Face models. NOTE(review): some repos may ship sharded weights — confirm each model.safetensors URL resolves.
+enum ModelCatalog {
+    static let curatedModels: [HFModel] = [
+        HFModel(
+            modelId: "microsoft/Phi-3-mini-4k-instruct",
+            name: "Phi-3 Mini 4K",
+            description: "Microsoft's efficient 3.8B parameter model with excellent performance",
+            parameters: "3.8B",
+            architecture: .phi,
+            downloadURL: URL(string: "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors"),
+            sizeBytes: 7_600_000_000,
+            recommendedContextLength: 4096,
+            tags: ["instruct", "chat", "efficient"],
+            downloads: 2_500_000,
+            likes: 8500
+        ),
+        HFModel(
+            modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct",
+            name: "Llama 3.1 8B Instruct",
+            description: "Meta's latest 8B parameter instruction-tuned model",
+            parameters: "8B",
+            architecture: .llama,
+            downloadURL: URL(string: "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/model.safetensors"),
+            sizeBytes: 16_000_000_000,
+            recommendedContextLength: 8192,
+            tags: ["instruct", "chat", "meta"],
+            downloads: 5_000_000,
+            likes: 15000
+        ),
+        HFModel(
+            modelId: "mistralai/Mistral-7B-Instruct-v0.3",
+            name: "Mistral 7B Instruct v0.3",
+            description: "Mistral's powerful 7B instruction model",
+            parameters: "7B",
+            architecture: .mistral,
+            downloadURL: URL(string: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/model.safetensors"),
+            sizeBytes: 14_000_000_000,
+            recommendedContextLength: 32768,
+            tags: ["instruct", "chat", "long-context"],
+            downloads: 8_000_000,
+            likes: 22000
+        ),
+        HFModel(
+            modelId: "google/gemma-2-2b-it",
+            name: "Gemma 2 2B IT",
+            description: "Google's lightweight 2B instruction model",
+            parameters: "2B",
+            architecture: .gemma,
+            downloadURL: URL(string: "https://huggingface.co/google/gemma-2-2b-it/resolve/main/model.safetensors"),
+            sizeBytes: 4_000_000_000,
+            recommendedContextLength: 8192,
+            tags: ["instruct", "chat", "lightweight"],
+            downloads: 1_200_000,
+            likes: 5600
+        ),
+        HFModel(
+            modelId: "Qwen/Qwen2.5-7B-Instruct",
+            name: "Qwen2.5 7B Instruct",
+            description: "Alibaba's Qwen2.5 with improved reasoning",
+            parameters: "7B",
+            architecture: .qwen2,
+            downloadURL: URL(string: "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/resolve/main/model.safetensors"),
+            sizeBytes: 15_000_000_000,
+            recommendedContextLength: 32768,
+            tags: ["instruct", "chat", "multilingual"],
+            downloads: 3_000_000,
+            likes: 9800
+        ),
+        HFModel(
+            modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+            name: "SmolLM2 1.7B Instruct",
+            description: "Hugging Face's tiny but capable model",
+            parameters: "1.7B",
+            architecture: .llama,
+            downloadURL: URL(string: "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct/resolve/main/model.safetensors"),
+            sizeBytes: 3_400_000_000,
+            recommendedContextLength: 8192,
+            tags: ["instruct", "chat", "tiny"],
+            downloads: 800_000,
+            likes: 4200
+        )
+    ]
+}
diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift
index cbae850..96334ce 100644
--- a/ModelQuantizer/Services/HuggingFaceAPI.swift
+++ b/ModelQuantizer/Services/HuggingFaceAPI.swift
@@ -7,27 +7,28 @@
 
 import Foundation
 import Combine
+import Network
 
 /// Hugging Face API Service for model search and metadata
 class HuggingFaceAPI: ObservableObject {
     static let shared = HuggingFaceAPI()
-    
+
     private let baseURL = "https://huggingface.co/api"
     private let session: URLSession
     private var cancellables = Set<AnyCancellable>()
-    
+
     @Published var isSearching = false
     @Published var lastError: Error?
-    
+
     private init() {
         let config = URLSessionConfiguration.default
         config.timeoutIntervalForRequest = 30
         config.timeoutIntervalForResource = 300
         self.session = URLSession(configuration: config)
     }
-    
+
     // MARK: - Model Search
-    
+
     /// Search for models on Hugging Face Hub
     func searchModels(
         query: String,
@@ -35,49 +36,49 @@ class HuggingFaceAPI: ObservableObject {
         filter: ModelFilter = ModelFilter()
     ) async throws -> [HFModel] {
         var components = URLComponents(string: "\(baseURL)/models")!
-        
+
         var queryItems: [URLQueryItem] = [
             URLQueryItem(name: "limit", value: "\(limit)"),
             URLQueryItem(name: "full", value: "true"),
             URLQueryItem(name: "config", value: "true")
         ]
-        
+
         if !query.isEmpty {
             queryItems.append(URLQueryItem(name: "search", value: query))
         }
-        
+
         // Apply filters
         if filter.architecture != nil {
             queryItems.append(URLQueryItem(name: "filter", value: filter.architecture))
         }
-        
+
         if filter.sortBy != .downloads {
             queryItems.append(URLQueryItem(name: "sort", value: filter.sortBy.rawValue))
         }
-        
+
         components.queryItems = queryItems
-        
+
         guard let url = components.url else {
             throw HFAPIError.invalidURL
         }
-        
+
         var request = URLRequest(url: url)
         request.setValue("application/json", forHTTPHeaderField: "Accept")
-        
+
         // Add auth token if available
         if let token = getAuthToken() {
             request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
         }
-        
+
         await MainActor.run { isSearching = true }
         defer { Task { @MainActor in isSearching = false } }
-        
+
         let (data, response) = try await session.data(for: request)
-        
+
         guard let httpResponse = response as? HTTPURLResponse else {
             throw HFAPIError.invalidResponse
         }
-        
+
         switch httpResponse.statusCode {
         case 200:
             let models = try JSONDecoder().decode([HFAPIModel].self, from: data)
@@ -90,45 +91,45 @@ class HuggingFaceAPI: ObservableObject {
             throw HFAPIError.httpError(statusCode: httpResponse.statusCode)
         }
     }
-    
+
     /// Get detailed model info including files
     func getModelDetails(modelId: String) async throws -> ModelDetails {
         let url = URL(string: "\(baseURL)/models/\(modelId)")!
-        
+
         var request = URLRequest(url: url)
         request.setValue("application/json", forHTTPHeaderField: "Accept")
-        
+
         if let token = getAuthToken() {
             request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
         }
-        
+
         let (data, response) = try await session.data(for: request)
-        
+
         guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else {
             throw HFAPIError.invalidResponse
         }
-        
+
         return try JSONDecoder().decode(ModelDetails.self, from: data)
     }
-    
+
     /// Get model files (safetensors, bin, etc.)
     func getModelFiles(modelId: String) async throws -> [ModelFile] {
         let url = URL(string: "\(baseURL)/models/\(modelId)/tree/main")!
-        
+
         var request = URLRequest(url: url)
         request.setValue("application/json", forHTTPHeaderField: "Accept")
-        
+
         if let token = getAuthToken() {
             request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
         }
-        
+
         let (data, response) = try await session.data(for: request)
-        
+
         guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else {
             // Try fallback to main branch
             return try await getModelFilesFallback(modelId: modelId)
         }
-        
+
         let files = try JSONDecoder().decode([HFRepoFile].self, from: data)
         return files.compactMap { file in
             guard file.type == "file" else { return nil }
@@ -139,24 +140,24 @@ class HuggingFaceAPI: ObservableObject {
             )
         }
     }
-    
+
     private func getModelFilesFallback(modelId: String) async throws -> [ModelFile] {
         // Try to get files from the model page HTML
         let url = URL(string: "\(baseURL)/models/\(modelId)/tree/main")!
-        
+
         var request = URLRequest(url: url)
         request.setValue("application/json", forHTTPHeaderField: "Accept")
-        
+
         if let token = getAuthToken() {
             request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
         }
-        
+
         let (data, response) = try await session.data(for: request)
-        
+
         guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else {
             return []
         }
-        
+
         let files = try JSONDecoder().decode([HFRepoFile].self, from: data)
         return files.compactMap { file in
             guard file.type == "file" else { return nil }
@@ -167,102 +168,123 @@ class HuggingFaceAPI: ObservableObject {
             )
         }
     }
-    
+
     /// Download a model file with progress tracking
     func downloadModelFile(
         from url: URL,
         to destination: URL,
         progressHandler: @escaping (Double) -> Void
     ) async throws {
-        var request = URLRequest(url: url)
-        request.setValue("application/octet-stream", forHTTPHeaderField: "Accept")
-        
-        if let token = getAuthToken() {
-            request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
-        }
-        
-        let (asyncBytes, response) = try await session.bytes(for: request)
-        
-        guard let httpResponse = response as? HTTPURLResponse,
-              httpResponse.statusCode == 200 else {
-            throw HFAPIError.downloadFailed
-        }
-        
-        let totalBytes = response.expectedContentLength
-        var downloadedBytes: Int64 = 0
-        
-        // Create parent directory if needed
-        try? FileManager.default.createDirectory(
-            at: destination.deletingLastPathComponent(),
-            withIntermediateDirectories: true
-        )
-        
-        // Remove existing file
-        try? FileManager.default.removeItem(at: destination)
-        
-        // Create destination file before opening file handle
-        FileManager.default.createFile(atPath: destination.path, contents: nil)
-        
-        // Write file
-        let fileHandle = try FileHandle(forWritingTo: destination)
-        defer { try? fileHandle.close() }
-        
-        var lastProgressUpdate = Date()
-        
-        var buffer = Data(capacity: 65_536)
-
-        for try await byte in asyncBytes {
-            buffer.append(byte)
-            downloadedBytes += 1
-
-            if buffer.count >= 65_536 {
-                fileHandle.write(buffer)
-                buffer.removeAll(keepingCapacity: true)
-            }
+        try await enforceNetworkPolicy()
 
-            // Update progress every 100ms
-            if totalBytes > 0,
-               Date().timeIntervalSince(lastProgressUpdate) > 0.1 {
-                let progress = Double(downloadedBytes) / Double(totalBytes)
-                progressHandler(min(progress, 1.0))
-                lastProgressUpdate = Date()
-            }
-        }
+        var attempts = 0
+        let maxAttempts = 3
+
+        while true {
+            do {
+                var request = URLRequest(url: url)
+                request.setValue("application/octet-stream", forHTTPHeaderField: "Accept")
+
+                if let token = getAuthToken() {
+                    request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
+                }
+
+                // Create parent directory if needed
+                try? FileManager.default.createDirectory(
+                    at: destination.deletingLastPathComponent(),
+                    withIntermediateDirectories: true
+                )
 
-        if !buffer.isEmpty {
-            fileHandle.write(buffer)
+                var existingBytes: Int64 = 0
+                if FileManager.default.fileExists(atPath: destination.path) {
+                    let attrs = try? FileManager.default.attributesOfItem(atPath: destination.path)
+                    existingBytes = attrs?[.size] as? Int64 ?? 0
+                } else {
+                    FileManager.default.createFile(atPath: destination.path, contents: nil)
+                }
+
+                if existingBytes > 0 {
+                    request.setValue("bytes=\(existingBytes)-", forHTTPHeaderField: "Range")
+                }
+
+                let (asyncBytes, response) = try await session.bytes(for: request)
+
+                guard let httpResponse = response as? HTTPURLResponse,
+                      [200, 206].contains(httpResponse.statusCode) else {
+                    throw HFAPIError.downloadFailed
+                }
+
+                // A 200 means Range was ignored and the whole file is resent: restart at 0 instead of appending.
+                let resumeOffset = httpResponse.statusCode == 206 ? existingBytes : 0
+                let expectedLength = response.expectedContentLength
+                let totalBytes = expectedLength > 0 ? expectedLength + resumeOffset : expectedLength
+                var downloadedBytes: Int64 = resumeOffset
+                let fileHandle = try FileHandle(forWritingTo: destination)
+                defer { try? fileHandle.close() }
+                try fileHandle.truncate(atOffset: UInt64(resumeOffset))
+
+                var lastProgressUpdate = Date()
+                var buffer = Data(capacity: 65_536)
+
+                for try await byte in asyncBytes {
+                    buffer.append(byte)
+                    downloadedBytes += 1
+
+                    if buffer.count >= 65_536 {
+                        try fileHandle.write(contentsOf: buffer)
+                        buffer.removeAll(keepingCapacity: true)
+                    }
+
+                    if totalBytes > 0,
+                       Date().timeIntervalSince(lastProgressUpdate) > 0.1 {
+                        let progress = Double(downloadedBytes) / Double(totalBytes)
+                        progressHandler(min(progress, 1.0))
+                        lastProgressUpdate = Date()
+                    }
+                }
+
+                if !buffer.isEmpty {
+                    try fileHandle.write(contentsOf: buffer)
+                }
+
+                progressHandler(1.0)
+                return
+            } catch {
+                attempts += 1
+                if attempts >= maxAttempts {
+                    throw error
+                }
+                try await Task.sleep(nanoseconds: UInt64(attempts) * 500_000_000)
+            }
         }
-        
-        progressHandler(1.0)
     }
-    
+
     /// Get download URL for a specific file
     func getDownloadURL(modelId: String, filename: String) -> URL {
         URL(string: "https://huggingface.co/\(modelId)/resolve/main/\(filename)")!
     }
-    
+
     // MARK: - Private Methods
-    
+
     private func convertToHFModels(_ apiModels: [HFAPIModel]) async throws -> [HFModel] {
         var models: [HFModel] = []
-        
+
         for apiModel in apiModels {
             // Extract parameters from tags or model card
             let parameters = extractParameters(from: apiModel)
-            
+
             // Detect architecture
             let architecture = detectArchitecture(from: apiModel)
-            
+
             // Get model size from siblings
             let sizeBytes = apiModel.siblings?.reduce(0) { total, sibling in
-                // Estimate based on file extensions
                 if sibling.rfilename.hasSuffix(".safetensors") ||
                    sibling.rfilename.hasSuffix(".bin") {
-                    return total + 500_000_000 // Rough estimate
+                    return total + Int(sibling.size ?? 0)
                 }
                 return total
             } ?? 0
-            
+
             // Get primary download URL
             let downloadURL = apiModel.siblings?.first { sibling in
                 sibling.rfilename.hasSuffix("model.safetensors") ||
@@ -270,7 +292,7 @@ class HuggingFaceAPI: ObservableObject {
             }.flatMap { sibling in
                 URL(string: "https://huggingface.co/\(apiModel.id)/resolve/main/\(sibling.rfilename)")
             }
-            
+
             let model = HFModel(
                 modelId: apiModel.id,
                 name: apiModel.modelId.components(separatedBy: "/").last ?? apiModel.modelId,
@@ -284,13 +306,13 @@ class HuggingFaceAPI: ObservableObject {
                 downloads: apiModel.downloads,
                 likes: apiModel.likes
             )
-            
+
             models.append(model)
         }
-        
+
         return models
     }
-    
+
     private func extractParameters(from model: HFAPIModel) -> String {
         // Try to extract from tags
         for tag in model.tags {
@@ -301,7 +323,7 @@ class HuggingFaceAPI: ObservableObject {
                 }
             }
         }
-        
+
         // Try to extract from model name
         let name = model.modelId.lowercased()
         let patterns = [
@@ -310,7 +332,7 @@ class HuggingFaceAPI: ObservableObject {
             "-(\\d+)b",
             "_(\\d+)b"
         ]
-        
+
         for pattern in patterns {
             if let regex = try? NSRegularExpression(pattern: pattern, options: []),
                let match = regex.firstMatch(in: name, options: [], range: NSRange(location: 0, length: name.utf16.count)),
@@ -319,14 +341,14 @@ class HuggingFaceAPI: ObservableObject {
                 return "\(value)B"
             }
         }
-        
+
         return "Unknown"
     }
-    
+
     private func detectArchitecture(from model: HFAPIModel) -> ModelArchitecture {
         let tags = model.tags.map { $0.lowercased() }
         let id = model.id.lowercased()
-        
+
         if tags.contains("llama") || id.contains("llama") {
             return .llama
         } else if tags.contains("mistral") || id.contains("mistral") {
@@ -344,17 +366,43 @@ class HuggingFaceAPI: ObservableObject {
         } else if tags.contains("bert") || id.contains("bert") {
             return .bert
         }
-        
+
         return .custom
     }
-    
+
 
     func setAuthToken(_ token: String?) {
-        if let token = token {
-            UserDefaults.standard.set(token, forKey: "hf_auth_token")
-        } else {
+        KeychainTokenStore.writeToken(token) // Keychain-backed; a nil or empty token clears the stored value
+    }
+
+    /// Returns the saved token, migrating a legacy UserDefaults copy into the keychain on first use.
+    func getAuthToken() -> String? {
+        if let keychain = KeychainTokenStore.readToken() { return keychain }
+        if let legacy = UserDefaults.standard.string(forKey: "hf_auth_token"), !legacy.isEmpty {
+            KeychainTokenStore.writeToken(legacy)
+            // Drop the legacy copy only once the keychain really holds it, so a failed write cannot lose the token
+            guard KeychainTokenStore.readToken() != nil else { return legacy }
             UserDefaults.standard.removeObject(forKey: "hf_auth_token")
+            return legacy
+        }
+        return nil
+    }
+
+    private func enforceNetworkPolicy() async throws {
+        let wifiOnly = UserDefaults.standard.object(forKey: "wifi_only") as? Bool ?? true // defaults to true when the key is unset or not a Bool
+        guard wifiOnly else { return }
+        // Take one path reading from a throwaway monitor and resume with whether it is a satisfied Wi-Fi path
+        let monitor = NWPathMonitor()
+        let queue = DispatchQueue(label: "hf.network.policy")
+        let isWifi = await withCheckedContinuation { continuation in
+            monitor.pathUpdateHandler = { path in
+                let ok = path.status == .satisfied && path.usesInterfaceType(.wifi)
+                continuation.resume(returning: ok) // NOTE(review): relies on no second callback before cancel() takes effect — confirm
+                monitor.cancel() // a single reading is enough; stop further updates
+            }
+            monitor.start(queue: queue)
         }
+        guard isWifi else { throw HFAPIError.downloadFailed }
     }
 }
 
@@ -365,7 +413,7 @@ struct ModelFilter {
     var sortBy: SortOption = .downloads
     var task: String?
     var library: String?
-    
+
     enum SortOption: String {
         case downloads = "downloads"
         case likes = "likes"
@@ -391,13 +439,13 @@ struct ModelDetails: Codable {
     let pipeline_tag: String?
     let cardData: ModelCardData?
     let config: ModelConfig?
-    
+
     struct ModelCardData: Codable {
         let description: String?
         let license: String?
         let language: [String]?
     }
-    
+
     struct ModelConfig: Codable {
         let architectures: [String]?
         let model_type: String?
@@ -413,7 +461,7 @@ enum HFAPIError: Error, LocalizedError {
     case httpError(statusCode: Int)
     case downloadFailed
     case invalidData
-    
+
     var errorDescription: String? {
         switch self {
         case .invalidURL:
diff --git a/ModelQuantizer/Services/KeychainTokenStore.swift b/ModelQuantizer/Services/KeychainTokenStore.swift
new file mode 100644
index 0000000..26c9635
--- /dev/null
+++ b/ModelQuantizer/Services/KeychainTokenStore.swift
@@ -0,0 +1,48 @@
+import Foundation
+import Security
+
+/// Keychain-backed storage for the Hugging Face access token (one generic-password item).
+enum KeychainTokenStore {
+    private static let service = "com.modelquantizer.hf"
+    private static let account = "hf_auth_token"
+    /// Attributes that identify the single item this store owns.
+    private static var itemQuery: [String: Any] {
+        [kSecClass as String: kSecClassGenericPassword,
+         kSecAttrService as String: service,
+         kSecAttrAccount as String: account]
+    }
+
+    /// Returns the stored token, or `nil` when the item is missing, unreadable, not UTF-8, or empty.
+    static func readToken() -> String? {
+        var query = itemQuery
+        query[kSecReturnData as String] = true
+        query[kSecMatchLimit as String] = kSecMatchLimitOne
+        var item: CFTypeRef?
+        let status = SecItemCopyMatching(query as CFDictionary, &item)
+        guard status == errSecSuccess,
+              let data = item as? Data,
+              let token = String(data: data, encoding: .utf8),
+              !token.isEmpty else {
+            return nil
+        }
+        return token
+    }
+
+    /// Stores `token`; a `nil` or empty token removes the stored item instead.
+    /// - Returns: `true` when the keychain ended up in the requested state (callers may ignore it).
+    @discardableResult
+    static func writeToken(_ token: String?) -> Bool {
+        guard let token, !token.isEmpty, let data = token.data(using: .utf8) else {
+            let status = SecItemDelete(itemQuery as CFDictionary)
+            return status == errSecSuccess || status == errSecItemNotFound
+        }
+        // Update in place first; deleting before adding would lose the existing token if the add failed
+        let attributes: [String: Any] = [
+            kSecValueData as String: data,
+            kSecAttrAccessible as String: kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly
+        ]
+        let updateStatus = SecItemUpdate(itemQuery as CFDictionary, attributes as CFDictionary)
+        guard updateStatus == errSecItemNotFound else { return updateStatus == errSecSuccess }
+        return SecItemAdd(itemQuery.merging(attributes) { _, new in new } as CFDictionary, nil) == errSecSuccess
+    }
+}
diff --git a/ModelQuantizer/Services/SettingsSuggester.swift b/ModelQuantizer/Services/SettingsSuggester.swift
index dfa7e0f..e2e02e1 100644
--- a/ModelQuantizer/Services/SettingsSuggester.swift
+++ b/ModelQuantizer/Services/SettingsSuggester.swift
@@ -235,7 +235,7 @@ class SettingsSuggester {
             estimatedMemoryUsage: estimatedMemoryUsage,
             estimatedLoadTime: estimatedLoadTime,
             recommendedBatchSize: settings.batchSize,
-            canUseGPU: settings.useGPU && deviceClass.rawValue >= DeviceCapabilityProfile.DeviceClass.midRange.rawValue,
+            canUseGPU: settings.useGPU && deviceSupportsGPU(deviceClass),
             canUseNeuralEngine: settings.useNeuralEngine && profile.neuralEngineCores > 0
         )
     }
@@ -282,6 +282,15 @@ class SettingsSuggester {
             description: "Memory-optimized settings for large model"
         )
     }
+
+    /// Reports whether `deviceClass` is treated as GPU-capable: every class except `.entryLevel`.
+    /// The switch lists each case explicitly so that adding a device class forces a decision here.
+    private func deviceSupportsGPU(_ deviceClass: DeviceCapabilityProfile.DeviceClass) -> Bool {
+        switch deviceClass {
+        case .midRange, .highEnd, .flagship, .ultra: return true
+        case .entryLevel: return false
+        }
+    }
     
     private func adjustForThermalState(
         original: QuantizationRecommendation,
diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
index e82d937..678f326 100644
--- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift
+++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
@@ -87,7 +87,31 @@ class QuantizeViewModel: ObservableObject {
     }
     
     private func updateProgress(from status: QuantizationStatus) {
-        // Progress is now directly from the quantizer
+        switch status { // Mirror the quantizer status into `progress` and `currentStage`
+        case .idle:
+            progress = 0
+            currentStage = ""
+        case .downloading(let value):
+            progress = value // the status carries its own fraction
+            currentStage = "Downloading"
+        case .analyzing:
+            progress = max(progress, 0.30) // stages without a fraction only raise progress to a floor, never lower it
+            currentStage = "Analyzing"
+        case .quantizing(let value, let stage):
+            progress = value
+            currentStage = stage // stage label is supplied by the status itself
+        case .optimizing:
+            progress = max(progress, 0.95)
+            currentStage = "Optimizing"
+        case .validating:
+            progress = max(progress, 0.97)
+            currentStage = "Validating"
+        case .completed:
+            progress = 1.0
+            currentStage = "Completed"
+        case .failed(let error):
+            currentStage = error // progress is left where it stopped; the stage shows the failure text
+        }
     }
     
     private func updateDeviceProfile() {
@@ -107,87 +131,7 @@ class QuantizeViewModel: ObservableObject {
     }
     
     private func loadPopularModels() {
-        // Load a curated list of popular models while we fetch from API
-        models = [
-            HFModel(
-                modelId: "microsoft/Phi-3-mini-4k-instruct",
-                name: "Phi-3 Mini 4K",
-                description: "Microsoft's efficient 3.8B parameter model with excellent performance",
-                parameters: "3.8B",
-                architecture: .phi,
-                downloadURL: URL(string: "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors"),
-                sizeBytes: 7_600_000_000,
-                recommendedContextLength: 4096,
-                tags: ["instruct", "chat", "efficient"],
-                downloads: 2_500_000,
-                likes: 8500
-            ),
-            HFModel(
-                modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct",
-                name: "Llama 3.1 8B Instruct",
-                description: "Meta's latest 8B parameter instruction-tuned model",
-                parameters: "8B",
-                architecture: .llama,
-                downloadURL: URL(string: "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/model.safetensors"),
-                sizeBytes: 16_000_000_000,
-                recommendedContextLength: 8192,
-                tags: ["instruct", "chat", "meta"],
-                downloads: 5_000_000,
-                likes: 15000
-            ),
-            HFModel(
-                modelId: "mistralai/Mistral-7B-Instruct-v0.3",
-                name: "Mistral 7B Instruct v0.3",
-                description: "Mistral's powerful 7B instruction model",
-                parameters: "7B",
-                architecture: .mistral,
-                downloadURL: URL(string: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/model.safetensors"),
-                sizeBytes: 14_000_000_000,
-                recommendedContextLength: 32768,
-                tags: ["instruct", "chat", "long-context"],
-                downloads: 8_000_000,
-                likes: 22000
-            ),
-            HFModel(
-                modelId: "google/gemma-2-2b-it",
-                name: "Gemma 2 2B IT",
-                description: "Google's lightweight 2B instruction model",
-                parameters: "2B",
-                architecture: .gemma,
-                downloadURL: URL(string: "https://huggingface.co/google/gemma-2-2b-it/resolve/main/model.safetensors"),
-                sizeBytes: 4_000_000_000,
-                recommendedContextLength: 8192,
-                tags: ["instruct", "chat", "lightweight"],
-                downloads: 1_200_000,
-                likes: 5600
-            ),
-            HFModel(
-                modelId: "Qwen/Qwen2.5-7B-Instruct",
-                name: "Qwen2.5 7B Instruct",
-                description: "Alibaba's Qwen2.5 with improved reasoning",
-                parameters: "7B",
-                architecture: .qwen2,
-                downloadURL: URL(string: "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/resolve/main/model.safetensors"),
-                sizeBytes: 15_000_000_000,
-                recommendedContextLength: 32768,
-                tags: ["instruct", "chat", "multilingual"],
-                downloads: 3_000_000,
-                likes: 9800
-            ),
-            HFModel(
-                modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
-                name: "SmolLM2 1.7B Instruct",
-                description: "Hugging Face's tiny but capable model",
-                parameters: "1.7B",
-                architecture: .llama,
-                downloadURL: URL(string: "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct/resolve/main/model.safetensors"),
-                sizeBytes: 3_400_000_000,
-                recommendedContextLength: 8192,
-                tags: ["instruct", "chat", "tiny"],
-                downloads: 800_000,
-                likes: 4200
-            )
-        ]
+        models = ModelCatalog.curatedModels
         
         filteredModels = models
         
@@ -295,7 +239,7 @@ class QuantizeViewModel: ObservableObject {
         guard let model = selectedModel else { return }
         
         // Check if model requires authentication
-        if model.modelId.hasPrefix("meta-llama/") && HuggingFaceAPI.shared.getAuthToken() == nil {
+        if model.modelId.hasPrefix("meta-llama/") && hfAPI.getAuthToken() == nil {
             errorMessage = "This model requires Hugging Face authentication. Please add your token in Settings."
             showError = true
             return
@@ -364,11 +308,3 @@ class QuantizeViewModel: ObservableObject {
         }
     }
 }
-
-// MARK: - Hugging Face API Token Extension
-
-extension HuggingFaceAPI {
-    func getAuthToken() -> String? {
-        UserDefaults.standard.string(forKey: "hf_auth_token")
-    }
-}
diff --git a/ModelQuantizer/Views/ModelDownloadView.swift b/ModelQuantizer/Views/ModelDownloadView.swift
index af7e12e..721ce5b 100644
--- a/ModelQuantizer/Views/ModelDownloadView.swift
+++ b/ModelQuantizer/Views/ModelDownloadView.swift
@@ -665,9 +665,9 @@ enum ModelCategory: String, CaseIterable, Identifiable {
         case .chat: return "bubble.left.and.bubble.right"
         case .code: return "chevron.left.forwardslash.chevron.right"
         case .instruct: return "text.bubble"
-        case .llama: return " Llama"
-        case .mistral: return "Mistral"
-        case .qwen: return "Qwen"
+        case .llama: return "flame"
+        case .mistral: return "wind"
+        case .qwen: return "globe.asia.australia"
         }
     }
     
@@ -697,104 +697,7 @@ class ModelDownloadViewModel: ObservableObject {
     }
     
     private func loadModels() {
-        models = [
-            HFModel(
-                modelId: "microsoft/Phi-3-mini-4k-instruct",
-                name: "Phi-3 Mini 4K",
-                description: "Microsoft's efficient 3.8B parameter model with excellent performance for its size",
-                parameters: "3.8B",
-                architecture: .phi,
-                sizeBytes: 2_400_000_000,
-                recommendedContextLength: 4096,
-                tags: ["instruct", "chat", "efficient"],
-                downloads: 2_500_000,
-                likes: 8500
-            ),
-            HFModel(
-                modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct",
-                name: "Llama 3.1 8B Instruct",
-                description: "Meta's latest 8B parameter instruction-tuned model with improved reasoning",
-                parameters: "8B",
-                architecture: .llama,
-                sizeBytes: 16_000_000_000,
-                recommendedContextLength: 8192,
-                tags: ["instruct", "chat", "meta"],
-                downloads: 5_000_000,
-                likes: 15000
-            ),
-            HFModel(
-                modelId: "mistralai/Mistral-7B-Instruct-v0.3",
-                name: "Mistral 7B Instruct v0.3",
-                description: "Mistral's powerful 7B instruction model with 32K context support",
-                parameters: "7B",
-                architecture: .mistral,
-                sizeBytes: 14_000_000_000,
-                recommendedContextLength: 32768,
-                tags: ["instruct", "chat", "long-context"],
-                downloads: 8_000_000,
-                likes: 22000
-            ),
-            HFModel(
-                modelId: "google/gemma-2-2b-it",
-                name: "Gemma 2 2B IT",
-                description: "Google's lightweight 2B instruction model, great for mobile devices",
-                parameters: "2B",
-                architecture: .gemma,
-                sizeBytes: 1_600_000_000,
-                recommendedContextLength: 8192,
-                tags: ["instruct", "chat", "lightweight"],
-                downloads: 1_200_000,
-                likes: 5600
-            ),
-            HFModel(
-                modelId: "Qwen/Qwen2.5-7B-Instruct",
-                name: "Qwen2.5 7B Instruct",
-                description: "Alibaba's Qwen2.5 with improved reasoning and multilingual support",
-                parameters: "7B",
-                architecture: .qwen2,
-                sizeBytes: 15_000_000_000,
-                recommendedContextLength: 32768,
-                tags: ["instruct", "chat", "multilingual"],
-                downloads: 3_000_000,
-                likes: 9800
-            ),
-            HFModel(
-                modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
-                name: "SmolLM2 1.7B Instruct",
-                description: "Hugging Face's tiny but capable model, perfect for edge devices",
-                parameters: "1.7B",
-                architecture: .llama,
-                sizeBytes: 3_400_000_000,
-                recommendedContextLength: 8192,
-                tags: ["instruct", "chat", "tiny"],
-                downloads: 800_000,
-                likes: 4200
-            ),
-            HFModel(
-                modelId: "codellama/CodeLlama-7b-Instruct-hf",
-                name: "CodeLlama 7B Instruct",
-                description: "Meta's code-specialized model for programming tasks",
-                parameters: "7B",
-                architecture: .llama,
-                sizeBytes: 13_000_000_000,
-                recommendedContextLength: 16384,
-                tags: ["code", "instruct", "programming"],
-                downloads: 4_500_000,
-                likes: 12000
-            ),
-            HFModel(
-                modelId: "deepseek-ai/deepseek-coder-6.7b-instruct",
-                name: "DeepSeek Coder 6.7B",
-                description: "DeepSeek's code model with strong performance on coding benchmarks",
-                parameters: "6.7B",
-                architecture: .llama,
-                sizeBytes: 13_400_000_000,
-                recommendedContextLength: 16384,
-                tags: ["code", "instruct", "programming"],
-                downloads: 2_000_000,
-                likes: 7500
-            )
-        ]
+        models = ModelCatalog.curatedModels
         
         featuredModels = Array(models.prefix(4))
     }
diff --git a/ModelQuantizer/Views/QuantizeView.swift b/ModelQuantizer/Views/QuantizeView.swift
index edba13e..9b99b96 100644
--- a/ModelQuantizer/Views/QuantizeView.swift
+++ b/ModelQuantizer/Views/QuantizeView.swift
@@ -226,7 +226,8 @@ struct QuantizeView: View {
                 
                 // Quantize button
                 Button(action: {
-                    if model.modelId.hasPrefix("meta-llama/") {
+                    if model.modelId.hasPrefix("meta-llama/") &&
+                        HuggingFaceAPI.shared.getAuthToken() == nil {
                         showingAuthAlert = true
                     } else {
                         showingQuantizationSheet = true
diff --git a/ModelQuantizer/Views/SettingsView.swift b/ModelQuantizer/Views/SettingsView.swift
index 544c926..0d61ab9 100644
--- a/ModelQuantizer/Views/SettingsView.swift
+++ b/ModelQuantizer/Views/SettingsView.swift
@@ -8,7 +8,6 @@
 import SwiftUI
 
 struct SettingsView: View {
-    @AppStorage("hf_auth_token") private var authToken = ""
     @AppStorage("auto_quantize") private var autoQuantize = false
     @AppStorage("default_quantization") private var defaultQuantization = "Q4_K_M"
     @AppStorage("save_history") private var saveHistory = true
@@ -17,6 +16,7 @@ struct SettingsView: View {
     @State private var showingTokenInfo = false
     @State private var showingClearConfirmation = false
     @State private var cacheSize: Int64 = 0
+    @State private var authToken = ""
     
     var body: some View {
         ScrollView {
@@ -41,8 +41,12 @@ struct SettingsView: View {
             .padding()
         }
         .onAppear {
+            authToken = HuggingFaceAPI.shared.getAuthToken() ?? ""
             calculateCacheSize()
         }
+        .onChange(of: authToken) { newValue in
+            HuggingFaceAPI.shared.setAuthToken(newValue)
+        }
         .alert("Hugging Face Token", isPresented: $showingTokenInfo) {
             Button("OK", role: .cancel) {}
         } message: {

From 865ddd4ea3a14418963cde976a4785fcd5d229f9 Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 20:02:34 -0500
Subject: [PATCH 3/8] feat: continue hardening with checksums, richer metadata,
 and job telemetry fields

---
 ModelQuantizer/Models/ModelTypes.swift        |  2 +
 ModelQuantizer/Services/HuggingFaceAPI.swift  | 29 ++++++++
 ModelQuantizer/Services/ModelQuantizer.swift  |  4 +-
 .../Services/QuantizationEngine.swift         | 70 ++++++++++++++++---
 4 files changed, 94 insertions(+), 11 deletions(-)

diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift
index 7b8b43c..6b6eb3f 100644
--- a/ModelQuantizer/Models/ModelTypes.swift
+++ b/ModelQuantizer/Models/ModelTypes.swift
@@ -206,6 +206,8 @@ struct QuantizationJob: Codable, Identifiable {
     let startTime: Date
     let endTime: Date
     let contextLength: Int
+    let estimatedTokensPerSecond: Double?
+    let validationScore: Double?
     
     var duration: TimeInterval {
         return endTime.timeIntervalSince(startTime)
diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift
index 96334ce..73cb71b 100644
--- a/ModelQuantizer/Services/HuggingFaceAPI.swift
+++ b/ModelQuantizer/Services/HuggingFaceAPI.swift
@@ -8,6 +8,7 @@
 import Foundation
 import Combine
 import Network
+import CryptoKit
 
 /// Hugging Face API Service for model search and metadata
 class HuggingFaceAPI: ObservableObject {
@@ -33,12 +34,14 @@ class HuggingFaceAPI: ObservableObject {
     func searchModels(
         query: String,
         limit: Int = 50,
+        offset: Int = 0,
         filter: ModelFilter = ModelFilter()
     ) async throws -> [HFModel] {
         var components = URLComponents(string: "\(baseURL)/models")!
 
         var queryItems: [URLQueryItem] = [
             URLQueryItem(name: "limit", value: "\(limit)"),
+            URLQueryItem(name: "offset", value: "\(offset)"),
             URLQueryItem(name: "full", value: "true"),
             URLQueryItem(name: "config", value: "true")
         ]
@@ -196,9 +199,13 @@ class HuggingFaceAPI: ObservableObject {
                 )
 
                 var existingBytes: Int64 = 0
+                var hasher = SHA256()
                 if FileManager.default.fileExists(atPath: destination.path) {
                     let attrs = try? FileManager.default.attributesOfItem(atPath: destination.path)
                     existingBytes = attrs?[.size] as? Int64 ?? 0
+                    if existingBytes > 0, let existingData = try? Data(contentsOf: destination) {
+                        hasher.update(data: existingData)
+                    }
                 } else {
                     FileManager.default.createFile(atPath: destination.path, contents: nil)
                 }
@@ -213,6 +220,7 @@ class HuggingFaceAPI: ObservableObject {
                       [200, 206].contains(httpResponse.statusCode) else {
                     throw HFAPIError.downloadFailed
                 }
+                let expectedChecksum = expectedSHA256(from: httpResponse)
 
                 let totalBytes = response.expectedContentLength > 0
                     ? response.expectedContentLength + existingBytes
@@ -232,6 +240,7 @@ class HuggingFaceAPI: ObservableObject {
 
                     if buffer.count >= 65_536 {
                         fileHandle.write(buffer)
+                        hasher.update(data: buffer)
                         buffer.removeAll(keepingCapacity: true)
                     }
 
@@ -245,6 +254,14 @@ class HuggingFaceAPI: ObservableObject {
 
                 if !buffer.isEmpty {
                     fileHandle.write(buffer)
+                    hasher.update(data: buffer)
+                }
+
+                if let expectedChecksum {
+                    let digest = hasher.finalize().map { String(format: "%02x", $0) }.joined()
+                    guard digest.lowercased() == expectedChecksum.lowercased() else {
+                        throw HFAPIError.invalidData
+                    }
                 }
 
                 progressHandler(1.0)
@@ -404,6 +421,18 @@ class HuggingFaceAPI: ObservableObject {
         }
         guard isWifi else { throw HFAPIError.downloadFailed }
     }
+
+    /// Extracts the lowercase hex SHA-256 advertised by `response`, or `nil` if none is present or well-formed.
+    private func expectedSHA256(from response: HTTPURLResponse) -> String? {
+        let etag = response.value(forHTTPHeaderField: "x-linked-etag") ?? response.value(forHTTPHeaderField: "etag")
+        let tagged = etag.flatMap { tag in tag.range(of: "sha256:").map { String(tag[$0.upperBound...]) } }
+        guard let raw = response.value(forHTTPHeaderField: "x-checksum-sha256") ?? tagged else { return nil }
+        // Strip quotes/whitespace, then insist on 64 hex digits: comparing the digest against anything
+        // else (base64 checksum, truncated header) would reject a perfectly good download as corrupt.
+        let hex = raw.replacingOccurrences(of: "\"", with: "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
+        guard hex.count == 64, hex.allSatisfy({ $0.isASCII && $0.isHexDigit }) else { return nil }
+        return hex
+    }
 }
 
 // MARK: - Supporting Types
diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift
index f241c65..156bbdb 100644
--- a/ModelQuantizer/Services/ModelQuantizer.swift
+++ b/ModelQuantizer/Services/ModelQuantizer.swift
@@ -117,7 +117,9 @@ class ModelQuantizer: ObservableObject {
                 outputSize: (try? fileManager.attributesOfItem(atPath: outputURL.path)[.size] as? Int64) ?? 0,
                 startTime: startTime,
                 endTime: Date(),
-                contextLength: contextLength ?? model.recommendedContextLength
+                contextLength: contextLength ?? model.recommendedContextLength,
+                estimatedTokensPerSecond: nil,
+                validationScore: nil
             )
 
             quantizationHistory.insert(job, at: 0)
diff --git a/ModelQuantizer/Services/QuantizationEngine.swift b/ModelQuantizer/Services/QuantizationEngine.swift
index 9aecb3d..8246be3 100644
--- a/ModelQuantizer/Services/QuantizationEngine.swift
+++ b/ModelQuantizer/Services/QuantizationEngine.swift
@@ -116,7 +116,9 @@ class QuantizationEngine: ObservableObject {
                 outputSize: (try? fileManager.attributesOfItem(atPath: quantizedURL.path)[.size] as? Int64) ?? 0,
                 startTime: startTime,
                 endTime: Date(),
-                contextLength: contextLength
+                contextLength: contextLength,
+                estimatedTokensPerSecond: nil,
+                validationScore: nil
             )
             
             await MainActor.run {
@@ -206,6 +208,13 @@ class QuantizationEngine: ObservableObject {
         let tensorCount: Int
         let totalParameters: Int64
         let originalSize: Int64
+        let contextLength: Int
+        let embeddingLength: Int
+        let feedForwardLength: Int
+        let headCount: Int
+        let headCountKV: Int
+        let rmsEpsilon: Float
+        let ropeDimensionCount: Int
     }
 
     private func analyzeModel(files: [URL], model: HFModel) async throws -> ModelAnalysis {
@@ -216,6 +225,13 @@ class QuantizationEngine: ObservableObject {
         var tensorCount = 0
         var totalParameters: Int64 = 0
         var totalSize: Int64 = 0
+        var contextLength = model.recommendedContextLength
+        var embeddingLength = 4096
+        var feedForwardLength = 11008
+        var headCount = 32
+        var headCountKV = 32
+        var rmsEpsilon: Float = 1e-5
+        var ropeDimensionCount = 128
         
         // Analyze safetensors files
         for file in files where file.pathExtension == "safetensors" {
@@ -247,6 +263,33 @@ class QuantizationEngine: ObservableObject {
                     else if normalized.contains("gpt") { architecture = .gpt2 }
                     else if normalized.contains("bert") { architecture = .bert }
                 }
+                if let context = config["max_position_embeddings"] as? Int {
+                    contextLength = context
+                }
+                if let hidden = config["hidden_size"] as? Int {
+                    embeddingLength = hidden
+                }
+                if let ff = config["intermediate_size"] as? Int {
+                    feedForwardLength = ff
+                }
+                if let heads = config["num_attention_heads"] as? Int {
+                    headCount = heads
+                }
+                if let kvHeads = config["num_key_value_heads"] as? Int {
+                    headCountKV = kvHeads
+                } else {
+                    headCountKV = headCount
+                }
+                if let eps = config["rms_norm_eps"] as? Double {
+                    rmsEpsilon = Float(eps)
+                } else if let eps = config["layer_norm_epsilon"] as? Double {
+                    rmsEpsilon = Float(eps)
+                }
+                if let ropeDim = config["rope_dim"] as? Int {
+                    ropeDimensionCount = ropeDim
+                } else if headCount > 0 {
+                    ropeDimensionCount = max(32, embeddingLength / headCount)
+                }
             }
         }
         
@@ -260,7 +303,14 @@ class QuantizationEngine: ObservableObject {
             layerCount: layerCount,
             tensorCount: tensorCount,
             totalParameters: totalParameters,
-            originalSize: totalSize
+            originalSize: totalSize,
+            contextLength: contextLength,
+            embeddingLength: embeddingLength,
+            feedForwardLength: feedForwardLength,
+            headCount: headCount,
+            headCountKV: headCountKV,
+            rmsEpsilon: rmsEpsilon,
+            ropeDimensionCount: ropeDimensionCount
         )
     }
     
@@ -368,15 +418,26 @@ class QuantizationEngine: ObservableObject {
     }
     
+    /// Writes the architecture-namespaced GGUF hyperparameter keys for `analysis`.
+    ///
+    /// Each key is prefixed with the lowercased architecture raw value, e.g.
+    /// `<arch>.context_length`. The integer dimensions are parsed from model
+    /// configuration, so they are converted with `UInt32(clamping:)`: a negative or
+    /// oversized value saturates instead of trapping the process.
+    ///
+    /// - Parameters:
+    ///   - builder: GGUF builder that receives the metadata entries.
+    ///   - analysis: Analyzed model whose dimensions are written out.
     private func addArchitectureMetadata(to builder: inout GGUFBuilder, analysis: ModelAnalysis) {
-        // Add context length
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).context_length", value: .uint32(4096))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).embedding_length", value: .uint32(4096))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).block_count", value: .uint32(UInt32(analysis.layerCount)))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).feed_forward_length", value: .uint32(11008))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).attention.head_count", value: .uint32(32))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).attention.head_count_kv", value: .uint32(32))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).attention.layer_norm_rms_epsilon", value: .float32(1e-5))
-        builder.addMetadata(key: "\(analysis.architecture.rawValue.lowercased()).rope.dimension_count", value: .uint32(128))
+        let arch = analysis.architecture.rawValue.lowercased()
+        // Floors of 256 / 1 guard against zero or negative analysis values.
+        builder.addMetadata(key: "\(arch).context_length", value: .uint32(UInt32(clamping: max(256, analysis.contextLength))))
+        builder.addMetadata(key: "\(arch).embedding_length", value: .uint32(UInt32(clamping: max(1, analysis.embeddingLength))))
+        builder.addMetadata(key: "\(arch).block_count", value: .uint32(UInt32(clamping: analysis.layerCount)))
+        builder.addMetadata(key: "\(arch).feed_forward_length", value: .uint32(UInt32(clamping: max(1, analysis.feedForwardLength))))
+        builder.addMetadata(key: "\(arch).attention.head_count", value: .uint32(UInt32(clamping: max(1, analysis.headCount))))
+        builder.addMetadata(key: "\(arch).attention.head_count_kv", value: .uint32(UInt32(clamping: max(1, analysis.headCountKV))))
+        builder.addMetadata(key: "\(arch).attention.layer_norm_rms_epsilon", value: .float32(analysis.rmsEpsilon))
+        builder.addMetadata(key: "\(arch).rope.dimension_count", value: .uint32(UInt32(clamping: max(1, analysis.ropeDimensionCount))))
     }
     
     private func processSafeTensorsFile(_ url: URL, into builder: inout GGUFBuilder) async throws {

From 686a9ea9d8b8838bc6479198be000d821058b338 Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 20:06:30 -0500
Subject: [PATCH 4/8] fix: harden resumable downloads and improve HF error
 surfacing

---
 ModelQuantizer/Services/HuggingFaceAPI.swift  | 39 ++++++++++++++++---
 .../ViewModels/QuantizeViewModel.swift        |  2 +-
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift
index 73cb71b..c3131f5 100644
--- a/ModelQuantizer/Services/HuggingFaceAPI.swift
+++ b/ModelQuantizer/Services/HuggingFaceAPI.swift
@@ -203,8 +203,14 @@ class HuggingFaceAPI: ObservableObject {
                 if FileManager.default.fileExists(atPath: destination.path) {
                     let attrs = try? FileManager.default.attributesOfItem(atPath: destination.path)
                     existingBytes = attrs?[.size] as? Int64 ?? 0
-                    if existingBytes > 0, let existingData = try? Data(contentsOf: destination) {
-                        hasher.update(data: existingData)
+                    if existingBytes > 0,
+                       let existingHandle = try? FileHandle(forReadingFrom: destination) {
+                        defer { try? existingHandle.close() }
+                        while true {
+                            let chunk = try existingHandle.read(upToCount: 65_536) ?? Data()
+                            if chunk.isEmpty { break }
+                            hasher.update(data: chunk)
+                        }
                     }
                 } else {
                     FileManager.default.createFile(atPath: destination.path, contents: nil)
@@ -220,6 +226,12 @@ class HuggingFaceAPI: ObservableObject {
                       [200, 206].contains(httpResponse.statusCode) else {
                     throw HFAPIError.downloadFailed
                 }
+                if existingBytes > 0 && httpResponse.statusCode == 200 {
+                    try? FileManager.default.removeItem(at: destination)
+                    FileManager.default.createFile(atPath: destination.path, contents: nil)
+                    existingBytes = 0
+                    hasher = SHA256()
+                }
                 let expectedChecksum = expectedSHA256(from: httpResponse)
 
                 let totalBytes = response.expectedContentLength > 0
@@ -412,14 +424,28 @@ class HuggingFaceAPI: ObservableObject {
         let monitor = NWPathMonitor()
         let queue = DispatchQueue(label: "hf.network.policy")
         let isWifi = await withCheckedContinuation { continuation in
+            let lock = NSLock()
+            var resolved = false
+            func resolve(_ value: Bool) {
+                lock.lock()
+                defer { lock.unlock() }
+                guard !resolved else { return }
+                resolved = true
+                continuation.resume(returning: value)
+                monitor.cancel()
+            }
+            let timeoutTask = DispatchWorkItem {
+                resolve(false)
+            }
+            queue.asyncAfter(deadline: .now() + 2.0, execute: timeoutTask)
             monitor.pathUpdateHandler = { path in
+                timeoutTask.cancel()
                 let ok = path.status == .satisfied && path.usesInterfaceType(.wifi)
-                continuation.resume(returning: ok)
-                monitor.cancel()
+                resolve(ok)
             }
             monitor.start(queue: queue)
         }
-        guard isWifi else { throw HFAPIError.downloadFailed }
+        guard isWifi else { throw HFAPIError.networkPolicyViolation }
     }
 
     private func expectedSHA256(from response: HTTPURLResponse) -> String? {
@@ -490,6 +516,7 @@ enum HFAPIError: Error, LocalizedError {
     case httpError(statusCode: Int)
     case downloadFailed
     case invalidData
+    case networkPolicyViolation
 
     var errorDescription: String? {
         switch self {
@@ -507,6 +534,8 @@ enum HFAPIError: Error, LocalizedError {
             return "Failed to download model file"
         case .invalidData:
             return "Invalid data received"
+        case .networkPolicyViolation:
+            return "Wi-Fi only downloads is enabled. Connect to Wi-Fi to continue."
         }
     }
 }
diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
index 678f326..57a7958 100644
--- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift
+++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
@@ -192,7 +192,7 @@ class QuantizeViewModel: ObservableObject {
                 self.filterLocalModels(query: query)
                 
             } catch let error as HFAPIError  {
-                self.errorMessage = "Rate limit reached. Please try again later."
+                self.errorMessage = error.errorDescription ?? "Search failed."
                 self.showError = true
             } catch {
                 // Don't show error for search failures - local results are still available

From 83f43d526cc16884a54d26919af11598a8ce1f0a Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 20:08:17 -0500
Subject: [PATCH 5/8] refactor: make ModelQuantizer a facade over
 QuantizationEngine

---
 ModelQuantizer/Services/ModelQuantizer.swift | 303 +++----------------
 1 file changed, 39 insertions(+), 264 deletions(-)

diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift
index 156bbdb..4f905fd 100644
--- a/ModelQuantizer/Services/ModelQuantizer.swift
+++ b/ModelQuantizer/Services/ModelQuantizer.swift
@@ -2,298 +2,73 @@
 //  ModelQuantizer.swift
 //  ModelQuantizer
 //
-//  Created by AI Assistant on 2026-03-31.
+//  Compatibility facade over QuantizationEngine.
 //
 
 import Foundation
-import Metal
-import MetalPerformanceShaders
-import Accelerate
-import Compression
+import Combine
 
-/// Main model quantizer engine
 @MainActor
-class ModelQuantizer: ObservableObject {
+final class ModelQuantizer: ObservableObject {
     static let shared = ModelQuantizer()
 
     @Published var status: QuantizationStatus = .idle
     @Published var currentModel: HFModel?
     @Published var quantizationHistory: [QuantizationJob] = []
 
-    private var quantizeTask: Task<Void, Never>?
-    private let fileManager = FileManager.default
-    private let metalDevice: MTLDevice?
-
-    private var modelsDirectory: URL {
-        let docs = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first!
-        return docs.appendingPathComponent("Models", isDirectory: true)
-    }
+    private let engine = QuantizationEngine.shared
+    private var cancellables = Set<AnyCancellable>()
 
     private init() {
-        self.metalDevice = MTLCreateSystemDefaultDevice()
-        createModelsDirectory()
-        loadHistory()
+        bindEngine()
+        refreshHistory()
     }
 
-    func quantize(model: HFModel, to quantization: QuantizationType,
-                  contextLength: Int? = nil, useGPU: Bool = true) {
-        guard status == .idle else { return }
-
+    func quantize(
+        model: HFModel,
+        to quantization: QuantizationType,
+        contextLength: Int? = nil,
+        useGPU: Bool = true
+    ) {
         currentModel = model
-        quantizeTask?.cancel()
-
-        quantizeTask = Task { [weak self] in
-            await self?.performQuantization(model: model, quantization: quantization,
-                                            contextLength: contextLength, useGPU: useGPU)
-        }
+        engine.quantize(
+            model: model,
+            to: quantization,
+            contextLength: contextLength ?? model.recommendedContextLength,
+            useGPU: useGPU
+        )
     }
 
     func cancel() {
-        quantizeTask?.cancel()
-        status = .idle
+        engine.cancel()
     }
 
     func getQuantizedModels() -> [QuantizedModel] {
-        guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory,
-                                                                  includingPropertiesForKeys: nil) else {
-            return []
-        }
-
-        return contents.compactMap { url in
-            guard url.pathExtension == "gguf" else { return nil }
-            return try? QuantizedModel(from: url)
-        }
+        engine.getQuantizedModels()
     }
 
     func deleteQuantizedModel(_ model: QuantizedModel) {
-        try? fileManager.removeItem(at: model.url)
-        loadHistory()
-    }
-
-    private func createModelsDirectory() {
-        try? fileManager.createDirectory(at: modelsDirectory, withIntermediateDirectories: true)
-    }
-
-    private func loadHistory() {
-        if let data = UserDefaults.standard.data(forKey: "quantizationHistory"),
-           let history = try? JSONDecoder().decode([QuantizationJob].self, from: data) {
-            quantizationHistory = history
-        }
-    }
-
-    private func saveHistory() {
-        if let data = try? JSONEncoder().encode(quantizationHistory) {
-            UserDefaults.standard.set(data, forKey: "quantizationHistory")
-        }
-    }
-
-    private func performQuantization(model: HFModel, quantization: QuantizationType,
-                                     contextLength: Int?, useGPU: Bool) async {
-        let startTime = Date()
-
-        do {
-            let modelURL = try await downloadModel(model)
-            status = .analyzing
-            let analysis = try await analyzeModel(at: modelURL)
-            let outputURL = modelsDirectory.appendingPathComponent("\(model.modelId)_\(quantization.rawValue).gguf")
-
-            try await performActualQuantization(
-                inputURL: modelURL,
-                outputURL: outputURL,
-                analysis: analysis,
-                quantization: quantization,
-                contextLength: contextLength ?? model.recommendedContextLength,
-                useGPU: useGPU
-            )
-
-            status = .validating
-            try await validateQuantizedModel(at: outputURL)
-
-            let job = QuantizationJob(
-                id: UUID(),
-                originalModel: model,
-                quantizationType: quantization,
-                outputURL: outputURL,
-                outputSize: (try? fileManager.attributesOfItem(atPath: outputURL.path)[.size] as? Int64) ?? 0,
-                startTime: startTime,
-                endTime: Date(),
-                contextLength: contextLength ?? model.recommendedContextLength,
-                estimatedTokensPerSecond: nil,
-                validationScore: nil
-            )
-
-            quantizationHistory.insert(job, at: 0)
-            saveHistory()
-            status = .completed(outputURL: outputURL)
-        } catch {
-            status = .failed(error: error.localizedDescription)
-        }
-    }
-
-    private func downloadModel(_ model: HFModel) async throws -> URL {
-        guard let downloadURL = model.downloadURL else {
-            throw QuantizationError.noDownloadURL
-        }
-
-        let destination = modelsDirectory.appendingPathComponent("\(model.modelId).tmp")
-
-        if fileManager.fileExists(atPath: destination.path) {
-            let attrs = try fileManager.attributesOfItem(atPath: destination.path)
-            if let size = attrs[.size] as? Int64, size == model.sizeBytes {
-                return destination
-            }
-        }
-
-        let session = URLSession(configuration: .default)
-        let (asyncBytes, response) = try await session.bytes(from: downloadURL)
-        let totalBytes = response.expectedContentLength
-        var downloadedBytes: Int64 = 0
-        var lastProgress: Double = 0
-
-        try? fileManager.removeItem(at: destination)
-        fileManager.createFile(atPath: destination.path, contents: nil)
-        let fileHandle = try FileHandle(forWritingTo: destination)
-        defer { try? fileHandle.close() }
-
-        var buffer = Data(capacity: 65_536)
-
-        for try await byte in asyncBytes {
-            buffer.append(byte)
-            downloadedBytes += 1
-
-            if buffer.count >= 65_536 {
-                fileHandle.write(buffer)
-                buffer.removeAll(keepingCapacity: true)
-            }
-
-            if totalBytes > 0 {
-                let currentProgress = Double(downloadedBytes) / Double(totalBytes)
-                if currentProgress - lastProgress > 0.01 {
-                    lastProgress = currentProgress
-                    status = .downloading(progress: currentProgress)
+        try? engine.deleteQuantizedModel(model)
+        refreshHistory()
+    }
+
+    private func bindEngine() {
+        engine.$status
+            .receive(on: DispatchQueue.main)
+            .sink { [weak self] newStatus in
+                guard let self else { return }
+                self.status = newStatus
+                switch newStatus {
+                case .completed, .failed, .idle:
+                    self.refreshHistory()
+                default:
+                    break
                 }
             }
-        }
-
-        if !buffer.isEmpty {
-            fileHandle.write(buffer)
-        }
-
-        return destination
-    }
-
-    private struct ModelAnalysis {
-        let architecture: ModelArchitecture
-        let layerCount: Int
-        let tensorCount: Int
-        let totalParameters: Int64
-        let originalSize: Int64
-    }
-
-    private func analyzeModel(at url: URL) async throws -> ModelAnalysis {
-        let data = try Data(contentsOf: url, options: .mappedIfSafe)
-
-        var architecture: ModelArchitecture = .custom
-        var layerCount = 0
-        var tensorCount = 0
-        var totalParameters: Int64 = 0
-
-        if url.pathExtension == "safetensors" {
-            let analysis = try parseSafeTensors(data)
-            architecture = analysis.architecture
-            layerCount = analysis.layerCount
-            tensorCount = analysis.tensorCount
-            totalParameters = analysis.totalParameters
-        } else if url.pathExtension == "bin" {
-            let analysis = parsePyTorchBin(data)
-            architecture = analysis.architecture
-            layerCount = analysis.layerCount
-            tensorCount = analysis.tensorCount
-            totalParameters = analysis.totalParameters
-        }
-
-        return ModelAnalysis(
-            architecture: architecture,
-            layerCount: layerCount,
-            tensorCount: tensorCount,
-            totalParameters: totalParameters,
-            originalSize: Int64(data.count)
-        )
-    }
-
-    private func parseSafeTensors(_ data: Data) throws -> ModelAnalysis {
-        var architecture: ModelArchitecture = .custom
-        var layerCount = 0
-        var tensorCount = 0
-        var totalParameters: Int64 = 0
-
-        let headerLength = data.prefix(8).withUnsafeBytes { $0.load(as: UInt64.self) }
-        let headerData = data.dropFirst(8).prefix(Int(headerLength))
-
-        if let header = try? JSONSerialization.jsonObject(with: headerData) as? [String: Any] {
-            let tensorNames = header.keys
-            if tensorNames.contains(where: { $0.contains("llama") || $0.contains("self_attn") }) {
-                architecture = .llama
-            } else if tensorNames.contains(where: { $0.contains("mistral") }) {
-                architecture = .mistral
-            } else if tensorNames.contains(where: { $0.contains("qwen") }) {
-                architecture = .qwen2
-            } else if tensorNames.contains(where: { $0.contains("gemma") }) {
-                architecture = .gemma
-            }
-
-            for (key, value) in header {
-                if let tensorInfo = value as? [String: Any],
-                   let shape = tensorInfo["shape"] as? [Int] {
-                    tensorCount += 1
-                    totalParameters += Int64(shape.reduce(1, *))
-
-                    if key.contains("layers.") {
-                        layerCount = max(layerCount, Int(key.components(separatedBy: "layers.").last?.components(separatedBy: ".").first ?? "0") ?? 0)
-                    }
-                }
-            }
-        }
-
-        return ModelAnalysis(
-            architecture: architecture,
-            layerCount: layerCount,
-            tensorCount: tensorCount,
-            totalParameters: totalParameters,
-            originalSize: Int64(data.count)
-        )
-    }
-
-    private func parsePyTorchBin(_ data: Data) -> ModelAnalysis {
-        ModelAnalysis(
-            architecture: .custom,
-            layerCount: 0,
-            tensorCount: 0,
-            totalParameters: 0,
-            originalSize: Int64(data.count)
-        )
-    }
-
-    private func performActualQuantization(inputURL: URL, outputURL: URL,
-                                           analysis: ModelAnalysis, quantization: QuantizationType,
-                                           contextLength: Int, useGPU: Bool) async throws {
-        status = .quantizing(progress: 0.1, stage: "Building GGUF")
-        var ggufBuilder = GGUFBuilder()
-        ggufBuilder.addMetadata(key: "general.architecture", value: .string(analysis.architecture.rawValue.lowercased()))
-        ggufBuilder.addMetadata(key: "general.name", value: .string(currentModel?.name ?? "Unknown"))
-        ggufBuilder.addMetadata(key: "general.quantization_version", value: .uint32(2))
-        ggufBuilder.addMetadata(key: "general.file_type", value: .uint32(quantization.ggufFileType))
-
-        let ggufData = try ggufBuilder.build()
-        try ggufData.write(to: outputURL)
-        status = .quantizing(progress: 1.0, stage: "Complete")
+            .store(in: &cancellables)
     }
 
-    private func validateQuantizedModel(at url: URL) async throws {
-        let data = try Data(contentsOf: url, options: .mappedIfSafe)
-        let magic = data.prefix(4)
-        guard magic == Data("GGUF".utf8) else {
-            throw QuantizationError.invalidOutput
-        }
+    private func refreshHistory() {
+        quantizationHistory = engine.getQuantizationHistory()
     }
 }

From 04d06c5423e17bbffbaf060011f55ac7d537240f Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 20:16:47 -0500
Subject: [PATCH 6/8] feat: add paginated HF search loading in quantize flow

---
 .../ViewModels/QuantizeViewModel.swift        | 40 ++++++++++++++++++-
 ModelQuantizer/Views/QuantizeView.swift       |  3 ++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
index 57a7958..d1f397e 100644
--- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift
+++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
@@ -36,6 +36,9 @@ class QuantizeViewModel: ObservableObject {
     // Search debounce
     private var searchTask: Task<Void, Never>?
     private let searchDebounceInterval: TimeInterval = 0.5
+    private var currentSearchOffset = 0
+    private var hasMoreSearchResults = true
+    private var isLoadingMoreResults = false
     
     init() {
         setupBindings()
@@ -146,6 +149,7 @@ class QuantizeViewModel: ObservableObject {
             let popularModels = try await hfAPI.searchModels(
                 query: "",
                 limit: 20,
+                offset: 0,
                 filter: ModelFilter(sortBy: .downloads)
             )
             
@@ -173,6 +177,8 @@ class QuantizeViewModel: ObservableObject {
         searchTask = Task { @MainActor in
             isSearching = true
             defer { isSearching = false }
+            currentSearchOffset = 0
+            hasMoreSearchResults = true
             
             // First, filter local models
             filterLocalModels(query: query)
@@ -181,7 +187,8 @@ class QuantizeViewModel: ObservableObject {
             do {
                 let apiModels = try await hfAPI.searchModels(
                     query: query,
-                    limit: 30
+                    limit: 30,
+                    offset: 0
                 )
                 
                 // Merge results, avoiding duplicates
@@ -189,7 +196,10 @@ class QuantizeViewModel: ObservableObject {
                 let newModels = apiModels.filter { !existingIds.contains($0.modelId) }
                 
                 self.models.append(contentsOf: newModels)
+                self.models = Array(self.models.prefix(300))
                 self.filterLocalModels(query: query)
+                self.currentSearchOffset = apiModels.count
+                self.hasMoreSearchResults = apiModels.count == 30
                 
             } catch let error as HFAPIError  {
                 self.errorMessage = error.errorDescription ?? "Search failed."
@@ -200,6 +210,54 @@ class QuantizeViewModel: ObservableObject {
             }
         }
     }
+
+    /// Loads the next page of Hugging Face search results when the last row appears.
+    ///
+    /// Does nothing unless a non-empty query is active, more pages are expected, no page
+    /// load is already running, and `currentItem` is the last entry of `filteredModels`.
+    /// A page that arrives after the query or offset has changed is discarded so results
+    /// from an old search are never merged into a newer one.
+    ///
+    /// - Parameter currentItem: The row that just appeared on screen.
+    func loadMoreIfNeeded(currentItem: HFModel) {
+        guard !searchQuery.isEmpty,
+              hasMoreSearchResults,
+              !isLoadingMoreResults,
+              filteredModels.last?.id == currentItem.id else { return }
+
+        // Snapshot the request parameters; both can change while the call is in flight.
+        let query = searchQuery
+        let offset = currentSearchOffset
+        let pageSize = 30
+        let maxModels = 300
+
+        isLoadingMoreResults = true
+        Task { @MainActor in
+            defer { isLoadingMoreResults = false }
+            do {
+                let more = try await hfAPI.searchModels(
+                    query: query,
+                    limit: pageSize,
+                    offset: offset
+                )
+                // Drop stale pages: a new search changes the query and/or resets the offset.
+                guard query == self.searchQuery, offset == self.currentSearchOffset else { return }
+
+                let existingIds = Set(self.models.map { $0.modelId })
+                let newModels = more.filter { !existingIds.contains($0.modelId) }
+                self.models.append(contentsOf: newModels)
+                self.models = Array(self.models.prefix(maxModels))
+                self.filterLocalModels(query: query)
+                self.currentSearchOffset = offset + more.count
+                // Stop on a short page, or once the list is capped and further pages would be truncated away.
+                self.hasMoreSearchResults = more.count == pageSize && self.models.count < maxModels
+            } catch {
+                // A failure of an outdated request must not switch off paging for the current search.
+                guard query == self.searchQuery else { return }
+                self.hasMoreSearchResults = false
+            }
+        }
+    }
     
     private func filterLocalModels(query: String) {
         if query.isEmpty {
diff --git a/ModelQuantizer/Views/QuantizeView.swift b/ModelQuantizer/Views/QuantizeView.swift
index 9b99b96..b9f16ab 100644
--- a/ModelQuantizer/Views/QuantizeView.swift
+++ b/ModelQuantizer/Views/QuantizeView.swift
@@ -277,6 +277,9 @@ struct QuantizeView: View {
                             viewModel.selectModel(model)
                         }
                     }
+                    .onAppear {
+                        viewModel.loadMoreIfNeeded(currentItem: model)
+                    }
                 }
             }
         }

From 0cfe6571b0c47d8b1787709803490d672d93409d Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 20:19:08 -0500
Subject: [PATCH 7/8] feat: make all four Home quick actions functional

---
 ModelQuantizer/Views/HomeView.swift | 50 ++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/ModelQuantizer/Views/HomeView.swift b/ModelQuantizer/Views/HomeView.swift
index 1fd8c3d..df20b02 100644
--- a/ModelQuantizer/Views/HomeView.swift
+++ b/ModelQuantizer/Views/HomeView.swift
@@ -10,7 +10,7 @@ import SwiftUI
 struct HomeView: View {
     @StateObject private var viewModel = HomeViewModel()
     @StateObject private var scanner = DeviceScanner.shared
-    @StateObject private var quantizer = QuantizationEngine.shared
+    @State private var showingRefreshBanner = false
     
     var body: some View {
         ScrollView {
@@ -38,6 +38,19 @@ struct HomeView: View {
             scanner.performScan()
             viewModel.loadRecentQuantizations()
         }
+        .overlay(alignment: .top) {
+            if showingRefreshBanner {
+                Text("Device scan started")
+                    .font(.system(size: 13, weight: .semibold))
+                    .foregroundStyle(.white)
+                    .padding(.horizontal, 14)
+                    .padding(.vertical, 8)
+                    .background(.black.opacity(0.65))
+                    .clipShape(Capsule())
+                    .padding(.top, 8)
+                    .transition(.move(edge: .top).combined(with: .opacity))
+            }
+        }
     }
     
     // MARK: - Header
@@ -146,7 +159,7 @@ struct HomeView: View {
                 .font(.system(size: 20, weight: .bold))
                 .foregroundStyle(.white)
             
-            HStack(spacing: 12) {
+            LazyVGrid(columns: [GridItem(.flexible()), GridItem(.flexible())], spacing: 12) {
                 NavigationLink(destination: QuantizeView()) {
                     QuickActionButton(
                         icon: "cpu.fill",
@@ -155,7 +168,7 @@ struct HomeView: View {
                         color: .purple
                     )
                 }
-                
+
                 NavigationLink(destination: ModelLibraryView()) {
                     QuickActionButton(
                         icon: "folder.fill",
@@ -164,6 +177,36 @@ struct HomeView: View {
                         color: .cyan
                     )
                 }
+
+                NavigationLink(destination: DeviceInfoView()) {
+                    QuickActionButton(
+                        icon: "iphone",
+                        title: "Device",
+                        subtitle: "Details",
+                        color: .green
+                    )
+                }
+
+                Button {
+                    scanner.performScan()
+                    viewModel.loadRecentQuantizations()
+                    withAnimation(.easeOut(duration: 0.2)) {
+                        showingRefreshBanner = true
+                    }
+                    Task { @MainActor in
+                        try? await Task.sleep(nanoseconds: 1_200_000_000)
+                        withAnimation(.easeIn(duration: 0.2)) {
+                            showingRefreshBanner = false
+                        }
+                    }
+                } label: {
+                    QuickActionButton(
+                        icon: "arrow.clockwise.circle.fill",
+                        title: "Refresh",
+                        subtitle: "Status",
+                        color: .orange
+                    )
+                }
             }
         }
     }
@@ -409,4 +452,3 @@ struct EmptyStateView: View {
 }
 
 // MARK: - View Model
-

From 334e0e9ccf7ae0de63e2cd1b32c5149a9cc8d17a Mon Sep 17 00:00:00 2001
From: bobbytatum999 <bobbytatum999@gmail.com>
Date: Fri, 3 Apr 2026 20:26:57 -0500
Subject: [PATCH 8/8] fix: simplify wifi-only enforcement to avoid concurrency
 build failures

---
 ModelQuantizer/Services/HuggingFaceAPI.swift | 38 ++------------------
 1 file changed, 2 insertions(+), 36 deletions(-)

diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift
index c3131f5..75f0b12 100644
--- a/ModelQuantizer/Services/HuggingFaceAPI.swift
+++ b/ModelQuantizer/Services/HuggingFaceAPI.swift
@@ -7,7 +7,6 @@
 
 import Foundation
 import Combine
-import Network
 import CryptoKit
 
 /// Hugging Face API Service for model search and metadata
@@ -178,7 +177,7 @@ class HuggingFaceAPI: ObservableObject {
         to destination: URL,
         progressHandler: @escaping (Double) -> Void
     ) async throws {
-        try await enforceNetworkPolicy()
+        let wifiOnly = UserDefaults.standard.object(forKey: "wifi_only") as? Bool ?? true
 
         var attempts = 0
         let maxAttempts = 3
@@ -187,6 +186,7 @@ class HuggingFaceAPI: ObservableObject {
             do {
                 var request = URLRequest(url: url)
                 request.setValue("application/octet-stream", forHTTPHeaderField: "Accept")
+                request.allowsCellularAccess = !wifiOnly
 
                 if let token = getAuthToken() {
                     request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization")
@@ -417,37 +417,6 @@ class HuggingFaceAPI: ObservableObject {
         return nil
     }
 
-    private func enforceNetworkPolicy() async throws {
-        let wifiOnly = UserDefaults.standard.object(forKey: "wifi_only") as? Bool ?? true
-        guard wifiOnly else { return }
-
-        let monitor = NWPathMonitor()
-        let queue = DispatchQueue(label: "hf.network.policy")
-        let isWifi = await withCheckedContinuation { continuation in
-            let lock = NSLock()
-            var resolved = false
-            func resolve(_ value: Bool) {
-                lock.lock()
-                defer { lock.unlock() }
-                guard !resolved else { return }
-                resolved = true
-                continuation.resume(returning: value)
-                monitor.cancel()
-            }
-            let timeoutTask = DispatchWorkItem {
-                resolve(false)
-            }
-            queue.asyncAfter(deadline: .now() + 2.0, execute: timeoutTask)
-            monitor.pathUpdateHandler = { path in
-                timeoutTask.cancel()
-                let ok = path.status == .satisfied && path.usesInterfaceType(.wifi)
-                resolve(ok)
-            }
-            monitor.start(queue: queue)
-        }
-        guard isWifi else { throw HFAPIError.networkPolicyViolation }
-    }
-
     private func expectedSHA256(from response: HTTPURLResponse) -> String? {
         if let checksum = response.value(forHTTPHeaderField: "x-checksum-sha256") {
             return checksum.replacingOccurrences(of: "\"", with: "")
@@ -516,7 +485,6 @@ enum HFAPIError: Error, LocalizedError {
     case httpError(statusCode: Int)
     case downloadFailed
     case invalidData
-    case networkPolicyViolation
 
     var errorDescription: String? {
         switch self {
@@ -534,8 +502,6 @@ enum HFAPIError: Error, LocalizedError {
             return "Failed to download model file"
         case .invalidData:
             return "Invalid data received"
-        case .networkPolicyViolation:
-            return "Wi-Fi only downloads is enabled. Connect to Wi-Fi to continue."
         }
     }
 }