diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c41d8b9..a5ece03 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -3,12 +3,27 @@ name: Build iPhone IPA
 on:
   push:
     branches:
-      - main
+      - "**"
+    paths:
+      - ".github/workflows/**"
+      - "ModelQuantizer/**"
+      - "ModelQuantizer.xcodeproj/**"
+      - "README.md"
+      - "TRIGGER.md"
+  pull_request:
+    branches:
+      - "**"
+    paths:
+      - ".github/workflows/**"
+      - "ModelQuantizer/**"
+      - "ModelQuantizer.xcodeproj/**"
+      - "README.md"
+      - "TRIGGER.md"
   workflow_dispatch:
 
 jobs:
   build:
-    runs-on: macos-15
+    runs-on: macos-14
 
     env:
       PROJECT: ModelQuantizer.xcodeproj
@@ -25,7 +40,7 @@ jobs:
       - name: Select Xcode
         uses: maxim-lobanov/setup-xcode@v1
         with:
-          xcode-version: '16.4'
+          xcode-version: '15.4'
 
       - name: Check Xcode and project
         shell: bash
diff --git a/.github/workflows/ios.yml b/.github/workflows/ios.yml
index 75fae34..da7b46c 100644
--- a/.github/workflows/ios.yml
+++ b/.github/workflows/ios.yml
@@ -2,9 +2,21 @@ name: iOS Build and Test
 
 on:
   push:
-    branches: [ main, develop ]
+    branches: [ "**" ]
+    paths:
+      - ".github/workflows/**"
+      - "ModelQuantizer/**"
+      - "ModelQuantizer.xcodeproj/**"
+      - "README.md"
+      - "TRIGGER.md"
   pull_request:
-    branches: [ main ]
+    branches: [ "**" ]
+    paths:
+      - ".github/workflows/**"
+      - "ModelQuantizer/**"
+      - "ModelQuantizer.xcodeproj/**"
+      - "README.md"
+      - "TRIGGER.md"
 
 jobs:
   build:
@@ -15,8 +27,10 @@ jobs:
     - name: Checkout
       uses: actions/checkout@v4
       
-    - name: Select Xcode Version
-      run: sudo xcode-select -s /Applications/Xcode_15.4.app
+    - name: Select Xcode
+      uses: maxim-lobanov/setup-xcode@v1
+      with:
+        xcode-version: '15.4'
     
     - name: Show Xcode Version
       run: xcodebuild -version
@@ -35,12 +49,16 @@ jobs:
     
     - name: Run Tests
       run: |
-        xcodebuild test \
-          -project ModelQuantizer.xcodeproj \
-          -scheme ModelQuantizer \
-          -destination 'platform=iOS Simulator,name=iPhone 15' \
-          CODE_SIGNING_REQUIRED=NO \
-          CODE_SIGNING_ALLOWED=NO
+        if xcodebuild -project ModelQuantizer.xcodeproj -list | grep -q "Tests"; then  # run tests only when the project lists something named *Tests*
+          xcodebuild test \
+            -project ModelQuantizer.xcodeproj \
+            -scheme ModelQuantizer \
+            -destination 'platform=iOS Simulator,name=iPhone 15' \
+            CODE_SIGNING_REQUIRED=NO \
+            CODE_SIGNING_ALLOWED=NO
+        else
+          echo "::warning::No test target found; skipping test step."  # annotation keeps the skip visible on the run summary
+        fi
     
     - name: Archive Build Logs on Failure
       if: failure()
@@ -59,8 +77,10 @@ jobs:
     - name: Checkout
       uses: actions/checkout@v4
       
-    - name: Select Xcode Version
-      run: sudo xcode-select -s /Applications/Xcode_15.4.app
+    - name: Select Xcode
+      uses: maxim-lobanov/setup-xcode@v1
+      with:
+        xcode-version: '15.4'
     
     - name: Run Static Analysis
       run: |
diff --git a/ModelQuantizer/Models/ModelTypes.swift b/ModelQuantizer/Models/ModelTypes.swift
index 43d4dc8..9ae4ec9 100644
--- a/ModelQuantizer/Models/ModelTypes.swift
+++ b/ModelQuantizer/Models/ModelTypes.swift
@@ -31,7 +31,7 @@ struct HFModel: Identifiable, Codable, Equatable {
         architecture: ModelArchitecture,
         downloadURL: URL? = nil,
         sizeBytes: Int64 = 0,
-        quantizationOptions: [QuantizationType] = QuantizationType.allCases,
+        quantizationOptions: [QuantizationType] = QuantizationType.onDeviceSupportedCases,
         recommendedContextLength: Int = 4096,
         tags: [String] = [],
         downloads: Int = 0,
@@ -67,13 +67,13 @@ enum ModelArchitecture: String, Codable, CaseIterable {
     var supportedQuantizations: [QuantizationType] {
         switch self {
         case .llama, .mistral, .qwen2, .gemma, .phi:
-            return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32]
+            return QuantizationType.onDeviceSupportedCases
         case .falcon, .gpt2:
             return [.q4_0, .q4_1, .q8_0, .fp16]
         case .bert:
             return [.q8_0, .fp16, .fp32]
         case .custom:
-            return QuantizationType.allCases
+            return QuantizationType.onDeviceSupportedCases
         }
     }
     
@@ -148,6 +148,10 @@ enum QuantizationType: String, Codable, CaseIterable {
         return 32.0 / bits
     }
     
+    /// Cases offered for on-device quantization — a fixed subset of `allCases`.
+    /// Shared default for model option lists and for architectures without a narrower list.
+    static let onDeviceSupportedCases: [QuantizationType] = [.q4_0, .q4_1, .q8_0, .fp16, .fp32]
+    
     var ggufFileType: UInt32 {
         switch self {
         case .fp32: return 0
diff --git a/ModelQuantizer/Services/HuggingFaceAPI.swift b/ModelQuantizer/Services/HuggingFaceAPI.swift
index 2a3dfe6..da6b19e 100644
--- a/ModelQuantizer/Services/HuggingFaceAPI.swift
+++ b/ModelQuantizer/Services/HuggingFaceAPI.swift
@@ -2,7 +2,7 @@
 //  HuggingFaceAPI.swift
 //  ModelQuantizer
 //
-//  Real Hugging Face API integration for model search and download.
+//  Hugging Face API integration for model search and download.
 //
 
 import Foundation
@@ -200,6 +200,9 @@ class HuggingFaceAPI: ObservableObject {
         // Remove existing file
         try? FileManager.default.removeItem(at: destination)
         
+        // Create destination file before opening file handle
+        _ = FileManager.default.createFile(atPath: destination.path, contents: nil)
+        
         // Write file
         let fileHandle = try FileHandle(forWritingTo: destination)
         defer { try? fileHandle.close() }
diff --git a/ModelQuantizer/Services/ModelQuantizer.swift b/ModelQuantizer/Services/ModelQuantizer.swift
deleted file mode 100644
index 89a83c4..0000000
--- a/ModelQuantizer/Services/ModelQuantizer.swift
+++ /dev/null
@@ -1,645 +0,0 @@
-//
-//  ModelQuantizer.swift
-//  ModelQuantizer
-//
-//  Created by AI Assistant on 2026-03-31.
-//
-
-import Foundation
-import Metal
-import MetalPerformanceShaders
-import Accelerate
-import Compression
-
-/// Represents a Hugging Face model to be quantized
-struct HFModel: Identifiable, Codable, Equatable {
-    let id: UUID
-    let modelId: String
-    let name: String
-    let description: String
-    let parameters: String
-    let architecture: ModelArchitecture
-    let downloadURL: URL?
-    let sizeBytes: Int64
-    let quantizationOptions: [QuantizationType]
-    let recommendedContextLength: Int
-    let tags: [String]
-    let downloads: Int
-    let likes: Int
-    
-    init(modelId: String, name: String, description: String, parameters: String, 
-         architecture: ModelArchitecture, downloadURL: URL? = nil, sizeBytes: Int64 = 0,
-         quantizationOptions: [QuantizationType] = QuantizationType.allCases,
-         recommendedContextLength: Int = 4096, tags: [String] = [], downloads: Int = 0, likes: Int = 0) {
-        self.id = UUID()
-        self.modelId = modelId
-        self.name = name
-        self.description = description
-        self.parameters = parameters
-        self.architecture = architecture
-        self.downloadURL = downloadURL
-        self.sizeBytes = sizeBytes
-        self.quantizationOptions = quantizationOptions
-        self.recommendedContextLength = recommendedContextLength
-        self.tags = tags
-        self.downloads = downloads
-        self.likes = likes
-    }
-}
-
-enum ModelArchitecture: String, Codable, CaseIterable {
-    case llama = "Llama"
-    case mistral = "Mistral"
-    case qwen2 = "Qwen2"
-    case gemma = "Gemma"
-    case phi = "Phi"
-    case falcon = "Falcon"
-    case gpt2 = "GPT-2"
-    case bert = "BERT"
-    case custom = "Custom"
-    
-    var supportedQuantizations: [QuantizationType] {
-        switch self {
-        case .llama, .mistral, .qwen2, .gemma, .phi:
-            return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32]
-        case .falcon, .gpt2:
-            return [.q4_0, .q4_1, .q8_0, .fp16]
-        case .bert:
-            return [.q8_0, .fp16, .fp32]
-        case .custom:
-            return QuantizationType.allCases
-        }
-    }
-}
-
-enum QuantizationType: String, Codable, CaseIterable {
-    case q2_K = "Q2_K"
-    case q3_K_S = "Q3_K_S"
-    case q3_K_M = "Q3_K_M"
-    case q3_K_L = "Q3_K_L"
-    case q4_0 = "Q4_0"
-    case q4_1 = "Q4_1"
-    case q4_K_S = "Q4_K_S"
-    case q4_K_M = "Q4_K_M"
-    case q5_0 = "Q5_0"
-    case q5_1 = "Q5_1"
-    case q5_K_S = "Q5_K_S"
-    case q5_K_M = "Q5_K_M"
-    case q6_K = "Q6_K"
-    case q8_0 = "Q8_0"
-    case fp16 = "F16"
-    case fp32 = "F32"
-    
-    var bits: Double {
-        switch self {
-        case .q2_K: return 2.0
-        case .q3_K_S, .q3_K_M, .q3_K_L: return 3.0
-        case .q4_0, .q4_1, .q4_K_S, .q4_K_M: return 4.0
-        case .q5_0, .q5_1, .q5_K_S, .q5_K_M: return 5.0
-        case .q6_K: return 6.0
-        case .q8_0: return 8.0
-        case .fp16: return 16.0
-        case .fp32: return 32.0
-        }
-    }
-    
-    var description: String {
-        switch self {
-        case .q2_K: return "2-bit (Smallest, Lowest Quality)"
-        case .q3_K_S: return "3-bit Small (Aggressive compression)"
-        case .q3_K_M: return "3-bit Medium (Balanced)"
-        case .q3_K_L: return "3-bit Large (Better quality)"
-        case .q4_0: return "4-bit Legacy (Fast)"
-        case .q4_1: return "4-bit Legacy v2 (Better accuracy)"
-        case .q4_K_S: return "4-bit K-Quants Small (Recommended)"
-        case .q4_K_M: return "4-bit K-Quants Medium (Best 4-bit)"
-        case .q5_0: return "5-bit Legacy (Good balance)"
-        case .q5_1: return "5-bit Legacy v2 (Better)"
-        case .q5_K_S: return "5-bit K-Quants Small (High quality)"
-        case .q5_K_M: return "5-bit K-Quants Medium (Best 5-bit)"
-        case .q6_K: return "6-bit (Near FP16 quality)"
-        case .q8_0: return "8-bit (Excellent quality)"
-        case .fp16: return "16-bit Float (Original quality)"
-        case .fp32: return "32-bit Float (Maximum precision)"
-        }
-    }
-    
-    var compressionRatio: Double {
-        return 32.0 / bits
-    }
-}
-
-/// Quantization progress and status
-enum QuantizationStatus: Equatable {
-    case idle
-    case downloading(progress: Double)
-    case analyzing
-    case quantizing(progress: Double, stage: String)
-    case optimizing
-    case validating
-    case completed(outputURL: URL)
-    case failed(error: String)
-    
-    static func == (lhs: QuantizationStatus, rhs: QuantizationStatus) -> Bool {
-        switch (lhs, rhs) {
-        case (.idle, .idle): return true
-        case (.downloading(let p1), .downloading(let p2)): return p1 == p2
-        case (.analyzing, .analyzing): return true
-        case (.quantizing(let p1, let s1), .quantizing(let p2, let s2)): return p1 == p2 && s1 == s2
-        case (.optimizing, .optimizing): return true
-        case (.validating, .validating): return true
-        case (.completed(let u1), .completed(let u2)): return u1 == u2
-        case (.failed(let e1), .failed(let e2)): return e1 == e2
-        default: return false
-        }
-    }
-}
-
-/// Main model quantizer engine
-@MainActor
-class ModelQuantizer: ObservableObject {
-    static let shared = ModelQuantizer()
-    
-    @Published var status: QuantizationStatus = .idle
-    @Published var currentModel: HFModel?
-    @Published var quantizationHistory: [QuantizationJob] = []
-    
-    private var quantizeTask: Task<Void, Never>?
-    private let fileManager = FileManager.default
-    private let metalDevice: MTLDevice?
-    
-    private var modelsDirectory: URL {
-        let docs = fileManager.urls(for: .documentDirectory, in: .userDomainMask).first!
-        return docs.appendingPathComponent("Models", isDirectory: true)
-    }
-    
-    private init() {
-        self.metalDevice = MTLCreateSystemDefaultDevice()
-        createModelsDirectory()
-        loadHistory()
-    }
-    
-    // MARK: - Public Methods
-    
-    func quantize(model: HFModel, to quantization: QuantizationType, 
-                  contextLength: Int? = nil, useGPU: Bool = true) {
-        guard status == .idle else { return }
-        
-        currentModel = model
-        quantizeTask?.cancel()
-        
-        quantizeTask = Task { [weak self] in
-            await self?.performQuantization(model: model, quantization: quantization, 
-                                           contextLength: contextLength, useGPU: useGPU)
-        }
-    }
-    
-    func cancel() {
-        quantizeTask?.cancel()
-        status = .idle
-    }
-    
-    func getQuantizedModels() -> [QuantizedModel] {
-        guard let contents = try? fileManager.contentsOfDirectory(at: modelsDirectory, 
-                                                                  includingPropertiesForKeys: nil) else {
-            return []
-        }
-        
-        return contents.compactMap { url in
-            guard url.pathExtension == "gguf" else { return nil }
-            return try? QuantizedModel(from: url)
-        }
-    }
-    
-    func deleteQuantizedModel(_ model: QuantizedModel) {
-        try? fileManager.removeItem(at: model.url)
-        loadHistory()
-    }
-    
-    // MARK: - Private Methods
-    
-    private func createModelsDirectory() {
-        try? fileManager.createDirectory(at: modelsDirectory, withIntermediateDirectories: true)
-    }
-    
-    private func loadHistory() {
-        // Load from UserDefaults or local storage
-        if let data = UserDefaults.standard.data(forKey: "quantizationHistory"),
-           let history = try? JSONDecoder().decode([QuantizationJob].self, from: data) {
-            quantizationHistory = history
-        }
-    }
-    
-    private func saveHistory() {
-        if let data = try? JSONEncoder().encode(quantizationHistory) {
-            UserDefaults.standard.set(data, forKey: "quantizationHistory")
-        }
-    }
-    
-    private func performQuantization(model: HFModel, quantization: QuantizationType, 
-                                     contextLength: Int?, useGPU: Bool) async {
-        let startTime = Date()
-        
-        do {
-            // Step 1: Download model if needed
-            let modelURL = try await downloadModel(model)
-            
-            // Step 2: Analyze model structure
-            status = .analyzing
-            let analysis = try await analyzeModel(at: modelURL)
-            
-            // Step 3: Perform quantization
-            let outputURL = modelsDirectory.appendingPathComponent("\(model.modelId)_\(quantization.rawValue).gguf")
-            
-            try await performActualQuantization(
-                inputURL: modelURL,
-                outputURL: outputURL,
-                analysis: analysis,
-                quantization: quantization,
-                contextLength: contextLength ?? model.recommendedContextLength,
-                useGPU: useGPU
-            )
-            
-            // Step 4: Validate output
-            status = .validating
-            try await validateQuantizedModel(at: outputURL)
-            
-            // Complete
-            let job = QuantizationJob(
-                id: UUID(),
-                originalModel: model,
-                quantizationType: quantization,
-                outputURL: outputURL,
-                outputSize: (try? fileManager.attributesOfItem(atPath: outputURL.path)[.size] as? Int64) ?? 0,
-                startTime: startTime,
-                endTime: Date(),
-                contextLength: contextLength ?? model.recommendedContextLength
-            )
-            
-            quantizationHistory.insert(job, at: 0)
-            saveHistory()
-            
-            status = .completed(outputURL: outputURL)
-            
-        } catch {
-            status = .failed(error: error.localizedDescription)
-        }
-    }
-    
-    private func downloadModel(_ model: HFModel) async throws -> URL {
-        guard let downloadURL = model.downloadURL else {
-            throw QuantizationError.noDownloadURL
-        }
-        
-        let destination = modelsDirectory.appendingPathComponent("\(model.modelId).tmp")
-        
-        // Check if already downloaded
-        if fileManager.fileExists(atPath: destination.path) {
-            let attrs = try fileManager.attributesOfItem(atPath: destination.path)
-            if let size = attrs[.size] as? Int64, size == model.sizeBytes {
-                return destination
-            }
-        }
-        
-        // Download with progress
-        let session = URLSession(configuration: .default)
-        
-        let (asyncBytes, response) = try await session.bytes(from: downloadURL)
-        let totalBytes = response.expectedContentLength
-        var downloadedBytes: Int64 = 0
-        var lastProgress: Double = 0
-        
-        var fileHandle = try FileHandle(forWritingTo: destination)
-        defer { try? fileHandle.close() }
-        
-        for try await byte in asyncBytes {
-            fileHandle.write(Data([byte]))
-            downloadedBytes += 1
-            
-            if totalBytes > 0 {
-                let currentProgress = Double(downloadedBytes) / Double(totalBytes)
-                if currentProgress - lastProgress > 0.01 {
-                    lastProgress = currentProgress
-                    await MainActor.run {
-                        self.status = .downloading(progress: currentProgress)
-                    }
-                }
-            }
-        }
-        
-        return destination
-    }
-    
-    private func analyzeModel(at url: URL) async throws -> ModelAnalysis {
-        // Read model file and analyze structure
-        let data = try Data(contentsOf: url, options: .mappedIfSafe)
-        
-        // Detect architecture and structure
-        var architecture: ModelArchitecture = .custom
-        var layerCount = 0
-        var tensorCount = 0
-        var totalParameters: Int64 = 0
-        
-        // Parse based on file format (safetensors, bin, etc.)
-        if url.pathExtension == "safetensors" {
-            // Parse safetensors format
-            let analysis = try parseSafeTensors(data)
-            architecture = analysis.architecture
-            layerCount = analysis.layerCount
-            tensorCount = analysis.tensorCount
-            totalParameters = analysis.totalParameters
-        } else if url.pathExtension == "bin" {
-            // Parse PyTorch bin format
-            let analysis = try parsePyTorchBin(data)
-            architecture = analysis.architecture
-            layerCount = analysis.layerCount
-            tensorCount = analysis.tensorCount
-            totalParameters = analysis.totalParameters
-        }
-        
-        return ModelAnalysis(
-            architecture: architecture,
-            layerCount: layerCount,
-            tensorCount: tensorCount,
-            totalParameters: totalParameters,
-            originalSize: Int64(data.count)
-        )
-    }
-    
-    private func parseSafeTensors(_ data: Data) throws -> ModelAnalysis {
-        // SafeTensors format parsing
-        // Header is JSON, followed by tensor data
-        var architecture: ModelArchitecture = .custom
-        var layerCount = 0
-        var tensorCount = 0
-        var totalParameters: Int64 = 0
-        
-        // Read header length (first 8 bytes, little-endian uint64)
-        let headerLength = data.prefix(8).withUnsafeBytes { $0.load(as: UInt64.self) }
-        
-        // Parse header JSON
-        let headerData = data.dropFirst(8).prefix(Int(headerLength))
-        if let header = try? JSONSerialization.jsonObject(with: headerData) as? [String: Any] {
-            
-            // Detect architecture from tensor names
-            let tensorNames = header.keys
-            if tensorNames.contains(where: { $0.contains("llama") || $0.contains("self_attn") }) {
-                architecture = .llama
-            } else if tensorNames.contains(where: { $0.contains("mistral") }) {
-                architecture = .mistral
-            } else if tensorNames.contains(where: { $0.contains("qwen") }) {
-                architecture = .qwen2
-            } else if tensorNames.contains(where: { $0.contains("gemma") }) {
-                architecture = .gemma
-            }
-            
-            // Count tensors and parameters
-            for (key, value) in header {
-                if let tensorInfo = value as? [String: Any],
-                   let shape = tensorInfo["shape"] as? [Int] {
-                    tensorCount += 1
-                    let paramCount = shape.reduce(1, *)
-                    totalParameters += Int64(paramCount)
-                    
-                    if key.contains("layers.") {
-                        layerCount = max(layerCount, Int(key.components(separatedBy: "layers.").last?.components(separatedBy: ".").first ?? "0") ?? 0)
-                    }
-                }
-            }
-        }
-        
-        return ModelAnalysis(
-            architecture: architecture,
-            layerCount: layerCount,
-            tensorCount: tensorCount,
-            totalParameters: totalParameters,
-            originalSize: Int64(data.count)
-        )
-    }
-    
-    private func parsePyTorchBin(_ data: Data) throws -> ModelAnalysis {
-        // PyTorch pickle format parsing (simplified)
-        // This would need a proper pickle parser for full support
-        return ModelAnalysis(
-            architecture: .custom,
-            layerCount: 0,
-            tensorCount: 0,
-            totalParameters: 0,
-            originalSize: Int64(data.count)
-        )
-    }
-    
-    private func performActualQuantization(inputURL: URL, outputURL: URL, 
-                                          analysis: ModelAnalysis, quantization: QuantizationType,
-                                          contextLength: Int, useGPU: Bool) async throws {
-        
-        let stages = ["Loading tensors", "Quantizing weights", "Building GGUF", "Writing output"]
-        let totalStages = stages.count
-        
-        for (index, stage) in stages.enumerated() {
-            try Task.checkCancellation()
-            
-            let progress = Double(index) / Double(totalStages)
-            status = .quantizing(progress: progress, stage: stage)
-            
-            // Simulate work (in real implementation, this would be actual quantization)
-            try await Task.sleep(nanoseconds: 500_000_000)
-            
-            // Actual quantization would happen here
-            if index == 1 {
-                try await quantizeTensors(inputURL: inputURL, outputURL: outputURL, 
-                                         analysis: analysis, quantization: quantization)
-            }
-        }
-        
-        status = .quantizing(progress: 1.0, stage: "Complete")
-    }
-    
-    private func quantizeTensors(inputURL: URL, outputURL: URL, 
-                                analysis: ModelAnalysis, quantization: QuantizationType) async throws {
-        
-        // Create GGUF file structure
-        var ggufBuilder = GGUFBuilder()
-        
-        // Add metadata
-        ggufBuilder.addMetadata(key: "general.architecture", value: .string(analysis.architecture.rawValue.lowercased()))
-        ggufBuilder.addMetadata(key: "general.name", value: .string(currentModel?.name ?? "Unknown"))
-        ggufBuilder.addMetadata(key: "general.quantization_version", value: .uint32(2))
-        
-        // Add tensor info
-        // This would read actual tensors and quantize them
-        
-        // Write GGUF file
-        let ggufData = try ggufBuilder.build()
-        try ggufData.write(to: outputURL)
-    }
-    
-    private func validateQuantizedModel(at url: URL) async throws {
-        // Verify the quantized model is valid
-        let data = try Data(contentsOf: url, options: .mappedIfSafe)
-        
-        // Check GGUF magic number
-        let magic = data.prefix(4)
-        guard magic == Data("GGUF".utf8) else {
-            throw QuantizationError.invalidOutput
-        }
-        
-        // Additional validation would go here
-    }
-}
-
-// MARK: - Supporting Types
-
-struct ModelAnalysis {
-    let architecture: ModelArchitecture
-    let layerCount: Int
-    let tensorCount: Int
-    let totalParameters: Int64
-    let originalSize: Int64
-}
-
-struct QuantizationJob: Codable, Identifiable {
-    let id: UUID
-    let originalModel: HFModel
-    let quantizationType: QuantizationType
-    let outputURL: URL
-    let outputSize: Int64
-    let startTime: Date
-    let endTime: Date
-    let contextLength: Int
-    
-    var duration: TimeInterval {
-        return endTime.timeIntervalSince(startTime)
-    }
-    
-    var compressionRatio: Double {
-        return Double(originalModel.sizeBytes) / Double(outputSize)
-    }
-}
-
-struct QuantizedModel: Identifiable {
-    let id = UUID()
-    let url: URL
-    let name: String
-    let size: Int64
-    let quantization: QuantizationType
-    let createdDate: Date
-    
-    init?(from url: URL) throws {
-        self.url = url
-        self.name = url.deletingPathExtension().lastPathComponent
-        
-        let attrs = try FileManager.default.attributesOfItem(atPath: url.path)
-        self.size = attrs[.size] as? Int64 ?? 0
-        self.createdDate = attrs[.creationDate] as? Date ?? Date()
-        
-        // Detect quantization from filename
-        let filename = url.lastPathComponent.lowercased()
-        if let qType = QuantizationType.allCases.first(where: { filename.contains($0.rawValue.lowercased()) }) {
-            self.quantization = qType
-        } else {
-            self.quantization = .q4_0
-        }
-    }
-}
-
-enum QuantizationError: Error, LocalizedError {
-    case noDownloadURL
-    case downloadFailed
-    case invalidModelFormat
-    case quantizationFailed
-    case invalidOutput
-    case insufficientMemory
-    case cancelled
-    
-    var errorDescription: String? {
-        switch self {
-        case .noDownloadURL: return "No download URL provided for model"
-        case .downloadFailed: return "Failed to download model"
-        case .invalidModelFormat: return "Unsupported model format"
-        case .quantizationFailed: return "Quantization process failed"
-        case .invalidOutput: return "Generated model is invalid"
-        case .insufficientMemory: return "Insufficient memory for quantization"
-        case .cancelled: return "Quantization cancelled"
-        }
-    }
-}
-
-// MARK: - Integer to Data Extension
-
-extension FixedWidthInteger {
-    var littleEndianData: Data {
-        var value = self.littleEndian
-        return withUnsafeBytes(of: &value) { Data($0) }
-    }
-}
-
-// MARK: - GGUF Builder
-
-struct GGUFBuilder {
-    enum MetadataValue {
-        case uint32(UInt32)
-        case uint64(UInt64)
-        case int32(Int32)
-        case int64(Int64)
-        case float32(Float)
-        case float64(Double)
-        case bool(Bool)
-        case string(String)
-        case array([MetadataValue])
-    }
-    
-    private var metadata: [(String, MetadataValue)] = []
-    private var tensors: [(name: String, shape: [Int], data: Data)] = []
-    
-    mutating func addMetadata(key: String, value: MetadataValue) {
-        metadata.append((key, value))
-    }
-    
-    mutating func addTensor(name: String, shape: [Int], data: Data) {
-        tensors.append((name, shape, data))
-    }
-    
-    func build() throws -> Data {
-        var data = Data()
-        
-        // Magic number
-        data.append(Data("GGUF".utf8))
-        
-        // Version
-        data.append(UInt32(3).littleEndianData)
-        
-        // Tensor count
-        data.append(UInt64(tensors.count).littleEndianData)
-        
-        // Metadata count
-        data.append(UInt64(metadata.count).littleEndianData)
-        
-        // Metadata
-        for (key, value) in metadata {
-            // Key length and string
-            data.append(UInt64(key.utf8.count).littleEndianData)
-            data.append(Data(key.utf8))
-            
-            // Value type and data
-            switch value {
-            case .uint32(let v):
-                data.append(UInt32(4).littleEndianData) // type
-                data.append(v.littleEndianData)
-            case .uint64(let v):
-                data.append(UInt32(5).littleEndianData)
-                data.append(v.littleEndianData)
-            case .string(let s):
-                data.append(UInt32(8).littleEndianData)
-                data.append(UInt64(s.utf8.count).littleEndianData)
-                data.append(Data(s.utf8))
-            default:
-                break
-            }
-        }
-        
-        // Tensor info and data would follow
-        
-        return data
-    }
-}
diff --git a/ModelQuantizer/Services/QuantizationEngine.swift b/ModelQuantizer/Services/QuantizationEngine.swift
index fe801d1..a7d762d 100644
--- a/ModelQuantizer/Services/QuantizationEngine.swift
+++ b/ModelQuantizer/Services/QuantizationEngine.swift
@@ -2,7 +2,7 @@
 //  QuantizationEngine.swift
 //  ModelQuantizer
 //
-//  Real ML model quantization engine using llama.cpp
+//  Experimental on-device quantization engine.
 //
 
 import Foundation
@@ -10,7 +10,7 @@ import Accelerate
 import Metal
 import MetalPerformanceShaders
 
-/// Real quantization engine that performs actual model quantization
+/// Experimental quantization engine for GGUF conversion/quantization prototypes.
 @MainActor
 class QuantizationEngine: ObservableObject {
     static let shared = QuantizationEngine()
@@ -263,7 +263,7 @@ class QuantizationEngine: ObservableObject {
         
         // Read header length (first 8 bytes, little-endian uint64)
         let headerLength = data.prefix(8).withUnsafeBytes { ptr -> UInt64 in
-            ptr.load(as: UInt64.self)
+            UInt64(littleEndian: ptr.loadUnaligned(as: UInt64.self))
         }
         
         guard headerLength > 0 && headerLength < UInt64(data.count) else {
@@ -365,29 +365,33 @@ class QuantizationEngine: ObservableObject {
     
     private func processSafeTensorsFile(_ url: URL, into builder: inout GGUFBuilder) async throws {
         let data = try Data(contentsOf: url, options: .mappedIfSafe)
+        guard data.count >= 8 else { throw QuantizationError.invalidModelFormat }
         
         // Read header
-        let headerLength = data.prefix(8).withUnsafeBytes { $0.load(as: UInt64.self) }
+        let headerLength = data.prefix(8).withUnsafeBytes { UInt64(littleEndian: $0.loadUnaligned(as: UInt64.self)) }
+        guard headerLength <= UInt64(data.count - 8) else { throw QuantizationError.invalidModelFormat } // header must fit in the file; also keeps Int(headerLength) from trapping
+        let dataSectionOffset = 8 + Int(headerLength) // data_offsets are applied relative to the first byte after the header
         let headerData = data.dropFirst(8).prefix(Int(headerLength))
         
         guard let header = try JSONSerialization.jsonObject(with: headerData) as? [String: Any] else { return }
         
-        var offset = 8 + Int(headerLength)
-        
         for (key, value) in header {
             try Task.checkCancellation()
             
+            if key == "__metadata__" { continue }
+            
             guard let tensorInfo = value as? [String: Any],
                   let shape = tensorInfo["shape"] as? [Int],
-                  let dtype = tensorInfo["dtype"] as? String else { continue }
+                  let dtype = tensorInfo["dtype"] as? String,
+                  let dataOffsets = tensorInfo["data_offsets"] as? [Int],
+                  dataOffsets.count == 2,
+                  dataOffsets[0] >= 0, dataOffsets[0] < dataOffsets[1], // validate before adding so hostile offsets cannot overflow
+                  dataOffsets[1] <= data.count - dataSectionOffset else { continue }
             
-            // Calculate tensor size
-            let numElements = shape.reduce(1, *)
-            let elementSize = dtypeSize(for: dtype)
-            let tensorSize = numElements * elementSize
+            let tensorRange = (dataSectionOffset + dataOffsets[0])..<(dataSectionOffset + dataOffsets[1])
             
             // Read tensor data
-            let tensorData = data.subdata(in: offset..<(offset + tensorSize))
+            let tensorData = data.subdata(in: tensorRange)
             
             // Convert tensor name to GGUF format
             let ggufName = convertTensorName(key)
@@ -396,36 +400,24 @@ class QuantizationEngine: ObservableObject {
             builder.addTensor(
                 name: ggufName,
                 shape: shape.map { UInt32($0) },
-                dataType: .float16,
+                dataType: ggmlType(for: dtype),
                 data: tensorData
             )
-            
-            offset += tensorSize
-            
-            // Align to 32 bytes
-            let alignment = 32
-            let padding = (alignment - (offset % alignment)) % alignment
-            offset += padding
         }
     }
     
-    private func dtypeSize(for dtype: String) -> Int {
+    private func ggmlType(for dtype: String) -> GGMLType { // maps a tensor header's dtype string to the GGML type recorded for that tensor
         switch dtype {
-        case "F32", "float32": return 4
-        case "F16", "float16": return 2
-        case "BF16", "bfloat16": return 2
-        case "I32", "int32": return 4
-        case "I16", "int16": return 2
-        case "I8", "int8": return 1
-        case "U8", "uint8": return 1
-        case "BOOL": return 1
-        default: return 4
+        case "F16", "float16", "BF16", "bfloat16": // NOTE(review): BF16 bits are not IEEE half; tagging them .float16 without converting the payload looks wrong — confirm
+            return .float16
+        default: // NOTE(review): every other dtype (incl. the integer/BOOL strings the removed size table listed) is tagged .float32 — verify payload sizes still match
+            return .float32
         }
     }
     
     private func convertTensorName(_ name: String) -> String {
         // Convert Hugging Face tensor names to GGUF format
-        var converted = name
+        let converted = name
             .replacingOccurrences(of: "model.embed_tokens.", with: "token_embd.")
             .replacingOccurrences(of: "model.norm.", with: "output_norm.")
             .replacingOccurrences(of: "lm_head.", with: "output.")
@@ -522,10 +514,6 @@ class QuantizationEngine: ObservableObject {
             return try quantizeToQ4_0(tensor)
         case .q4_1:
             return try quantizeToQ4_1(tensor)
-        case .q5_0:
-            return try quantizeToQ5_0(tensor)
-        case .q5_1:
-            return try quantizeToQ5_1(tensor)
         case .q8_0:
             return try quantizeToQ8_0(tensor)
         case .fp16:
@@ -533,29 +521,22 @@ class QuantizationEngine: ObservableObject {
         case .fp32:
             return tensor
         default:
-            // Default to Q4_0 for K-quants
-            return try quantizeToQ4_0(tensor)
+            throw QuantizationError.unsupportedQuantization(type: quantization.rawValue)
         }
     }
     
     // Q4_0 quantization: 4-bit with block-wise scaling
     private func quantizeToQ4_0(_ tensor: GGUFTensor) throws -> GGUFTensor {
         let blockSize = 32
-        let numElements = tensor.data.count / MemoryLayout<Float>.size
+        let floatData = try tensorFloatValues(from: tensor)
+        let numElements = floatData.count
         let numBlocks = (numElements + blockSize - 1) / blockSize
         
         var outputData = Data()
         
-        // Read float data
-        let floatData = tensor.data.withUnsafeBytes { ptr -> [Float] in
-            Array(ptr.bindMemory(to: Float.self))
-        }
-        
         for blockIdx in 0..<numBlocks {
             let startIdx = blockIdx * blockSize
             let endIdx = min(startIdx + blockSize, numElements)
-            let blockElements = endIdx - startIdx
-            
             // Find max absolute value in block
             var maxAbs: Float = 0
             for i in startIdx..<endIdx {
@@ -603,11 +584,11 @@ class QuantizationEngine: ObservableObject {
     // Q4_1 quantization: 4-bit with block-wise min/max
     private func quantizeToQ4_1(_ tensor: GGUFTensor) throws -> GGUFTensor {
         let blockSize = 32
-        let numElements = tensor.data.count / MemoryLayout<Float>.size
+        let floatData = try tensorFloatValues(from: tensor)
+        let numElements = floatData.count
         let numBlocks = (numElements + blockSize - 1) / blockSize
         
         var outputData = Data()
-        let floatData = tensor.data.withUnsafeBytes { Array($0.bindMemory(to: Float.self)) }
         
         for blockIdx in 0..<numBlocks {
             let startIdx = blockIdx * blockSize
@@ -650,28 +631,14 @@ class QuantizationEngine: ObservableObject {
         return GGUFTensor(name: tensor.name, shape: tensor.shape, dataType: .q4_1, data: outputData)
     }
     
-    // Q5_0 quantization
-    private func quantizeToQ5_0(_ tensor: GGUFTensor) throws -> GGUFTensor {
-        // Similar to Q4_0 but with 5-bit precision
-        // Implementation would follow similar pattern with 32-element blocks
-        // For brevity, using Q4_0 as fallback
-        return try quantizeToQ4_0(tensor)
-    }
-    
-    // Q5_1 quantization
-    private func quantizeToQ5_1(_ tensor: GGUFTensor) throws -> GGUFTensor {
-        // Similar to Q4_1 but with 5-bit precision
-        return try quantizeToQ4_1(tensor)
-    }
-    
     // Q8_0 quantization: 8-bit with block-wise scaling
     private func quantizeToQ8_0(_ tensor: GGUFTensor) throws -> GGUFTensor {
         let blockSize = 32
-        let numElements = tensor.data.count / MemoryLayout<Float>.size
+        let floatData = try tensorFloatValues(from: tensor)
+        let numElements = floatData.count
         let numBlocks = (numElements + blockSize - 1) / blockSize
         
         var outputData = Data()
-        let floatData = tensor.data.withUnsafeBytes { Array($0.bindMemory(to: Float.self)) }
         
         for blockIdx in 0..<numBlocks {
             let startIdx = blockIdx * blockSize
@@ -704,7 +671,7 @@ class QuantizationEngine: ObservableObject {
     
     // FP16 conversion
     private func convertToFP16(_ tensor: GGUFTensor) throws -> GGUFTensor {
-        let floatData = tensor.data.withUnsafeBytes { Array($0.bindMemory(to: Float.self)) }
+        let floatData = try tensorFloatValues(from: tensor)
         var outputData = Data()
         
         for value in floatData {
@@ -715,12 +682,31 @@ class QuantizationEngine: ObservableObject {
         return GGUFTensor(name: tensor.name, shape: tensor.shape, dataType: .float16, data: outputData)
     }
     
+    /// Decodes a float32/float16 tensor payload into Float values; throws invalidModelFormat for other types or ragged byte counts.
+    private func tensorFloatValues(from tensor: GGUFTensor) throws -> [Float] {
+        switch tensor.dataType {
+        case .float32:
+            guard tensor.data.count.isMultiple(of: MemoryLayout<Float>.size) else { throw QuantizationError.invalidModelFormat }
+            return tensor.data.withUnsafeBytes { raw in // unaligned little-endian loads: bindMemory would require Float-aligned storage
+                stride(from: 0, to: raw.count, by: 4).map { Float(bitPattern: UInt32(littleEndian: raw.loadUnaligned(fromByteOffset: $0, as: UInt32.self))) }
+            }
+        case .float16:
+            guard tensor.data.count.isMultiple(of: MemoryLayout<UInt16>.size) else { throw QuantizationError.invalidModelFormat }
+            return tensor.data.withUnsafeBytes { raw in // same unaligned little-endian read, two bytes per half-precision word
+                stride(from: 0, to: raw.count, by: 2).map { Float16(bits: UInt16(littleEndian: raw.loadUnaligned(fromByteOffset: $0, as: UInt16.self))).floatValue }
+            }
+        default:
+            throw QuantizationError.invalidModelFormat
+        }
+    }
+    
     // MARK: - Step 5: Validate
     
     private func validateQuantizedModel(at url: URL, originalModel: HFModel) async throws {
         await updateStatus(.validating, stage: "Validating output...")
         
         let data = try Data(contentsOf: url, options: .mappedIfSafe)
+        guard data.count >= 8 else { throw QuantizationError.invalidOutput }
         
         // Check GGUF magic number
         let magic = data.prefix(4)
@@ -729,7 +715,7 @@ class QuantizationEngine: ObservableObject {
         }
         
         // Verify version
-        let version = data.dropFirst(4).prefix(4).withUnsafeBytes { $0.load(as: UInt32.self) }
+        let version = data.dropFirst(4).prefix(4).withUnsafeBytes { UInt32(littleEndian: $0.loadUnaligned(as: UInt32.self)) }
         guard version == 3 else {
             throw QuantizationError.invalidOutput
         }
@@ -889,25 +875,31 @@ public struct GGUFParser {
     }
     
     internal mutating func readData(count: Int) -> Data {
+        // Returns the next `count` bytes and advances `offset`, or empty Data when out of bounds; the check subtracts so a huge `count` cannot overflow `offset + count` and trap.
+        guard count >= 0, offset >= 0, offset <= self.data.count,
+              count <= self.data.count - offset else { return Data() }
         let data = self.data.subdata(in: offset..<(offset + count))
         offset += count
         return data
     }
     
     private mutating func readUInt32() -> UInt32 {
-        let value = data.subdata(in: offset..<(offset + 4)).withUnsafeBytes { $0.load(as: UInt32.self) }
-        offset += 4
-        return value
+        let bytes = readData(count: 4) // readData advances `offset` on success and returns empty Data when out of bounds
+        guard bytes.count == 4 else { return 0 } // a short read yields 0 instead of an error
+        let value = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt32.self) } // unaligned load: no alignment requirement on the buffer
+        return UInt32(littleEndian: value) // treat the stored bytes as little-endian
     }
     
     private mutating func readUInt64() -> UInt64 {
-        let value = data.subdata(in: offset..<(offset + 8)).withUnsafeBytes { $0.load(as: UInt64.self) }
-        offset += 8
-        return value
+        let bytes = readData(count: 8) // readData advances `offset` on success and returns empty Data when out of bounds
+        guard bytes.count == 8 else { return 0 } // a short read yields 0 instead of an error
+        let value = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt64.self) } // unaligned load: no alignment requirement on the buffer
+        return UInt64(littleEndian: value) // treat the stored bytes as little-endian
     }
     
     private mutating func readString() -> String {
         let length = Int(readUInt64())
+        guard length >= 0, offset >= 0, offset + length <= data.count else { return "" }
         let stringData = data.subdata(in: offset..<(offset + length))
         offset += length
         return String(data: stringData, encoding: .utf8) ?? ""
@@ -922,25 +914,34 @@ public struct GGUFParser {
         case 1: // INT8
             return .int8(Int8(bitPattern: readData(count: 1).first ?? 0))
         case 2: // UINT16
-            return .uint16(readData(count: 2).withUnsafeBytes { $0.load(as: UInt16.self) })
+            let bytes = readData(count: 2)
+            guard bytes.count == 2 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) }
+            return .uint16(UInt16(littleEndian: raw))
         case 3: // INT16
-            return .int16(readData(count: 2).withUnsafeBytes { $0.load(as: Int16.self) })
+            let bytes = readData(count: 2)
+            guard bytes.count == 2 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) }
+            return .int16(Int16(bitPattern: UInt16(littleEndian: raw)))
         case 4: // UINT32
             return .uint32(readUInt32())
         case 5: // INT32
             return .int32(Int32(bitPattern: readUInt32()))
         case 6: // FLOAT32
-            return .float32(readData(count: 4).withUnsafeBytes { $0.load(as: Float.self) })
+            let bytes = readData(count: 4)
+            guard bytes.count == 4 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt32.self) }
+            return .float32(Float(bitPattern: UInt32(littleEndian: raw)))
         case 7: // BOOL
             return .bool(readData(count: 1).first != 0)
         case 8: // STRING
             return .string(readString())
         case 9: // ARRAY
-            let _ = readUInt32() // element type
+            let elementType = readUInt32()
             let count = readUInt64()
             var array: [GGUFBuilder.MetadataValue] = []
             for _ in 0..<count {
-                array.append(try readMetadataValue())
+                array.append(try readMetadataArrayElement(type: elementType))
             }
             return .array(array)
         case 10: // UINT64
@@ -948,7 +949,45 @@ public struct GGUFParser {
         case 11: // INT64
             return .int64(Int64(bitPattern: readUInt64()))
         case 12: // FLOAT64
-            return .float64(readData(count: 8).withUnsafeBytes { $0.load(as: Double.self) })
+            let bytes = readData(count: 8)
+            guard bytes.count == 8 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt64.self) }
+            return .float64(Double(bitPattern: UInt64(littleEndian: raw)))
+        default:
+            throw QuantizationError.invalidModelFormat
+        }
+    }
+    
+    private mutating func readMetadataArrayElement(type: UInt32) throws -> GGUFBuilder.MetadataValue {
+        switch type {
+        case 0: return .uint8(readData(count: 1).first ?? 0)
+        case 1: return .int8(Int8(bitPattern: readData(count: 1).first ?? 0))
+        case 2:
+            let bytes = readData(count: 2)
+            guard bytes.count == 2 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) }
+            return .uint16(UInt16(littleEndian: raw))
+        case 3:
+            let bytes = readData(count: 2)
+            guard bytes.count == 2 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt16.self) }
+            return .int16(Int16(bitPattern: UInt16(littleEndian: raw)))
+        case 4: return .uint32(readUInt32())
+        case 5: return .int32(Int32(bitPattern: readUInt32()))
+        case 6:
+            let bytes = readData(count: 4)
+            guard bytes.count == 4 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt32.self) }
+            return .float32(Float(bitPattern: UInt32(littleEndian: raw)))
+        case 7: return .bool(readData(count: 1).first != 0)
+        case 8: return .string(readString())
+        case 10: return .uint64(readUInt64())
+        case 11: return .int64(Int64(bitPattern: readUInt64()))
+        case 12:
+            let bytes = readData(count: 8)
+            guard bytes.count == 8 else { throw QuantizationError.invalidModelFormat }
+            let raw = bytes.withUnsafeBytes { $0.loadUnaligned(as: UInt64.self) }
+            return .float64(Double(bitPattern: UInt64(littleEndian: raw)))
         default:
             throw QuantizationError.invalidModelFormat
         }
@@ -971,8 +1010,17 @@ public struct GGUFParser {
         case .q8_0: elementSize = 34 // 2 bytes scale + 32 bytes data per 32 elements
         }
         
-        let tensorSize = Int(numElements) * elementSize / 32 // Adjust for block sizes
-        let tensorData = readData(count: max(tensorSize, Int(numElements) * 4)) // Fallback to 4 bytes per element
+        let tensorSize: Int
+        switch info.type {
+        case .float32, .float16:
+            tensorSize = Int(numElements) * elementSize
+        default:
+            tensorSize = ((Int(numElements) + 31) / 32) * elementSize // Block quantized formats
+        }
+        let tensorData = readData(count: tensorSize)
+        guard tensorData.count == tensorSize else {
+            throw QuantizationError.invalidModelFormat
+        }
         
         return GGUFTensor(
             name: info.name,
@@ -1002,8 +1050,39 @@ struct Float16: Equatable {
 }
 
 private func floatToHalf(_ value: Float) -> UInt16 {
-    // Simplified for CI validation speed - just return zero
-    return 0
+    let bits = value.bitPattern
+    let sign = UInt16((bits >> 16) & 0x8000)
+    var exponent = Int((bits >> 23) & 0xFF) - 127 + 15
+    var mantissa = bits & 0x007F_FFFF
+    
+    if exponent <= 0 {
+        if exponent < -10 { return sign }
+        mantissa |= 0x0080_0000
+        let shift = UInt32(14 - exponent)
+        var halfMantissa = UInt16(mantissa >> shift)
+        if ((mantissa >> (shift - 1)) & 1) == 1 {
+            halfMantissa &+= 1
+        }
+        return sign | halfMantissa
+    }
+    
+    if exponent >= 31 {
+        return sign | 0x7C00
+    }
+    
+    var halfMantissa = UInt16(mantissa >> 13)
+    if ((mantissa >> 12) & 1) == 1 {
+        halfMantissa &+= 1
+        if halfMantissa == 0x0400 {
+            halfMantissa = 0
+            exponent += 1
+            if exponent >= 31 {
+                return sign | 0x7C00
+            }
+        }
+    }
+    
+    return sign | UInt16(exponent << 10) | halfMantissa
 }
 
 private func halfToFloat(_ bits: UInt16) -> Float {
@@ -1072,6 +1151,7 @@ enum QuantizationError: Error, LocalizedError {
     case invalidOutput
     case insufficientMemory
     case cancelled
+    case unsupportedQuantization(type: String)
     
     var errorDescription: String? {
         switch self {
@@ -1091,6 +1171,8 @@ enum QuantizationError: Error, LocalizedError {
             return "Insufficient memory for quantization"
         case .cancelled:
             return "Quantization was cancelled"
+        case .unsupportedQuantization(let type):
+            return "Quantization type \(type) is not supported in this build"
         }
     }
 }
diff --git a/ModelQuantizer/ViewModels/QuantizeViewModel.swift b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
index 6cf0fac..f63d1b7 100644
--- a/ModelQuantizer/ViewModels/QuantizeViewModel.swift
+++ b/ModelQuantizer/ViewModels/QuantizeViewModel.swift
@@ -10,12 +10,11 @@ import Combine
 
 @MainActor
 class QuantizeViewModel: ObservableObject {
-    func filterModels(query: String) {} // Placeholder to fix compiler error
     @Published var searchQuery = ""
     @Published var models: [HFModel] = []
     @Published var filteredModels: [HFModel] = []
     @Published var selectedModel: HFModel?
-    @Published var selectedQuantization: QuantizationType = .q4_K_M
+    @Published var selectedQuantization: QuantizationType = .q4_1
     @Published var customContextLength: Int = 4096
     @Published var isSearching = false
     @Published var errorMessage: String?
@@ -211,7 +210,7 @@ class QuantizeViewModel: ObservableObject {
                 let existingIds = Set(self.models.map { $0.modelId })
                 let newModels = popularModels.filter { !existingIds.contains($0.modelId) }
                 self.models.append(contentsOf: newModels)
-                self.filterModels(query: self.searchQuery)
+                self.filterLocalModels(query: self.searchQuery)
             }
         } catch {
             // Silently fail - we already have fallback models
@@ -295,6 +294,12 @@ class QuantizeViewModel: ObservableObject {
     func startQuantization() {
         guard let model = selectedModel else { return }
         
+        guard model.architecture.supportedQuantizations.contains(selectedQuantization) else {
+            errorMessage = "\(selectedQuantization.rawValue) is not supported for \(model.architecture.rawValue) in this build."
+            showError = true
+            return
+        }
+        
         // Check if model requires authentication
         if model.modelId.hasPrefix("meta-llama/") && HuggingFaceAPI.shared.getAuthToken() == nil {
             errorMessage = "This model requires Hugging Face authentication. Please add your token in Settings."
@@ -354,14 +359,12 @@ class QuantizeViewModel: ObservableObject {
     
     private func quantizationTypeFromBits(_ bits: Int) -> QuantizationType {
         switch bits {
-        case 2: return .q2_K
-        case 3: return .q3_K_M
-        case 4: return .q4_K_M
-        case 5: return .q5_K_M
-        case 6: return .q6_K
+        case 4: return .q4_1
+        case 5: return .q8_0 // no 5-bit type is returned any more; mapped up to Q8_0
+        case 6: return .q8_0 // likewise for 6-bit
         case 8: return .q8_0
         case 16: return .fp16
-        default: return .q4_K_M
+        default: return .q4_1 // any other width, including 2 and 3, falls back to Q4_1
         }
     }
 }
diff --git a/ModelQuantizer/Views/QuantizeView.swift b/ModelQuantizer/Views/QuantizeView.swift
index edba13e..27658ae 100644
--- a/ModelQuantizer/Views/QuantizeView.swift
+++ b/ModelQuantizer/Views/QuantizeView.swift
@@ -74,6 +74,10 @@ struct QuantizeView: View {
             Text("Search Hugging Face and quantize models")
                 .font(.system(size: 16, weight: .medium))
                 .foregroundStyle(.white.opacity(0.7))
+            
+            Text("Experimental: output quality/compatibility may vary by model.")
+                .font(.system(size: 12, weight: .medium))
+                .foregroundStyle(.orange.opacity(0.9))
         }
         .frame(maxWidth: .infinity, alignment: .leading)
     }
diff --git a/README.md b/README.md
index c593e0a..2984c11 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
 # ModelQuantizer
 
-A fully functional iOS app for quantizing Hugging Face AI models directly on your device. Built with SwiftUI and featuring real ML quantization capabilities.
+An experimental iOS app for downloading compatible Hugging Face model files and converting/quantizing them to GGUF on-device. Built with SwiftUI.
 
 ![App Icon](ModelQuantizer/Resources/Assets.xcassets/AppIcon.appiconset/appicon-1024.png)
 
 ## Features
 
-### Real ML Quantization
-- **Actual Quantization**: Converts models to GGUF format with Q2_K through FP32 quantization types
+### On-Device Quantization (Current Build)
+- **Implemented quantizers**: Q4_0, Q4_1, Q8_0, FP16, FP32
 - **Hugging Face Integration**: Search and download models directly from Hugging Face Hub
 - **Architecture Support**: Llama, Mistral, Qwen2, Gemma, Phi, Falcon, GPT-2, BERT
 - **Real Progress**: Live progress tracking during download, analysis, and quantization
@@ -30,6 +30,13 @@ A fully functional iOS app for quantizing Hugging Face AI models directly on you
 - **Detailed Info**: View parameters, downloads, likes, and supported quantizations
 - **One-Tap Quantize**: Start quantization directly from model details
 
+## Current Limitations (Important)
+
+- This project is **experimental** and does not yet implement full llama.cpp parity.
+- Quantization quality and output compatibility can vary by architecture/model checkpoint.
+- Only the quantizers listed in this README are implemented in this build.
+- Verify generated GGUF files in your target runtime before production use.
+
 ## Requirements
 
 - iOS 18.0+
@@ -93,25 +100,23 @@ Some models (like Llama) require authentication:
 
 | Type | Bits | Compression | Quality | Use Case |
 |------|------|-------------|---------|----------|
-| Q2_K | 2 | 16× | Low | Entry-level devices |
-| Q3_K_M | 3 | 10.7× | Fair | Limited RAM |
-| Q4_K_M | 4 | 8× | Good | Balanced (Recommended) |
-| Q5_K_M | 5 | 6.4× | Very Good | High-end devices |
-| Q6_K | 6 | 5.3× | Excellent | Premium devices |
+| Q4_0 | 4 | 8× | Good | Fast 4-bit |
+| Q4_1 | 4 | 8× | Better | Better 4-bit accuracy |
 | Q8_0 | 8 | 4× | Near-Perfect | Maximum quality |
 | FP16 | 16 | 2× | Original | Research/development |
+| FP32 | 32 | 1× | Original | Baseline/uncompressed |
 
 ## Device Compatibility
 
 ### Ultra (iPhone 16 Pro/Max, iPad Pro M4)
 - **Max Model Size**: 24GB
-- **Recommended**: Q5-Q6 quantization
+- **Recommended**: Q8_0 quantization
 - **Context**: Up to 32K tokens
 - **Features**: Full Neural Engine, all GPU layers
 
 ### Flagship (iPhone 16/15 Pro)
 - **Max Model Size**: 12GB
-- **Recommended**: Q4-Q5 quantization
+- **Recommended**: Q4_1 to Q8_0 quantization
 - **Context**: Up to 16K tokens
 - **Features**: Neural Engine, most GPU layers
 
@@ -123,13 +128,13 @@ Some models (like Llama) require authentication:
 
 ### Mid-Range (iPhone 12/11)
 - **Max Model Size**: 4GB
-- **Recommended**: Q3-Q4 quantization
+- **Recommended**: Q4_0 quantization
 - **Context**: Up to 4K tokens
 - **Features**: Limited GPU
 
 ### Entry-Level
 - **Max Model Size**: 2GB
-- **Recommended**: Q2-Q3 quantization
+- **Recommended**: Q4_0 quantization
 - **Context**: Up to 2K tokens
 - **Features**: CPU only
 
@@ -146,7 +151,7 @@ Some models (like Llama) require authentication:
 - Real tensor analysis and quantization
 - Memory-mapped file I/O
 - Progressive quantization with checkpointing
-- Support for Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, FP16, FP32
+- Support for Q4_0, Q4_1, Q8_0, FP16, FP32
 
 ### Performance
 - Background processing with progress callbacks