Skip to content
Open
4 changes: 4 additions & 0 deletions ModelQuantizer.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
AA00001B /* HuggingFaceAPI.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001A /* HuggingFaceAPI.swift */; };
AA00001D /* GGUFBuilder.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001C /* GGUFBuilder.swift */; };
AA00001F /* ModelTypes.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA00001E /* ModelTypes.swift */; };
AA000100 /* KeychainTokenStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = AA000101 /* KeychainTokenStore.swift */; };
AA000021 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = AA000020 /* Assets.xcassets */; };
AA000023 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = AA000022 /* Preview Assets.xcassets */; };
/* End PBXBuildFile section */
Expand All @@ -44,6 +45,7 @@
AA00001A /* HuggingFaceAPI.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HuggingFaceAPI.swift; sourceTree = "<group>"; };
AA00001C /* GGUFBuilder.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GGUFBuilder.swift; sourceTree = "<group>"; };
AA00001E /* ModelTypes.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelTypes.swift; sourceTree = "<group>"; };
AA000101 /* KeychainTokenStore.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeychainTokenStore.swift; sourceTree = "<group>"; };
AA000020 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
AA000022 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
AA000024 /* ModelQuantizer.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = ModelQuantizer.app; sourceTree = BUILT_PRODUCTS_DIR; };
Expand Down Expand Up @@ -120,6 +122,7 @@
AA000008 /* SettingsSuggester.swift */,
AA00001A /* HuggingFaceAPI.swift */,
AA00001C /* GGUFBuilder.swift */,
AA000101 /* KeychainTokenStore.swift */,
);
path = Services;
sourceTree = "<group>";
Expand Down Expand Up @@ -235,6 +238,7 @@
AA00001B /* HuggingFaceAPI.swift in Sources */,
AA00001D /* GGUFBuilder.swift in Sources */,
AA00001F /* ModelTypes.swift in Sources */,
AA000100 /* KeychainTokenStore.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
2 changes: 1 addition & 1 deletion ModelQuantizer/ContentView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ struct ContentView: View {

DeviceInfoView()
.tabItem {
Image(systemName: "iphone.gen3")
Image(systemName: "iphone")
Text("Device")
}
.tag(3)
Expand Down
115 changes: 114 additions & 1 deletion ModelQuantizer/Models/ModelTypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ enum ModelArchitecture: String, Codable, CaseIterable {
var supportedQuantizations: [QuantizationType] {
switch self {
case .llama, .mistral, .qwen2, .gemma, .phi:
return [.q4_0, .q4_1, .q5_0, .q5_1, .q8_0, .fp16, .fp32]
return [.q4_0, .q4_1, .q8_0, .fp16, .fp32]
case .falcon, .gpt2:
return [.q4_0, .q4_1, .q8_0, .fp16]
case .bert:
Expand Down Expand Up @@ -206,6 +206,8 @@ struct QuantizationJob: Codable, Identifiable {
let startTime: Date
let endTime: Date
let contextLength: Int
let estimatedTokensPerSecond: Double?
let validationScore: Double?

var duration: TimeInterval {
return endTime.timeIntervalSince(startTime)
Expand Down Expand Up @@ -294,6 +296,7 @@ struct HFModelConfig: Codable {

/// Codable record pairing a file name with an optional size.
// NOTE(review): presumably mirrors one entry of the Hugging Face "siblings" file list — confirm against the API payload.
struct HFSibling: Codable {
// File name exactly as decoded from the `rfilename` key.
let rfilename: String
// Optional, so synthesized decoding yields nil when the `size` key is absent.
// NOTE(review): unit is presumably bytes — verify.
let size: Int64?
}

// MARK: - Performance Estimate
Expand Down Expand Up @@ -324,3 +327,113 @@ struct InferenceSettings {
let maxTokens: Int
let quantizationType: QuantizationType
}

/// Reasons a model download or quantization attempt can fail.
///
/// Every case carries a fixed English message exposed through
/// `LocalizedError.errorDescription`.
enum QuantizationError: Error, LocalizedError {
    case noDownloadURL
    case noModelFiles
    case downloadFailed
    case invalidModelFormat
    case unsupportedVersion
    case quantizationFailed
    case invalidOutput
    case insufficientMemory
    case cancelled

    /// Human-readable explanation of the failure; non-nil for every case.
    var errorDescription: String? {
        let message: String
        // Exhaustive switch with no `default`, so a new case fails to compile
        // until it is given a message.
        switch self {
        case .noDownloadURL:
            message = "No download URL provided for model"
        case .noModelFiles:
            message = "No model files found in repository"
        case .downloadFailed:
            message = "Failed to download model files"
        case .invalidModelFormat:
            message = "Invalid or unsupported model format"
        case .unsupportedVersion:
            message = "Unsupported GGUF version"
        case .quantizationFailed:
            message = "Quantization process failed"
        case .invalidOutput:
            message = "Generated model file is invalid"
        case .insufficientMemory:
            message = "Insufficient memory for quantization"
        case .cancelled:
            message = "Quantization was cancelled"
        }
        return message
    }
}


/// Caseless enum used as a namespace for the hard-coded model list below.
enum ModelCatalog {
/// Hard-coded `HFModel` entries.
///
/// Each `downloadURL` is produced by `URL(string:)`, so the argument is an optional URL.
/// Every URL targets `resolve/main/model.safetensors` inside the repository named by the
/// entry's `modelId`.
// NOTE(review): `sizeBytes`, `downloads` and `likes` are literal numbers here — presumably
// snapshots taken when the list was written; confirm whether they are refreshed elsewhere.
// NOTE(review): larger checkpoints are often published as several sharded safetensors files;
// verify that a single `model.safetensors` actually exists for each repository listed.
static let curatedModels: [HFModel] = [
HFModel(
modelId: "microsoft/Phi-3-mini-4k-instruct",
name: "Phi-3 Mini 4K",
description: "Microsoft's efficient 3.8B parameter model with excellent performance",
parameters: "3.8B",
architecture: .phi,
downloadURL: URL(string: "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/model.safetensors"),
sizeBytes: 7_600_000_000,
recommendedContextLength: 4096,
tags: ["instruct", "chat", "efficient"],
downloads: 2_500_000,
likes: 8500
),
HFModel(
modelId: "meta-llama/Meta-Llama-3.1-8B-Instruct",
name: "Llama 3.1 8B Instruct",
description: "Meta's latest 8B parameter instruction-tuned model",
parameters: "8B",
architecture: .llama,
downloadURL: URL(string: "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/model.safetensors"),
sizeBytes: 16_000_000_000,
recommendedContextLength: 8192,
tags: ["instruct", "chat", "meta"],
downloads: 5_000_000,
likes: 15000
),
HFModel(
modelId: "mistralai/Mistral-7B-Instruct-v0.3",
name: "Mistral 7B Instruct v0.3",
description: "Mistral's powerful 7B instruction model",
parameters: "7B",
architecture: .mistral,
downloadURL: URL(string: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3/resolve/main/model.safetensors"),
sizeBytes: 14_000_000_000,
recommendedContextLength: 32768,
tags: ["instruct", "chat", "long-context"],
downloads: 8_000_000,
likes: 22000
),
HFModel(
modelId: "google/gemma-2-2b-it",
name: "Gemma 2 2B IT",
description: "Google's lightweight 2B instruction model",
parameters: "2B",
architecture: .gemma,
downloadURL: URL(string: "https://huggingface.co/google/gemma-2-2b-it/resolve/main/model.safetensors"),
sizeBytes: 4_000_000_000,
recommendedContextLength: 8192,
tags: ["instruct", "chat", "lightweight"],
downloads: 1_200_000,
likes: 5600
),
HFModel(
modelId: "Qwen/Qwen2.5-7B-Instruct",
name: "Qwen2.5 7B Instruct",
description: "Alibaba's Qwen2.5 with improved reasoning",
parameters: "7B",
architecture: .qwen2,
downloadURL: URL(string: "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct/resolve/main/model.safetensors"),
sizeBytes: 15_000_000_000,
recommendedContextLength: 32768,
tags: ["instruct", "chat", "multilingual"],
downloads: 3_000_000,
likes: 9800
),
HFModel(
modelId: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
name: "SmolLM2 1.7B Instruct",
description: "Hugging Face's tiny but capable model",
parameters: "1.7B",
architecture: .llama,
downloadURL: URL(string: "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct/resolve/main/model.safetensors"),
sizeBytes: 3_400_000_000,
recommendedContextLength: 8192,
tags: ["instruct", "chat", "tiny"],
downloads: 800_000,
likes: 4200
)
]
}
61 changes: 34 additions & 27 deletions ModelQuantizer/Services/DeviceScanner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import AppKit
/// Represents the device capability profile for model quantization
struct DeviceCapabilityProfile: Codable, Equatable {
let deviceModel: String
let deviceIdentifier: String
let operatingSystem: String
let operatingSystemVersion: String
let deviceClass: DeviceClass
Expand Down Expand Up @@ -98,14 +99,15 @@ struct DeviceCapabilityProfile: Codable, Equatable {
}

/// Comprehensive device scanner for ML model optimization
class DeviceScanner: ObservableObject, @unchecked Sendable {
@MainActor
final class DeviceScanner: ObservableObject {
static let shared = DeviceScanner()

@Published var currentProfile: DeviceCapabilityProfile?
@Published var isScanning = false
@Published var lastScanDate: Date?

private var timer: Timer?
private var monitoringTask: Task<Void, Never>?
private let metalDevice: MTLDevice?

private init() {
Expand All @@ -114,9 +116,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
startMonitoring()
}

deinit {
timer?.invalidate()
}
deinit { monitoringTask?.cancel() }

// MARK: - Public Methods

Expand All @@ -125,11 +125,9 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {

Task {
let profile = await createProfile()
await MainActor.run {
self.currentProfile = profile
self.lastScanDate = Date()
self.isScanning = false
}
self.currentProfile = profile
self.lastScanDate = Date()
self.isScanning = false
}
}

Expand All @@ -145,15 +143,19 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
// MARK: - Private Methods

private func startMonitoring() {
timer = Timer.scheduledTimer(withTimeInterval: 30.0, repeats: true) { [weak self] _ in
self?.performScan()
monitoringTask?.cancel()
monitoringTask = Task { [weak self] in
while !Task.isCancelled {
try? await Task.sleep(nanoseconds: 30_000_000_000)
self?.performScan()
}
}
}

private func createProfile() async -> DeviceCapabilityProfile {
let deviceModel = getDeviceModel()
let device = getDeviceModel()
let osInfo = getOperatingSystemInfo()
let deviceClass = classifyDevice(deviceModel)
let deviceClass = classifyDevice(device.name)
let ram = getRAMInfo()
let cpu = getCPUInfo()
let gpu = getGPUInfo()
Expand All @@ -162,7 +164,8 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
let storage = getStorageInfo()

return DeviceCapabilityProfile(
deviceModel: deviceModel,
deviceModel: device.name,
deviceIdentifier: device.identifier,
operatingSystem: osInfo.name,
operatingSystemVersion: osInfo.version,
deviceClass: deviceClass,
Expand All @@ -188,16 +191,20 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {

// MARK: - Device Information Gathering

private func getDeviceModel() -> String {
var systemInfo = utsname()
uname(&systemInfo)
let machineMirror = Mirror(reflecting: systemInfo.machine)
let identifier = machineMirror.children.reduce("") { identifier, element in
guard let value = element.value as? Int8, value != 0 else { return identifier }
return identifier + String(UnicodeScalar(UInt8(value)))
}

return mapToMarketingName(identifier)
private func getDeviceModel() -> (identifier: String, name: String) {
#if targetEnvironment(simulator)
let simId = ProcessInfo.processInfo.environment["SIMULATOR_MODEL_IDENTIFIER"] ?? "Simulator"
return (simId, mapToMarketingName(simId))
#else
var sysInfo = utsname()
uname(&sysInfo)
let mirror = Mirror(reflecting: sysInfo.machine)
let identifier = mirror.children.compactMap { element -> Character? in
guard let value = element.value as? Int8, value != 0 else { return nil }
return Character(UnicodeScalar(UInt8(value)))
}.reduce("") { $0 + String($1) }
return (identifier, mapToMarketingName(identifier))
#endif
}

private func getOperatingSystemInfo() -> (name: String, version: String) {
Expand Down Expand Up @@ -332,7 +339,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {
let name = device.name

// Estimate GPU cores based on device class
let model = getDeviceModel()
let model = getDeviceModel().name
var cores = 4 // Default

if model.contains("Pro") || model.contains("Max") {
Expand Down Expand Up @@ -372,7 +379,7 @@ class DeviceScanner: ObservableObject, @unchecked Sendable {

private func getNeuralEngineInfo() -> (cores: Int, tops: Double) {
// Estimate Neural Engine cores based on device
let model = getDeviceModel()
let model = getDeviceModel().name
var cores = 8
var tops = 15.8

Expand Down
51 changes: 15 additions & 36 deletions ModelQuantizer/Services/GGUFBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,36 +61,29 @@ public struct GGUFBuilder {
try appendMetadataValue(value, to: &data)
}

// Write tensor info
// Write tensor info into a temporary buffer so offsets are stable
var tensorInfoData = Data()
var tensorDataOffset = data.count + calculateTensorInfoSize()
// Align to 32 bytes
tensorDataOffset = ((tensorDataOffset + 31) / 32) * 32

for tensor in tensors {
// Tensor name
data.append(UInt64(tensor.name.utf8.count).littleEndianData)
data.append(Data(tensor.name.utf8))

// Number of dimensions
data.append(UInt32(tensor.shape.count).littleEndianData)

// Shape dimensions
tensorInfoData.append(UInt64(tensor.name.utf8.count).littleEndianData)
tensorInfoData.append(Data(tensor.name.utf8))
tensorInfoData.append(UInt32(tensor.shape.count).littleEndianData)

for dim in tensor.shape {
data.append(UInt64(dim).littleEndianData)
tensorInfoData.append(UInt64(dim).littleEndianData)
}

// Data type
data.append(tensor.type.rawValue.littleEndianData)

// Offset to tensor data
data.append(UInt64(tensorDataOffset).littleEndianData)


tensorInfoData.append(tensor.type.rawValue.littleEndianData)
tensorInfoData.append(UInt64(tensorDataOffset).littleEndianData)

tensorDataOffset += tensor.data.count
// Align each tensor to 32 bytes
tensorDataOffset = ((tensorDataOffset + 31) / 32) * 32
}

// Pad to alignment

data.append(tensorInfoData)

while data.count % 32 != 0 {
data.append(0)
}
Expand Down Expand Up @@ -240,17 +233,3 @@ extension FixedWidthInteger {
return withUnsafeBytes(of: &value) { Data($0) }
}
}

extension UInt32 {
    /// The four bytes of this value in little-endian order, wrapped in a `Data`.
    var littleEndianData: Data {
        // Convert to little-endian representation first, then copy the raw bytes out.
        withUnsafeBytes(of: littleEndian) { rawBytes in
            Data(rawBytes)
        }
    }
}

extension UInt64 {
    /// The eight bytes of this value in little-endian order, wrapped in a `Data`.
    var littleEndianData: Data {
        // Convert to little-endian representation first, then copy the raw bytes out.
        withUnsafeBytes(of: littleEndian) { rawBytes in
            Data(rawBytes)
        }
    }
}
Loading
Loading