leehack · leehack · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,16 @@
 ## Unreleased
 
+* **LiteRT-LM LoRA adapters**:
+  * Added native `.litertlm` LoRA wiring for one LiteRT-LM adapter at scale
+    `1.0`, including `ModelParams.loras`, `setLora(...)`,
+    `removeLora(...)`, and `clearLoras()` on native LiteRT-LM contexts.
+  * Added Dart FFI coverage for
+    `litert_lm_session_config_set_lora_file`. Generation with a LoRA adapter
+    requires a matching `litert-lm-native` runtime that exports that C symbol;
+    older local runtime libraries fail with a focused unsupported-runtime
+    message.
+  * Kept multiple weighted adapters on the llama.cpp/GGUF path and kept
+    LiteRT-LM web rejecting LoRA explicitly.
 * **Structured output**:
   * Added `responseFormat` routing to `LlamaEngine.create(...)` for
     grammar-capable backends, deprecated the legacy `chatTemplate(...)`

diff --git a/README.md b/README.md
@@ -353,8 +353,13 @@ to unconstrained output. LiteRT-LM web is currently limited to single-turn text
 prompts through `@litert-lm/core`; it does
 not yet preserve structured chat history, system prompts, tool declarations, or
 thinking/tool-call parsing with the same semantics as native. The current
-implementation does not expose embeddings, state persistence, LoRA, or
-multimodal operations through LiteRT-LM. `ChatSession` uses a conservative
+implementation does not expose embeddings, state persistence, or multimodal
+operations through LiteRT-LM. Native `.litertlm` LoRA supports one LiteRT-LM
+adapter at scale `1.0` through `ModelParams.loras`, `setLora(...)`,
+`removeLora(...)`, and `clearLoras()` when the loaded `litert-lm-native`
+runtime exports `litert_lm_session_config_set_lora_file`. Multiple weighted
+adapters remain llama.cpp/GGUF-only, and LiteRT-LM web does not expose LoRA.
+`ChatSession` uses a conservative
 prompt-size estimate for history pruning only when exact tokenization is
 unavailable.
 `LiteRtLmBackendPreference.auto` chooses GPU on Android/macOS/web and CPU on
@@ -367,8 +372,8 @@ rejects native-only tuning fields. The native fields cover activation data type,
 CPU dynamic-model prefill chunk size, parallel `.litertlm` file-section
 loading, and Android NPU dispatch library directory. llama.cpp-only tuning knobs
 such as partial GPU layer offload, batch/micro-batch sizing, KV-cache type,
-flash attention, mmap/mlock, thread counts, LoRA load configs, and rope
-overrides are rejected instead of being silently ignored. `.litertlm`
+flash attention, mmap/mlock, thread counts, weighted/multiple LoRA configs,
+and rope overrides are rejected instead of being silently ignored. `.litertlm`
 generation honors `GenerationParams`
 `maxTokens`, `temp`, `topK`, `topP`, and `seed` on native and web, with
 `stopSequences` enforced by llamadart. Native LiteRT-LM also honors stream
@@ -953,11 +958,17 @@ void dispose() {
 
 ## 🎨 Low-Rank Adaptation (LoRA)
 
-`llamadart` supports applying multiple LoRA adapters dynamically at runtime.
+`llamadart` supports applying LoRA adapters dynamically at runtime.
 
-- **Dynamic Scaling**: Adjust the strength (`scale`) of each adapter on the fly.
+- **Dynamic Scaling**: Adjust the strength (`scale`) of each adapter on GGUF
+  llama.cpp backends.
 - **Isolate-Safe**: Native adapters are managed in a background Isolate to prevent UI jank.
-- **Efficient**: Multiple LoRAs share the memory of a single base model.
+- **Context-Scoped**: Adapter state is tied to the active model context.
+
+GGUF llama.cpp backends support multiple weighted adapters. Native LiteRT-LM
+`.litertlm` currently supports one LiteRT-LM adapter at scale `1.0` and
+requires a `litert-lm-native` runtime that exports
+`litert_lm_session_config_set_lora_file`. LiteRT-LM web does not expose LoRA.
 
 Check out our [LoRA Training Notebook](https://github.com/leehack/llamadart/blob/main/example/training_notebook/lora_training.ipynb) to learn how to train and convert your own adapters.
 

diff --git a/lib/src/backends/litert_lm/litert_lm_runtime.dart b/lib/src/backends/litert_lm/litert_lm_runtime.dart
@@ -9,7 +9,9 @@ import 'package:path/path.dart' as path;
 
 import '../../core/models/inference/model_params.dart';
 
-const _litertLmVersion = '0.13.1';
+/// Pinned litert-lm-native runtime version used by bundled native assets.
+const liteRtLmNativeRuntimeVersion = '0.13.1';
+const _litertLmVersion = liteRtLmNativeRuntimeVersion;
 const _litertLmLibDirEnv = 'LLAMADART_LITERT_LM_LIB_DIR';
 const _liteRtLmIosNativeAsset = 'package:llamadart/litert_lm_LiteRtLm';
 const _processLibraryCandidate = '<process>';
@@ -555,6 +557,7 @@ class LiteRtLmRuntimeClient {
     List<Map<String, dynamic>>? messages,
     List<Map<String, dynamic>>? tools,
     Map<String, dynamic>? extraContext,
+    String? loraPath,
     double temperature = 0.8,
     int topK = 40,
     double topP = 0.95,
@@ -580,7 +583,7 @@ class LiteRtLmRuntimeClient {
       bindings.sessionConfigSetSamplerParams(sessionConfig, sampler);
       calloc.free(sampler);
     }
-
+    Pointer<Utf8>? loraPtr;
     final systemPtr = systemMessage == null
         ? nullptr
         : _systemMessageJson(systemMessage).toNativeUtf8(allocator: calloc);
@@ -595,6 +598,25 @@ class LiteRtLmRuntimeClient {
         : jsonEncode(extraContext).toNativeUtf8(allocator: calloc);
     Pointer<_LiteRtLmConversationConfig> config = nullptr;
     try {
+      if (loraPath != null) {
+        if (loraPath.trim().isEmpty) {
+          throw ArgumentError.value(
+            loraPath,
+            'loraPath',
+            'must be non-empty when provided',
+          );
+        }
+        loraPtr = loraPath.toNativeUtf8(allocator: calloc);
+        final loraSet = bindings.sessionConfigSetLoraFile(
+          sessionConfig,
+          loraPtr.cast(),
+        );
+        if (!loraSet) {
+          throw StateError(
+            'litert_lm_session_config_set_lora_file failed for $loraPath',
+          );
+        }
+      }
       config = bindings.conversationConfigCreate();
       if (config == nullptr) {
         throw StateError('litert_lm_conversation_config_create returned null');
@@ -638,6 +660,9 @@ class LiteRtLmRuntimeClient {
       if (extraContextPtr != nullptr) {
         calloc.free(extraContextPtr);
       }
+      if (loraPtr != null) {
+        calloc.free(loraPtr);
+      }
     }
   }
 
@@ -1391,7 +1416,8 @@ class LiteRtLmRuntimeClient {
     final envPath = Platform.environment[_litertLmLibDirEnv];
     if (envPath != null && envPath.isNotEmpty) {
       final dir = Directory(envPath);
-      if (liteRtLmIsCacheDirectoryForAbi(dir, abi)) {
+      if (liteRtLmIsCacheDirectoryForAbi(dir, abi) ||
+          _liteRtLmOverrideDirectoryForAbi(dir, abi)) {
         return dir.absolute;
       }
     }
@@ -1418,6 +1444,11 @@ class LiteRtLmRuntimeClient {
     return null;
   }
 
+  /// Whether [dir] directly contains the LiteRT-LM library file for [abi].
+  ///
+  /// Returns `false` when `_liteRtLmLibraryFileNameForAbi` yields `null` for
+  /// [abi]; otherwise reports whether a file with that name exists in [dir].
+  bool _liteRtLmOverrideDirectoryForAbi(Directory dir, Abi abi) {
+    final fileName = _liteRtLmLibraryFileNameForAbi(abi);
+    if (fileName == null) {
+      return false;
+    }
+    return File('${dir.path}/$fileName').existsSync();
+  }
+
   List<Directory> _candidateSearchRoots() {
     final roots = <String>{Directory.current.path};
     final scriptPath = Platform.script.toFilePath();
@@ -1991,6 +2022,26 @@ class _LiteRtLmBindings {
         )
       >('litert_lm_session_config_set_sampler_params');
 
+  late final _sessionConfigSetLoraFile = _library
+      .lookupFunction<
+        Bool Function(Pointer<_LiteRtLmSessionConfig>, Pointer<Char>),
+        bool Function(Pointer<_LiteRtLmSessionConfig>, Pointer<Char>)
+      >('litert_lm_session_config_set_lora_file');
+
+  /// Points [config] at the LoRA adapter file named by the C string [path].
+  ///
+  /// Returns the native call's boolean result. Any [ArgumentError] raised
+  /// while resolving or invoking `litert_lm_session_config_set_lora_file` is
+  /// replaced by an [UnsupportedError] that names the required runtime symbol.
+  bool sessionConfigSetLoraFile(
+    Pointer<_LiteRtLmSessionConfig> config,
+    Pointer<Char> path,
+  ) {
+    final bool applied;
+    try {
+      // The binding is a `late final` field, so the symbol lookup happens on
+      // first access inside this `try`.
+      applied = _sessionConfigSetLoraFile(config, path);
+    } on ArgumentError catch (lookupError) {
+      final reason =
+          'LiteRT-LM LoRA requires litert-lm-native with '
+          'litert_lm_session_config_set_lora_file. $lookupError';
+      throw UnsupportedError(reason);
+    }
+    return applied;
+  }
+
   late final conversationConfigCreate = _library
       .lookupFunction<
         Pointer<_LiteRtLmConversationConfig> Function(),

diff --git a/lib/src/backends/litert_lm/litert_lm_runtime_stub.dart b/lib/src/backends/litert_lm/litert_lm_runtime_stub.dart
@@ -96,6 +96,7 @@ class LiteRtLmRuntimeClient {
     List<Map<String, dynamic>>? messages,
     List<Map<String, dynamic>>? tools,
     Map<String, dynamic>? extraContext,
+    String? loraPath,
     double temperature = 0.8,
     int topK = 40,
     double topP = 0.95,

diff --git a/lib/src/backends/litert_lm/litert_lm_service.dart b/lib/src/backends/litert_lm/litert_lm_service.dart
@@ -8,6 +8,7 @@ import '../../core/models/chat/chat_role.dart';
 import '../../core/models/config/flash_attention.dart';
 import '../../core/models/config/gpu_backend.dart';
 import '../../core/models/config/kv_cache_type.dart';
+import '../../core/models/config/lora_config.dart';
 import '../../core/models/config/log_level.dart';
 import '../../core/models/inference/generation_params.dart';
 import '../../core/models/inference/model_params.dart';
@@ -19,6 +20,10 @@ import 'litert_lm_chat_templates.dart';
 import 'litert_lm_platform.dart';
 import 'litert_lm_runtime.dart';
 
+const _liteRtLmLoraLimitMessage =
+    'LiteRtLmBackend supports one LiteRT-LM LoRA adapter at scale 1.0. '
+    'Multiple weighted adapters remain llama.cpp/GGUF-only.';
+
 /// Worker-owned service for the LiteRT-LM backend.
 ///
 /// This keeps all LiteRT-LM FFI state inside the backend worker isolate. The
@@ -40,6 +45,7 @@ class LiteRtLmService {
   int _nextContextHandle = 1;
   int? _modelHandle;
   int? _contextHandle;
+  LoraAdapterConfig? _activeLora;
   LiteRtLmRuntimeMetrics? _lastMetrics;
   LlamaLogLevel _logLevel = LlamaLogLevel.warn;
   bool _modelLoaded = false;
@@ -82,6 +88,7 @@ class LiteRtLmService {
     _activeSpeculativeDecoding = null;
     _modelHandle = _nextModelHandle++;
     _contextHandle = null;
+    _activeLora = null;
     _lastMetrics = null;
     _cancelRequested = false;
     _modelLoaded = true;
@@ -101,6 +108,7 @@ class LiteRtLmService {
     _activeSpeculativeDecoding = null;
     _modelHandle = null;
     _contextHandle = null;
+    _activeLora = null;
     _lastMetrics = null;
     _cancelRequested = false;
     _modelLoaded = false;
@@ -115,6 +123,7 @@ class LiteRtLmService {
     _disposeContextRuntimeState();
     _modelParams = params;
     _contextHandle = _nextContextHandle++;
+    _activeLora = _loraForParams(params);
     _contextCreated = true;
     return _contextHandle!;
   }
@@ -124,6 +133,7 @@ class LiteRtLmService {
     _checkContextHandle(contextHandle);
     _disposeContextRuntimeState();
     _contextHandle = null;
+    _activeLora = null;
     _contextCreated = false;
   }
 
@@ -164,6 +174,7 @@ class LiteRtLmService {
       topP: params.topP,
       seed: params.seed ?? _defaultSamplerSeed(),
       npuBackend: backend == 'npu',
+      loraPath: _activeLora?.path,
     );
     if (_cancelRequested) {
       client.cancel();
@@ -273,6 +284,7 @@ class LiteRtLmService {
       topP: params.topP,
       seed: params.seed ?? _defaultSamplerSeed(),
       npuBackend: backend == 'npu',
+      loraPath: _activeLora?.path,
     );
     if (_cancelRequested) {
       client.cancel();
@@ -391,7 +403,28 @@ class LiteRtLmService {
   /// Handles LiteRT-LM LoRA operations.
   void handleLora(int contextHandle, String? path, double? scale, String op) {
     _checkContextHandle(contextHandle);
-    throw UnsupportedError('LiteRtLmBackend does not support LoRA adapters.');
+    // Only one adapter is tracked (`_activeLora`); every branch below either
+    // replaces or clears that single slot.
+    if (op == 'set') {
+      if (path == null) {
+        throw ArgumentError('LiteRT-LM LoRA set requires an adapter path.');
+      }
+      // A missing scale defaults to 1.0 before validation.
+      final requested = LoraAdapterConfig(path: path, scale: scale ?? 1.0);
+      _activeLora = _validateLoraAdapter(requested);
+    } else if (op == 'remove') {
+      if (path == null) {
+        throw ArgumentError(
+          'LiteRT-LM LoRA remove requires an adapter path.',
+        );
+      }
+      // Removing a path that is not the active adapter is a no-op.
+      final current = _activeLora;
+      if (current != null && current.path == path) {
+        _activeLora = null;
+      }
+    } else if (op == 'clear') {
+      _activeLora = null;
+    } else {
+      throw ArgumentError('Unsupported LiteRT-LM LoRA operation: $op');
+    }
   }
 
   /// Returns the active backend name.
@@ -504,6 +537,7 @@ class LiteRtLmService {
     _activeBackend = null;
     _modelHandle = null;
     _contextHandle = null;
+    _activeLora = null;
     _modelLoaded = false;
     _contextCreated = false;
   }
@@ -709,6 +743,13 @@ class LiteRtLmService {
     params.validate();
 
     final unsupported = <String>[];
+    String? loraError;
+    try {
+      _loraForParams(params);
+    } on ArgumentError catch (error) {
+      unsupported.add('loras');
+      loraError = error.message.toString();
+    }
     if (params.contextSize <= 0) {
       unsupported.add('contextSize=${params.contextSize}');
     }
@@ -721,9 +762,6 @@ class LiteRtLmService {
     if (params.mainGpu != 0) {
       unsupported.add('mainGpu');
     }
-    if (params.loras.isNotEmpty) {
-      unsupported.add('loras');
-    }
     if (params.numberOfThreads != 0) {
       unsupported.add('numberOfThreads');
     }
@@ -773,10 +811,39 @@ class LiteRtLmService {
       'contextSize, chatTemplate, preferredBackend, all-or-CPU gpuLayers '
       'hints, liteRtLmBackend for explicit CPU/GPU/NPU selection, '
       'liteRtLmActivationDataType, liteRtLmPrefillChunkSize, '
-      'liteRtLmParallelFileSectionLoading, and liteRtLmDispatchLibDir.',
+      'liteRtLmParallelFileSectionLoading, liteRtLmDispatchLibDir, and '
+      'one LiteRT-LM LoRA adapter at scale 1.0.'
+      '${loraError == null ? '' : ' $loraError'}',
     );
   }
 
+  /// Resolves the LoRA adapter requested by [params] for a LiteRT-LM context.
+  ///
+  /// Returns `null` when no adapters are configured. Throws [ArgumentError]
+  /// when more than one adapter is listed, or when the single adapter is
+  /// rejected by [_validateLoraAdapter].
+  LoraAdapterConfig? _loraForParams(ModelParams params) {
+    final requested = params.loras;
+    if (requested.length > 1) {
+      throw ArgumentError(_liteRtLmLoraLimitMessage);
+    }
+    return requested.isEmpty ? null : _validateLoraAdapter(requested.single);
+  }
+
+  /// Checks that [adapter] fits the LiteRT-LM single-adapter constraints.
+  ///
+  /// Returns [adapter] unchanged when its scale is exactly `1.0`, its path is
+  /// not blank, and a file exists at that path. Throws [ArgumentError]
+  /// otherwise; the checks run in that order.
+  LoraAdapterConfig _validateLoraAdapter(LoraAdapterConfig adapter) {
+    final scale = adapter.scale;
+    final adapterPath = adapter.path;
+    if (scale != 1.0) {
+      throw ArgumentError(
+        '$_liteRtLmLoraLimitMessage Requested scale: $scale.',
+      );
+    }
+    if (adapterPath.trim().isEmpty) {
+      throw ArgumentError('LiteRT-LM LoRA adapter path must be non-empty.');
+    }
+    if (!File(adapterPath).existsSync()) {
+      throw ArgumentError(
+        'LiteRT-LM LoRA adapter does not exist: $adapterPath',
+      );
+    }
+    return adapter;
+  }
+
   void _validateContextBackendParams(ModelParams params) {
     final requestedBackend = _explicitContextBackendName(params);
     if (requestedBackend == null) {