huggingface · kiritozc · May 30, 2026 · Jun 1, 2026
diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py
@@ -1043,7 +1043,13 @@ def inject_adapter(
         # It's important to set the adapter here (again), because otherwise it can happen that if a 2nd adapter is
         # added, and it targets different layer(s) than the first adapter (which is active), then those different
         # layers will be activated, which we don't want.
-        self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode)
+        # Only forward inference_mode when the adapter being injected is itself among the active adapters (e.g. the
+        # first adapter added to the model). For any other adapter, re-activate the active adapters without the new
+        # config's inference_mode; otherwise adding an adapter with inference_mode=True would freeze them.
+        if adapter_name in self.active_adapters:
+            self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode)
+        else:
+            self.set_adapter(self.active_adapters)
         self._mark_only_adapters_as_trainable(model)
 
         if self.peft_config[adapter_name].inference_mode:

diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
@@ -6242,10 +6242,9 @@ def test_loading_model_with_trainble_tokens_requires_grad_set_correctly(self, is
     def test_loading_model_requires_grad_set_correctly_switch_inference_mode(self, config_cls, tmp_path):
         # Same as test_loading_model_requires_grad_set_correctly but this time we first load with is_trainable=False and
         # then with is_trainable=True. Loading the second adapter should not affect the requires_grad of the first
-        # adapter, but it does. The reason is that is_training/inference_mode is taken from the current PEFT config, but
-        # that config does not necessarily belong to the active adapter, creating a mismatch.
-        # When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and
-        # this test can be deleted.
+        # adapter, but it does. The reason is that set_adapter couples two concerns: selecting the active adapter and
+        # setting requires_grad=True on it, so re-activating the adapter always makes it trainable. A proper fix would
+        # require decoupling these two concerns in set_adapter.
         model = DeepMLP(size=256)  # a size that works with all adapters
         extra_kwargs = {}
         config = config_cls(target_modules=["layers.0.lin0"])
@@ -6273,8 +6272,6 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
         # When adding a new adapter with model.add_adapter, through the set_adapter call in update_layer, we activate
         # the gradients of the first adapter, even if it's not desired. Since there is no is_trainable argument on
         # add_adapter, there is no way to disable that at the moment.
-        # When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and
-        # this test can be deleted.
         model = DeepMLP(size=256)  # a size that works with all adapters
         extra_kwargs = {}
         config = config_cls(target_modules=["layers.0.lin0"])
@@ -6291,6 +6288,33 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
         params_with_grad = [n for n, p in model.named_parameters() if p.requires_grad]
         assert all(not p.requires_grad for p in model.parameters())
 
+    def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, tmp_path):
+        # Regression test for a bug where adding a second adapter with inference_mode=True would incorrectly freeze
+        # the already-active training adapter. This happened because inject_adapter propagated the new adapter's
+        # inference_mode to set_adapter for the existing active adapters.
+        # See PR #3290
+        model = DeepMLP(size=256)
+        config = LoraConfig(target_modules=["layers.0.lin0"])
+        model = get_peft_model(model, config)
+
+        # Initially, the active (default) adapter should be trainable
+        assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n)
+
+        # Add a second adapter with inference_mode=True, simulating what happens during load_adapter with
+        # is_trainable=False (e.g. during save_pretrained with path_initial_model_for_weight_conversion)
+        config_inference = LoraConfig(target_modules=["layers.0.lin0"], inference_mode=True)
+        model.add_adapter("inference_adapter", config_inference)
+
+        # The existing active adapter should remain trainable
+        assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n), (
+            "Adding an adapter with inference_mode=True should not freeze the active adapter"
+        )
+
+        # The inference adapter should be frozen
+        assert all(not p.requires_grad for n, p in model.named_parameters() if ".inference_adapter" in n), (
+            "The inference adapter's parameters should be frozen"
+        )
+
 
 # this is for PEFT methods that support mixed adapter batches.
 MIXED_ADAPTER_TEST_CASES = [