From e055b0900cc20cbe23ba62bdee3cc69a7f9e1c80 Mon Sep 17 00:00:00 2001
From: kiritozc <2567192417@qq.com>
Date: Sat, 30 May 2026 15:13:37 +0800
Subject: [PATCH 1/2] FIX: inject_adapter no longer propagates inference_mode
 to existing active adapters

When injecting a new adapter via inject_adapter, the housekeeping section
called set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode).
Here peft_config belongs to the newly injected adapter, but self.active_adapters
points to the existing active adapter(s). When the new adapter has
inference_mode=True (e.g. during save_pretrained with
path_initial_model_for_weight_conversion in PiSSA/OLoRA/CorDA workflows),
this erroneously freezes the already-active training adapter, causing
grad_norm to become 0 and training to effectively stop.

The fix propagates inference_mode only when the new adapter is itself one
of the active adapters (i.e. first-time injection). For subsequent adapters,
set_adapter is called without inference_mode, preserving the existing active
adapter's trainability state. The new adapter's own inference_mode is still
handled correctly by the existing code that follows.

This was a regression introduced in commit 13fa0aea (PR #2765).

A regression test is added that verifies adding an adapter with
inference_mode=True does not freeze the existing active adapter.
---
 src/peft/tuners/tuners_utils.py |  8 ++++++-
 tests/test_custom_models.py     | 42 ++++++++++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py
index 7227e869dd..b9518fb2ff 100644
--- a/src/peft/tuners/tuners_utils.py
+++ b/src/peft/tuners/tuners_utils.py
@@ -1043,7 +1043,13 @@ def inject_adapter(
         # It's important to set the adapter here (again), because otherwise it can happen that if a 2nd adapter is
         # added, and it targets different layer(s) than the first adapter (which is active), then those different
         # layers will be activated, which we don't want.
-        self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode)
+        # Only pass inference_mode when the new adapter is itself one of the active adapters (first-time injection).
+        # For subsequent adapters, preserve the existing active adapters' trainability state; otherwise, adding an
+        # adapter with inference_mode=True would incorrectly freeze the active training adapter.
+        if adapter_name in self.active_adapters:
+            self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode)
+        else:
+            self.set_adapter(self.active_adapters)
         self._mark_only_adapters_as_trainable(model)
 
         if self.peft_config[adapter_name].inference_mode:
diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
index 7a809a6b4c..cdfd361149 100644
--- a/tests/test_custom_models.py
+++ b/tests/test_custom_models.py
@@ -6242,10 +6242,9 @@ def test_loading_model_with_trainble_tokens_requires_grad_set_correctly(self, is
     def test_loading_model_requires_grad_set_correctly_switch_inference_mode(self, config_cls, tmp_path):
         # Same as test_loading_model_requires_grad_set_correctly but this time we first load with is_trainable=False and
         # then with is_trainable=True. Loading the second adapter should not affect the requires_grad of the first
-        # adapter, but it does. The reason is that is_training/inference_mode is taken from the current PEFT config, but
-        # that config does not necessarily belong to the active adapter, creating a mismatch.
-        # When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and
-        # this test can be deleted.
+        # adapter, but it does. The reason is that set_adapter itself always sets requires_grad=True for the active
+        # adapter, i.e. trainability is still coupled to adapter activation. A proper fix would require decoupling
+        # these two concerns (activation and requires_grad) in set_adapter.
         model = DeepMLP(size=256)  # a size that works with all adapters
         extra_kwargs = {}
         config = config_cls(target_modules=["layers.0.lin0"])
@@ -6273,8 +6272,6 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
         # When adding a new adapter with model.add_adapter, through the set_adapter call in update_layer, we activate
         # the gradients of the first adapter, even if it's not desired. Since there is no is_trainable argument on
         # add_adapter, there is no way to disable that at the moment.
-        # When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and
-        # this test can be deleted.
         model = DeepMLP(size=256)  # a size that works with all adapters
         extra_kwargs = {}
         config = config_cls(target_modules=["layers.0.lin0"])
@@ -6291,6 +6288,39 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
         params_with_grad = [n for n, p in model.named_parameters() if p.requires_grad]
         assert all(not p.requires_grad for p in model.parameters())
 
+    @pytest.mark.parametrize("config_cls", [LoraConfig, LoHaConfig, LoKrConfig, IA3Config, OFTConfig, BOFTConfig])
+    def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, config_cls, tmp_path):
+        # Regression test for a bug where adding a second adapter with inference_mode=True would incorrectly freeze
+        # the already-active training adapter. This happened because inject_adapter propagated the new adapter's
+        # inference_mode to set_adapter for the existing active adapters.
+        # See PR #XXXX
+        model = DeepMLP(size=256)
+        extra_kwargs = {}
+        if config_cls == IA3Config:
+            extra_kwargs["feedforward_modules"] = []
+        if config_cls in (BOFTConfig, OFTConfig):
+            extra_kwargs["boft_block_size"] = 4
+        config = config_cls(target_modules=["layers.0.lin0"], **extra_kwargs)
+        model = get_peft_model(model, config)
+
+        # Initially, the active (default) adapter should be trainable
+        assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n)
+
+        # Add a second adapter with inference_mode=True, simulating what happens during load_adapter with
+        # is_trainable=False (e.g. during save_pretrained with path_initial_model_for_weight_conversion)
+        config_inference = config_cls(target_modules=["layers.0.lin0"], inference_mode=True, **extra_kwargs)
+        model.add_adapter("inference_adapter", config_inference)
+
+        # The existing active adapter should remain trainable
+        assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n), (
+            "Adding an adapter with inference_mode=True should not freeze the active adapter"
+        )
+
+        # The inference adapter should be frozen
+        assert all(not p.requires_grad for n, p in model.named_parameters() if ".inference_adapter" in n), (
+            "The inference adapter's parameters should be frozen"
+        )
+
 
 # this is for PEFT methods that support mixed adapter batches.
 MIXED_ADAPTER_TEST_CASES = [

From b4a21242182bf0f5f170e856b9896c26bea430a2 Mon Sep 17 00:00:00 2001
From: kiritozc <2567192417@qq.com>
Date: Mon, 1 Jun 2026 21:10:39 +0800
Subject: [PATCH 2/2] Address review: simplify test to only LoRA, fix PR
 reference

---
 tests/test_custom_models.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
index cdfd361149..4f918ab264 100644
--- a/tests/test_custom_models.py
+++ b/tests/test_custom_models.py
@@ -6288,19 +6288,13 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
         params_with_grad = [n for n, p in model.named_parameters() if p.requires_grad]
         assert all(not p.requires_grad for p in model.parameters())
 
-    @pytest.mark.parametrize("config_cls", [LoraConfig, LoHaConfig, LoKrConfig, IA3Config, OFTConfig, BOFTConfig])
-    def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, config_cls, tmp_path):
+    def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, tmp_path):
         # Regression test for a bug where adding a second adapter with inference_mode=True would incorrectly freeze
         # the already-active training adapter. This happened because inject_adapter propagated the new adapter's
         # inference_mode to set_adapter for the existing active adapters.
-        # See PR #XXXX
+        # See PR #3290
         model = DeepMLP(size=256)
-        extra_kwargs = {}
-        if config_cls == IA3Config:
-            extra_kwargs["feedforward_modules"] = []
-        if config_cls in (BOFTConfig, OFTConfig):
-            extra_kwargs["boft_block_size"] = 4
-        config = config_cls(target_modules=["layers.0.lin0"], **extra_kwargs)
+        config = LoraConfig(target_modules=["layers.0.lin0"])
         model = get_peft_model(model, config)
 
         # Initially, the active (default) adapter should be trainable
@@ -6308,7 +6302,7 @@ def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, conf
 
         # Add a second adapter with inference_mode=True, simulating what happens during load_adapter with
         # is_trainable=False (e.g. during save_pretrained with path_initial_model_for_weight_conversion)
-        config_inference = config_cls(target_modules=["layers.0.lin0"], inference_mode=True, **extra_kwargs)
+        config_inference = LoraConfig(target_modules=["layers.0.lin0"], inference_mode=True)
         model.add_adapter("inference_adapter", config_inference)
 
         # The existing active adapter should remain trainable