From e055b0900cc20cbe23ba62bdee3cc69a7f9e1c80 Mon Sep 17 00:00:00 2001 From: kiritozc <2567192417@qq.com> Date: Sat, 30 May 2026 15:13:37 +0800 Subject: [PATCH 1/2] FIX: inject_adapter no longer propagates inference_mode to existing active adapters When injecting a new adapter via inject_adapter, the housekeeping section called set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode). Here peft_config belongs to the newly injected adapter, but self.active_adapters points to the existing active adapter(s). When the new adapter has inference_mode=True (e.g. during save_pretrained with path_initial_model_for_weight_conversion in PiSSA/OLoRA/CorDA workflows), this erroneously freezes the already-active training adapter, causing grad_norm to become 0 and training to effectively stop. The fix only propagates inference_mode when the new adapter IS the active adapter (first-time injection). For subsequent adapters, set_adapter is called without inference_mode, preserving the existing active adapter's trainability state. The new adapter's own inference_mode is still correctly handled by the existing code that follows. This was a regression introduced in commit 13fa0aea (PR #2765). A regression test is added that verifies adding an adapter with inference_mode=True does not freeze the existing active adapter. 
--- src/peft/tuners/tuners_utils.py | 8 ++++++- tests/test_custom_models.py | 42 ++++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/peft/tuners/tuners_utils.py b/src/peft/tuners/tuners_utils.py index 7227e869dd..b9518fb2ff 100644 --- a/src/peft/tuners/tuners_utils.py +++ b/src/peft/tuners/tuners_utils.py @@ -1043,7 +1043,13 @@ def inject_adapter( # It's important to set the adapter here (again), because otherwise it can happen that if a 2nd adapter is # added, and it targets different layer(s) than the first adapter (which is active), then those different # layers will be activated, which we don't want. - self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode) + # Only pass inference_mode when the new adapter is the active adapter (first-time injection). For subsequent + # adapters, preserve the existing active adapter's trainability state — otherwise, adding an adapter with + # inference_mode=True would incorrectly freeze the active training adapter. + if adapter_name in self.active_adapters: + self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode) + else: + self.set_adapter(self.active_adapters) self._mark_only_adapters_as_trainable(model) if self.peft_config[adapter_name].inference_mode: diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index 7a809a6b4c..cdfd361149 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -6242,10 +6242,9 @@ def test_loading_model_with_trainble_tokens_requires_grad_set_correctly(self, is def test_loading_model_requires_grad_set_correctly_switch_inference_mode(self, config_cls, tmp_path): # Same as test_loading_model_requires_grad_set_correctly but this time we first load with is_trainable=False and # then with is_trainable=True. Loading the second adapter should not affect the requires_grad of the first - # adapter, but it does. 
The reason is that is_training/inference_mode is taken from the current PEFT config, but - # that config does not necessarily belong to the active adapter, creating a mismatch. - # When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and - # this test can be deleted. + # adapter, but it does. The reason is that set_adapter itself always sets requires_grad=True for the active + # adapter, so requires_grad is still coupled to the active adapter selection. A proper fix would require + # decoupling these two concerns in set_adapter. model = DeepMLP(size=256) # a size that works with all adapters extra_kwargs = {} config = config_cls(target_modules=["layers.0.lin0"]) @@ -6273,8 +6272,6 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_ # When adding a new adapter with model.add_adapter, through the set_adapter call in update_layer, we activate # the gradients of the first adapter, even if it's not desired. Since there is no is_trainable argument on # add_adapter, there is no way to disable that at the moment. - # When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and - # this test can be deleted. model = DeepMLP(size=256) # a size that works with all adapters extra_kwargs = {} config = config_cls(target_modules=["layers.0.lin0"]) @@ -6291,6 +6288,39 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_ params_with_grad = [n for n, p in model.named_parameters() if p.requires_grad] assert all(not p.requires_grad for p in model.parameters()) + @pytest.mark.parametrize("config_cls", [LoraConfig, LoHaConfig, LoKrConfig, IA3Config, OFTConfig, BOFTConfig]) + def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, config_cls, tmp_path): + # Regression test for a bug where adding a second adapter with inference_mode=True would incorrectly freeze + # the already-active training adapter. 
This happened because inject_adapter propagated the new adapter's + # inference_mode to set_adapter for the existing active adapters. + # See PR #XXXX + model = DeepMLP(size=256) + extra_kwargs = {} + if config_cls == IA3Config: + extra_kwargs["feedforward_modules"] = [] + if config_cls in (BOFTConfig, OFTConfig): + extra_kwargs["boft_block_size"] = 4 + config = config_cls(target_modules=["layers.0.lin0"], **extra_kwargs) + model = get_peft_model(model, config) + + # Initially, the active (default) adapter should be trainable + assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n) + + # Add a second adapter with inference_mode=True, simulating what happens during load_adapter with + # is_trainable=False (e.g. during save_pretrained with path_initial_model_for_weight_conversion) + config_inference = config_cls(target_modules=["layers.0.lin0"], inference_mode=True, **extra_kwargs) + model.add_adapter("inference_adapter", config_inference) + + # The existing active adapter should remain trainable + assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n), ( + "Adding an adapter with inference_mode=True should not freeze the active adapter" + ) + + # The inference adapter should be frozen + assert all(not p.requires_grad for n, p in model.named_parameters() if ".inference_adapter" in n), ( + "The inference adapter's parameters should be frozen" + ) + # this is for PEFT methods that support mixed adapter batches. 
MIXED_ADAPTER_TEST_CASES = [ From b4a21242182bf0f5f170e856b9896c26bea430a2 Mon Sep 17 00:00:00 2001 From: kiritozc <2567192417@qq.com> Date: Mon, 1 Jun 2026 21:10:39 +0800 Subject: [PATCH 2/2] Address review: simplify test to only LoRA, fix PR reference --- tests/test_custom_models.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py index cdfd361149..4f918ab264 100644 --- a/tests/test_custom_models.py +++ b/tests/test_custom_models.py @@ -6288,19 +6288,13 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_ params_with_grad = [n for n, p in model.named_parameters() if p.requires_grad] assert all(not p.requires_grad for p in model.parameters()) - @pytest.mark.parametrize("config_cls", [LoraConfig, LoHaConfig, LoKrConfig, IA3Config, OFTConfig, BOFTConfig]) - def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, config_cls, tmp_path): + def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, tmp_path): # Regression test for a bug where adding a second adapter with inference_mode=True would incorrectly freeze # the already-active training adapter. This happened because inject_adapter propagated the new adapter's # inference_mode to set_adapter for the existing active adapters. 
- # See PR #XXXX + # See PR #3290 model = DeepMLP(size=256) - extra_kwargs = {} - if config_cls == IA3Config: - extra_kwargs["feedforward_modules"] = [] - if config_cls in (BOFTConfig, OFTConfig): - extra_kwargs["boft_block_size"] = 4 - config = config_cls(target_modules=["layers.0.lin0"], **extra_kwargs) + config = LoraConfig(target_modules=["layers.0.lin0"]) model = get_peft_model(model, config) # Initially, the active (default) adapter should be trainable @@ -6308,7 +6302,7 @@ def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, conf # Add a second adapter with inference_mode=True, simulating what happens during load_adapter with # is_trainable=False (e.g. during save_pretrained with path_initial_model_for_weight_conversion) - config_inference = config_cls(target_modules=["layers.0.lin0"], inference_mode=True, **extra_kwargs) + config_inference = LoraConfig(target_modules=["layers.0.lin0"], inference_mode=True) model.add_adapter("inference_adapter", config_inference) # The existing active adapter should remain trainable