Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/peft/tuners/tuners_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,13 @@ def inject_adapter(
# It's important to set the adapter here (again), because otherwise it can happen that if a 2nd adapter is
# added, and it targets different layer(s) than the first adapter (which is active), then those different
# layers will be activated, which we don't want.
self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode)
# Only pass inference_mode when the new adapter is the active adapter (first-time injection). For subsequent
# adapters, preserve the existing active adapter's trainability state — otherwise, adding an adapter with
# inference_mode=True would incorrectly freeze the active training adapter.
if adapter_name in self.active_adapters:
self.set_adapter(self.active_adapters, inference_mode=peft_config.inference_mode)
else:
self.set_adapter(self.active_adapters)
self._mark_only_adapters_as_trainable(model)

if self.peft_config[adapter_name].inference_mode:
Expand Down
36 changes: 30 additions & 6 deletions tests/test_custom_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6242,10 +6242,9 @@ def test_loading_model_with_trainble_tokens_requires_grad_set_correctly(self, is
def test_loading_model_requires_grad_set_correctly_switch_inference_mode(self, config_cls, tmp_path):
# Same as test_loading_model_requires_grad_set_correctly but this time we first load with is_trainable=False and
# then with is_trainable=True. Loading the second adapter should not affect the requires_grad of the first
# adapter, but it does. The reason is that is_training/inference_mode is taken from the current PEFT config, but
# that config does not necessarily belong to the active adapter, creating a mismatch.
# When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and
# this test can be deleted.
# adapter, but it does. The reason is that set_adapter itself always sets requires_grad=True for the active
# adapter, which is still coupled to the active adapter selection. A proper fix would require decoupling these
# two concerns in set_adapter.
model = DeepMLP(size=256) # a size that works with all adapters
extra_kwargs = {}
config = config_cls(target_modules=["layers.0.lin0"])
Expand Down Expand Up @@ -6273,8 +6272,6 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
# When adding a new adapter with model.add_adapter, through the set_adapter call in update_layer, we activate
# the gradients of the first adapter, even if it's not desired. Since there is no is_trainable argument on
# add_adapter, there is no way to disable that at the moment.
# When/If this is fixed, the check can be integrated into test_loading_model_requires_grad_set_correctly and
# this test can be deleted.
model = DeepMLP(size=256) # a size that works with all adapters
extra_kwargs = {}
config = config_cls(target_modules=["layers.0.lin0"])
Expand All @@ -6291,6 +6288,33 @@ def test_loading_model_requires_grad_load_adapter_then_add_adapter(self, config_
params_with_grad = [n for n, p in model.named_parameters() if p.requires_grad]
assert all(not p.requires_grad for p in model.parameters())

def test_inject_adapter_inference_mode_does_not_freeze_active_adapter(self, tmp_path):
    """Adding an ``inference_mode=True`` adapter must not freeze the active, trainable adapter.

    Regression test for a bug where adding a second adapter with inference_mode=True would incorrectly freeze the
    already-active training adapter. This happened because inject_adapter propagated the new adapter's
    inference_mode to set_adapter for the existing active adapters.
    See PR #3290
    """
    model = DeepMLP(size=256)
    config = LoraConfig(target_modules=["layers.0.lin0"])
    model = get_peft_model(model, config)

    # Initially, the active (default) adapter should be trainable
    assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n)

    # Add a second adapter with inference_mode=True, simulating what happens during load_adapter with
    # is_trainable=False (e.g. during save_pretrained with path_initial_model_for_weight_conversion)
    config_inference = LoraConfig(target_modules=["layers.0.lin0"], inference_mode=True)
    model.add_adapter("inference_adapter", config_inference)

    # The existing active adapter should remain trainable
    assert any(p.requires_grad for n, p in model.named_parameters() if ".default" in n), (
        "Adding an adapter with inference_mode=True should not freeze the active adapter"
    )

    # Collect the new adapter's parameters explicitly: all() over an empty iterable is True, so without this
    # guard the frozen check below would pass vacuously if no parameter name matched.
    inference_params = [p for n, p in model.named_parameters() if ".inference_adapter" in n]
    assert inference_params, "Expected the inference adapter to add parameters to the model"

    # The inference adapter should be frozen
    assert all(not p.requires_grad for p in inference_params), (
        "The inference adapter's parameters should be frozen"
    )


# this is for PEFT methods that support mixed adapter batches.
MIXED_ADAPTER_TEST_CASES = [
Expand Down
Loading