From 65280831c1beecac246953c827b1367207630b19 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Tue, 12 May 2026 09:21:47 +0000 Subject: [PATCH 1/3] FIX BEFT merge on bias-less layers; RandLoRA save_projection default BEFT: add a zero bias to base layers that have no bias at adapter init time so that merge()/unmerge() work correctly on models like Qwen3.5 whose linear projections have no bias term (attention_bias=False). Previously this raised a ValueError during vLLM weight sync. RandLoRA: change save_projection default from True to False. The random basis (randlora_A / randlora_B) is fully deterministic given projection_prng_key and can always be regenerated on load, so saving it by default inflated checkpoints by ~42 GB on 4B-parameter models. Users who need portability across PyTorch RNG versions can opt in with save_projection=True. --- src/peft/tuners/beft/layer.py | 8 +++++--- src/peft/tuners/randlora/config.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/peft/tuners/beft/layer.py b/src/peft/tuners/beft/layer.py index d1288e14b8..d9398c4fa4 100644 --- a/src/peft/tuners/beft/layer.py +++ b/src/peft/tuners/beft/layer.py @@ -45,9 +45,11 @@ def __init__(self, base_layer: nn.Module, **kwargs) -> None: def update_layer(self, adapter_name: str, config: BeftConfig, **kwargs): base_layer = self.get_base_layer() if base_layer.bias is None: - warnings.warn( - "Detected that the base layer has no bias term. " - "Note you cannot merge the BEFT adapter into the base layer." + # Add a zero bias so that merge() can write the BEFT bias into it. + # The zero initialisation means this is a no-op until the adapter is trained. + base_layer.bias = nn.Parameter( + torch.zeros(self.out_features, device=base_layer.weight.device, dtype=base_layer.weight.dtype), + requires_grad=False, ) init_weights = config.init_weights inference_mode = config.inference_mode diff --git a/src/peft/tuners/randlora/config.py b/src/peft/tuners/randlora/config.py index b194b97433..d985304490 100644 --- a/src/peft/tuners/randlora/config.py +++ b/src/peft/tuners/randlora/config.py @@ -103,12 +103,14 @@ class RandLoraConfig(PeftConfig): }, ) save_projection: bool = field( - default=True, + default=False, metadata={ "help": ( "Whether to save the basis_A / basis_B projections in the state dict alongside per layer lambda / " - "gamma weights. This will increase the size of the checkpoint, but guarantee that we can reload " - "the checkpoint on all system configurations." + "gamma weights. When False (default), projections are regenerated deterministically from " + "`projection_prng_key` on load, keeping checkpoints small (only lambda/gamma are saved). " + "Set to True only if you need to load the checkpoint on a system where the projection cannot " + "be reproduced (e.g. different PyTorch version with changed RNG behaviour)." ) }, ) From b36387c27d122593d2ab47d57c43b838dbd21892 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Tue, 12 May 2026 10:09:30 +0000 Subject: [PATCH 2/3] FIX BEFT unload restores bias-less layers to original state Track whether BEFT added a zero bias to a bias-less base layer (_beft_added_bias flag). On unload (merge=False), remove it so num_params is unchanged from the original model. Update the two test_beft_initialization/merge tests to reflect that merge now works on bias-less layers instead of raising. Co-Authored-By: Kashif Rasul --- src/peft/tuners/beft/layer.py | 16 ++++++++++++++++ tests/test_initialization.py | 15 ++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/peft/tuners/beft/layer.py b/src/peft/tuners/beft/layer.py index d9398c4fa4..7c11dafe00 100644 --- a/src/peft/tuners/beft/layer.py +++ b/src/peft/tuners/beft/layer.py @@ -33,6 +33,7 @@ def __init__(self, base_layer: nn.Module, **kwargs) -> None: # Mark the weight as unmerged self._disable_adapters = False self.merged_adapters = [] + self._beft_added_bias = False base_layer = self.get_base_layer() if isinstance(base_layer, nn.Linear): @@ -51,6 +52,7 @@ def update_layer(self, adapter_name: str, config: BeftConfig, **kwargs): torch.zeros(self.out_features, device=base_layer.weight.device, dtype=base_layer.weight.dtype), requires_grad=False, ) + self._beft_added_bias = True init_weights = config.init_weights inference_mode = config.inference_mode weight = torch.randn((1, self.out_features)) @@ -142,6 +144,20 @@ def unmerge(self) -> None: beft_bias = self.beft_bias[active_adapter].data base_layer.bias.data = (base_layer.bias.data - beft_bias.squeeze()).to(orig_dtype) + def unload_and_optionally_merge_module( + self, + merge: bool, + safe_merge: bool = False, + adapter_names: Optional[list[str]] = None, + ) -> nn.Module: + if merge: + self.merge(safe_merge=safe_merge, adapter_names=adapter_names) + else: + base_layer = self.get_base_layer() + if self._beft_added_bias: + base_layer.bias = None + return self.get_base_layer() + def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: if self.disable_adapters: if self.merged: diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 27d9ba16e2..a3840df7e5 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -2416,26 +2416,23 @@ def forward(self, X): return MLP(bias=bias).to(self.torch_device).eval() - def test_beft_initialization_no_bias_warning(self): + def test_beft_initialization_no_bias(self): model = self.get_model(bias=False) cfg = BeftConfig(target_modules=["lin0"]) - with pytest.warns(UserWarning, match="Note you cannot merge the BEFT adapter into the base layer."): - model = get_peft_model(model, cfg) + model = get_peft_model(model, cfg) - assert model.lin0.base_layer.bias is None + assert model.lin0.base_layer.bias is not None assert "default" in model.lin0.beft_bias - assert model.lin0.get_base_layer().bias is None - def test_beft_merge_no_bias_raises_error(self): + def test_beft_merge_no_bias(self): model = self.get_model(bias=False) cfg = BeftConfig(target_modules=["lin0"]) model = get_peft_model(model, cfg) assert hasattr(model.lin0, "beft_bias") - - with pytest.raises(ValueError, match="Base layer has no bias, cannot merge bias adapter"): - model.merge_and_unload() + merged_model = model.merge_and_unload() + assert merged_model.lin0.bias is not None class TestNoInfiniteRecursionDeepspeed: From 5e4a16d5540fdbffd740de0c891a1ee921e8f218 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Tue, 12 May 2026 11:10:44 +0000 Subject: [PATCH 3/3] FIX BEFT never modifies base layer bias; restore original test expectations Revert the approach of adding a zero bias to bias-less base layers at adapter init time. BEFT's forward() adds beft_bias directly to the output and does not need base_layer.bias to exist; only merge() does. Adding a bias at init changed the model architecture after unload, breaking test_unload_adapter for bias-less models (Llama, Gemma3). Instead, warn at init when the base layer has no bias (merge will not work), and let merge() raise a clear ValueError. unload() is then correct with no special handling needed. Co-Authored-By: Kashif Rasul --- src/peft/tuners/beft/layer.py | 24 +++--------------------- tests/test_initialization.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/src/peft/tuners/beft/layer.py b/src/peft/tuners/beft/layer.py index 7c11dafe00..d1288e14b8 100644 --- a/src/peft/tuners/beft/layer.py +++ b/src/peft/tuners/beft/layer.py @@ -33,7 +33,6 @@ def __init__(self, base_layer: nn.Module, **kwargs) -> None: # Mark the weight as unmerged self._disable_adapters = False self.merged_adapters = [] - self._beft_added_bias = False base_layer = self.get_base_layer() if isinstance(base_layer, nn.Linear): @@ -46,13 +45,10 @@ def __init__(self, base_layer: nn.Module, **kwargs) -> None: def update_layer(self, adapter_name: str, config: BeftConfig, **kwargs): base_layer = self.get_base_layer() if base_layer.bias is None: - # Add a zero bias so that merge() can write the BEFT bias into it. - # The zero initialisation means this is a no-op until the adapter is trained. - base_layer.bias = nn.Parameter( - torch.zeros(self.out_features, device=base_layer.weight.device, dtype=base_layer.weight.dtype), - requires_grad=False, + warnings.warn( + "Detected that the base layer has no bias term. " + "Note you cannot merge the BEFT adapter into the base layer." ) - self._beft_added_bias = True init_weights = config.init_weights inference_mode = config.inference_mode weight = torch.randn((1, self.out_features)) @@ -144,20 +140,6 @@ def unmerge(self) -> None: beft_bias = self.beft_bias[active_adapter].data base_layer.bias.data = (base_layer.bias.data - beft_bias.squeeze()).to(orig_dtype) - def unload_and_optionally_merge_module( - self, - merge: bool, - safe_merge: bool = False, - adapter_names: Optional[list[str]] = None, - ) -> nn.Module: - if merge: - self.merge(safe_merge=safe_merge, adapter_names=adapter_names) - else: - base_layer = self.get_base_layer() - if self._beft_added_bias: - base_layer.bias = None - return self.get_base_layer() - def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: if self.disable_adapters: if self.merged: diff --git a/tests/test_initialization.py b/tests/test_initialization.py index a3840df7e5..27d9ba16e2 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -2416,23 +2416,26 @@ def forward(self, X): return MLP(bias=bias).to(self.torch_device).eval() - def test_beft_initialization_no_bias(self): + def test_beft_initialization_no_bias_warning(self): model = self.get_model(bias=False) cfg = BeftConfig(target_modules=["lin0"]) - model = get_peft_model(model, cfg) + with pytest.warns(UserWarning, match="Note you cannot merge the BEFT adapter into the base layer."): + model = get_peft_model(model, cfg) - assert model.lin0.base_layer.bias is not None + assert model.lin0.base_layer.bias is None assert "default" in model.lin0.beft_bias + assert model.lin0.get_base_layer().bias is None - def test_beft_merge_no_bias(self): + def test_beft_merge_no_bias_raises_error(self): model = self.get_model(bias=False) cfg = BeftConfig(target_modules=["lin0"]) model = get_peft_model(model, cfg) assert hasattr(model.lin0, "beft_bias") - merged_model = model.merge_and_unload() - assert merged_model.lin0.bias is not None + + with pytest.raises(ValueError, match="Base layer has no bias, cannot merge bias adapter"): + model.merge_and_unload() class TestNoInfiniteRecursionDeepspeed: