From 88ccfaa0183a837d63f88f410629822a9d0b4648 Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Thu, 21 May 2026 16:19:26 +0000
Subject: [PATCH 1/9] Add FRoD tuner

---
 docs/source/_toctree.yml              |   2 +
 docs/source/package_reference/frod.md |  38 ++++
 src/peft/__init__.py                  |   4 +
 src/peft/tuners/__init__.py           |   3 +
 src/peft/tuners/frod/__init__.py      |  10 +
 src/peft/tuners/frod/config.py        | 146 +++++++++++++
 src/peft/tuners/frod/layer.py         | 303 ++++++++++++++++++++++++++
 src/peft/tuners/frod/model.py         | 276 +++++++++++++++++++++++
 src/peft/utils/__init__.py            |   2 +
 src/peft/utils/constants.py           |   2 +
 src/peft/utils/other.py               |   2 +
 src/peft/utils/peft_types.py          |   2 +
 src/peft/utils/save_and_load.py       |  41 ++++
 tests/test_custom_models.py           |  16 +-
 14 files changed, 845 insertions(+), 2 deletions(-)
 create mode 100644 docs/source/package_reference/frod.md
 create mode 100644 src/peft/tuners/frod/__init__.py
 create mode 100644 src/peft/tuners/frod/config.py
 create mode 100644 src/peft/tuners/frod/layer.py
 create mode 100644 src/peft/tuners/frod/model.py

diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index a9db3fa19a..de928e9d6e 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -124,6 +124,8 @@
       title: PVeRA
     - local: package_reference/fourierft
       title: FourierFT
+    - local: package_reference/frod
+      title: FRoD
     - local: package_reference/gralora
       title: GraLoRA
     - local: package_reference/vblora
diff --git a/docs/source/package_reference/frod.md b/docs/source/package_reference/frod.md
new file mode 100644
index 0000000000..7fceb6fa41
--- /dev/null
+++ b/docs/source/package_reference/frod.md
@@ -0,0 +1,38 @@
+<!--Copyright 2026 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# FRoD: Full-Rank Efficient Fine-Tuning with Rotational Degrees
+
+FRoD is a parameter-efficient fine-tuning method that combines a shared full-rank basis with sparse learnable
+rotational degrees. The adapter update is expressed through fixed projection tensors and trainable coefficients, which
+allows FRoD to apply full-rank updates while keeping the number of trained parameters small.
+
+When saving the adapter parameters, it is possible to avoid storing the projection tensors by setting
+`save_projection=False` on the `FRODConfig`. In that case, the projections are restored from the base model weights and
+the fixed random seed from `projection_prng_key`. This reduces checkpoint size, but the default is
+`save_projection=True` to make checkpoint loading independent of regeneration details.
+
+FRoD currently has the following constraint:
+
+- Only `nn.Linear` and `transformers.pytorch_utils.Conv1D` layers are supported.
+
+## FRODConfig
+
+[[autodoc]] tuners.frod.config.FRODConfig
+
+## FRODModel
+
+[[autodoc]] tuners.frod.model.FRODModel
diff --git a/src/peft/__init__.py b/src/peft/__init__.py
index ec12d52583..df185d9d76 100644
--- a/src/peft/__init__.py
+++ b/src/peft/__init__.py
@@ -70,6 +70,8 @@
     EvaConfig,
     FourierFTConfig,
     FourierFTModel,
+    FRODConfig,
+    FRODModel,
     GraloraConfig,
     GraloraModel,
     HiraConfig,
@@ -198,6 +200,8 @@
     "DeloraConfig",
     "DeloraModel",
     "EvaConfig",
+    "FRODConfig",
+    "FRODModel",
     "FourierFTConfig",
     "FourierFTModel",
     "GraloraConfig",
diff --git a/src/peft/tuners/__init__.py b/src/peft/tuners/__init__.py
index 4900a71aa8..9ff82ea725 100644
--- a/src/peft/tuners/__init__.py
+++ b/src/peft/tuners/__init__.py
@@ -22,6 +22,7 @@
 from .cpt import CPTConfig, CPTEmbedding
 from .delora import DeloraConfig, DeloraModel
 from .fourierft import FourierFTConfig, FourierFTModel
+from .frod import FRODConfig, FRODModel
 from .gralora import GraloraConfig, GraloraModel
 from .hira import HiraConfig, HiraModel
 from .hra import HRAConfig, HRAModel
@@ -93,6 +94,8 @@
     "DeloraConfig",
     "DeloraModel",
     "EvaConfig",
+    "FRODConfig",
+    "FRODModel",
     "FourierFTConfig",
     "FourierFTModel",
     "GraloraConfig",
diff --git a/src/peft/tuners/frod/__init__.py b/src/peft/tuners/frod/__init__.py
new file mode 100644
index 0000000000..3be7152c07
--- /dev/null
+++ b/src/peft/tuners/frod/__init__.py
@@ -0,0 +1,10 @@
+from peft.utils import register_peft_method
+
+from .config import FRODConfig
+from .layer import FRODLayer, Linear
+from .model import FRODModel
+
+
+__all__ = ["FRODConfig", "FRODLayer", "FRODModel", "Linear"]
+
+register_peft_method(name="frod", config_cls=FRODConfig, model_cls=FRODModel, prefix="frod_")
diff --git a/src/peft/tuners/frod/config.py b/src/peft/tuners/frod/config.py
new file mode 100644
index 0000000000..354c1279b4
--- /dev/null
+++ b/src/peft/tuners/frod/config.py
@@ -0,0 +1,146 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Optional, Union
+
+from peft.config import PeftConfig
+from peft.utils import PeftType
+
+
+@dataclass
+class FRODConfig(PeftConfig):
+    """
+    This is the configuration class to store the configuration of a [`FRODModel`].
+
+    Paper: https://doi.org/10.1609/aaai.v40i31.39813.
+
+    Args:
+        target_modules (`Union[List[str], str]`):
+            The names of the modules to apply FRoD to. Only linear layers are supported.
+        projection_prng_key (`int`):
+            Random seed used when initializing the sparse FRoD COO pattern.
+        save_projection (`bool`):
+            Whether to save the FRoD projection tensors in the state dict. This increases checkpoint size but makes
+            adapter reloading independent of local cache regeneration. Defaults to `True`.
+        frod_dropout (`float`):
+            The dropout probability for FRoD layers.
+        fan_in_fan_out (`bool`):
+            Set this to True if the layer to replace stores weight like (fan_in, fan_out). For example, gpt-2 uses
+            `Conv1D` which stores weights like (fan_in, fan_out) and hence this should be set to `True`.
+        bias (`str`):
+            Bias type for FRoD. Can be 'none', 'all' or 'frod_only'. If 'all' or 'frod_only', the corresponding biases
+            will be updated during training. Be aware that this means that, even when disabling the adapters, the model
+            will not produce the same output as the base model would have without adaptation.
+        modules_to_save (`List[str]`):
+            List of modules apart from FRoD layers to be set as trainable and saved in the final checkpoint.
+        init_weights (`bool`):
+            Whether to initialize the weights of the FRoD layers with their default initialization. Don't change this
+            setting, except if you know exactly what you're doing.
+        layers_to_transform (`Union[List[int],int]`):
+            The layer indexes to transform, if this argument is specified, it will apply the FRoD transformations on
+            the layer indexes that are specified in this list. If a single integer is passed, it will apply the FRoD
+            transformations on the layer at this index.
+        layers_pattern (`Optional[Union[List[str], str]]`):
+            The layer pattern name, used only if `layers_to_transform` is different from `None`. This should target the
+            `nn.ModuleList` of the model, which is often called `'layers'` or `'h'`.
+        sparse_rate (`float`):
+            Fraction of the entries of the square sparse FRoD matrix that are trainable. Must be between 0 and 1.
+        regularization_alpha (`float`):
+            Regularization parameter used when building the shared FRoD basis.
+    """
+
+    target_modules: Optional[Union[list[str], str]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "List of module names or regex expression of the module names to replace with FRoD. "
+                "For example, ['q', 'v'] or '.*decoder.*(SelfAttention|EncDecAttention).*(q|v)$'. "
+                "Only linear layers are supported."
+            )
+        },
+    )
+    projection_prng_key: int = field(
+        default=0, metadata={"help": "Random seed used when initializing the FRoD sparse COO structure."}
+    )
+    save_projection: bool = field(
+        default=True,
+        metadata={
+            "help": (
+                "Whether to save the FRoD projection tensors in the state dict. This increases checkpoint size but "
+                "guarantees that we can reload the adapter on all system configurations."
+            )
+        },
+    )
+    frod_dropout: float = field(default=0.0, metadata={"help": "Dropout in the FRoD adapter layers"})
+    fan_in_fan_out: bool = field(
+        default=False,
+        metadata={"help": "Set this to True if the layer to replace stores weight like (fan_in, fan_out)"},
+    )
+    bias: str = field(default="none", metadata={"help": "Bias type for FRoD. Can be 'none', 'all' or 'frod_only'"})
+    modules_to_save: Optional[list[str]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "List of modules apart from FRoD layers to be set as trainable and saved in the final checkpoint. For"
+                " example, in Sequence Classification or Token Classification tasks, the final layer"
+                " `classifier/score` are randomly initialized and as such need to be trainable and saved."
+            )
+        },
+    )
+    init_weights: bool = field(
+        default=True,
+        metadata={
+            "help": (
+                "Whether to initialize the weights of the FRoD layers with their default initialization. Don't change "
+                "this setting, except if you know exactly what you're doing."
+            ),
+        },
+    )
+    layers_to_transform: Optional[Union[list[int], int]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "The layer indexes to transform, if this argument is specified, PEFT will transform only the layers"
+                " indexes that are specified inside this list. If a single integer is passed, PEFT will transform only"
+                " the layer at this index."
+            )
+        },
+    )
+    layers_pattern: Optional[Union[list[str], str]] = field(
+        default=None,
+        metadata={
+            "help": (
+                "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer "
+                "pattern is not in the common layers pattern. This should target the `nn.ModuleList` of the "
+                "model, which is often called `'layers'` or `'h'`."
+            )
+        },
+    )
+    sparse_rate: float = field(default=0.01, metadata={"help": "Sparse rate"})
+    regularization_alpha: float = field(
+        default=1e-3,
+        metadata={"help": "Regularization parameter used when building the shared FRoD basis."},
+    )
+
+    def __post_init__(self):
+        self.peft_type = PeftType.FROD
+        if isinstance(self.target_modules, list):
+            self.target_modules = set(self.target_modules)
+        # check for layers_to_transform and layers_pattern
+        if self.layers_pattern and not self.layers_to_transform:
+            raise ValueError("When `layers_pattern` is specified, `layers_to_transform` must also be specified. ")
+        if not 0 <= self.sparse_rate <= 1:  # the chained comparison also rejects NaN
+            raise ValueError(f"`sparse_rate` should be between 0 and 1, got {self.sparse_rate}.")
diff --git a/src/peft/tuners/frod/layer.py b/src/peft/tuners/frod/layer.py
new file mode 100644
index 0000000000..edaf2ea2ba
--- /dev/null
+++ b/src/peft/tuners/frod/layer.py
@@ -0,0 +1,303 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+from typing import Optional
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from numpy.linalg import inv
+from torch import nn
+from transformers.pytorch_utils import Conv1D
+
+from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge
+from peft.utils.other import transpose
+
+from .._buffer_dict import BufferDict
+
+
+class FRODLayer(BaseTunerLayer):
+    """Common state and setup logic of FRoD adapter layers (trainable parameters and projection buffers)."""
+
+    adapter_layer_names = ("frod_lambda_s_values", "frod_lambda_l")
+    other_param_names = ("frod_V", "frod_U", "frod_s_indices", "frod_s_size")
+
+    def __init__(self, base_layer: nn.Module, **kwargs):
+        self.base_layer = base_layer
+        self.r = {}
+        self.frod_dropout = nn.ModuleDict({})
+
+        # Sparse S is parameterized by its COO values only.
+        self.frod_lambda_s_values = nn.ParameterDict({})
+        self.frod_lambda_l = nn.ParameterDict({})
+
+        self.frod_s_indices: Optional[BufferDict] = None
+        self.frod_s_size: Optional[BufferDict] = None
+        self.frod_V: Optional[BufferDict] = None
+        self.frod_U: BufferDict = BufferDict({}, persistent=False)
+
+        self._disable_adapters = False
+        self.merged_adapters = []
+
+        base_layer = self.get_base_layer()
+        if isinstance(base_layer, nn.Linear):
+            in_features, out_features = base_layer.in_features, base_layer.out_features
+        elif isinstance(base_layer, Conv1D):
+            in_features, out_features = (
+                base_layer.weight.ds_shape if hasattr(base_layer.weight, "ds_shape") else base_layer.weight.shape
+            )
+        else:
+            # Fail with a clear message instead of an UnboundLocalError on `in_features` below.
+            raise TypeError(f"FRoD supports only `nn.Linear` and `Conv1D` base layers, got {type(base_layer)}.")
+
+        self.in_features = in_features
+        self.out_features = out_features
+        self.kwargs = kwargs
+
+    @property
+    def merged(self) -> bool:
+        return bool(self.merged_adapters)
+
+    def update_layer(
+        self,
+        adapter_name,
+        frod_V: BufferDict,
+        frod_s_indices: BufferDict,
+        frod_s_size: BufferDict,
+        frod_dropout,
+        init_weights,
+    ):
+        """Create the parameters, dropout and cached projections of `adapter_name` for this layer."""
+        weight = self.get_base_layer().weight
+        device = weight.device
+        dtype = weight.dtype
+
+        param_dtype = torch.float16 if (device.type == "cuda" and dtype == torch.float32) else dtype
+
+        self.r[adapter_name] = self.out_features
+        frod_dropout_layer = nn.Dropout(p=frod_dropout) if frod_dropout > 0.0 else nn.Identity()
+        self.frod_dropout.update(nn.ModuleDict({adapter_name: frod_dropout_layer}))
+
+        if adapter_name not in frod_V:
+            if not frod_V:
+                raise ValueError("The FRoD projection buffers are empty. This should not happen.")
+            frod_V[adapter_name] = next(iter(frod_V.values()))
+            frod_s_indices[adapter_name] = next(iter(frod_s_indices.values()))
+            frod_s_size[adapter_name] = next(iter(frod_s_size.values()))
+
+        nnz = frod_s_indices[adapter_name].shape[1]
+        self.frod_lambda_s_values[adapter_name] = nn.Parameter(torch.zeros(nnz, device=device, dtype=param_dtype))
+
+        self.__dict__["frod_V"] = frod_V
+        self.__dict__["frod_s_indices"] = frod_s_indices
+        self.__dict__["frod_s_size"] = frod_s_size
+
+        # Keep cached projections on CPU and move them lazily in forward.
+        self.frod_V[adapter_name] = self.frod_V[adapter_name].to(dtype=param_dtype, device="cpu")
+        self.frod_s_indices[adapter_name] = self.frod_s_indices[adapter_name].to(device="cpu", dtype=torch.long)
+        self.frod_s_size[adapter_name] = self.frod_s_size[adapter_name].to(device="cpu", dtype=torch.long)
+
+        # Conv1D stores its weight as (in_features, out_features), but the projection was built from the transpose.
+        weight_out_in = weight.T if isinstance(self.get_base_layer(), Conv1D) else weight
+        U, L = self._calculate_frod_u_and_lambda(self.frod_V[adapter_name], weight_out_in)
+        U = U.to(param_dtype)
+        L = L.to(device=device, dtype=param_dtype)
+        self.frod_lambda_l[adapter_name] = nn.Parameter(L, requires_grad=True)
+        if init_weights:
+            self.reset_frod_parameters(adapter_name)
+
+        self.frod_U[adapter_name] = U.cpu()
+        self._move_adapter_to_device_of_base_layer(adapter_name)
+        self.set_adapter(self.active_adapters)
+
+    def _calculate_frod_u_and_lambda(self, V, W):
+        """Return `W @ inv(V).T` with unit-norm columns (`U`) together with the original column norms (`L`)."""
+        w = W.detach().to(torch.float32).cpu().numpy()
+        v = V.detach().to(torch.float32).cpu().numpy()
+        try:
+            v_inv_T = inv(v).T
+        except np.linalg.LinAlgError:
+            v_inv_T = np.linalg.pinv(v, rcond=1e-6).T
+        Bi = w @ v_inv_T
+        lambda_l = np.linalg.norm(Bi, axis=0)
+        u = np.divide(Bi, lambda_l, out=np.zeros_like(Bi), where=lambda_l > 1e-8)
+        return torch.from_numpy(u).float(), torch.from_numpy(lambda_l).float()
+
+    def reset_frod_parameters(self, adapter_name):
+        """Zero the trainable FRoD parameters of `adapter_name` in place, where they exist."""
+        with torch.no_grad():
+            for params in (self.frod_lambda_s_values, self.frod_lambda_l):
+                if adapter_name in params:
+                    nn.init.zeros_(params[adapter_name])
+
+
+class Linear(nn.Linear, FRODLayer):
+    """
+    FRoD adapter layer wrapping a `nn.Linear` or `Conv1D` base layer.
+
+    The adapter adds `dropout(x) @ V @ (S + diag(lambda_l)) @ U.T` to the base layer output, where `S` is a sparse
+    matrix whose values are the trainable `frod_lambda_s_values` and `lambda_l` is the trainable `frod_lambda_l`.
+    """
+
+    def __init__(
+        self,
+        base_layer,
+        frod_V: BufferDict,
+        frod_s_indices: BufferDict,
+        frod_s_size: BufferDict,
+        adapter_name: str,
+        frod_dropout: float = 0.0,
+        fan_in_fan_out: bool = False,
+        is_target_conv_1d_layer: bool = False,
+        init_weights: bool = True,
+        **kwargs,
+    ) -> None:
+        # Start the MRO lookup after nn.Linear so that nn.Linear.__init__ is not run.
+        super(nn.Linear, self).__init__()
+        FRODLayer.__init__(self, base_layer, **kwargs)
+        self.fan_in_fan_out = fan_in_fan_out
+
+        self._active_adapter = adapter_name
+        self.update_layer(adapter_name, frod_V, frod_s_indices, frod_s_size, frod_dropout, init_weights)
+        self.is_target_conv_1d_layer = is_target_conv_1d_layer
+
+    def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
+        """
+        Merge the delta weights of the selected adapters into the base layer weight.
+
+        With `safe_merge=True` the sum is computed on a copy and checked for non-finite values before it is applied.
+        """
+        adapter_names = check_adapters_to_merge(self, adapter_names)
+        if not adapter_names:
+            return
+
+        for name in adapter_names:
+            if name not in self.frod_lambda_l.keys():
+                continue
+            target = self.get_base_layer()
+            if not safe_merge:
+                target.weight.data += self.get_delta_weight(name)
+            else:
+                # Work on a copy so the base weight stays intact if the check fails.
+                candidate = target.weight.data.clone()
+                candidate += self.get_delta_weight(name)
+                if not torch.isfinite(candidate).all():
+                    raise ValueError(
+                        f"NaNs detected in the merged weights. The adapter {name} seems to be broken"
+                    )
+                target.weight.data = candidate
+            self.merged_adapters.append(name)
+
+    def unmerge(self) -> None:
+        """Subtract the delta weights of all merged adapters from the base layer weight, most recent first."""
+        if not self.merged:
+            warnings.warn("Already unmerged. Nothing to do.")
+            return
+
+        while self.merged_adapters:
+            name = self.merged_adapters.pop()
+            if name in self.frod_lambda_l.keys():
+                self.get_base_layer().weight.data -= self.get_delta_weight(name)
+
+    def _sparse_size(self, adapter) -> tuple:
+        """Return the stored dense shape of the sparse matrix of `adapter` as a tuple of Python ints."""
+        stored = self.frod_s_size[adapter]
+        dims = stored.tolist() if isinstance(stored, torch.Tensor) else stored
+        return tuple(int(dim) for dim in dims)
+
+    def get_delta_weight(self, adapter) -> torch.Tensor:
+        """Compute `U @ (S + L).T @ V.T` for `adapter`, passed through `transpose(..., self.fan_in_fan_out)`."""
+        base_weight = self.get_base_layer().weight
+        placement = {"device": base_weight.device, "dtype": base_weight.dtype}
+        U = self.frod_U[adapter].to(**placement)
+        V = self.frod_V[adapter].to(**placement)
+        indices = self.frod_s_indices[adapter].to(device=U.device, dtype=torch.long)
+        values = self.frod_lambda_s_values[adapter].to(U.device, U.dtype).clone()
+        lambda_l = self.frod_lambda_l[adapter].to(**placement)
+
+        S = torch.sparse_coo_tensor(indices, values, self._sparse_size(adapter)).coalesce().to_dense()
+        L = torch.diag_embed(lambda_l)
+        return transpose(U @ (S + L).T @ V.T, self.fan_in_fan_out)
+
+    def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+        """
+        Apply the base layer and add the FRoD update of every active adapter.
+
+        Only the base layer is applied when adapters are disabled (unmerging first if needed) or already merged.
+        """
+        previous_dtype = x.dtype
+
+        if self.disable_adapters and self.merged:
+            self.unmerge()
+        result = self.base_layer(x, *args, **kwargs)
+        if self.disable_adapters or self.merged:
+            return result.to(previous_dtype)
+
+        for name in self.active_adapters:
+            if name not in self.frod_lambda_s_values:
+                continue
+
+            target_dtype = x.dtype
+            V = self.frod_V[name].to(device=x.device, dtype=target_dtype)
+            U = self.frod_U[name].to(device=x.device, dtype=target_dtype)
+            indices = self.frod_s_indices[name].to(device=x.device, dtype=torch.long)
+            values = self.frod_lambda_s_values[name].to(device=x.device, dtype=target_dtype)
+            lambda_l = self.frod_lambda_l[name].to(device=x.device, dtype=target_dtype)
+
+            x = x.to(target_dtype)
+            h = self.frod_dropout[name](x)
+            batch_shape = h.shape[:-1]
+            z_flat = torch.matmul(h.reshape(-1, h.shape[-1]), V)
+
+            # On CUDA the sparse product is run in float32 when the activations are float16/bfloat16.
+            matmul_dtype = z_flat.dtype
+            if z_flat.is_cuda and matmul_dtype in (torch.float16, torch.bfloat16):
+                matmul_dtype = torch.float32
+            z_mm = z_flat.to(matmul_dtype)
+
+            values = values.to(device=z_flat.device, dtype=matmul_dtype)
+            S_sparse = torch.sparse_coo_tensor(indices, values, self._sparse_size(name)).coalesce()
+            if S_sparse.dtype != matmul_dtype:
+                S_sparse = S_sparse.to(dtype=matmul_dtype)
+            z_S = torch.sparse.mm(S_sparse.t(), z_mm.t()).t()
+            z_L = z_mm * lambda_l.to(device=z_flat.device, dtype=matmul_dtype)
+
+            U_mm = U.to(device=z_flat.device, dtype=matmul_dtype)
+            delta = F.linear(z_S + z_L, U_mm).to(target_dtype)
+            result = result + delta.reshape(*batch_shape, delta.shape[-1])
+
+        return result.to(previous_dtype)
+
+    def __repr__(self) -> str:
+        return "frod." + super().__repr__()
+
+    def _move_adapter_to_device_of_base_layer(self, adapter_name: str, device: Optional[torch.device] = None) -> None:
+        """Move the `nn.ParameterDict` entries of `adapter_name` to `device`, defaulting to the base weight's."""
+        dtype = None
+        if device is None:
+            base = self.get_base_layer()
+            candidates = (getattr(base, attr, None) for attr in ("weight", "qweight"))
+            weight = next((w for w in candidates if w is not None), None)
+            if weight is None:
+                return
+            device, dtype = weight.device, weight.dtype
+
+        for layer_name in self.adapter_layer_names:
+            holder = getattr(self, layer_name, None)
+            if not isinstance(holder, nn.ParameterDict) or adapter_name not in holder:
+                continue
+            param = holder[adapter_name]
+            if not param.is_meta:
+                holder[adapter_name] = param.to(device, dtype=dtype)
diff --git a/src/peft/tuners/frod/model.py b/src/peft/tuners/frod/model.py
new file mode 100644
index 0000000000..6b148ce51e
--- /dev/null
+++ b/src/peft/tuners/frod/model.py
@@ -0,0 +1,276 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import warnings
+from collections import defaultdict
+
+import numpy as np
+import torch
+from numpy.linalg import qr
+from torch import nn
+from transformers.pytorch_utils import Conv1D
+
+from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
+from peft.utils import TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING
+
+from .._buffer_dict import BufferDict
+from ..tuners_utils import _maybe_include_all_linear_layers
+from .config import FRODConfig
+from .layer import FRODLayer, Linear
+
+
+def _category_from_key(key: str) -> str:
+    """Return the category name used to group modules, derived from the tail of the dotted module name."""
+    *prefix, leaf = key.split(".")
+    if not prefix or prefix[-1].isdigit():
+        # Top-level modules and direct children of a numbered block are identified by their own name.
+        return leaf
+    category = f"{prefix[-1]}_{leaf}"
+    if category == "output_dense" and prefix[-2:-1] == ["attention"]:
+        return "attention_output"
+    return category
+
+
+def _layer_index_from_key(key: str, fallback: int) -> int:
+    """Return the layer index encoded in the dotted module name `key`, or `fallback` if there is none."""
+    parts = key.split(".")
+    try:
+        # Preferred: the component right after the first "layers" entry. A missing "layers" entry makes
+        # `index` raise ValueError, which is handled like a non-numeric or missing successor.
+        return int(parts[parts.index("layers") + 1])
+    except (ValueError, IndexError):
+        pass
+    # Otherwise use the first purely numeric component.
+    return next((int(part) for part in parts if part.isdigit()), fallback)
+
+
+def _projection_from_weights(matrices: list[np.ndarray], regularization_alpha: float) -> np.ndarray:
+    """Compute the shared square FRoD projection matrix from weight matrices with equal column count."""
+    stacked = np.vstack(matrices)
+    n_rows, dim = stacked.shape
+    if n_rows < dim:
+        # Wide stack: return the full set of right singular vectors as columns.
+        return np.linalg.svd(stacked, full_matrices=True)[2].T
+
+    q_matrix, r_matrix = qr(stacked)
+    # Row offsets that delimit each input matrix inside the stacked Q factor.
+    offsets = np.cumsum([0] + [matrix.shape[0] for matrix in matrices])
+    identity = np.eye(dim, dtype=r_matrix.dtype)
+
+    # Average of the regularized inverse Gram matrices of the per-matrix Q slices.
+    t_pi = np.zeros((dim, dim), dtype=r_matrix.dtype)
+    for start, stop in zip(offsets[:-1], offsets[1:]):
+        q_slice = q_matrix[start:stop, :]
+        t_pi += np.linalg.inv(q_slice.T @ q_slice + regularization_alpha * identity)
+    t_pi /= len(matrices)
+
+    # `eigh` returns (eigenvalues, eigenvectors); only the eigenvectors are used.
+    _, eigenvectors = np.linalg.eigh(t_pi)
+    return r_matrix.T @ eigenvectors
+
+
+class FRODModel(BaseTuner):
+    prefix: str = "frod_"
+    tuner_layer_cls = FRODLayer
+    target_module_mapping = TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING
+
+    def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
+        """
+        Build the shared projection and the sparse COO pattern of `adapter_name` for each target-layer category.
+
+        The sparse positions are sampled off the diagonal with a generator seeded by `config.projection_prng_key`.
+        """
+        weights = defaultdict(dict)
+        model_config = self.get_model_config(self.model)
+        peft_config = self._prepare_adapter_config(config, model_config)
+        peft_config = _maybe_include_all_linear_layers(peft_config, self.model)
+
+        fallback_index = 0
+        for key, module in self.model.named_modules():
+            if not self._check_target_module_exists(peft_config, key):
+                continue
+
+            if isinstance(module, nn.Linear):
+                weight = module.weight
+            elif isinstance(module, Conv1D):
+                weight = module.weight.T
+            else:
+                continue
+
+            category = _category_from_key(key)
+            layer_idx = _layer_index_from_key(key, fallback_index)
+            fallback_index += 1
+            weights[layer_idx][category] = weight
+
+        if not weights:
+            raise ValueError(
+                "No layer types compatible with FRoD were found. Please check `peft_config.target_modules`."
+            )
+
+        if not hasattr(self, "frod_V"):
+            self.frod_V = nn.ModuleDict()
+            self.frod_s_indices = nn.ModuleDict()
+            self.frod_s_size = nn.ModuleDict()
+
+        generator = torch.Generator(device="cpu").manual_seed(config.projection_prng_key)
+        categories = {category for layer_dict in weights.values() for category in layer_dict}
+        for category in sorted(categories):
+            matrices = [
+                layer_dict[category].detach().to(torch.float32).cpu().numpy()
+                for _, layer_dict in sorted(weights.items())
+                if category in layer_dict
+            ]
+
+            v_matrix = _projection_from_weights(matrices, config.regularization_alpha)
+            example_weight = next(layer_dict[category] for layer_dict in weights.values() if category in layer_dict)
+            v_tensor = torch.from_numpy(v_matrix).to(dtype=example_weight.dtype, device="cpu")
+
+            in_dim = v_tensor.shape[0]
+            # Off-diagonal positions in row-major order: flat index k lies in row k // (in_dim - 1); its column is
+            # k % (in_dim - 1), shifted by one from the diagonal onwards. This avoids building the full index grid.
+            n_off_diag = in_dim * (in_dim - 1)
+            nnz = min(int(in_dim * in_dim * config.sparse_rate), n_off_diag)
+            if nnz:
+                flat = torch.randperm(n_off_diag, generator=generator)[:nnz]
+                rows = torch.div(flat, in_dim - 1, rounding_mode="floor")
+                cols = flat % (in_dim - 1)
+                indices = torch.stack([rows, cols + (cols >= rows)])
+            else:
+                indices = torch.empty(2, 0, dtype=torch.long)
+            size = torch.tensor([in_dim, in_dim], dtype=torch.long)
+
+            stores = (self.frod_V, self.frod_s_indices, self.frod_s_size)
+            for store, tensor in zip(stores, (v_tensor, indices.to(torch.long), size)):
+                if category not in store:
+                    store[category] = BufferDict({}, persistent=config.save_projection)
+                store[category][adapter_name] = tensor
+
+    def _pre_injection_hook(self, model: nn.Module, config: FRODConfig, adapter_name: str) -> None:
+        self._init_frod_projections(config, adapter_name)  # fills frod_V / frod_s_indices / frod_s_size
+
+    def _check_new_adapter_config(self, config: FRODConfig) -> None:
+        """Raise `ValueError` if the adapters disagree on `projection_prng_key` or on `save_projection`."""
+        super()._check_new_adapter_config(config)
+        registered = list(self.peft_config.values())
+        # Every registered adapter other than `config` itself has to use the same projection PRNG key.
+        for other in registered:
+            if other is not config and other.projection_prng_key != config.projection_prng_key:
+                raise ValueError(
+                    f"FRoD projection initialization key must be the same for all adapters. Got "
+                    f"{config.projection_prng_key=} but previous config had "
+                    f"{other.projection_prng_key}."
+                )
+
+        save_projection_values = sorted({cfg.save_projection for cfg in registered})
+        if len(save_projection_values) > 1:
+            raise ValueError(
+                "FRoD projection weights must be saved for all adapters or none, but got multiple different values: "
+                f"{save_projection_values}"
+            )
+
+    def _create_and_replace(
+        self,
+        frod_config,
+        adapter_name,
+        target,
+        target_name,
+        parent,
+        current_key,
+        **optional_kwargs,
+    ):
+        """Attach the FRoD adapter `adapter_name` to `target`.
+
+        A target that already is a FRoD `Linear` gets the adapter added through `update_layer`; any other target is
+        wrapped by `_create_new_module` and the wrapper replaces it on `parent`.
+        """
+        if current_key is None:
+            raise ValueError("Current Key shouldn't be `None`")
+
+        category = _category_from_key(current_key)
+        if category not in self.frod_V:
+            # No projection buffers exist for this category yet, so build them first.
+            self._init_frod_projections(frod_config, adapter_name)
+
+        shared_buffers = (self.frod_V[category], self.frod_s_indices[category], self.frod_s_size[category])
+        has_bias = hasattr(target, "bias") and target.bias is not None
+
+        if isinstance(target, Linear):
+            # Already adapted: register the additional adapter on the existing layer and stop here.
+            target.update_layer(adapter_name, *shared_buffers, frod_config.frod_dropout, frod_config.init_weights)
+            return
+
+        # Any other target is wrapped and the wrapper is swapped into the parent module.
+        new_module = self._create_new_module(
+            frod_config,
+            *shared_buffers,
+            adapter_name,
+            target,
+            frod_dropout=frod_config.frod_dropout,
+            fan_in_fan_out=frod_config.fan_in_fan_out,
+            init_weights=frod_config.init_weights,
+            bias=has_bias,
+        )
+        if adapter_name not in self.active_adapters:
+            # Freeze the wrapper when its adapter is not among the active ones.
+            new_module.requires_grad_(False)
+        self._replace_module(parent, target_name, new_module, target)
+
+    @staticmethod
+    def _create_new_module(
+        frod_config,
+        frod_V,
+        frod_s_indices,
+        frod_s_size,
+        adapter_name,
+        target,
+        **kwargs,
+    ):
+        """Wrap `target` in a FRoD `Linear`, forcing `fan_in_fan_out` to the value its base layer type needs.
+
+        `torch.nn.Linear` base layers end up with `fan_in_fan_out=False` and `Conv1D` base layers with
+        `fan_in_fan_out=True`; a conflicting setting triggers a warning and is overwritten in `kwargs` and on
+        `frod_config`.
+
+        Raises:
+            TypeError: If the base layer is neither `torch.nn.Linear` nor `Conv1D`.
+        """
+        bias = kwargs.pop("bias", False)
+        # Look through an existing tuner layer so that the type checks see the underlying module.
+        base_layer = target.get_base_layer() if isinstance(target, BaseTunerLayer) else target
+        is_linear = isinstance(base_layer, torch.nn.Linear)
+
+        if not is_linear and not isinstance(base_layer, Conv1D):
+            raise TypeError(
+                f"Target module {target} is not supported. Currently, only the following modules are supported: "
+                "`torch.nn.Linear`, `transformers.pytorch_utils.Conv1D`."
+            )
+
+        if is_linear and kwargs["fan_in_fan_out"]:
+            warnings.warn(
+                "fan_in_fan_out is set to True but the target module is `torch.nn.Linear`. "
+                "Setting fan_in_fan_out to False."
+            )
+            kwargs["fan_in_fan_out"] = frod_config.fan_in_fan_out = False
+        elif not is_linear:
+            # Only `Conv1D` reaches this branch because every other type was rejected above.
+            kwargs["is_target_conv_1d_layer"] = True
+            if not kwargs["fan_in_fan_out"]:
+                warnings.warn(
+                    "fan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True."
+                )
+                kwargs["fan_in_fan_out"] = frod_config.fan_in_fan_out = True
+
+        return Linear(target, frod_V, frod_s_indices, frod_s_size, adapter_name, bias=bias, **kwargs)
diff --git a/src/peft/utils/__init__.py b/src/peft/utils/__init__.py
index 4106c78060..8e56825f58 100644
--- a/src/peft/utils/__init__.py
+++ b/src/peft/utils/__init__.py
@@ -26,6 +26,7 @@
     TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_DELORA_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING,
+    TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_GRALORA_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING,
@@ -89,6 +90,7 @@
     "TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_DELORA_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING",
+    "TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_GRALORA_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING",
diff --git a/src/peft/utils/constants.py b/src/peft/utils/constants.py
index 94aa475f0d..9cf874225f 100644
--- a/src/peft/utils/constants.py
+++ b/src/peft/utils/constants.py
@@ -135,6 +135,8 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
 TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
 TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING["phi"] = ["q_proj", "v_proj"]
 
+TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING.copy()
+
 TRANSFORMERS_MODELS_TO_PVERA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING.copy()
 TRANSFORMERS_MODELS_TO_PVERA_TARGET_MODULES_MAPPING["dinov2"] = ["query", "value"]
 
diff --git a/src/peft/utils/other.py b/src/peft/utils/other.py
index 4cc720c4a5..93fddebd5f 100644
--- a/src/peft/utils/other.py
+++ b/src/peft/utils/other.py
@@ -49,6 +49,7 @@
     TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_DELORA_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING,
+    TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_GRALORA_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING,
     TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING,
@@ -96,6 +97,7 @@
     "TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_DELORA_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING",
+    "TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_GRALORA_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING",
     "TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING",
diff --git a/src/peft/utils/peft_types.py b/src/peft/utils/peft_types.py
index 80fc1db8f5..9cf9b2e149 100644
--- a/src/peft/utils/peft_types.py
+++ b/src/peft/utils/peft_types.py
@@ -38,6 +38,7 @@ class PeftType(str, enum.Enum):
     - POLY
     - LN_TUNING
     - VERA
+    - FROD
     - FOURIERFT
     - HRA
     - BONE
@@ -71,6 +72,7 @@ class PeftType(str, enum.Enum):
     POLY = "POLY"
     LN_TUNING = "LN_TUNING"
     VERA = "VERA"
+    FROD = "FROD"
     PVERA = "PVERA"
     FOURIERFT = "FOURIERFT"
     XLORA = "XLORA"
diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py
index 57660c4283..65cd7bf502 100644
--- a/src/peft/utils/save_and_load.py
+++ b/src/peft/utils/save_and_load.py
@@ -273,6 +273,23 @@ def renamed_dora_weights(k):
                 )
             to_return["base_model.pvera_A." + adapter_name] = state_dict["base_model.pvera_A." + adapter_name]
             to_return["base_model.pvera_B." + adapter_name] = state_dict["base_model.pvera_B." + adapter_name]
+    elif config.peft_type == PeftType.FROD:
+        frod_prefix = PEFT_TYPE_TO_PREFIX_MAPPING[config.peft_type]
+        projection_prefixes = ("base_model.frod_V.", "base_model.frod_s_indices.", "base_model.frod_s_size.")
+        layer_projection_parts = (".frod_V.", ".frod_s_indices.", ".frod_s_size.", ".frod_U.")
+        to_return = {
+            k: state_dict[k]
+            for k in state_dict
+            if (frod_prefix in k) and (adapter_name in k) and not any(part in k for part in layer_projection_parts)
+        }
+        if config.save_projection:
+            to_return.update(
+                {
+                    k: state_dict[k]
+                    for k in state_dict
+                    if k.startswith(projection_prefixes) and k.endswith(f".{adapter_name}")
+                }
+            )
     elif config.peft_type == PeftType.XLORA:
         to_return = {k: state_dict[k] for k in state_dict if "internal_xlora_classifier" in k}
     elif config.peft_type == PeftType.VBLORA:
@@ -715,6 +732,13 @@ def set_peft_model_state_dict(
                 new_key = k.replace(".tinylora_v.", f".tinylora_v.{adapter_name}.")
                 tinylora_v_state_dict[new_key] = state_dict.pop(k)
 
+        frod_projection_state_dict = {}
+        if config.peft_type == PeftType.FROD:
+            frod_projection_prefixes = ("base_model.frod_V.", "base_model.frod_s_indices.", "base_model.frod_s_size.")
+            frod_projection_keys = [k for k in state_dict if k.startswith(frod_projection_prefixes)]
+            for k in frod_projection_keys:
+                frod_projection_state_dict[f"{k}.{adapter_name}"] = state_dict.pop(k)
+
         peft_model_state_dict = _insert_adapter_name_into_state_dict(
             state_dict, adapter_name=adapter_name, parameter_prefix=parameter_prefix
         )
@@ -722,6 +746,8 @@ def set_peft_model_state_dict(
         # Add back the tinylora_v keys (now in the correct format)
         if config.peft_type == PeftType.TINYLORA:
             peft_model_state_dict.update(tinylora_v_state_dict)
+        elif config.peft_type == PeftType.FROD:
+            peft_model_state_dict.update(frod_projection_state_dict)
 
         if config.peft_type == PeftType.ADALORA:
             rank_pattern = config.rank_pattern
@@ -796,6 +822,21 @@ def set_peft_model_state_dict(
                     " PRNG initialisation to restore these projections using `config.projection_prng_key`, which may"
                     " not be accurate on all system configurations."
                 )
+        elif config.peft_type == PeftType.FROD:
+            has_projection = any(
+                k.startswith(("base_model.frod_V.", "base_model.frod_s_indices.", "base_model.frod_s_size."))
+                for k in peft_model_state_dict
+            )
+            if config.save_projection and not has_projection:
+                raise ValueError(
+                    "Specified to load FRoD projection tensors from state dictionary however they were not present!"
+                )
+            elif not config.save_projection and has_projection:
+                warnings.warn(
+                    "Specified to not load FRoD projection tensors from state dictionary however they are present. "
+                    "Consider using them to ensure checkpoint loading is correct by setting "
+                    "`peft_config.save_projection = True`."
+                )
         elif config.peft_type == PeftType.LORA:
             # Here we take care of a refactor of DoRA which changed lora_magnitude_vector from a ParameterDict to a
             # ModuleDict with a DoraLayer instance. The old parameter is now the "weight" attribute of that layer.
diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
index 7a809a6b4c..92811b85e0 100644
--- a/tests/test_custom_models.py
+++ b/tests/test_custom_models.py
@@ -37,6 +37,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
+    FRODConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -870,6 +871,14 @@
         RandLoraConfig,
         {"target_modules": ["lin0"], "modules_to_save": ["lin1"], "randlora_alpha": 1},
     ),
+    ########
+    # FRoD #
+    ########
+    ("Vanilla MLP 1 FRoD", "MLP", FRODConfig, {"target_modules": "lin0"}),
+    ("Vanilla MLP 2 FRoD", "MLP", FRODConfig, {"target_modules": ["lin0"]}),
+    ("Vanilla MLP 3 FRoD", "MLP", FRODConfig, {"target_modules": ["lin1"]}),
+    ("Vanilla MLP 4 FRoD", "MLP", FRODConfig, {"target_modules": ["lin0", "lin1"]}),
+    ("Vanilla MLP 5 FRoD", "MLP", FRODConfig, {"target_modules": ["lin0"], "modules_to_save": ["lin1"]}),
     #######
     # C3A #
     #######
@@ -3455,7 +3464,8 @@ def test_multiple_adapters_automatic_modules_to_save(self):
         assert "other" in model.base_model.classifier.modules_to_save
 
     @pytest.mark.parametrize(
-        "config_cls", [IA3Config, BeftConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig, MissConfig]
+        "config_cls",
+        [IA3Config, BeftConfig, FRODConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig, MissConfig],
     )
     def test_multiple_adapters_mixed_modules_to_save(self, config_cls):
         # See issue 1574
@@ -3487,7 +3497,8 @@ def test_multiple_adapters_mixed_modules_to_save(self, config_cls):
         model(**inputs)
 
     @pytest.mark.parametrize(
-        "config_cls", [IA3Config, BeftConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig]
+        "config_cls",
+        [IA3Config, BeftConfig, FRODConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig],
     )
     def test_multiple_adapters_mixed_modules_to_save_order_switched(self, config_cls):
         # See issue 1574
@@ -3830,6 +3841,7 @@ def test_load_resized_embedding_ignore_mismatched_sizes(self):
             AdaLoraConfig(target_modules=["lin0"], init_lora_weights=False, total_step=1),
             IA3Config(target_modules=["lin0"], feedforward_modules=["lin0"], init_ia3_weights=False),
             BeftConfig(target_modules=["lin0"], init_weights=False),
+            FRODConfig(target_modules=["lin0"], init_weights=False),
             OFTConfig(target_modules=["lin0"], init_weights=False, r=2, oft_block_size=0),
             BOFTConfig(target_modules=["lin0"], init_weights=False, boft_block_size=2),
             HRAConfig(target_modules=["lin0"], init_weights=False),

From 2d3c730ad8e3477212c7102aa80d8e43aa2646be Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Thu, 21 May 2026 16:42:22 +0000
Subject: [PATCH 2/9] Add FRoD-specific tests

---
 tests/test_frod.py | 238 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 238 insertions(+)
 create mode 100644 tests/test_frod.py

diff --git a/tests/test_frod.py b/tests/test_frod.py
new file mode 100644
index 0000000000..c21b3040f6
--- /dev/null
+++ b/tests/test_frod.py
@@ -0,0 +1,238 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This test file is for tests specific to FRoD, since FRoD has shared projection buffers.
+
+import os
+
+import pytest
+import torch
+from accelerate.utils.imports import is_bf16_available
+from safetensors import safe_open
+from torch import nn
+
+from peft import FRODConfig, PeftModel, get_peft_model
+
+
+class MLP(nn.Module):
+    """Toy 10 -> 20 -> 20 -> 20 -> 2 classifier with ReLU activations and a log-softmax output."""
+
+    def __init__(self, bias=True):
+        super().__init__()
+        self.relu = nn.ReLU()
+        # lin1 and lin2 have the same shape (hidden -> hidden).
+        hidden = 20
+        self.lin0 = nn.Linear(10, hidden, bias=bias)
+        self.lin1 = nn.Linear(hidden, hidden, bias=bias)
+        self.lin2 = nn.Linear(hidden, hidden, bias=bias)
+        self.lin3 = nn.Linear(hidden, 2, bias=bias)
+        self.sm = nn.LogSoftmax(dim=-1)
+
+    def forward(self, X):
+        """Return log-probabilities for `X`, whose last dimension must be 10."""
+        hidden_layers = (self.lin0, self.lin1, self.lin2)
+        for layer in hidden_layers:
+            X = self.relu(layer(X))
+        return self.sm(self.lin3(X))
+
+
+class TestFROD:
+    """FRoD-specific tests covering shared projection buffers, checkpoint contents, shapes and dtypes."""
+
+    @pytest.fixture
+    def mlp(self):
+        """Freshly built `MLP` whose weights are initialised under seed 0."""
+        torch.manual_seed(0)
+        return MLP()
+
+    @pytest.fixture
+    def mlp_same_prng(self, mlp):
+        """PEFT model carrying adapters "default" and "other", configured identically on lin1 and lin2."""
+        torch.manual_seed(0)
+        default_config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        model = get_peft_model(mlp, default_config)
+        # The second adapter repeats the same settings and also leaves `projection_prng_key` at its default.
+        model.add_adapter("other", FRODConfig(target_modules=["lin1", "lin2"], init_weights=False))
+        return model
+
+    @staticmethod
+    def _make_second_adapter_different(peft_model):
+        """Add 0.1 in place to every `frod_lambda_l["second"]` entry found in the wrapped model."""
+        with torch.no_grad():
+            for module in peft_model.base_model.model.modules():
+                if "second" in getattr(module, "frod_lambda_l", ()):
+                    module.frod_lambda_l["second"].add_(0.1)
+
+    def test_multiple_adapters_same_prng_projection_buffers(self, mlp_same_prng):
+        """Adapters created with the same PRNG key share their fixed projection buffers.
+
+        Within each FRoD layer, `frod_V` and `frod_s_indices` of "default" and "other" must point at the same
+        memory.
+        """
+        base = mlp_same_prng.base_model.model
+        for layer in (base.lin1, base.lin2):
+            for buffers in (layer.frod_V, layer.frod_s_indices):
+                default_ptr = buffers["default"].data_ptr()
+                other_ptr = buffers["other"].data_ptr()
+                assert default_ptr == other_ptr
+
+    def test_multiple_adapters_different_prng_raises(self):
+        """Adding an adapter whose `projection_prng_key` differs from the existing one raises `ValueError`."""
+        peft_model = get_peft_model(MLP(), FRODConfig(target_modules=["lin1", "lin2"], init_weights=False))
+        mismatched = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, projection_prng_key=123)
+
+        expected = (
+            r"FRoD projection initialization key must be the same for all adapters. Got "
+            r"config.projection_prng_key=123 but previous config had 0"
+        )
+        with pytest.raises(ValueError, match=expected):
+            peft_model.add_adapter("other", mismatched)
+
+    def test_multiple_adapters_save_load_save_projection_false(self, mlp, tmp_path):
+        """Two adapters saved without projection tensors reproduce their outputs after being reloaded."""
+
+        def run_both(model, batch):
+            outputs = []
+            for name in ("first", "second"):
+                model.set_adapter(name)
+                outputs.append(model(batch))
+            return outputs
+
+        torch.manual_seed(1)
+        first_config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
+        peft_model = get_peft_model(mlp, first_config, adapter_name="first")
+        second_config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
+        peft_model.add_adapter("second", second_config)
+        self._make_second_adapter_different(peft_model)
+        peft_model.eval()
+
+        # One batch is shared by the runs before saving and after reloading.
+        batch = torch.randn(5, 10)
+        expected_first, expected_second = run_both(peft_model, batch)
+        assert not torch.allclose(expected_first, expected_second, atol=1e-3, rtol=1e-3)
+
+        save_path = tmp_path / "frod"
+        peft_model.save_pretrained(save_path)
+        for name in ("first", "second"):
+            assert os.path.exists(save_path / name / "adapter_config.json")
+
+        # Rebuild the base model under the same seed as the `mlp` fixture before loading the adapters.
+        torch.manual_seed(0)
+        reloaded = PeftModel.from_pretrained(MLP(), save_path / "first", adapter_name="first")
+        reloaded.load_adapter(save_path / "second", "second")
+        reloaded.eval()
+
+        loaded_first, loaded_second = run_both(reloaded, batch)
+        assert torch.allclose(expected_first, loaded_first, atol=1e-3, rtol=1e-3)
+        assert torch.allclose(expected_second, loaded_second, atol=1e-3, rtol=1e-3)
+
+    def test_save_projection_false_contains_no_frod_projection_tensors(self, mlp, tmp_path):
+        """With `save_projection=False` the checkpoint carries no FRoD projection tensors.
+
+        Every saved key is checked for `frod_V`, `frod_s_indices`, `frod_s_size` and `frod_U`.
+        """
+        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
+        save_path = tmp_path / "frod"
+        get_peft_model(mlp, config).save_pretrained(save_path)
+
+        with safe_open(save_path / "adapter_model.safetensors", framework="pt", device="cpu") as f:
+            state_dict = {key: f.get_tensor(key) for key in f.keys()}
+
+        # The tensors are loaded as well, although only the key names are inspected.
+        for fragment in ("frod_V", "frod_s_indices", "frod_s_size", "frod_U"):
+            assert not any(fragment in key for key in state_dict)
+
+    def test_save_projection_true_contains_top_level_projection_tensors_only(self, mlp, tmp_path):
+        """By default, projections are saved under top-level `base_model.frod_*` keys only.
+
+        No saved key may contain `.model.lin1.frod_V` or `frod_U`.
+        """
+        save_path = tmp_path / "frod"
+        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        get_peft_model(mlp, config).save_pretrained(save_path)
+
+        with safe_open(save_path / "adapter_model.safetensors", framework="pt", device="cpu") as f:
+            keys = list(f.keys())
+
+        expected_keys = (
+            "base_model.frod_V.lin1",
+            "base_model.frod_s_indices.lin1",
+            "base_model.frod_s_size.lin1",
+            "base_model.frod_V.lin2",
+        )
+        for expected in expected_keys:
+            assert expected in keys
+        for forbidden in (".model.lin1.frod_V", "frod_U"):
+            assert not any(forbidden in key for key in keys)
+
+    def test_frod_projection_buffers_share_memory_with_layers(self, mlp_same_prng):
+        """The model-level projection buffers are the very tensors that the wrapped layers hold.
+
+        Compared via `data_ptr()` for `frod_V` and `frod_s_indices` of lin1, for both adapters.
+        """
+        tuner = mlp_same_prng.base_model
+        layer = tuner.model.lin1
+        shared_v = tuner.frod_V["lin1"]["default"].data_ptr()
+        shared_indices = tuner.frod_s_indices["lin1"]["default"].data_ptr()
+
+        for adapter in ("default", "other"):
+            assert shared_v == layer.frod_V[adapter].data_ptr()
+        for adapter in ("default", "other"):
+            assert shared_indices == layer.frod_s_indices[adapter].data_ptr()
+
+        # Different target categories have distinct projection buffers.
+        assert shared_v != tuner.frod_V["lin2"]["default"].data_ptr()
+
+    def test_frod_lambda_dont_share_memory(self, mlp_same_prng):
+        """The lambda tensors never alias each other.
+
+        Checked for `frod_lambda_s_values` and `frod_lambda_l`, across adapters of one layer and across layers.
+        """
+        base = mlp_same_prng.base_model.model
+        for name in ("frod_lambda_s_values", "frod_lambda_l"):
+            lin1_params = getattr(base.lin1, name)
+            lin2_params = getattr(base.lin2, name)
+            reference = lin1_params["default"].data_ptr()
+            # Same layer, other adapter.
+            assert reference != lin1_params["other"].data_ptr()
+            # Same adapter, other layer.
+            assert reference != lin2_params["default"].data_ptr()
+
+    def test_frod_different_shapes(self, mlp):
+        """Layers of different shapes each get their own square `frod_V`.
+
+        The side length of each projection equals the `in_features` of the layer it belongs to.
+        """
+        peft_model = get_peft_model(mlp, FRODConfig(target_modules=["lin0", "lin3"], init_weights=False))
+
+        assert mlp.lin0.base_layer.weight.shape != mlp.lin3.base_layer.weight.shape
+        for name in ("lin0", "lin3"):
+            in_features = getattr(mlp, name).in_features
+            assert peft_model.base_model.frod_V[name]["default"].shape == (in_features, in_features)
+
+        # The forward pass only has to run without raising.
+        peft_model(torch.randn(5, 10))
+
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.float16, torch.bfloat16])
+    def test_frod_dtypes(self, dtype):
+        """The adapted model returns outputs in the dtype shared by the base model and the inputs."""
+        # bfloat16 is only exercised on systems that report support for it.
+        if dtype == torch.bfloat16 and not is_bf16_available():
+            pytest.skip("bfloat16 not supported on this system, skipping the test")
+
+        model = MLP().to(dtype)
+        peft_model = get_peft_model(model, FRODConfig(target_modules=["lin1", "lin2"], init_weights=False))
+        inputs = torch.randn(5, 10).to(dtype)
+        output = peft_model(inputs)
+        assert output.dtype == dtype

From b252c65dc5ac2b48a036564661a63b7e20b9fc72 Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Wed, 27 May 2026 04:27:29 +0000
Subject: [PATCH 3/9] Finalize FROD integration

---
 docs/source/package_reference/frod.md         |  28 ++++
 examples/frod_finetuning/README.md            |  27 ++++
 .../frod_image_classification.py              | 131 ++++++++++++++++++
 .../frod_text_classification.py               |  93 +++++++++++++
 examples/frod_finetuning/requirements.txt     |   6 +
 .../llama-3.2-3B-default/adapter_config.json  |  20 +++
 .../adapter_config.json                       |  20 +++
 .../training_params.json                      |   5 +
 src/peft/tuners/frod/config.py                |  22 ++-
 src/peft/tuners/frod/layer.py                 |  18 ++-
 src/peft/tuners/frod/model.py                 |   3 +
 src/peft/utils/constants.py                   |   1 +
 tests/test_config.py                          |   2 +
 tests/test_decoder_models.py                  |   9 ++
 tests/test_encoder_decoder_models.py          |   9 ++
 tests/test_feature_extraction_models.py       |   9 ++
 tests/test_seq_classifier.py                  |   9 ++
 17 files changed, 403 insertions(+), 9 deletions(-)
 create mode 100644 examples/frod_finetuning/README.md
 create mode 100644 examples/frod_finetuning/frod_image_classification.py
 create mode 100644 examples/frod_finetuning/frod_text_classification.py
 create mode 100644 examples/frod_finetuning/requirements.txt
 create mode 100644 method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-default/adapter_config.json
 create mode 100644 method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/adapter_config.json
 create mode 100644 method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/training_params.json

diff --git a/docs/source/package_reference/frod.md b/docs/source/package_reference/frod.md
index 7fceb6fa41..b2c146fe0f 100644
--- a/docs/source/package_reference/frod.md
+++ b/docs/source/package_reference/frod.md
@@ -20,15 +20,43 @@ FRoD is a parameter-efficient fine-tuning method that combines a shared full-ran
 rotational degrees. The adapter update is expressed through fixed projection tensors and trainable coefficients, which
 allows FRoD to apply full-rank updates while keeping the number of trained parameters small.
 
+Paper: [Full-Rank Efficient Fine-Tuning with Rotational Degrees](https://doi.org/10.1609/aaai.v40i31.39813).
+
 When saving the adapter parameters, it is possible to avoid storing the projection tensors by setting
 `save_projection=False` on the `FRODConfig`. In that case, the projections are restored from the base model weights and
 the fixed random seed from `projection_prng_key`. This reduces checkpoint size, but the default is
 `save_projection=True` to make checkpoint loading independent of regeneration details.
 
+Compared to LoRA, FRoD can express a full-rank update in each adapted linear layer while training only the diagonal
+coefficients and a sparse set of off-diagonal rotation coefficients. This can be useful when a low-rank update is too
+restrictive. The trade-off is that FRoD computes fixed projection tensors from the base weights during adapter
+injection, which makes setup more expensive and the implementation less broadly supported than LoRA.
+
 FRoD currently has the following constraint:
 
 - Only `nn.Linear` and `transformers.pytorch_utils.Conv1D` layers are supported.
 
+## Quickstart
+
+```python
+from transformers import AutoModelForSequenceClassification
+
+from peft import FRODConfig, TaskType, get_peft_model
+
+model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)
+
+peft_config = FRODConfig(
+    task_type=TaskType.SEQ_CLS,
+    target_modules=["query", "value"],
+    modules_to_save=["classifier"],
+    sparse_rate=0.02,
+    frod_dropout=0.0,
+)
+
+model = get_peft_model(model, peft_config)
+model.print_trainable_parameters()
+```
+
 ## FRODConfig
 
 [[autodoc]] tuners.frod.config.FRODConfig
diff --git a/examples/frod_finetuning/README.md b/examples/frod_finetuning/README.md
new file mode 100644
index 0000000000..f818ed6b51
--- /dev/null
+++ b/examples/frod_finetuning/README.md
@@ -0,0 +1,27 @@
+# FRoD fine-tuning examples
+
+These examples show minimal FRoD fine-tuning with the Transformers `Trainer`.
+
+Install the example dependencies and run either script directly:
+
+```bash
+pip install -r examples/frod_finetuning/requirements.txt
+python examples/frod_finetuning/frod_text_classification.py
+python examples/frod_finetuning/frod_image_classification.py
+```
+
+The text example fine-tunes `google-bert/bert-base-uncased` on `nyu-mll/glue` with the `sst2` configuration. The image
+example fine-tunes `google/vit-base-patch16-224` on the train and test parquet splits from `tanganke/stanford_cars`.
+
+Both scripts use separate optimizer learning rates for FRoD diagonal coefficients, FRoD sparse coefficients, and the
+classification head. FRoD dropout is set to `0.0` because the sparse rotational parameterization is the main
+regularizer in these examples.
+
+To use local mirrors of the image model or dataset, override the image example paths with environment variables:
+
+```bash
+FROD_IMAGE_MODEL_NAME=/path/to/local/vit-model \
+FROD_STANFORD_CARS_DATA_DIR=/path/to/local/stanford_cars \
+FROD_IMAGE_OUTPUT_DIR=vit-local-frod-stanford-cars \
+python examples/frod_finetuning/frod_image_classification.py
+```
diff --git a/examples/frod_finetuning/frod_image_classification.py b/examples/frod_finetuning/frod_image_classification.py
new file mode 100644
index 0000000000..3f61f52f32
--- /dev/null
+++ b/examples/frod_finetuning/frod_image_classification.py
@@ -0,0 +1,149 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import numpy as np
+import torch
+from datasets import load_dataset
+from transformers import AutoImageProcessor, AutoModelForImageClassification, Trainer, TrainingArguments
+
+from peft import FRODConfig, get_peft_model
+
+
+MODEL_NAME = os.environ.get("FROD_IMAGE_MODEL_NAME", "google/vit-base-patch16-224")
+OUTPUT_DIR = os.environ.get("FROD_IMAGE_OUTPUT_DIR", "vit-base-patch16-224-frod-stanford-cars")
+# Optional local mirror of the dataset repo (`<dir>/data/*.parquet`); when unset, files are read from the Hub.
+DATA_DIR = os.environ.get("FROD_STANFORD_CARS_DATA_DIR")
+FROD_LAMBDA_L_LR = 5e-4
+FROD_LAMBDA_S_LR = 5e-5
+CLASSIFIER_LR = 1e-4
+
+
+def main():
+    """Fine-tune `MODEL_NAME` on Stanford Cars with FRoD, evaluate on the test split, and save to `OUTPUT_DIR`."""
+    if DATA_DIR:
+        data_files = {
+            "train": [
+                os.path.join(DATA_DIR, "data", "train-00000-of-00002.parquet"),
+                os.path.join(DATA_DIR, "data", "train-00001-of-00002.parquet"),
+            ],
+            "test": [
+                os.path.join(DATA_DIR, "data", "test-00000-of-00002.parquet"),
+                os.path.join(DATA_DIR, "data", "test-00001-of-00002.parquet"),
+            ],
+        }
+    else:
+        data_files = {
+            "train": [
+                "hf://datasets/tanganke/stanford_cars/data/train-00000-of-00002.parquet",
+                "hf://datasets/tanganke/stanford_cars/data/train-00001-of-00002.parquet",
+            ],
+            "test": [
+                "hf://datasets/tanganke/stanford_cars/data/test-00000-of-00002.parquet",
+                "hf://datasets/tanganke/stanford_cars/data/test-00001-of-00002.parquet",
+            ],
+        }
+
+    dataset = load_dataset("parquet", data_files=data_files)
+    train_split = dataset["train"]
+    eval_split = dataset["test"]
+    image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
+    label_feature = train_split.features["label"]
+    label_names = (
+        label_feature.names if hasattr(label_feature, "names") else [str(i) for i in sorted(set(train_split["label"]))]
+    )
+    id2label = dict(enumerate(label_names))
+    label2id = {name: idx for idx, name in id2label.items()}
+
+    model = AutoModelForImageClassification.from_pretrained(
+        MODEL_NAME,
+        num_labels=len(label_names),
+        id2label=id2label,
+        label2id=label2id,
+        ignore_mismatched_sizes=True,
+    )
+    peft_config = FRODConfig(
+        target_modules=["query", "value"],
+        modules_to_save=["classifier"],
+        frod_dropout=0.0,
+        sparse_rate=0.02,
+    )
+    model = get_peft_model(model, peft_config)
+    model.print_trainable_parameters()
+
+    def transform(batch):
+        """Convert a batch of images to RGB, run the image processor on them, and attach the labels."""
+        images = [image.convert("RGB") for image in batch["image"]]
+        inputs = image_processor(images, return_tensors="pt")
+        inputs["labels"] = batch["label"]
+        return inputs
+
+    train_dataset = train_split.with_transform(transform)
+    eval_dataset = eval_split.with_transform(transform)
+
+    def collate_fn(examples):
+        """Stack the per-example pixel tensors and labels into one batch dict."""
+        pixel_values = torch.stack([example["pixel_values"] for example in examples])
+        labels = torch.tensor([example["labels"] for example in examples])
+        return {"pixel_values": pixel_values, "labels": labels}
+
+    def compute_metrics(eval_pred):
+        """Return the fraction of argmax predictions that match the reference labels."""
+        predictions = np.argmax(eval_pred.predictions, axis=-1)
+        return {"accuracy": (predictions == eval_pred.label_ids).mean().item()}
+
+    # One parameter group per parameter family so that each can use its own learning rate.
+    optimizer = torch.optim.AdamW(
+        [
+            {"params": [p for n, p in model.named_parameters() if "frod_lambda_l" in n], "lr": FROD_LAMBDA_L_LR},
+            {
+                "params": [p for n, p in model.named_parameters() if "frod_lambda_s_values" in n],
+                "lr": FROD_LAMBDA_S_LR,
+            },
+            {"params": [p for n, p in model.named_parameters() if "classifier" in n], "lr": CLASSIFIER_LR},
+        ]
+    )
+
+    training_args = TrainingArguments(
+        output_dir=OUTPUT_DIR,
+        learning_rate=FROD_LAMBDA_L_LR,
+        per_device_train_batch_size=32,
+        per_device_eval_batch_size=64,
+        num_train_epochs=1,
+        eval_strategy="epoch",
+        save_strategy="epoch",
+        load_best_model_at_end=True,
+        metric_for_best_model="accuracy",
+        # Keep the `image` column available; `transform` reads it when building model inputs.
+        remove_unused_columns=False,
+        report_to="none",
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        data_collator=collate_fn,
+        compute_metrics=compute_metrics,
+        optimizers=(optimizer, None),
+    )
+    trainer.train()
+    trainer.evaluate()
+    model.save_pretrained(OUTPUT_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/frod_finetuning/frod_text_classification.py b/examples/frod_finetuning/frod_text_classification.py
new file mode 100644
index 0000000000..105beda9b9
--- /dev/null
+++ b/examples/frod_finetuning/frod_text_classification.py
@@ -0,0 +1,108 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import torch
+from datasets import load_dataset
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    DataCollatorWithPadding,
+    Trainer,
+    TrainingArguments,
+)
+
+from peft import FRODConfig, TaskType, get_peft_model
+
+
+MODEL_NAME = "google-bert/bert-base-uncased"
+DATASET_NAME = "nyu-mll/glue"
+TASK_NAME = "sst2"
+OUTPUT_DIR = "bert-base-uncased-frod-sst2"
+FROD_LAMBDA_L_LR = 2e-2
+FROD_LAMBDA_S_LR = 2e-3
+CLASSIFIER_LR = 1e-2
+
+
+def main():
+    """Fine-tune `MODEL_NAME` on GLUE SST-2 with FRoD, evaluate on validation, and save to `OUTPUT_DIR`."""
+    dataset = load_dataset(DATASET_NAME, TASK_NAME)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+    def preprocess(batch):
+        """Tokenize the `sentence` column with truncation enabled."""
+        return tokenizer(batch["sentence"], truncation=True)
+
+    tokenized = dataset.map(preprocess, batched=True)
+    tokenized = tokenized.rename_column("label", "labels")
+
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
+    peft_config = FRODConfig(
+        task_type=TaskType.SEQ_CLS,
+        target_modules=["query", "value"],
+        modules_to_save=["classifier"],
+        frod_dropout=0.0,
+        sparse_rate=0.02,
+    )
+    model = get_peft_model(model, peft_config)
+    model.print_trainable_parameters()
+
+    def compute_metrics(eval_pred):
+        """Return the fraction of argmax predictions that match the reference labels."""
+        predictions = np.argmax(eval_pred.predictions, axis=-1)
+        return {"accuracy": (predictions == eval_pred.label_ids).mean().item()}
+
+    # One parameter group per parameter family so that each can use its own learning rate.
+    optimizer = torch.optim.AdamW(
+        [
+            {"params": [p for n, p in model.named_parameters() if "frod_lambda_l" in n], "lr": FROD_LAMBDA_L_LR},
+            {
+                "params": [p for n, p in model.named_parameters() if "frod_lambda_s_values" in n],
+                "lr": FROD_LAMBDA_S_LR,
+            },
+            {"params": [p for n, p in model.named_parameters() if "classifier" in n], "lr": CLASSIFIER_LR},
+        ]
+    )
+
+    training_args = TrainingArguments(
+        output_dir=OUTPUT_DIR,
+        learning_rate=FROD_LAMBDA_L_LR,
+        per_device_train_batch_size=32,
+        per_device_eval_batch_size=64,
+        num_train_epochs=1,
+        eval_strategy="epoch",
+        save_strategy="epoch",
+        load_best_model_at_end=True,
+        metric_for_best_model="accuracy",
+        report_to="none",
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized["train"],
+        eval_dataset=tokenized["validation"],
+        # `processing_class` supersedes the deprecated `tokenizer` argument of `Trainer`.
+        processing_class=tokenizer,
+        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
+        compute_metrics=compute_metrics,
+        optimizers=(optimizer, None),
+    )
+    trainer.train()
+    trainer.evaluate()
+    model.save_pretrained(OUTPUT_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/frod_finetuning/requirements.txt b/examples/frod_finetuning/requirements.txt
new file mode 100644
index 0000000000..8bcaa74619
--- /dev/null
+++ b/examples/frod_finetuning/requirements.txt
@@ -0,0 +1,6 @@
+peft
+transformers
+accelerate>=1.0.0
+datasets
+numpy
+Pillow
diff --git a/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-default/adapter_config.json b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-default/adapter_config.json
new file mode 100644
index 0000000000..d52af61d39
--- /dev/null
+++ b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-default/adapter_config.json
@@ -0,0 +1,20 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "frod_dropout": 0.0,
+  "inference_mode": false,
+  "init_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "peft_type": "FROD",
+  "projection_prng_key": 0,
+  "regularization_alpha": 0.001,
+  "revision": null,
+  "save_projection": true,
+  "sparse_rate": 0.01,
+  "target_modules": null,
+  "task_type": "CAUSAL_LM"
+}
diff --git a/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/adapter_config.json b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/adapter_config.json
new file mode 100644
index 0000000000..8abdd8540b
--- /dev/null
+++ b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/adapter_config.json
@@ -0,0 +1,20 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "frod_dropout": 0.0,
+  "inference_mode": false,
+  "init_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "peft_type": "FROD",
+  "projection_prng_key": 0,
+  "regularization_alpha": 0.001,
+  "revision": null,
+  "save_projection": true,
+  "sparse_rate": 0.02,
+  "target_modules": null,
+  "task_type": "CAUSAL_LM"
+}
diff --git a/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/training_params.json b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/training_params.json
new file mode 100644
index 0000000000..52d87e3ef6
--- /dev/null
+++ b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/training_params.json
@@ -0,0 +1,5 @@
+{
+  "optimizer_kwargs": {
+    "lr": 1e-3
+  }
+}
diff --git a/src/peft/tuners/frod/config.py b/src/peft/tuners/frod/config.py
index 354c1279b4..2e5b724768 100644
--- a/src/peft/tuners/frod/config.py
+++ b/src/peft/tuners/frod/config.py
@@ -56,6 +56,12 @@ class FRODConfig(PeftConfig):
         layers_pattern (`Optional[Union[List[str], str]]`):
             The layer pattern name, used only if `layers_to_transform` is different from `None`. This should target the
             `nn.ModuleList` of the model, which is often called `'layers'` or `'h'`.
+        sparse_rate (`float`):
+            Fraction of off-diagonal entries in the sparse trainable rotation matrix. Higher values increase capacity
+            and trainable parameters; lower values are cheaper. Defaults to `0.01`.
+        regularization_alpha (`float`):
+            Small positive value used while building the shared basis from base weights. It stabilizes the matrix
+            inverse when layers in the same category have correlated weights. Defaults to `1e-3`.
     """
 
     target_modules: Optional[Union[list[str], str]] = field(
@@ -126,15 +132,27 @@ class FRODConfig(PeftConfig):
             )
         },
     )
-    sparse_rate: float = field(default=0.01, metadata={"help": "Sparse rate"})
+    sparse_rate: float = field(
+        default=0.01,
+        metadata={
+            "help": (
+                "Fraction of off-diagonal entries in the sparse trainable rotation matrix. Higher values increase "
+                "capacity and trainable parameters; lower values are cheaper."
+            )
+        },
+    )
     regularization_alpha: float = field(
         default=1e-3,
         metadata={
-            "help": ("Regularization parameter used when building the shared FRoD basis."),
+            "help": (
+                "Small positive value used while building the shared basis from base weights. It stabilizes matrix "
+                "inverses for correlated layers."
+            ),
         },
     )
 
     def __post_init__(self):
+        super().__post_init__()
         self.peft_type = PeftType.FROD
         self.target_modules = (
             set(self.target_modules) if isinstance(self.target_modules, list) else self.target_modules
diff --git a/src/peft/tuners/frod/layer.py b/src/peft/tuners/frod/layer.py
index edaf2ea2ba..6feb0b0729 100644
--- a/src/peft/tuners/frod/layer.py
+++ b/src/peft/tuners/frod/layer.py
@@ -29,7 +29,7 @@
 
 
 class FRODLayer(BaseTunerLayer):
-    adapter_layer_names = ("frod_lambda_s_values", "frod_lambda_l")
+    adapter_layer_names = ("frod_lambda_l", "frod_lambda_s_values")
     other_param_names = ("frod_V", "frod_U", "frod_s_indices", "frod_s_size")
 
     def __init__(self, base_layer: nn.Module, **kwargs):
@@ -38,8 +38,8 @@ def __init__(self, base_layer: nn.Module, **kwargs):
         self.frod_dropout = nn.ModuleDict({})
 
         # Sparse S is parameterized by its COO values only.
-        self.frod_lambda_s_values = nn.ParameterDict({})
         self.frod_lambda_l = nn.ParameterDict({})
+        self.frod_lambda_s_values = nn.ParameterDict({})
 
         self.frod_s_indices: Optional[BufferDict] = None
         self.frod_s_size: Optional[BufferDict] = None
@@ -74,13 +74,12 @@ def update_layer(
         frod_dropout,
         init_weights,
     ):
-        weight = self.get_base_layer().weight
-        device = weight.device
-        dtype = weight.dtype
+        base_layer = self.get_base_layer()
+        weight = base_layer.weight.T if isinstance(base_layer, Conv1D) else base_layer.weight
+        device = base_layer.weight.device
+        dtype = base_layer.weight.dtype
 
         param_dtype = dtype
-        if device.type == "cuda" and dtype == torch.float32:
-            param_dtype = torch.float16
 
         self.r[adapter_name] = self.out_features
         if frod_dropout > 0.0:
@@ -115,6 +114,11 @@ def update_layer(
         self.frod_lambda_l[adapter_name] = nn.Parameter(L, requires_grad=True)
         if init_weights:
             self.reset_frod_parameters(adapter_name)
+        else:
+            # PEFT convention: init_weights=False should produce a non-identity adapter for merge tests.
+            with torch.no_grad():
+                nn.init.normal_(self.frod_lambda_s_values[adapter_name], std=0.02)
+                self.frod_lambda_l[adapter_name].add_(torch.randn_like(self.frod_lambda_l[adapter_name]) * 0.02)
 
         self.frod_U[adapter_name] = U.cpu()
         self._move_adapter_to_device_of_base_layer(adapter_name)
diff --git a/src/peft/tuners/frod/model.py b/src/peft/tuners/frod/model.py
index 6b148ce51e..2968e65416 100644
--- a/src/peft/tuners/frod/model.py
+++ b/src/peft/tuners/frod/model.py
@@ -73,6 +73,7 @@ def _projection_from_weights(matrices: list[np.ndarray], regularization_alpha: f
 
     dim = r_matrix.shape[1]
     t_pi = np.zeros((dim, dim), dtype=r_matrix.dtype)
+    # Layers of the same projection category can be highly correlated; this ridge term keeps the inverse stable.
     for q_slice in q_slices:
         q_term = q_slice.T @ q_slice + regularization_alpha * np.eye(dim, dtype=r_matrix.dtype)
         t_pi += np.linalg.inv(q_term)
@@ -144,6 +145,8 @@ def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
             mask_indices = torch.stack([rows.flatten(), cols.flatten()], dim=1)
             non_diag_indices = mask_indices[mask_indices[:, 0] != mask_indices[:, 1]]
             nnz = min(int(in_dim * in_dim * config.sparse_rate), non_diag_indices.shape[0])
+            if (config.sparse_rate > 0) and (non_diag_indices.shape[0] > 0):
+                nnz = max(1, nnz)
             if nnz:
                 perm = torch.randperm(non_diag_indices.shape[0], generator=generator)[:nnz]
                 indices = non_diag_indices[perm].t().contiguous()
diff --git a/src/peft/utils/constants.py b/src/peft/utils/constants.py
index 9cf874225f..3ef9c0f80a 100644
--- a/src/peft/utils/constants.py
+++ b/src/peft/utils/constants.py
@@ -136,6 +136,7 @@ def starcoder_model_postprocess_past_key_value(past_key_values):
 TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING["phi"] = ["q_proj", "v_proj"]
 
 TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING.copy()
+TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING["vit"] = ["query", "value"]
 
 TRANSFORMERS_MODELS_TO_PVERA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING.copy()
 TRANSFORMERS_MODELS_TO_PVERA_TARGET_MODULES_MAPPING["dinov2"] = ["query", "value"]
diff --git a/tests/test_config.py b/tests/test_config.py
index 02cd0004ad..c9ee5894c0 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -30,6 +30,7 @@
     CartridgeConfig,
     CPTConfig,
     FourierFTConfig,
+    FRODConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -78,6 +79,7 @@ class TestingCommitHashError(Exception):
     (BOFTConfig, {}),
     (C3AConfig, {}),
     (FourierFTConfig, {}),
+    (FRODConfig, {}),
     (GraloraConfig, {}),
     (HiraConfig, {}),
     (HRAConfig, {}),
diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py
index f43763a554..b6de666976 100644
--- a/tests/test_decoder_models.py
+++ b/tests/test_decoder_models.py
@@ -38,6 +38,7 @@
     CPTConfig,
     DeloraConfig,
     FourierFTConfig,
+    FRODConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -145,6 +146,14 @@
             "target_modules": None,
         },
     ),
+    (
+        FRODConfig,
+        {
+            "task_type": "CAUSAL_LM",
+            "target_modules": None,
+            "sparse_rate": 0.01,
+        },
+    ),
     (
         GraloraConfig,
         {
diff --git a/tests/test_encoder_decoder_models.py b/tests/test_encoder_decoder_models.py
index 6411d30b93..bca8a7d3bf 100644
--- a/tests/test_encoder_decoder_models.py
+++ b/tests/test_encoder_decoder_models.py
@@ -25,6 +25,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
+    FRODConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -108,6 +109,14 @@
             "task_type": "SEQ_2_SEQ_LM",
         },
     ),
+    (
+        FRODConfig,
+        {
+            "target_modules": None,
+            "task_type": "SEQ_2_SEQ_LM",
+            "sparse_rate": 0.01,
+        },
+    ),
     (
         GraloraConfig,
         {
diff --git a/tests/test_feature_extraction_models.py b/tests/test_feature_extraction_models.py
index 90493f275c..bece31bf15 100644
--- a/tests/test_feature_extraction_models.py
+++ b/tests/test_feature_extraction_models.py
@@ -23,6 +23,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
+    FRODConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -105,6 +106,14 @@
             "target_modules": None,
         },
     ),
+    (
+        FRODConfig,
+        {
+            "task_type": "FEATURE_EXTRACTION",
+            "target_modules": None,
+            "sparse_rate": 0.01,
+        },
+    ),
     (
         GraloraConfig,
         {
diff --git a/tests/test_seq_classifier.py b/tests/test_seq_classifier.py
index 613ca1b8c3..b08f0a9945 100644
--- a/tests/test_seq_classifier.py
+++ b/tests/test_seq_classifier.py
@@ -23,6 +23,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
+    FRODConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -106,6 +107,14 @@
             "target_modules": None,
         },
     ),
+    (
+        FRODConfig,
+        {
+            "task_type": "SEQ_CLS",
+            "target_modules": None,
+            "sparse_rate": 0.01,
+        },
+    ),
     (
         GraloraConfig,
         {

From 0e0d816832535af125e1006e1f305e3dd6c7a2f4 Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Thu, 28 May 2026 09:47:47 +0000
Subject: [PATCH 4/9] Address FROD PR review feedback

---
 docs/source/package_reference/frod.md         |  14 +--
 .../frod_image_classification.py              |   4 +-
 .../frod_text_classification.py               |   4 +-
 src/peft/__init__.py                          |   8 +-
 src/peft/tuners/__init__.py                   |   6 +-
 src/peft/tuners/frod/__init__.py              |  24 +++-
 src/peft/tuners/frod/config.py                |   6 +-
 src/peft/tuners/frod/layer.py                 | 117 +++++++-----------
 src/peft/tuners/frod/model.py                 |  62 +++++-----
 src/peft/utils/save_and_load.py               |   5 +-
 tests/test_config.py                          |   4 +-
 tests/test_custom_models.py                   |  25 ++--
 tests/test_decoder_models.py                  |   4 +-
 tests/test_encoder_decoder_models.py          |   4 +-
 tests/test_feature_extraction_models.py       |   4 +-
 tests/test_frod.py                            |  60 ++-------
 tests/test_initialization.py                  |  66 ++++++++++
 tests/test_seq_classifier.py                  |   4 +-
 18 files changed, 222 insertions(+), 199 deletions(-)

diff --git a/docs/source/package_reference/frod.md b/docs/source/package_reference/frod.md
index b2c146fe0f..5369494d09 100644
--- a/docs/source/package_reference/frod.md
+++ b/docs/source/package_reference/frod.md
@@ -23,7 +23,7 @@ allows FRoD to apply full-rank updates while keeping the number of trained param
 Paper: [Full-Rank Efficient Fine-Tuning with Rotational Degrees](https://doi.org/10.1609/aaai.v40i31.39813).
 
 When saving the adapter parameters, it is possible to avoid storing the projection tensors by setting
-`save_projection=False` on the `FRODConfig`. In that case, the projections are restored from the base model weights and
+`save_projection=False` on the `FrodConfig`. In that case, the projections are restored from the base model weights and
 the fixed random seed from `projection_prng_key`. This reduces checkpoint size, but the default is
 `save_projection=True` to make checkpoint loading independent of regeneration details.
 
@@ -41,11 +41,11 @@ FRoD currently has the following constraint:
 ```python
 from transformers import AutoModelForSequenceClassification
 
-from peft import FRODConfig, TaskType, get_peft_model
+from peft import FrodConfig, TaskType, get_peft_model
 
 model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)
 
-peft_config = FRODConfig(
+peft_config = FrodConfig(
     task_type=TaskType.SEQ_CLS,
     target_modules=["query", "value"],
     modules_to_save=["classifier"],
@@ -57,10 +57,10 @@ model = get_peft_model(model, peft_config)
 model.print_trainable_parameters()
 ```
 
-## FRODConfig
+## FrodConfig
 
-[[autodoc]] tuners.frod.config.FRODConfig
+[[autodoc]] tuners.frod.config.FrodConfig
 
-## FRODModel
+## FrodModel
 
-[[autodoc]] tuners.frod.model.FRODModel
+[[autodoc]] tuners.frod.model.FrodModel
diff --git a/examples/frod_finetuning/frod_image_classification.py b/examples/frod_finetuning/frod_image_classification.py
index 3f61f52f32..24e6c345c5 100644
--- a/examples/frod_finetuning/frod_image_classification.py
+++ b/examples/frod_finetuning/frod_image_classification.py
@@ -9,7 +9,7 @@
 from datasets import load_dataset
 from transformers import AutoImageProcessor, AutoModelForImageClassification, Trainer, TrainingArguments
 
-from peft import FRODConfig, get_peft_model
+from peft import FrodConfig, get_peft_model
 
 
 MODEL_NAME = os.environ.get("FROD_IMAGE_MODEL_NAME", "google/vit-base-patch16-224")
@@ -61,7 +61,7 @@ def main():
         label2id=label2id,
         ignore_mismatched_sizes=True,
     )
-    peft_config = FRODConfig(
+    peft_config = FrodConfig(
         target_modules=["query", "value"],
         modules_to_save=["classifier"],
         frod_dropout=0.0,
diff --git a/examples/frod_finetuning/frod_text_classification.py b/examples/frod_finetuning/frod_text_classification.py
index 105beda9b9..afa801a236 100644
--- a/examples/frod_finetuning/frod_text_classification.py
+++ b/examples/frod_finetuning/frod_text_classification.py
@@ -13,7 +13,7 @@
     TrainingArguments,
 )
 
-from peft import FRODConfig, TaskType, get_peft_model
+from peft import FrodConfig, TaskType, get_peft_model
 
 
 MODEL_NAME = "google-bert/bert-base-uncased"
@@ -36,7 +36,7 @@ def preprocess(batch):
     tokenized = tokenized.rename_column("label", "labels")
 
     model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
-    peft_config = FRODConfig(
+    peft_config = FrodConfig(
         task_type=TaskType.SEQ_CLS,
         target_modules=["query", "value"],
         modules_to_save=["classifier"],
diff --git a/src/peft/__init__.py b/src/peft/__init__.py
index df185d9d76..078e322289 100644
--- a/src/peft/__init__.py
+++ b/src/peft/__init__.py
@@ -70,8 +70,8 @@
     EvaConfig,
     FourierFTConfig,
     FourierFTModel,
-    FRODConfig,
-    FRODModel,
+    FrodConfig,
+    FrodModel,
     GraloraConfig,
     GraloraModel,
     HiraConfig,
@@ -200,10 +200,10 @@
     "DeloraConfig",
     "DeloraModel",
     "EvaConfig",
-    "FRODConfig",
-    "FRODModel",
     "FourierFTConfig",
     "FourierFTModel",
+    "FrodConfig",
+    "FrodModel",
     "GraloraConfig",
     "GraloraModel",
     "HRAConfig",
diff --git a/src/peft/tuners/__init__.py b/src/peft/tuners/__init__.py
index 9ff82ea725..22908b57a9 100644
--- a/src/peft/tuners/__init__.py
+++ b/src/peft/tuners/__init__.py
@@ -22,7 +22,7 @@
 from .cpt import CPTConfig, CPTEmbedding
 from .delora import DeloraConfig, DeloraModel
 from .fourierft import FourierFTConfig, FourierFTModel
-from .frod import FRODConfig, FRODModel
+from .frod import FrodConfig, FrodModel
 from .gralora import GraloraConfig, GraloraModel
 from .hira import HiraConfig, HiraModel
 from .hra import HRAConfig, HRAModel
@@ -94,10 +94,10 @@
     "DeloraConfig",
     "DeloraModel",
     "EvaConfig",
-    "FRODConfig",
-    "FRODModel",
     "FourierFTConfig",
     "FourierFTModel",
+    "FrodConfig",
+    "FrodModel",
     "GraloraConfig",
     "GraloraModel",
     "HRAConfig",
diff --git a/src/peft/tuners/frod/__init__.py b/src/peft/tuners/frod/__init__.py
index 3be7152c07..b26001a796 100644
--- a/src/peft/tuners/frod/__init__.py
+++ b/src/peft/tuners/frod/__init__.py
@@ -1,10 +1,24 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from peft.utils import register_peft_method
 
-from .config import FRODConfig
-from .layer import FRODLayer, Linear
-from .model import FRODModel
+from .config import FrodConfig
+from .layer import FrodLayer, Linear
+from .model import FrodModel
 
 
-__all__ = ["FRODConfig", "FRODLayer", "FRODModel", "Linear"]
+__all__ = ["FrodConfig", "FrodLayer", "FrodModel", "Linear"]
 
-register_peft_method(name="frod", config_cls=FRODConfig, model_cls=FRODModel, prefix="frod_")
+register_peft_method(name="frod", config_cls=FrodConfig, model_cls=FrodModel, prefix="frod_")
diff --git a/src/peft/tuners/frod/config.py b/src/peft/tuners/frod/config.py
index 2e5b724768..f581108a89 100644
--- a/src/peft/tuners/frod/config.py
+++ b/src/peft/tuners/frod/config.py
@@ -1,4 +1,4 @@
-# Copyright 2023-present the HuggingFace Inc. team.
+# Copyright 2026-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
 
 
 @dataclass
-class FRODConfig(PeftConfig):
+class FrodConfig(PeftConfig):
     """
-    This is the configuration class to store the configuration of a [`FRODModel`].
+    This is the configuration class to store the configuration of a [`FrodModel`].
 
     Paper: https://doi.org/10.1609/aaai.v40i31.39813.
 
diff --git a/src/peft/tuners/frod/layer.py b/src/peft/tuners/frod/layer.py
index 6feb0b0729..1289e7c9c8 100644
--- a/src/peft/tuners/frod/layer.py
+++ b/src/peft/tuners/frod/layer.py
@@ -1,4 +1,4 @@
-# Copyright 2023-present the HuggingFace Inc. team.
+# Copyright 2026-present the HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,20 +15,18 @@
 import warnings
 from typing import Optional
 
-import numpy as np
 import torch
 import torch.nn.functional as F
-from numpy.linalg import inv
 from torch import nn
-from transformers.pytorch_utils import Conv1D
 
-from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge
+from peft.tuners.tuners_utils import BaseTunerLayer, _get_in_out_features, check_adapters_to_merge
 from peft.utils.other import transpose
 
 from .._buffer_dict import BufferDict
+from .config import FrodConfig
 
 
-class FRODLayer(BaseTunerLayer):
+class FrodLayer(BaseTunerLayer):
     adapter_layer_names = ("frod_lambda_l", "frod_lambda_s_values")
     other_param_names = ("frod_V", "frod_U", "frod_s_indices", "frod_s_size")
 
@@ -41,76 +39,64 @@ def __init__(self, base_layer: nn.Module, **kwargs):
         self.frod_lambda_l = nn.ParameterDict({})
         self.frod_lambda_s_values = nn.ParameterDict({})
 
-        self.frod_s_indices: Optional[BufferDict] = None
-        self.frod_s_size: Optional[BufferDict] = None
-        self.frod_V: Optional[BufferDict] = None
+        self.frod_s_indices: BufferDict = BufferDict({}, persistent=False)
+        self.frod_s_size: BufferDict = BufferDict({}, persistent=False)
+        self.frod_V: BufferDict = BufferDict({}, persistent=False)
         self.frod_U: BufferDict = BufferDict({}, persistent=False)
 
         self._disable_adapters = False
         self.merged_adapters = []
 
-        base_layer = self.get_base_layer()
-        if isinstance(base_layer, nn.Linear):
-            in_features, out_features = base_layer.in_features, base_layer.out_features
-        elif isinstance(base_layer, Conv1D):
-            in_features, out_features = (
-                base_layer.weight.ds_shape if hasattr(base_layer.weight, "ds_shape") else base_layer.weight.shape
-            )
-
-        self.in_features = in_features
-        self.out_features = out_features
+        self.in_features, self.out_features = _get_in_out_features(self.get_base_layer())
         self.kwargs = kwargs
 
-    @property
-    def merged(self) -> bool:
-        return bool(self.merged_adapters)
-
     def update_layer(
         self,
         adapter_name,
         frod_V: BufferDict,
         frod_s_indices: BufferDict,
         frod_s_size: BufferDict,
-        frod_dropout,
-        init_weights,
+        config: FrodConfig,
     ):
+        frod_dropout = config.frod_dropout
+        init_weights = config.init_weights
         base_layer = self.get_base_layer()
-        weight = base_layer.weight.T if isinstance(base_layer, Conv1D) else base_layer.weight
+        weight = transpose(base_layer.weight, self.fan_in_fan_out)
         device = base_layer.weight.device
         dtype = base_layer.weight.dtype
 
-        param_dtype = dtype
-
         self.r[adapter_name] = self.out_features
         if frod_dropout > 0.0:
             frod_dropout_layer = nn.Dropout(p=frod_dropout)
         else:
             frod_dropout_layer = nn.Identity()
 
-        self.frod_dropout.update(nn.ModuleDict({adapter_name: frod_dropout_layer}))
+        self.frod_dropout[adapter_name] = frod_dropout_layer
 
+        if frod_V is None or frod_s_indices is None or frod_s_size is None:
+            raise ValueError("The FRoD projection buffers are missing. This should not happen.")
         if adapter_name not in frod_V:
-            if not frod_V:
-                raise ValueError("The FRoD projection buffers are empty. This should not happen.")
-            frod_V[adapter_name] = next(iter(frod_V.values()))
-            frod_s_indices[adapter_name] = next(iter(frod_s_indices.values()))
-            frod_s_size[adapter_name] = next(iter(frod_s_size.values()))
+            # FRoD projection buffers are shared across adapters for the same module category.
+            reference_adapter = next(iter(frod_V))
+            frod_V[adapter_name] = frod_V[reference_adapter]
+            frod_s_indices[adapter_name] = frod_s_indices[reference_adapter]
+            frod_s_size[adapter_name] = frod_s_size[reference_adapter]
 
         nnz = frod_s_indices[adapter_name].shape[1]
-        self.frod_lambda_s_values[adapter_name] = nn.Parameter(torch.zeros(nnz, device=device, dtype=param_dtype))
+        self.frod_lambda_s_values[adapter_name] = nn.Parameter(torch.zeros(nnz, device=device, dtype=dtype))
 
-        self.__dict__["frod_V"] = frod_V
-        self.__dict__["frod_s_indices"] = frod_s_indices
-        self.__dict__["frod_s_size"] = frod_s_size
+        self.frod_V[adapter_name] = frod_V[adapter_name]
+        self.frod_s_indices[adapter_name] = frod_s_indices[adapter_name]
+        self.frod_s_size[adapter_name] = frod_s_size[adapter_name]
 
         # Keep cached projections on CPU and move them lazily in forward.
-        self.frod_V[adapter_name] = self.frod_V[adapter_name].to(dtype=param_dtype, device="cpu")
+        self.frod_V[adapter_name] = self.frod_V[adapter_name].to(dtype=dtype, device="cpu")
         self.frod_s_indices[adapter_name] = self.frod_s_indices[adapter_name].to(device="cpu", dtype=torch.long)
         self.frod_s_size[adapter_name] = self.frod_s_size[adapter_name].to(device="cpu", dtype=torch.long)
 
         U, L = self._calculate_frod_u_and_lambda(self.frod_V[adapter_name], weight)
-        U = U.to(param_dtype)
-        L = L.to(device=device, dtype=param_dtype)
+        U = U.to(dtype)
+        L = L.to(device=device, dtype=dtype)
         self.frod_lambda_l[adapter_name] = nn.Parameter(L, requires_grad=True)
         if init_weights:
             self.reset_frod_parameters(adapter_name)
@@ -125,18 +111,18 @@ def update_layer(
         self.set_adapter(self.active_adapters)
 
     def _calculate_frod_u_and_lambda(self, V, W):
-        w = W.detach().to(torch.float32).cpu().numpy()
-        v = V.detach().to(torch.float32).cpu().numpy()
+        w = W.detach().to(torch.float32).cpu()
+        v = V.detach().to(torch.float32).cpu()
         try:
-            v_inv_T = inv(v).T
-        except np.linalg.LinAlgError:
-            v_inv_T = np.linalg.pinv(v, rcond=1e-6).T
-        Bi = w @ v_inv_T
-        lambda_l = np.linalg.norm(Bi, axis=0)
-        u = np.divide(Bi, lambda_l, out=np.zeros_like(Bi), where=lambda_l > 1e-8)
-        U = torch.from_numpy(u).float()
-        L = torch.from_numpy(lambda_l).float()
-        return U, L
+            v_inv_T = torch.linalg.inv(v).T
+        except RuntimeError:
+            v_inv_T = torch.linalg.pinv(v, rtol=1e-6).T
+        bi = w @ v_inv_T
+        lambda_l = torch.linalg.norm(bi, dim=0)
+        u = torch.zeros_like(bi)
+        nonzero = lambda_l > 1e-8
+        u[:, nonzero] = bi[:, nonzero] / lambda_l[nonzero]
+        return u.float(), lambda_l.float()
 
     def reset_frod_parameters(self, adapter_name):
         if adapter_name in self.frod_lambda_s_values:
@@ -147,7 +133,7 @@ def reset_frod_parameters(self, adapter_name):
                 nn.init.zeros_(self.frod_lambda_l[adapter_name])
 
 
-class Linear(nn.Linear, FRODLayer):
+class Linear(nn.Linear, FrodLayer):
     def __init__(
         self,
         base_layer,
@@ -155,18 +141,16 @@ def __init__(
         frod_s_indices: BufferDict,
         frod_s_size: BufferDict,
         adapter_name: str,
-        frod_dropout: float = 0.0,
-        fan_in_fan_out: bool = False,
+        config: FrodConfig,
         is_target_conv_1d_layer: bool = False,
-        init_weights: bool = True,
         **kwargs,
     ) -> None:
         super(nn.Linear, self).__init__()
-        FRODLayer.__init__(self, base_layer, **kwargs)
-        self.fan_in_fan_out = fan_in_fan_out
+        FrodLayer.__init__(self, base_layer, **kwargs)
+        self.fan_in_fan_out = config.fan_in_fan_out
 
         self._active_adapter = adapter_name
-        self.update_layer(adapter_name, frod_V, frod_s_indices, frod_s_size, frod_dropout, init_weights)
+        self.update_layer(adapter_name, frod_V, frod_s_indices, frod_s_size, config=config)
         self.is_target_conv_1d_layer = is_target_conv_1d_layer
 
     def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
@@ -206,11 +190,7 @@ def get_delta_weight(self, adapter) -> torch.Tensor:
         U = self.frod_U[adapter].to(device=device, dtype=dtype)
         V = self.frod_V[adapter].to(device=device, dtype=dtype)
         indices = self.frod_s_indices[adapter].to(device=U.device, dtype=torch.long)
-        size_tensor = self.frod_s_size[adapter]
-        if isinstance(size_tensor, torch.Tensor):
-            size = tuple(int(dim) for dim in size_tensor.tolist())
-        else:
-            size = tuple(int(dim) for dim in size_tensor)
+        size = tuple(int(dim) for dim in self.frod_s_size[adapter].tolist())
         values = self.frod_lambda_s_values[adapter].to(U.device, U.dtype).clone()
         lambda_l = self.frod_lambda_l[adapter].to(device=U.device, dtype=U.dtype)
 
@@ -239,11 +219,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 V = self.frod_V[active_adapter].to(device=x.device, dtype=target_dtype)
                 U = self.frod_U[active_adapter].to(device=x.device, dtype=target_dtype)
                 indices = self.frod_s_indices[active_adapter].to(device=x.device, dtype=torch.long)
-                size_tensor = self.frod_s_size[active_adapter]
-                if isinstance(size_tensor, torch.Tensor):
-                    size = tuple(int(dim) for dim in size_tensor.tolist())
-                else:
-                    size = tuple(int(dim) for dim in size_tensor)
+                size = tuple(int(dim) for dim in self.frod_s_size[active_adapter].tolist())
                 values = self.frod_lambda_s_values[active_adapter].to(device=x.device, dtype=target_dtype)
                 lambda_l = self.frod_lambda_l[active_adapter].to(device=x.device, dtype=target_dtype)
 
@@ -254,6 +230,8 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 h_flat = h.reshape(-1, h.shape[-1])
                 z_flat = torch.matmul(h_flat, V)
 
+                # This block computes the sparse FRoD update z @ S with torch.sparse.mm.
+                # CUDA sparse fp16/bf16 kernels are less reliable, so use fp32 here and cast the update back below.
                 matmul_dtype = z_flat.dtype
                 if z_flat.is_cuda and matmul_dtype in (torch.float16, torch.bfloat16):
                     matmul_dtype = torch.float32
@@ -279,6 +257,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
         return result
 
     def __repr__(self) -> str:
+        # Match PEFT tuner convention so printed models show FRoD-wrapped layers as `frod.*`.
         rep = super().__repr__()
         return "frod." + rep
 
diff --git a/src/peft/tuners/frod/model.py b/src/peft/tuners/frod/model.py
index 2968e65416..5d3dd5c624 100644
--- a/src/peft/tuners/frod/model.py
+++ b/src/peft/tuners/frod/model.py
@@ -17,9 +17,7 @@
 import warnings
 from collections import defaultdict
 
-import numpy as np
 import torch
-from numpy.linalg import qr
 from torch import nn
 from transformers.pytorch_utils import Conv1D
 
@@ -28,8 +26,8 @@
 
 from .._buffer_dict import BufferDict
 from ..tuners_utils import _maybe_include_all_linear_layers
-from .config import FRODConfig
-from .layer import FRODLayer, Linear
+from .config import FrodConfig
+from .layer import FrodLayer, Linear
 
 
 def _category_from_key(key: str) -> str:
@@ -57,13 +55,13 @@ def _layer_index_from_key(key: str, fallback: int) -> int:
     return fallback
 
 
-def _projection_from_weights(matrices: list[np.ndarray], regularization_alpha: float) -> np.ndarray:
-    stacked = np.vstack(matrices)
+def _projection_from_weights(matrices: list[torch.Tensor], regularization_alpha: float) -> torch.Tensor:
+    stacked = torch.cat(matrices, dim=0)
     if stacked.shape[0] < stacked.shape[1]:
-        _, _, vh = np.linalg.svd(stacked, full_matrices=True)
+        _, _, vh = torch.linalg.svd(stacked, full_matrices=True)
         return vh.T
 
-    q_matrix, r_matrix = qr(stacked)
+    q_matrix, r_matrix = torch.linalg.qr(stacked)
     q_slices = []
     start = 0
     for matrix in matrices:
@@ -72,23 +70,23 @@ def _projection_from_weights(matrices: list[np.ndarray], regularization_alpha: f
         start += rows
 
     dim = r_matrix.shape[1]
-    t_pi = np.zeros((dim, dim), dtype=r_matrix.dtype)
+    t_pi = torch.zeros((dim, dim), dtype=r_matrix.dtype)
     # Layers of the same projection category can be highly correlated; this ridge term keeps the inverse stable.
     for q_slice in q_slices:
-        q_term = q_slice.T @ q_slice + regularization_alpha * np.eye(dim, dtype=r_matrix.dtype)
-        t_pi += np.linalg.inv(q_term)
+        q_term = q_slice.T @ q_slice + regularization_alpha * torch.eye(dim, dtype=r_matrix.dtype)
+        t_pi += torch.linalg.inv(q_term)
     t_pi /= len(q_slices)
 
-    _, eigenvectors = np.linalg.eigh(t_pi)
+    _, eigenvectors = torch.linalg.eigh(t_pi)
     return r_matrix.T @ eigenvectors
 
 
-class FRODModel(BaseTuner):
+class FrodModel(BaseTuner):
     prefix: str = "frod_"
-    tuner_layer_cls = FRODLayer
+    tuner_layer_cls = FrodLayer
     target_module_mapping = TRANSFORMERS_MODELS_TO_FROD_TARGET_MODULES_MAPPING
 
-    def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
+    def _init_frod_projections(self, config: FrodConfig, adapter_name: str) -> None:
         weights = defaultdict(dict)
         model_config = self.get_model_config(self.model)
         peft_config = self._prepare_adapter_config(config, model_config)
@@ -116,6 +114,8 @@ def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
                 "No layer types compatible with FRoD were found. Please check `peft_config.target_modules`."
             )
 
+        # BaseTuner.__init__() enters the pre-injection flow before a FrodModel subclass
+        # could assign ModuleDicts after super().__init__(), so create these containers lazily here.
         if not hasattr(self, "frod_V"):
             self.frod_V = nn.ModuleDict()
             self.frod_s_indices = nn.ModuleDict()
@@ -125,7 +125,7 @@ def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
         categories = {category for layer_dict in weights.values() for category in layer_dict}
         for category in sorted(categories):
             matrices = [
-                layer_dict[category].detach().to(torch.float32).cpu().numpy()
+                layer_dict[category].detach().to(torch.float32).cpu()
                 for _, layer_dict in sorted(weights.items())
                 if category in layer_dict
             ]
@@ -134,7 +134,7 @@ def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
 
             v_matrix = _projection_from_weights(matrices, config.regularization_alpha)
             example_weight = next(layer_dict[category] for layer_dict in weights.values() if category in layer_dict)
-            v_tensor = torch.from_numpy(v_matrix).to(dtype=example_weight.dtype, device="cpu")
+            v_tensor = v_matrix.to(dtype=example_weight.dtype, device="cpu")
 
             if category not in self.frod_V:
                 self.frod_V[category] = BufferDict({}, persistent=config.save_projection)
@@ -161,10 +161,10 @@ def _init_frod_projections(self, config: FRODConfig, adapter_name: str) -> None:
                 self.frod_s_size[category] = BufferDict({}, persistent=config.save_projection)
             self.frod_s_size[category][adapter_name] = size
 
-    def _pre_injection_hook(self, model: nn.Module, config: FRODConfig, adapter_name: str) -> None:
+    def _pre_injection_hook(self, model: nn.Module, config: FrodConfig, adapter_name: str) -> None:
         self._init_frod_projections(config, adapter_name)
 
-    def _check_new_adapter_config(self, config: FRODConfig) -> None:
+    def _check_new_adapter_config(self, config: FrodConfig) -> None:
         super()._check_new_adapter_config(config)
 
         for existing_config in self.peft_config.values():
@@ -186,7 +186,7 @@ def _check_new_adapter_config(self, config: FRODConfig) -> None:
 
     def _create_and_replace(
         self,
-        frod_config,
+        frod_config: FrodConfig,
         adapter_name,
         target,
         target_name,
@@ -201,12 +201,6 @@ def _create_and_replace(
         if category not in self.frod_V:
             self._init_frod_projections(frod_config, adapter_name)
         bias = hasattr(target, "bias") and target.bias is not None
-        kwargs = {
-            "frod_dropout": frod_config.frod_dropout,
-            "fan_in_fan_out": frod_config.fan_in_fan_out,
-            "init_weights": frod_config.init_weights,
-            "bias": bias,
-        }
 
         if isinstance(target, Linear):
             target.update_layer(
@@ -214,8 +208,7 @@ def _create_and_replace(
                 self.frod_V[category],
                 self.frod_s_indices[category],
                 self.frod_s_size[category],
-                frod_config.frod_dropout,
-                frod_config.init_weights,
+                config=frod_config,
             )
         else:
             new_module = self._create_new_module(
@@ -225,7 +218,7 @@ def _create_and_replace(
                 self.frod_s_size[category],
                 adapter_name,
                 target,
-                **kwargs,
+                bias=bias,
             )
             if adapter_name not in self.active_adapters:
                 new_module.requires_grad_(False)
@@ -233,7 +226,7 @@ def _create_and_replace(
 
     @staticmethod
     def _create_new_module(
-        frod_config,
+        frod_config: FrodConfig,
         frod_V,
         frod_s_indices,
         frod_s_size,
@@ -249,19 +242,19 @@ def _create_new_module(
             target_base_layer = target
 
         if isinstance(target_base_layer, torch.nn.Linear):
-            if kwargs["fan_in_fan_out"]:
+            if frod_config.fan_in_fan_out:
                 warnings.warn(
                     "fan_in_fan_out is set to True but the target module is `torch.nn.Linear`. "
                     "Setting fan_in_fan_out to False."
                 )
-                kwargs["fan_in_fan_out"] = frod_config.fan_in_fan_out = False
+                frod_config.fan_in_fan_out = False
         elif isinstance(target_base_layer, Conv1D):
             kwargs["is_target_conv_1d_layer"] = True
-            if not kwargs["fan_in_fan_out"]:
+            if not frod_config.fan_in_fan_out:
                 warnings.warn(
                     "fan_in_fan_out is set to False but the target module is `Conv1D`. Setting fan_in_fan_out to True."
                 )
-                kwargs["fan_in_fan_out"] = frod_config.fan_in_fan_out = True
+                frod_config.fan_in_fan_out = True
         else:
             raise TypeError(
                 f"Target module {target} is not supported. Currently, only the following modules are supported: "
@@ -274,6 +267,7 @@ def _create_new_module(
             frod_s_indices,
             frod_s_size,
             adapter_name,
+            config=frod_config,
             bias=bias,
             **kwargs,
         )
diff --git a/src/peft/utils/save_and_load.py b/src/peft/utils/save_and_load.py
index 65cd7bf502..a2148f28a9 100644
--- a/src/peft/utils/save_and_load.py
+++ b/src/peft/utils/save_and_load.py
@@ -829,7 +829,10 @@ def set_peft_model_state_dict(
             )
             if config.save_projection and not has_projection:
                 raise ValueError(
-                    "Specified to load FRoD projection tensors from state dictionary however they were not present!"
+                    "Specified to load FRoD projection tensors from state dictionary however they were not present. "
+                    "If this checkpoint was saved with `save_projection=False`, set `peft_config.save_projection` "
+                    "to `False` before loading so the projections are regenerated from the base model weights. "
+                    "Otherwise, re-save the adapter with `save_projection=True` to include these tensors."
                 )
             elif not config.save_projection and has_projection:
                 warnings.warn(
diff --git a/tests/test_config.py b/tests/test_config.py
index c9ee5894c0..b7a101a744 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -30,7 +30,7 @@
     CartridgeConfig,
     CPTConfig,
     FourierFTConfig,
-    FRODConfig,
+    FrodConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -79,7 +79,7 @@ class TestingCommitHashError(Exception):
     (BOFTConfig, {}),
     (C3AConfig, {}),
     (FourierFTConfig, {}),
-    (FRODConfig, {}),
+    (FrodConfig, {}),
     (GraloraConfig, {}),
     (HiraConfig, {}),
     (HRAConfig, {}),
diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
index 92811b85e0..358ea319b7 100644
--- a/tests/test_custom_models.py
+++ b/tests/test_custom_models.py
@@ -37,7 +37,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
-    FRODConfig,
+    FrodConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -874,11 +874,11 @@
     ########
     # FRoD #
     ########
-    ("Vanilla MLP 1 FRoD", "MLP", FRODConfig, {"target_modules": "lin0"}),
-    ("Vanilla MLP 2 FRoD", "MLP", FRODConfig, {"target_modules": ["lin0"]}),
-    ("Vanilla MLP 3 FRoD", "MLP", FRODConfig, {"target_modules": ["lin1"]}),
-    ("Vanilla MLP 4 FRoD", "MLP", FRODConfig, {"target_modules": ["lin0", "lin1"]}),
-    ("Vanilla MLP 5 FRoD", "MLP", FRODConfig, {"target_modules": ["lin0"], "modules_to_save": ["lin1"]}),
+    ("Vanilla MLP 1 FRoD", "MLP", FrodConfig, {"target_modules": "lin0"}),
+    ("Vanilla MLP 2 FRoD", "MLP", FrodConfig, {"target_modules": ["lin0"]}),
+    ("Vanilla MLP 3 FRoD", "MLP", FrodConfig, {"target_modules": ["lin1"]}),
+    ("Vanilla MLP 4 FRoD", "MLP", FrodConfig, {"target_modules": ["lin0", "lin1"]}),
+    ("Vanilla MLP 5 FRoD", "MLP", FrodConfig, {"target_modules": ["lin0"], "modules_to_save": ["lin1"]}),
     #######
     # C3A #
     #######
@@ -1399,6 +1399,13 @@
         {"target_modules": ["lin0"], "init_weights": False},
         {"target_modules": ["lin0"], "init_weights": False},
     ),
+    (
+        "FRoD Same",
+        "frod",
+        FrodConfig,
+        {"target_modules": ["lin0"], "init_weights": False},
+        {"target_modules": ["lin0"], "init_weights": False},
+    ),
     # Note: PVeRA presents the same problem mentioned above for VeRA.
     (
         "PVeRA Same",
@@ -3465,7 +3472,7 @@ def test_multiple_adapters_automatic_modules_to_save(self):
 
     @pytest.mark.parametrize(
         "config_cls",
-        [IA3Config, BeftConfig, FRODConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig, MissConfig],
+        [IA3Config, BeftConfig, FrodConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig, MissConfig],
     )
     def test_multiple_adapters_mixed_modules_to_save(self, config_cls):
         # See issue 1574
@@ -3498,7 +3505,7 @@ def test_multiple_adapters_mixed_modules_to_save(self, config_cls):
 
     @pytest.mark.parametrize(
         "config_cls",
-        [IA3Config, BeftConfig, FRODConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig],
+        [IA3Config, BeftConfig, FrodConfig, LoHaConfig, LoKrConfig, LoraConfig, HRAConfig, ShiraConfig],
     )
     def test_multiple_adapters_mixed_modules_to_save_order_switched(self, config_cls):
         # See issue 1574
@@ -3841,7 +3848,7 @@ def test_load_resized_embedding_ignore_mismatched_sizes(self):
             AdaLoraConfig(target_modules=["lin0"], init_lora_weights=False, total_step=1),
             IA3Config(target_modules=["lin0"], feedforward_modules=["lin0"], init_ia3_weights=False),
             BeftConfig(target_modules=["lin0"], init_weights=False),
-            FRODConfig(target_modules=["lin0"], init_weights=False),
+            FrodConfig(target_modules=["lin0"], init_weights=False),
             OFTConfig(target_modules=["lin0"], init_weights=False, r=2, oft_block_size=0),
             BOFTConfig(target_modules=["lin0"], init_weights=False, boft_block_size=2),
             HRAConfig(target_modules=["lin0"], init_weights=False),
diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py
index b6de666976..c1e4050e31 100644
--- a/tests/test_decoder_models.py
+++ b/tests/test_decoder_models.py
@@ -38,7 +38,7 @@
     CPTConfig,
     DeloraConfig,
     FourierFTConfig,
-    FRODConfig,
+    FrodConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -147,7 +147,7 @@
         },
     ),
     (
-        FRODConfig,
+        FrodConfig,
         {
             "task_type": "CAUSAL_LM",
             "target_modules": None,
diff --git a/tests/test_encoder_decoder_models.py b/tests/test_encoder_decoder_models.py
index bca8a7d3bf..603abbc495 100644
--- a/tests/test_encoder_decoder_models.py
+++ b/tests/test_encoder_decoder_models.py
@@ -25,7 +25,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
-    FRODConfig,
+    FrodConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -110,7 +110,7 @@
         },
     ),
     (
-        FRODConfig,
+        FrodConfig,
         {
             "target_modules": None,
             "task_type": "SEQ_2_SEQ_LM",
diff --git a/tests/test_feature_extraction_models.py b/tests/test_feature_extraction_models.py
index bece31bf15..fe9450ca2f 100644
--- a/tests/test_feature_extraction_models.py
+++ b/tests/test_feature_extraction_models.py
@@ -23,7 +23,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
-    FRODConfig,
+    FrodConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -107,7 +107,7 @@
         },
     ),
     (
-        FRODConfig,
+        FrodConfig,
         {
             "task_type": "FEATURE_EXTRACTION",
             "target_modules": None,
diff --git a/tests/test_frod.py b/tests/test_frod.py
index c21b3040f6..19bd494c39 100644
--- a/tests/test_frod.py
+++ b/tests/test_frod.py
@@ -22,7 +22,7 @@
 from safetensors import safe_open
 from torch import nn
 
-from peft import FRODConfig, PeftModel, get_peft_model
+from peft import FrodConfig, PeftModel, get_peft_model
 
 
 class MLP(nn.Module):
@@ -47,7 +47,7 @@ def forward(self, X):
         return X
 
 
-class TestFROD:
+class TestFrod:
     @pytest.fixture
     def mlp(self):
         torch.manual_seed(0)
@@ -58,59 +58,19 @@ def mlp(self):
     def mlp_same_prng(self, mlp):
         torch.manual_seed(0)
 
-        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        config = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False)
         peft_model = get_peft_model(mlp, config)
-        config2 = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        config2 = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False)
         peft_model.add_adapter("other", config2)
         return peft_model
 
-    @staticmethod
-    def _make_second_adapter_different(peft_model):
-        with torch.no_grad():
-            for module in peft_model.base_model.model.modules():
-                if hasattr(module, "frod_lambda_l") and "second" in module.frod_lambda_l:
-                    module.frod_lambda_l["second"].add_(0.1)
-
-    def test_multiple_adapters_same_prng_projection_buffers(self, mlp_same_prng):
-        # Multiple adapters with the same PRNG key share fixed projection buffers within each FRoD layer.
-        assert (
-            mlp_same_prng.base_model.model.lin1.frod_V["default"].data_ptr()
-            == mlp_same_prng.base_model.model.lin1.frod_V["other"].data_ptr()
-        )
-        assert (
-            mlp_same_prng.base_model.model.lin1.frod_s_indices["default"].data_ptr()
-            == mlp_same_prng.base_model.model.lin1.frod_s_indices["other"].data_ptr()
-        )
-        assert (
-            mlp_same_prng.base_model.model.lin2.frod_V["default"].data_ptr()
-            == mlp_same_prng.base_model.model.lin2.frod_V["other"].data_ptr()
-        )
-        assert (
-            mlp_same_prng.base_model.model.lin2.frod_s_indices["default"].data_ptr()
-            == mlp_same_prng.base_model.model.lin2.frod_s_indices["other"].data_ptr()
-        )
-
-    def test_multiple_adapters_different_prng_raises(self):
-        model = MLP()
-        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
-        peft_model = get_peft_model(model, config)
-        config2 = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, projection_prng_key=123)
-
-        msg = (
-            r"FRoD projection initialization key must be the same for all adapters. Got "
-            r"config.projection_prng_key=123 but previous config had 0"
-        )
-        with pytest.raises(ValueError, match=msg):
-            peft_model.add_adapter("other", config2)
-
     def test_multiple_adapters_save_load_save_projection_false(self, mlp, tmp_path):
         # Check saving and loading works with multiple adapters without saved projection tensors.
         torch.manual_seed(1)
-        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
+        config = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
         peft_model = get_peft_model(mlp, config, adapter_name="first")
-        config2 = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
+        config2 = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
         peft_model.add_adapter("second", config2)
-        self._make_second_adapter_different(peft_model)
         peft_model.eval()
 
         input = torch.randn(5, 10)
@@ -141,7 +101,7 @@ def test_multiple_adapters_save_load_save_projection_false(self, mlp, tmp_path):
         assert torch.allclose(output_second, output_second_loaded, atol=1e-3, rtol=1e-3)
 
     def test_save_projection_false_contains_no_frod_projection_tensors(self, mlp, tmp_path):
-        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
+        config = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False, save_projection=False)
         peft_model = get_peft_model(mlp, config)
 
         save_path = tmp_path / "frod"
@@ -158,7 +118,7 @@ def test_save_projection_false_contains_no_frod_projection_tensors(self, mlp, tm
         assert not any("frod_U" in key for key in state_dict)
 
     def test_save_projection_true_contains_top_level_projection_tensors_only(self, mlp, tmp_path):
-        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        config = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False)
         peft_model = get_peft_model(mlp, config)
 
         save_path = tmp_path / "frod"
@@ -208,7 +168,7 @@ def test_frod_lambda_dont_share_memory(self, mlp_same_prng):
         )
 
     def test_frod_different_shapes(self, mlp):
-        config = FRODConfig(target_modules=["lin0", "lin3"], init_weights=False)
+        config = FrodConfig(target_modules=["lin0", "lin3"], init_weights=False)
         mlp_different_shapes = get_peft_model(mlp, config)
 
         assert mlp.lin0.base_layer.weight.shape != mlp.lin3.base_layer.weight.shape
@@ -231,7 +191,7 @@ def test_frod_dtypes(self, dtype):
                 pytest.skip("bfloat16 not supported on this system, skipping the test")
 
         model = MLP().to(dtype)
-        config = FRODConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        config = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False)
         peft_model = get_peft_model(model, config)
         inputs = torch.randn(5, 10).to(dtype)
         output = peft_model(inputs)
diff --git a/tests/test_initialization.py b/tests/test_initialization.py
index 27d9ba16e2..6970d35904 100644
--- a/tests/test_initialization.py
+++ b/tests/test_initialization.py
@@ -35,6 +35,7 @@
     C3AConfig,
     DeloraConfig,
     EvaConfig,
+    FrodConfig,
     GraloraConfig,
     IA3Config,
     LilyConfig,
@@ -1835,6 +1836,71 @@ def test_vera_add_second_adapter_with_higher_rank(self):
             model.add_adapter("other", config1)
 
 
+class TestFrodInitialization:
+    torch_device = infer_device()
+
+    def get_model(self):
+        class MLP(nn.Module):
+            def __init__(self, bias=True):
+                super().__init__()
+                self.lin0 = nn.Linear(10, 20, bias=bias)
+                self.lin1 = nn.Linear(20, 20, bias=bias)
+                self.lin2 = nn.Linear(20, 2, bias=bias)
+
+            def forward(self, X):
+                X = self.lin0(X)
+                X = self.lin1(X)
+                X = self.lin2(X)
+                return X
+
+        return MLP().to(self.torch_device)
+
+    def test_frod_multiple_adapters_same_prng_share_projection_buffers(self):
+        torch.manual_seed(0)
+        config0 = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        model = get_peft_model(self.get_model().cpu(), config0)
+
+        config1 = FrodConfig(target_modules=["lin1", "lin2"], init_weights=False)
+        model.add_adapter("other", config1)
+
+        assert model.base_model.model.lin1.frod_V["default"].data_ptr() == (
+            model.base_model.model.lin1.frod_V["other"].data_ptr()
+        )
+        assert model.base_model.model.lin1.frod_s_indices["default"].data_ptr() == (
+            model.base_model.model.lin1.frod_s_indices["other"].data_ptr()
+        )
+        assert model.base_model.model.lin2.frod_V["default"].data_ptr() == (
+            model.base_model.model.lin2.frod_V["other"].data_ptr()
+        )
+        assert model.base_model.model.lin2.frod_s_indices["default"].data_ptr() == (
+            model.base_model.model.lin2.frod_s_indices["other"].data_ptr()
+        )
+
+    def test_frod_mixing_save_projection_raises(self):
+        config0 = FrodConfig(target_modules=["lin0"], init_weights=False, save_projection=True)
+        model = get_peft_model(self.get_model(), config0)
+
+        config1 = FrodConfig(target_modules=["lin0"], init_weights=False, save_projection=False)
+        msg = re.escape(
+            "FRoD projection weights must be saved for all adapters or none, but got multiple different values: "
+            "[False, True]"
+        )
+        with pytest.raises(ValueError, match=msg):
+            model.add_adapter("other", config1)
+
+    def test_frod_add_second_adapter_with_different_prng_key_raises(self):
+        config0 = FrodConfig(target_modules=["lin0"], init_weights=False)
+        model = get_peft_model(self.get_model(), config0)
+
+        config1 = FrodConfig(target_modules=["lin0"], init_weights=False, projection_prng_key=123)
+        msg = re.escape(
+            "FRoD projection initialization key must be the same for all adapters. Got "
+            "config.projection_prng_key=123 but previous config had 0."
+        )
+        with pytest.raises(ValueError, match=msg):
+            model.add_adapter("other", config1)
+
+
 class TestVeloraInitialization:
     @pytest.mark.parametrize(
         "config_kwargs, msg",
diff --git a/tests/test_seq_classifier.py b/tests/test_seq_classifier.py
index b08f0a9945..c7de1272d3 100644
--- a/tests/test_seq_classifier.py
+++ b/tests/test_seq_classifier.py
@@ -23,7 +23,7 @@
     C3AConfig,
     DeloraConfig,
     FourierFTConfig,
-    FRODConfig,
+    FrodConfig,
     GraloraConfig,
     HiraConfig,
     HRAConfig,
@@ -108,7 +108,7 @@
         },
     ),
     (
-        FRODConfig,
+        FrodConfig,
         {
             "task_type": "SEQ_CLS",
             "target_modules": None,

From 17353444ac7cb54b848b0a7923656d01126e3d61 Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Fri, 29 May 2026 04:48:04 +0000
Subject: [PATCH 5/9] Fix FRoD sparse forward semantics

---
 src/peft/tuners/frod/layer.py | 55 ++++++++++++++++++++++-------------
 tests/test_frod.py            | 20 +++++++++++++
 2 files changed, 55 insertions(+), 20 deletions(-)

diff --git a/src/peft/tuners/frod/layer.py b/src/peft/tuners/frod/layer.py
index 1289e7c9c8..1680c07c21 100644
--- a/src/peft/tuners/frod/layer.py
+++ b/src/peft/tuners/frod/layer.py
@@ -46,6 +46,7 @@ def __init__(self, base_layer: nn.Module, **kwargs):
 
         self._disable_adapters = False
         self.merged_adapters = []
+        self._frod_merged_delta = {}
 
         self.in_features, self.out_features = _get_in_out_features(self.get_base_layer())
         self.kwargs = kwargs
@@ -103,24 +104,23 @@ def update_layer(
         else:
             # PEFT convention: init_weights=False should produce a non-identity adapter for merge tests.
             with torch.no_grad():
-                nn.init.normal_(self.frod_lambda_s_values[adapter_name], std=0.02)
-                self.frod_lambda_l[adapter_name].add_(torch.randn_like(self.frod_lambda_l[adapter_name]) * 0.02)
+                nn.init.normal_(self.frod_lambda_s_values[adapter_name], std=0.05)
+                self.frod_lambda_l[adapter_name].add_(torch.randn_like(self.frod_lambda_l[adapter_name]) * 0.05)
 
         self.frod_U[adapter_name] = U.cpu()
         self._move_adapter_to_device_of_base_layer(adapter_name)
         self.set_adapter(self.active_adapters)
 
     def _calculate_frod_u_and_lambda(self, V, W):
-        w = W.detach().to(torch.float32).cpu()
-        v = V.detach().to(torch.float32).cpu()
+        w = W.detach().to(torch.float64).cpu()
+        v = V.detach().to(torch.float64).cpu()
         try:
-            v_inv_T = torch.linalg.inv(v).T
+            bi = torch.linalg.solve(v, w.T).T
         except RuntimeError:
-            v_inv_T = torch.linalg.pinv(v, rtol=1e-6).T
-        bi = w @ v_inv_T
+            bi = w @ torch.linalg.pinv(v, rtol=1e-6).T
         lambda_l = torch.linalg.norm(bi, dim=0)
         u = torch.zeros_like(bi)
-        nonzero = lambda_l > 1e-8
+        nonzero = lambda_l > 1e-12
         u[:, nonzero] = bi[:, nonzero] / lambda_l[nonzero]
         return u.float(), lambda_l.float()
 
@@ -128,9 +128,6 @@ def reset_frod_parameters(self, adapter_name):
         if adapter_name in self.frod_lambda_s_values:
             with torch.no_grad():
                 nn.init.zeros_(self.frod_lambda_s_values[adapter_name])
-        if adapter_name in self.frod_lambda_l:
-            with torch.no_grad():
-                nn.init.zeros_(self.frod_lambda_l[adapter_name])
 
 
 class Linear(nn.Linear, FrodLayer):
@@ -158,19 +155,22 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N
         if not adapter_names:
             return
 
+        base_layer = self.get_base_layer()
+        base_weight = base_layer.weight.data.clone()
         for active_adapter in adapter_names:
             if active_adapter in self.frod_lambda_l.keys():
-                base_layer = self.get_base_layer()
+                delta_weight = self._get_delta_weight(active_adapter, base_weight=base_weight)
                 if safe_merge:
                     orig_weights = base_layer.weight.data.clone()
-                    orig_weights += self.get_delta_weight(active_adapter)
+                    orig_weights += delta_weight
                     if not torch.isfinite(orig_weights).all():
                         raise ValueError(
                             f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
                         )
                     base_layer.weight.data = orig_weights
                 else:
-                    base_layer.weight.data += self.get_delta_weight(active_adapter)
+                    base_layer.weight.data += delta_weight
+                self._frod_merged_delta[active_adapter] = delta_weight
                 self.merged_adapters.append(active_adapter)
 
     def unmerge(self) -> None:
@@ -181,12 +181,19 @@ def unmerge(self) -> None:
         while len(self.merged_adapters) > 0:
             active_adapter = self.merged_adapters.pop()
             if active_adapter in self.frod_lambda_l.keys():
-                self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter)
+                delta_weight = self._frod_merged_delta.pop(active_adapter, None)
+                if delta_weight is None:
+                    delta_weight = self.get_delta_weight(active_adapter)
+                self.get_base_layer().weight.data -= delta_weight
 
     def get_delta_weight(self, adapter) -> torch.Tensor:
-        weight = self.get_base_layer().weight
+        return self._get_delta_weight(adapter)
+
+    def _get_delta_weight(self, adapter, base_weight: Optional[torch.Tensor] = None) -> torch.Tensor:
+        weight = self.get_base_layer().weight if base_weight is None else base_weight
         device = weight.device
         dtype = weight.dtype
+        base_weight = transpose(weight, self.fan_in_fan_out)
         U = self.frod_U[adapter].to(device=device, dtype=dtype)
         V = self.frod_V[adapter].to(device=device, dtype=dtype)
         indices = self.frod_s_indices[adapter].to(device=U.device, dtype=torch.long)
@@ -197,8 +204,11 @@ def get_delta_weight(self, adapter) -> torch.Tensor:
         S_sparse = torch.sparse_coo_tensor(indices, values, size).coalesce()
         S = S_sparse.to_dense()
         L = torch.diag_embed(lambda_l)
+        frod_weight = U @ (S + L) @ V.T
 
-        return transpose(U @ (S + L).T @ V.T, self.fan_in_fan_out)
+        # FRoD parameterizes the adapted weight itself. Return only the difference so PEFT merge/unmerge and
+        # disable-adapter behavior preserve the base model while the active adapter still replaces the base weight.
+        return transpose(frod_weight - base_weight, self.fan_in_fan_out)
 
     def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
         previous_dtype = x.dtype
@@ -230,7 +240,8 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 h_flat = h.reshape(-1, h.shape[-1])
                 z_flat = torch.matmul(h_flat, V)
 
-                # This block computes the sparse FRoD update z @ S with torch.sparse.mm.
+                # This block computes the sparse FRoD update z @ S.T with torch.sparse.mm, matching
+                # F.linear(h, U @ (S + diag(lambda_l)) @ V.T).
                 # CUDA sparse fp16/bf16 kernels are less reliable, so use fp32 here and cast the update back below.
                 matmul_dtype = z_flat.dtype
                 if z_flat.is_cuda and matmul_dtype in (torch.float16, torch.bfloat16):
@@ -241,7 +252,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 S_sparse = torch.sparse_coo_tensor(indices, values, size).coalesce()
                 if S_sparse.dtype != matmul_dtype:
                     S_sparse = S_sparse.to(dtype=matmul_dtype)
-                z_S_flat = torch.sparse.mm(S_sparse.t(), z_flat_mm.t()).t()
+                z_S_flat = torch.sparse.mm(S_sparse, z_flat_mm.t()).t()
 
                 lambda_l = lambda_l.to(device=z_flat.device, dtype=matmul_dtype)
                 z_L_flat = z_flat_mm * lambda_l
@@ -250,8 +261,12 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 out_add_flat = F.linear(z_S_flat + z_L_flat, U_mm)
                 out_add_flat = out_add_flat.to(target_dtype)
                 out_add = out_add_flat.reshape(*batch_shape, out_add_flat.shape[-1])
+                base_weight = transpose(self.get_base_layer().weight, self.fan_in_fan_out).to(
+                    device=x.device, dtype=target_dtype
+                )
+                base_out = F.linear(x, base_weight)
 
-                result = result + out_add
+                result = result - base_out + out_add
 
         result = result.to(previous_dtype)
         return result
diff --git a/tests/test_frod.py b/tests/test_frod.py
index 19bd494c39..88ca464fe5 100644
--- a/tests/test_frod.py
+++ b/tests/test_frod.py
@@ -135,6 +135,26 @@ def test_save_projection_true_contains_top_level_projection_tensors_only(self, m
         assert not any(".model.lin1.frod_V" in key for key in keys)
         assert not any("frod_U" in key for key in keys)
 
+    def test_frod_default_initialization_reconstructs_base_weight(self, mlp):
+        torch.manual_seed(0)
+        mlp.eval()
+        inputs = torch.randn(5, 10)
+        expected = mlp(inputs)
+
+        config = FrodConfig(target_modules=["lin1", "lin2"])
+        peft_model = get_peft_model(mlp, config)
+        peft_model.eval()
+
+        actual = peft_model(inputs)
+        assert torch.allclose(actual, expected, atol=1e-4, rtol=1e-4)
+
+        for module in (peft_model.base_model.model.lin1, peft_model.base_model.model.lin2):
+            delta_weight = module.get_delta_weight("default")
+
+            assert module.frod_lambda_l["default"].norm() > 0
+            assert torch.count_nonzero(module.frod_lambda_s_values["default"]) == 0
+            assert torch.allclose(delta_weight, torch.zeros_like(delta_weight), atol=1e-4)
+
     def test_frod_projection_buffers_share_memory_with_layers(self, mlp_same_prng):
         frod_V_lin1 = mlp_same_prng.base_model.frod_V["lin1"]["default"]
         frod_s_indices_lin1 = mlp_same_prng.base_model.frod_s_indices["lin1"]["default"]

From 80c1870e1872d2d9872a40606c2970b6519429af Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Fri, 29 May 2026 04:48:13 +0000
Subject: [PATCH 6/9] Align FRoD image example with CLIP setup

---
 examples/frod_finetuning/README.md            |  6 ++--
 .../frod_image_classification.py              | 28 ++++++++++++-------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/examples/frod_finetuning/README.md b/examples/frod_finetuning/README.md
index f818ed6b51..08c123713a 100644
--- a/examples/frod_finetuning/README.md
+++ b/examples/frod_finetuning/README.md
@@ -11,7 +11,7 @@ python examples/frod_finetuning/frod_image_classification.py
 ```
 
 The text example fine-tunes `google-bert/bert-base-uncased` on `nyu-mll/glue` with the `sst2` configuration. The image
-example fine-tunes `google/vit-base-patch16-224` on the train and test parquet splits from `tanganke/stanford_cars`.
+example fine-tunes `openai/clip-vit-base-patch32` on the train and test parquet splits from `tanganke/stanford_cars`.
 
 Both scripts use separate optimizer learning rates for FRoD diagonal coefficients, FRoD sparse coefficients, and the
 classification head. FRoD dropout is set to `0.0` because the sparse rotational parameterization is the main
@@ -20,8 +20,8 @@ regularizer in these examples.
 To use local mirrors of the image model or dataset, override the image example paths with environment variables:
 
 ```bash
-FROD_IMAGE_MODEL_NAME=/path/to/local/vit-model \
+FROD_IMAGE_MODEL_NAME=/path/to/local/clip-vit-model \
 FROD_STANFORD_CARS_DATA_DIR=/path/to/local/stanford_cars \
-FROD_IMAGE_OUTPUT_DIR=vit-local-frod-stanford-cars \
+FROD_IMAGE_OUTPUT_DIR=clip-vit-local-frod-stanford-cars \
 python examples/frod_finetuning/frod_image_classification.py
 ```
diff --git a/examples/frod_finetuning/frod_image_classification.py b/examples/frod_finetuning/frod_image_classification.py
index 24e6c345c5..0d3320a07f 100644
--- a/examples/frod_finetuning/frod_image_classification.py
+++ b/examples/frod_finetuning/frod_image_classification.py
@@ -12,12 +12,18 @@
 from peft import FrodConfig, get_peft_model
 
 
-MODEL_NAME = os.environ.get("FROD_IMAGE_MODEL_NAME", "google/vit-base-patch16-224")
-OUTPUT_DIR = os.environ.get("FROD_IMAGE_OUTPUT_DIR", "vit-base-patch16-224-frod-stanford-cars")
+MODEL_NAME = os.environ.get("FROD_IMAGE_MODEL_NAME", "openai/clip-vit-base-patch32")
+OUTPUT_DIR = os.environ.get("FROD_IMAGE_OUTPUT_DIR", "clip-vit-base-patch32-frod-stanford-cars")
 DATA_DIR = os.environ.get("FROD_STANFORD_CARS_DATA_DIR")
-FROD_LAMBDA_L_LR = 5e-4
-FROD_LAMBDA_S_LR = 5e-5
-CLASSIFIER_LR = 1e-4
+NUM_TRAIN_EPOCHS = int(os.environ.get("FROD_IMAGE_NUM_TRAIN_EPOCHS", "3"))
+TRAIN_BATCH_SIZE = int(os.environ.get("FROD_IMAGE_TRAIN_BATCH_SIZE", "64"))
+EVAL_BATCH_SIZE = int(os.environ.get("FROD_IMAGE_EVAL_BATCH_SIZE", "64"))
+SPARSE_RATE = float(os.environ.get("FROD_IMAGE_SPARSE_RATE", "0.01"))
+FROD_LAMBDA_L_LR = float(os.environ.get("FROD_IMAGE_LAMBDA_L_LR", "5e-4"))
+FROD_LAMBDA_S_LR = float(os.environ.get("FROD_IMAGE_LAMBDA_S_LR", "5e-5"))
+CLASSIFIER_LR = float(os.environ.get("FROD_IMAGE_CLASSIFIER_LR", "1e-4"))
+CLIP_TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"]
+
 
 def main():
     if DATA_DIR:
@@ -62,10 +68,11 @@ def main():
         ignore_mismatched_sizes=True,
     )
     peft_config = FrodConfig(
-        target_modules=["query", "value"],
+        target_modules=CLIP_TARGET_MODULES,
         modules_to_save=["classifier"],
         frod_dropout=0.0,
-        sparse_rate=0.02,
+        sparse_rate=SPARSE_RATE,
+        projection_prng_key=3,
     )
     model = get_peft_model(model, peft_config)
     model.print_trainable_parameters()
@@ -102,13 +109,14 @@ def compute_metrics(eval_pred):
     training_args = TrainingArguments(
         output_dir=OUTPUT_DIR,
         learning_rate=FROD_LAMBDA_L_LR,
-        per_device_train_batch_size=32,
-        per_device_eval_batch_size=64,
-        num_train_epochs=1,
+        per_device_train_batch_size=TRAIN_BATCH_SIZE,
+        per_device_eval_batch_size=EVAL_BATCH_SIZE,
+        num_train_epochs=NUM_TRAIN_EPOCHS,
         eval_strategy="epoch",
         save_strategy="epoch",
         load_best_model_at_end=True,
         metric_for_best_model="accuracy",
+        lr_scheduler_type="constant",
         remove_unused_columns=False,
         report_to="none",
     )

From e08e49f9969eb7a89f63514d4fc81940bdafc6e5 Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Sat, 30 May 2026 11:23:56 +0000
Subject: [PATCH 7/9] Address FRoD layer review comments

---
 src/peft/tuners/frod/layer.py | 91 +++++++++++++++++++----------------
 1 file changed, 50 insertions(+), 41 deletions(-)

diff --git a/src/peft/tuners/frod/layer.py b/src/peft/tuners/frod/layer.py
index 1680c07c21..69534a5398 100644
--- a/src/peft/tuners/frod/layer.py
+++ b/src/peft/tuners/frod/layer.py
@@ -156,22 +156,24 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N
             return
 
         base_layer = self.get_base_layer()
-        base_weight = base_layer.weight.data.clone()
+        adapter_deltas = []
         for active_adapter in adapter_names:
             if active_adapter in self.frod_lambda_l.keys():
-                delta_weight = self._get_delta_weight(active_adapter, base_weight=base_weight)
-                if safe_merge:
-                    orig_weights = base_layer.weight.data.clone()
-                    orig_weights += delta_weight
-                    if not torch.isfinite(orig_weights).all():
-                        raise ValueError(
-                            f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
-                        )
-                    base_layer.weight.data = orig_weights
-                else:
-                    base_layer.weight.data += delta_weight
-                self._frod_merged_delta[active_adapter] = delta_weight
-                self.merged_adapters.append(active_adapter)
+                adapter_deltas.append((active_adapter, self.get_delta_weight(active_adapter)))
+
+        for active_adapter, delta_weight in adapter_deltas:
+            if safe_merge:
+                orig_weights = base_layer.weight.data.clone()
+                orig_weights += delta_weight
+                if not torch.isfinite(orig_weights).all():
+                    raise ValueError(
+                        f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
+                    )
+                base_layer.weight.data = orig_weights
+            else:
+                base_layer.weight.data += delta_weight
+            self._frod_merged_delta[active_adapter] = delta_weight
+            self.merged_adapters.append(active_adapter)
 
     def unmerge(self) -> None:
         if not self.merged:
@@ -187,10 +189,7 @@ def unmerge(self) -> None:
                 self.get_base_layer().weight.data -= delta_weight
 
     def get_delta_weight(self, adapter) -> torch.Tensor:
-        return self._get_delta_weight(adapter)
-
-    def _get_delta_weight(self, adapter, base_weight: Optional[torch.Tensor] = None) -> torch.Tensor:
-        weight = self.get_base_layer().weight if base_weight is None else base_weight
+        weight = self.get_base_layer().weight
         device = weight.device
         dtype = weight.dtype
         base_weight = transpose(weight, self.fan_in_fan_out)
@@ -221,11 +220,15 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
             result = self.base_layer(x, *args, **kwargs)
         else:
             result = self.base_layer(x, *args, **kwargs)
+            target_dtype = x.dtype
+            base_weight = transpose(self.get_base_layer().weight, self.fan_in_fan_out).to(
+                device=x.device, dtype=target_dtype
+            )
+            base_out = None
             for active_adapter in self.active_adapters:
                 if active_adapter not in self.frod_lambda_s_values:
                     continue
 
-                target_dtype = x.dtype
                 V = self.frod_V[active_adapter].to(device=x.device, dtype=target_dtype)
                 U = self.frod_U[active_adapter].to(device=x.device, dtype=target_dtype)
                 indices = self.frod_s_indices[active_adapter].to(device=x.device, dtype=torch.long)
@@ -233,8 +236,8 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 values = self.frod_lambda_s_values[active_adapter].to(device=x.device, dtype=target_dtype)
                 lambda_l = self.frod_lambda_l[active_adapter].to(device=x.device, dtype=target_dtype)
 
-                x = x.to(target_dtype)
-                h = self.frod_dropout[active_adapter](x)
+                dropout = self.frod_dropout[active_adapter]
+                h = dropout(x)
 
                 batch_shape = h.shape[:-1]
                 h_flat = h.reshape(-1, h.shape[-1])
@@ -242,9 +245,9 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
 
                 # This block computes the sparse FRoD update z @ S.T with torch.sparse.mm, matching
                 # F.linear(h, U @ (S + diag(lambda_l)) @ V.T).
-                # CUDA sparse fp16/bf16 kernels are less reliable, so use fp32 here and cast the update back below.
+                # Sparse fp16/bf16 kernels are less reliable, so use fp32 here and cast the update back below.
                 matmul_dtype = z_flat.dtype
-                if z_flat.is_cuda and matmul_dtype in (torch.float16, torch.bfloat16):
+                if matmul_dtype in (torch.float16, torch.bfloat16):
                     matmul_dtype = torch.float32
 
                 values = values.to(device=z_flat.device, dtype=matmul_dtype)
@@ -261,33 +264,36 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
                 out_add_flat = F.linear(z_S_flat + z_L_flat, U_mm)
                 out_add_flat = out_add_flat.to(target_dtype)
                 out_add = out_add_flat.reshape(*batch_shape, out_add_flat.shape[-1])
-                base_weight = transpose(self.get_base_layer().weight, self.fan_in_fan_out).to(
-                    device=x.device, dtype=target_dtype
-                )
-                base_out = F.linear(x, base_weight)
+                # FRoD reconstructs the adapted weight directly, so subtract the base-weight contribution and only
+                # accumulate the adapter delta.
+                if isinstance(dropout, nn.Identity):
+                    if base_out is None:
+                        base_out = F.linear(x, base_weight)
+                    adapter_base_out = base_out
+                else:
+                    adapter_base_out = F.linear(h, base_weight)
 
-                result = result - base_out + out_add
+                result = result + out_add - adapter_base_out
 
         result = result.to(previous_dtype)
         return result
 
     def __repr__(self) -> str:
-        # Match PEFT tuner convention so printed models show FRoD-wrapped layers as `frod.*`.
         rep = super().__repr__()
         return "frod." + rep
 
     def _move_adapter_to_device_of_base_layer(self, adapter_name: str, device: Optional[torch.device] = None) -> None:
-        dtype = None
-        weight = None
-        if device is None:
-            for weight_name in ("weight", "qweight"):
-                weight = getattr(self.get_base_layer(), weight_name, None)
-                if weight is not None:
-                    device = weight.device
-                    dtype = weight.dtype
-                    break
-            else:
-                return
+        """Move trainable FRoD parameters while keeping shared projection buffers on CPU."""
+        base_layer = self.get_base_layer()
+        base_device, base_dtype = self._get_base_layer_device_and_dtype(base_layer)
+
+        target_device = device if device is not None else base_device
+        if target_device is None:
+            return
+
+        target_dtype = None
+        if base_dtype is not None and (base_dtype.is_floating_point or base_dtype.is_complex):
+            target_dtype = base_dtype
 
         for adapter_layer_name in self.adapter_layer_names:
             adapter_layer = getattr(self, adapter_layer_name, None)
@@ -298,4 +304,7 @@ def _move_adapter_to_device_of_base_layer(self, adapter_name: str, device: Optio
             param = adapter_layer[adapter_name]
             if param.is_meta:
                 continue
-            adapter_layer[adapter_name] = param.to(device, dtype=dtype)
+            if target_dtype is not None:
+                adapter_layer[adapter_name] = param.to(target_device, dtype=target_dtype)
+            else:
+                adapter_layer[adapter_name] = param.to(target_device)

From 3bfdc2394e41e9e9821e1d0660900567f580e56c Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Tue, 2 Jun 2026 13:30:27 +0000
Subject: [PATCH 8/9] Document FRoD module key parsing

---
 src/peft/tuners/frod/model.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/peft/tuners/frod/model.py b/src/peft/tuners/frod/model.py
index 5d3dd5c624..f545b38e1a 100644
--- a/src/peft/tuners/frod/model.py
+++ b/src/peft/tuners/frod/model.py
@@ -31,6 +31,15 @@
 
 
 def _category_from_key(key: str) -> str:
+    """Infer the projection-sharing category from a dotted module key.
+
+    FRoD shares projection buffers across modules that play the same role in different transformer blocks. This helper
+    assumes keys follow the dotted paths returned by `named_modules()` and derives the role from the final path
+    components. For example, `encoder.layer.0.attention.self.query` maps to `self_query`, while
+    `vision_model.encoder.layers.0.self_attn.q_proj` maps to `self_attn_q_proj`. The BERT-style attention output key
+    `encoder.layer.0.attention.output.dense` is normalized to `attention_output` so it does not collide with MLP
+    `output.dense` modules.
+    """
     parts = key.split(".")
     if len(parts) == 1:
         return parts[0]
@@ -43,6 +52,13 @@ def _category_from_key(key: str) -> str:
 
 
 def _layer_index_from_key(key: str, fallback: int) -> int:
+    """Infer the transformer block index from a dotted module key.
+
+    Many decoder and vision models use paths like `model.layers.3.self_attn.q_proj`, so the first preference is the
+    integer immediately after a `layers` path component. Encoder models often use paths like
+    `encoder.layer.11.attention.self.query`; for those, the first numeric path component is used. If no numeric layer
+    id is present, e.g. for `classifier.dense`, `fallback` keeps the projection initialization order deterministic.
+    """
     parts = key.split(".")
     if "layers" in parts:
         try:

From 3209febae5d98eebe361aef2581dbecc70bd9f68 Mon Sep 17 00:00:00 2001
From: Bane-Elvin <gawan@buaa.edu.cn>
Date: Tue, 2 Jun 2026 13:34:48 +0000
Subject: [PATCH 9/9] Test FRoD category inference

---
 tests/test_frod.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_frod.py b/tests/test_frod.py
index 88ca464fe5..67621faf3b 100644
--- a/tests/test_frod.py
+++ b/tests/test_frod.py
@@ -21,6 +21,7 @@
 from accelerate.utils.imports import is_bf16_available
 from safetensors import safe_open
 from torch import nn
+from transformers import LlamaConfig, LlamaForCausalLM
 
 from peft import FrodConfig, PeftModel, get_peft_model
 
@@ -169,6 +170,24 @@ def test_frod_projection_buffers_share_memory_with_layers(self, mlp_same_prng):
         # Different target categories have distinct projection buffers.
         assert frod_V_lin1.data_ptr() != mlp_same_prng.base_model.frod_V["lin2"]["default"].data_ptr()
 
+    def test_frod_categories_with_common_llama_targets(self):
+        model = LlamaForCausalLM(
+            LlamaConfig(
+                hidden_size=16,
+                intermediate_size=32,
+                num_attention_heads=4,
+                num_hidden_layers=2,
+                vocab_size=32,
+            )
+        )
+        config = FrodConfig(target_modules=["q_proj", "v_proj"])
+
+        peft_model = get_peft_model(model, config)
+
+        assert sorted(peft_model.base_model.frod_V.keys()) == ["self_attn_q_proj", "self_attn_v_proj"]
+        assert "default" in peft_model.base_model.frod_V["self_attn_q_proj"]
+        assert "default" in peft_model.base_model.frod_V["self_attn_v_proj"]
+
     def test_frod_lambda_dont_share_memory(self, mlp_same_prng):
         assert (
             mlp_same_prng.base_model.model.lin1.frod_lambda_s_values["default"].data_ptr()