huggingface · Bane-Elvin · May 21, 2026 · May 21, 2026 · May 27, 2026 · May 28, 2026
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
@@ -124,6 +124,8 @@
       title: PVeRA
     - local: package_reference/fourierft
       title: FourierFT
+    - local: package_reference/frod
+      title: FRoD
     - local: package_reference/gralora
       title: GraLoRA
     - local: package_reference/vblora

diff --git a/docs/source/package_reference/frod.md b/docs/source/package_reference/frod.md
@@ -0,0 +1,66 @@
+<!--Copyright 2026 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+
+# FRoD: Full-Rank Efficient Fine-Tuning with Rotational Degrees
+
+FRoD is a parameter-efficient fine-tuning method that combines a shared full-rank basis with sparse learnable
+rotational degrees. The adapter update is expressed through fixed projection tensors and trainable coefficients, which
+allows FRoD to apply full-rank updates while keeping the number of trained parameters small.
+
+Paper: [Full-Rank Efficient Fine-Tuning with Rotational Degrees](https://doi.org/10.1609/aaai.v40i31.39813).
+
+When saving the adapter parameters, it is possible to avoid storing the projection tensors by setting
+`save_projection=False` on the `FrodConfig`. In that case, the projections are restored from the base model weights and
+the fixed random seed from `projection_prng_key`. This reduces checkpoint size, but the default is
+`save_projection=True` to make checkpoint loading independent of regeneration details.
+
+Compared to LoRA, FRoD can express a full-rank update in each adapted linear layer while training only the diagonal
+coefficients and a sparse set of off-diagonal rotation coefficients. This can be useful when a low-rank update is too
+restrictive. The trade-off is that FRoD computes fixed projection tensors from the base weights during adapter
+injection, which makes setup more expensive; the implementation is also less broadly supported than LoRA.
+
+FRoD currently has the following constraint:
+
+- Only `nn.Linear` and `transformers.pytorch_utils.Conv1D` layers are supported.
+
+## Quickstart
+
+```python
+from transformers import AutoModelForSequenceClassification
+
+from peft import FrodConfig, TaskType, get_peft_model
+
+model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)
+
+peft_config = FrodConfig(
+    task_type=TaskType.SEQ_CLS,
+    target_modules=["query", "value"],
+    modules_to_save=["classifier"],
+    sparse_rate=0.02,
+    frod_dropout=0.0,
+)
+
+model = get_peft_model(model, peft_config)
+model.print_trainable_parameters()
+```
+
+## FrodConfig
+
+[[autodoc]] tuners.frod.config.FrodConfig
+
+## FrodModel
+
+[[autodoc]] tuners.frod.model.FrodModel
diff --git a/examples/frod_finetuning/README.md b/examples/frod_finetuning/README.md
@@ -0,0 +1,27 @@
+# FRoD fine-tuning examples
+
+These examples show minimal FRoD fine-tuning with the Transformers `Trainer`.
+
+Install the example dependencies and run either script directly:
+
+```bash
+pip install -r examples/frod_finetuning/requirements.txt
+python examples/frod_finetuning/frod_text_classification.py
+python examples/frod_finetuning/frod_image_classification.py
+```
+
+The text example fine-tunes `google-bert/bert-base-uncased` on `nyu-mll/glue` with the `sst2` configuration. The image example
+fine-tunes `openai/clip-vit-base-patch32` on the train parquet split of `tanganke/stanford_cars` and evaluates on the test split.
+
+Both scripts use separate optimizer learning rates for FRoD diagonal coefficients, FRoD sparse coefficients, and the
+classification head. FRoD dropout is set to `0.0` because the sparse rotational parameterization is the main
+regularizer in these examples.
+
+To use local mirrors of the image model or dataset, override the image example paths with environment variables:
+
+```bash
+FROD_IMAGE_MODEL_NAME=/path/to/local/clip-vit-model \
+FROD_STANFORD_CARS_DATA_DIR=/path/to/local/stanford_cars \
+FROD_IMAGE_OUTPUT_DIR=clip-vit-local-frod-stanford-cars \
+python examples/frod_finetuning/frod_image_classification.py
+```
diff --git a/examples/frod_finetuning/frod_image_classification.py b/examples/frod_finetuning/frod_image_classification.py
@@ -0,0 +1,139 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+import os
+
+import numpy as np
+import torch
+from datasets import load_dataset
+from transformers import AutoImageProcessor, AutoModelForImageClassification, Trainer, TrainingArguments
+
+from peft import FrodConfig, get_peft_model
+
+
+MODEL_NAME = os.environ.get("FROD_IMAGE_MODEL_NAME", "openai/clip-vit-base-patch32")  # Hub id or local path
+OUTPUT_DIR = os.environ.get("FROD_IMAGE_OUTPUT_DIR", "clip-vit-base-patch32-frod-stanford-cars")
+DATA_DIR = os.environ.get("FROD_STANFORD_CARS_DATA_DIR")  # unset -> main() reads the shards from the Hub
+NUM_TRAIN_EPOCHS = int(os.environ.get("FROD_IMAGE_NUM_TRAIN_EPOCHS", "3"))
+TRAIN_BATCH_SIZE = int(os.environ.get("FROD_IMAGE_TRAIN_BATCH_SIZE", "64"))  # per device
+EVAL_BATCH_SIZE = int(os.environ.get("FROD_IMAGE_EVAL_BATCH_SIZE", "64"))  # per device
+SPARSE_RATE = float(os.environ.get("FROD_IMAGE_SPARSE_RATE", "0.01"))  # forwarded to FrodConfig(sparse_rate=...)
+FROD_LAMBDA_L_LR = float(os.environ.get("FROD_IMAGE_LAMBDA_L_LR", "5e-4"))  # LR for "frod_lambda_l" parameters
+FROD_LAMBDA_S_LR = float(os.environ.get("FROD_IMAGE_LAMBDA_S_LR", "5e-5"))  # LR for "frod_lambda_s_values" parameters
+CLASSIFIER_LR = float(os.environ.get("FROD_IMAGE_CLASSIFIER_LR", "1e-4"))  # LR for "classifier" parameters
+CLIP_TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"]  # FrodConfig(target_modules=...)
+
+
+def main():
+    if DATA_DIR:
+        data_files = {
+            "train": [
+                os.path.join(DATA_DIR, "data", "train-00000-of-00002.parquet"),
+                os.path.join(DATA_DIR, "data", "train-00001-of-00002.parquet"),
+            ],
+            "test": [
+                os.path.join(DATA_DIR, "data", "test-00000-of-00002.parquet"),
+                os.path.join(DATA_DIR, "data", "test-00001-of-00002.parquet"),
+            ],
+        }
+    else:
+        data_files = {
+            "train": [
+                "hf://datasets/tanganke/stanford_cars/data/train-00000-of-00002.parquet",
+                "hf://datasets/tanganke/stanford_cars/data/train-00001-of-00002.parquet",
+            ],
+            "test": [
+                "hf://datasets/tanganke/stanford_cars/data/test-00000-of-00002.parquet",
+                "hf://datasets/tanganke/stanford_cars/data/test-00001-of-00002.parquet",
+            ],
+        }
+
+    dataset = load_dataset("parquet", data_files=data_files)
+    train_split = dataset["train"]
+    eval_split = dataset["test"]
+    image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
+    label_feature = train_split.features["label"]
+    label_names = (
+        label_feature.names if hasattr(label_feature, "names") else [str(i) for i in sorted(set(train_split["label"]))]
+    )
+    id2label = dict(enumerate(label_names))
+    label2id = {name: idx for idx, name in id2label.items()}
+
+    model = AutoModelForImageClassification.from_pretrained(
+        MODEL_NAME,
+        num_labels=len(label_names),
+        id2label=id2label,
+        label2id=label2id,
+        ignore_mismatched_sizes=True,
+    )
+    peft_config = FrodConfig(
+        target_modules=CLIP_TARGET_MODULES,
+        modules_to_save=["classifier"],
+        frod_dropout=0.0,
+        sparse_rate=SPARSE_RATE,
+        projection_prng_key=3,
+    )
+    model = get_peft_model(model, peft_config)
+    model.print_trainable_parameters()
+
+    def transform(batch):
+        images = [image.convert("RGB") for image in batch["image"]]
+        inputs = image_processor(images, return_tensors="pt")
+        inputs["labels"] = batch["label"]
+        return inputs
+
+    train_dataset = train_split.with_transform(transform)
+    eval_dataset = eval_split.with_transform(transform)
+
+    def collate_fn(examples):
+        pixel_values = torch.stack([example["pixel_values"] for example in examples])
+        labels = torch.tensor([example["labels"] for example in examples])
+        return {"pixel_values": pixel_values, "labels": labels}
+
+    def compute_metrics(eval_pred):
+        predictions = np.argmax(eval_pred.predictions, axis=-1)
+        return {"accuracy": (predictions == eval_pred.label_ids).mean().item()}
+
+    optimizer = torch.optim.AdamW(
+        [
+            {"params": [p for n, p in model.named_parameters() if "frod_lambda_l" in n], "lr": FROD_LAMBDA_L_LR},
+            {
+                "params": [p for n, p in model.named_parameters() if "frod_lambda_s_values" in n],
+                "lr": FROD_LAMBDA_S_LR,
+            },
+            {"params": [p for n, p in model.named_parameters() if "classifier" in n], "lr": CLASSIFIER_LR},
+        ]
+    )
+
+    training_args = TrainingArguments(
+        output_dir=OUTPUT_DIR,
+        learning_rate=FROD_LAMBDA_L_LR,
+        per_device_train_batch_size=TRAIN_BATCH_SIZE,
+        per_device_eval_batch_size=EVAL_BATCH_SIZE,
+        num_train_epochs=NUM_TRAIN_EPOCHS,
+        eval_strategy="epoch",
+        save_strategy="epoch",
+        load_best_model_at_end=True,
+        metric_for_best_model="accuracy",
+        lr_scheduler_type="constant",
+        remove_unused_columns=False,
+        report_to="none",
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        data_collator=collate_fn,
+        compute_metrics=compute_metrics,
+        optimizers=(optimizer, None),
+    )
+    trainer.train()
+    trainer.evaluate()
+    model.save_pretrained(OUTPUT_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/frod_finetuning/frod_text_classification.py b/examples/frod_finetuning/frod_text_classification.py
@@ -0,0 +1,93 @@
+# Copyright 2026-present the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+
+import numpy as np
+import torch
+from datasets import load_dataset
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    DataCollatorWithPadding,
+    Trainer,
+    TrainingArguments,
+)
+
+from peft import FrodConfig, TaskType, get_peft_model
+
+
+MODEL_NAME = "google-bert/bert-base-uncased"
+DATASET_NAME = "nyu-mll/glue"
+TASK_NAME = "sst2"  # dataset configuration passed to load_dataset
+OUTPUT_DIR = "bert-base-uncased-frod-sst2"  # used as the Trainer output_dir and for the final save_pretrained
+FROD_LAMBDA_L_LR = 2e-2  # LR for "frod_lambda_l" parameters; also passed as TrainingArguments.learning_rate
+FROD_LAMBDA_S_LR = 2e-3  # LR for "frod_lambda_s_values" parameters
+CLASSIFIER_LR = 1e-2  # LR for "classifier" parameters
+
+
+def main():
+    dataset = load_dataset(DATASET_NAME, TASK_NAME)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+    def preprocess(batch):
+        return tokenizer(batch["sentence"], truncation=True)
+
+    tokenized = dataset.map(preprocess, batched=True)
+    tokenized = tokenized.rename_column("label", "labels")
+
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
+    peft_config = FrodConfig(
+        task_type=TaskType.SEQ_CLS,
+        target_modules=["query", "value"],
+        modules_to_save=["classifier"],
+        frod_dropout=0.0,
+        sparse_rate=0.02,
+    )
+    model = get_peft_model(model, peft_config)
+    model.print_trainable_parameters()
+
+    def compute_metrics(eval_pred):
+        predictions = np.argmax(eval_pred.predictions, axis=-1)
+        return {"accuracy": (predictions == eval_pred.label_ids).mean().item()}
+
+    optimizer = torch.optim.AdamW(
+        [
+            {"params": [p for n, p in model.named_parameters() if "frod_lambda_l" in n], "lr": FROD_LAMBDA_L_LR},
+            {
+                "params": [p for n, p in model.named_parameters() if "frod_lambda_s_values" in n],
+                "lr": FROD_LAMBDA_S_LR,
+            },
+            {"params": [p for n, p in model.named_parameters() if "classifier" in n], "lr": CLASSIFIER_LR},
+        ]
+    )
+
+    training_args = TrainingArguments(
+        output_dir=OUTPUT_DIR,
+        learning_rate=FROD_LAMBDA_L_LR,
+        per_device_train_batch_size=32,
+        per_device_eval_batch_size=64,
+        num_train_epochs=1,
+        eval_strategy="epoch",
+        save_strategy="epoch",
+        load_best_model_at_end=True,
+        metric_for_best_model="accuracy",
+        report_to="none",
+    )
+
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized["train"],
+        eval_dataset=tokenized["validation"],
+        tokenizer=tokenizer,
+        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
+        compute_metrics=compute_metrics,
+        optimizers=(optimizer, None),
+    )
+    trainer.train()
+    trainer.evaluate()
+    model.save_pretrained(OUTPUT_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/frod_finetuning/requirements.txt b/examples/frod_finetuning/requirements.txt
@@ -0,0 +1,6 @@
+peft
+transformers
+accelerate>=1.0.0
+datasets
+numpy
+Pillow
diff --git a/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-default/adapter_config.json b/method_comparison/MetaMathQA/experiments/frod/llama-3.2-3B-default/adapter_config.json
@@ -0,0 +1,20 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "frod_dropout": 0.0,
+  "inference_mode": false,
+  "init_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "peft_type": "FROD",
+  "projection_prng_key": 0,
+  "regularization_alpha": 0.001,
+  "revision": null,
+  "save_projection": true,
+  "sparse_rate": 0.01,
+  "target_modules": null,
+  "task_type": "CAUSAL_LM"
+}
diff --git a/...mparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/adapter_config.json b/...mparison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/adapter_config.json
@@ -0,0 +1,20 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "frod_dropout": 0.0,
+  "inference_mode": false,
+  "init_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "peft_type": "FROD",
+  "projection_prng_key": 0,
+  "regularization_alpha": 0.001,
+  "revision": null,
+  "save_projection": true,
+  "sparse_rate": 0.02,
+  "target_modules": null,
+  "task_type": "CAUSAL_LM"
+}
diff --git a/...parison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/training_params.json b/...parison/MetaMathQA/experiments/frod/llama-3.2-3B-sparse0.02-lr_0.001/training_params.json
@@ -0,0 +1,5 @@
+{
+  "optimizer_kwargs": {
+    "lr": 1e-3
+  }
+}