Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/peft/tuners/adamss/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,15 @@ class AdamssConfig(PeftConfig):
)
},
)
random_seed: int = field(
default=0,
metadata={
"help": (
"Seed used to deterministically create and rebuild the adapter weights, so that a saved adapter "
"reproduces its outputs after loading. Default: 0."
)
},
)

def __post_init__(self):
self.peft_type = PeftType.ADAMSS
Expand Down
2 changes: 1 addition & 1 deletion src/peft/tuners/adamss/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def update_layer(

# Perform SVD decomposition with diagnostics in case of failure
try:
res = slice_pca(weight_tensor, r, device, torch.float32)
res = slice_pca(weight_tensor, r, device, torch.float32, random_seed=config.random_seed)
except Exception as e:
raise RuntimeError(
f"slice_pca raised an exception for layer {adapter_name} (shape={tuple(weight_tensor.shape)}, dtype={weight_tensor.dtype}, device={device}): {e}"
Expand Down
21 changes: 15 additions & 6 deletions src/peft/tuners/adamss/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import torch


def slice_pca(tensor, r, device, dtype=torch.float32):
def slice_pca(tensor, r, device, dtype=torch.float32, random_seed=0):
"""
Perform slice-wise PCA (SVD) on 4D tensor.

Expand All @@ -24,6 +24,8 @@ def slice_pca(tensor, r, device, dtype=torch.float32):
r: rank for low-rank approximation
device: computation device
dtype: data type
random_seed: seed for the random projection used by `torch.svd_lowrank`, so the decomposition is
deterministic and reproducible across save/load

Returns:
VVT: Right singular vectors (B, C, r, W) UU: Left singular vectors (B, C, H, r)
Expand All @@ -38,11 +40,18 @@ def slice_pca(tensor, r, device, dtype=torch.float32):
UU = torch.zeros(B, C, H, effective_r, dtype=dtype, device=device)
VVT = torch.zeros(B, C, effective_r, W, dtype=dtype, device=device)

for i in range(B):
for j in range(C):
U, _, V = torch.svd_lowrank(tensor[i, j, :, :], q=effective_r, niter=2, M=None)
UU[i, j, :, :] = U[:, 0:effective_r]
VVT[i, j, :, :] = V[:, 0:effective_r].T
# torch.svd_lowrank draws a random projection internally, so its result (and hence the downstream
# clustering and scatter_index) depends on the RNG state. Seed a forked RNG with the configurable
# random_seed so the result is deterministic (torch.svd_lowrank does not accept a generator argument);
# fork_rng leaves the global RNG stream untouched.
fork_devices = [device] if torch.device(device).type == "cuda" else []
Comment thread
BenjaminBossan marked this conversation as resolved.
with torch.random.fork_rng(devices=fork_devices):
torch.manual_seed(random_seed)
for i in range(B):
for j in range(C):
U, _, V = torch.svd_lowrank(tensor[i, j, :, :], q=effective_r, niter=2, M=None)
UU[i, j, :, :] = U[:, 0:effective_r]
VVT[i, j, :, :] = V[:, 0:effective_r].T
# Return computed matrices (important: ensure callers receive VVT and UU)
return VVT, UU

Expand Down
10 changes: 10 additions & 0 deletions src/peft/tuners/psoft/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,16 @@ class PsoftConfig(PeftConfig):
"help": "Number of power iterations used by torch.svd_lowrank when psoft_svd='lowrank'. Only used when psoft_svd='lowrank'. "
},
)
random_seed: int = field(
default=0,
metadata={
"help": (
"Seed used to deterministically create and rebuild the adapter weights when psoft_svd='lowrank', so "
"that a saved adapter reproduces its outputs after loading. Only used when psoft_svd='lowrank'. "
"Default: 0."
)
},
)
psoft_orth: bool = field(
default=True,
metadata={
Expand Down
13 changes: 11 additions & 2 deletions src/peft/tuners/psoft/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def __init__(self, base_layer: nn.Module, **kwargs) -> None:
self.psoft_dropout = nn.ModuleDict({})
self.psoft_svd: dict[str, str] = {}
self.psoft_svd_lowrank_niter: dict[str, int] = {}
self.random_seed: dict[str, int] = {}
self.ab_svd_init: dict[str, Optional[str]] = {}

# per-adapter trainable module
Expand Down Expand Up @@ -251,6 +252,7 @@ def update_layer(self, adapter_name: str, config: PsoftConfig, **kwargs: Any) ->
self.ab_svd_init[adapter_name] = config.ab_svd_init
self.psoft_svd[adapter_name] = config.psoft_svd
self.psoft_svd_lowrank_niter[adapter_name] = config.psoft_svd_lowrank_niter
self.random_seed[adapter_name] = config.random_seed

self.psoft_R[adapter_name] = OrthLayer(
size=r,
Expand Down Expand Up @@ -290,6 +292,7 @@ def _build_psoft_ab_cache_buffers(self, adapter_name: str, init_type: str) -> No
r,
svd_mode=self.psoft_svd[adapter_name],
niter=self.psoft_svd_lowrank_niter[adapter_name],
random_seed=self.random_seed[adapter_name],
)

Sr_scaled = Sr / self.scaling[adapter_name]
Expand All @@ -309,15 +312,21 @@ def _build_psoft_ab_cache_buffers(self, adapter_name: str, init_type: str) -> No

self._set_psoft_ab_cache_buffers(adapter_name, A, B)

def _compute_svd_factors(self, weight: torch.Tensor, r: int, *, svd_mode: str, niter: int):
def _compute_svd_factors(self, weight: torch.Tensor, r: int, *, svd_mode: str, niter: int, random_seed: int = 0):
# weight: (out, in) fp32
if svd_mode == "full":
U, S, Vh = torch.linalg.svd(weight.data, full_matrices=False)
Vr = U[:, :r] # (out, r)
Sr = S[:r] # (r,)
Uhr = Vh[:r, :] # (r, in)
elif svd_mode == "lowrank":
U, S, V = svd_lowrank(weight.data, q=r, niter=niter) # V: (in, r)
# torch.svd_lowrank uses a random projection, so the A/B initialization it produces depends on the

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above, the explanation is overly detailed.

# RNG state. Seed a forked RNG with the configurable random_seed to make it deterministic
# (torch.svd_lowrank does not accept a generator argument); fork_rng leaves the global RNG untouched.
fork_devices = [weight.device] if weight.device.type == "cuda" else []
with torch.random.fork_rng(devices=fork_devices):
torch.manual_seed(random_seed)
U, S, V = svd_lowrank(weight.data, q=r, niter=niter) # V: (in, r)
Vr = U[:, :r]
Sr = S[:r]
Uhr = V[:, :r].t() # (r, in)
Expand Down
34 changes: 34 additions & 0 deletions tests/test_custom_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2553,6 +2553,40 @@ def test_only_params_are_updated(self, test_name, model_id, config_cls, config_k
else:
assert torch.allclose(param_before, param_after, atol=tol, rtol=tol)

@pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
def test_save_load_roundtrip(self, test_name, model_id, config_cls, config_kwargs, tmp_path):
    """Check that a saved adapter reproduces its forward output after being loaded again.

    The adapter is created with the global torch seed set to 0, saved to ``tmp_path``, and then
    loaded onto a freshly instantiated base model while the global seed is 54321. The outputs
    before saving and after loading must agree within tolerance, i.e. the result must not depend
    on the global RNG state at load time.
    """
    tol = 1e-5
    inputs = self.prepare_inputs_for_testing()

    # Reference output of the plain base model, without any adapter attached.
    base_model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
    with torch.inference_mode():
        output_base = base_model(**inputs)

    # Build the adapter config with non-identity initialization so the adapter changes the output.
    adapter_kwargs = set_init_weights_false(config_cls, config_kwargs)
    config = config_cls(base_model_name_or_path=model_id, **adapter_kwargs)

    torch.manual_seed(0)
    peft_model = get_peft_model(base_model, config)
    peft_model.eval()
    with torch.inference_mode():
        output_before = peft_model(**inputs)

    # Sanity check: the adapter must actually alter the output, otherwise the comparison
    # after loading would pass trivially.
    assert not torch.allclose(output_base, output_before, atol=tol, rtol=tol)

    peft_model.save_pretrained(tmp_path)
    del base_model, peft_model

    reloaded_base = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
    # Deliberately use a seed that differs from the one active when get_peft_model was called.
    torch.manual_seed(54321)
    reloaded_model = PeftModel.from_pretrained(reloaded_base, tmp_path)
    with torch.inference_mode():
        output_after = reloaded_model(**inputs)

    assert torch.allclose(output_before, output_after, atol=tol, rtol=tol)

@pytest.mark.parametrize("test_name, model_id, config_cls, config_kwargs", TEST_CASES)
def test_parameters_after_loading_model(self, test_name, model_id, config_cls, config_kwargs):
# An explicit test that when loading a trained model, the parameters are loaded correctly
Expand Down
Loading