8 changes: 6 additions & 2 deletions src/peft/tuners/loha/config.py
@@ -35,7 +35,8 @@ class LoHaConfig(LycorisConfig):
module_dropout (`float`):
The dropout probability for disabling LoHa modules during training.
use_effective_conv2d (`bool`):
Use parameter effective decomposition for Conv2d with ksize > 1 ("Proposition 3" from FedPara paper).
Use parameter effective decomposition for Conv2d (and Conv1d) with ksize > 1 ("Proposition 3" from FedPara
paper).
target_modules (`Optional[Union[List[str], str]]`):
The names of the modules to apply the adapter to. If this is specified, only the modules with the specified
names will be replaced. When passing a string, a regex match will be performed. When passing a list of
@@ -79,7 +80,10 @@ class LoHaConfig(LycorisConfig):
use_effective_conv2d: bool = field(
default=False,
metadata={
"help": 'Use parameter effective decomposition for Conv2d 3x3 with ksize > 1 ("Proposition 3" from FedPara paper)'
"help": (
"Use parameter effective decomposition for Conv2d (and Conv1d) with ksize > 1 "
'("Proposition 3" from FedPara paper)'
)
},
)
target_modules: Optional[Union[list[str], str]] = field(
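Reviewer note: for context, a minimal sketch of how the clarified `use_effective_conv2d` flag could be used once Conv1d targeting is available. The toy model and module names (`conv_in`, `proj`, `head`) are illustrative assumptions, not part of this PR.

```python
# Sketch (assumptions: toy model and module names), applying LoHa to Conv1d layers.
import torch.nn as nn
from peft import LoHaConfig, get_peft_model


class ToyAudioNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_in = nn.Conv1d(16, 32, kernel_size=3, padding=1)  # ksize > 1
        self.proj = nn.Conv1d(32, 32, kernel_size=1)                # pointwise
        self.head = nn.Linear(32, 10)

    def forward(self, x):
        x = self.proj(self.conv_in(x)).mean(dim=-1)
        return self.head(x)


config = LoHaConfig(
    r=8,
    alpha=16,
    target_modules=["conv_in", "proj", "head"],
    # Takes effect only where kernel size > 1; kernel_size=1 layers fall back
    # to the flattened (Linear-like) path, as described in the layer changes below.
    use_effective_conv2d=True,
)
model = get_peft_model(ToyAudioNet(), config)
model.print_trainable_parameters()
```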
76 changes: 74 additions & 2 deletions src/peft/tuners/loha/layer.py
@@ -45,15 +45,23 @@ def _available_adapters(self) -> set[str]:

def create_adapter_parameters(self, adapter_name: str, r: int, shape: tuple[int, ...]):
# https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L130C9-L143C75
if len(shape) == 4:
if len(shape) == 4: # Conv2d
self.hada_t1[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], shape[3]))
self.hada_w1_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0])) # out_dim, 1-mode
self.hada_w1_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1])) # in_dim , 2-mode

self.hada_t2[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], shape[3]))
self.hada_w2_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0])) # out_dim, 1-mode
self.hada_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1])) # in_dim , 2-mode
else:
elif len(shape) == 3: # Conv1d
self.hada_t1[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], 1))
self.hada_w1_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0])) # out_dim, 1-mode
self.hada_w1_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1])) # in_dim , 2-mode

self.hada_t2[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], 1))
self.hada_w2_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0])) # out_dim, 1-mode
self.hada_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1])) # in_dim , 2-mode
else: # Linear
self.hada_w1_a[adapter_name] = nn.Parameter(torch.empty(shape[0], r))
self.hada_w1_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1]))

@@ -127,6 +135,11 @@ def update_layer(
if isinstance(base_layer, nn.Linear):
shape = tuple(base_layer.weight.shape)
elif isinstance(base_layer, nn.Conv2d):
# For 1x1 convolutions, disable effective_conv2d to avoid unnecessary tensor reshaping overhead.
# Since 1x1 convolutions are essentially pointwise operations (matrix multiplications),
# they can be more efficiently handled with the flattened weight representation,
# similar to how Linear layers work. This optimization reduces computational cost
# without affecting the mathematical equivalence of the operation.
use_effective_conv2d = use_effective_conv2d and base_layer.kernel_size != (1, 1)
if use_effective_conv2d:
shape = (base_layer.out_channels, base_layer.in_channels, *base_layer.kernel_size)
@@ -135,6 +148,19 @@
base_layer.out_channels,
base_layer.in_channels * base_layer.kernel_size[0] * base_layer.kernel_size[1],
)
elif isinstance(base_layer, nn.Conv1d):
# For Conv1d with kernel_size=1, disable effective_conv2d for the same optimization reasons
# as 1x1 Conv2d. Kernel size 1 means no spatial/temporal context, making it equivalent
# to a Linear layer applied across the channel dimension. Using flattened representation
# avoids unnecessary reshaping and improves computational efficiency.
use_effective_conv2d = use_effective_conv2d and base_layer.kernel_size[0] != 1
if use_effective_conv2d:
shape = (base_layer.out_channels, base_layer.in_channels, base_layer.kernel_size[0])
else:
shape = (
base_layer.out_channels,
base_layer.in_channels * base_layer.kernel_size[0],
)
else:
raise TypeError(f"LoHa is not implemented for base layers of type {type(base_layer).__name__}")

@@ -173,6 +199,8 @@ def get_delta_weight(self, adapter_name: str) -> torch.Tensor:
)

base_layer = self.get_base_layer()

# Reshape to match base layer shape
weight = weight.reshape(base_layer.weight.shape)

# Perform rank dropout during training - drop rows of addition weights
@@ -292,6 +320,50 @@ def __repr__(self) -> str:
return "loha." + rep


class Conv1d(LoHaLayer):
"""LoHa implemented in Conv1d layer"""

def __init__(
self,
base_layer: nn.Module,
adapter_name: str = "default",
r: int = 0,
alpha: float = 0.0,
rank_dropout: float = 0.0,
module_dropout: float = 0.0,
use_effective_conv2d: bool = False,
init_weights: bool = True,
**kwargs,
):
super().__init__(base_layer)

# Create adapter and set it active
self._active_adapter = adapter_name
self.update_layer(
adapter_name, r, alpha, rank_dropout, module_dropout, init_weights, use_effective_conv2d, **kwargs
)

def _get_delta_activations(
self, adapter_name: str, input: torch.Tensor, *args: Any, **kwargs: Any
) -> torch.Tensor:
delta_weight = self.get_delta_weight(adapter_name)
input = self._cast_input_dtype(input, delta_weight.dtype)
# don't add bias here, because the bias is already included in the output of the base_layer
base_layer = self.get_base_layer()
return F.conv1d(
input,
delta_weight,
stride=base_layer.stride,
padding=base_layer.padding,
dilation=base_layer.dilation,
groups=base_layer.groups,
)

def __repr__(self) -> str:
rep = super().__repr__()
return "loha." + rep


# Below code is a direct copy from https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/loha.py#L9


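The new comments in `update_layer` argue that a kernel-size-1 convolution is a pointwise matrix multiplication, which is why `use_effective_conv2d` is disabled in that case. A quick standalone check of that equivalence (plain PyTorch, not code from this PR):

```python
# Sanity check: a Conv1d with kernel_size=1 is a pointwise linear map over the
# channel dimension, so its (out, in, 1) weight behaves like an (out, in) matrix.
import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(2, 16, 50)   # (batch, in_channels, length)
w = torch.randn(32, 16, 1)   # Conv1d weight with kernel_size=1

out_conv = F.conv1d(x, w)                                   # (2, 32, 50)
out_linear = torch.einsum("oi,bil->bol", w.squeeze(-1), x)  # same map via matmul

assert torch.allclose(out_conv, out_linear, atol=1e-5)
print(out_conv.shape)  # torch.Size([2, 32, 50])
```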
3 changes: 2 additions & 1 deletion src/peft/tuners/loha/model.py
@@ -21,7 +21,7 @@
from peft.utils import TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING
from peft.utils.other import get_pattern_key

from .layer import Conv2d, Linear, LoHaLayer
from .layer import Conv1d, Conv2d, Linear, LoHaLayer


class LoHaModel(LycorisTuner):
@@ -85,6 +85,7 @@ class LoHaModel(LycorisTuner):
prefix: str = "hada_"
layers_mapping: dict[type[torch.nn.Module], type[LoHaLayer]] = {
torch.nn.Conv2d: Conv2d,
torch.nn.Conv1d: Conv1d,
torch.nn.Linear: Linear,
}

8 changes: 6 additions & 2 deletions src/peft/tuners/lokr/config.py
@@ -35,7 +35,8 @@ class LoKrConfig(LycorisConfig):
module_dropout (`float`):
The dropout probability for disabling LoKr modules during training.
use_effective_conv2d (`bool`):
Use parameter effective decomposition for Conv2d with ksize > 1 ("Proposition 3" from FedPara paper).
Use parameter effective decomposition for Conv2d (and Conv1d) with ksize > 1 ("Proposition 3" from FedPara
paper).
decompose_both (`bool`):
Perform rank decomposition of left kronecker product matrix.
decompose_factor (`int`):
@@ -85,7 +86,10 @@ class LoKrConfig(LycorisConfig):
use_effective_conv2d: bool = field(
default=False,
metadata={
"help": 'Use parameter effective decomposition for Conv2d 3x3 with ksize > 1 ("Proposition 3" from FedPara paper)'
"help": (
"Use parameter effective decomposition for Conv2d (and Conv1d) with ksize > 1 "
'("Proposition 3" from FedPara paper)'
)
},
)
decompose_both: bool = field(
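For context, a hypothetical LoKr configuration exercising the clarified option together with the decomposition flags documented above. The module names are placeholders, not taken from this PR, and the comment on `decompose_factor=-1` reflects the option's default "pick a balanced factorization" behavior.

```python
# Hypothetical LoKr configuration for a model containing Conv1d layers
# (module names are placeholders, not taken from this PR).
from peft import LoKrConfig

config = LoKrConfig(
    r=8,
    alpha=16,
    target_modules=["conv_in", "proj"],
    use_effective_conv2d=True,  # only takes effect when kernel size > 1
    decompose_both=True,        # also rank-decompose the left Kronecker factor
    decompose_factor=-1,        # default: let the factorization helper pick a balanced split
)
```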
89 changes: 86 additions & 3 deletions src/peft/tuners/lokr/layer.py
@@ -75,8 +75,8 @@ def create_adapter_parameters(
self.lokr_w1_a[adapter_name] = nn.Parameter(torch.empty(shape[0][0], r))
self.lokr_w1_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][0]))

if len(shape) == 4:
# Conv2d
# Handle both Conv2d and Conv1d
if len(shape) == 4: # Conv2d
if use_w2:
self.lokr_w2[adapter_name] = nn.Parameter(torch.empty(shape[0][1], shape[1][1], *shape[2:]))
elif use_effective_conv2d:
@@ -86,6 +86,18 @@
else:
self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r))
self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1] * shape[2] * shape[3]))
elif len(shape) == 3: # Conv1d
if use_w2:
self.lokr_w2[adapter_name] = nn.Parameter(torch.empty(shape[0][1], shape[1][1], shape[2]))
elif use_effective_conv2d: # Even for Conv1d, use the effective parameter for kernel dimension
# We pass (r, r, kernel_size, 1) in order to be compatible with the 2d assumptions made
# in make_weight_cp (only relevant for the effective conv2d case).
self.lokr_t2[adapter_name] = nn.Parameter(torch.empty(r, r, shape[2], 1))
self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(r, shape[0][1])) # b, 1-mode
self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1])) # d, 2-mode
else:
self.lokr_w2_a[adapter_name] = nn.Parameter(torch.empty(shape[0][1], r))
self.lokr_w2_b[adapter_name] = nn.Parameter(torch.empty(r, shape[1][1] * shape[2]))
else:
# Linear
if use_w2:
@@ -201,7 +213,27 @@ def update_layer(

use_w1 = not (decompose_both and r < max(shape[0][0], shape[1][0]) / 2)
use_w2 = r >= max(shape[0][1], shape[1][1]) / 2
# For 1x1 convolutions, disable effective_conv2d to avoid unnecessary tensor reshaping overhead.
# Since 1x1 convolutions are essentially pointwise operations (matrix multiplications),
# they can be more efficiently handled with the flattened weight representation,
# similar to how Linear layers work. This optimization reduces computational cost
# without affecting the mathematical equivalence of the operation.
use_effective_conv2d = use_effective_conv2d and base_layer.kernel_size != (1, 1)
elif isinstance(base_layer, nn.Conv1d):
in_dim, out_dim = base_layer.in_channels, base_layer.out_channels
k_size = (base_layer.kernel_size[0],) # Convert to a tuple with single element

in_m, in_n = factorization(in_dim, decompose_factor)
out_l, out_k = factorization(out_dim, decompose_factor)
shape = ((out_l, out_k), (in_m, in_n), *k_size) # ((a, b), (c, d), k)

use_w1 = not (decompose_both and r < max(shape[0][0], shape[1][0]) / 2)
use_w2 = r >= max(shape[0][1], shape[1][1]) / 2
# For Conv1d with kernel_size=1, disable effective_conv2d for the same optimization reasons
# as 1x1 Conv2d. Kernel size 1 means no spatial/temporal context, making it equivalent
# to a Linear layer applied across the channel dimension. Using flattened representation
# avoids unnecessary reshaping and improves computational efficiency.
use_effective_conv2d = use_effective_conv2d and base_layer.kernel_size[0] != 1
else:
raise TypeError(f"LoKr is not implemented for base layers of type {type(base_layer).__name__}")

@@ -237,7 +269,12 @@ def get_delta_weight(self, adapter_name: str) -> torch.Tensor:

# Make weights with Kronecker product
weight = make_kron(w1, w2, self.scaling[adapter_name])
weight = weight.reshape(self.get_base_layer().weight.shape)

# Get base layer for reshaping
base_layer = self.get_base_layer()

# Regular reshape to match base layer shape
weight = weight.reshape(base_layer.weight.shape)

# Perform rank dropout during training - drop rows of addition weights
rank_dropout = self.rank_dropout[adapter_name]
@@ -358,6 +395,52 @@ def __repr__(self) -> str:
return "lokr." + rep


class Conv1d(LoKrLayer):
"""LoKr implemented in Conv1d layer"""

def __init__(
self,
base_layer: nn.Module,
device: Optional[Union[str, torch.device]] = None,
dtype: Optional[torch.dtype] = None,
adapter_name: str = "default",
r: int = 0,
alpha: float = 0.0,
rank_dropout: float = 0.0,
module_dropout: float = 0.0,
use_effective_conv2d: bool = False,
init_weights: bool = True,
**kwargs,
):
super().__init__(base_layer)

# Create adapter and set it active
self._active_adapter = adapter_name
self.update_layer(
adapter_name, r, alpha, rank_dropout, module_dropout, init_weights, use_effective_conv2d, **kwargs
)

def _get_delta_activations(
self, adapter_name: str, input: torch.Tensor, *args: Any, **kwargs: Any
) -> torch.Tensor:
delta_weight = self.get_delta_weight(adapter_name)
input = self._cast_input_dtype(input, delta_weight.dtype)
# don't add bias here, because the bias is already included in the output of the base_layer
base_layer = self.get_base_layer()
return F.conv1d(
input,
delta_weight,
stride=base_layer.stride,
padding=base_layer.padding,
dilation=base_layer.dilation,
groups=base_layer.groups,
)

def __repr__(self) -> str:
rep = super().__repr__()
return "lokr." + rep


# Below code is a direct copy from https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/lokr.py#L11


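To make the Conv1d shape bookkeeping above easier to follow, here is a standalone sketch of the Kronecker construction for the non-effective, low-rank `w2` path. It mirrors `make_kron` only in terms of shapes, with a hand-picked channel factorization standing in for PEFT's `factorization` helper; it is not the library code.

```python
# Standalone shape check of the LoKr idea for Conv1d (simplified; not PEFT's
# make_kron/factorization implementation).
import torch

out_channels, in_channels, k, r = 24, 12, 5, 4

# Assume factorizations out = out_l * out_k and in = in_m * in_n.
out_l, out_k = 4, 6
in_m, in_n = 3, 4

# Left factor: (out_l, in_m); right factor from a low-rank pair whose product
# spans (out_k, in_n * k), mirroring lokr_w2_a @ lokr_w2_b above.
w1 = torch.randn(out_l, in_m)
w2 = torch.randn(out_k, r) @ torch.randn(r, in_n * k)

delta = torch.kron(w1, w2)                           # (out_l*out_k, in_m*in_n*k)
assert delta.shape == (out_channels, in_channels * k)

delta = delta.reshape(out_channels, in_channels, k)  # matches nn.Conv1d weight shape
print(delta.shape)  # torch.Size([24, 12, 5])
```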
3 changes: 2 additions & 1 deletion src/peft/tuners/lokr/model.py
@@ -21,7 +21,7 @@
from peft.utils import TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING
from peft.utils.other import get_pattern_key

from .layer import Conv2d, Linear, LoKrLayer
from .layer import Conv1d, Conv2d, Linear, LoKrLayer


class LoKrModel(LycorisTuner):
@@ -86,6 +86,7 @@ class LoKrModel(LycorisTuner):
prefix: str = "lokr_"
layers_mapping: dict[type[torch.nn.Module], type[LoKrLayer]] = {
torch.nn.Conv2d: Conv2d,
torch.nn.Conv1d: Conv1d,
torch.nn.Linear: Linear,
}

2 changes: 1 addition & 1 deletion src/peft/tuners/lycoris_utils.py
@@ -257,7 +257,7 @@ def _create_new_module(cls, config: LycorisConfig, adapter_name: str, target: nn
else:
target_base_layer = target

if isinstance(target_base_layer, torch.nn.Conv2d):
if isinstance(target_base_layer, (torch.nn.Conv2d, torch.nn.Conv1d)):
new_module = new_module_cls(target, adapter_name=adapter_name, **kwargs)
elif isinstance(target_base_layer, torch.nn.Linear):
new_module = new_module_cls(target, adapter_name=adapter_name, **kwargs)
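With this dispatcher change, `nn.Conv1d` targets are routed to the new LyCORIS modules just like `nn.Conv2d`. A hedged end-to-end sketch of what that enables; the toy `nn.Sequential` and the module name `"conv"` are assumptions, not part of this PR.

```python
# Sketch under assumptions (toy nn.Sequential, module name "conv"): Conv1d
# targets are wrapped by the LyCORIS dispatcher, and the adapter delta can be
# merged back into the original Conv1d weight.
import torch
import torch.nn as nn
from peft import LoKrConfig, get_peft_model

base = nn.Sequential()
base.add_module("conv", nn.Conv1d(8, 16, kernel_size=3, padding=1))

peft_model = get_peft_model(base, LoKrConfig(r=4, alpha=8, target_modules=["conv"]))

x = torch.randn(1, 8, 20)
y_adapter = peft_model(x)               # forward through base layer + LoKr delta

merged = peft_model.merge_and_unload()  # plain nn.Sequential again, delta folded in
assert torch.allclose(y_adapter, merged(x), atol=1e-5)
```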