3 changes: 0 additions & 3 deletions src/peft/mapping_func.py
@@ -25,7 +25,6 @@
 from .mixed_model import PeftMixedModel
 from .peft_model import PeftModel
 from .tuners.tuners_utils import BaseTuner, BaseTunerLayer
-from .utils import _prepare_prompt_learning_config


 def get_peft_model(
@@ -120,8 +119,6 @@ def get_peft_model(
             low_cpu_mem_usage=low_cpu_mem_usage,
         )

-    if peft_config.is_prompt_learning:
-        peft_config = _prepare_prompt_learning_config(peft_config, model_config)
Member Author commented:
While debugging this issue, I noticed that _prepare_prompt_learning_config is being called twice, here and in peft_model.py:

peft_config = _prepare_prompt_learning_config(peft_config, dict_config)

This is not a huge deal, since the results should be the same and the function is quick. Still, let's remove the redundant call here.

     return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](
         model,
         peft_config,
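The redundancy described in the comment above can be illustrated with a small, self-contained sketch. The names below (DummyPeftModelForCausalLM and the local _prepare_prompt_learning_config) are stand-ins invented for this illustration, not the real PEFT implementation; only the call structure mirrors the change: get_peft_model used to prepare the prompt-learning config itself and then hand the model off to a PeftModel subclass whose constructor prepares it again.

calls = []

def _prepare_prompt_learning_config(peft_config, model_config):
    # Stand-in for peft.utils._prepare_prompt_learning_config; it only counts calls.
    calls.append("prepare")
    return peft_config

class DummyPeftModelForCausalLM:
    # Stand-in for the PeftModel subclass that prepares the config in its constructor,
    # i.e. the call in peft_model.py that remains after this PR.
    def __init__(self, model, peft_config):
        _prepare_prompt_learning_config(peft_config, model_config={})

def get_peft_model(model, peft_config):
    # The redundant call removed by this PR used to happen here:
    # peft_config = _prepare_prompt_learning_config(peft_config, model_config)
    return DummyPeftModelForCausalLM(model, peft_config)

get_peft_model(model=None, peft_config=object())
assert calls == ["prepare"]  # the config is prepared exactly once after the change
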
8 changes: 6 additions & 2 deletions src/peft/utils/other.py
@@ -1105,9 +1105,13 @@ def _prepare_prompt_learning_config(peft_config, model_config):
         peft_config.num_attention_heads = num_attention_heads

     # For grouped-query attention, see #1901.
-    if peft_config.peft_type == "PREFIX_TUNING" and "num_key_value_heads" in model_config:
+    if (peft_config.peft_type == "PREFIX_TUNING") and ("num_key_value_heads" in model_config):
         num_key_value_heads = model_config["num_key_value_heads"]
-        peft_config.token_dim = peft_config.token_dim // peft_config.num_attention_heads * num_key_value_heads
+        if model_config.get("head_dim", None) is not None:
+            head_dim = model_config["head_dim"]
+        else:
+            head_dim = peft_config.token_dim // peft_config.num_attention_heads
+        peft_config.token_dim = head_dim * num_key_value_heads
         peft_config.num_attention_heads = num_key_value_heads

     if getattr(peft_config, "encoder_hidden_size", None) is None:
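To see why the explicit head_dim lookup above matters, here is a small worked example with made-up numbers (not taken from any real checkpoint). Qwen3-style configs can set head_dim independently of hidden_size // num_attention_heads; the old formula implicitly assumed the two were equal, so it produced a prefix dimension that no longer matched the model's key/value projections.

# Hypothetical GQA config; numbers chosen only to expose the difference between the formulas.
model_config = {
    "hidden_size": 1024,       # becomes peft_config.token_dim before the GQA adjustment
    "num_attention_heads": 8,
    "num_key_value_heads": 2,  # grouped-query attention
    "head_dim": 256,           # explicit head_dim, not equal to 1024 // 8 == 128
}

token_dim = model_config["hidden_size"]
num_attention_heads = model_config["num_attention_heads"]
num_key_value_heads = model_config["num_key_value_heads"]

# Old logic: implicitly assumes head_dim == token_dim // num_attention_heads.
old_token_dim = token_dim // num_attention_heads * num_key_value_heads  # 128 * 2 == 256

# New logic: prefer the explicit head_dim when the config provides one.
if model_config.get("head_dim", None) is not None:
    head_dim = model_config["head_dim"]
else:
    head_dim = token_dim // num_attention_heads
new_token_dim = head_dim * num_key_value_heads  # 256 * 2 == 512

print(old_token_dim, new_token_dim)  # 256 512

With the fix, the prefix-tuning key/value cache is sized from the model's actual head_dim, which is what the new Qwen3 test below exercises.
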
26 changes: 19 additions & 7 deletions tests/test_decoder_models.py
@@ -665,15 +665,27 @@ def test_lora_layer_replication(self):
         self._test_prepare_for_training(model_id, LoraConfig, config_kwargs.copy())
         self._test_generate(model_id, LoraConfig, config_kwargs.copy())

-    def test_prompt_learning_with_grouped_query_attention(self):
+    def test_prefix_tuning_qwen2_with_grouped_query_attention(self):
         # See 1901, fixes a bug with handling GQA
         model_id = "peft-internal-testing/tiny-dummy-qwen2"
-        base_model = AutoModelForCausalLM.from_pretrained(model_id)
-        peft_config = PrefixTuningConfig(num_virtual_tokens=10, task_type="CAUSAL_LM")
-        model = get_peft_model(base_model, peft_config)
-        x = torch.tensor([[1, 2, 3]])
-        # does not raise
-        model(x)
+        with hub_online_once(model_id):
+            base_model = AutoModelForCausalLM.from_pretrained(model_id)
+            peft_config = PrefixTuningConfig(num_virtual_tokens=10, task_type="CAUSAL_LM")
+            model = get_peft_model(base_model, peft_config)
+            x = torch.tensor([[1, 2, 3]])
+            # does not raise
+            model(x)
+
+    def test_prefix_tuning_qwen3_with_grouped_query_attention(self):
+        # See 2881, fixes a bug with handling GQA
+        model_id = "trl-internal-testing/tiny-Qwen3ForCausalLM"
+        with hub_online_once(model_id):
+            base_model = AutoModelForCausalLM.from_pretrained(model_id)
+            peft_config = PrefixTuningConfig(num_virtual_tokens=10, task_type="CAUSAL_LM")
+            model = get_peft_model(base_model, peft_config)
+            x = torch.tensor([[1, 2, 3]])
+            # does not raise
+            model(x)

     def test_prefix_tuning_mistral(self):
         # See issue 869, 1962