better handling of dora merge on Conv layers in Qwen 3.5 (#3599)
* better handling of dora merge on Conv layers in Qwen 3.5
* address issues from code review
* stricter efficient merges for dora since we now have meta model to reference
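For context on the test change below: DoRA rescales each output channel of the LoRA-updated weight to a learned per-channel magnitude, so when the magnitude vector is absent there is no correct merge to fall back to. A minimal sketch of that idea, assuming the standard DoRA formulation for a 2-D (Linear) weight; merge_dora_tensor is a hypothetical stand-in for this repo's _merge_tensor_with_lora, and only the error message is taken from the test:

import torch

def merge_dora_tensor(base, lora_A, lora_B, scale, magnitude=None):
    """Sketch of a DoRA merge: W' = m * (W + scale * B @ A) / ||W + scale * B @ A||,
    with the norm taken per output channel (dim=1 for Linear; for Conv weights
    the norm would run over all non-output dims). Without the magnitude vector
    m there is nothing to rescale against, so the merge fails loudly instead of
    silently degrading to a plain LoRA merge."""
    if magnitude is None:
        # Same error the updated test asserts on.
        raise ValueError("DoRA merge requires a magnitude vector")
    updated = base + scale * (lora_B @ lora_A)
    norm = updated.norm(p=2, dim=1, keepdim=True)  # per-output-channel L2 norm
    return magnitude.view(-1, 1) * updated / norm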
@@ -2,6 +2,7 @@ import json
 import math
 from unittest.mock import Mock, patch
 
+import pytest
 import safetensors.torch
 import torch
 
@@ -773,8 +774,8 @@ class TestEfficientMerge:
             "v_proj should be unchanged (no LoRA weights for it)"
         )
 
-    def test_dora_missing_magnitude_falls_back(self):
-        """DoRA without magnitude vector falls back to standard LoRA merge."""
+    def test_dora_missing_magnitude_raises(self):
+        """DoRA with missing magnitude vector raises an explicit error."""
         hidden = 16
         r = 4
         alpha = 8
@@ -791,11 +792,13 @@ class TestEfficientMerge:
         }
 
         config = {"r": r, "lora_alpha": alpha, "use_dora": True}
-        merged, was_merged = _merge_tensor_with_lora(
-            base, "layer.proj.weight", lora_state, scale, config, "cpu", use_dora=True
-        )
-        assert was_merged
-        # No magnitude vector → PEFT creates DoRA layer but with default magnitude,
-        # which produces a result different from plain W + scale * B @ A.
-        # Just verify it was merged (not unchanged).
-        assert not torch.equal(merged, base)
+        with pytest.raises(ValueError, match="DoRA merge requires a magnitude vector"):
+            _merge_tensor_with_lora(
+                base,
+                "layer.proj.weight",
+                lora_state,
+                scale,
+                config,
+                "cpu",
+                use_dora=True,
+            )
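A hypothetical call pattern matching the new expectation, using the names from the sketch above:

base = torch.zeros(16, 16)
A, B = torch.randn(4, 16), torch.randn(16, 4)

try:
    merge_dora_tensor(base, A, B, scale=2.0)  # no magnitude vector supplied
except ValueError as e:
    print(e)  # -> DoRA merge requires a magnitude vector

merged = merge_dora_tensor(base, A, B, scale=2.0, magnitude=torch.ones(16))

Switching from the old silent fallback to an explicit error lines up with the "stricter efficient merges" note in the commit message: an adapter configured with use_dora=True but missing its magnitude key looks malformed, and a quiet plain-LoRA merge would hide that.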