better handling of DoRA merge on Conv layers in Qwen 3.5 (#3599)

* better handling of DoRA merge on Conv layers in Qwen 3.5

* address issues from code review

* stricter efficient merges for DoRA, since we now have a meta model to reference
Wing Lian
2026-04-12 10:57:45 -04:00
committed by GitHub
parent b8358aa5ab
commit 66c3e5a3fd
3 changed files with 229 additions and 11 deletions


@@ -2,6 +2,7 @@ import json
 import math
 from unittest.mock import Mock, patch
 
 import pytest
+import safetensors.torch
 import torch
 
@@ -773,8 +774,8 @@ class TestEfficientMerge:
             "v_proj should be unchanged (no LoRA weights for it)"
         )
 
-    def test_dora_missing_magnitude_falls_back(self):
-        """DoRA without magnitude vector falls back to standard LoRA merge."""
+    def test_dora_missing_magnitude_raises(self):
+        """DoRA with missing magnitude vector raises an explicit error."""
         hidden = 16
         r = 4
         alpha = 8
@@ -791,11 +792,13 @@
         }
         config = {"r": r, "lora_alpha": alpha, "use_dora": True}
 
-        merged, was_merged = _merge_tensor_with_lora(
-            base, "layer.proj.weight", lora_state, scale, config, "cpu", use_dora=True
-        )
-        assert was_merged
-        # No magnitude vector → PEFT creates DoRA layer but with default magnitude,
-        # which produces a result different from plain W + scale * B @ A.
-        # Just verify it was merged (not unchanged).
-        assert not torch.equal(merged, base)
+        with pytest.raises(ValueError, match="DoRA merge requires a magnitude vector"):
+            _merge_tensor_with_lora(
+                base,
+                "layer.proj.weight",
+                lora_state,
+                scale,
+                config,
+                "cpu",
+                use_dora=True,
+            )
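
For readers following the behavior change: DoRA stores a learned per-output-channel magnitude vector m alongside the LoRA factors A and B, and the merged weight is m * (W + s * B @ A) / ||W + s * B @ A||, with the norm taken row-wise (for Conv kernels, over everything except the output-channel dim). Without m the merge result is ill-defined, which is why the test now demands a ValueError instead of accepting a silent merge with PEFT's default magnitude. Below is a minimal sketch of that contract in plain PyTorch; dora_merge and its signature are hypothetical stand-ins, not the repository's _merge_tensor_with_lora.

    import torch

    def dora_merge(base, lora_A, lora_B, scale, magnitude=None):
        """Hypothetical DoRA merge mirroring the check the new test asserts."""
        if magnitude is None:
            # Fail loudly rather than silently merging with a default magnitude.
            raise ValueError("DoRA merge requires a magnitude vector")
        directed = base + scale * (lora_B @ lora_A)  # plain LoRA update applied to W
        # Row-wise weight norm; Conv weights are flattened to (out_channels, -1) first.
        norm = directed.reshape(directed.shape[0], -1).norm(dim=1)
        shape = (-1,) + (1,) * (directed.dim() - 1)
        return magnitude.reshape(shape) * directed / norm.reshape(shape)

    # Sanity check: a zero LoRA update with magnitude taken from W leaves W intact.
    W = torch.randn(16, 16)
    A, B = torch.randn(4, 16), torch.zeros(16, 4)
    merged = dora_merge(W, A, B, scale=2.0, magnitude=W.norm(dim=1))
    assert torch.allclose(merged, W)

Raising here matches the commit's third bullet: with a meta model available to reference, a missing magnitude tensor is a detectable inconsistency in the adapter state rather than something to paper over with defaults.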