better handling of DoRA merge on Conv layers in Qwen 3.5 (#3599)

* better handling of DoRA merge on Conv layers in Qwen 3.5

* address issues from code review

* stricter efficient merges for DoRA, since we now have a meta model to reference
Wing Lian
2026-04-12 10:57:45 -04:00
committed by GitHub
parent b8358aa5ab
commit 66c3e5a3fd
3 changed files with 229 additions and 11 deletions


@@ -2,6 +2,7 @@ import json
 import math
 from unittest.mock import Mock, patch
 
 import pytest
+import safetensors.torch
 import torch
 
@@ -773,8 +774,8 @@ class TestEfficientMerge:
             "v_proj should be unchanged (no LoRA weights for it)"
         )
 
-    def test_dora_missing_magnitude_falls_back(self):
-        """DoRA without magnitude vector falls back to standard LoRA merge."""
+    def test_dora_missing_magnitude_raises(self):
+        """DoRA with missing magnitude vector raises an explicit error."""
         hidden = 16
         r = 4
         alpha = 8
@@ -791,11 +792,13 @@
         }
         config = {"r": r, "lora_alpha": alpha, "use_dora": True}
 
-        merged, was_merged = _merge_tensor_with_lora(
-            base, "layer.proj.weight", lora_state, scale, config, "cpu", use_dora=True
-        )
-        assert was_merged
-        # No magnitude vector → PEFT creates DoRA layer but with default magnitude,
-        # which produces a result different from plain W + scale * B @ A.
-        # Just verify it was merged (not unchanged).
-        assert not torch.equal(merged, base)
+        with pytest.raises(ValueError, match="DoRA merge requires a magnitude vector"):
+            _merge_tensor_with_lora(
+                base,
+                "layer.proj.weight",
+                lora_state,
+                scale,
+                config,
+                "cpu",
+                use_dora=True,
+            )
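
For readers following the behavior change: DoRA stores a learned per-output-channel magnitude vector m alongside the LoRA factors A and B, and the merged weight is m * (W + s * B @ A) / ||W + s * B @ A||, with the norm taken row-wise (for Conv kernels, over everything except the output-channel dim). Without m the merge result is ill-defined, which is why the test now demands a ValueError instead of accepting a silent merge with PEFT's default magnitude. Below is a minimal sketch of that contract in plain PyTorch; dora_merge and its signature are hypothetical stand-ins, not the repository's _merge_tensor_with_lora.

    import torch

    def dora_merge(base, lora_A, lora_B, scale, magnitude=None):
        """Hypothetical DoRA merge mirroring the check the new test asserts."""
        if magnitude is None:
            # Fail loudly rather than silently merging with a default magnitude.
            raise ValueError("DoRA merge requires a magnitude vector")
        directed = base + scale * (lora_B @ lora_A)  # plain LoRA update applied to W
        # Row-wise weight norm; Conv weights are flattened to (out_channels, -1) first.
        norm = directed.reshape(directed.shape[0], -1).norm(dim=1)
        shape = (-1,) + (1,) * (directed.dim() - 1)
        return magnitude.reshape(shape) * directed / norm.reshape(shape)

    # Sanity check: a zero LoRA update with magnitude taken from W leaves W intact.
    W = torch.randn(16, 16)
    A, B = torch.randn(4, 16), torch.zeros(16, 4)
    merged = dora_merge(W, A, B, scale=2.0, magnitude=W.norm(dim=1))
    assert torch.allclose(merged, W)

Raising here matches the commit's third bullet: with a meta model available to reference, a missing magnitude tensor is a detectable inconsistency in the adapter state rather than something to paper over with defaults.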