Fix: lora kernel pre-patch applied despite post-patch not applied (#2772)

* fix: do not pre-patch self attention if lora dropout non-zero
* fix: add test to check patch not applied
* fix: test
* fix: test config check
* fix where we check so that tests don't break
* fix: test

---------

Co-authored-by: Wing Lian <wing@axolotl.ai>
2025-06-14 11:54:06 -07:00
parent 80d5b066ec
commit 21388cf615
3 changed files with 97 additions and 11 deletions
--- a/src/axolotl/loaders/patch_manager.py
+++ b/src/axolotl/loaders/patch_manager.py
@@ -166,6 +166,17 @@ class PatchManager:
    def _apply_self_attention_lora_patch(self):
        """Apply self-attention LoRA patches if configured."""
        if self.cfg.lora_qkv_kernel or self.cfg.lora_o_kernel:
+            # Only patch if conditions are met
+            can_patch = (
+                self.cfg.lora_dropout == 0
+                if hasattr(self.cfg, "lora_dropout")
+                else True
+            )  # default to True if lora_dropout is not set
+
+            if not can_patch:
+                LOG.warning("Cannot patch self-attention - requires no dropout")
+                return
+
            from axolotl.monkeypatch.lora_kernels import patch_self_attn_lora

            patch_self_attn_lora(self.cfg)
--- a/src/axolotl/monkeypatch/lora_kernels.py
+++ b/src/axolotl/monkeypatch/lora_kernels.py
@@ -274,6 +274,29 @@ def find_mlp_in_layer(
                )


+def get_layers(model: PeftModelForCausalLM) -> list[nn.Module]:
+    """
+    Get the layers of the model. Handles text-only and multimodal models.
+
+    Args:
+        model: A PEFT model; the wrapped model is read from `model.model`.
+
+    Returns:
+        The wrapped model's `language_model.layers` if present, else its `model.layers`.
+    """
+    pretrained_model = model.model
+
+    # check for multimodal models first (detected via a `language_model` attribute)
+    if hasattr(pretrained_model, "language_model"):
+        return pretrained_model.language_model.layers
+    if hasattr(pretrained_model, "model"):
+        return pretrained_model.model.layers
+
+    raise NotImplementedError(
+        f"Model type {model.config.model_type} is not supported yet. Please create an Issue."
+    )
+
+
 def apply_lora_kernel_patches(
    model: PeftModelForCausalLM, cfg: DictDefault
 ) -> PeftModelForCausalLM:
@@ -345,17 +368,7 @@ def apply_lora_kernel_patches(
    if activation not in SUPPORTED_ACTIVATIONS:
        raise NotImplementedError(f"Activation {activation} is not supported")

-    layers = []
-    # check for multimodal models first
-    pretrained_model = model.model
-    if hasattr(pretrained_model, "language_model"):
-        layers = pretrained_model.language_model.layers
-    elif hasattr(pretrained_model, "model"):
-        layers = pretrained_model.model.layers
-    else:
-        raise NotImplementedError(
-            f"Model type {model.config.model_type} is not supported yet. Please create an Issue."
-        )
+    layers = get_layers(model)

    # Patch each layer
    for layer in layers: