fix: re-add gemma3 patch (#2817)

2025-06-24 10:51:30 +07:00
parent 12c826816d
commit c6b5d35e5d
3 changed files with 26 additions and 0 deletions
--- a/src/axolotl/loaders/patch_manager.py
+++ b/src/axolotl/loaders/patch_manager.py
@@ -64,6 +64,7 @@ class PatchManager:
        self._patch_llama_derived_model()
        self._apply_mistral_cross_entropy_patch()
        self._apply_self_attention_lora_patch()
+        self._apply_gemma3_conditional_generation_forward_patch()

    def apply_post_model_load_patches(self, model: PreTrainedModel):
        """Apply patches that require the model instance."""
@@ -221,6 +222,15 @@ class PatchManager:
                has_remote_code=has_remote_code,
            )

+    def _apply_gemma3_conditional_generation_forward_patch(self):
+        """Apply gemma3 conditional generation forward patch."""
+        if self.model_config.model_type in ["gemma3", "gemma3_text"]:
+            from axolotl.monkeypatch.models.gemma3.modeling import (
+                patch_gemma3_conditional_generation_forward,
+            )
+
+            patch_gemma3_conditional_generation_forward()
+
    def _patch_attention(self):
        """Apply attention-specific patches based on model type."""
        if not (self.cfg.flash_attention and hasattr(self.model_config, "model_type")):
--- a/src/axolotl/monkeypatch/models/gemma3/init.py
+++ b/src/axolotl/monkeypatch/models/gemma3/init.py
--- a/src/axolotl/monkeypatch/models/gemma3/modeling.py
+++ b/src/axolotl/monkeypatch/models/gemma3/modeling.py
@@ -0,0 +1,16 @@
+"""Monkeypatch for gemma3 conditional generation forward to fix high loss"""
+
+
+def patch_gemma3_conditional_generation_forward():
+    # Remove when https://github.com/huggingface/transformers/pull/37208 merged
+
+    from transformers.models.gemma3.modeling_gemma3 import (
+        Gemma3ForConditionalGeneration,
+    )
+
+    setattr(Gemma3ForConditionalGeneration, "accepts_loss_kwargs", False)
+
+    def unpatch():
+        delattr(Gemma3ForConditionalGeneration, "accepts_loss_kwargs")
+
+    return unpatch