fix: re-add gemma3 patch (#2817)

This commit is contained in:
NanoCode012
2025-06-24 10:51:30 +07:00
committed by GitHub
parent 12c826816d
commit c6b5d35e5d
3 changed files with 26 additions and 0 deletions

View File

@@ -64,6 +64,7 @@ class PatchManager:
self._patch_llama_derived_model()
self._apply_mistral_cross_entropy_patch()
self._apply_self_attention_lora_patch()
self._apply_gemma3_conditional_generation_forward_patch()
def apply_post_model_load_patches(self, model: PreTrainedModel):
"""Apply patches that require the model instance."""
@@ -221,6 +222,15 @@ class PatchManager:
has_remote_code=has_remote_code,
)
def _apply_gemma3_conditional_generation_forward_patch(self):
    """Apply gemma3 conditional generation forward patch.

    The patch is applied only when the loaded model config reports a
    ``model_type`` of ``"gemma3"`` or ``"gemma3_text"``; for every other
    config (including one that has no ``model_type`` at all) this is a no-op.
    """
    # Read model_type defensively: _patch_attention guards the same attribute
    # with hasattr(), so a config without it must not raise here either.
    model_type = getattr(self.model_config, "model_type", None)
    if model_type not in ("gemma3", "gemma3_text"):
        return

    # Imported inside the method so the gemma3 monkeypatch module is only
    # loaded when a gemma3 model is actually being configured.
    from axolotl.monkeypatch.models.gemma3.modeling import (
        patch_gemma3_conditional_generation_forward,
    )

    patch_gemma3_conditional_generation_forward()
def _patch_attention(self):
"""Apply attention-specific patches based on model type."""
if not (self.cfg.flash_attention and hasattr(self.model_config, "model_type")):

View File

@@ -0,0 +1,16 @@
"""Monkeypatch for gemma3 conditional generation forward to fix high loss"""
def patch_gemma3_conditional_generation_forward():
    """Force ``Gemma3ForConditionalGeneration.accepts_loss_kwargs`` to ``False``.

    Workaround for the high-loss issue tracked in the upstream pull request
    referenced below.

    Returns:
        A zero-argument ``unpatch`` callable that puts the class attribute
        back the way it was before this function ran. Calling it more than
        once is harmless.
    """
    # Remove when https://github.com/huggingface/transformers/pull/37208 merged
    from transformers.models.gemma3.modeling_gemma3 import (
        Gemma3ForConditionalGeneration,
    )

    # Snapshot only what the class itself defines (not inherited values) so
    # unpatch can tell "restore the old value" apart from "remove our override".
    not_defined = object()
    previous = vars(Gemma3ForConditionalGeneration).get(
        "accepts_loss_kwargs", not_defined
    )
    Gemma3ForConditionalGeneration.accepts_loss_kwargs = False

    def unpatch():
        """Undo the override, restoring any value the class defined before."""
        if previous is not not_defined:
            Gemma3ForConditionalGeneration.accepts_loss_kwargs = previous
        elif "accepts_loss_kwargs" in vars(Gemma3ForConditionalGeneration):
            # The class had no attribute of its own: drop ours so lookup
            # falls back to the base class again. The membership check keeps
            # a repeated unpatch() from raising AttributeError.
            del Gemma3ForConditionalGeneration.accepts_loss_kwargs

    return unpatch