remove dead gemma4 branch in _set_attention_config
@@ -633,20 +633,13 @@ class ModelLoader:
         # replaces F.scaled_dot_product_attention post-load, so load under sdpa.
         # Every other canonical name (and hub-kernel paths) is passed through
         # verbatim — xformers/sage/flash_attention_* are registered under their
-        # own names in ALL_ATTENTION_FUNCTIONS before model load.
+        # own names in ALL_ATTENTION_FUNCTIONS before model load. gemma4_hybrid
+        # is already pinned to flash_attention_2 by normalize_attn_implementation.
         _LOAD_TIME_OVERRIDE = {"s2": "flash_attention_2", "fp8": "sdpa"}
-        if self.cfg.gemma4_hybrid_attn_impl:
-            # Load with flash_attention_2 for sliding-window layers; global
-            # layers are swapped to sdpa post-load.
-            hf_impl = "flash_attention_2"
-        elif self.cfg.attn_implementation:
+        if self.cfg.attn_implementation:
             hf_impl = _LOAD_TIME_OVERRIDE.get(
                 self.cfg.attn_implementation, self.cfg.attn_implementation
             )
         else:
             hf_impl = None

         if hf_impl is not None:
             self.model_kwargs["attn_implementation"] = hf_impl
             self.model_config._attn_implementation = hf_impl
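Why "fp8" maps to "sdpa" at load time: per the first comment line, the fp8 path replaces F.scaled_dot_product_attention after the model is built, so the model must be loaded under stock sdpa for every layer to route through the patched function. A minimal sketch of that post-load swap; _fp8_sdpa and its body are illustrative stand-ins, not the actual kernel:

import torch.nn.functional as F

_orig_sdpa = F.scaled_dot_product_attention

def _fp8_sdpa(query, key, value, *args, **kwargs):
    # Illustrative stand-in: a real fp8 path would quantize q/k/v and call a
    # dedicated low-precision kernel; delegating keeps the sketch runnable.
    return _orig_sdpa(query, key, value, *args, **kwargs)

# Post-load swap: every layer that loaded under "sdpa" now hits the wrapper.
F.scaled_dot_product_attention = _fp8_sdpa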
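Why the other names pass through verbatim: transformers resolves non-canonical attn_implementation strings against its ALL_ATTENTION_FUNCTIONS registry, so a custom name only survives from_pretrained() if it was registered first. A minimal sketch of that pre-load registration using the public AttentionInterface API; the sage_attention wrapper body and the placeholder checkpoint are assumptions, not this repo's code:

import torch.nn.functional as F
from transformers import AttentionInterface, AutoModelForCausalLM

def sage_attention_forward(module, query, key, value, attention_mask,
                           scaling=None, dropout=0.0, **kwargs):
    # Stand-in body: a real integration would dispatch to the SageAttention
    # kernel here; plain SDPA keeps the sketch runnable.
    out = F.scaled_dot_product_attention(
        query, key, value, attn_mask=attention_mask, dropout_p=dropout, scale=scaling
    )
    # transformers expects (batch, seq, heads, head_dim) plus attn weights.
    return out.transpose(1, 2).contiguous(), None

# Must run before model load so the name resolves at load time.
AttentionInterface.register("sage_attention", sage_attention_forward)

model = AutoModelForCausalLM.from_pretrained(
    "org/model",  # placeholder checkpoint
    attn_implementation="sage_attention",
)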
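And why the gemma4 branch was dead: per the new comment, normalize_attn_implementation rewrites the config before _set_attention_config runs, so the removed if self.cfg.gemma4_hybrid_attn_impl: arm could only ever reproduce what the generic lookup already returns. A hedged sketch of that upstream normalization; the function name comes from the diff, the body is an assumption:

def normalize_attn_implementation(cfg):
    # Runs before _set_attention_config: the gemma4 hybrid path always loads
    # under flash_attention_2 (global layers get swapped to sdpa post-load),
    # so pin the canonical name here once.
    if cfg.gemma4_hybrid_attn_impl:
        cfg.attn_implementation = "flash_attention_2"
    return cfg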