From aeca18a8b03f4787429395bb2817e2f5184ea088 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 23 Apr 2026 22:22:56 +0000
Subject: [PATCH] remove dead gemma4 branch in _set_attention_config

---
 src/axolotl/loaders/model.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/axolotl/loaders/model.py b/src/axolotl/loaders/model.py
index 997e0739d..7aa79533f 100644
--- a/src/axolotl/loaders/model.py
+++ b/src/axolotl/loaders/model.py
@@ -633,20 +633,13 @@ class ModelLoader:
         # replaces F.scaled_dot_product_attention post-load, so load under sdpa.
         # Every other canonical name (and hub-kernel paths) is passed through
         # verbatim — xformers/sage/flash_attention_* are registered under their
-        # own names in ALL_ATTENTION_FUNCTIONS before model load.
+        # own names in ALL_ATTENTION_FUNCTIONS before model load. gemma4_hybrid
+        # is already pinned to flash_attention_2 by normalize_attn_implementation.
         _LOAD_TIME_OVERRIDE = {"s2": "flash_attention_2", "fp8": "sdpa"}
-        if self.cfg.gemma4_hybrid_attn_impl:
-            # Load with flash_attention_2 for sliding-window layers; global
-            # layers are swapped to sdpa post-load.
-            hf_impl = "flash_attention_2"
-        elif self.cfg.attn_implementation:
+        if self.cfg.attn_implementation:
             hf_impl = _LOAD_TIME_OVERRIDE.get(
                 self.cfg.attn_implementation, self.cfg.attn_implementation
             )
-        else:
-            hf_impl = None
-
-        if hf_impl is not None:
             self.model_kwargs["attn_implementation"] = hf_impl
             self.model_config._attn_implementation = hf_impl
 
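
For reviewers, a minimal standalone sketch of the resolution logic after this
change. `resolve_load_time_impl` is a hypothetical helper distilled from the
branch above for illustration only; the real method mutates
`self.model_kwargs` and `self.model_config` in place rather than returning a
value.

    from typing import Optional

    _LOAD_TIME_OVERRIDE = {"s2": "flash_attention_2", "fp8": "sdpa"}

    def resolve_load_time_impl(attn_implementation: Optional[str]) -> Optional[str]:
        """Map a configured attention impl to the name HF should load under."""
        if not attn_implementation:
            # No value configured: leave HF's default resolution untouched.
            return None
        # Only s2 and fp8 are remapped at load time; every other name
        # (including hub-kernel paths) passes through verbatim.
        return _LOAD_TIME_OVERRIDE.get(attn_implementation, attn_implementation)

    assert resolve_load_time_impl("s2") == "flash_attention_2"
    assert resolve_load_time_impl("fp8") == "sdpa"
    assert resolve_load_time_impl("xformers") == "xformers"
    assert resolve_load_time_impl(None) is None

The dict lookup with the input as its own default keeps the pass-through
behavior explicit: only the two keys in _LOAD_TIME_OVERRIDE are remapped at
load time.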