From aeca18a8b03f4787429395bb2817e2f5184ea088 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 23 Apr 2026 22:22:56 +0000
Subject: [PATCH] remove dead gemma4 branch in _set_attention_config

---
 src/axolotl/loaders/model.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/axolotl/loaders/model.py b/src/axolotl/loaders/model.py
index 997e0739d..7aa79533f 100644
--- a/src/axolotl/loaders/model.py
+++ b/src/axolotl/loaders/model.py
@@ -633,20 +633,13 @@ class ModelLoader:
         # replaces F.scaled_dot_product_attention post-load, so load under sdpa.
         # Every other canonical name (and hub-kernel paths) is passed through
         # verbatim — xformers/sage/flash_attention_* are registered under their
-        # own names in ALL_ATTENTION_FUNCTIONS before model load.
+        # own names in ALL_ATTENTION_FUNCTIONS before model load. gemma4_hybrid
+        # is already pinned to flash_attention_2 by normalize_attn_implementation.
         _LOAD_TIME_OVERRIDE = {"s2": "flash_attention_2", "fp8": "sdpa"}
-        if self.cfg.gemma4_hybrid_attn_impl:
-            # Load with flash_attention_2 for sliding-window layers; global
-            # layers are swapped to sdpa post-load.
-            hf_impl = "flash_attention_2"
-        elif self.cfg.attn_implementation:
+        if self.cfg.attn_implementation:
             hf_impl = _LOAD_TIME_OVERRIDE.get(
                 self.cfg.attn_implementation, self.cfg.attn_implementation
             )
-        else:
-            hf_impl = None
-
-        if hf_impl is not None:
             self.model_kwargs["attn_implementation"] = hf_impl
             self.model_config._attn_implementation = hf_impl
 
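
For reviewers, a minimal standalone sketch of the resolution logic after this
change. `resolve_load_time_impl` is a hypothetical helper distilled from the
branch above for illustration only; the real method mutates
`self.model_kwargs` and `self.model_config` in place rather than returning a
value.

    from typing import Optional

    _LOAD_TIME_OVERRIDE = {"s2": "flash_attention_2", "fp8": "sdpa"}

    def resolve_load_time_impl(attn_implementation: Optional[str]) -> Optional[str]:
        """Map a configured attention impl to the name HF should load under."""
        if not attn_implementation:
            # No value configured: leave HF's default resolution untouched.
            return None
        # Only s2 and fp8 are remapped at load time; every other name
        # (including hub-kernel paths) passes through verbatim.
        return _LOAD_TIME_OVERRIDE.get(attn_implementation, attn_implementation)

    assert resolve_load_time_impl("s2") == "flash_attention_2"
    assert resolve_load_time_impl("fp8") == "sdpa"
    assert resolve_load_time_impl("xformers") == "xformers"
    assert resolve_load_time_impl(None) is None

The dict lookup with the input as its own default keeps the pass-through
behavior explicit: only the two keys in _LOAD_TIME_OVERRIDE are remapped at
load time.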