handle refactor upstream for flash attention (#2966)

Wing Lian
2025-07-22 20:40:04 -04:00
committed by GitHub
parent 208fb7b8e7
commit 93709eb5ce


@@ -15,7 +15,13 @@ from typing import Optional
 import accelerate
 import torch
 import torch.distributed as dist
-from transformers.modeling_flash_attention_utils import _flash_supports_window_size
+try:
+    from transformers.modeling_flash_attention_utils import _flash_supports_window
+except ImportError:
+    from transformers.modeling_flash_attention_utils import (
+        _flash_supports_window_size as _flash_supports_window,
+    )
 from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids
 from axolotl.utils.logging import get_logger
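
The try/except above is the standard shim for an upstream rename: import the new symbol first, and on older transformers releases alias the old _flash_supports_window_size to the new name so the rest of the module uses a single identifier. A minimal, self-contained sketch of the same pattern follows; the _resolve_attr helper is hypothetical, and only the module path and the two symbol names come from this diff.

import importlib


def _resolve_attr(module_name, *candidates):
    # Return the first attribute found under any candidate name, so callers
    # keep one identifier across upstream renames.
    module = importlib.import_module(module_name)
    for name in candidates:
        if hasattr(module, name):
            return getattr(module, name)
    raise ImportError(f"none of {candidates!r} found in {module_name!r}")


# Prefer the post-refactor name, fall back to the pre-refactor one.
_flash_supports_window = _resolve_attr(
    "transformers.modeling_flash_attention_utils",
    "_flash_supports_window",
    "_flash_supports_window_size",
)

The try/except form used in the diff is generally preferable to comparing transformers.__version__, since it tracks the actual API surface rather than a version string.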
@@ -106,7 +112,7 @@ def create_ring_flash_attention_forward(
     # Assuming 4D tensors, key_states.shape[1] is the key/value sequence length (source length).
     use_sliding_windows = (
-        _flash_supports_window_size
+        _flash_supports_window
         and sliding_window is not None
         and key_states.shape[1] > sliding_window
     )
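
For context, a flag like use_sliding_windows typically gates whether sliding-window arguments are forwarded to flash-attn. Below is a hedged sketch of that gating, assuming flash-attn's flash_attn_func is available; the _attend wrapper and its causal=True choice are illustrative, not taken from the patched file.

from flash_attn import flash_attn_func


def _attend(query_states, key_states, value_states, sliding_window, use_sliding_windows):
    # flash-attn treats window_size=(-1, -1) as "no window"; otherwise each
    # query attends to at most `sliding_window` keys on either side.
    # Tensors are (batch, seqlen, nheads, headdim), as flash_attn_func expects.
    window_size = (
        (sliding_window, sliding_window) if use_sliding_windows else (-1, -1)
    )
    return flash_attn_func(
        query_states,
        key_states,
        value_states,
        causal=True,
        window_size=window_size,
    )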