batch api HF adapter for ring-flash-attn; cleanup and improvements (#2520)

* batch api HF adapter for ring-flash-attn; cleanup and improvements
* update
* adding all batch ring-flash-attn methods via single adapter
* removing pad_to_sequence_len=False for now
* fix
* updating docs to include batch SP
* review comments
* fixes for batch API funcs, simplify
* fixes
* fix
* updates
* add batch_zigzag smoke test
2025-04-16 13:50:48 -04:00
parent 682a9cf79b
commit b8c633aa97
13 changed files with 397 additions and 49 deletions
--- a/tests/e2e/patched/test_sp.py
+++ b/tests/e2e/patched/test_sp.py
@@ -73,7 +73,10 @@ class TestRingAttention:
        self, mock_world_size, mock_rank, mock_new_group, partial_state
    ):
        """Test that ring attention groups are created correctly."""
-        from axolotl.monkeypatch.attention.ring_attn import register_ring_attn
+        from axolotl.monkeypatch.attention.ring_attn import (
+            RingAttnFunc,
+            register_ring_attn,
+        )

        # Setup mocks
        mock_world_size.return_value = 8  # 8 GPUs total
@@ -82,7 +85,11 @@ class TestRingAttention:
        mock_new_group.return_value = mock_group

        # Call register_ring_attn with size 4
-        register_ring_attn(sequence_parallel_degree=4, heads_k_stride=1)
+        register_ring_attn(
+            sequence_parallel_degree=4,
+            heads_k_stride=1,
+            ring_attn_func=RingAttnFunc.VARLEN_LLAMA3,
+        )

        # Verify the number of calls without examining the arguments
        assert mock_new_group.call_count == 2