From 0fa752e58b593440ced0dd1cec0630f9b7b92664 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Thu, 21 Aug 2025 15:04:10 -0400 Subject: [PATCH] upgrade flash-attn to 2.8.3 for gpt-oss attn sink support (#3082) --- examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offload.yaml | 2 +- examples/gpt-oss/gpt-oss-20b-fft-deepspeed-zero3.yaml | 2 +- examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml | 2 +- examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml | 2 +- examples/gpt-oss/gpt-oss-20b-sft-lora-singlegpu.yaml | 2 +- setup.py | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offload.yaml b/examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offload.yaml index 4b4fbd89b..62f3167e8 100644 --- a/examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offload.yaml +++ b/examples/gpt-oss/gpt-oss-120b-fft-fsdp2-offload.yaml @@ -44,7 +44,7 @@ bf16: true tf32: true flash_attention: true -attn_implementation: kernels-community/vllm-flash-attn3 +attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3 gradient_checkpointing: true activation_offloading: true diff --git a/examples/gpt-oss/gpt-oss-20b-fft-deepspeed-zero3.yaml b/examples/gpt-oss/gpt-oss-20b-fft-deepspeed-zero3.yaml index 440f0c509..ccb84e28e 100644 --- a/examples/gpt-oss/gpt-oss-20b-fft-deepspeed-zero3.yaml +++ b/examples/gpt-oss/gpt-oss-20b-fft-deepspeed-zero3.yaml @@ -40,7 +40,7 @@ bf16: true tf32: true flash_attention: true -attn_implementation: kernels-community/vllm-flash-attn3 +attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3 gradient_checkpointing: true activation_offloading: true diff --git a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml index 1b142b6c3..69a3c434d 100644 --- a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml +++ b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml @@ -41,7 +41,7 @@ bf16: true tf32: true flash_attention: true -attn_implementation: kernels-community/vllm-flash-attn3 +attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3 gradient_checkpointing: true activation_offloading: true diff --git a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml index bdbb70fae..4a0f1ad70 100644 --- a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml +++ b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml @@ -40,7 +40,7 @@ bf16: true tf32: true flash_attention: true -attn_implementation: kernels-community/vllm-flash-attn3 +attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3 gradient_checkpointing: true activation_offloading: true diff --git a/examples/gpt-oss/gpt-oss-20b-sft-lora-singlegpu.yaml b/examples/gpt-oss/gpt-oss-20b-sft-lora-singlegpu.yaml index c4e1a982d..b6deacb1b 100644 --- a/examples/gpt-oss/gpt-oss-20b-sft-lora-singlegpu.yaml +++ b/examples/gpt-oss/gpt-oss-20b-sft-lora-singlegpu.yaml @@ -53,7 +53,7 @@ bf16: true tf32: true flash_attention: true -attn_implementation: kernels-community/vllm-flash-attn3 +attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3 gradient_checkpointing: true activation_offloading: true diff --git a/setup.py b/setup.py index de6f19e56..5aab9d7c0 100644 --- a/setup.py +++ b/setup.py @@ -118,9 +118,9 @@ def get_package_version(): extras_require = { - "flash-attn": ["flash-attn==2.8.2"], + "flash-attn": ["flash-attn==2.8.3"], "ring-flash-attn": [ - "flash-attn==2.8.2", + "flash-attn==2.8.3", "ring-flash-attn>=0.1.7", "yunchang==0.6.0", ],