upgrade flash-attn to 2.8.3 for gpt-oss attn sink support (#3082)
This commit is contained in:
@@ -44,7 +44,7 @@ bf16: true
|
|||||||
tf32: true
|
tf32: true
|
||||||
|
|
||||||
flash_attention: true
|
flash_attention: true
|
||||||
attn_implementation: kernels-community/vllm-flash-attn3
|
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||||
|
|
||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
activation_offloading: true
|
activation_offloading: true
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ bf16: true
|
|||||||
tf32: true
|
tf32: true
|
||||||
|
|
||||||
flash_attention: true
|
flash_attention: true
|
||||||
attn_implementation: kernels-community/vllm-flash-attn3
|
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||||
|
|
||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
activation_offloading: true
|
activation_offloading: true
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ bf16: true
|
|||||||
tf32: true
|
tf32: true
|
||||||
|
|
||||||
flash_attention: true
|
flash_attention: true
|
||||||
attn_implementation: kernels-community/vllm-flash-attn3
|
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||||
|
|
||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
activation_offloading: true
|
activation_offloading: true
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ bf16: true
|
|||||||
tf32: true
|
tf32: true
|
||||||
|
|
||||||
flash_attention: true
|
flash_attention: true
|
||||||
attn_implementation: kernels-community/vllm-flash-attn3
|
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||||
|
|
||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
activation_offloading: true
|
activation_offloading: true
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ bf16: true
|
|||||||
tf32: true
|
tf32: true
|
||||||
|
|
||||||
flash_attention: true
|
flash_attention: true
|
||||||
attn_implementation: kernels-community/vllm-flash-attn3
|
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||||
|
|
||||||
gradient_checkpointing: true
|
gradient_checkpointing: true
|
||||||
activation_offloading: true
|
activation_offloading: true
|
||||||
|
|||||||
4
setup.py
4
setup.py
@@ -118,9 +118,9 @@ def get_package_version():
|
|||||||
|
|
||||||
|
|
||||||
extras_require = {
|
extras_require = {
|
||||||
"flash-attn": ["flash-attn==2.8.2"],
|
"flash-attn": ["flash-attn==2.8.3"],
|
||||||
"ring-flash-attn": [
|
"ring-flash-attn": [
|
||||||
"flash-attn==2.8.2",
|
"flash-attn==2.8.3",
|
||||||
"ring-flash-attn>=0.1.7",
|
"ring-flash-attn>=0.1.7",
|
||||||
"yunchang==0.6.0",
|
"yunchang==0.6.0",
|
||||||
],
|
],
|
||||||
|
|||||||
Reference in New Issue
Block a user