migrate example configs to canonical attn_implementation

This commit is contained in:
Wing Lian
2026-04-23 22:15:07 +00:00
parent 2d64d009d8
commit 39226623d2
222 changed files with 209 additions and 243 deletions

View File

@@ -47,7 +47,7 @@ learning_rate: 2e-5
bf16: true
tf32: true
flash_attention: true
attn_implementation: flash_attention_2
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
gradient_checkpointing: true

View File

@@ -43,7 +43,7 @@ learning_rate: 2e-5
bf16: true
tf32: true
flash_attention: true
attn_implementation: flash_attention_2
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
gradient_checkpointing: true

View File

@@ -44,7 +44,7 @@ learning_rate: 2e-5
bf16: true
tf32: true
flash_attention: true
attn_implementation: flash_attention_2
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
gradient_checkpointing: true

View File

@@ -43,7 +43,7 @@ learning_rate: 2e-5
bf16: true
tf32: true
flash_attention: true
attn_implementation: flash_attention_2
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
gradient_checkpointing: true

View File

@@ -56,7 +56,7 @@ learning_rate: 2e-4
bf16: true
tf32: true
flash_attention: true
attn_implementation: flash_attention_2
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
gradient_checkpointing: true

View File

@@ -56,7 +56,7 @@ learning_rate: 2e-4
bf16: true
tf32: true
flash_attention: true
attn_implementation: flash_attention_2
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
gradient_checkpointing: true