migrate example configs to canonical attn_implementation

2026-04-23 22:15:07 +00:00
parent 2d64d009d8
commit 39226623d2
222 changed files with 209 additions and 243 deletions
--- a/examples/qwen2/adamw-pretrain-fsdp2.yaml
+++ b/examples/qwen2/adamw-pretrain-fsdp2.yaml
@@ -49,7 +49,7 @@ tf32: false

 gradient_checkpointing: false
 logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2

 warmup_steps: 10
 evals_per_epoch: 0
--- a/examples/qwen2/dpo.yaml
+++ b/examples/qwen2/dpo.yaml
@@ -48,7 +48,7 @@ tf32: false
 gradient_checkpointing: true
 resume_from_checkpoint:
 logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2

 warmup_ratio: 0.1
 evals_per_epoch: 4
--- a/examples/qwen2/muon-pretrain-fsdp2.yaml
+++ b/examples/qwen2/muon-pretrain-fsdp2.yaml
@@ -49,7 +49,7 @@ tf32: false

 gradient_checkpointing: false
 logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2

 warmup_steps: 10
 evals_per_epoch: 0
--- a/examples/qwen2/prm.yaml
+++ b/examples/qwen2/prm.yaml
@@ -47,7 +47,7 @@ gradient_checkpointing_kwargs:
  use_reentrant: false
 resume_from_checkpoint:
 logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2

 warmup_ratio: 0.1
 evals_per_epoch:
--- a/examples/qwen2/qlora-fsdp.yaml
+++ b/examples/qwen2/qlora-fsdp.yaml
@@ -47,7 +47,7 @@ gradient_checkpointing_kwargs:
  use_reentrant: false
 resume_from_checkpoint:
 logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2

 warmup_ratio: 0.1
 evals_per_epoch: 4
--- a/examples/qwen2/reward-model.yaml
+++ b/examples/qwen2/reward-model.yaml
@@ -42,7 +42,7 @@ gradient_checkpointing_kwargs:
  use_reentrant: false
 resume_from_checkpoint:
 logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2

 warmup_ratio: 0.1
 evals_per_epoch: