migrate example configs to canonical attn_implementation

This commit is contained in:
Wing Lian
2026-04-23 22:15:07 +00:00
parent 2d64d009d8
commit 39226623d2
222 changed files with 209 additions and 243 deletions

View File

@@ -60,7 +60,7 @@ bf16: true
tf32: true
logging_steps: 1
flash_attention: true
attn_implementation: flash_attention_2
gradient_checkpointing: offload
gradient_checkpointing_kwargs:

View File

@@ -67,7 +67,7 @@ bf16: true
tf32: true
logging_steps: 1
flash_attention: true
attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 1

View File

@@ -70,7 +70,7 @@ bf16: true
tf32: true
logging_steps: 1
flash_attention: true
attn_implementation: flash_attention_2
gradient_checkpointing: offload
gradient_checkpointing_kwargs:

View File

@@ -62,7 +62,7 @@ bf16: true
tf32: true
logging_steps: 1
flash_attention: true
attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 1

View File

@@ -59,7 +59,7 @@ bf16: true
tf32: true
logging_steps: 1
flex_attention: true
attn_implementation: flex_attention
flex_attn_compile_kwargs:
dynamic: false
mode: max-autotune-no-cudagraphs

View File

@@ -64,7 +64,7 @@ bf16: true
tf32: true
torch_compile: true
flex_attention: true
attn_implementation: flex_attention
flex_attn_compile_kwargs:
dynamic: false
mode: max-autotune-no-cudagraphs

View File

@@ -61,7 +61,7 @@ bf16: true
tf32: true
logging_steps: 1
flex_attention: true
attn_implementation: flex_attention
flex_attn_compile_kwargs:
dynamic: false
mode: max-autotune-no-cudagraphs