migrate example configs to canonical attn_implementation
This commit is contained in:
@@ -60,7 +60,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
gradient_checkpointing: offload
|
||||
gradient_checkpointing_kwargs:
|
||||
|
||||
@@ -67,7 +67,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
warmup_ratio: 0.1
|
||||
evals_per_epoch: 1
|
||||
|
||||
@@ -70,7 +70,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
gradient_checkpointing: offload
|
||||
gradient_checkpointing_kwargs:
|
||||
|
||||
@@ -62,7 +62,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
logging_steps: 1
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
warmup_ratio: 0.1
|
||||
evals_per_epoch: 1
|
||||
|
||||
@@ -59,7 +59,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
logging_steps: 1
|
||||
flex_attention: true
|
||||
attn_implementation: flex_attention
|
||||
flex_attn_compile_kwargs:
|
||||
dynamic: false
|
||||
mode: max-autotune-no-cudagraphs
|
||||
|
||||
@@ -64,7 +64,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
torch_compile: true
|
||||
flex_attention: true
|
||||
attn_implementation: flex_attention
|
||||
flex_attn_compile_kwargs:
|
||||
dynamic: false
|
||||
mode: max-autotune-no-cudagraphs
|
||||
|
||||
@@ -61,7 +61,7 @@ bf16: true
|
||||
tf32: true
|
||||
|
||||
logging_steps: 1
|
||||
flex_attention: true
|
||||
attn_implementation: flex_attention
|
||||
flex_attn_compile_kwargs:
|
||||
dynamic: false
|
||||
mode: max-autotune-no-cudagraphs
|
||||
|
||||
Reference in New Issue
Block a user