migrate example configs to canonical attn_implementation

This commit is contained in:
Wing Lian
2026-04-23 22:15:07 +00:00
parent 2d64d009d8
commit 39226623d2
222 changed files with 209 additions and 243 deletions

View File

@@ -65,7 +65,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -65,7 +65,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -50,7 +50,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -61,7 +61,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -61,7 +61,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -65,7 +65,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -75,7 +75,7 @@ gradient_checkpointing: true
activation_offloading: true
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 4

View File

@@ -50,7 +50,7 @@ gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
weight_decay: 0.0

View File

@@ -40,7 +40,7 @@ gradient_checkpointing_kwargs:
use_reentrant: false
resume_from_checkpoint:
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 1

View File

@@ -58,7 +58,7 @@ gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
logging_steps: 1
-flash_attention: true
+attn_implementation: flash_attention_2
warmup_ratio: 0.1
evals_per_epoch: 1