migrate example configs to canonical attn_implementation

This commit is contained in:
Wing Lian
2026-04-23 22:15:07 +00:00
parent 2d64d009d8
commit 39226623d2
222 changed files with 209 additions and 243 deletions

View File

@@ -20,7 +20,7 @@ output_dir: ./outputs/smollm2-135m-pretrain-streaming
sequence_len: 1024
sample_packing: true
pretrain_multipack_attn: true # Prevent cross-attention between packed sequences
# NOTE(review): legacy boolean attention flag. Given the commit title
# ("migrate ... to canonical attn_implementation"), this is presumably the
# pre-migration line of the hunk -- confirm against the raw diff markers.
flash_attention: true
# Canonical attention selector; presumably the post-migration replacement for
# the boolean flag above -- TODO confirm only this key remains in the file.
attn_implementation: flash_attention_2
# Batch size settings
gradient_accumulation_steps: 8

View File

@@ -18,7 +18,7 @@ output_dir: ./outputs/smollm2-135m-sft-streaming
# Sequence and packing settings
sequence_len: 1024
sample_packing: true
# NOTE(review): legacy boolean attention flag. Given the commit title
# ("migrate ... to canonical attn_implementation"), this is presumably the
# pre-migration line of the hunk -- confirm against the raw diff markers.
flash_attention: true
# Canonical attention selector; presumably the post-migration replacement for
# the boolean flag above -- TODO confirm only this key remains in the file.
attn_implementation: flash_attention_2
# Batch size settings
gradient_accumulation_steps: 4