Patch summary: in examples/llama-3/diffusion/sft-1b.yaml, replace the
`warmup_steps: 0.1` key with `warmup_ratio: 0.1` (value unchanged).
NOTE(review): presumably 0.1 is meant as a fraction of training rather than a
step count -- confirm against the trainer's config reference.
NOTE(review): the blank context line after the changed line is reconstructed
from the hunk's declared 7-line count; confirm its position in the target file.

diff --git a/examples/llama-3/diffusion/sft-1b.yaml b/examples/llama-3/diffusion/sft-1b.yaml
index f3b29a809..a9a84ace4 100644
--- a/examples/llama-3/diffusion/sft-1b.yaml
+++ b/examples/llama-3/diffusion/sft-1b.yaml
@@ -30,7 +30,7 @@ eval_sample_packing: true
 gradient_accumulation_steps: 4
 micro_batch_size: 4
 num_epochs: 1
-warmup_steps: 0.1
+warmup_ratio: 0.1
 
 optimizer: adamw_8bit
 lr_scheduler: cosine