diff --git a/1991.yml b/1991.yml index fb8705af2..3098515c0 100644 --- a/1991.yml +++ b/1991.yml @@ -254,7 +254,7 @@ unfrozen_parameters: # model.embed_tokens layers -gradient_accumulation_steps: 16 +gradient_accumulation_steps: 2 micro_batch_size: 2 num_epochs: 3 optimizer: adamw_torch_fused