Replace batch_size with gradient_accumulation_steps to decouple the configs from the number of GPUs

2023-05-31 09:38:12 -04:00
parent 5c3f5db38b
commit c2a0792680
16 changed files with 17 additions and 16 deletions
--- a/configs/gpt_neox_20b.yml
+++ b/configs/gpt_neox_20b.yml
@@ -25,7 +25,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./gpt4all-neox-20b
-batch_size: 48
+gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 5
 learning_rate: 0.00003