Replace `batch_size` with `gradient_accumulation_steps` in configs to decouple them from the number of GPUs

2023-05-31 09:38:12 -04:00
parent 5c3f5db38b
commit c2a0792680
16 changed files with 17 additions and 16 deletions
--- a/configs/stability_3b.yml
+++ b/configs/stability_3b.yml
@@ -22,7 +22,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./stable-alpaca-3b
-batch_size: 2
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
 optimizer: adamw_bnb_8bit