Swap `batch_size` for `gradient_accumulation_steps` in the example configs to decouple them from the number of GPUs

2023-05-31 09:38:12 -04:00
parent 5c3f5db38b
commit c2a0792680
16 changed files with 17 additions and 16 deletions
--- a/examples/gptq-lora-7b/config.yml
+++ b/examples/gptq-lora-7b/config.yml
@@ -26,7 +26,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./llama-7b-lora-int4
-batch_size: 1
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -24,7 +24,7 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./mpt-alpaca-7b
-batch_size: 1
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit