Swap batch_size for gradient_accumulation_steps to decouple the config from the number of GPUs

This commit is contained in:
Wing Lian
2023-05-31 09:38:12 -04:00
parent 5c3f5db38b
commit c2a0792680
16 changed files with 17 additions and 16 deletions

View File

@@ -22,7 +22,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./stable-alpaca-3b
-batch_size: 2
+gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit