Swap `batch_size` for `gradient_accumulation_steps` in the configs to decouple them from the number of GPUs

This commit is contained in:
Wing Lian
2023-05-31 09:38:12 -04:00
parent 5c3f5db38b
commit c2a0792680
16 changed files with 17 additions and 16 deletions

View File

@@ -28,7 +28,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
batch_size: 128
gradient_accumulation_steps: 1
micro_batch_size: 16
num_epochs: 5
learning_rate: 0.00003