swap batch size for gradient accumulation steps to decouple from num gpu

This commit is contained in:
Wing Lian
2023-05-31 09:38:12 -04:00
parent 5c3f5db38b
commit c2a0792680
16 changed files with 17 additions and 16 deletions

View File

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-alpaca
batch_size: 32
gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 5
learning_rate: 0.0003

View File

@@ -23,7 +23,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
batch_size: 32
gradient_accumulation_steps: 1
micro_batch_size: 16
num_epochs: 3
learning_rate: 0.00003

View File

@@ -25,7 +25,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./gpt4all-neox-20b
batch_size: 48
gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 5
learning_rate: 0.00003

View File

@@ -23,7 +23,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./llama-13b-sharegpt
batch_size: 64
gradient_accumulation_steps: 1
micro_batch_size: 2
warmup_steps: 1000
save_steps:

View File

@@ -29,7 +29,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
batch_size: 128
gradient_accumulation_steps: 1
micro_batch_size: 16
warmup_steps: 1000
save_steps:

View File

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-test
batch_size: 8
gradient_accumulation_steps: 1
micro_batch_size: 2
num_epochs: 3
warmup_steps: 100

View File

@@ -28,7 +28,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
batch_size: 128
gradient_accumulation_steps: 1
micro_batch_size: 16
num_epochs: 5
learning_rate: 0.00003

View File

@@ -24,7 +24,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./jeopardy-bot-7b
batch_size: 4
gradient_accumulation_steps: 2
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_bnb_8bit

View File

@@ -28,7 +28,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-alpaca
batch_size: 48
gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 5
learning_rate: 0.00001

View File

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-test
batch_size: 4
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 3
warmup_steps: 100

View File

@@ -53,7 +53,8 @@ wandb_log_model:
# where to save the finished model to
output_dir: ./completed-model
# training hyperparameters
batch_size: 8
gradient_accumulation_steps: 1
batch_size:
micro_batch_size: 2
num_epochs: 3
warmup_steps: 100

View File

@@ -22,7 +22,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./stable-alpaca-3b
batch_size: 2
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit

View File

@@ -30,7 +30,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-reflect
batch_size: 8
gradient_accumulation_steps: 1
micro_batch_size: 2
num_epochs: 3
learning_rate: 0.00003