Merge pull request #130 from OpenAccess-AI-Collective/gas

swap batch size for gradient accumulation steps to decouple from num gpu
Authored by Wing Lian on 2023-05-31 13:03:51 -04:00; committed by GitHub
16 changed files with 17 additions and 16 deletions
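The change swaps the derived batch_size key for an explicit gradient_accumulation_steps in every example config, so the per-step batch no longer has to be re-derived from the GPU count. A minimal sketch of the arithmetic this implies (not code from this repo; micro_batch_size and gradient_accumulation_steps mirror the YAML keys below, while world_size stands in for the number of GPUs and is an assumption):

    # Sketch only: effective samples consumed per optimizer step under the new keys.
    # world_size (number of GPUs / data-parallel processes) is assumed, not a config key.
    def effective_batch_size(micro_batch_size: int,
                             gradient_accumulation_steps: int,
                             world_size: int) -> int:
        return micro_batch_size * gradient_accumulation_steps * world_size

    # With micro_batch_size: 2 and gradient_accumulation_steps: 1, as in the first hunk:
    print(effective_batch_size(2, 1, 1))  # 2 per step on one GPU
    print(effective_batch_size(2, 1, 4))  # 8 per step on four GPUs, same config file

Under the old scheme the accumulation steps were presumably backed out of batch_size, which is what coupled the resulting schedule to however many GPUs the job ran on.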

@@ -265,7 +265,7 @@ wandb_log_model: # 'checkpoint'
output_dir: ./completed-model
# training hyperparameters
-batch_size: 8
+gradient_accumulation_steps: 1
micro_batch_size: 2
eval_batch_size: 2
num_epochs: 3

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-alpaca
-batch_size: 32
+gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 5
learning_rate: 0.0003

@@ -23,7 +23,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
-batch_size: 32
+gradient_accumulation_steps: 1
micro_batch_size: 16
num_epochs: 3
learning_rate: 0.00003

@@ -25,7 +25,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./gpt4all-neox-20b
-batch_size: 48
+gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 5
learning_rate: 0.00003

@@ -23,7 +23,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./llama-13b-sharegpt
-batch_size: 64
+gradient_accumulation_steps: 1
micro_batch_size: 2
warmup_steps: 1000
save_steps:

@@ -29,7 +29,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
-batch_size: 128
+gradient_accumulation_steps: 1
micro_batch_size: 16
warmup_steps: 1000
save_steps:

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-test
-batch_size: 8
+gradient_accumulation_steps: 1
micro_batch_size: 2
num_epochs: 3
warmup_steps: 100

@@ -28,7 +28,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-llama-alpaca
-batch_size: 128
+gradient_accumulation_steps: 1
micro_batch_size: 16
num_epochs: 5
learning_rate: 0.00003

@@ -24,7 +24,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./jeopardy-bot-7b
-batch_size: 4
+gradient_accumulation_steps: 2
micro_batch_size: 1
num_epochs: 2
optimizer: adamw_bnb_8bit

@@ -28,7 +28,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-alpaca
-batch_size: 48
+gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 5
learning_rate: 0.00001

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-test
-batch_size: 4
+gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 3
warmup_steps: 100

@@ -53,7 +53,8 @@ wandb_log_model:
# where to save the finished model to
output_dir: ./completed-model
# training hyperparameters
-batch_size: 8
+gradient_accumulation_steps: 1
+batch_size:
micro_batch_size: 2
num_epochs: 3
warmup_steps: 100
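The hunk above keeps an empty batch_size: entry alongside the new key. For anyone migrating a pre-change config by hand, a hypothetical helper (not part of this commit; it assumes the old batch_size was intended as the global batch across all GPUs, with world_size as the GPU count) might look like:

    # Hypothetical migration helper, not from this repo: derive the new key from an
    # old-style batch_size, assuming that value meant the global batch over all GPUs.
    def derive_gradient_accumulation_steps(batch_size: int,
                                           micro_batch_size: int,
                                           world_size: int) -> int:
        steps, remainder = divmod(batch_size, micro_batch_size * world_size)
        if remainder:
            raise ValueError("batch_size must be divisible by micro_batch_size * world_size")
        return steps

    # An old batch_size: 8 with micro_batch_size: 2 on a single GPU:
    print(derive_gradient_accumulation_steps(8, 2, 1))  # 4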

@@ -22,7 +22,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./stable-alpaca-3b
-batch_size: 2
+gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit

@@ -30,7 +30,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./lora-reflect
-batch_size: 8
+gradient_accumulation_steps: 1
micro_batch_size: 2
num_epochs: 3
learning_rate: 0.00003

@@ -26,7 +26,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./llama-7b-lora-int4
-batch_size: 1
+gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 3
optimizer: adamw_bnb_8bit

@@ -24,7 +24,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ./mpt-alpaca-7b
-batch_size: 1
+gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 3
optimizer: adamw_bnb_8bit