Add new evals_per_epoch and saves_per_epoch options to make things cleaner (#944)

* Add new evals_per_epoch and saves_per_epoch options to make things cleaner

* update per PR feedback
This commit is contained in:
Wing Lian
2023-12-12 15:35:23 -05:00
committed by GitHub
parent f1de29dd1e
commit 5f79b8242f
37 changed files with 102 additions and 70 deletions

View File

@@ -72,8 +72,8 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 32
eval_steps:
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
save_total_limit:
debug:

View File

@@ -49,8 +49,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -54,8 +54,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -56,8 +56,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -54,8 +54,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -56,8 +56,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -54,8 +54,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -56,8 +56,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -51,8 +51,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 40
eval_steps: 5
save_steps: 43
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -80,8 +80,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 5
save_steps: 10
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.000001

View File

@@ -51,8 +51,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 40
eval_steps: 5
save_steps: 43
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -46,8 +46,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -42,8 +42,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -58,9 +58,9 @@ flash_attn_fuse_qkv: false
flash_attn_fuse_mlp: true
warmup_steps: 100
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed: #deepspeed/zero2.json # multi-gpu only
weight_decay: 0.1

View File

@@ -62,8 +62,8 @@ flash_attention:
sdp_attention:
flash_optimum:
warmup_steps: 100
eval_steps:
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -54,10 +54,10 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -56,9 +56,9 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -60,8 +60,8 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
save_steps: 50
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -54,9 +54,9 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -47,10 +47,10 @@ xformers_attention:
flash_attention:
warmup_steps: 10
eval_steps:
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps: 0.25
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -46,10 +46,10 @@ xformers_attention:
flash_attention: true
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -67,10 +67,10 @@ loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
eval_steps:
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed: deepspeed/zero2.json
weight_decay: 0.0

View File

@@ -66,10 +66,10 @@ loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -44,8 +44,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0001

View File

@@ -49,8 +49,8 @@ flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -54,8 +54,8 @@ flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -48,8 +48,8 @@ flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -59,8 +59,8 @@ xformers_attention:
flash_attention:
warmup_steps: 100
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -59,8 +59,8 @@ xformers_attention:
flash_attention:
warmup_steps: 100
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1

View File

@@ -33,5 +33,5 @@ early_stopping_patience:
resume_from_checkpoint:
local_rank:
weight_decay: 0.1
eval_steps: 0.05
evals_per_epoch: 4
logging_steps: 1

View File

@@ -56,10 +56,10 @@ xformers_attention:
flash_attention:
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -56,10 +56,10 @@ xformers_attention:
flash_attention:
warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0

View File

@@ -45,8 +45,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0001

View File

@@ -45,8 +45,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 50
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0

View File

@@ -78,8 +78,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 50
save_steps: 50
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0