Add seq2seq eval benchmark callback (#1274)
* Add CausalLMBenchEvalCallback for measuring seq2seq performance * Fix code for pre-commit * Fix typing and improve logging * eval_sample_packing must be false with CausalLMBenchEvalCallback
This commit is contained in:
@@ -60,7 +60,7 @@ s2_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -57,7 +57,7 @@ s2_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -49,7 +49,7 @@ flash_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -61,7 +61,7 @@ flash_attention: true
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
#default deepspeed, can use more aggresive if needed like zero2, zero3
|
||||
|
||||
@@ -49,7 +49,7 @@ flash_attention: true
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -81,7 +81,7 @@ loss_watchdog_patience: 3
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed: deepspeed_configs/zero2.json
|
||||
|
||||
@@ -68,7 +68,7 @@ loss_watchdog_patience: 3
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -58,7 +58,7 @@ flash_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -58,7 +58,7 @@ flash_attention:
|
||||
warmup_steps: 10
|
||||
evals_per_epoch: 4
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
saves_per_epoch: 1
|
||||
debug:
|
||||
deepspeed:
|
||||
|
||||
@@ -29,7 +29,7 @@ num_epochs: 1
|
||||
val_set_size: 0.1
|
||||
evals_per_epoch: 5
|
||||
eval_table_size:
|
||||
eval_table_max_new_tokens: 128
|
||||
eval_max_new_tokens: 128
|
||||
eval_sample_packing: false
|
||||
eval_batch_size: 1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user