Add seq2seq eval benchmark callback (#1274)

* Add CausalLMBenchEvalCallback for measuring seq2seq performance * Fix code for pre-commit * Fix typing and improve logging * eval_sample_packing must be false with CausalLMBenchEvalCallback
2024-02-13 17:24:30 +01:00
parent 8430db22e2
commit 5a5d47458d
15 changed files with 228 additions and 14 deletions
--- a/README.md
+++ b/README.md
@@ -784,7 +784,8 @@ save_total_limit: # Checkpoints saved at a time
 max_steps:

 eval_table_size: # Approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0
-eval_table_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
+eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128
+eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. Default is ["sacrebleu", "comet", "ter", chrf]

 loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training)
 loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3)