Merge branch 'main' into 775-option-to-drop-vs-truncate-on-rows-longer-than-context-length

This commit is contained in:
mhenrichsen
2025-05-27 12:31:31 +02:00
committed by GitHub
75 changed files with 2850 additions and 2821 deletions

View File

@@ -635,7 +635,9 @@ weight_decay:
# AdamW optimizer hyperparameters.
# adam_beta3 and adam_epsilon2 are listed with this group but, as noted inline,
# are only used by the CAME optimizer.
# NOTE(review): every key is left empty here (an empty YAML value parses as
# null); presumably the consumer falls back to its own defaults - confirm.
adam_beta1:
adam_beta2:
adam_beta3: # only used by the CAME optimizer
adam_epsilon:
adam_epsilon2: # only used by the CAME optimizer
# Maximum norm for gradient clipping
max_grad_norm: