diff --git a/docs/config.qmd b/docs/config.qmd
index 7b0d40462..04d1320ca 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -547,7 +547,7 @@ gradient_checkpointing: false
 early_stopping_patience: 3
 
 # Specify a scheduler and kwargs to use with the optimizer
-lr_scheduler: # 'one_cycle' | 'rex' | 'log_sweep' | empty for cosine
+lr_scheduler: # 'one_cycle' | 'rex' | 'log_sweep' | 'linear' | 'cosine_with_restarts' | 'polynomial' | 'constant' | 'constant_with_warmup' | 'inverse_sqrt' | 'reduce_lr_on_plateau' | 'cosine_with_min_lr' | 'warmup_stable_decay' | empty for cosine
 lr_scheduler_kwargs:
 cosine_min_lr_ratio: # decay lr to some percentage of the peak lr, e.g. cosine_min_lr_ratio=0.1 for 10% of peak lr
 cosine_constant_lr_ratio: # freeze lr at some percentage of the step, e.g. cosine_constant_lr_ratio=0.8 means start cosine_min_lr at 80% of training step (https://arxiv.org/pdf/2308.04014.pdf)