diff --git a/docs/config.qmd b/docs/config.qmd
index 7c41c5126..6414b63a5 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -510,7 +510,8 @@ train_on_inputs: false
 # Note that training loss may have an oscillating pattern with this enabled.
 group_by_length: false
 
-# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
+# Whether to use gradient checkpointing. Available options are: true, false, "offload".
+# https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
 gradient_checkpointing: false
 # additional kwargs to pass to the trainer for gradient checkpointing
 # gradient_checkpointing_kwargs: