From adb593abac7ce755e2d7b78005c8d0bfcf308523 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 2 Apr 2025 20:35:42 +0700
Subject: [PATCH] fix: document offload gradient_checkpointing option (#2475)

---
 docs/config.qmd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/config.qmd b/docs/config.qmd
index 7c41c5126..6414b63a5 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -510,7 +510,8 @@ train_on_inputs: false
 # Note that training loss may have an oscillating pattern with this enabled.
 group_by_length: false
 
-# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
+# Whether to use gradient checkpointing. Available options are: true, false, "offload".
+# https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
 gradient_checkpointing: false
 # additional kwargs to pass to the trainer for gradient checkpointing
 # gradient_checkpointing_kwargs: