fix: document offload gradient_checkpointing option (#2475)

This commit is contained in:
NanoCode012
2025-04-02 20:35:42 +07:00
committed by GitHub
parent a0117c9bce
commit adb593abac

View File

@@ -510,7 +510,8 @@ train_on_inputs: false
# Note that training loss may have an oscillating pattern with this enabled.
group_by_length: false
-# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
+# Whether to use gradient checkpointing. Available options are: true, false, "offload".
+# https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
gradient_checkpointing: false
# additional kwargs to pass to the trainer for gradient checkpointing
# gradient_checkpointing_kwargs: