From b4d1d2278256dbac4240d4971c847d0f5df63b1d Mon Sep 17 00:00:00 2001
From: Aman Karmani
Date: Mon, 7 Aug 2023 16:18:42 -0700
Subject: [PATCH] note pattern when using groups

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 479b1ab4a..bbba22b8f 100644
--- a/README.md
+++ b/README.md
@@ -427,7 +427,8 @@ save_safetensors:
 # whether to mask out or include the human's prompt from the training labels
 train_on_inputs: false
 # group similarly sized data to minimize padding
-# may be slower to start as it must download and sort the entire dataset
+# may be slower to start, as it must download and sort the entire dataset
+# note that training loss may have an oscillating pattern with this enabled
 group_by_length: false

 # Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing