# Patch summary: flips `group_by_length` from `true` to `false` in five example
# configs. No other key is changed; every other line in each hunk is context.
#   - examples/cerebras/qlora.yml
#   - examples/gptj/qlora.yml
#   - examples/llama-2/lora.yml
#   - examples/llama-2/qlora.yml
#   - examples/openllama-3b/qlora.yml
# NOTE(review): this patch text appears to have had its newlines collapsed into
# spaces (all five file sections sit on one line) -- presumably it will not
# apply as-is; confirm against the original commit before using it.
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml index 9340299b9..2f3700249 100644 --- a/examples/cerebras/qlora.yml +++ b/examples/cerebras/qlora.yml @@ -35,7 +35,7 @@ torchdistx_path: lr_scheduler: cosine learning_rate: 0.0002 train_on_inputs: false -group_by_length: true +group_by_length: false bf16: true fp16: false tf32: true diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml index 858c14862..f2427f4d4 100644 --- a/examples/gptj/qlora.yml +++ b/examples/gptj/qlora.yml @@ -32,7 +32,7 @@ torchdistx_path: lr_scheduler: cosine learning_rate: 0.0001 train_on_inputs: false -group_by_length: true +group_by_length: false bf16: true fp16: false tf32: true diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml index 74934320c..494538fff 100644 --- a/examples/llama-2/lora.yml +++ b/examples/llama-2/lora.yml @@ -38,7 +38,7 @@ lr_scheduler: cosine learning_rate: 0.0002 train_on_inputs: false -group_by_length: true +group_by_length: false bf16: true fp16: false tf32: false diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml index 1a45e7268..64728ac3d 100644 --- a/examples/llama-2/qlora.yml +++ b/examples/llama-2/qlora.yml @@ -39,7 +39,7 @@ lr_scheduler: cosine learning_rate: 0.0002 train_on_inputs: false -group_by_length: true +group_by_length: false bf16: true fp16: false tf32: false diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml index 83ae31f91..dcad9bbcd 100644 --- a/examples/openllama-3b/qlora.yml +++ b/examples/openllama-3b/qlora.yml @@ -34,7 +34,7 @@ torchdistx_path: lr_scheduler: cosine learning_rate: 0.0002 train_on_inputs: false -group_by_length: true +group_by_length: false bf16: true fp16: false tf32: true