Merge pull request #350 from tmm1/group-len-false-examples
set `group_by_length` to false in all examples
This commit is contained in:
@@ -426,7 +426,9 @@ save_safetensors:
|
|||||||
|
|
||||||
# whether to mask out or include the human's prompt from the training labels
|
# whether to mask out or include the human's prompt from the training labels
|
||||||
train_on_inputs: false
|
train_on_inputs: false
|
||||||
# don't use this, leads to wonky training (according to someone on the internet)
|
# group similarly sized data to minimize padding
|
||||||
|
# may be slower to start, as it must download and sort the entire dataset
|
||||||
|
# note that training loss may have an oscillating pattern with this enabled
|
||||||
group_by_length: false
|
group_by_length: false
|
||||||
|
|
||||||
# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
|
# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ torchdistx_path:
|
|||||||
lr_scheduler: cosine
|
lr_scheduler: cosine
|
||||||
learning_rate: 0.0002
|
learning_rate: 0.0002
|
||||||
train_on_inputs: false
|
train_on_inputs: false
|
||||||
group_by_length: true
|
group_by_length: false
|
||||||
bf16: true
|
bf16: true
|
||||||
fp16: false
|
fp16: false
|
||||||
tf32: true
|
tf32: true
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ torchdistx_path:
|
|||||||
lr_scheduler: cosine
|
lr_scheduler: cosine
|
||||||
learning_rate: 0.0001
|
learning_rate: 0.0001
|
||||||
train_on_inputs: false
|
train_on_inputs: false
|
||||||
group_by_length: true
|
group_by_length: false
|
||||||
bf16: true
|
bf16: true
|
||||||
fp16: false
|
fp16: false
|
||||||
tf32: true
|
tf32: true
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ lr_scheduler: cosine
|
|||||||
learning_rate: 0.0002
|
learning_rate: 0.0002
|
||||||
|
|
||||||
train_on_inputs: false
|
train_on_inputs: false
|
||||||
group_by_length: true
|
group_by_length: false
|
||||||
bf16: true
|
bf16: true
|
||||||
fp16: false
|
fp16: false
|
||||||
tf32: false
|
tf32: false
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ lr_scheduler: cosine
|
|||||||
learning_rate: 0.0002
|
learning_rate: 0.0002
|
||||||
|
|
||||||
train_on_inputs: false
|
train_on_inputs: false
|
||||||
group_by_length: true
|
group_by_length: false
|
||||||
bf16: true
|
bf16: true
|
||||||
fp16: false
|
fp16: false
|
||||||
tf32: false
|
tf32: false
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ torchdistx_path:
|
|||||||
lr_scheduler: cosine
|
lr_scheduler: cosine
|
||||||
learning_rate: 0.0002
|
learning_rate: 0.0002
|
||||||
train_on_inputs: false
|
train_on_inputs: false
|
||||||
group_by_length: true
|
group_by_length: false
|
||||||
bf16: true
|
bf16: true
|
||||||
fp16: false
|
fp16: false
|
||||||
tf32: true
|
tf32: true
|
||||||
|
|||||||
Reference in New Issue
Block a user