set fp16 to false if bf16, update bf16: auto in example YAMLs (#1122) [skip ci]

* set fp16 to false if bf16, update bf16: auto in example YAMLs

* unset fp16 so that it fallsback properly if bf16 isn't available

* Update README.md [skip-ci]

Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>

* test that bf16 disables fp16

---------

Co-authored-by: NanoCode012 <kevinvong@rocketmail.com>
This commit is contained in:
Wing Lian
2024-01-22 18:44:01 -05:00
committed by GitHub
parent eaaeefce55
commit 782b6a4216
38 changed files with 86 additions and 67 deletions

View File

@@ -53,8 +53,8 @@ lr_quadratic_warmup: true
learning_rate: 0.000085
train_on_inputs: true
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: false

View File

@@ -36,8 +36,8 @@ lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: true
early_stopping_patience:

View File

@@ -41,8 +41,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -41,8 +41,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -41,8 +41,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -38,8 +38,8 @@ lr_scheduler: cosine
learning_rate: 0.00003
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: true
early_stopping_patience:

View File

@@ -64,8 +64,8 @@ lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: true
# stop training after this many evaluation losses have increased in a row

View File

@@ -38,8 +38,8 @@ lr_scheduler: cosine
learning_rate: 0.00003
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: true
early_stopping_patience:

View File

@@ -33,8 +33,8 @@ lr_scheduler: cosine
learning_rate: 0.0001
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: true
early_stopping_patience:

View File

@@ -31,7 +31,7 @@ lr_scheduler: cosine
learning_rate: 0.00003
train_on_inputs: false
group_by_length: false
bf16: true
bf16: auto
tf32: true
early_stopping_patience:
resume_from_checkpoint:

View File

@@ -41,8 +41,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -41,8 +41,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -47,8 +47,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -34,8 +34,8 @@ learning_rate: 5e-5
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: false

View File

@@ -34,8 +34,8 @@ learning_rate: 0.000005
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -63,8 +63,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -50,8 +50,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -33,7 +33,7 @@ lr_scheduler: cosine
learning_rate: 0.0000002
train_on_inputs: false
group_by_length: false
bf16: true
bf16: auto
tf32: true
early_stopping_patience:
resume_from_checkpoint:

View File

@@ -46,8 +46,8 @@ learning_rate: 0.000003
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing:

View File

@@ -46,8 +46,8 @@ learning_rate: 0.000003
train_on_inputs: false
group_by_length: true
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing:

View File

@@ -49,8 +49,8 @@ learning_rate: 1e-5
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: true
gradient_checkpointing: true

View File

@@ -27,7 +27,7 @@ num_epochs: 4
learning_rate: 0.00001
train_on_inputs: false
group_by_length: false
bf16: true
bf16: auto
tf32: true
early_stopping_patience:
resume_from_checkpoint:

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: false

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: false

View File

@@ -34,7 +34,7 @@ lr_scheduler: cosine
learning_rate: 0.0000002
train_on_inputs: false
group_by_length: false
bf16: true
bf16: auto
tf32: true
early_stopping_patience:
resume_from_checkpoint:

View File

@@ -33,7 +33,7 @@ lr_scheduler:
learning_rate: 0.00001
train_on_inputs: false
group_by_length: false
bf16: true
bf16: auto
tf32: true
gradient_checkpointing:
early_stopping_patience:

View File

@@ -41,8 +41,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -34,8 +34,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -43,8 +43,8 @@ learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true

View File

@@ -62,8 +62,8 @@ lr_scheduler: cosine
learning_rate: 0.00002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
gradient_checkpointing: true
# stop training after this many evaluation losses have increased in a row

View File

@@ -7,8 +7,8 @@ load_in_8bit: false
load_in_4bit: true
strict: false
sequence_len: 1024
bf16: true
fp16: false
bf16: auto
fp16:
tf32: false
flash_attention: true
special_tokens: