DeepSpeed doesn't work with flash-attn, and the GPU savings with flash-attn are better than the DeepSpeed headaches

This commit is contained in:
Wing Lian
2023-04-16 06:59:47 -04:00
parent a4593832a9
commit d1aed4c8e5
6 changed files with 68 additions and 80 deletions

View File

@@ -34,6 +34,6 @@ train_on_inputs: false
group_by_length: false
bf16: True
tf32: True
early_stopping_patience:
resume_from_checkpoint:
local_rank:
deepspeed:

View File

@@ -36,6 +36,6 @@ train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
deepspeed:

View File

@@ -36,6 +36,6 @@ train_on_inputs: false
group_by_length: false
bf16: true
tf32: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
deepspeed:

View File

@@ -36,6 +36,6 @@ train_on_inputs: false
group_by_length: false
bf16: True
tf32: True
early_stopping_patience:
resume_from_checkpoint:
local_rank:
deepspeed: