deepspeed doesn't work with flash-attn, and the gpu savings w flash attn are better than the deepspeed headaches
This commit is contained in:
@@ -34,6 +34,6 @@ train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: True
|
||||
tf32: True
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
deepspeed:
|
||||
|
||||
@@ -36,6 +36,6 @@ train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
tf32: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
deepspeed:
|
||||
|
||||
@@ -36,6 +36,6 @@ train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: true
|
||||
tf32: true
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
deepspeed:
|
||||
|
||||
@@ -36,6 +36,6 @@ train_on_inputs: false
|
||||
group_by_length: false
|
||||
bf16: True
|
||||
tf32: True
|
||||
early_stopping_patience:
|
||||
resume_from_checkpoint:
|
||||
local_rank:
|
||||
deepspeed:
|
||||
|
||||
Reference in New Issue
Block a user