zero3 can't use 8bit optimizer
This commit is contained in:
@@ -637,7 +637,7 @@ class TestMultiGPULlama:
|
|||||||
"gradient_accumulation_steps": 4,
|
"gradient_accumulation_steps": 4,
|
||||||
"output_dir": temp_dir,
|
"output_dir": temp_dir,
|
||||||
"learning_rate": 0.00001,
|
"learning_rate": 0.00001,
|
||||||
"optimizer": "adamw_8bit",
|
"optimizer": "adamw_torch",
|
||||||
"lr_scheduler": "cosine",
|
"lr_scheduler": "cosine",
|
||||||
"flash_attention": True,
|
"flash_attention": True,
|
||||||
"deepspeed": "deepspeed_configs/zero3_bf16_cpuoffload_all.json",
|
"deepspeed": "deepspeed_configs/zero3_bf16_cpuoffload_all.json",
|
||||||
|
|||||||
Reference in New Issue
Block a user