From c01015f33f257badd05fbf8e680f22c294e9628e Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 16 Aug 2023 01:22:43 +0900
Subject: [PATCH] Fix(config): Update handling of deepspeed config (#404)

* Fix(config): Update handling of deepspeed config

* feat: auto set deepspeed env if deepspeed passed

* fix: update new deepspeed instructions
---
 README.md                    |  7 +++++--
 src/axolotl/utils/config.py  |  2 +-
 src/axolotl/utils/trainer.py | 19 +++++++------------
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 8c3f9adb4..4acf02214 100644
--- a/README.md
+++ b/README.md
@@ -519,7 +519,7 @@ tokens:
 fsdp:
 fsdp_config:
 
-# Deepspeed
+# Deepspeed config path
 deepspeed:
 
 # Path to torch distx for optim 'adamw_anyprecision'
@@ -570,7 +570,10 @@ fsdp_config:
   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
 ```
 
-- llama Deepspeed: append `ACCELERATE_USE_DEEPSPEED=true` in front of finetune command
+- llama Deepspeed
+```yaml
+deepspeed: # path to config
+```
 
 ##### Weights & Biases Logging
 
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index b7bbab668..8d07ce9b6 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -147,7 +147,7 @@ def validate_config(cfg):
                 "You should probably set bfloat16 or float16 to true to "
                 "load the model in float16 for BetterTransformers"
             )
-        if int(torch.__version__.split(".")[0]) < 2:
+        if int(torch.__version__.split(".", maxsplit=1)[0]) < 2:
             LOG.warning("torch>=2.0.0 required")
             raise ValueError(
                 f"flash_optimum for BetterTransformers may not be used with {torch.__version__}"
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index eaf50d2d9..fdf67509b 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -364,6 +364,9 @@ def setup_fsdp_envs(cfg):
 def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_steps):
     if cfg.fsdp:
         setup_fsdp_envs(cfg)
+    elif cfg.deepspeed:
+        os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"
+
     warmup_steps = (
         cfg.warmup_steps
         if cfg.warmup_steps is not None
@@ -411,21 +414,13 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
     if cfg.fsdp_config:
         training_arguments_kwargs["fsdp_config"] = dict(cfg.fsdp_config)
 
+    # deepspeed
+    if cfg.deepspeed:
+        training_arguments_kwargs["deepspeed"] = cfg.deepspeed
+
     if cfg.lr_quadratic_warmup is not None:
         training_arguments_kwargs["lr_quadratic_warmup"] = cfg.lr_quadratic_warmup
 
-    # deepspeed
-    if (
-        os.environ.get("ACCELERATE_USE_DEEPSPEED") == "true"
-        and torch.cuda.device_count() > 1
-    ):
-        if cfg.deepspeed:
-            training_arguments_kwargs["deepspeed"] = cfg.deepspeed
-        else:
-            # make a guess here
-            # TODO search Path("./") for one
-            training_arguments_kwargs["deepspeed"] = "./ds_config.json"
-
     if cfg.adam_beta1:
         training_arguments_kwargs["adam_beta1"] = cfg.adam_beta1
     if cfg.adam_beta2:
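
Usage note for the README change above: after this patch, prepending `ACCELERATE_USE_DEEPSPEED=true` to the finetune command is no longer needed. Setting the `deepspeed` key in the axolotl YAML is enough; the filename below is a placeholder for illustration, not a file the repo ships:

```yaml
# minimal sketch of the relevant config key (path is illustrative)
deepspeed: ds_config.json  # path to any valid DeepSpeed JSON config
```

`setup_trainer` then exports `ACCELERATE_USE_DEEPSPEED=true` itself and forwards the path via the `deepspeed` argument of `transformers.TrainingArguments`, which accepts a path to a DeepSpeed JSON file.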
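The `config.py` change is behavior-preserving: only the major version is needed, so `maxsplit=1` stops splitting after the first dot (likely to satisfy pylint's `use-maxsplit-arg` check while avoiding needless work). A quick check with an illustrative version string:

```python
# maxsplit=1 splits "2.0.1+cu118" into ["2", "0.1+cu118"]; the major
# version is at index 0 either way, so the guard's behavior is unchanged
version = "2.0.1+cu118"  # stand-in for torch.__version__
assert version.split(".", maxsplit=1) == ["2", "0.1+cu118"]
assert int(version.split(".", maxsplit=1)[0]) == int(version.split(".")[0]) == 2
```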
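The trainer change inverts the old flow: previously a user-set `ACCELERATE_USE_DEEPSPEED` env var (plus a multi-GPU check) gated whether a config, or a guessed `./ds_config.json`, was passed; now `cfg.deepspeed` is the single source of truth and the env var is derived from it. A condensed, runnable sketch of the resulting logic, using a plain dict in place of axolotl's config object (names here are illustrative, not the project's API):

```python
import os

def apply_distributed_settings(cfg: dict, training_arguments_kwargs: dict) -> None:
    # FSDP takes precedence; otherwise a deepspeed path enables Accelerate's
    # DeepSpeed integration automatically (no manual env var needed anymore)
    if cfg.get("fsdp"):
        pass  # setup_fsdp_envs(cfg) would run here instead
    elif cfg.get("deepspeed"):
        os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"

    # the config path is forwarded whenever present; the old device-count
    # check and "./ds_config.json" fallback guess are gone
    if cfg.get("deepspeed"):
        training_arguments_kwargs["deepspeed"] = cfg["deepspeed"]

kwargs: dict = {}
apply_distributed_settings({"deepspeed": "ds_config.json"}, kwargs)
print(os.environ["ACCELERATE_USE_DEEPSPEED"], kwargs)
# -> true {'deepspeed': 'ds_config.json'}
```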