From 984be141477c9eac1bebf1d5b17fea3d95bbe469 Mon Sep 17 00:00:00 2001 From: Sunny Liu Date: Thu, 20 Feb 2025 00:01:59 -0500 Subject: [PATCH] add tp_size in config doc --- docs/config.qmd | 3 +++ src/axolotl/utils/trainer.py | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/config.qmd b/docs/config.qmd index 3a11666a5..4686b8b3a 100644 --- a/docs/config.qmd +++ b/docs/config.qmd @@ -78,6 +78,9 @@ tf32: true # require >=ampere bfloat16: true # require >=ampere float16: true +# Tensor parallel +tp_size: 1 # should be set to the number of CUDA devices available + # Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset gpu_memory_limit: 20GiB # Do the LoRA/PEFT loading on CPU -- this is required if the base model is so large it takes up most or all of the available GPU VRAM, e.g. during a model and LoRA merge diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index 755d60908..16e5c2c73 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -543,18 +543,11 @@ def setup_fsdp_envs(cfg): ] = cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap -def setup_tp_envs(): - os.environ["ACCELERATE_USE_TP"] = "true" - - def prepare_optim_env(cfg): if not check_cuda_p2p_ib_support(): if os.getenv("NCCL_P2P_DISABLE") is None: os.environ["NCCL_P2P_DISABLE"] = "1" - if cfg.tp_size > 1: - setup_tp_envs() - if cfg.fsdp: setup_fsdp_envs(cfg) elif cfg.deepspeed: