add tp_size in config doc
This commit is contained in:
committed by
Sung Ching Liu
parent
64adbf1a15
commit
984be14147
@@ -78,6 +78,9 @@ tf32: true # require >=ampere
|
|||||||
bfloat16: true # require >=ampere
|
bfloat16: true # require >=ampere
|
||||||
float16: true
|
float16: true
|
||||||
|
|
||||||
|
# Tensor parallel
|
||||||
|
tp_size: 1 # tensor parallel size; should be set to the number of CUDA devices available
|
||||||
|
|
||||||
# Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset
|
# Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset
|
||||||
gpu_memory_limit: 20GiB
|
gpu_memory_limit: 20GiB
|
||||||
# Do the LoRA/PEFT loading on CPU -- this is required if the base model is so large it takes up most or all of the available GPU VRAM, e.g. during a model and LoRA merge
|
# Do the LoRA/PEFT loading on CPU -- this is required if the base model is so large it takes up most or all of the available GPU VRAM, e.g. during a model and LoRA merge
|
||||||
|
|||||||
@@ -543,18 +543,11 @@ def setup_fsdp_envs(cfg):
|
|||||||
] = cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap
|
] = cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap
|
||||||
|
|
||||||
|
|
||||||
def setup_tp_envs():
|
|
||||||
os.environ["ACCELERATE_USE_TP"] = "true"
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_optim_env(cfg):
|
def prepare_optim_env(cfg):
|
||||||
if not check_cuda_p2p_ib_support():
|
if not check_cuda_p2p_ib_support():
|
||||||
if os.getenv("NCCL_P2P_DISABLE") is None:
|
if os.getenv("NCCL_P2P_DISABLE") is None:
|
||||||
os.environ["NCCL_P2P_DISABLE"] = "1"
|
os.environ["NCCL_P2P_DISABLE"] = "1"
|
||||||
|
|
||||||
if cfg.tp_size > 1:
|
|
||||||
setup_tp_envs()
|
|
||||||
|
|
||||||
if cfg.fsdp:
|
if cfg.fsdp:
|
||||||
setup_fsdp_envs(cfg)
|
setup_fsdp_envs(cfg)
|
||||||
elif cfg.deepspeed:
|
elif cfg.deepspeed:
|
||||||
|
|||||||
Reference in New Issue
Block a user