From b17b1aada79077de6698ca35e4ab4bd5c3ad6ddb Mon Sep 17 00:00:00 2001 From: bursteratom Date: Wed, 11 Dec 2024 11:37:21 -0500 Subject: [PATCH] initialise process group for tp --- src/axolotl/utils/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index d44a4b4b6..fbe23430d 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -827,7 +827,7 @@ class ModelLoader: _ = _configure_zero3_memory_efficient_loading() if self.cfg.tensor_parallel == "auto": - rank = int(os.environ["RANK"]) + rank = int(os.environ.get("LOCAL_RANK", 0)) device = torch.device(f"cuda:{rank}") torch.distributed.init_process_group("nccl", device_id=device)