From 8ddc18ec8d3e29de8e0227cdf49fb6ef1c65de5e Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 17 Dec 2024 13:56:48 -0500 Subject: [PATCH] move the setting of PYTORCH_CUDA_ALLOC_CONF to the cli rather than train module (#2183) [skip ci] * move the setting of PYTORCH_CUDA_ALLOC_CONF to the cli rather than train module * move set_pytorch_cuda_alloc_conf to a different module to have fewer loaded dependencies for the CLI --- src/axolotl/cli/main.py | 4 ++++ src/axolotl/evaluate.py | 3 ++- src/axolotl/train.py | 5 +---- src/axolotl/utils/__init__.py | 11 ++++++++++- src/axolotl/utils/trainer.py | 11 ----------- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/axolotl/cli/main.py b/src/axolotl/cli/main.py index ec7f5b694..6f883a2ac 100644 --- a/src/axolotl/cli/main.py +++ b/src/axolotl/cli/main.py @@ -13,6 +13,7 @@ from axolotl.cli.utils import ( fetch_from_github, ) from axolotl.common.cli import EvaluateCliArgs, PreprocessCliArgs, TrainerCliArgs +from axolotl.utils import set_pytorch_cuda_alloc_conf from axolotl.utils.config.models.input.v0_4_1 import AxolotlInputConfig @@ -48,6 +49,9 @@ def train(config: str, accelerate: bool, **kwargs): """Train or fine-tune a model.""" kwargs = {k: v for k, v in kwargs.items() if v is not None} + # Enable expandable segments for cuda allocation to improve VRAM usage + set_pytorch_cuda_alloc_conf() + if accelerate: base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.train"] if config: diff --git a/src/axolotl/evaluate.py b/src/axolotl/evaluate.py index 7fd60cb5f..acf15e3fc 100644 --- a/src/axolotl/evaluate.py +++ b/src/axolotl/evaluate.py @@ -12,9 +12,10 @@ from accelerate.logging import get_logger from axolotl.common.cli import TrainerCliArgs from axolotl.logging_config import configure_logging from axolotl.train import TrainDatasetMeta +from axolotl.utils import set_pytorch_cuda_alloc_conf from axolotl.utils.dict import DictDefault from axolotl.utils.models import load_model, load_processor, load_tokenizer -from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer +from axolotl.utils.trainer import setup_trainer project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) src_dir = os.path.join(project_root, "src") diff --git a/src/axolotl/train.py b/src/axolotl/train.py index 851a71e54..af2ee9795 100644 --- a/src/axolotl/train.py +++ b/src/axolotl/train.py @@ -24,7 +24,7 @@ from axolotl.logging_config import configure_logging from axolotl.utils.dict import DictDefault from axolotl.utils.freeze import freeze_layers_except from axolotl.utils.models import load_model, load_processor, load_tokenizer -from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer +from axolotl.utils.trainer import setup_trainer try: from optimum.bettertransformer import BetterTransformer @@ -53,9 +53,6 @@ class TrainDatasetMeta: def train( *, cfg: DictDefault, cli_args: TrainerCliArgs, dataset_meta: TrainDatasetMeta ) -> Tuple[Union[PeftModel, PreTrainedModel], PreTrainedTokenizer]: - # Enable expandable segments for cuda allocation to improve VRAM usage - set_pytorch_cuda_alloc_conf() - # Load tokenizer LOG.debug( f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}", diff --git a/src/axolotl/utils/__init__.py b/src/axolotl/utils/__init__.py index 460205447..35ea14551 100644 --- a/src/axolotl/utils/__init__.py +++ b/src/axolotl/utils/__init__.py @@ -3,6 +3,7 @@ Basic utils for Axolotl """ import importlib.util +import os import re import torch @@ -33,4 +34,12 @@ def get_pytorch_version() -> tuple[int, int, int]: return major, minor, patch -# pylint: enable=duplicate-code +def set_pytorch_cuda_alloc_conf(): + """Set up CUDA allocation config if using PyTorch >= 2.2""" + torch_version = torch.__version__.split(".") + torch_major, torch_minor = int(torch_version[0]), int(torch_version[1]) + if torch_major == 2 and torch_minor >= 2: + if os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None: + os.environ[ + "PYTORCH_CUDA_ALLOC_CONF" + ] = "expandable_segments:True,roundup_power2_divisions:16" diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index fd09b3eb6..32e54c9a8 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -512,17 +512,6 @@ def prepare_opinionated_env(cfg): os.environ["TOKENIZERS_PARALLELISM"] = "false" -def set_pytorch_cuda_alloc_conf(): - """Set up CUDA allocation config if using PyTorch >= 2.2""" - torch_version = torch.__version__.split(".") - torch_major, torch_minor = int(torch_version[0]), int(torch_version[1]) - if torch_major == 2 and torch_minor >= 2: - if os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None: - os.environ[ - "PYTORCH_CUDA_ALLOC_CONF" - ] = "expandable_segments:True,roundup_power2_divisions:16" - - def setup_trainer( cfg, train_dataset, eval_dataset, model, tokenizer, processor, total_num_steps ):