From dda9b25994d2f3c09f1754132f0297f8632c0e9e Mon Sep 17 00:00:00 2001 From: Dan Saunders Date: Wed, 18 Dec 2024 01:38:56 +0000 Subject: [PATCH] fixes post-rebase --- src/axolotl/cli/main.py | 3 +++ src/axolotl/evaluate.py | 8 ++------ src/axolotl/train.py | 4 ++-- src/axolotl/utils/trainer.py | 11 ----------- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/axolotl/cli/main.py b/src/axolotl/cli/main.py index 5cb88a1ea..41354dcb0 100644 --- a/src/axolotl/cli/main.py +++ b/src/axolotl/cli/main.py @@ -82,6 +82,9 @@ def evaluate(config: str, accelerate: bool, **kwargs): """Evaluate a model.""" kwargs = {k: v for k, v in kwargs.items() if v is not None} + # Enable expandable segments for cuda allocation to improve VRAM usage + set_pytorch_cuda_alloc_conf() + if accelerate: base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.evaluate"] if config: diff --git a/src/axolotl/evaluate.py b/src/axolotl/evaluate.py index bc1799960..1c62fc6ab 100644 --- a/src/axolotl/evaluate.py +++ b/src/axolotl/evaluate.py @@ -12,10 +12,9 @@ from accelerate.logging import get_logger from axolotl.common.cli import EvaluateCliArgs, load_model_and_tokenizer from axolotl.logging_config import configure_logging from axolotl.train import TrainDatasetMeta -from axolotl.utils import set_pytorch_cuda_alloc_conf from axolotl.utils.dict import DictDefault from axolotl.utils.models import load_processor -from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer +from axolotl.utils.trainer import setup_trainer project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) src_dir = os.path.join(project_root, "src") @@ -62,6 +61,7 @@ def evaluate_dataset( return metrics +# pylint: disable=duplicate-code def evaluate( *, cfg: DictDefault, cli_args: EvaluateCliArgs, dataset_meta: TrainDatasetMeta ) -> Dict[str, float]: @@ -79,10 +79,6 @@ def evaluate( - The tokenizer - Dictionary of evaluation metrics """ - # pylint: disable=duplicate-code - # Enable expandable segments for cuda allocation to improve VRAM usage - set_pytorch_cuda_alloc_conf() - # Load model LOG.debug("loading model for evaluation...") diff --git a/src/axolotl/train.py b/src/axolotl/train.py index 7f51175bf..dc7289b09 100644 --- a/src/axolotl/train.py +++ b/src/axolotl/train.py @@ -26,7 +26,7 @@ from axolotl.logging_config import configure_logging from axolotl.utils.dict import DictDefault from axolotl.utils.freeze import freeze_layers_except from axolotl.utils.models import load_model, load_processor, load_tokenizer -from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer +from axolotl.utils.trainer import setup_trainer try: from optimum.bettertransformer import BetterTransformer @@ -87,7 +87,7 @@ def train( ) resume_from_checkpoint = cfg.resume_from_checkpoint - # Load the model + # Load the model and tokenizer msg = "loading model" if cfg.adapter: msg += " and peft_config..." diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index fd09b3eb6..32e54c9a8 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -512,17 +512,6 @@ def prepare_opinionated_env(cfg): os.environ["TOKENIZERS_PARALLELISM"] = "false" -def set_pytorch_cuda_alloc_conf(): - """Set up CUDA allocation config if using PyTorch >= 2.2""" - torch_version = torch.__version__.split(".") - torch_major, torch_minor = int(torch_version[0]), int(torch_version[1]) - if torch_major == 2 and torch_minor >= 2: - if os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None: - os.environ[ - "PYTORCH_CUDA_ALLOC_CONF" - ] = "expandable_segments:True,roundup_power2_divisions:16" - - def setup_trainer( cfg, train_dataset, eval_dataset, model, tokenizer, processor, total_num_steps ):