From dda9b25994d2f3c09f1754132f0297f8632c0e9e Mon Sep 17 00:00:00 2001
From: Dan Saunders <danjsaund@gmail.com>
Date: Wed, 18 Dec 2024 01:38:56 +0000
Subject: [PATCH] fixes post-rebase

---
 src/axolotl/cli/main.py      |  3 +++
 src/axolotl/evaluate.py      |  8 ++------
 src/axolotl/train.py         |  4 ++--
 src/axolotl/utils/trainer.py | 11 -----------
 4 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/src/axolotl/cli/main.py b/src/axolotl/cli/main.py
index 5cb88a1ea..41354dcb0 100644
--- a/src/axolotl/cli/main.py
+++ b/src/axolotl/cli/main.py
@@ -82,6 +82,9 @@ def evaluate(config: str, accelerate: bool, **kwargs):
     """Evaluate a model."""
     kwargs = {k: v for k, v in kwargs.items() if v is not None}
 
+    # Enable expandable segments for CUDA allocation to improve VRAM usage
+    set_pytorch_cuda_alloc_conf()
+
     if accelerate:
         base_cmd = ["accelerate", "launch", "-m", "axolotl.cli.evaluate"]
         if config:
diff --git a/src/axolotl/evaluate.py b/src/axolotl/evaluate.py
index bc1799960..1c62fc6ab 100644
--- a/src/axolotl/evaluate.py
+++ b/src/axolotl/evaluate.py
@@ -12,10 +12,9 @@ from accelerate.logging import get_logger
 from axolotl.common.cli import EvaluateCliArgs, load_model_and_tokenizer
 from axolotl.logging_config import configure_logging
 from axolotl.train import TrainDatasetMeta
-from axolotl.utils import set_pytorch_cuda_alloc_conf
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.models import load_processor
-from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer
+from axolotl.utils.trainer import setup_trainer
 
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 src_dir = os.path.join(project_root, "src")
@@ -62,6 +61,7 @@ def evaluate_dataset(
     return metrics
 
 
+# pylint: disable=duplicate-code
 def evaluate(
     *, cfg: DictDefault, cli_args: EvaluateCliArgs, dataset_meta: TrainDatasetMeta
 ) -> Dict[str, float]:
@@ -79,10 +79,6 @@ def evaluate(
         - The tokenizer
         - Dictionary of evaluation metrics
     """
-    # pylint: disable=duplicate-code
-    # Enable expandable segments for cuda allocation to improve VRAM usage
-    set_pytorch_cuda_alloc_conf()
-
     # Load model
     LOG.debug("loading model for evaluation...")
 
diff --git a/src/axolotl/train.py b/src/axolotl/train.py
index 7f51175bf..dc7289b09 100644
--- a/src/axolotl/train.py
+++ b/src/axolotl/train.py
@@ -26,7 +26,7 @@ from axolotl.logging_config import configure_logging
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.freeze import freeze_layers_except
 from axolotl.utils.models import load_model, load_processor, load_tokenizer
-from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer
+from axolotl.utils.trainer import setup_trainer
 
 try:
     from optimum.bettertransformer import BetterTransformer
@@ -87,7 +87,7 @@ def train(
             )
     resume_from_checkpoint = cfg.resume_from_checkpoint
 
-    # Load the model
+    # Load the model and tokenizer
     msg = "loading model"
     if cfg.adapter:
         msg += " and peft_config..."
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index fd09b3eb6..32e54c9a8 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -512,17 +512,6 @@ def prepare_opinionated_env(cfg):
         os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 
-def set_pytorch_cuda_alloc_conf():
-    """Set up CUDA allocation config if using PyTorch >= 2.2"""
-    torch_version = torch.__version__.split(".")
-    torch_major, torch_minor = int(torch_version[0]), int(torch_version[1])
-    if torch_major == 2 and torch_minor >= 2:
-        if os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None:
-            os.environ[
-                "PYTORCH_CUDA_ALLOC_CONF"
-            ] = "expandable_segments:True,roundup_power2_divisions:16"
-
-
 def setup_trainer(
     cfg, train_dataset, eval_dataset, model, tokenizer, processor, total_num_steps
 ):