Basic evaluate CLI command / codepath (#2188)

* basic evaluate CLI command / codepath * tests for evaluate CLI command * fixes and cleanup * review comments; slightly DRYing up things --------- Co-authored-by: Dan Saunders <danjsaund@gmail.com>
2024-12-16 15:46:31 -05:00
parent 42bd32a233
commit 23ac14540b
3 changed files with 13 additions and 1 deletions
--- a/1
+++ b/1
@@ -0,0 +1 @@
 /workspace/data/axolotl-artifacts
--- a/src/axolotl/train.py
+++ b/src/axolotl/train.py
@@ -26,7 +26,7 @@ from axolotl.logging_config import configure_logging
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.freeze import freeze_layers_except
 from axolotl.utils.models import load_model, load_processor, load_tokenizer
-from axolotl.utils.trainer import setup_trainer
+from axolotl.utils.trainer import set_pytorch_cuda_alloc_conf, setup_trainer
 try:
    from optimum.bettertransformer import BetterTransformer
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -512,6 +512,17 @@ def prepare_opinionated_env(cfg):
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
 def set_pytorch_cuda_alloc_conf():
    """Set up CUDA allocation config if using PyTorch >= 2.2"""
    torch_version = torch.__version__.split(".")
    torch_major, torch_minor = int(torch_version[0]), int(torch_version[1])
    if torch_major == 2 and torch_minor >= 2:
        if os.getenv("PYTORCH_CUDA_ALLOC_CONF") is None:
            os.environ[
                "PYTORCH_CUDA_ALLOC_CONF"
            ] = "expandable_segments:True,roundup_power2_divisions:16"
 def setup_trainer(
    cfg, train_dataset, eval_dataset, model, tokenizer, processor, total_num_steps
 ):