fix(doc): update min torch version

feat: pin vllm to 0.8.5 for all torch
2025-05-02 10:13:59 -04:00 · 2025-05-02 10:13:59 -04:00
11 changed files with 10 additions and 73 deletions
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@ Features:

 - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
 - Python 3.11
- PyTorch ≥2.4.1
+- PyTorch ≥2.5.1

 ### Installation

--- a/setup.py
+++ b/setup.py
@@ -67,13 +67,11 @@ def parse_requirements(extras_require_map):
            if (major, minor) >= (2, 7):
                _install_requires.pop(_install_requires.index(xformers_version))
                # _install_requires.append("xformers==0.0.29.post3")  # xformers seems to be hard pinned to 2.6.0
-                extras_require_map["vllm"] = ["vllm==0.8.5"]
            elif (major, minor) >= (2, 6):
                _install_requires.pop(_install_requires.index(xformers_version))
                _install_requires.append(
                    "xformers==0.0.29.post2"
                )  # vllm needs post2 w torch 2.6
-                extras_require_map["vllm"] = ["vllm==0.8.5"]
            elif (major, minor) >= (2, 5):
                _install_requires.pop(_install_requires.index(xformers_version))
                if patch == 0:
@@ -147,7 +145,7 @@ extras_require = {
        "ray[train]",
    ],
    "vllm": [
-        "vllm==0.7.2",
+        "vllm==0.8.5",
    ],
    "llmcompressor": [
        "llmcompressor==0.5.1",
--- a/src/axolotl/cli/evaluate.py
+++ b/src/axolotl/cli/evaluate.py
@@ -15,7 +15,7 @@ from axolotl.cli.checks import check_accelerate_default_config, check_user_token
 from axolotl.cli.config import load_cfg
 from axolotl.common.datasets import load_datasets, load_preference_datasets
 from axolotl.evaluate import evaluate
-from axolotl.utils import patch_optimized_env
+from axolotl.utils import set_pytorch_cuda_alloc_conf
 from axolotl.utils.dict import DictDefault

 LOG = logging.getLogger(__name__)
@@ -32,7 +32,7 @@ def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None:
        cli_args: CLI arguments.
    """
    # Enable expandable segments for cuda allocation to improve VRAM usage
-    patch_optimized_env()
+    set_pytorch_cuda_alloc_conf()

    # pylint: disable=duplicate-code
    print_axolotl_text_art()
--- a/src/axolotl/cli/main.py
+++ b/src/axolotl/cli/main.py
@@ -29,7 +29,7 @@ from axolotl.cli.utils import (
    filter_none_kwargs,
 )
 from axolotl.integrations.lm_eval.cli import lm_eval
-from axolotl.utils import patch_optimized_env
+from axolotl.utils import set_pytorch_cuda_alloc_conf
 from axolotl.utils.schemas.config import AxolotlInputConfig


@@ -55,8 +55,6 @@ def preprocess(config: str, cloud: Optional[str] = None, **kwargs) -> None:
        kwargs: Additional keyword arguments which correspond to CLI args or `axolotl`
            config options.
    """
-    patch_optimized_env()
-
    if cloud:
        from axolotl.cli.cloud import do_cli_preprocess

@@ -102,7 +100,7 @@ def train(
            config options.
    """
    # Enable expandable segments for cuda allocation to improve VRAM usage
-    patch_optimized_env()
+    set_pytorch_cuda_alloc_conf()

    if "use_ray" in kwargs and kwargs["use_ray"]:
        accelerate = False
--- a/src/axolotl/cli/train.py
+++ b/src/axolotl/cli/train.py
@@ -18,7 +18,7 @@ from axolotl.cli.config import load_cfg
 from axolotl.common.datasets import load_datasets, load_preference_datasets
 from axolotl.integrations.base import PluginManager
 from axolotl.train import train
-from axolotl.utils import patch_optimized_env
+from axolotl.utils import set_pytorch_cuda_alloc_conf
 from axolotl.utils.config import normalize_config, resolve_dtype
 from axolotl.utils.dict import DictDefault

@@ -36,7 +36,7 @@ def do_train(cfg: DictDefault, cli_args: TrainerCliArgs):
        cli_args: Training-specific CLI arguments.
    """
    # Enable expandable segments for cuda allocation to improve VRAM usage
-    patch_optimized_env()
+    set_pytorch_cuda_alloc_conf()

    print_axolotl_text_art()
    check_accelerate_default_config()
--- a/src/axolotl/core/trainers/base.py
+++ b/src/axolotl/core/trainers/base.py
@@ -610,15 +610,3 @@ class AxolotlTrainer(
        output_dir = os.path.join(run_dir, checkpoint_folder)
        os.makedirs(output_dir, exist_ok=True)
        return super()._save_checkpoint(model, trial, **kwargs)
-
-    def compute_loss_context_manager(self):
-        from contextlib import ExitStack
-
-        from torchtune.training import OffloadActivations
-
-        stack = ExitStack()
-
-        stack.enter_context(super().compute_loss_context_manager())
-        stack.enter_context(OffloadActivations())
-
-        return stack
--- a/src/axolotl/monkeypatch/multipack.py
+++ b/src/axolotl/monkeypatch/multipack.py
@@ -18,8 +18,6 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
    "mixtral",
    "qwen2",
    "qwen2_moe",
-    "qwen3",
-    "qwen3_moe",
    "falcon",
    "phi",
    "phi3",
--- a/src/axolotl/utils/init.py
+++ b/src/axolotl/utils/init.py
@@ -43,12 +43,3 @@ def set_pytorch_cuda_alloc_conf():
            os.environ["PYTORCH_CUDA_ALLOC_CONF"] = (
                "expandable_segments:True,roundup_power2_divisions:16"
            )
-
-
-def patch_optimized_env():
-    """
-    Patch environment variables to improve VRAM usage and increase download speed
-    """
-    if os.getenv("HF_HUB_ENABLE_HF_TRANSFER") is None:
-        os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-    set_pytorch_cuda_alloc_conf()
--- a/src/axolotl/utils/config/init.py
+++ b/src/axolotl/utils/config/init.py
@@ -59,7 +59,7 @@ def choose_device(cfg):

 def resolve_dtype(cfg):
    if (
-        not cfg.fp16 and cfg.bf16 == "auto" and not cfg.use_ray
+        cfg.bf16 == "auto" and not cfg.use_ray
    ):  # if we use ray we want to defer this check to the worker node
        if is_torch_bf16_gpu_available():
            LOG.debug("bf16 support detected, enabling for this configuration.")
--- a/src/axolotl/utils/gradient_checkpointing/init.py
+++ b/src/axolotl/utils/gradient_checkpointing/init.py
@@ -2,13 +2,6 @@

 from functools import partial

-import torch
-from torch.utils.checkpoint import (
-    CheckpointPolicy,
-    checkpoint,
-    create_selective_checkpoint_contexts,
-)
-
 from axolotl.utils.gradient_checkpointing.unsloth import (
    Unsloth_Offloaded_Gradient_Checkpointer,
 )
@@ -25,32 +18,3 @@ def hf_grad_checkpoint_offload_wrapper(
        ),
        *args,
    )
-
-
-aten = torch.ops.aten
-compute_intensive_ops = [
-    aten.mm.default,
-    aten.bmm.default,
-    aten.addmm.default,
-]
-
-
-def policy_fn(ctx, op, *args, **kwargs):
-    if op in compute_intensive_ops:
-        return CheckpointPolicy.MUST_SAVE
-    else:
-        return CheckpointPolicy.PREFER_RECOMPUTE
-
-
-context_fn = partial(create_selective_checkpoint_contexts, policy_fn)
-
-
-def checkpoint_w_policy(
-    decoder_layer, *args, use_reentrant=None
-):  # pylint: disable=unused-argument
-    return checkpoint(
-        decoder_layer,
-        *args,
-        use_reentrant=use_reentrant,
-        context_fn=context_fn,
-    )
--- a/src/axolotl/utils/samplers/multipack.py
+++ b/src/axolotl/utils/samplers/multipack.py
@@ -190,7 +190,7 @@ class MultipackBatchSampler(BatchSampler):
        self.len_across_ranks = None

        if self.sequential and not isinstance(sampler, SequentialSampler):
-            LOG.warning(
+            LOG.warn(
                "using sequential sample packing with non-sequential sampler, did you want to also enable curriculum_sampling?"
            )
Author	SHA1	Message	Date
NanoCode012	3474a9df88	fix(doc): update min torch version	2025-05-02 10:13:59 -04:00
NanoCode012	f6151ce5cb	feat: pin vllm to 0.8.5 for all torch	2025-05-02 10:13:59 -04:00