From c071a530f727199701c1d204dc8f9eccb9feb99c Mon Sep 17 00:00:00 2001
From: salman <salman.mohammadi@outlook.com>
Date: Wed, 29 Jan 2025 04:23:44 +0000
Subject: [PATCH] Remove PyTorch 2.3.1 support (#2294)

---
 .github/workflows/base.yml                   | 12 -----------
 .github/workflows/main.yml                   | 22 +-------------------
 .github/workflows/multi-gpu-e2e.yml          |  6 ------
 .github/workflows/nightlies.yml              | 22 --------------------
 .github/workflows/tests-nightly.yml          |  9 +-------
 .github/workflows/tests.yml                  |  8 +------
 README.md                                    |  2 +-
 cicd/multigpu.py                             |  4 ++--
 cicd/tests.py                                |  4 ++--
 setup.py                                     | 20 +-----------------
 tests/e2e/patched/test_4d_multipack_llama.py |  3 +--
 tests/e2e/utils.py                           | 12 -----------
 12 files changed, 10 insertions(+), 114 deletions(-)

diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index 640d2cd7a..0f4b3c9ce 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -22,18 +22,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "121"
-            cuda_version: 12.1.1
-            cudnn_version: 8
-            python_version: "3.10"
-            pytorch: 2.3.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-          - cuda: "121"
-            cuda_version: 12.1.1
-            cudnn_version: 8
-            python_version: "3.11"
-            pytorch: 2.3.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
           - cuda: "124"
             cuda_version: 12.4.1
             cudnn_version: ""
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 89b2746e4..4f8074ad1 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,16 +15,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            axolotl_extras: mamba-ssm
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            axolotl_extras: mamba-ssm
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
@@ -82,16 +72,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            axolotl_extras:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            axolotl_extras:
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
@@ -148,7 +128,7 @@ jobs:
           - cuda: 121
             cuda_version: 12.1.1
             python_version: "3.11"
-            pytorch: 2.3.1
+            pytorch: 2.4.1
             axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
index 1c6702760..c3bcc517b 100644
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -20,12 +20,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            axolotl_extras:
-            num_gpus: 2
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml
index e266122c6..0efeb80b9 100644
--- a/.github/workflows/nightlies.yml
+++ b/.github/workflows/nightlies.yml
@@ -12,17 +12,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            axolotl_extras:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            axolotl_extras:
-            is_latest: true
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
@@ -76,17 +65,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            axolotl_extras:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.11"
-            pytorch: 2.3.1
-            axolotl_extras:
-            is_latest: true
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
index bbed4e2c2..178b0a4d1 100644
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -26,7 +26,7 @@ jobs:
       max-parallel: 2
       matrix:
         python_version: ["3.10", "3.11"]
-        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
+        pytorch_version: ["2.4.1", "2.5.1"]
         exclude:
           - python_version: "3.10"
             pytorch_version: "2.4.1"
@@ -98,13 +98,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            num_gpus: 1
-            axolotl_extras: mamba-ssm
-            nightly_build: "true"
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a2a0e801e..87d532a3b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -49,7 +49,7 @@ jobs:
       max-parallel: 2
       matrix:
         python_version: ["3.10", "3.11"]
-        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
+        pytorch_version: ["2.4.1", "2.5.1"]
         exclude:
           - python_version: "3.10"
             pytorch_version: "2.4.1"
@@ -244,12 +244,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 121
-            cuda_version: 12.1.1
-            python_version: "3.10"
-            pytorch: 2.3.1
-            num_gpus: 1
-            axolotl_extras: mamba-ssm
           - cuda: 124
             cuda_version: 12.4.1
             python_version: "3.11"
diff --git a/README.md b/README.md
index ff77b982e..6ee3237e7 100644
--- a/README.md
+++ b/README.md
@@ -109,7 +109,7 @@ Features:
 
 Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task.
 
-**Requirements**: *Nvidia* GPU (Ampere architecture or newer for `bf16` and Flash Attention) or *AMD* GPU, Python >=3.10 and PyTorch >=2.3.1.
+**Requirements**: *Nvidia* GPU (Ampere architecture or newer for `bf16` and Flash Attention) or *AMD* GPU, Python >=3.10 and PyTorch >=2.4.1.
 
 ```bash
 pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
diff --git a/cicd/multigpu.py b/cicd/multigpu.py
index f9bad386a..2c0863034 100644
--- a/cicd/multigpu.py
+++ b/cicd/multigpu.py
@@ -23,8 +23,8 @@ df_template = template_env.get_template("Dockerfile.jinja")
 df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
-    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.3.1"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu121-2.3.1"),
+    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.4.1"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu121-2.4.1"),
     "CUDA": os.environ.get("CUDA", "121"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
diff --git a/cicd/tests.py b/cicd/tests.py
index d7ae5b5e8..616554e64 100644
--- a/cicd/tests.py
+++ b/cicd/tests.py
@@ -23,8 +23,8 @@ df_template = template_env.get_template("Dockerfile.jinja")
 df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
     "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""),
-    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.3.1"),
-    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu121-2.3.1"),
+    "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.4.1"),
+    "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu121-2.4.1"),
     "CUDA": os.environ.get("CUDA", "121"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
diff --git a/setup.py b/setup.py
index d7cb18ec0..ac0c96def 100644
--- a/setup.py
+++ b/setup.py
@@ -32,8 +32,6 @@ def parse_requirements():
                 _install_requires.append(line)
     try:
         xformers_version = [req for req in _install_requires if "xformers" in req][0]
-        triton_version = [req for req in _install_requires if "triton" in req][0]
-        torchao_version = [req for req in _install_requires if "torchao" in req][0]
         autoawq_version = [req for req in _install_requires if "autoawq" in req][0]
         if "Darwin" in platform.system():
             # skip packages not compatible with OSX
@@ -87,24 +85,8 @@ def parse_requirements():
                 else:
                     _install_requires.pop(_install_requires.index(xformers_version))
                     _install_requires.append("xformers==0.0.28.post1")
-            elif (major, minor) >= (2, 3):
-                _install_requires.pop(_install_requires.index(torchao_version))
-                _install_requires.pop(_install_requires.index(triton_version))
-                _install_requires.append("triton>=2.3.1")
-                if patch == 0:
-                    _install_requires.pop(_install_requires.index(xformers_version))
-                    _install_requires.append("xformers>=0.0.26.post1")
-                else:
-                    _install_requires.pop(_install_requires.index(xformers_version))
-                    _install_requires.append("xformers>=0.0.27")
-            elif (major, minor) >= (2, 2):
-                _install_requires.pop(_install_requires.index(torchao_version))
-                _install_requires.pop(_install_requires.index(xformers_version))
-                _install_requires.append("xformers>=0.0.25.post1")
             else:
-                _install_requires.pop(_install_requires.index(torchao_version))
-                _install_requires.pop(_install_requires.index(xformers_version))
-                _install_requires.append("xformers>=0.0.23.post1")
+                raise ValueError("axolotl requires torch>=2.4")
 
     except PackageNotFoundError:
         pass
diff --git a/tests/e2e/patched/test_4d_multipack_llama.py b/tests/e2e/patched/test_4d_multipack_llama.py
index da27069ac..af8eb3742 100644
--- a/tests/e2e/patched/test_4d_multipack_llama.py
+++ b/tests/e2e/patched/test_4d_multipack_llama.py
@@ -12,7 +12,7 @@ from axolotl.train import train
 from axolotl.utils.config import normalize_config
 from axolotl.utils.dict import DictDefault
 
-from ..utils import check_model_output_exists, require_torch_2_3_1, with_temp_dir
+from ..utils import check_model_output_exists, with_temp_dir
 
 LOG = logging.getLogger("axolotl.tests.e2e")
 os.environ["WANDB_DISABLED"] = "true"
@@ -23,7 +23,6 @@ class Test4dMultipackLlama(unittest.TestCase):
     Test case for Llama models using 4d attention with multipack
     """
 
-    @require_torch_2_3_1
     @with_temp_dir
     def test_sdp_lora_packing(self, temp_dir):
         # pylint: disable=duplicate-code
diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py
index 759d59659..de0dba33a 100644
--- a/tests/e2e/utils.py
+++ b/tests/e2e/utils.py
@@ -42,18 +42,6 @@ def most_recent_subdir(path):
     return subdir
 
 
-def require_torch_2_3_1(test_case):
-    """
-    Decorator marking a test that requires torch >= 2.3.1
-    """
-
-    def is_min_2_3_1():
-        torch_version = version.parse(torch.__version__)
-        return torch_version >= version.parse("2.3.1")
-
-    return unittest.skipUnless(is_min_2_3_1(), "test requires torch>=2.3.1")(test_case)
-
-
 def require_torch_2_4_1(test_case):
     """
     Decorator marking a test that requires torch >= 2.5.1